| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.118375, |
| "eval_steps": 500, |
| "global_step": 2900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00125, |
| "grad_norm": 0.36102330684661865, |
| "learning_rate": 5.5665e-06, |
| "loss": 2.681707572937012, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0025, |
| "grad_norm": 0.34577861428260803, |
| "learning_rate": 1.17515e-05, |
| "loss": 2.6720260620117187, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00375, |
| "grad_norm": 0.3295978009700775, |
| "learning_rate": 1.79365e-05, |
| "loss": 2.672147750854492, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 0.32688695192337036, |
| "learning_rate": 2.41215e-05, |
| "loss": 2.675041389465332, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.00625, |
| "grad_norm": 0.3257655203342438, |
| "learning_rate": 3.03065e-05, |
| "loss": 2.675174522399902, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0075, |
| "grad_norm": 0.336309552192688, |
| "learning_rate": 3.6491499999999994e-05, |
| "loss": 2.6966915130615234, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.00875, |
| "grad_norm": 0.3346744179725647, |
| "learning_rate": 4.26765e-05, |
| "loss": 2.6632720947265627, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.32752859592437744, |
| "learning_rate": 4.88615e-05, |
| "loss": 2.695608139038086, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01125, |
| "grad_norm": 0.32975664734840393, |
| "learning_rate": 5.50465e-05, |
| "loss": 2.6731294631958007, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 0.33192330598831177, |
| "learning_rate": 6.12315e-05, |
| "loss": 2.6482282638549806, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01375, |
| "grad_norm": 0.3244248032569885, |
| "learning_rate": 6.74165e-05, |
| "loss": 2.700460433959961, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 0.32452520728111267, |
| "learning_rate": 7.36015e-05, |
| "loss": 2.6673652648925783, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01625, |
| "grad_norm": 0.32950156927108765, |
| "learning_rate": 7.97865e-05, |
| "loss": 2.66015510559082, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0175, |
| "grad_norm": 0.3157300651073456, |
| "learning_rate": 8.597149999999999e-05, |
| "loss": 2.653401184082031, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01875, |
| "grad_norm": 0.3447306156158447, |
| "learning_rate": 9.21565e-05, |
| "loss": 2.638433837890625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.33060336112976074, |
| "learning_rate": 9.834150000000001e-05, |
| "loss": 2.6599313735961916, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.02125, |
| "grad_norm": 0.33370116353034973, |
| "learning_rate": 0.00010452649999999999, |
| "loss": 2.675436019897461, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0225, |
| "grad_norm": 0.32309311628341675, |
| "learning_rate": 0.0001107115, |
| "loss": 2.682134246826172, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02375, |
| "grad_norm": 0.3298942446708679, |
| "learning_rate": 0.0001168965, |
| "loss": 2.6672037124633787, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.3257051408290863, |
| "learning_rate": 0.0001230815, |
| "loss": 2.6710464477539064, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02625, |
| "grad_norm": 0.32734546065330505, |
| "learning_rate": 0.00012369959364576377, |
| "loss": 2.6694522857666017, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0275, |
| "grad_norm": 0.3286871016025543, |
| "learning_rate": 0.00012369818897130838, |
| "loss": 2.67569580078125, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.02875, |
| "grad_norm": 0.3206029534339905, |
| "learning_rate": 0.0001236957809826964, |
| "loss": 2.671968460083008, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.32244956493377686, |
| "learning_rate": 0.0001236923697189907, |
| "loss": 2.6653528213500977, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 0.3286353647708893, |
| "learning_rate": 0.00012368795523552952, |
| "loss": 2.644626998901367, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0325, |
| "grad_norm": 0.31846532225608826, |
| "learning_rate": 0.00012368253760392556, |
| "loss": 2.6661434173583984, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03375, |
| "grad_norm": 0.34063664078712463, |
| "learning_rate": 0.00012367611691206466, |
| "loss": 2.658544921875, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 0.3394038677215576, |
| "learning_rate": 0.00012366869326410474, |
| "loss": 2.671076202392578, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.03625, |
| "grad_norm": 0.3454046845436096, |
| "learning_rate": 0.00012366026678047368, |
| "loss": 2.690570068359375, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 0.32945406436920166, |
| "learning_rate": 0.00012365083759786766, |
| "loss": 2.6626564025878907, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03875, |
| "grad_norm": 0.3266613483428955, |
| "learning_rate": 0.00012364040586924886, |
| "loss": 2.6811601638793947, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.32137027382850647, |
| "learning_rate": 0.0001236289717638429, |
| "loss": 2.656772422790527, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04125, |
| "grad_norm": 0.31430286169052124, |
| "learning_rate": 0.00012361653546713627, |
| "loss": 2.667566680908203, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0425, |
| "grad_norm": 0.3187640905380249, |
| "learning_rate": 0.00012360309718087312, |
| "loss": 2.6774127960205076, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04375, |
| "grad_norm": 0.3238705098628998, |
| "learning_rate": 0.00012358865712305212, |
| "loss": 2.650909423828125, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 0.3178948163986206, |
| "learning_rate": 0.00012357321552792288, |
| "loss": 2.6466007232666016, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04625, |
| "grad_norm": 0.3393631875514984, |
| "learning_rate": 0.0001235567726459822, |
| "loss": 2.6694786071777346, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0475, |
| "grad_norm": 0.33097463846206665, |
| "learning_rate": 0.00012353932874396988, |
| "loss": 2.6705909729003907, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.04875, |
| "grad_norm": 0.3237457275390625, |
| "learning_rate": 0.00012352088410486452, |
| "loss": 2.666813087463379, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.32804396748542786, |
| "learning_rate": 0.0001235014390278789, |
| "loss": 2.6341262817382813, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05125, |
| "grad_norm": 0.3098997473716736, |
| "learning_rate": 0.0001234809938284551, |
| "loss": 2.653286361694336, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0525, |
| "grad_norm": 0.31869447231292725, |
| "learning_rate": 0.00012345954883825937, |
| "loss": 2.6676279067993165, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05375, |
| "grad_norm": 0.3462599813938141, |
| "learning_rate": 0.0001234371044051768, |
| "loss": 2.6937137603759767, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 0.33410680294036865, |
| "learning_rate": 0.00012341366089330566, |
| "loss": 2.6624752044677735, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05625, |
| "grad_norm": 0.3401891589164734, |
| "learning_rate": 0.00012338921868295142, |
| "loss": 2.6673324584960936, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0575, |
| "grad_norm": 0.3144513964653015, |
| "learning_rate": 0.00012336377817062075, |
| "loss": 2.6684280395507813, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.05875, |
| "grad_norm": 0.31319352984428406, |
| "learning_rate": 0.00012333733976901485, |
| "loss": 2.6631874084472655, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.3231050670146942, |
| "learning_rate": 0.00012330990390702298, |
| "loss": 2.6671581268310547, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.06125, |
| "grad_norm": 0.3283950686454773, |
| "learning_rate": 0.00012328147102971544, |
| "loss": 2.6682722091674806, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 0.3203584849834442, |
| "learning_rate": 0.0001232520415983362, |
| "loss": 2.6619497299194337, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06375, |
| "grad_norm": 0.3314996063709259, |
| "learning_rate": 0.00012322161609029563, |
| "loss": 2.675333023071289, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 0.3124040961265564, |
| "learning_rate": 0.00012319019499916267, |
| "loss": 2.674266052246094, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.06625, |
| "grad_norm": 0.334187775850296, |
| "learning_rate": 0.0001231577788346567, |
| "loss": 2.6644060134887697, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0675, |
| "grad_norm": 0.33853819966316223, |
| "learning_rate": 0.00012312436812263953, |
| "loss": 2.6285802841186525, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.06875, |
| "grad_norm": 0.3226993680000305, |
| "learning_rate": 0.00012308996340510664, |
| "loss": 2.6620355606079102, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.31965890526771545, |
| "learning_rate": 0.0001230545652401785, |
| "loss": 2.669430160522461, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.07125, |
| "grad_norm": 0.323632150888443, |
| "learning_rate": 0.00012301817420209152, |
| "loss": 2.6710559844970705, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0725, |
| "grad_norm": 0.3202168047428131, |
| "learning_rate": 0.00012298079088118863, |
| "loss": 2.6743343353271483, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.07375, |
| "grad_norm": 0.3278695344924927, |
| "learning_rate": 0.00012294241588390982, |
| "loss": 2.643411636352539, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.3302673101425171, |
| "learning_rate": 0.0001229030498327823, |
| "loss": 2.7156848907470703, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07625, |
| "grad_norm": 0.31964629888534546, |
| "learning_rate": 0.00012286269336641027, |
| "loss": 2.6369789123535154, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0775, |
| "grad_norm": 0.32528844475746155, |
| "learning_rate": 0.00012282134713946472, |
| "loss": 2.655129241943359, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.07875, |
| "grad_norm": 0.3346538245677948, |
| "learning_rate": 0.00012277901182267275, |
| "loss": 2.6634849548339843, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.32035592198371887, |
| "learning_rate": 0.00012273568810280665, |
| "loss": 2.6622406005859376, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.08125, |
| "grad_norm": 0.32753705978393555, |
| "learning_rate": 0.00012269137668267276, |
| "loss": 2.6673862457275392, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0825, |
| "grad_norm": 0.3323623538017273, |
| "learning_rate": 0.00012264607828110018, |
| "loss": 2.6660182952880858, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.08375, |
| "grad_norm": 0.3228432238101959, |
| "learning_rate": 0.0001225997936329289, |
| "loss": 2.690377044677734, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.3340938687324524, |
| "learning_rate": 0.00012255252348899816, |
| "loss": 2.6579252243041993, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.08625, |
| "grad_norm": 0.32717493176460266, |
| "learning_rate": 0.00012250426861613406, |
| "loss": 2.6669349670410156, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 0.3213510513305664, |
| "learning_rate": 0.0001224550297971371, |
| "loss": 2.658818817138672, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.08875, |
| "grad_norm": 0.3103785216808319, |
| "learning_rate": 0.00012240480783076967, |
| "loss": 2.64670467376709, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.3206445276737213, |
| "learning_rate": 0.00012235360353174288, |
| "loss": 2.649314117431641, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.09125, |
| "grad_norm": 0.3210267722606659, |
| "learning_rate": 0.00012230141773070355, |
| "loss": 2.6637636184692384, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0925, |
| "grad_norm": 0.312549352645874, |
| "learning_rate": 0.00012224825127422055, |
| "loss": 2.6725765228271485, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 0.32557615637779236, |
| "learning_rate": 0.00012219410502477114, |
| "loss": 2.6337608337402343, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.31713125109672546, |
| "learning_rate": 0.00012213897986072705, |
| "loss": 2.6361785888671876, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.09625, |
| "grad_norm": 0.3173486590385437, |
| "learning_rate": 0.00012208287667634017, |
| "loss": 2.6491493225097655, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0975, |
| "grad_norm": 0.32202011346817017, |
| "learning_rate": 0.00012202579638172791, |
| "loss": 2.665495681762695, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.09875, |
| "grad_norm": 0.31751731038093567, |
| "learning_rate": 0.0001219677399028587, |
| "loss": 2.670880889892578, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.3310263156890869, |
| "learning_rate": 0.00012190870818153682, |
| "loss": 2.6745986938476562, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10125, |
| "grad_norm": 0.3246520757675171, |
| "learning_rate": 0.00012184870217538704, |
| "loss": 2.6367824554443358, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.1025, |
| "grad_norm": 0.31728002429008484, |
| "learning_rate": 0.0001217877228578393, |
| "loss": 2.657224655151367, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.10375, |
| "grad_norm": 0.32666370272636414, |
| "learning_rate": 0.00012172577121811272, |
| "loss": 2.629240798950195, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.32864195108413696, |
| "learning_rate": 0.00012166284826119965, |
| "loss": 2.6314460754394533, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.10625, |
| "grad_norm": 0.331391304731369, |
| "learning_rate": 0.00012159895500784936, |
| "loss": 2.6207229614257814, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1075, |
| "grad_norm": 0.32856595516204834, |
| "learning_rate": 0.00012153409249455148, |
| "loss": 2.6828586578369142, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.10875, |
| "grad_norm": 0.3259557783603668, |
| "learning_rate": 0.00012146826177351913, |
| "loss": 2.6800840377807615, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.3368566930294037, |
| "learning_rate": 0.00012140146391267196, |
| "loss": 2.644548797607422, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.11125, |
| "grad_norm": 0.3319634199142456, |
| "learning_rate": 0.00012133369999561872, |
| "loss": 2.6457305908203126, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 0.31302639842033386, |
| "learning_rate": 0.00012126497112163972, |
| "loss": 2.6418832778930663, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.11375, |
| "grad_norm": 0.32079464197158813, |
| "learning_rate": 0.00012119527840566905, |
| "loss": 2.6311697006225585, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 0.32719048857688904, |
| "learning_rate": 0.00012112462297827639, |
| "loss": 2.641567611694336, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.11625, |
| "grad_norm": 0.32264548540115356, |
| "learning_rate": 0.00012105300598564874, |
| "loss": 2.6696403503417967, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1175, |
| "grad_norm": 0.3197903335094452, |
| "learning_rate": 0.00012098042858957183, |
| "loss": 2.6566593170166017, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.11875, |
| "grad_norm": 0.3231068253517151, |
| "learning_rate": 0.00012090689196741124, |
| "loss": 2.63052978515625, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.3268223702907562, |
| "learning_rate": 0.00012083239731209331, |
| "loss": 2.6513845443725588, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.12125, |
| "grad_norm": 0.3304605484008789, |
| "learning_rate": 0.00012075694583208578, |
| "loss": 2.6264434814453126, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1225, |
| "grad_norm": 0.3171931505203247, |
| "learning_rate": 0.00012068053875137824, |
| "loss": 2.636788558959961, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.12375, |
| "grad_norm": 0.3341807425022125, |
| "learning_rate": 0.00012060317730946224, |
| "loss": 2.6531208038330076, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.3334127962589264, |
| "learning_rate": 0.00012052486276131108, |
| "loss": 2.6705049514770507, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12625, |
| "grad_norm": 0.307980477809906, |
| "learning_rate": 0.00012044559637735965, |
| "loss": 2.6561138153076174, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1275, |
| "grad_norm": 0.31699395179748535, |
| "learning_rate": 0.00012036537944348368, |
| "loss": 2.633596420288086, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.12875, |
| "grad_norm": 0.32349589467048645, |
| "learning_rate": 0.0001202842132609789, |
| "loss": 2.651826858520508, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.3407875895500183, |
| "learning_rate": 0.00012020209914653999, |
| "loss": 2.6381755828857423, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.13125, |
| "grad_norm": 0.31691980361938477, |
| "learning_rate": 0.00012011903843223914, |
| "loss": 2.6360122680664064, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1325, |
| "grad_norm": 0.31067660450935364, |
| "learning_rate": 0.0001200350324655045, |
| "loss": 2.6421882629394533, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.13375, |
| "grad_norm": 0.32634156942367554, |
| "learning_rate": 0.0001199500826090983, |
| "loss": 2.63830509185791, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.3369225263595581, |
| "learning_rate": 0.00011986419024109472, |
| "loss": 2.63408203125, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.13625, |
| "grad_norm": 0.3302381932735443, |
| "learning_rate": 0.0001197773567548576, |
| "loss": 2.6358100891113283, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 0.33104801177978516, |
| "learning_rate": 0.00011968958355901778, |
| "loss": 2.6341053009033204, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.13875, |
| "grad_norm": 0.3302455544471741, |
| "learning_rate": 0.00011960087207745023, |
| "loss": 2.659340667724609, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.318013995885849, |
| "learning_rate": 0.00011951122374925103, |
| "loss": 2.6539737701416017, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.14125, |
| "grad_norm": 0.31688031554222107, |
| "learning_rate": 0.00011942064002871398, |
| "loss": 2.650745391845703, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1425, |
| "grad_norm": 0.3218444883823395, |
| "learning_rate": 0.00011932912238530696, |
| "loss": 2.6293779373168946, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.14375, |
| "grad_norm": 0.31668025255203247, |
| "learning_rate": 0.0001192366723036482, |
| "loss": 2.652189254760742, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.32894524931907654, |
| "learning_rate": 0.0001191432912834821, |
| "loss": 2.6034008026123048, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.14625, |
| "grad_norm": 0.326031357049942, |
| "learning_rate": 0.00011904898083965494, |
| "loss": 2.6356990814208983, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.1475, |
| "grad_norm": 0.3148091733455658, |
| "learning_rate": 0.00011895374250209033, |
| "loss": 2.6438148498535154, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.14875, |
| "grad_norm": 0.3154153823852539, |
| "learning_rate": 0.00011885757781576434, |
| "loss": 2.653242301940918, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.31809449195861816, |
| "learning_rate": 0.00011876048834068046, |
| "loss": 2.6228126525878905, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.15125, |
| "grad_norm": 0.32725268602371216, |
| "learning_rate": 0.0001186624756518443, |
| "loss": 2.6216796875, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.1525, |
| "grad_norm": 0.32540032267570496, |
| "learning_rate": 0.00011856354133923805, |
| "loss": 2.67537841796875, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.15375, |
| "grad_norm": 0.3263508975505829, |
| "learning_rate": 0.00011846368700779467, |
| "loss": 2.6610176086425783, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 0.3205776512622833, |
| "learning_rate": 0.00011836291427737183, |
| "loss": 2.6613521575927734, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 0.31028124690055847, |
| "learning_rate": 0.00011826122478272567, |
| "loss": 2.633769416809082, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.1575, |
| "grad_norm": 0.31673797965049744, |
| "learning_rate": 0.00011815862017348429, |
| "loss": 2.624924087524414, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.15875, |
| "grad_norm": 0.32373106479644775, |
| "learning_rate": 0.00011805510211412097, |
| "loss": 2.6462501525878905, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.31725797057151794, |
| "learning_rate": 0.0001179506722839271, |
| "loss": 2.6365428924560548, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.16125, |
| "grad_norm": 0.3195420205593109, |
| "learning_rate": 0.00011784533237698511, |
| "loss": 2.6311481475830076, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 0.3341420888900757, |
| "learning_rate": 0.00011773908410214081, |
| "loss": 2.642291450500488, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.16375, |
| "grad_norm": 0.3230491876602173, |
| "learning_rate": 0.00011763192918297575, |
| "loss": 2.638113594055176, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.3223067820072174, |
| "learning_rate": 0.0001175238693577793, |
| "loss": 2.6444271087646483, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.16625, |
| "grad_norm": 0.31934627890586853, |
| "learning_rate": 0.00011741490637952035, |
| "loss": 2.6657215118408204, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1675, |
| "grad_norm": 0.3097170889377594, |
| "learning_rate": 0.00011730504201581893, |
| "loss": 2.645807647705078, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.16875, |
| "grad_norm": 0.32414084672927856, |
| "learning_rate": 0.00011719427804891757, |
| "loss": 2.641864776611328, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.31383687257766724, |
| "learning_rate": 0.00011708261627565232, |
| "loss": 2.662236785888672, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.17125, |
| "grad_norm": 0.31501343846321106, |
| "learning_rate": 0.00011697005850742364, |
| "loss": 2.6557693481445312, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1725, |
| "grad_norm": 0.31809887290000916, |
| "learning_rate": 0.00011685660657016701, |
| "loss": 2.6280593872070312, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.17375, |
| "grad_norm": 0.31885311007499695, |
| "learning_rate": 0.0001167422623043233, |
| "loss": 2.6564004898071287, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.3105798065662384, |
| "learning_rate": 0.00011662702756480891, |
| "loss": 2.64355354309082, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.17625, |
| "grad_norm": 0.3361447751522064, |
| "learning_rate": 0.00011651090422098569, |
| "loss": 2.6594215393066407, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.1775, |
| "grad_norm": 0.32253745198249817, |
| "learning_rate": 0.00011639389415663065, |
| "loss": 2.642239570617676, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.17875, |
| "grad_norm": 0.32338932156562805, |
| "learning_rate": 0.00011627599926990531, |
| "loss": 2.6702959060668947, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.3116281032562256, |
| "learning_rate": 0.00011615722147332501, |
| "loss": 2.6370218276977537, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.18125, |
| "grad_norm": 0.3282069265842438, |
| "learning_rate": 0.00011603756269372781, |
| "loss": 2.589012336730957, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.1825, |
| "grad_norm": 0.32347872853279114, |
| "learning_rate": 0.00011591702487224326, |
| "loss": 2.638626480102539, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.18375, |
| "grad_norm": 0.31963029503822327, |
| "learning_rate": 0.0001157956099642609, |
| "loss": 2.6150590896606447, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.31573331356048584, |
| "learning_rate": 0.00011567331993939861, |
| "loss": 2.6242300033569337, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.18625, |
| "grad_norm": 0.318210631608963, |
| "learning_rate": 0.00011555015678147051, |
| "loss": 2.6236839294433594, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.3299921751022339, |
| "learning_rate": 0.0001154261224884549, |
| "loss": 2.633551597595215, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.18875, |
| "grad_norm": 0.32802239060401917, |
| "learning_rate": 0.00011530121907246187, |
| "loss": 2.650678253173828, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.3139156401157379, |
| "learning_rate": 0.0001151754485597005, |
| "loss": 2.6056631088256834, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.19125, |
| "grad_norm": 0.320236474275589, |
| "learning_rate": 0.00011504881299044619, |
| "loss": 2.6355617523193358, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.1925, |
| "grad_norm": 0.3379780054092407, |
| "learning_rate": 0.00011492131441900742, |
| "loss": 2.6405055999755858, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.19375, |
| "grad_norm": 0.3395773470401764, |
| "learning_rate": 0.00011479295491369245, |
| "loss": 2.6217134475708006, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.33206456899642944, |
| "learning_rate": 0.00011466373655677584, |
| "loss": 2.6553268432617188, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.19625, |
| "grad_norm": 0.3266463577747345, |
| "learning_rate": 0.00011453366144446457, |
| "loss": 2.615655517578125, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.1975, |
| "grad_norm": 0.3166464567184448, |
| "learning_rate": 0.0001144027316868641, |
| "loss": 2.6240345001220704, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.19875, |
| "grad_norm": 0.31986290216445923, |
| "learning_rate": 0.00011427094940794416, |
| "loss": 2.6230613708496096, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.3255802392959595, |
| "learning_rate": 0.00011413831674550421, |
| "loss": 2.6539276123046873, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.20125, |
| "grad_norm": 0.3255312144756317, |
| "learning_rate": 0.00011400483585113883, |
| "loss": 2.6217121124267577, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.2025, |
| "grad_norm": 0.3323643207550049, |
| "learning_rate": 0.0001138705088902028, |
| "loss": 2.652513885498047, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.20375, |
| "grad_norm": 0.3227868974208832, |
| "learning_rate": 0.00011373533804177592, |
| "loss": 2.630014991760254, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.31701064109802246, |
| "learning_rate": 0.00011359932549862779, |
| "loss": 2.639967346191406, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.20625, |
| "grad_norm": 0.3187071681022644, |
| "learning_rate": 0.00011346247346718207, |
| "loss": 2.6362884521484373, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2075, |
| "grad_norm": 0.31707099080085754, |
| "learning_rate": 0.00011332478416748083, |
| "loss": 2.649311065673828, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.20875, |
| "grad_norm": 0.3297825753688812, |
| "learning_rate": 0.00011318625983314848, |
| "loss": 2.6421716690063475, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.3198815584182739, |
| "learning_rate": 0.00011304690271135548, |
| "loss": 2.633087730407715, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.21125, |
| "grad_norm": 0.3226505219936371, |
| "learning_rate": 0.00011290671506278205, |
| "loss": 2.6442310333251955, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.2125, |
| "grad_norm": 0.33370015025138855, |
| "learning_rate": 0.00011276569916158123, |
| "loss": 2.6304306030273437, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.21375, |
| "grad_norm": 0.3307320773601532, |
| "learning_rate": 0.0001126238572953423, |
| "loss": 2.6353145599365235, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.31320619583129883, |
| "learning_rate": 0.00011248119176505343, |
| "loss": 2.6117172241210938, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.21625, |
| "grad_norm": 0.3411354422569275, |
| "learning_rate": 0.00011233770488506444, |
| "loss": 2.6199378967285156, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.2175, |
| "grad_norm": 0.3345658779144287, |
| "learning_rate": 0.0001121933989830493, |
| "loss": 2.617340850830078, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 0.328173965215683, |
| "learning_rate": 0.0001120482763999683, |
| "loss": 2.646270751953125, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.31834596395492554, |
| "learning_rate": 0.00011190233949003007, |
| "loss": 2.6598697662353517, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.22125, |
| "grad_norm": 0.32211023569107056, |
| "learning_rate": 0.00011175559062065348, |
| "loss": 2.617197036743164, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2225, |
| "grad_norm": 0.30770230293273926, |
| "learning_rate": 0.00011160803217242911, |
| "loss": 2.6376068115234377, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.22375, |
| "grad_norm": 0.3243764042854309, |
| "learning_rate": 0.00011145966653908078, |
| "loss": 2.606427764892578, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.33548685908317566, |
| "learning_rate": 0.00011131049612742655, |
| "loss": 2.6384208679199217, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.22625, |
| "grad_norm": 0.3262486159801483, |
| "learning_rate": 0.00011116052335733979, |
| "loss": 2.658290672302246, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.2275, |
| "grad_norm": 0.31495559215545654, |
| "learning_rate": 0.00011100975066170992, |
| "loss": 2.662753105163574, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.22875, |
| "grad_norm": 0.3250574469566345, |
| "learning_rate": 0.00011085818048640288, |
| "loss": 2.6388259887695313, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.34293144941329956, |
| "learning_rate": 0.00011070581529022152, |
| "loss": 2.6388187408447266, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.23125, |
| "grad_norm": 0.31609639525413513, |
| "learning_rate": 0.00011055265754486565, |
| "loss": 2.637576675415039, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2325, |
| "grad_norm": 0.3181133270263672, |
| "learning_rate": 0.00011039870973489204, |
| "loss": 2.634903907775879, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.23375, |
| "grad_norm": 0.3416786193847656, |
| "learning_rate": 0.00011024397435767398, |
| "loss": 2.616485023498535, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.315266489982605, |
| "learning_rate": 0.00011008845392336087, |
| "loss": 2.6373340606689455, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.23625, |
| "grad_norm": 0.3316870927810669, |
| "learning_rate": 0.0001099321509548375, |
| "loss": 2.6363605499267577, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.2375, |
| "grad_norm": 0.3230259418487549, |
| "learning_rate": 0.00010977506798768303, |
| "loss": 2.5958734512329102, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.23875, |
| "grad_norm": 0.32272425293922424, |
| "learning_rate": 0.00010961720757012995, |
| "loss": 2.608958435058594, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.3131502568721771, |
| "learning_rate": 0.00010945857226302276, |
| "loss": 2.6321544647216797, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.24125, |
| "grad_norm": 0.32143065333366394, |
| "learning_rate": 0.00010929916463977628, |
| "loss": 2.613364410400391, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.2425, |
| "grad_norm": 0.3152971565723419, |
| "learning_rate": 0.00010913898728633408, |
| "loss": 2.613265800476074, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.24375, |
| "grad_norm": 0.32848265767097473, |
| "learning_rate": 0.00010897804280112643, |
| "loss": 2.6013004302978517, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.000875, |
| "grad_norm": 0.3237718939781189, |
| "learning_rate": 0.00010881633379502814, |
| "loss": 2.8611122131347657, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.002125, |
| "grad_norm": 0.3280915915966034, |
| "learning_rate": 0.00010865386289131632, |
| "loss": 2.5412445068359375, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.003375, |
| "grad_norm": 0.33189550042152405, |
| "learning_rate": 0.00010849063272562764, |
| "loss": 2.559256362915039, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.004625, |
| "grad_norm": 0.3265272378921509, |
| "learning_rate": 0.00010832664594591574, |
| "loss": 2.5583423614501952, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.005875, |
| "grad_norm": 0.3453090488910675, |
| "learning_rate": 0.00010816190521240819, |
| "loss": 2.5712684631347655, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.007125, |
| "grad_norm": 0.3423366844654083, |
| "learning_rate": 0.00010799641319756335, |
| "loss": 2.5412336349487306, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.008375, |
| "grad_norm": 0.32097378373146057, |
| "learning_rate": 0.00010783017258602704, |
| "loss": 2.5253084182739256, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.009625, |
| "grad_norm": 0.3252958357334137, |
| "learning_rate": 0.00010766318607458898, |
| "loss": 2.5738031387329103, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.010875, |
| "grad_norm": 0.3372173011302948, |
| "learning_rate": 0.00010749545637213897, |
| "loss": 2.54388370513916, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.012125, |
| "grad_norm": 0.33359599113464355, |
| "learning_rate": 0.00010732698619962306, |
| "loss": 2.55248908996582, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.013375, |
| "grad_norm": 0.34591928124427795, |
| "learning_rate": 0.00010715777828999937, |
| "loss": 2.5376352310180663, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.014625, |
| "grad_norm": 0.35322073101997375, |
| "learning_rate": 0.00010698783538819372, |
| "loss": 2.534122085571289, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.015875, |
| "grad_norm": 0.3539016544818878, |
| "learning_rate": 0.00010681716025105512, |
| "loss": 2.492664337158203, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.017125, |
| "grad_norm": 0.3429170548915863, |
| "learning_rate": 0.00010664575564731107, |
| "loss": 2.5008804321289064, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.018375, |
| "grad_norm": 0.3576091229915619, |
| "learning_rate": 0.00010647362435752263, |
| "loss": 2.5176633834838866, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.019625, |
| "grad_norm": 0.3297135829925537, |
| "learning_rate": 0.00010630076917403929, |
| "loss": 2.500911331176758, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.020875, |
| "grad_norm": 0.33164292573928833, |
| "learning_rate": 0.00010612719290095374, |
| "loss": 2.513214111328125, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.022125, |
| "grad_norm": 0.32680895924568176, |
| "learning_rate": 0.00010595289835405624, |
| "loss": 2.501193809509277, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.023375, |
| "grad_norm": 0.33465543389320374, |
| "learning_rate": 0.00010577788836078916, |
| "loss": 2.4999351501464844, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.024625, |
| "grad_norm": 0.3344171941280365, |
| "learning_rate": 0.00010560216576020092, |
| "loss": 2.4867813110351564, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.025875, |
| "grad_norm": 0.34607550501823425, |
| "learning_rate": 0.00010542573340289998, |
| "loss": 2.503824234008789, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.027125, |
| "grad_norm": 0.33892592787742615, |
| "learning_rate": 0.00010524859415100871, |
| "loss": 2.4990135192871095, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.028375, |
| "grad_norm": 0.3448082208633423, |
| "learning_rate": 0.00010507075087811677, |
| "loss": 2.4324840545654296, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.029625, |
| "grad_norm": 0.3321894407272339, |
| "learning_rate": 0.00010489220646923464, |
| "loss": 2.4842708587646483, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.030875, |
| "grad_norm": 0.3443576395511627, |
| "learning_rate": 0.0001047129638207468, |
| "loss": 2.485816764831543, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.032125, |
| "grad_norm": 0.3381134271621704, |
| "learning_rate": 0.00010453302584036468, |
| "loss": 2.4841537475585938, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.033375, |
| "grad_norm": 0.3401469588279724, |
| "learning_rate": 0.00010435239544707952, |
| "loss": 2.48382453918457, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.034625, |
| "grad_norm": 0.35364025831222534, |
| "learning_rate": 0.00010417107557111507, |
| "loss": 2.4872058868408202, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.035875, |
| "grad_norm": 0.3584776818752289, |
| "learning_rate": 0.00010398906915388, |
| "loss": 2.455089569091797, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.037125, |
| "grad_norm": 0.3385666608810425, |
| "learning_rate": 0.00010380637914792015, |
| "loss": 2.4457998275756836, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.038375, |
| "grad_norm": 0.3520835340023041, |
| "learning_rate": 0.00010362300851687071, |
| "loss": 2.479095458984375, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.039625, |
| "grad_norm": 0.34799060225486755, |
| "learning_rate": 0.00010343896023540814, |
| "loss": 2.4659198760986327, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.040875, |
| "grad_norm": 0.35186630487442017, |
| "learning_rate": 0.00010325423728920182, |
| "loss": 2.4467798233032227, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.042125, |
| "grad_norm": 0.3423445224761963, |
| "learning_rate": 0.00010306884267486574, |
| "loss": 2.4702438354492187, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.043375, |
| "grad_norm": 0.3398495018482208, |
| "learning_rate": 0.00010288277939990981, |
| "loss": 2.471152496337891, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.044625, |
| "grad_norm": 0.34717217087745667, |
| "learning_rate": 0.00010269605048269109, |
| "loss": 2.4720317840576174, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.045875, |
| "grad_norm": 0.34331125020980835, |
| "learning_rate": 0.00010250865895236482, |
| "loss": 2.4562469482421876, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.047125, |
| "grad_norm": 0.35022589564323425, |
| "learning_rate": 0.00010232060784883528, |
| "loss": 2.461803436279297, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.048375, |
| "grad_norm": 0.3725920617580414, |
| "learning_rate": 0.00010213190022270653, |
| "loss": 2.4350805282592773, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.049625, |
| "grad_norm": 0.3634240925312042, |
| "learning_rate": 0.00010194253913523282, |
| "loss": 2.454206848144531, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.050875, |
| "grad_norm": 0.35172227025032043, |
| "learning_rate": 0.000101752527658269, |
| "loss": 2.4318115234375, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.052125, |
| "grad_norm": 0.35827362537384033, |
| "learning_rate": 0.00010156186887422071, |
| "loss": 2.4692001342773438, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.053375, |
| "grad_norm": 0.36834755539894104, |
| "learning_rate": 0.00010137056587599428, |
| "loss": 2.4683910369873048, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.054625, |
| "grad_norm": 0.3573245108127594, |
| "learning_rate": 0.00010117862176694666, |
| "loss": 2.4428688049316407, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.055875, |
| "grad_norm": 0.33202221989631653, |
| "learning_rate": 0.00010098603966083503, |
| "loss": 2.4585454940795897, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.057125, |
| "grad_norm": 0.35598650574684143, |
| "learning_rate": 0.00010079282268176628, |
| "loss": 2.4740036010742186, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.058375, |
| "grad_norm": 0.36041730642318726, |
| "learning_rate": 0.00010059897396414633, |
| "loss": 2.4598981857299806, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.059625, |
| "grad_norm": 0.3481718599796295, |
| "learning_rate": 0.00010040449665262931, |
| "loss": 2.4539608001708983, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.060875, |
| "grad_norm": 0.3672044277191162, |
| "learning_rate": 0.00010020939390206654, |
| "loss": 2.433728790283203, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.062125, |
| "grad_norm": 0.35078802704811096, |
| "learning_rate": 0.00010001366887745531, |
| "loss": 2.454706573486328, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.063375, |
| "grad_norm": 0.36530086398124695, |
| "learning_rate": 9.981732475388758e-05, |
| "loss": 2.4748252868652343, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.064625, |
| "grad_norm": 0.3578907251358032, |
| "learning_rate": 9.962036471649851e-05, |
| "loss": 2.480423355102539, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.065875, |
| "grad_norm": 0.370403528213501, |
| "learning_rate": 9.942279196041466e-05, |
| "loss": 2.480521011352539, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.067125, |
| "grad_norm": 0.36263224482536316, |
| "learning_rate": 9.922460969070231e-05, |
| "loss": 2.4786655426025392, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.068375, |
| "grad_norm": 0.35858920216560364, |
| "learning_rate": 9.902582112231533e-05, |
| "loss": 2.461780548095703, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.069625, |
| "grad_norm": 0.35088691115379333, |
| "learning_rate": 9.882642948004314e-05, |
| "loss": 2.4797664642333985, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.070875, |
| "grad_norm": 0.36078205704689026, |
| "learning_rate": 9.862643799845839e-05, |
| "loss": 2.4529985427856444, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.072125, |
| "grad_norm": 0.35207876563072205, |
| "learning_rate": 9.842584992186434e-05, |
| "loss": 2.4753444671630858, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.073375, |
| "grad_norm": 0.36091098189353943, |
| "learning_rate": 9.822466850424243e-05, |
| "loss": 2.4327056884765623, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.074625, |
| "grad_norm": 0.3577967584133148, |
| "learning_rate": 9.802289700919933e-05, |
| "loss": 2.461964416503906, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.075875, |
| "grad_norm": 0.36178645491600037, |
| "learning_rate": 9.782053870991414e-05, |
| "loss": 2.4669708251953124, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.077125, |
| "grad_norm": 0.3427974581718445, |
| "learning_rate": 9.761759688908519e-05, |
| "loss": 2.4416053771972654, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.078375, |
| "grad_norm": 0.3656075894832611, |
| "learning_rate": 9.741407483887678e-05, |
| "loss": 2.4402462005615235, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.079625, |
| "grad_norm": 0.37002095580101013, |
| "learning_rate": 9.720997586086587e-05, |
| "loss": 2.451791191101074, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.080875, |
| "grad_norm": 0.3515098989009857, |
| "learning_rate": 9.700530326598842e-05, |
| "loss": 2.459187889099121, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.082125, |
| "grad_norm": 0.3765217959880829, |
| "learning_rate": 9.680006037448575e-05, |
| "loss": 2.4384769439697265, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.083375, |
| "grad_norm": 0.5566070675849915, |
| "learning_rate": 9.659425051585065e-05, |
| "loss": 2.4481531143188477, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.084625, |
| "grad_norm": 0.3657555878162384, |
| "learning_rate": 9.638787702877333e-05, |
| "loss": 2.470143508911133, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.085875, |
| "grad_norm": 0.35306495428085327, |
| "learning_rate": 9.618094326108734e-05, |
| "loss": 2.4623140335083007, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.087125, |
| "grad_norm": 0.35708507895469666, |
| "learning_rate": 9.597345256971521e-05, |
| "loss": 2.4393037796020507, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.088375, |
| "grad_norm": 0.36429449915885925, |
| "learning_rate": 9.576540832061398e-05, |
| "loss": 2.4460866928100584, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.089625, |
| "grad_norm": 0.3617342710494995, |
| "learning_rate": 9.555681388872065e-05, |
| "loss": 2.476423454284668, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.090875, |
| "grad_norm": 0.3526591360569, |
| "learning_rate": 9.534767265789737e-05, |
| "loss": 2.460892105102539, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.092125, |
| "grad_norm": 0.3697713613510132, |
| "learning_rate": 9.51379880208766e-05, |
| "loss": 2.46860294342041, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.093375, |
| "grad_norm": 0.37604451179504395, |
| "learning_rate": 9.492776337920603e-05, |
| "loss": 2.465809631347656, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.094625, |
| "grad_norm": 0.37269482016563416, |
| "learning_rate": 9.471700214319343e-05, |
| "loss": 2.4291683197021485, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.095875, |
| "grad_norm": 0.37273484468460083, |
| "learning_rate": 9.45057077318513e-05, |
| "loss": 2.447264862060547, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.097125, |
| "grad_norm": 0.3633696734905243, |
| "learning_rate": 9.429388357284143e-05, |
| "loss": 2.471749114990234, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.098375, |
| "grad_norm": 0.35682767629623413, |
| "learning_rate": 9.40815331024193e-05, |
| "loss": 2.42556209564209, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.099625, |
| "grad_norm": 0.3471936285495758, |
| "learning_rate": 9.386865976537827e-05, |
| "loss": 2.446389007568359, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.100875, |
| "grad_norm": 0.38089418411254883, |
| "learning_rate": 9.365526701499384e-05, |
| "loss": 2.4501571655273438, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.102125, |
| "grad_norm": 0.3654205799102783, |
| "learning_rate": 9.344135831296749e-05, |
| "loss": 2.439041519165039, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.103375, |
| "grad_norm": 0.3512708842754364, |
| "learning_rate": 9.322693712937054e-05, |
| "loss": 2.4336933135986327, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.104625, |
| "grad_norm": 0.36569294333457947, |
| "learning_rate": 9.301200694258795e-05, |
| "loss": 2.444048309326172, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.105875, |
| "grad_norm": 0.36901962757110596, |
| "learning_rate": 9.279657123926178e-05, |
| "loss": 2.4316547393798826, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.107125, |
| "grad_norm": 0.36593225598335266, |
| "learning_rate": 9.25806335142348e-05, |
| "loss": 2.4314062118530275, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.108375, |
| "grad_norm": 0.371039479970932, |
| "learning_rate": 9.236419727049352e-05, |
| "loss": 2.4478275299072267, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.109625, |
| "grad_norm": 0.3607841730117798, |
| "learning_rate": 9.214726601911162e-05, |
| "loss": 2.471347999572754, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.110875, |
| "grad_norm": 0.35733747482299805, |
| "learning_rate": 9.192984327919289e-05, |
| "loss": 2.4454570770263673, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.112125, |
| "grad_norm": 0.3512793183326721, |
| "learning_rate": 9.171193257781413e-05, |
| "loss": 2.4474578857421876, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.113375, |
| "grad_norm": 0.3591439127922058, |
| "learning_rate": 9.149353744996798e-05, |
| "loss": 2.3968666076660154, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.114625, |
| "grad_norm": 0.37512722611427307, |
| "learning_rate": 9.127466143850551e-05, |
| "loss": 2.4625476837158202, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.115875, |
| "grad_norm": 0.3683817982673645, |
| "learning_rate": 9.105530809407877e-05, |
| "loss": 2.4239782333374023, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.117125, |
| "grad_norm": 0.350392609834671, |
| "learning_rate": 9.08354809750833e-05, |
| "loss": 2.4604770660400392, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.118375, |
| "grad_norm": 0.3627133071422577, |
| "learning_rate": 9.061518364760018e-05, |
| "loss": 2.4404422760009767, |
| "step": 2900 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 8000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.8847708770441626e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|