| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.7777777777777777, |
| "eval_steps": 100, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008888888888888889, |
| "grad_norm": 4.868620036481513, |
| "learning_rate": 1.4792899408284025e-07, |
| "loss": 1.254, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.017777777777777778, |
| "grad_norm": 4.907376524481664, |
| "learning_rate": 2.958579881656805e-07, |
| "loss": 1.2718, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 4.924364407249927, |
| "learning_rate": 4.4378698224852073e-07, |
| "loss": 1.2432, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.035555555555555556, |
| "grad_norm": 3.4088932051993455, |
| "learning_rate": 5.91715976331361e-07, |
| "loss": 1.2112, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 2.9113656297806103, |
| "learning_rate": 7.396449704142013e-07, |
| "loss": 1.2001, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 2.3204535662374544, |
| "learning_rate": 8.875739644970415e-07, |
| "loss": 1.1006, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06222222222222222, |
| "grad_norm": 1.8516910199282557, |
| "learning_rate": 1.0355029585798817e-06, |
| "loss": 1.0786, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 1.8962999657740618, |
| "learning_rate": 1.183431952662722e-06, |
| "loss": 1.0518, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.6146752217951077, |
| "learning_rate": 1.3313609467455623e-06, |
| "loss": 1.0076, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 1.6900956108641927, |
| "learning_rate": 1.4792899408284026e-06, |
| "loss": 1.0064, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "eval_loss": 0.9964859485626221, |
| "eval_runtime": 37.5504, |
| "eval_samples_per_second": 53.262, |
| "eval_steps_per_second": 6.658, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09777777777777778, |
| "grad_norm": 1.8080972395438724, |
| "learning_rate": 1.6272189349112426e-06, |
| "loss": 0.9937, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 1.7684913996303595, |
| "learning_rate": 1.775147928994083e-06, |
| "loss": 0.9937, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.11555555555555555, |
| "grad_norm": 1.4352385051753562, |
| "learning_rate": 1.9230769230769234e-06, |
| "loss": 0.9882, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.12444444444444444, |
| "grad_norm": 1.5616454174683354, |
| "learning_rate": 2.0710059171597635e-06, |
| "loss": 0.9763, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 1.763519993122216, |
| "learning_rate": 2.2189349112426035e-06, |
| "loss": 1.0136, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 1.6007254163395832, |
| "learning_rate": 2.366863905325444e-06, |
| "loss": 0.9758, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1511111111111111, |
| "grad_norm": 1.5948360204769163, |
| "learning_rate": 2.5147928994082845e-06, |
| "loss": 0.9505, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5057997454321306, |
| "learning_rate": 2.6627218934911246e-06, |
| "loss": 0.9591, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1688888888888889, |
| "grad_norm": 1.5390209924258027, |
| "learning_rate": 2.8106508875739646e-06, |
| "loss": 0.9463, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 1.461925655745632, |
| "learning_rate": 2.958579881656805e-06, |
| "loss": 0.9226, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "eval_loss": 0.9364051222801208, |
| "eval_runtime": 36.7175, |
| "eval_samples_per_second": 54.47, |
| "eval_steps_per_second": 6.809, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.18666666666666668, |
| "grad_norm": 1.5374045647524592, |
| "learning_rate": 3.106508875739645e-06, |
| "loss": 0.9472, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.19555555555555557, |
| "grad_norm": 1.5912062989392914, |
| "learning_rate": 3.2544378698224853e-06, |
| "loss": 0.9343, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.20444444444444446, |
| "grad_norm": 1.7723775001947806, |
| "learning_rate": 3.4023668639053257e-06, |
| "loss": 0.9391, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 1.6435892953143434, |
| "learning_rate": 3.550295857988166e-06, |
| "loss": 0.9368, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 1.4424599436942067, |
| "learning_rate": 3.6982248520710063e-06, |
| "loss": 0.9319, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2311111111111111, |
| "grad_norm": 1.728159212104628, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 0.8866, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.3326845832416128, |
| "learning_rate": 3.9940828402366864e-06, |
| "loss": 0.9045, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.24888888888888888, |
| "grad_norm": 1.6886292509720702, |
| "learning_rate": 4.142011834319527e-06, |
| "loss": 0.9265, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2577777777777778, |
| "grad_norm": 1.6019493123530608, |
| "learning_rate": 4.289940828402367e-06, |
| "loss": 0.9331, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 1.6146648266355992, |
| "learning_rate": 4.437869822485207e-06, |
| "loss": 0.9237, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "eval_loss": 0.9182068109512329, |
| "eval_runtime": 36.535, |
| "eval_samples_per_second": 54.742, |
| "eval_steps_per_second": 6.843, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.27555555555555555, |
| "grad_norm": 1.3862729857445228, |
| "learning_rate": 4.5857988165680475e-06, |
| "loss": 0.9306, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 1.651953174906888, |
| "learning_rate": 4.733727810650888e-06, |
| "loss": 0.9459, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.29333333333333333, |
| "grad_norm": 1.755393092768391, |
| "learning_rate": 4.8816568047337285e-06, |
| "loss": 0.9519, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3022222222222222, |
| "grad_norm": 1.5247145129317299, |
| "learning_rate": 4.99999464967688e-06, |
| "loss": 0.9419, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 1.5673209156579126, |
| "learning_rate": 4.999807390772256e-06, |
| "loss": 0.9357, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.5928733064190967, |
| "learning_rate": 4.999352638611963e-06, |
| "loss": 0.947, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3288888888888889, |
| "grad_norm": 1.7766805751830876, |
| "learning_rate": 4.998630441857007e-06, |
| "loss": 0.9325, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3377777777777778, |
| "grad_norm": 1.511653536436131, |
| "learning_rate": 4.997640877786446e-06, |
| "loss": 0.906, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3466666666666667, |
| "grad_norm": 1.4941501475149104, |
| "learning_rate": 4.996384052289124e-06, |
| "loss": 0.9349, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.5183679914795427, |
| "learning_rate": 4.994860099852339e-06, |
| "loss": 0.9025, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "eval_loss": 0.9070032835006714, |
| "eval_runtime": 36.6, |
| "eval_samples_per_second": 54.645, |
| "eval_steps_per_second": 6.831, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.36444444444444446, |
| "grad_norm": 1.5024543245788338, |
| "learning_rate": 4.993069183547456e-06, |
| "loss": 0.9083, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 1.6343439673384825, |
| "learning_rate": 4.991011495012451e-06, |
| "loss": 0.9507, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.38222222222222224, |
| "grad_norm": 1.7262818739661763, |
| "learning_rate": 4.98868725443141e-06, |
| "loss": 0.9567, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.39111111111111113, |
| "grad_norm": 1.6061518007474969, |
| "learning_rate": 4.986096710510968e-06, |
| "loss": 0.9333, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.6028615990046051, |
| "learning_rate": 4.9832401404536915e-06, |
| "loss": 0.9358, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4088888888888889, |
| "grad_norm": 1.570005612548181, |
| "learning_rate": 4.980117849928419e-06, |
| "loss": 0.8983, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4177777777777778, |
| "grad_norm": 1.5495976820237416, |
| "learning_rate": 4.976730173037556e-06, |
| "loss": 0.9312, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 1.6311647053909046, |
| "learning_rate": 4.973077472281319e-06, |
| "loss": 0.9197, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.43555555555555553, |
| "grad_norm": 1.4775941411466225, |
| "learning_rate": 4.969160138518946e-06, |
| "loss": 0.9067, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.60402515279859, |
| "learning_rate": 4.964978590926879e-06, |
| "loss": 0.9169, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_loss": 0.8998147249221802, |
| "eval_runtime": 37.1803, |
| "eval_samples_per_second": 53.792, |
| "eval_steps_per_second": 6.724, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4533333333333333, |
| "grad_norm": 1.4700844366599264, |
| "learning_rate": 4.960533276953902e-06, |
| "loss": 0.9235, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4622222222222222, |
| "grad_norm": 1.6631199340680172, |
| "learning_rate": 4.955824672273265e-06, |
| "loss": 0.9018, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4711111111111111, |
| "grad_norm": 1.7111177341739554, |
| "learning_rate": 4.950853280731785e-06, |
| "loss": 0.9181, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4535444071016672, |
| "learning_rate": 4.945619634295929e-06, |
| "loss": 0.9499, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 1.3828751580671985, |
| "learning_rate": 4.940124292994895e-06, |
| "loss": 0.9081, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 1.6075817183262429, |
| "learning_rate": 4.9343678448606816e-06, |
| "loss": 0.9224, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5066666666666667, |
| "grad_norm": 1.5519162793061656, |
| "learning_rate": 4.928350905865165e-06, |
| "loss": 0.9026, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5155555555555555, |
| "grad_norm": 1.635116289002661, |
| "learning_rate": 4.92207411985419e-06, |
| "loss": 0.9227, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5244444444444445, |
| "grad_norm": 1.5336828605332502, |
| "learning_rate": 4.915538158478674e-06, |
| "loss": 0.9226, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.6800188276640182, |
| "learning_rate": 4.908743721122734e-06, |
| "loss": 0.8682, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "eval_loss": 0.893156886100769, |
| "eval_runtime": 36.6444, |
| "eval_samples_per_second": 54.579, |
| "eval_steps_per_second": 6.822, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5422222222222223, |
| "grad_norm": 1.5154383041086907, |
| "learning_rate": 4.901691534828853e-06, |
| "loss": 0.8901, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5511111111111111, |
| "grad_norm": 1.5069638989634389, |
| "learning_rate": 4.894382354220077e-06, |
| "loss": 0.9143, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5826508156504, |
| "learning_rate": 4.886816961419272e-06, |
| "loss": 0.8948, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 1.5966300524059291, |
| "learning_rate": 4.8789961659654276e-06, |
| "loss": 0.9182, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 1.5542668195010378, |
| "learning_rate": 4.870920804727034e-06, |
| "loss": 0.9145, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 1.5961850069901384, |
| "learning_rate": 4.862591741812533e-06, |
| "loss": 0.8982, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5955555555555555, |
| "grad_norm": 1.4942467173404939, |
| "learning_rate": 4.8540098684778505e-06, |
| "loss": 0.9123, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6044444444444445, |
| "grad_norm": 1.5917562014794315, |
| "learning_rate": 4.845176103031035e-06, |
| "loss": 0.8976, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6133333333333333, |
| "grad_norm": 1.6105397844621, |
| "learning_rate": 4.836091390733983e-06, |
| "loss": 0.8688, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 1.6497475274540199, |
| "learning_rate": 4.826756703701298e-06, |
| "loss": 0.8827, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "eval_loss": 0.8888917565345764, |
| "eval_runtime": 36.7414, |
| "eval_samples_per_second": 54.435, |
| "eval_steps_per_second": 6.804, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6311111111111111, |
| "grad_norm": 1.4922158605433329, |
| "learning_rate": 4.817173040796263e-06, |
| "loss": 0.8649, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.6152696298420064, |
| "learning_rate": 4.807341427523969e-06, |
| "loss": 0.9177, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6488888888888888, |
| "grad_norm": 1.5961353957665738, |
| "learning_rate": 4.797262915921561e-06, |
| "loss": 0.8991, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6577777777777778, |
| "grad_norm": 1.5667464158602376, |
| "learning_rate": 4.7869385844456825e-06, |
| "loss": 0.9503, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.5781196814649665, |
| "learning_rate": 4.776369537857062e-06, |
| "loss": 0.9105, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6755555555555556, |
| "grad_norm": 1.503944616712719, |
| "learning_rate": 4.765556907102306e-06, |
| "loss": 0.9263, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6844444444444444, |
| "grad_norm": 1.6473086807633828, |
| "learning_rate": 4.7545018491928755e-06, |
| "loss": 0.8819, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 1.4117747098000493, |
| "learning_rate": 4.743205547081281e-06, |
| "loss": 0.8922, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7022222222222222, |
| "grad_norm": 1.4708303212935516, |
| "learning_rate": 4.731669209534504e-06, |
| "loss": 0.9025, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.5712775868269122, |
| "learning_rate": 4.719894071004645e-06, |
| "loss": 0.9096, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "eval_loss": 0.8853357434272766, |
| "eval_runtime": 36.6826, |
| "eval_samples_per_second": 54.522, |
| "eval_steps_per_second": 6.815, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4534761342687224, |
| "learning_rate": 4.707881391496837e-06, |
| "loss": 0.9035, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7288888888888889, |
| "grad_norm": 1.5619341405621416, |
| "learning_rate": 4.695632456434414e-06, |
| "loss": 0.8942, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7377777777777778, |
| "grad_norm": 1.6530414569806062, |
| "learning_rate": 4.683148576521363e-06, |
| "loss": 0.895, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 1.500702857508256, |
| "learning_rate": 4.670431087602079e-06, |
| "loss": 0.8826, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 1.5622670153829135, |
| "learning_rate": 4.657481350518409e-06, |
| "loss": 0.9172, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7644444444444445, |
| "grad_norm": 1.5483259428719673, |
| "learning_rate": 4.644300750964045e-06, |
| "loss": 0.9304, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7733333333333333, |
| "grad_norm": 1.468282447027174, |
| "learning_rate": 4.630890699336244e-06, |
| "loss": 0.8819, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 1.671539158314848, |
| "learning_rate": 4.6172526305849094e-06, |
| "loss": 0.8929, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7911111111111111, |
| "grad_norm": 1.6595480420379722, |
| "learning_rate": 4.603388004059037e-06, |
| "loss": 0.9401, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4846657745286256, |
| "learning_rate": 4.589298303350565e-06, |
| "loss": 0.9054, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 0.8810927867889404, |
| "eval_runtime": 35.9747, |
| "eval_samples_per_second": 55.595, |
| "eval_steps_per_second": 6.949, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8088888888888889, |
| "grad_norm": 1.8077709952778653, |
| "learning_rate": 4.574985036135613e-06, |
| "loss": 0.8756, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8177777777777778, |
| "grad_norm": 1.5746787689057065, |
| "learning_rate": 4.5604497340131635e-06, |
| "loss": 0.8656, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8266666666666667, |
| "grad_norm": 1.4640338579966414, |
| "learning_rate": 4.545693952341159e-06, |
| "loss": 0.934, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.8355555555555556, |
| "grad_norm": 1.6059690989804483, |
| "learning_rate": 4.5307192700700804e-06, |
| "loss": 0.9242, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 1.5097697360744204, |
| "learning_rate": 4.515527289573986e-06, |
| "loss": 0.906, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 1.4679308675173353, |
| "learning_rate": 4.50011963647905e-06, |
| "loss": 0.9128, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8622222222222222, |
| "grad_norm": 1.6181492334082097, |
| "learning_rate": 4.484497959489608e-06, |
| "loss": 0.9166, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8711111111111111, |
| "grad_norm": 1.558080687611738, |
| "learning_rate": 4.468663930211743e-06, |
| "loss": 0.8939, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.615024667449851, |
| "learning_rate": 4.452619242974408e-06, |
| "loss": 0.895, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.4440060757462254, |
| "learning_rate": 4.436365614648128e-06, |
| "loss": 0.86, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_loss": 0.8786353468894958, |
| "eval_runtime": 35.8226, |
| "eval_samples_per_second": 55.831, |
| "eval_steps_per_second": 6.979, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8977777777777778, |
| "grad_norm": 1.410698981929063, |
| "learning_rate": 4.4199047844612825e-06, |
| "loss": 0.9238, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9066666666666666, |
| "grad_norm": 1.518825110706773, |
| "learning_rate": 4.4032385138139985e-06, |
| "loss": 0.9239, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.9155555555555556, |
| "grad_norm": 1.6639850633807787, |
| "learning_rate": 4.386368586089674e-06, |
| "loss": 0.8846, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.9244444444444444, |
| "grad_norm": 1.6210196882366308, |
| "learning_rate": 4.369296806464141e-06, |
| "loss": 0.9081, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 1.5864235639001156, |
| "learning_rate": 4.3520250017125076e-06, |
| "loss": 0.8935, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.9422222222222222, |
| "grad_norm": 1.572481998249584, |
| "learning_rate": 4.334555020013675e-06, |
| "loss": 0.8712, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.9511111111111111, |
| "grad_norm": 1.458386818849127, |
| "learning_rate": 4.316888730752583e-06, |
| "loss": 0.9231, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4402325640506388, |
| "learning_rate": 4.299028024320166e-06, |
| "loss": 0.8799, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.9688888888888889, |
| "grad_norm": 1.340308449364537, |
| "learning_rate": 4.280974811911071e-06, |
| "loss": 0.9094, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 1.5272352631206383, |
| "learning_rate": 4.262731025319159e-06, |
| "loss": 0.9017, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "eval_loss": 0.8765040040016174, |
| "eval_runtime": 36.05, |
| "eval_samples_per_second": 55.478, |
| "eval_steps_per_second": 6.935, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9866666666666667, |
| "grad_norm": 1.5223097724923493, |
| "learning_rate": 4.244298616730781e-06, |
| "loss": 0.906, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9955555555555555, |
| "grad_norm": 1.5240087315865598, |
| "learning_rate": 4.2256795585158894e-06, |
| "loss": 0.9239, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0044444444444445, |
| "grad_norm": 1.4157186107882624, |
| "learning_rate": 4.2068758430169805e-06, |
| "loss": 0.8415, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.0133333333333334, |
| "grad_norm": 1.3850656963075334, |
| "learning_rate": 4.187889482335905e-06, |
| "loss": 0.795, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.0222222222222221, |
| "grad_norm": 1.5545137194154015, |
| "learning_rate": 4.168722508118562e-06, |
| "loss": 0.8158, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.031111111111111, |
| "grad_norm": 1.436097134909851, |
| "learning_rate": 4.1493769713374995e-06, |
| "loss": 0.8242, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 1.4545054070568668, |
| "learning_rate": 4.12985494207245e-06, |
| "loss": 0.8222, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.048888888888889, |
| "grad_norm": 1.6781126937570572, |
| "learning_rate": 4.110158509288822e-06, |
| "loss": 0.7896, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.0577777777777777, |
| "grad_norm": 1.6451559840416778, |
| "learning_rate": 4.090289780614167e-06, |
| "loss": 0.8267, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 1.8165606673067634, |
| "learning_rate": 4.070250882112652e-06, |
| "loss": 0.8243, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "eval_loss": 0.8799266219139099, |
| "eval_runtime": 35.8312, |
| "eval_samples_per_second": 55.817, |
| "eval_steps_per_second": 6.977, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0755555555555556, |
| "grad_norm": 1.5731027558902222, |
| "learning_rate": 4.050043958057561e-06, |
| "loss": 0.7882, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.0844444444444445, |
| "grad_norm": 1.5157766166322681, |
| "learning_rate": 4.029671170701841e-06, |
| "loss": 0.7994, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.0933333333333333, |
| "grad_norm": 1.656247414095981, |
| "learning_rate": 4.009134700046735e-06, |
| "loss": 0.8028, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.1022222222222222, |
| "grad_norm": 1.6145210447139136, |
| "learning_rate": 3.988436743608506e-06, |
| "loss": 0.7792, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 1.5895335709957146, |
| "learning_rate": 3.967579516183292e-06, |
| "loss": 0.8461, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 1.5213968296343328, |
| "learning_rate": 3.946565249610108e-06, |
| "loss": 0.8084, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.1288888888888888, |
| "grad_norm": 1.6388524059550809, |
| "learning_rate": 3.925396192532032e-06, |
| "loss": 0.796, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.1377777777777778, |
| "grad_norm": 1.6244300431135235, |
| "learning_rate": 3.90407461015558e-06, |
| "loss": 0.8123, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.1466666666666667, |
| "grad_norm": 1.6092762028671914, |
| "learning_rate": 3.882602784008327e-06, |
| "loss": 0.7696, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "grad_norm": 1.5021829727375555, |
| "learning_rate": 3.8609830116947596e-06, |
| "loss": 0.8015, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "eval_loss": 0.8797820210456848, |
| "eval_runtime": 36.1345, |
| "eval_samples_per_second": 55.349, |
| "eval_steps_per_second": 6.919, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1644444444444444, |
| "grad_norm": 1.615911865378651, |
| "learning_rate": 3.839217606650426e-06, |
| "loss": 0.8034, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.1733333333333333, |
| "grad_norm": 1.7219139523140692, |
| "learning_rate": 3.817308897894387e-06, |
| "loss": 0.8028, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.1822222222222223, |
| "grad_norm": 1.5565441923912047, |
| "learning_rate": 3.7952592297799904e-06, |
| "loss": 0.7707, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.1911111111111112, |
| "grad_norm": 1.5035890539708752, |
| "learning_rate": 3.7730709617440227e-06, |
| "loss": 0.7985, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 1.4799634437203686, |
| "learning_rate": 3.750746468054227e-06, |
| "loss": 0.7902, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.208888888888889, |
| "grad_norm": 1.3993362652699162, |
| "learning_rate": 3.7282881375552475e-06, |
| "loss": 0.7858, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.2177777777777778, |
| "grad_norm": 1.53157940698711, |
| "learning_rate": 3.70569837341301e-06, |
| "loss": 0.8245, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.2266666666666666, |
| "grad_norm": 1.5047456544840674, |
| "learning_rate": 3.6829795928575703e-06, |
| "loss": 0.7838, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.2355555555555555, |
| "grad_norm": 1.6203070493661107, |
| "learning_rate": 3.6601342269244528e-06, |
| "loss": 0.8158, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 1.722919280316014, |
| "learning_rate": 3.6371647201945216e-06, |
| "loss": 0.7866, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "eval_loss": 0.8785194158554077, |
| "eval_runtime": 36.4407, |
| "eval_samples_per_second": 54.884, |
| "eval_steps_per_second": 6.86, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2533333333333334, |
| "grad_norm": 1.5717366793745968, |
| "learning_rate": 3.6140735305323943e-06, |
| "loss": 0.7952, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.2622222222222224, |
| "grad_norm": 1.6799739973960937, |
| "learning_rate": 3.5908631288234374e-06, |
| "loss": 0.8413, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.271111111111111, |
| "grad_norm": 1.7727944476099253, |
| "learning_rate": 3.5675359987093665e-06, |
| "loss": 0.8119, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 1.6172985005067175, |
| "learning_rate": 3.5440946363224855e-06, |
| "loss": 0.7956, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.2888888888888888, |
| "grad_norm": 1.346340940135618, |
| "learning_rate": 3.5205415500185836e-06, |
| "loss": 0.7975, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.2977777777777777, |
| "grad_norm": 1.5095896332297247, |
| "learning_rate": 3.4968792601085296e-06, |
| "loss": 0.8253, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.3066666666666666, |
| "grad_norm": 1.7498432683213323, |
| "learning_rate": 3.473110298588584e-06, |
| "loss": 0.823, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.3155555555555556, |
| "grad_norm": 1.5152609108895707, |
| "learning_rate": 3.4492372088694605e-06, |
| "loss": 0.838, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.3244444444444445, |
| "grad_norm": 1.6196695100410112, |
| "learning_rate": 3.4252625455041684e-06, |
| "loss": 0.8212, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 1.4865828502504883, |
| "learning_rate": 3.4011888739146587e-06, |
| "loss": 0.8163, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "eval_loss": 0.8743957281112671, |
| "eval_runtime": 36.0452, |
| "eval_samples_per_second": 55.486, |
| "eval_steps_per_second": 6.936, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3422222222222222, |
| "grad_norm": 1.4717740488802518, |
| "learning_rate": 3.377018770117315e-06, |
| "loss": 0.8238, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.3511111111111112, |
| "grad_norm": 1.559356688866601, |
| "learning_rate": 3.3527548204472985e-06, |
| "loss": 0.824, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 1.674377707216799, |
| "learning_rate": 3.3283996212818015e-06, |
| "loss": 0.7708, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.3688888888888888, |
| "grad_norm": 1.6987925635864727, |
| "learning_rate": 3.303955778762217e-06, |
| "loss": 0.8285, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.3777777777777778, |
| "grad_norm": 1.6182932763097473, |
| "learning_rate": 3.2794259085152703e-06, |
| "loss": 0.8112, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.3866666666666667, |
| "grad_norm": 1.5442535633131087, |
| "learning_rate": 3.254812635373128e-06, |
| "loss": 0.7727, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.3955555555555557, |
| "grad_norm": 1.6816544725748876, |
| "learning_rate": 3.2301185930925318e-06, |
| "loss": 0.7945, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.4044444444444444, |
| "grad_norm": 1.8215956260335184, |
| "learning_rate": 3.205346424072967e-06, |
| "loss": 0.8047, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.4133333333333333, |
| "grad_norm": 1.548045904992036, |
| "learning_rate": 3.180498779073915e-06, |
| "loss": 0.7649, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 1.7043703318380858, |
| "learning_rate": 3.1555783169312048e-06, |
| "loss": 0.8066, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "eval_loss": 0.8725046515464783, |
| "eval_runtime": 35.6589, |
| "eval_samples_per_second": 56.087, |
| "eval_steps_per_second": 7.011, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.431111111111111, |
| "grad_norm": 1.629807976853238, |
| "learning_rate": 3.1305877042725036e-06, |
| "loss": 0.8237, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 1.5484379121797882, |
| "learning_rate": 3.1055296152319732e-06, |
| "loss": 0.8076, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.448888888888889, |
| "grad_norm": 1.7262688696739736, |
| "learning_rate": 3.0804067311641217e-06, |
| "loss": 0.8333, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.4577777777777778, |
| "grad_norm": 1.8260535210069693, |
| "learning_rate": 3.0552217403568855e-06, |
| "loss": 0.7926, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 1.6750611032983032, |
| "learning_rate": 3.0299773377439677e-06, |
| "loss": 0.7915, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.4755555555555555, |
| "grad_norm": 1.3990474179417702, |
| "learning_rate": 3.0046762246164608e-06, |
| "loss": 0.8013, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.4844444444444445, |
| "grad_norm": 1.8809571305092, |
| "learning_rate": 2.979321108333799e-06, |
| "loss": 0.7652, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.4933333333333334, |
| "grad_norm": 1.5210473212334878, |
| "learning_rate": 2.953914702034054e-06, |
| "loss": 0.7984, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.5022222222222221, |
| "grad_norm": 1.4249046917639296, |
| "learning_rate": 2.928459724343613e-06, |
| "loss": 0.8404, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "grad_norm": 1.6177483971885367, |
| "learning_rate": 2.9029588990862717e-06, |
| "loss": 0.8194, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "eval_loss": 0.8726724982261658, |
| "eval_runtime": 35.94, |
| "eval_samples_per_second": 55.648, |
| "eval_steps_per_second": 6.956, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 1.5584170495767722, |
| "learning_rate": 2.8774149549917697e-06, |
| "loss": 0.7978, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.528888888888889, |
| "grad_norm": 1.5710118549978462, |
| "learning_rate": 2.8518306254037996e-06, |
| "loss": 0.813, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.537777777777778, |
| "grad_norm": 1.5689927318459866, |
| "learning_rate": 2.82620864798753e-06, |
| "loss": 0.8105, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.5466666666666666, |
| "grad_norm": 1.4832935326504852, |
| "learning_rate": 2.800551764436652e-06, |
| "loss": 0.8546, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 1.5783175731659531, |
| "learning_rate": 2.774862720180008e-06, |
| "loss": 0.7933, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.5644444444444443, |
| "grad_norm": 1.538282343725092, |
| "learning_rate": 2.749144264087814e-06, |
| "loss": 0.7878, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.5733333333333333, |
| "grad_norm": 1.7371423308538598, |
| "learning_rate": 2.7233991481775173e-06, |
| "loss": 0.8287, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.5822222222222222, |
| "grad_norm": 1.4670462319523592, |
| "learning_rate": 2.697630127319312e-06, |
| "loss": 0.8091, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.5911111111111111, |
| "grad_norm": 1.4813586250805493, |
| "learning_rate": 2.6718399589413533e-06, |
| "loss": 0.8116, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.4827255891188802, |
| "learning_rate": 2.6460314027347002e-06, |
| "loss": 0.8274, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.8705858588218689, |
| "eval_runtime": 37.1653, |
| "eval_samples_per_second": 53.814, |
| "eval_steps_per_second": 6.727, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.608888888888889, |
| "grad_norm": 1.2776381474133234, |
| "learning_rate": 2.6202072203580098e-06, |
| "loss": 0.7884, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.6177777777777778, |
| "grad_norm": 1.538210347010849, |
| "learning_rate": 2.594370175142029e-06, |
| "loss": 0.7876, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.6266666666666667, |
| "grad_norm": 1.725667854174387, |
| "learning_rate": 2.5685230317938946e-06, |
| "loss": 0.7747, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.6355555555555554, |
| "grad_norm": 1.3174960157067677, |
| "learning_rate": 2.542668556101305e-06, |
| "loss": 0.7909, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.6444444444444444, |
| "grad_norm": 1.5132311970848813, |
| "learning_rate": 2.516809514636556e-06, |
| "loss": 0.8031, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.6533333333333333, |
| "grad_norm": 1.482050621647811, |
| "learning_rate": 2.4909486744605105e-06, |
| "loss": 0.787, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.6622222222222223, |
| "grad_norm": 1.553711235421291, |
| "learning_rate": 2.4650888028264993e-06, |
| "loss": 0.801, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.6711111111111112, |
| "grad_norm": 1.3848153492761215, |
| "learning_rate": 2.439232666884216e-06, |
| "loss": 0.7986, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 1.595050634131418, |
| "learning_rate": 2.413383033383614e-06, |
| "loss": 0.8155, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "grad_norm": 1.5059920276903438, |
| "learning_rate": 2.3875426683788497e-06, |
| "loss": 0.7773, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "eval_loss": 0.8696685433387756, |
| "eval_runtime": 36.3642, |
| "eval_samples_per_second": 54.999, |
| "eval_steps_per_second": 6.875, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6977777777777778, |
| "grad_norm": 1.4898946275893297, |
| "learning_rate": 2.3617143369322988e-06, |
| "loss": 0.7831, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.7066666666666666, |
| "grad_norm": 1.7074625969738908, |
| "learning_rate": 2.33590080281868e-06, |
| "loss": 0.7772, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.7155555555555555, |
| "grad_norm": 1.5452190188528385, |
| "learning_rate": 2.310104828229313e-06, |
| "loss": 0.7799, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.7244444444444444, |
| "grad_norm": 1.7059988950700535, |
| "learning_rate": 2.2843291734765544e-06, |
| "loss": 0.8215, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 1.727770499458536, |
| "learning_rate": 2.2585765966984236e-06, |
| "loss": 0.8464, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.7422222222222223, |
| "grad_norm": 1.5915714270418408, |
| "learning_rate": 2.2328498535634704e-06, |
| "loss": 0.7807, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.751111111111111, |
| "grad_norm": 1.5627049232503645, |
| "learning_rate": 2.2071516969758988e-06, |
| "loss": 0.7882, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 1.8357823815684302, |
| "learning_rate": 2.181484876780996e-06, |
| "loss": 0.7988, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.7688888888888887, |
| "grad_norm": 1.5731340393984028, |
| "learning_rate": 2.1558521394708793e-06, |
| "loss": 0.8354, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 1.5262237155455842, |
| "learning_rate": 2.1302562278906106e-06, |
| "loss": 0.7985, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "eval_loss": 0.8678115010261536, |
| "eval_runtime": 36.2471, |
| "eval_samples_per_second": 55.177, |
| "eval_steps_per_second": 6.897, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3375, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 74533121556480.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|