{
  "best_metric": 2.783235788345337,
  "best_model_checkpoint": "/content/Train/checkpoint-1400",
  "epoch": 2.0,
  "global_step": 1556,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.0001994858611825193,
      "loss": 3.2724,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019820051413881748,
      "loss": 3.2838,
      "step": 20
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001969151670951157,
      "loss": 3.3858,
      "step": 30
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019562982005141388,
      "loss": 3.1207,
      "step": 40
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001943444730077121,
      "loss": 3.4414,
      "step": 50
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0001930591259640103,
      "loss": 3.2139,
      "step": 60
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001917737789203085,
      "loss": 3.1755,
      "step": 70
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001904884318766067,
      "loss": 3.7297,
      "step": 80
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0001892030848329049,
      "loss": 3.4405,
      "step": 90
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001879177377892031,
      "loss": 3.1529,
      "step": 100
    },
    {
      "epoch": 0.13,
      "eval_accuracy": 0.05897435897435897,
      "eval_loss": 3.211608648300171,
      "eval_runtime": 5.3959,
      "eval_samples_per_second": 72.278,
      "eval_steps_per_second": 9.081,
      "step": 100
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001866323907455013,
      "loss": 3.1837,
      "step": 110
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0001853470437017995,
      "loss": 3.2418,
      "step": 120
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001840616966580977,
      "loss": 3.2269,
      "step": 130
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001827763496143959,
      "loss": 3.1065,
      "step": 140
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00018149100257069408,
      "loss": 3.3563,
      "step": 150
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001803341902313625,
      "loss": 3.2934,
      "step": 160
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00017904884318766068,
      "loss": 3.4139,
      "step": 170
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001777634961439589,
      "loss": 3.2532,
      "step": 180
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017647814910025708,
      "loss": 3.2196,
      "step": 190
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00017519280205655527,
      "loss": 3.1644,
      "step": 200
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.06923076923076923,
      "eval_loss": 3.209580421447754,
      "eval_runtime": 4.8409,
      "eval_samples_per_second": 80.564,
      "eval_steps_per_second": 10.122,
      "step": 200
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00017390745501285349,
      "loss": 3.3494,
      "step": 210
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00017262210796915167,
      "loss": 3.336,
      "step": 220
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001713367609254499,
      "loss": 3.2151,
      "step": 230
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00017005141388174808,
      "loss": 3.2835,
      "step": 240
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0001687660668380463,
      "loss": 3.4358,
      "step": 250
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00016748071979434448,
      "loss": 3.303,
      "step": 260
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00016619537275064267,
      "loss": 3.2475,
      "step": 270
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00016491002570694088,
      "loss": 3.1522,
      "step": 280
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00016362467866323907,
      "loss": 3.2906,
      "step": 290
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00016233933161953728,
      "loss": 3.1549,
      "step": 300
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.06923076923076923,
      "eval_loss": 3.1985976696014404,
      "eval_runtime": 4.6899,
      "eval_samples_per_second": 83.157,
      "eval_steps_per_second": 10.448,
      "step": 300
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001610539845758355,
      "loss": 3.4558,
      "step": 310
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00015976863753213369,
      "loss": 3.2304,
      "step": 320
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0001584832904884319,
      "loss": 3.2686,
      "step": 330
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001571979434447301,
      "loss": 3.3832,
      "step": 340
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00015591259640102828,
      "loss": 3.3443,
      "step": 350
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001546272493573265,
      "loss": 3.2797,
      "step": 360
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00015334190231362468,
      "loss": 3.2673,
      "step": 370
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0001520565552699229,
      "loss": 3.1919,
      "step": 380
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00015077120822622108,
      "loss": 3.1238,
      "step": 390
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0001494858611825193,
      "loss": 3.2998,
      "step": 400
    },
    {
      "epoch": 0.51,
      "eval_accuracy": 0.1076923076923077,
      "eval_loss": 3.1967546939849854,
      "eval_runtime": 4.6763,
      "eval_samples_per_second": 83.399,
      "eval_steps_per_second": 10.478,
      "step": 400
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00014820051413881748,
      "loss": 3.237,
      "step": 410
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00014691516709511567,
      "loss": 3.1511,
      "step": 420
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00014562982005141388,
      "loss": 3.1972,
      "step": 430
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001443444730077121,
      "loss": 3.3029,
      "step": 440
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0001430591259640103,
      "loss": 3.2639,
      "step": 450
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0001417737789203085,
      "loss": 3.2602,
      "step": 460
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001404884318766067,
      "loss": 3.1096,
      "step": 470
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0001392030848329049,
      "loss": 3.2692,
      "step": 480
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001379177377892031,
      "loss": 3.3299,
      "step": 490
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001366323907455013,
      "loss": 3.1344,
      "step": 500
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.1717948717948718,
      "eval_loss": 3.1666767597198486,
      "eval_runtime": 4.8058,
      "eval_samples_per_second": 81.152,
      "eval_steps_per_second": 10.196,
      "step": 500
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0001353470437017995,
      "loss": 3.0552,
      "step": 510
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00013406169665809768,
      "loss": 3.3452,
      "step": 520
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0001327763496143959,
      "loss": 3.3108,
      "step": 530
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00013149100257069408,
      "loss": 3.0806,
      "step": 540
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0001302056555269923,
      "loss": 3.4034,
      "step": 550
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00012892030848329049,
      "loss": 3.0853,
      "step": 560
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00012763496143958867,
      "loss": 3.1718,
      "step": 570
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00012634961439588692,
      "loss": 3.1265,
      "step": 580
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0001250642673521851,
      "loss": 3.183,
      "step": 590
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001237789203084833,
      "loss": 3.3638,
      "step": 600
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.17435897435897435,
      "eval_loss": 3.1383864879608154,
      "eval_runtime": 4.7872,
      "eval_samples_per_second": 81.466,
      "eval_steps_per_second": 10.236,
      "step": 600
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0001224935732647815,
      "loss": 3.1725,
      "step": 610
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001212082262210797,
      "loss": 3.5329,
      "step": 620
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0001199228791773779,
      "loss": 3.1399,
      "step": 630
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0001186375321336761,
      "loss": 3.1998,
      "step": 640
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001173521850899743,
      "loss": 3.1562,
      "step": 650
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0001160668380462725,
      "loss": 3.3428,
      "step": 660
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0001147814910025707,
      "loss": 3.0586,
      "step": 670
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0001134961439588689,
      "loss": 3.0464,
      "step": 680
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00011221079691516709,
      "loss": 3.1097,
      "step": 690
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.00011092544987146529,
      "loss": 3.1482,
      "step": 700
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.20256410256410257,
      "eval_loss": 3.096599578857422,
      "eval_runtime": 4.7875,
      "eval_samples_per_second": 81.462,
      "eval_steps_per_second": 10.235,
      "step": 700
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00010964010282776349,
      "loss": 3.1459,
      "step": 710
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00010835475578406172,
      "loss": 3.1789,
      "step": 720
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0001070694087403599,
      "loss": 3.2216,
      "step": 730
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0001057840616966581,
      "loss": 2.8712,
      "step": 740
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.00010449871465295631,
      "loss": 3.1489,
      "step": 750
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.00010321336760925451,
      "loss": 3.438,
      "step": 760
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.00010192802056555271,
      "loss": 3.23,
      "step": 770
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.00010064267352185091,
      "loss": 3.1966,
      "step": 780
    },
    {
      "epoch": 1.02,
      "learning_rate": 9.93573264781491e-05,
      "loss": 3.0066,
      "step": 790
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.80719794344473e-05,
      "loss": 3.1366,
      "step": 800
    },
    {
      "epoch": 1.03,
      "eval_accuracy": 0.18974358974358974,
      "eval_loss": 3.0483875274658203,
      "eval_runtime": 4.6499,
      "eval_samples_per_second": 83.872,
      "eval_steps_per_second": 10.538,
      "step": 800
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.67866323907455e-05,
      "loss": 3.1106,
      "step": 810
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.550128534704372e-05,
      "loss": 2.9235,
      "step": 820
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.421593830334192e-05,
      "loss": 3.0565,
      "step": 830
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.29305912596401e-05,
      "loss": 3.0503,
      "step": 840
    },
    {
      "epoch": 1.09,
      "learning_rate": 9.16452442159383e-05,
      "loss": 3.2726,
      "step": 850
    },
    {
      "epoch": 1.11,
      "learning_rate": 9.03598971722365e-05,
      "loss": 3.0256,
      "step": 860
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.907455012853471e-05,
      "loss": 3.0892,
      "step": 870
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.778920308483291e-05,
      "loss": 3.1357,
      "step": 880
    },
    {
      "epoch": 1.14,
      "learning_rate": 8.650385604113111e-05,
      "loss": 3.1402,
      "step": 890
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.521850899742931e-05,
      "loss": 3.0206,
      "step": 900
    },
    {
      "epoch": 1.16,
      "eval_accuracy": 0.30256410256410254,
      "eval_loss": 3.016371250152588,
      "eval_runtime": 4.6485,
      "eval_samples_per_second": 83.899,
      "eval_steps_per_second": 10.541,
      "step": 900
    },
    {
      "epoch": 1.17,
      "learning_rate": 8.393316195372751e-05,
      "loss": 3.0057,
      "step": 910
    },
    {
      "epoch": 1.18,
      "learning_rate": 8.264781491002571e-05,
      "loss": 3.1709,
      "step": 920
    },
    {
      "epoch": 1.2,
      "learning_rate": 8.136246786632391e-05,
      "loss": 3.2243,
      "step": 930
    },
    {
      "epoch": 1.21,
      "learning_rate": 8.007712082262212e-05,
      "loss": 3.19,
      "step": 940
    },
    {
      "epoch": 1.22,
      "learning_rate": 7.87917737789203e-05,
      "loss": 3.2785,
      "step": 950
    },
    {
      "epoch": 1.23,
      "learning_rate": 7.750642673521852e-05,
      "loss": 2.8424,
      "step": 960
    },
    {
      "epoch": 1.25,
      "learning_rate": 7.622107969151672e-05,
      "loss": 3.2575,
      "step": 970
    },
    {
      "epoch": 1.26,
      "learning_rate": 7.493573264781492e-05,
      "loss": 2.9012,
      "step": 980
    },
    {
      "epoch": 1.27,
      "learning_rate": 7.365038560411311e-05,
      "loss": 3.1516,
      "step": 990
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.236503856041131e-05,
      "loss": 2.921,
      "step": 1000
    },
    {
      "epoch": 1.29,
      "eval_accuracy": 0.3230769230769231,
      "eval_loss": 2.984600305557251,
      "eval_runtime": 4.9539,
      "eval_samples_per_second": 78.726,
      "eval_steps_per_second": 9.891,
      "step": 1000
    },
    {
      "epoch": 1.3,
      "learning_rate": 7.107969151670951e-05,
      "loss": 2.9828,
      "step": 1010
    },
    {
      "epoch": 1.31,
      "learning_rate": 6.979434447300771e-05,
      "loss": 3.0413,
      "step": 1020
    },
    {
      "epoch": 1.32,
      "learning_rate": 6.850899742930593e-05,
      "loss": 2.9759,
      "step": 1030
    },
    {
      "epoch": 1.34,
      "learning_rate": 6.722365038560411e-05,
      "loss": 2.9434,
      "step": 1040
    },
    {
      "epoch": 1.35,
      "learning_rate": 6.593830334190231e-05,
      "loss": 3.0893,
      "step": 1050
    },
    {
      "epoch": 1.36,
      "learning_rate": 6.465295629820052e-05,
      "loss": 3.0797,
      "step": 1060
    },
    {
      "epoch": 1.38,
      "learning_rate": 6.336760925449872e-05,
      "loss": 2.9291,
      "step": 1070
    },
    {
      "epoch": 1.39,
      "learning_rate": 6.208226221079692e-05,
      "loss": 3.0357,
      "step": 1080
    },
    {
      "epoch": 1.4,
      "learning_rate": 6.079691516709511e-05,
      "loss": 3.0109,
      "step": 1090
    },
    {
      "epoch": 1.41,
      "learning_rate": 5.951156812339333e-05,
      "loss": 3.0027,
      "step": 1100
    },
    {
      "epoch": 1.41,
      "eval_accuracy": 0.33589743589743587,
      "eval_loss": 2.933824062347412,
      "eval_runtime": 4.8337,
      "eval_samples_per_second": 80.684,
      "eval_steps_per_second": 10.137,
      "step": 1100
    },
    {
      "epoch": 1.43,
      "learning_rate": 5.822622107969152e-05,
      "loss": 3.2462,
      "step": 1110
    },
    {
      "epoch": 1.44,
      "learning_rate": 5.694087403598972e-05,
      "loss": 2.9376,
      "step": 1120
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.5655526992287924e-05,
      "loss": 2.9242,
      "step": 1130
    },
    {
      "epoch": 1.47,
      "learning_rate": 5.437017994858612e-05,
      "loss": 2.862,
      "step": 1140
    },
    {
      "epoch": 1.48,
      "learning_rate": 5.308483290488432e-05,
      "loss": 3.1005,
      "step": 1150
    },
    {
      "epoch": 1.49,
      "learning_rate": 5.1799485861182514e-05,
      "loss": 3.1811,
      "step": 1160
    },
    {
      "epoch": 1.5,
      "learning_rate": 5.051413881748073e-05,
      "loss": 3.1254,
      "step": 1170
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.922879177377892e-05,
      "loss": 2.797,
      "step": 1180
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.7943444730077124e-05,
      "loss": 3.1899,
      "step": 1190
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.6658097686375325e-05,
      "loss": 2.9047,
      "step": 1200
    },
    {
      "epoch": 1.54,
      "eval_accuracy": 0.34615384615384615,
      "eval_loss": 2.8916842937469482,
      "eval_runtime": 4.6342,
      "eval_samples_per_second": 84.157,
      "eval_steps_per_second": 10.574,
      "step": 1200
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.537275064267352e-05,
      "loss": 3.1712,
      "step": 1210
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.408740359897173e-05,
      "loss": 2.9201,
      "step": 1220
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.280205655526993e-05,
      "loss": 2.9952,
      "step": 1230
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.151670951156812e-05,
      "loss": 2.8225,
      "step": 1240
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.0231362467866324e-05,
      "loss": 2.9507,
      "step": 1250
    },
    {
      "epoch": 1.62,
      "learning_rate": 3.8946015424164526e-05,
      "loss": 2.96,
      "step": 1260
    },
    {
      "epoch": 1.63,
      "learning_rate": 3.766066838046273e-05,
      "loss": 3.0775,
      "step": 1270
    },
    {
      "epoch": 1.65,
      "learning_rate": 3.637532133676093e-05,
      "loss": 2.9242,
      "step": 1280
    },
    {
      "epoch": 1.66,
      "learning_rate": 3.508997429305913e-05,
      "loss": 2.8814,
      "step": 1290
    },
    {
      "epoch": 1.67,
      "learning_rate": 3.380462724935733e-05,
      "loss": 2.8579,
      "step": 1300
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.4025641025641026,
      "eval_loss": 2.8616135120391846,
      "eval_runtime": 4.7031,
      "eval_samples_per_second": 82.924,
      "eval_steps_per_second": 10.419,
      "step": 1300
    },
    {
      "epoch": 1.68,
      "learning_rate": 3.251928020565553e-05,
      "loss": 2.6972,
      "step": 1310
    },
    {
      "epoch": 1.7,
      "learning_rate": 3.1233933161953726e-05,
      "loss": 2.8446,
      "step": 1320
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.994858611825193e-05,
      "loss": 2.915,
      "step": 1330
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.866323907455013e-05,
      "loss": 3.1146,
      "step": 1340
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.737789203084833e-05,
      "loss": 2.9168,
      "step": 1350
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.6092544987146534e-05,
      "loss": 2.8762,
      "step": 1360
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.480719794344473e-05,
      "loss": 2.9499,
      "step": 1370
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.3521850899742933e-05,
      "loss": 2.907,
      "step": 1380
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.2236503856041134e-05,
      "loss": 3.0447,
      "step": 1390
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.095115681233933e-05,
      "loss": 2.988,
      "step": 1400
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.4076923076923077,
      "eval_loss": 2.783235788345337,
      "eval_runtime": 4.64,
      "eval_samples_per_second": 84.052,
      "eval_steps_per_second": 10.56,
      "step": 1400
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9665809768637533e-05,
      "loss": 2.8267,
      "step": 1410
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.8380462724935734e-05,
      "loss": 2.7718,
      "step": 1420
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.7095115681233935e-05,
      "loss": 2.8327,
      "step": 1430
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.5809768637532136e-05,
      "loss": 2.9942,
      "step": 1440
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.4524421593830334e-05,
      "loss": 3.0647,
      "step": 1450
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.3239074550128535e-05,
      "loss": 2.8267,
      "step": 1460
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.1953727506426736e-05,
      "loss": 2.8312,
      "step": 1470
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.0668380462724936e-05,
      "loss": 3.1317,
      "step": 1480
    },
    {
      "epoch": 1.92,
      "learning_rate": 9.383033419023137e-06,
      "loss": 3.1244,
      "step": 1490
    },
    {
      "epoch": 1.93,
      "learning_rate": 8.097686375321336e-06,
      "loss": 2.8553,
      "step": 1500
    },
    {
      "epoch": 1.93,
      "eval_accuracy": 0.3871794871794872,
      "eval_loss": 2.821709632873535,
      "eval_runtime": 4.6901,
      "eval_samples_per_second": 83.154,
      "eval_steps_per_second": 10.448,
      "step": 1500
    },
    {
      "epoch": 1.94,
      "learning_rate": 6.812339331619537e-06,
      "loss": 2.6944,
      "step": 1510
    },
    {
      "epoch": 1.95,
      "learning_rate": 5.526992287917738e-06,
      "loss": 3.1573,
      "step": 1520
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.241645244215939e-06,
      "loss": 2.8412,
      "step": 1530
    },
    {
      "epoch": 1.98,
      "learning_rate": 2.956298200514139e-06,
      "loss": 2.6937,
      "step": 1540
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.6709511568123394e-06,
      "loss": 2.65,
      "step": 1550
    },
    {
      "epoch": 2.0,
      "step": 1556,
      "total_flos": 1.1959742538326016e+17,
      "train_loss": 3.1177605087162594,
      "train_runtime": 412.2299,
      "train_samples_per_second": 7.549,
      "train_steps_per_second": 3.775
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.4076923076923077,
      "eval_loss": 2.783235788345337,
      "eval_runtime": 5.3396,
      "eval_samples_per_second": 73.039,
      "eval_steps_per_second": 9.177,
      "step": 1556
    }
  ],
  "max_steps": 1556,
  "num_train_epochs": 2,
  "total_flos": 1.1959742538326016e+17,
  "trial_name": null,
  "trial_params": null
}