{
  "best_metric": 0.9869447350502014,
  "best_model_checkpoint": "./output_c/checkpoint-842163",
  "epoch": 50.0,
  "global_step": 859350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.960016291383022e-05,
      "loss": 2.1017,
      "step": 17186
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.6622060537338257,
      "eval_runtime": 41.4466,
      "eval_samples_per_second": 1657.917,
      "eval_steps_per_second": 51.826,
      "step": 17187
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.920034910106476e-05,
      "loss": 1.684,
      "step": 34372
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.5238641500473022,
      "eval_runtime": 41.103,
      "eval_samples_per_second": 1671.774,
      "eval_steps_per_second": 52.259,
      "step": 34374
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.8800558561703613e-05,
      "loss": 1.5681,
      "step": 51558
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.4425514936447144,
      "eval_runtime": 40.7311,
      "eval_samples_per_second": 1687.039,
      "eval_steps_per_second": 52.736,
      "step": 51561
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.840076802234247e-05,
      "loss": 1.495,
      "step": 68744
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.3773192167282104,
      "eval_runtime": 40.0387,
      "eval_samples_per_second": 1716.214,
      "eval_steps_per_second": 53.648,
      "step": 68748
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.8001000756385642e-05,
      "loss": 1.4413,
      "step": 85930
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.3373253345489502,
      "eval_runtime": 40.4299,
      "eval_samples_per_second": 1699.61,
      "eval_steps_per_second": 53.129,
      "step": 85935
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.7601186943620177e-05,
      "loss": 1.3994,
      "step": 103116
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.3044638633728027,
      "eval_runtime": 40.8305,
      "eval_samples_per_second": 1682.934,
      "eval_steps_per_second": 52.608,
      "step": 103122
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.7201396404259034e-05,
      "loss": 1.3648,
      "step": 120302
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.2776345014572144,
      "eval_runtime": 40.5929,
      "eval_samples_per_second": 1692.784,
      "eval_steps_per_second": 52.916,
      "step": 120309
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.680160586489789e-05,
      "loss": 1.3356,
      "step": 137488
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.2525078058242798,
      "eval_runtime": 41.1415,
      "eval_samples_per_second": 1670.213,
      "eval_steps_per_second": 52.21,
      "step": 137496
    },
    {
      "epoch": 9.0,
      "learning_rate": 1.640179205213243e-05,
      "loss": 1.3103,
      "step": 154674
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.229679822921753,
      "eval_runtime": 40.8721,
      "eval_samples_per_second": 1681.22,
      "eval_steps_per_second": 52.554,
      "step": 154683
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.6002001512771283e-05,
      "loss": 1.287,
      "step": 171860
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.212567925453186,
      "eval_runtime": 39.8522,
      "eval_samples_per_second": 1724.248,
      "eval_steps_per_second": 53.899,
      "step": 171870
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.560218770000582e-05,
      "loss": 1.2684,
      "step": 189046
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.194634199142456,
      "eval_runtime": 39.8234,
      "eval_samples_per_second": 1725.494,
      "eval_steps_per_second": 53.938,
      "step": 189057
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.5202373887240358e-05,
      "loss": 1.2503,
      "step": 206232
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.181823968887329,
      "eval_runtime": 40.2477,
      "eval_samples_per_second": 1707.301,
      "eval_steps_per_second": 53.369,
      "step": 206244
    },
    {
      "epoch": 13.0,
      "learning_rate": 1.4802606621283528e-05,
      "loss": 1.2338,
      "step": 223418
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.1666418313980103,
      "eval_runtime": 40.5887,
      "eval_samples_per_second": 1692.959,
      "eval_steps_per_second": 52.921,
      "step": 223431
    },
    {
      "epoch": 14.0,
      "learning_rate": 1.4402816081922383e-05,
      "loss": 1.2175,
      "step": 240604
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.1534117460250854,
      "eval_runtime": 40.4201,
      "eval_samples_per_second": 1700.022,
      "eval_steps_per_second": 53.142,
      "step": 240618
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.4003048815965557e-05,
      "loss": 1.2051,
      "step": 257790
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.1395701169967651,
      "eval_runtime": 40.5078,
      "eval_samples_per_second": 1696.341,
      "eval_steps_per_second": 53.027,
      "step": 257805
    },
    {
      "epoch": 16.0,
      "learning_rate": 1.3603258276604412e-05,
      "loss": 1.1938,
      "step": 274976
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.1312499046325684,
      "eval_runtime": 40.784,
      "eval_samples_per_second": 1684.852,
      "eval_steps_per_second": 52.668,
      "step": 274992
    },
    {
      "epoch": 17.0,
      "learning_rate": 1.3203491010647584e-05,
      "loss": 1.182,
      "step": 292162
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.1207791566848755,
      "eval_runtime": 40.735,
      "eval_samples_per_second": 1686.879,
      "eval_steps_per_second": 52.731,
      "step": 292179
    },
    {
      "epoch": 18.0,
      "learning_rate": 1.2803700471286437e-05,
      "loss": 1.1706,
      "step": 309348
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.11408269405365,
      "eval_runtime": 40.5382,
      "eval_samples_per_second": 1695.07,
      "eval_steps_per_second": 52.987,
      "step": 309366
    },
    {
      "epoch": 19.0,
      "learning_rate": 1.2403886658520976e-05,
      "loss": 1.1597,
      "step": 326534
    },
    {
      "epoch": 19.0,
      "eval_loss": 1.1045416593551636,
      "eval_runtime": 41.1519,
      "eval_samples_per_second": 1669.791,
      "eval_steps_per_second": 52.197,
      "step": 326553
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.2004096119159831e-05,
      "loss": 1.1519,
      "step": 343720
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.0968821048736572,
      "eval_runtime": 40.942,
      "eval_samples_per_second": 1678.35,
      "eval_steps_per_second": 52.464,
      "step": 343740
    },
    {
      "epoch": 21.0,
      "learning_rate": 1.1604305579798688e-05,
      "loss": 1.1436,
      "step": 360906
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.0863711833953857,
      "eval_runtime": 40.8058,
      "eval_samples_per_second": 1683.95,
      "eval_steps_per_second": 52.64,
      "step": 360927
    },
    {
      "epoch": 22.0,
      "learning_rate": 1.120451504043754e-05,
      "loss": 1.1336,
      "step": 378092
    },
    {
      "epoch": 22.0,
      "eval_loss": 1.082047939300537,
      "eval_runtime": 40.4735,
      "eval_samples_per_second": 1697.777,
      "eval_steps_per_second": 53.072,
      "step": 378114
    },
    {
      "epoch": 23.0,
      "learning_rate": 1.0804701227672078e-05,
      "loss": 1.1265,
      "step": 395278
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.0744354724884033,
      "eval_runtime": 41.164,
      "eval_samples_per_second": 1669.298,
      "eval_steps_per_second": 52.182,
      "step": 395301
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.0404910688310935e-05,
      "loss": 1.119,
      "step": 412464
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.0701904296875,
      "eval_runtime": 40.4778,
      "eval_samples_per_second": 1697.599,
      "eval_steps_per_second": 53.066,
      "step": 412488
    },
    {
      "epoch": 25.0,
      "learning_rate": 1.000512014894979e-05,
      "loss": 1.1117,
      "step": 429650
    },
    {
      "epoch": 25.0,
      "eval_loss": 1.062601089477539,
      "eval_runtime": 40.2464,
      "eval_samples_per_second": 1707.357,
      "eval_steps_per_second": 53.371,
      "step": 429675
    },
    {
      "epoch": 26.0,
      "learning_rate": 9.60535288299296e-06,
      "loss": 1.1048,
      "step": 446836
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.0572917461395264,
      "eval_runtime": 39.7303,
      "eval_samples_per_second": 1729.537,
      "eval_steps_per_second": 54.065,
      "step": 446862
    },
    {
      "epoch": 27.0,
      "learning_rate": 9.205562343631815e-06,
      "loss": 1.098,
      "step": 464022
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.0520741939544678,
      "eval_runtime": 39.4819,
      "eval_samples_per_second": 1740.42,
      "eval_steps_per_second": 54.405,
      "step": 464049
    },
    {
      "epoch": 28.0,
      "learning_rate": 8.80577180427067e-06,
      "loss": 1.0924,
      "step": 481208
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.0490448474884033,
      "eval_runtime": 39.5503,
      "eval_samples_per_second": 1737.408,
      "eval_steps_per_second": 54.311,
      "step": 481236
    },
    {
      "epoch": 29.0,
      "learning_rate": 8.405981264909526e-06,
      "loss": 1.087,
      "step": 498394
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.0411999225616455,
      "eval_runtime": 39.4139,
      "eval_samples_per_second": 1743.421,
      "eval_steps_per_second": 54.499,
      "step": 498423
    },
    {
      "epoch": 30.0,
      "learning_rate": 8.006213998952698e-06,
      "loss": 1.0812,
      "step": 515580
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.0427082777023315,
      "eval_runtime": 40.8396,
      "eval_samples_per_second": 1682.558,
      "eval_steps_per_second": 52.596,
      "step": 515610
    },
    {
      "epoch": 31.0,
      "learning_rate": 7.606423459591552e-06,
      "loss": 1.076,
      "step": 532766
    },
    {
      "epoch": 31.0,
      "eval_loss": 1.031008243560791,
      "eval_runtime": 39.0796,
      "eval_samples_per_second": 1758.334,
      "eval_steps_per_second": 54.965,
      "step": 532797
    },
    {
      "epoch": 32.0,
      "learning_rate": 7.206632920230407e-06,
      "loss": 1.0707,
      "step": 549952
    },
    {
      "epoch": 32.0,
      "eval_loss": 1.0325006246566772,
      "eval_runtime": 38.4227,
      "eval_samples_per_second": 1788.396,
      "eval_steps_per_second": 55.904,
      "step": 549984
    },
    {
      "epoch": 33.0,
      "learning_rate": 6.806842380869262e-06,
      "loss": 1.0654,
      "step": 567138
    },
    {
      "epoch": 33.0,
      "eval_loss": 1.021201252937317,
      "eval_runtime": 37.9747,
      "eval_samples_per_second": 1809.492,
      "eval_steps_per_second": 56.564,
      "step": 567171
    },
    {
      "epoch": 34.0,
      "learning_rate": 6.407051841508117e-06,
      "loss": 1.0609,
      "step": 584324
    },
    {
      "epoch": 34.0,
      "eval_loss": 1.0170767307281494,
      "eval_runtime": 38.0157,
      "eval_samples_per_second": 1807.544,
      "eval_steps_per_second": 56.503,
      "step": 584358
    },
    {
      "epoch": 35.0,
      "learning_rate": 6.007238028742655e-06,
      "loss": 1.0572,
      "step": 601510
    },
    {
      "epoch": 35.0,
      "eval_loss": 1.0200223922729492,
      "eval_runtime": 38.7209,
      "eval_samples_per_second": 1774.625,
      "eval_steps_per_second": 55.474,
      "step": 601545
    },
    {
      "epoch": 36.0,
      "learning_rate": 5.607470762785827e-06,
      "loss": 1.0528,
      "step": 618696
    },
    {
      "epoch": 36.0,
      "eval_loss": 1.0177444219589233,
      "eval_runtime": 38.1852,
      "eval_samples_per_second": 1799.519,
      "eval_steps_per_second": 56.252,
      "step": 618732
    },
    {
      "epoch": 37.0,
      "learning_rate": 5.207656950020364e-06,
      "loss": 1.0485,
      "step": 635882
    },
    {
      "epoch": 37.0,
      "eval_loss": 1.0121095180511475,
      "eval_runtime": 37.9666,
      "eval_samples_per_second": 1809.879,
      "eval_steps_per_second": 56.576,
      "step": 635919
    },
    {
      "epoch": 38.0,
      "learning_rate": 4.807889684063537e-06,
      "loss": 1.0456,
      "step": 653068
    },
    {
      "epoch": 38.0,
      "eval_loss": 1.0075100660324097,
      "eval_runtime": 37.7745,
      "eval_samples_per_second": 1819.084,
      "eval_steps_per_second": 56.864,
      "step": 653106
    },
    {
      "epoch": 39.0,
      "learning_rate": 4.408075871298074e-06,
      "loss": 1.0436,
      "step": 670254
    },
    {
      "epoch": 39.0,
      "eval_loss": 1.0085304975509644,
      "eval_runtime": 37.8436,
      "eval_samples_per_second": 1815.761,
      "eval_steps_per_second": 56.76,
      "step": 670293
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.008308605341247e-06,
      "loss": 1.0403,
      "step": 687440
    },
    {
      "epoch": 40.0,
      "eval_loss": 1.0014568567276,
      "eval_runtime": 37.7063,
      "eval_samples_per_second": 1822.377,
      "eval_steps_per_second": 56.967,
      "step": 687480
    },
    {
      "epoch": 41.0,
      "learning_rate": 3.608494792575784e-06,
      "loss": 1.0358,
      "step": 704626
    },
    {
      "epoch": 41.0,
      "eval_loss": 0.9997268915176392,
      "eval_runtime": 37.9829,
      "eval_samples_per_second": 1809.103,
      "eval_steps_per_second": 56.552,
      "step": 704667
    },
    {
      "epoch": 42.0,
      "learning_rate": 3.208704253214639e-06,
      "loss": 1.0339,
      "step": 721812
    },
    {
      "epoch": 42.0,
      "eval_loss": 1.0009877681732178,
      "eval_runtime": 37.7238,
      "eval_samples_per_second": 1821.528,
      "eval_steps_per_second": 56.94,
      "step": 721854
    },
    {
      "epoch": 43.0,
      "learning_rate": 2.808890440449177e-06,
      "loss": 1.0312,
      "step": 738998
    },
    {
      "epoch": 43.0,
      "eval_loss": 0.9973294138908386,
      "eval_runtime": 37.9123,
      "eval_samples_per_second": 1812.472,
      "eval_steps_per_second": 56.657,
      "step": 739041
    },
    {
      "epoch": 44.0,
      "learning_rate": 2.4090999010880316e-06,
      "loss": 1.0278,
      "step": 756184
    },
    {
      "epoch": 44.0,
      "eval_loss": 0.9942373037338257,
      "eval_runtime": 37.8136,
      "eval_samples_per_second": 1817.205,
      "eval_steps_per_second": 56.805,
      "step": 756228
    },
    {
      "epoch": 45.0,
      "learning_rate": 2.0093093617268868e-06,
      "loss": 1.0258,
      "step": 773370
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.9922175407409668,
      "eval_runtime": 37.7488,
      "eval_samples_per_second": 1820.324,
      "eval_steps_per_second": 56.903,
      "step": 773415
    },
    {
      "epoch": 46.0,
      "learning_rate": 1.6094955489614245e-06,
      "loss": 1.024,
      "step": 790556
    },
    {
      "epoch": 46.0,
      "eval_loss": 0.9902246594429016,
      "eval_runtime": 37.7011,
      "eval_samples_per_second": 1822.624,
      "eval_steps_per_second": 56.974,
      "step": 790602
    },
    {
      "epoch": 47.0,
      "learning_rate": 1.2096817361959622e-06,
      "loss": 1.0213,
      "step": 807742
    },
    {
      "epoch": 47.0,
      "eval_loss": 0.9919160008430481,
      "eval_runtime": 37.515,
      "eval_samples_per_second": 1831.667,
      "eval_steps_per_second": 57.257,
      "step": 807789
    },
    {
      "epoch": 48.0,
      "learning_rate": 8.09891196834817e-07,
      "loss": 1.0202,
      "step": 824928
    },
    {
      "epoch": 48.0,
      "eval_loss": 0.9896969199180603,
      "eval_runtime": 37.5208,
      "eval_samples_per_second": 1831.385,
      "eval_steps_per_second": 57.248,
      "step": 824976
    },
    {
      "epoch": 49.0,
      "learning_rate": 4.1007738406935476e-07,
      "loss": 1.0184,
      "step": 842114
    },
    {
      "epoch": 49.0,
      "eval_loss": 0.9869447350502014,
      "eval_runtime": 37.5715,
      "eval_samples_per_second": 1828.912,
      "eval_steps_per_second": 57.171,
      "step": 842163
    },
    {
      "epoch": 50.0,
      "learning_rate": 1.0286844708209695e-08,
      "loss": 1.0185,
      "step": 859300
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.9913281798362732,
      "eval_runtime": 38.0547,
      "eval_samples_per_second": 1805.692,
      "eval_steps_per_second": 56.445,
      "step": 859350
    }
  ],
  "max_steps": 859350,
  "num_train_epochs": 50,
  "total_flos": 9.113148657842688e+17,
  "trial_name": null,
  "trial_params": null
}