| { | |
| "best_metric": 1.012279748916626, | |
| "best_model_checkpoint": "./output_c/checkpoint-824976", | |
| "epoch": 48.0, | |
| "global_step": 824976, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.960020946063886e-05, | |
| "loss": 3.5114, | |
| "step": 17186 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.5249948501586914, | |
| "eval_runtime": 71.153, | |
| "eval_samples_per_second": 965.735, | |
| "eval_steps_per_second": 30.188, | |
| "step": 17187 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.920034910106476e-05, | |
| "loss": 2.4281, | |
| "step": 34372 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.0742549896240234, | |
| "eval_runtime": 71.0702, | |
| "eval_samples_per_second": 966.861, | |
| "eval_steps_per_second": 30.224, | |
| "step": 34374 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.8800535288299298e-05, | |
| "loss": 2.0989, | |
| "step": 51558 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.8547435998916626, | |
| "eval_runtime": 71.5379, | |
| "eval_samples_per_second": 960.54, | |
| "eval_steps_per_second": 30.026, | |
| "step": 51561 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.8400698202129518e-05, | |
| "loss": 1.9144, | |
| "step": 68744 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.7082182168960571, | |
| "eval_runtime": 71.3486, | |
| "eval_samples_per_second": 963.089, | |
| "eval_steps_per_second": 30.106, | |
| "step": 68748 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.8000861115959738e-05, | |
| "loss": 1.7926, | |
| "step": 85930 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 1.614298701286316, | |
| "eval_runtime": 72.4304, | |
| "eval_samples_per_second": 948.703, | |
| "eval_steps_per_second": 29.656, | |
| "step": 85935 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.7601047303194276e-05, | |
| "loss": 1.7014, | |
| "step": 103116 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 1.5449920892715454, | |
| "eval_runtime": 71.3467, | |
| "eval_samples_per_second": 963.114, | |
| "eval_steps_per_second": 30.107, | |
| "step": 103122 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.7201210217024496e-05, | |
| "loss": 1.6306, | |
| "step": 120302 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 1.4825724363327026, | |
| "eval_runtime": 71.2883, | |
| "eval_samples_per_second": 963.903, | |
| "eval_steps_per_second": 30.131, | |
| "step": 120309 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.6801373130854716e-05, | |
| "loss": 1.5745, | |
| "step": 137488 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 1.4376918077468872, | |
| "eval_runtime": 72.4942, | |
| "eval_samples_per_second": 947.869, | |
| "eval_steps_per_second": 29.63, | |
| "step": 137496 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 1.6401559318089254e-05, | |
| "loss": 1.5262, | |
| "step": 154674 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 1.3938175439834595, | |
| "eval_runtime": 71.2189, | |
| "eval_samples_per_second": 964.842, | |
| "eval_steps_per_second": 30.161, | |
| "step": 154683 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 1.6001722231919474e-05, | |
| "loss": 1.4839, | |
| "step": 171860 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 1.363573431968689, | |
| "eval_runtime": 71.173, | |
| "eval_samples_per_second": 965.465, | |
| "eval_steps_per_second": 30.18, | |
| "step": 171870 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 1.5601908419154012e-05, | |
| "loss": 1.4505, | |
| "step": 189046 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 1.3312129974365234, | |
| "eval_runtime": 71.256, | |
| "eval_samples_per_second": 964.339, | |
| "eval_steps_per_second": 30.145, | |
| "step": 189057 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 1.5202071332984234e-05, | |
| "loss": 1.4187, | |
| "step": 206232 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 1.3099381923675537, | |
| "eval_runtime": 71.1495, | |
| "eval_samples_per_second": 965.783, | |
| "eval_steps_per_second": 30.19, | |
| "step": 206244 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 1.480225752021877e-05, | |
| "loss": 1.39, | |
| "step": 223418 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 1.2823187112808228, | |
| "eval_runtime": 69.4163, | |
| "eval_samples_per_second": 989.898, | |
| "eval_steps_per_second": 30.944, | |
| "step": 223431 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 1.4402420434048992e-05, | |
| "loss": 1.3651, | |
| "step": 240604 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 1.2610409259796143, | |
| "eval_runtime": 69.462, | |
| "eval_samples_per_second": 989.246, | |
| "eval_steps_per_second": 30.923, | |
| "step": 240618 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 1.4002583347879212e-05, | |
| "loss": 1.3436, | |
| "step": 257790 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 1.2433568239212036, | |
| "eval_runtime": 69.4357, | |
| "eval_samples_per_second": 989.621, | |
| "eval_steps_per_second": 30.935, | |
| "step": 257805 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 1.3602722988305115e-05, | |
| "loss": 1.3245, | |
| "step": 274976 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 1.2258530855178833, | |
| "eval_runtime": 69.4442, | |
| "eval_samples_per_second": 989.499, | |
| "eval_steps_per_second": 30.931, | |
| "step": 274992 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 1.3202885902135337e-05, | |
| "loss": 1.305, | |
| "step": 292162 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 1.2092918157577515, | |
| "eval_runtime": 69.4339, | |
| "eval_samples_per_second": 989.646, | |
| "eval_steps_per_second": 30.936, | |
| "step": 292179 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 1.2803072089369875e-05, | |
| "loss": 1.2878, | |
| "step": 309348 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 1.1935821771621704, | |
| "eval_runtime": 69.4288, | |
| "eval_samples_per_second": 989.72, | |
| "eval_steps_per_second": 30.938, | |
| "step": 309366 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 1.2403304823413045e-05, | |
| "loss": 1.2716, | |
| "step": 326534 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 1.1804686784744263, | |
| "eval_runtime": 69.4238, | |
| "eval_samples_per_second": 989.791, | |
| "eval_steps_per_second": 30.94, | |
| "step": 326553 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.2003421190434631e-05, | |
| "loss": 1.2577, | |
| "step": 343720 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 1.170041799545288, | |
| "eval_runtime": 69.4213, | |
| "eval_samples_per_second": 989.826, | |
| "eval_steps_per_second": 30.942, | |
| "step": 343740 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 1.1603584104264853e-05, | |
| "loss": 1.2451, | |
| "step": 360906 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 1.1530039310455322, | |
| "eval_runtime": 69.5559, | |
| "eval_samples_per_second": 987.911, | |
| "eval_steps_per_second": 30.882, | |
| "step": 360927 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 1.1203747018095073e-05, | |
| "loss": 1.2312, | |
| "step": 378092 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 1.1468385457992554, | |
| "eval_runtime": 70.6551, | |
| "eval_samples_per_second": 972.542, | |
| "eval_steps_per_second": 30.401, | |
| "step": 378114 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 1.0803933205329611e-05, | |
| "loss": 1.2189, | |
| "step": 395278 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 1.1345593929290771, | |
| "eval_runtime": 69.4572, | |
| "eval_samples_per_second": 989.315, | |
| "eval_steps_per_second": 30.926, | |
| "step": 395301 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 1.040411939256415e-05, | |
| "loss": 1.2081, | |
| "step": 412464 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 1.1272401809692383, | |
| "eval_runtime": 69.4251, | |
| "eval_samples_per_second": 989.772, | |
| "eval_steps_per_second": 30.94, | |
| "step": 412488 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 1.0004305579798686e-05, | |
| "loss": 1.1972, | |
| "step": 429650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 1.1170649528503418, | |
| "eval_runtime": 69.594, | |
| "eval_samples_per_second": 987.37, | |
| "eval_steps_per_second": 30.865, | |
| "step": 429675 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 9.604491767033224e-06, | |
| "loss": 1.187, | |
| "step": 446836 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 1.1084502935409546, | |
| "eval_runtime": 69.4386, | |
| "eval_samples_per_second": 989.579, | |
| "eval_steps_per_second": 30.934, | |
| "step": 446862 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 9.204654680863444e-06, | |
| "loss": 1.1777, | |
| "step": 464022 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 1.100696086883545, | |
| "eval_runtime": 69.6537, | |
| "eval_samples_per_second": 986.524, | |
| "eval_steps_per_second": 30.838, | |
| "step": 464049 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 8.804840868097981e-06, | |
| "loss": 1.1691, | |
| "step": 481208 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 1.0973334312438965, | |
| "eval_runtime": 69.4707, | |
| "eval_samples_per_second": 989.122, | |
| "eval_steps_per_second": 30.92, | |
| "step": 481236 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 8.40502705533252e-06, | |
| "loss": 1.1611, | |
| "step": 498394 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 1.0852068662643433, | |
| "eval_runtime": 69.6222, | |
| "eval_samples_per_second": 986.969, | |
| "eval_steps_per_second": 30.852, | |
| "step": 498423 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 8.005189969162739e-06, | |
| "loss": 1.1534, | |
| "step": 515580 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 1.082985520362854, | |
| "eval_runtime": 69.4202, | |
| "eval_samples_per_second": 989.842, | |
| "eval_steps_per_second": 30.942, | |
| "step": 515610 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 7.6053761563972775e-06, | |
| "loss": 1.1453, | |
| "step": 532766 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 1.0722641944885254, | |
| "eval_runtime": 69.4459, | |
| "eval_samples_per_second": 989.476, | |
| "eval_steps_per_second": 30.931, | |
| "step": 532797 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 7.205539070227498e-06, | |
| "loss": 1.1387, | |
| "step": 549952 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 1.0714243650436401, | |
| "eval_runtime": 69.4689, | |
| "eval_samples_per_second": 989.147, | |
| "eval_steps_per_second": 30.92, | |
| "step": 549984 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 6.805678710653402e-06, | |
| "loss": 1.1304, | |
| "step": 567138 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 1.0586808919906616, | |
| "eval_runtime": 69.5198, | |
| "eval_samples_per_second": 988.423, | |
| "eval_steps_per_second": 30.898, | |
| "step": 567171 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 6.405864897887939e-06, | |
| "loss": 1.1243, | |
| "step": 584324 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 1.0512875318527222, | |
| "eval_runtime": 69.4412, | |
| "eval_samples_per_second": 989.542, | |
| "eval_steps_per_second": 30.933, | |
| "step": 584358 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 6.006051085122476e-06, | |
| "loss": 1.1192, | |
| "step": 601510 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 1.0537949800491333, | |
| "eval_runtime": 69.726, | |
| "eval_samples_per_second": 985.5, | |
| "eval_steps_per_second": 30.806, | |
| "step": 601545 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 5.606213998952698e-06, | |
| "loss": 1.1126, | |
| "step": 618696 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 1.049811840057373, | |
| "eval_runtime": 69.9084, | |
| "eval_samples_per_second": 982.929, | |
| "eval_steps_per_second": 30.726, | |
| "step": 618732 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 5.206446732995869e-06, | |
| "loss": 1.1083, | |
| "step": 635882 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 1.0433541536331177, | |
| "eval_runtime": 69.7631, | |
| "eval_samples_per_second": 984.977, | |
| "eval_steps_per_second": 30.79, | |
| "step": 635919 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 4.8066096468260895e-06, | |
| "loss": 1.1037, | |
| "step": 653068 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 1.0362184047698975, | |
| "eval_runtime": 69.5998, | |
| "eval_samples_per_second": 987.287, | |
| "eval_steps_per_second": 30.862, | |
| "step": 653106 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "learning_rate": 4.4067492872519926e-06, | |
| "loss": 1.0997, | |
| "step": 670254 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 1.037041187286377, | |
| "eval_runtime": 69.6134, | |
| "eval_samples_per_second": 987.095, | |
| "eval_steps_per_second": 30.856, | |
| "step": 670293 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 4.006935474486531e-06, | |
| "loss": 1.0952, | |
| "step": 687440 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 1.0285437107086182, | |
| "eval_runtime": 69.7372, | |
| "eval_samples_per_second": 985.341, | |
| "eval_steps_per_second": 30.801, | |
| "step": 687480 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "learning_rate": 3.6071216617210684e-06, | |
| "loss": 1.0891, | |
| "step": 704626 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 1.0275565385818481, | |
| "eval_runtime": 69.6129, | |
| "eval_samples_per_second": 987.101, | |
| "eval_steps_per_second": 30.856, | |
| "step": 704667 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "learning_rate": 3.207261302146972e-06, | |
| "loss": 1.086, | |
| "step": 721812 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 1.0269238948822021, | |
| "eval_runtime": 69.6407, | |
| "eval_samples_per_second": 986.708, | |
| "eval_steps_per_second": 30.844, | |
| "step": 721854 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "learning_rate": 2.8074707627858266e-06, | |
| "loss": 1.0831, | |
| "step": 738998 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 1.0213665962219238, | |
| "eval_runtime": 69.6166, | |
| "eval_samples_per_second": 987.049, | |
| "eval_steps_per_second": 30.855, | |
| "step": 739041 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "learning_rate": 2.4076569500203645e-06, | |
| "loss": 1.0791, | |
| "step": 756184 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 1.020618200302124, | |
| "eval_runtime": 69.8024, | |
| "eval_samples_per_second": 984.422, | |
| "eval_steps_per_second": 30.773, | |
| "step": 756228 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "learning_rate": 2.007819863850585e-06, | |
| "loss": 1.077, | |
| "step": 773370 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 1.0169862508773804, | |
| "eval_runtime": 69.6571, | |
| "eval_samples_per_second": 986.475, | |
| "eval_steps_per_second": 30.837, | |
| "step": 773415 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "learning_rate": 1.608052597893757e-06, | |
| "loss": 1.0739, | |
| "step": 790556 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 1.0156688690185547, | |
| "eval_runtime": 69.7503, | |
| "eval_samples_per_second": 985.157, | |
| "eval_steps_per_second": 30.796, | |
| "step": 790602 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "learning_rate": 1.2081922383196604e-06, | |
| "loss": 1.0709, | |
| "step": 807742 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 1.0143157243728638, | |
| "eval_runtime": 69.6201, | |
| "eval_samples_per_second": 986.999, | |
| "eval_steps_per_second": 30.853, | |
| "step": 807789 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "learning_rate": 8.084249723628325e-07, | |
| "loss": 1.0693, | |
| "step": 824928 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 1.012279748916626, | |
| "eval_runtime": 69.6307, | |
| "eval_samples_per_second": 986.849, | |
| "eval_steps_per_second": 30.848, | |
| "step": 824976 | |
| } | |
| ], | |
| "max_steps": 859350, | |
| "num_train_epochs": 50, | |
| "total_flos": 1.7374699189290516e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |