| { |
| "best_metric": 1.012279748916626, |
| "best_model_checkpoint": "./output_c/checkpoint-824976", |
| "epoch": 48.0, |
| "global_step": 824976, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.960020946063886e-05, |
| "loss": 3.5114, |
| "step": 17186 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.5249948501586914, |
| "eval_runtime": 71.153, |
| "eval_samples_per_second": 965.735, |
| "eval_steps_per_second": 30.188, |
| "step": 17187 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.920034910106476e-05, |
| "loss": 2.4281, |
| "step": 34372 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 2.0742549896240234, |
| "eval_runtime": 71.0702, |
| "eval_samples_per_second": 966.861, |
| "eval_steps_per_second": 30.224, |
| "step": 34374 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1.8800535288299298e-05, |
| "loss": 2.0989, |
| "step": 51558 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.8547435998916626, |
| "eval_runtime": 71.5379, |
| "eval_samples_per_second": 960.54, |
| "eval_steps_per_second": 30.026, |
| "step": 51561 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 1.8400698202129518e-05, |
| "loss": 1.9144, |
| "step": 68744 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.7082182168960571, |
| "eval_runtime": 71.3486, |
| "eval_samples_per_second": 963.089, |
| "eval_steps_per_second": 30.106, |
| "step": 68748 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 1.8000861115959738e-05, |
| "loss": 1.7926, |
| "step": 85930 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 1.614298701286316, |
| "eval_runtime": 72.4304, |
| "eval_samples_per_second": 948.703, |
| "eval_steps_per_second": 29.656, |
| "step": 85935 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 1.7601047303194276e-05, |
| "loss": 1.7014, |
| "step": 103116 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 1.5449920892715454, |
| "eval_runtime": 71.3467, |
| "eval_samples_per_second": 963.114, |
| "eval_steps_per_second": 30.107, |
| "step": 103122 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 1.7201210217024496e-05, |
| "loss": 1.6306, |
| "step": 120302 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 1.4825724363327026, |
| "eval_runtime": 71.2883, |
| "eval_samples_per_second": 963.903, |
| "eval_steps_per_second": 30.131, |
| "step": 120309 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 1.6801373130854716e-05, |
| "loss": 1.5745, |
| "step": 137488 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 1.4376918077468872, |
| "eval_runtime": 72.4942, |
| "eval_samples_per_second": 947.869, |
| "eval_steps_per_second": 29.63, |
| "step": 137496 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 1.6401559318089254e-05, |
| "loss": 1.5262, |
| "step": 154674 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 1.3938175439834595, |
| "eval_runtime": 71.2189, |
| "eval_samples_per_second": 964.842, |
| "eval_steps_per_second": 30.161, |
| "step": 154683 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 1.6001722231919474e-05, |
| "loss": 1.4839, |
| "step": 171860 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 1.363573431968689, |
| "eval_runtime": 71.173, |
| "eval_samples_per_second": 965.465, |
| "eval_steps_per_second": 30.18, |
| "step": 171870 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 1.5601908419154012e-05, |
| "loss": 1.4505, |
| "step": 189046 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 1.3312129974365234, |
| "eval_runtime": 71.256, |
| "eval_samples_per_second": 964.339, |
| "eval_steps_per_second": 30.145, |
| "step": 189057 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 1.5202071332984234e-05, |
| "loss": 1.4187, |
| "step": 206232 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 1.3099381923675537, |
| "eval_runtime": 71.1495, |
| "eval_samples_per_second": 965.783, |
| "eval_steps_per_second": 30.19, |
| "step": 206244 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 1.480225752021877e-05, |
| "loss": 1.39, |
| "step": 223418 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 1.2823187112808228, |
| "eval_runtime": 69.4163, |
| "eval_samples_per_second": 989.898, |
| "eval_steps_per_second": 30.944, |
| "step": 223431 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 1.4402420434048992e-05, |
| "loss": 1.3651, |
| "step": 240604 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 1.2610409259796143, |
| "eval_runtime": 69.462, |
| "eval_samples_per_second": 989.246, |
| "eval_steps_per_second": 30.923, |
| "step": 240618 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 1.4002583347879212e-05, |
| "loss": 1.3436, |
| "step": 257790 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 1.2433568239212036, |
| "eval_runtime": 69.4357, |
| "eval_samples_per_second": 989.621, |
| "eval_steps_per_second": 30.935, |
| "step": 257805 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 1.3602722988305115e-05, |
| "loss": 1.3245, |
| "step": 274976 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 1.2258530855178833, |
| "eval_runtime": 69.4442, |
| "eval_samples_per_second": 989.499, |
| "eval_steps_per_second": 30.931, |
| "step": 274992 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 1.3202885902135337e-05, |
| "loss": 1.305, |
| "step": 292162 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 1.2092918157577515, |
| "eval_runtime": 69.4339, |
| "eval_samples_per_second": 989.646, |
| "eval_steps_per_second": 30.936, |
| "step": 292179 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 1.2803072089369875e-05, |
| "loss": 1.2878, |
| "step": 309348 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 1.1935821771621704, |
| "eval_runtime": 69.4288, |
| "eval_samples_per_second": 989.72, |
| "eval_steps_per_second": 30.938, |
| "step": 309366 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 1.2403304823413045e-05, |
| "loss": 1.2716, |
| "step": 326534 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 1.1804686784744263, |
| "eval_runtime": 69.4238, |
| "eval_samples_per_second": 989.791, |
| "eval_steps_per_second": 30.94, |
| "step": 326553 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 1.2003421190434631e-05, |
| "loss": 1.2577, |
| "step": 343720 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 1.170041799545288, |
| "eval_runtime": 69.4213, |
| "eval_samples_per_second": 989.826, |
| "eval_steps_per_second": 30.942, |
| "step": 343740 |
| }, |
| { |
| "epoch": 21.0, |
| "learning_rate": 1.1603584104264853e-05, |
| "loss": 1.2451, |
| "step": 360906 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 1.1530039310455322, |
| "eval_runtime": 69.5559, |
| "eval_samples_per_second": 987.911, |
| "eval_steps_per_second": 30.882, |
| "step": 360927 |
| }, |
| { |
| "epoch": 22.0, |
| "learning_rate": 1.1203747018095073e-05, |
| "loss": 1.2312, |
| "step": 378092 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 1.1468385457992554, |
| "eval_runtime": 70.6551, |
| "eval_samples_per_second": 972.542, |
| "eval_steps_per_second": 30.401, |
| "step": 378114 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 1.0803933205329611e-05, |
| "loss": 1.2189, |
| "step": 395278 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 1.1345593929290771, |
| "eval_runtime": 69.4572, |
| "eval_samples_per_second": 989.315, |
| "eval_steps_per_second": 30.926, |
| "step": 395301 |
| }, |
| { |
| "epoch": 24.0, |
| "learning_rate": 1.040411939256415e-05, |
| "loss": 1.2081, |
| "step": 412464 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 1.1272401809692383, |
| "eval_runtime": 69.4251, |
| "eval_samples_per_second": 989.772, |
| "eval_steps_per_second": 30.94, |
| "step": 412488 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 1.0004305579798686e-05, |
| "loss": 1.1972, |
| "step": 429650 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 1.1170649528503418, |
| "eval_runtime": 69.594, |
| "eval_samples_per_second": 987.37, |
| "eval_steps_per_second": 30.865, |
| "step": 429675 |
| }, |
| { |
| "epoch": 26.0, |
| "learning_rate": 9.604491767033224e-06, |
| "loss": 1.187, |
| "step": 446836 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 1.1084502935409546, |
| "eval_runtime": 69.4386, |
| "eval_samples_per_second": 989.579, |
| "eval_steps_per_second": 30.934, |
| "step": 446862 |
| }, |
| { |
| "epoch": 27.0, |
| "learning_rate": 9.204654680863444e-06, |
| "loss": 1.1777, |
| "step": 464022 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 1.100696086883545, |
| "eval_runtime": 69.6537, |
| "eval_samples_per_second": 986.524, |
| "eval_steps_per_second": 30.838, |
| "step": 464049 |
| }, |
| { |
| "epoch": 28.0, |
| "learning_rate": 8.804840868097981e-06, |
| "loss": 1.1691, |
| "step": 481208 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 1.0973334312438965, |
| "eval_runtime": 69.4707, |
| "eval_samples_per_second": 989.122, |
| "eval_steps_per_second": 30.92, |
| "step": 481236 |
| }, |
| { |
| "epoch": 29.0, |
| "learning_rate": 8.40502705533252e-06, |
| "loss": 1.1611, |
| "step": 498394 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 1.0852068662643433, |
| "eval_runtime": 69.6222, |
| "eval_samples_per_second": 986.969, |
| "eval_steps_per_second": 30.852, |
| "step": 498423 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 8.005189969162739e-06, |
| "loss": 1.1534, |
| "step": 515580 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 1.082985520362854, |
| "eval_runtime": 69.4202, |
| "eval_samples_per_second": 989.842, |
| "eval_steps_per_second": 30.942, |
| "step": 515610 |
| }, |
| { |
| "epoch": 31.0, |
| "learning_rate": 7.6053761563972775e-06, |
| "loss": 1.1453, |
| "step": 532766 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 1.0722641944885254, |
| "eval_runtime": 69.4459, |
| "eval_samples_per_second": 989.476, |
| "eval_steps_per_second": 30.931, |
| "step": 532797 |
| }, |
| { |
| "epoch": 32.0, |
| "learning_rate": 7.205539070227498e-06, |
| "loss": 1.1387, |
| "step": 549952 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 1.0714243650436401, |
| "eval_runtime": 69.4689, |
| "eval_samples_per_second": 989.147, |
| "eval_steps_per_second": 30.92, |
| "step": 549984 |
| }, |
| { |
| "epoch": 33.0, |
| "learning_rate": 6.805678710653402e-06, |
| "loss": 1.1304, |
| "step": 567138 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 1.0586808919906616, |
| "eval_runtime": 69.5198, |
| "eval_samples_per_second": 988.423, |
| "eval_steps_per_second": 30.898, |
| "step": 567171 |
| }, |
| { |
| "epoch": 34.0, |
| "learning_rate": 6.405864897887939e-06, |
| "loss": 1.1243, |
| "step": 584324 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 1.0512875318527222, |
| "eval_runtime": 69.4412, |
| "eval_samples_per_second": 989.542, |
| "eval_steps_per_second": 30.933, |
| "step": 584358 |
| }, |
| { |
| "epoch": 35.0, |
| "learning_rate": 6.006051085122476e-06, |
| "loss": 1.1192, |
| "step": 601510 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_loss": 1.0537949800491333, |
| "eval_runtime": 69.726, |
| "eval_samples_per_second": 985.5, |
| "eval_steps_per_second": 30.806, |
| "step": 601545 |
| }, |
| { |
| "epoch": 36.0, |
| "learning_rate": 5.606213998952698e-06, |
| "loss": 1.1126, |
| "step": 618696 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_loss": 1.049811840057373, |
| "eval_runtime": 69.9084, |
| "eval_samples_per_second": 982.929, |
| "eval_steps_per_second": 30.726, |
| "step": 618732 |
| }, |
| { |
| "epoch": 37.0, |
| "learning_rate": 5.206446732995869e-06, |
| "loss": 1.1083, |
| "step": 635882 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_loss": 1.0433541536331177, |
| "eval_runtime": 69.7631, |
| "eval_samples_per_second": 984.977, |
| "eval_steps_per_second": 30.79, |
| "step": 635919 |
| }, |
| { |
| "epoch": 38.0, |
| "learning_rate": 4.8066096468260895e-06, |
| "loss": 1.1037, |
| "step": 653068 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_loss": 1.0362184047698975, |
| "eval_runtime": 69.5998, |
| "eval_samples_per_second": 987.287, |
| "eval_steps_per_second": 30.862, |
| "step": 653106 |
| }, |
| { |
| "epoch": 39.0, |
| "learning_rate": 4.4067492872519926e-06, |
| "loss": 1.0997, |
| "step": 670254 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 1.037041187286377, |
| "eval_runtime": 69.6134, |
| "eval_samples_per_second": 987.095, |
| "eval_steps_per_second": 30.856, |
| "step": 670293 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 4.006935474486531e-06, |
| "loss": 1.0952, |
| "step": 687440 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 1.0285437107086182, |
| "eval_runtime": 69.7372, |
| "eval_samples_per_second": 985.341, |
| "eval_steps_per_second": 30.801, |
| "step": 687480 |
| }, |
| { |
| "epoch": 41.0, |
| "learning_rate": 3.6071216617210684e-06, |
| "loss": 1.0891, |
| "step": 704626 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_loss": 1.0275565385818481, |
| "eval_runtime": 69.6129, |
| "eval_samples_per_second": 987.101, |
| "eval_steps_per_second": 30.856, |
| "step": 704667 |
| }, |
| { |
| "epoch": 42.0, |
| "learning_rate": 3.207261302146972e-06, |
| "loss": 1.086, |
| "step": 721812 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_loss": 1.0269238948822021, |
| "eval_runtime": 69.6407, |
| "eval_samples_per_second": 986.708, |
| "eval_steps_per_second": 30.844, |
| "step": 721854 |
| }, |
| { |
| "epoch": 43.0, |
| "learning_rate": 2.8074707627858266e-06, |
| "loss": 1.0831, |
| "step": 738998 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_loss": 1.0213665962219238, |
| "eval_runtime": 69.6166, |
| "eval_samples_per_second": 987.049, |
| "eval_steps_per_second": 30.855, |
| "step": 739041 |
| }, |
| { |
| "epoch": 44.0, |
| "learning_rate": 2.4076569500203645e-06, |
| "loss": 1.0791, |
| "step": 756184 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_loss": 1.020618200302124, |
| "eval_runtime": 69.8024, |
| "eval_samples_per_second": 984.422, |
| "eval_steps_per_second": 30.773, |
| "step": 756228 |
| }, |
| { |
| "epoch": 45.0, |
| "learning_rate": 2.007819863850585e-06, |
| "loss": 1.077, |
| "step": 773370 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_loss": 1.0169862508773804, |
| "eval_runtime": 69.6571, |
| "eval_samples_per_second": 986.475, |
| "eval_steps_per_second": 30.837, |
| "step": 773415 |
| }, |
| { |
| "epoch": 46.0, |
| "learning_rate": 1.608052597893757e-06, |
| "loss": 1.0739, |
| "step": 790556 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_loss": 1.0156688690185547, |
| "eval_runtime": 69.7503, |
| "eval_samples_per_second": 985.157, |
| "eval_steps_per_second": 30.796, |
| "step": 790602 |
| }, |
| { |
| "epoch": 47.0, |
| "learning_rate": 1.2081922383196604e-06, |
| "loss": 1.0709, |
| "step": 807742 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_loss": 1.0143157243728638, |
| "eval_runtime": 69.6201, |
| "eval_samples_per_second": 986.999, |
| "eval_steps_per_second": 30.853, |
| "step": 807789 |
| }, |
| { |
| "epoch": 48.0, |
| "learning_rate": 8.084249723628325e-07, |
| "loss": 1.0693, |
| "step": 824928 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_loss": 1.012279748916626, |
| "eval_runtime": 69.6307, |
| "eval_samples_per_second": 986.849, |
| "eval_steps_per_second": 30.848, |
| "step": 824976 |
| } |
| ], |
| "max_steps": 859350, |
| "num_train_epochs": 50, |
| "total_flos": 1.7374699189290516e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|