{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9971617786187323,
  "eval_steps": 100.0,
  "global_step": 396,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 0.00015,
      "loss": 9.4494,
      "step": 10
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0003,
      "loss": 7.3931,
      "step": 20
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002920212765957447,
      "loss": 6.6623,
      "step": 30
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00028404255319148934,
      "loss": 6.2849,
      "step": 40
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00027606382978723404,
      "loss": 5.968,
      "step": 50
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002680851063829787,
      "loss": 5.6831,
      "step": 60
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0002601063829787234,
      "loss": 5.4843,
      "step": 70
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00025212765957446806,
      "loss": 5.2955,
      "step": 80
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0002441489361702127,
      "loss": 5.1497,
      "step": 90
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00023617021276595742,
      "loss": 5.0148,
      "step": 100
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0002281914893617021,
      "loss": 4.8996,
      "step": 110
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.00022021276595744679,
      "loss": 4.7704,
      "step": 120
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.0002122340425531915,
      "loss": 4.663,
      "step": 130
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.00020425531914893615,
      "loss": 4.5895,
      "step": 140
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00019627659574468083,
      "loss": 4.488,
      "step": 150
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.0001882978723404255,
      "loss": 4.3955,
      "step": 160
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.0001803191489361702,
      "loss": 4.3052,
      "step": 170
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.0001723404255319149,
      "loss": 4.2514,
      "step": 180
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.00016436170212765956,
      "loss": 4.1705,
      "step": 190
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.00015638297872340426,
      "loss": 4.0962,
      "step": 200
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.00014840425531914892,
      "loss": 4.0549,
      "step": 210
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.0001404255319148936,
      "loss": 4.0031,
      "step": 220
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.00013244680851063828,
      "loss": 3.9261,
      "step": 230
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.00012446808510638296,
      "loss": 3.8936,
      "step": 240
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00011648936170212764,
      "loss": 3.8438,
      "step": 250
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.00010851063829787234,
      "loss": 3.8208,
      "step": 260
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.00010053191489361702,
      "loss": 3.7774,
      "step": 270
    },
    {
      "epoch": 2.12,
      "learning_rate": 9.25531914893617e-05,
      "loss": 3.7364,
      "step": 280
    },
    {
      "epoch": 2.19,
      "learning_rate": 8.457446808510637e-05,
      "loss": 3.709,
      "step": 290
    },
    {
      "epoch": 2.27,
      "learning_rate": 7.659574468085105e-05,
      "loss": 3.6827,
      "step": 300
    },
    {
      "epoch": 2.35,
      "learning_rate": 6.861702127659574e-05,
      "loss": 3.6612,
      "step": 310
    },
    {
      "epoch": 2.42,
      "learning_rate": 6.063829787234042e-05,
      "loss": 3.632,
      "step": 320
    },
    {
      "epoch": 2.5,
      "learning_rate": 5.26595744680851e-05,
      "loss": 3.6141,
      "step": 330
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.468085106382978e-05,
      "loss": 3.5908,
      "step": 340
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.670212765957446e-05,
      "loss": 3.5763,
      "step": 350
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.8723404255319147e-05,
      "loss": 3.5524,
      "step": 360
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.0744680851063828e-05,
      "loss": 3.5515,
      "step": 370
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.276595744680851e-05,
      "loss": 3.5409,
      "step": 380
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.7872340425531906e-06,
      "loss": 3.5372,
      "step": 390
    },
    {
      "epoch": 3.0,
      "step": 396,
      "total_flos": 9.733927091526697e+17,
      "train_loss": 4.541526129751494,
      "train_runtime": 4638.0405,
      "train_samples_per_second": 43.753,
      "train_steps_per_second": 0.085
    }
  ],
  "logging_steps": 10,
  "max_steps": 396,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 9.733927091526697e+17,
  "trial_name": null,
  "trial_params": null
}