| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.976982097186701, |
| "global_step": 291, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.790940766550522e-05, |
| "loss": 1.9828, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.442508710801395e-05, |
| "loss": 1.6574, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 9.094076655052265e-05, |
| "loss": 1.5428, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_loss": 1.5080170631408691, |
| "eval_runtime": 3.3315, |
| "eval_samples_per_second": 166.892, |
| "eval_steps_per_second": 2.701, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 8.745644599303136e-05, |
| "loss": 1.4842, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 8.397212543554008e-05, |
| "loss": 1.4473, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 8.048780487804879e-05, |
| "loss": 1.4192, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_loss": 1.4152321815490723, |
| "eval_runtime": 3.3113, |
| "eval_samples_per_second": 167.908, |
| "eval_steps_per_second": 2.718, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 7.700348432055749e-05, |
| "loss": 1.4157, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 7.35191637630662e-05, |
| "loss": 1.3794, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.003484320557492e-05, |
| "loss": 1.3618, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_loss": 1.3724521398544312, |
| "eval_runtime": 3.2923, |
| "eval_samples_per_second": 168.879, |
| "eval_steps_per_second": 2.734, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 6.655052264808363e-05, |
| "loss": 1.3483, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 6.306620209059234e-05, |
| "loss": 1.3452, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 5.958188153310105e-05, |
| "loss": 1.3297, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_loss": 1.3456683158874512, |
| "eval_runtime": 3.2951, |
| "eval_samples_per_second": 168.734, |
| "eval_steps_per_second": 2.731, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 5.6097560975609764e-05, |
| "loss": 1.3149, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 5.261324041811847e-05, |
| "loss": 1.3135, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 4.9128919860627184e-05, |
| "loss": 1.3014, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_loss": 1.3272855281829834, |
| "eval_runtime": 3.2699, |
| "eval_samples_per_second": 170.036, |
| "eval_steps_per_second": 2.752, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.564459930313589e-05, |
| "loss": 1.3014, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 4.21602787456446e-05, |
| "loss": 1.2839, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 3.867595818815331e-05, |
| "loss": 1.2845, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_loss": 1.3136122226715088, |
| "eval_runtime": 3.363, |
| "eval_samples_per_second": 165.328, |
| "eval_steps_per_second": 2.676, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 3.5191637630662025e-05, |
| "loss": 1.2693, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 3.170731707317073e-05, |
| "loss": 1.2665, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 2.8222996515679445e-05, |
| "loss": 1.2614, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_loss": 1.3054131269454956, |
| "eval_runtime": 3.3215, |
| "eval_samples_per_second": 167.392, |
| "eval_steps_per_second": 2.71, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 2.4738675958188155e-05, |
| "loss": 1.2486, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.1254355400696865e-05, |
| "loss": 1.2521, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.7770034843205575e-05, |
| "loss": 1.2466, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_loss": 1.2998528480529785, |
| "eval_runtime": 3.3399, |
| "eval_samples_per_second": 166.47, |
| "eval_steps_per_second": 2.695, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 1.2489, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 1.0801393728222997e-05, |
| "loss": 1.238, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 7.317073170731707e-06, |
| "loss": 1.2353, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_loss": 1.2975901365280151, |
| "eval_runtime": 3.3306, |
| "eval_samples_per_second": 166.935, |
| "eval_steps_per_second": 2.702, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 3.832752613240418e-06, |
| "loss": 1.2444, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 3.4843205574912896e-07, |
| "loss": 1.2413, |
| "step": 290 |
| } |
| ], |
| "max_steps": 291, |
| "num_train_epochs": 3, |
| "total_flos": 3.2140961068036915e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|