| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9577464788732395, | |
| "global_step": 420, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 3.769569158554077, | |
| "eval_runtime": 27.5829, | |
| "eval_samples_per_second": 80.34, | |
| "eval_steps_per_second": 1.269, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 3.7680795192718506, | |
| "eval_runtime": 27.3249, | |
| "eval_samples_per_second": 81.098, | |
| "eval_steps_per_second": 1.281, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 3.7672388553619385, | |
| "eval_runtime": 27.4916, | |
| "eval_samples_per_second": 80.606, | |
| "eval_steps_per_second": 1.273, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 3.765839099884033, | |
| "eval_runtime": 27.4005, | |
| "eval_samples_per_second": 80.874, | |
| "eval_steps_per_second": 1.277, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 3.763303518295288, | |
| "eval_runtime": 27.5296, | |
| "eval_samples_per_second": 80.495, | |
| "eval_steps_per_second": 1.271, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 3.7637836933135986, | |
| "eval_runtime": 27.328, | |
| "eval_samples_per_second": 81.089, | |
| "eval_steps_per_second": 1.281, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 3.760821580886841, | |
| "eval_runtime": 27.5647, | |
| "eval_samples_per_second": 80.393, | |
| "eval_steps_per_second": 1.27, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 3.7594783306121826, | |
| "eval_runtime": 27.3075, | |
| "eval_samples_per_second": 81.15, | |
| "eval_steps_per_second": 1.282, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 3.7594757080078125, | |
| "eval_runtime": 27.4379, | |
| "eval_samples_per_second": 80.764, | |
| "eval_steps_per_second": 1.276, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 3.7583823204040527, | |
| "eval_runtime": 27.5694, | |
| "eval_samples_per_second": 80.379, | |
| "eval_steps_per_second": 1.27, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 3.7562084197998047, | |
| "eval_runtime": 27.3431, | |
| "eval_samples_per_second": 81.044, | |
| "eval_steps_per_second": 1.28, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 3.755629301071167, | |
| "eval_runtime": 27.5374, | |
| "eval_samples_per_second": 80.472, | |
| "eval_steps_per_second": 1.271, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 3.7544658184051514, | |
| "eval_runtime": 27.3291, | |
| "eval_samples_per_second": 81.086, | |
| "eval_steps_per_second": 1.281, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 3.7536823749542236, | |
| "eval_runtime": 27.3625, | |
| "eval_samples_per_second": 80.987, | |
| "eval_steps_per_second": 1.279, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 3.7540736198425293, | |
| "eval_runtime": 27.6591, | |
| "eval_samples_per_second": 80.118, | |
| "eval_steps_per_second": 1.265, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 3.7542569637298584, | |
| "eval_runtime": 27.4748, | |
| "eval_samples_per_second": 80.656, | |
| "eval_steps_per_second": 1.274, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 3.752318859100342, | |
| "eval_runtime": 27.6148, | |
| "eval_samples_per_second": 80.247, | |
| "eval_steps_per_second": 1.267, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 3.7517406940460205, | |
| "eval_runtime": 27.3496, | |
| "eval_samples_per_second": 81.025, | |
| "eval_steps_per_second": 1.28, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 3.751498222351074, | |
| "eval_runtime": 27.5367, | |
| "eval_samples_per_second": 80.474, | |
| "eval_steps_per_second": 1.271, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 3.7509350776672363, | |
| "eval_runtime": 27.354, | |
| "eval_samples_per_second": 81.012, | |
| "eval_steps_per_second": 1.28, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 3.750244140625, | |
| "eval_runtime": 27.3514, | |
| "eval_samples_per_second": 81.02, | |
| "eval_steps_per_second": 1.28, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 3.750495433807373, | |
| "eval_runtime": 27.5933, | |
| "eval_samples_per_second": 80.309, | |
| "eval_steps_per_second": 1.268, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 3.7491180896759033, | |
| "eval_runtime": 27.3222, | |
| "eval_samples_per_second": 81.106, | |
| "eval_steps_per_second": 1.281, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 3.747929096221924, | |
| "eval_runtime": 27.6237, | |
| "eval_samples_per_second": 80.221, | |
| "eval_steps_per_second": 1.267, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 3.749088764190674, | |
| "eval_runtime": 27.2827, | |
| "eval_samples_per_second": 81.224, | |
| "eval_steps_per_second": 1.283, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 3.748143434524536, | |
| "eval_runtime": 27.5685, | |
| "eval_samples_per_second": 80.381, | |
| "eval_steps_per_second": 1.27, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 3.747373580932617, | |
| "eval_runtime": 27.3404, | |
| "eval_samples_per_second": 81.052, | |
| "eval_steps_per_second": 1.28, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 3.7469279766082764, | |
| "eval_runtime": 27.5885, | |
| "eval_samples_per_second": 80.323, | |
| "eval_steps_per_second": 1.269, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 3.7472589015960693, | |
| "eval_runtime": 27.3166, | |
| "eval_samples_per_second": 81.123, | |
| "eval_steps_per_second": 1.281, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 3.7470862865448, | |
| "eval_runtime": 27.2994, | |
| "eval_samples_per_second": 81.174, | |
| "eval_steps_per_second": 1.282, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 3.746790885925293, | |
| "eval_runtime": 27.3969, | |
| "eval_samples_per_second": 80.885, | |
| "eval_steps_per_second": 1.278, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 3.747112989425659, | |
| "eval_runtime": 27.3496, | |
| "eval_samples_per_second": 81.025, | |
| "eval_steps_per_second": 1.28, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 3.747150421142578, | |
| "eval_runtime": 27.5712, | |
| "eval_samples_per_second": 80.374, | |
| "eval_steps_per_second": 1.269, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 3.7463109493255615, | |
| "eval_runtime": 27.3541, | |
| "eval_samples_per_second": 81.012, | |
| "eval_steps_per_second": 1.28, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 3.745861530303955, | |
| "eval_runtime": 27.5761, | |
| "eval_samples_per_second": 80.36, | |
| "eval_steps_per_second": 1.269, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 3.7457809448242188, | |
| "eval_runtime": 27.4138, | |
| "eval_samples_per_second": 80.835, | |
| "eval_steps_per_second": 1.277, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 3.745532751083374, | |
| "eval_runtime": 27.5336, | |
| "eval_samples_per_second": 80.483, | |
| "eval_steps_per_second": 1.271, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 3.7454142570495605, | |
| "eval_runtime": 27.3417, | |
| "eval_samples_per_second": 81.048, | |
| "eval_steps_per_second": 1.28, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 3.7453861236572266, | |
| "eval_runtime": 27.3314, | |
| "eval_samples_per_second": 81.079, | |
| "eval_steps_per_second": 1.281, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 3.7453415393829346, | |
| "eval_runtime": 27.5694, | |
| "eval_samples_per_second": 80.379, | |
| "eval_steps_per_second": 1.27, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 3.745169162750244, | |
| "eval_runtime": 27.3841, | |
| "eval_samples_per_second": 80.923, | |
| "eval_steps_per_second": 1.278, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 3.745060682296753, | |
| "eval_runtime": 27.5883, | |
| "eval_samples_per_second": 80.324, | |
| "eval_steps_per_second": 1.269, | |
| "step": 420 | |
| } | |
| ], | |
| "max_steps": 426, | |
| "num_train_epochs": 3, | |
| "total_flos": 875997356359680.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |