{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.013021237017516665, "eval_steps": 3, "global_step": 21, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006200589055960316, "grad_norm": 0.39027148485183716, "learning_rate": 4.000000000000001e-06, "loss": 1.1401, "step": 1 }, { "epoch": 0.0012401178111920632, "grad_norm": 0.1772935390472412, "learning_rate": 8.000000000000001e-06, "loss": 0.9133, "step": 2 }, { "epoch": 0.0018601767167880949, "grad_norm": 0.192855104804039, "learning_rate": 1.2e-05, "loss": 0.8036, "step": 3 }, { "epoch": 0.0018601767167880949, "eval_loss": 0.993510365486145, "eval_runtime": 46.5106, "eval_samples_per_second": 2.15, "eval_steps_per_second": 2.15, "step": 3 }, { "epoch": 0.0024802356223841263, "grad_norm": 0.23705194890499115, "learning_rate": 1.6000000000000003e-05, "loss": 0.9389, "step": 4 }, { "epoch": 0.0031002945279801583, "grad_norm": 0.22702816128730774, "learning_rate": 2e-05, "loss": 0.7836, "step": 5 }, { "epoch": 0.0037203534335761897, "grad_norm": 0.22573529183864594, "learning_rate": 1.9200000000000003e-05, "loss": 0.8874, "step": 6 }, { "epoch": 0.0037203534335761897, "eval_loss": 0.9887735843658447, "eval_runtime": 47.2769, "eval_samples_per_second": 2.115, "eval_steps_per_second": 2.115, "step": 6 }, { "epoch": 0.004340412339172222, "grad_norm": 0.231398344039917, "learning_rate": 1.8400000000000003e-05, "loss": 0.8755, "step": 7 }, { "epoch": 0.004960471244768253, "grad_norm": 0.28229379653930664, "learning_rate": 1.76e-05, "loss": 0.971, "step": 8 }, { "epoch": 0.005580530150364285, "grad_norm": 0.21991951763629913, "learning_rate": 1.6800000000000002e-05, "loss": 0.8546, "step": 9 }, { "epoch": 0.005580530150364285, "eval_loss": 0.9774181842803955, "eval_runtime": 47.3982, "eval_samples_per_second": 2.11, "eval_steps_per_second": 2.11, "step": 9 }, { "epoch": 0.0062005890559603165, "grad_norm": 0.26028916239738464, "learning_rate": 1.6000000000000003e-05, "loss": 1.0344, "step": 10 }, { "epoch": 0.0068206479615563476, "grad_norm": 0.23867206275463104, "learning_rate": 1.5200000000000002e-05, "loss": 0.8132, "step": 11 }, { "epoch": 0.0074407068671523795, "grad_norm": 0.30641764402389526, "learning_rate": 1.4400000000000001e-05, "loss": 1.1168, "step": 12 }, { "epoch": 0.0074407068671523795, "eval_loss": 0.963714599609375, "eval_runtime": 47.188, "eval_samples_per_second": 2.119, "eval_steps_per_second": 2.119, "step": 12 }, { "epoch": 0.008060765772748411, "grad_norm": 0.25963348150253296, "learning_rate": 1.3600000000000002e-05, "loss": 0.9735, "step": 13 }, { "epoch": 0.008680824678344443, "grad_norm": 0.2063651829957962, "learning_rate": 1.2800000000000001e-05, "loss": 0.7957, "step": 14 }, { "epoch": 0.009300883583940475, "grad_norm": 0.2287115454673767, "learning_rate": 1.2e-05, "loss": 0.8343, "step": 15 }, { "epoch": 0.009300883583940475, "eval_loss": 0.9510444402694702, "eval_runtime": 47.3972, "eval_samples_per_second": 2.11, "eval_steps_per_second": 2.11, "step": 15 }, { "epoch": 0.009920942489536505, "grad_norm": 0.2136881947517395, "learning_rate": 1.1200000000000001e-05, "loss": 0.7861, "step": 16 }, { "epoch": 0.010541001395132537, "grad_norm": 0.16134004294872284, "learning_rate": 1.04e-05, "loss": 0.8375, "step": 17 }, { "epoch": 0.01116106030072857, "grad_norm": 0.20020240545272827, "learning_rate": 9.600000000000001e-06, "loss": 0.8086, "step": 18 }, { "epoch": 0.01116106030072857, "eval_loss": 0.9415214657783508, "eval_runtime": 47.2291, "eval_samples_per_second": 2.117, "eval_steps_per_second": 2.117, "step": 18 }, { "epoch": 0.011781119206324601, "grad_norm": 0.2330344021320343, "learning_rate": 8.8e-06, "loss": 0.7858, "step": 19 }, { "epoch": 0.012401178111920633, "grad_norm": 0.175759956240654, "learning_rate": 8.000000000000001e-06, "loss": 0.6359, "step": 20 }, { "epoch": 0.013021237017516665, "grad_norm": 0.21558189392089844, "learning_rate": 7.2000000000000005e-06, "loss": 0.8966, "step": 21 }, { "epoch": 0.013021237017516665, "eval_loss": 0.9336352348327637, "eval_runtime": 47.0196, "eval_samples_per_second": 2.127, "eval_steps_per_second": 2.127, "step": 21 } ], "logging_steps": 1, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.495772591437824e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }