{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01860176716788095, "eval_steps": 3, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006200589055960316, "grad_norm": 0.6006524562835693, "learning_rate": 4.000000000000001e-06, "loss": 1.4363, "step": 1 }, { "epoch": 0.0012401178111920632, "grad_norm": 0.4452102482318878, "learning_rate": 8.000000000000001e-06, "loss": 1.1641, "step": 2 }, { "epoch": 0.0018601767167880949, "grad_norm": 0.418393075466156, "learning_rate": 1.2e-05, "loss": 1.0005, "step": 3 }, { "epoch": 0.0018601767167880949, "eval_loss": 1.2559138536453247, "eval_runtime": 50.2632, "eval_samples_per_second": 1.99, "eval_steps_per_second": 1.99, "step": 3 }, { "epoch": 0.0024802356223841263, "grad_norm": 0.4599858820438385, "learning_rate": 1.6000000000000003e-05, "loss": 1.1938, "step": 4 }, { "epoch": 0.0031002945279801583, "grad_norm": 0.4313387870788574, "learning_rate": 2e-05, "loss": 0.9897, "step": 5 }, { "epoch": 0.0037203534335761897, "grad_norm": 0.4715091586112976, "learning_rate": 1.9200000000000003e-05, "loss": 1.1105, "step": 6 }, { "epoch": 0.0037203534335761897, "eval_loss": 1.2492942810058594, "eval_runtime": 49.6589, "eval_samples_per_second": 2.014, "eval_steps_per_second": 2.014, "step": 6 }, { "epoch": 0.004340412339172222, "grad_norm": 0.4590495824813843, "learning_rate": 1.8400000000000003e-05, "loss": 1.0824, "step": 7 }, { "epoch": 0.004960471244768253, "grad_norm": 0.6232957243919373, "learning_rate": 1.76e-05, "loss": 1.2176, "step": 8 }, { "epoch": 0.005580530150364285, "grad_norm": 0.4820755422115326, "learning_rate": 1.6800000000000002e-05, "loss": 1.0774, "step": 9 }, { "epoch": 0.005580530150364285, "eval_loss": 1.2356064319610596, "eval_runtime": 49.9029, "eval_samples_per_second": 2.004, "eval_steps_per_second": 2.004, "step": 9 }, { "epoch": 0.0062005890559603165, "grad_norm": 0.614038348197937, "learning_rate": 1.6000000000000003e-05, "loss": 1.2989, "step": 10 }, { "epoch": 0.0068206479615563476, "grad_norm": 0.5838705897331238, "learning_rate": 1.5200000000000002e-05, "loss": 1.05, "step": 11 }, { "epoch": 0.0074407068671523795, "grad_norm": 0.6922520995140076, "learning_rate": 1.4400000000000001e-05, "loss": 1.3979, "step": 12 }, { "epoch": 0.0074407068671523795, "eval_loss": 1.2183419466018677, "eval_runtime": 50.5422, "eval_samples_per_second": 1.979, "eval_steps_per_second": 1.979, "step": 12 }, { "epoch": 0.008060765772748411, "grad_norm": 0.6026104688644409, "learning_rate": 1.3600000000000002e-05, "loss": 1.1974, "step": 13 }, { "epoch": 0.008680824678344443, "grad_norm": 0.46571722626686096, "learning_rate": 1.2800000000000001e-05, "loss": 0.9946, "step": 14 }, { "epoch": 0.009300883583940475, "grad_norm": 0.5651541948318481, "learning_rate": 1.2e-05, "loss": 1.0568, "step": 15 }, { "epoch": 0.009300883583940475, "eval_loss": 1.1998839378356934, "eval_runtime": 50.192, "eval_samples_per_second": 1.992, "eval_steps_per_second": 1.992, "step": 15 }, { "epoch": 0.009920942489536505, "grad_norm": 0.4988487958908081, "learning_rate": 1.1200000000000001e-05, "loss": 0.9318, "step": 16 }, { "epoch": 0.010541001395132537, "grad_norm": 0.4823167324066162, "learning_rate": 1.04e-05, "loss": 1.0622, "step": 17 }, { "epoch": 0.01116106030072857, "grad_norm": 0.5371574759483337, "learning_rate": 9.600000000000001e-06, "loss": 1.0033, "step": 18 }, { "epoch": 0.01116106030072857, "eval_loss": 1.1833301782608032, "eval_runtime": 50.0653, "eval_samples_per_second": 1.997, "eval_steps_per_second": 1.997, "step": 18 }, { "epoch": 0.011781119206324601, "grad_norm": 0.5795661807060242, "learning_rate": 8.8e-06, "loss": 0.9854, "step": 19 }, { "epoch": 0.012401178111920633, "grad_norm": 0.41762855648994446, "learning_rate": 8.000000000000001e-06, "loss": 0.8116, "step": 20 }, { "epoch": 0.013021237017516665, "grad_norm": 0.6034093499183655, "learning_rate": 7.2000000000000005e-06, "loss": 1.1081, "step": 21 }, { "epoch": 0.013021237017516665, "eval_loss": 1.1694728136062622, "eval_runtime": 50.5031, "eval_samples_per_second": 1.98, "eval_steps_per_second": 1.98, "step": 21 }, { "epoch": 0.013641295923112695, "grad_norm": 0.5518467426300049, "learning_rate": 6.4000000000000006e-06, "loss": 1.0851, "step": 22 }, { "epoch": 0.014261354828708727, "grad_norm": 0.7419660687446594, "learning_rate": 5.600000000000001e-06, "loss": 1.2086, "step": 23 }, { "epoch": 0.014881413734304759, "grad_norm": 0.5225710272789001, "learning_rate": 4.800000000000001e-06, "loss": 1.044, "step": 24 }, { "epoch": 0.014881413734304759, "eval_loss": 1.1590291261672974, "eval_runtime": 50.214, "eval_samples_per_second": 1.991, "eval_steps_per_second": 1.991, "step": 24 }, { "epoch": 0.01550147263990079, "grad_norm": 0.539212167263031, "learning_rate": 4.000000000000001e-06, "loss": 1.0468, "step": 25 }, { "epoch": 0.016121531545496823, "grad_norm": 0.4953361451625824, "learning_rate": 3.2000000000000003e-06, "loss": 0.9279, "step": 26 }, { "epoch": 0.016741590451092855, "grad_norm": 0.4864480793476105, "learning_rate": 2.4000000000000003e-06, "loss": 0.9413, "step": 27 }, { "epoch": 0.016741590451092855, "eval_loss": 1.1525880098342896, "eval_runtime": 50.5444, "eval_samples_per_second": 1.978, "eval_steps_per_second": 1.978, "step": 27 }, { "epoch": 0.017361649356688887, "grad_norm": 0.4352854788303375, "learning_rate": 1.6000000000000001e-06, "loss": 0.8065, "step": 28 }, { "epoch": 0.01798170826228492, "grad_norm": 0.5856941342353821, "learning_rate": 8.000000000000001e-07, "loss": 1.1299, "step": 29 }, { "epoch": 0.01860176716788095, "grad_norm": 0.5818368196487427, "learning_rate": 0.0, "loss": 0.8976, "step": 30 }, { "epoch": 0.01860176716788095, "eval_loss": 1.1500217914581299, "eval_runtime": 50.2769, "eval_samples_per_second": 1.989, "eval_steps_per_second": 1.989, "step": 30 } ], "logging_steps": 1, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.438329862521242e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }