| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9992356201031916, |
| "eval_steps": 1000, |
| "global_step": 15693, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00019116803670426305, |
| "grad_norm": 4.6432929039001465, |
| "learning_rate": 0.0, |
| "loss": 2.5575, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.19116803670426305, |
| "grad_norm": 2.9217827320098877, |
| "learning_rate": 1.0609600679694139e-05, |
| "loss": 2.0273, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19116803670426305, |
| "eval_cosine_accuracy": 0.948447048664093, |
| "eval_loss": 1.0634701251983643, |
| "eval_runtime": 24.1557, |
| "eval_samples_per_second": 391.874, |
| "eval_steps_per_second": 1.532, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3823360734085261, |
| "grad_norm": 3.177168607711792, |
| "learning_rate": 2.122982158028887e-05, |
| "loss": 1.5838, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3823360734085261, |
| "eval_cosine_accuracy": 0.9582716822624207, |
| "eval_loss": 0.9441654086112976, |
| "eval_runtime": 28.4841, |
| "eval_samples_per_second": 332.326, |
| "eval_steps_per_second": 1.299, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5735041101127891, |
| "grad_norm": 2.7391252517700195, |
| "learning_rate": 2.4856048128759225e-05, |
| "loss": 1.3312, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5735041101127891, |
| "eval_cosine_accuracy": 0.9655609726905823, |
| "eval_loss": 0.8855878114700317, |
| "eval_runtime": 23.5458, |
| "eval_samples_per_second": 402.025, |
| "eval_steps_per_second": 1.571, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7646721468170522, |
| "grad_norm": 3.0197041034698486, |
| "learning_rate": 2.4073541741828953e-05, |
| "loss": 1.2429, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.7646721468170522, |
| "eval_cosine_accuracy": 0.9687302112579346, |
| "eval_loss": 0.8447386026382446, |
| "eval_runtime": 23.4979, |
| "eval_samples_per_second": 402.845, |
| "eval_steps_per_second": 1.575, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9558401835213153, |
| "grad_norm": 3.199629783630371, |
| "learning_rate": 2.2652020358120533e-05, |
| "loss": 1.7176, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.9558401835213153, |
| "eval_cosine_accuracy": 0.9684132933616638, |
| "eval_loss": 0.8515172600746155, |
| "eval_runtime": 23.5494, |
| "eval_samples_per_second": 401.964, |
| "eval_steps_per_second": 1.571, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1469520351614753, |
| "grad_norm": 2.7392985820770264, |
| "learning_rate": 2.0672198967023248e-05, |
| "loss": 1.61, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.1469520351614753, |
| "eval_cosine_accuracy": 0.9707373976707458, |
| "eval_loss": 0.8034478425979614, |
| "eval_runtime": 23.5058, |
| "eval_samples_per_second": 402.71, |
| "eval_steps_per_second": 1.574, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.3380470093636538, |
| "grad_norm": 2.6674864292144775, |
| "learning_rate": 1.823944505897115e-05, |
| "loss": 1.2334, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.3380470093636538, |
| "eval_cosine_accuracy": 0.9709486365318298, |
| "eval_loss": 0.7899209260940552, |
| "eval_runtime": 23.366, |
| "eval_samples_per_second": 405.118, |
| "eval_steps_per_second": 1.583, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.5291419835658322, |
| "grad_norm": 2.4728574752807617, |
| "learning_rate": 1.5492655130798728e-05, |
| "loss": 1.2111, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.5291419835658322, |
| "eval_cosine_accuracy": 0.9720050692558289, |
| "eval_loss": 0.784406840801239, |
| "eval_runtime": 23.2176, |
| "eval_samples_per_second": 407.708, |
| "eval_steps_per_second": 1.594, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.7202369577680106, |
| "grad_norm": 2.6904804706573486, |
| "learning_rate": 1.2578015279283461e-05, |
| "loss": 1.192, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.7202369577680106, |
| "eval_cosine_accuracy": 0.9716881513595581, |
| "eval_loss": 0.7789891362190247, |
| "eval_runtime": 23.3546, |
| "eval_samples_per_second": 405.315, |
| "eval_steps_per_second": 1.584, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.9113319319701891, |
| "grad_norm": 5.677177429199219, |
| "learning_rate": 9.661934964736864e-06, |
| "loss": 1.1742, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.9113319319701891, |
| "eval_cosine_accuracy": 0.9725332856178284, |
| "eval_loss": 0.7686395049095154, |
| "eval_runtime": 23.3491, |
| "eval_samples_per_second": 405.412, |
| "eval_steps_per_second": 1.585, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.1024269061723677, |
| "grad_norm": 2.5367815494537354, |
| "learning_rate": 6.899610037668264e-06, |
| "loss": 1.1347, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.1024269061723677, |
| "eval_cosine_accuracy": 0.9733784198760986, |
| "eval_loss": 0.7704218626022339, |
| "eval_runtime": 23.8618, |
| "eval_samples_per_second": 396.701, |
| "eval_steps_per_second": 1.551, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.2935218803745463, |
| "grad_norm": 3.3757665157318115, |
| "learning_rate": 4.448753637174272e-06, |
| "loss": 1.1288, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.2935218803745463, |
| "eval_cosine_accuracy": 0.9730615019798279, |
| "eval_loss": 0.7699152231216431, |
| "eval_runtime": 23.5668, |
| "eval_samples_per_second": 401.667, |
| "eval_steps_per_second": 1.57, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.484616854576725, |
| "grad_norm": 2.40852952003479, |
| "learning_rate": 2.4398020690224317e-06, |
| "loss": 1.1093, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.484616854576725, |
| "eval_cosine_accuracy": 0.9727445840835571, |
| "eval_loss": 0.7687693238258362, |
| "eval_runtime": 23.1387, |
| "eval_samples_per_second": 409.098, |
| "eval_steps_per_second": 1.599, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.675711828778903, |
| "grad_norm": 3.2049450874328613, |
| "learning_rate": 9.874551120119327e-07, |
| "loss": 1.1176, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.675711828778903, |
| "eval_cosine_accuracy": 0.9733784198760986, |
| "eval_loss": 0.7677363157272339, |
| "eval_runtime": 23.4321, |
| "eval_samples_per_second": 403.975, |
| "eval_steps_per_second": 1.579, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.8668068029810816, |
| "grad_norm": 2.339397668838501, |
| "learning_rate": 1.6900769526510123e-07, |
| "loss": 1.1247, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.8668068029810816, |
| "eval_cosine_accuracy": 0.9734840393066406, |
| "eval_loss": 0.7677423357963562, |
| "eval_runtime": 23.8594, |
| "eval_samples_per_second": 396.74, |
| "eval_steps_per_second": 1.551, |
| "step": 15000 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 15693, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|