| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1, |
| "global_step": 17, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 27.238576889038086, |
| "learning_rate": 5e-06, |
| "loss": 2.4516, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 13.931468963623047, |
| "learning_rate": 4.957432749209755e-06, |
| "loss": 2.069, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 10.875990867614746, |
| "learning_rate": 4.83118057351089e-06, |
| "loss": 1.7016, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 9.849050521850586, |
| "learning_rate": 4.625542839324036e-06, |
| "loss": 1.4704, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 8.515420913696289, |
| "learning_rate": 4.3475222930516484e-06, |
| "loss": 1.2682, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 7.926646709442139, |
| "learning_rate": 4.006586590948141e-06, |
| "loss": 1.0279, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 6.248785495758057, |
| "learning_rate": 3.6143458894413463e-06, |
| "loss": 1.0175, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 6.138355255126953, |
| "learning_rate": 3.184157475180208e-06, |
| "loss": 0.9081, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "eval_loss": 1.0566511154174805, |
| "eval_runtime": 3.7727, |
| "eval_samples_per_second": 0.795, |
| "eval_steps_per_second": 0.265, |
| "step": 8 |
| }, |
| { |
| "checkpoint_runtime": 60.7829 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 2.969385862350464, |
| "learning_rate": 2.730670898658255e-06, |
| "loss": 1.1001, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "eval_loss": 1.030696988105774, |
| "eval_runtime": 3.7741, |
| "eval_samples_per_second": 0.795, |
| "eval_steps_per_second": 0.265, |
| "step": 9 |
| }, |
| { |
| "checkpoint_runtime": 62.4096 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 2.176196813583374, |
| "learning_rate": 2.269329101341745e-06, |
| "loss": 0.9383, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "eval_loss": 1.0097538232803345, |
| "eval_runtime": 3.7773, |
| "eval_samples_per_second": 0.794, |
| "eval_steps_per_second": 0.265, |
| "step": 10 |
| }, |
| { |
| "checkpoint_runtime": 65.596 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 1.8730698823928833, |
| "learning_rate": 1.8158425248197931e-06, |
| "loss": 0.908, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "eval_loss": 0.9935200810432434, |
| "eval_runtime": 3.7904, |
| "eval_samples_per_second": 0.791, |
| "eval_steps_per_second": 0.264, |
| "step": 11 |
| }, |
| { |
| "checkpoint_runtime": 71.8184 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.5293439626693726, |
| "learning_rate": 1.3856541105586545e-06, |
| "loss": 0.8389, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "eval_loss": 0.9816959500312805, |
| "eval_runtime": 3.7837, |
| "eval_samples_per_second": 0.793, |
| "eval_steps_per_second": 0.264, |
| "step": 12 |
| }, |
| { |
| "checkpoint_runtime": 71.1631 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 1.3013556003570557, |
| "learning_rate": 9.934134090518593e-07, |
| "loss": 0.7742, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "eval_loss": 0.9741225838661194, |
| "eval_runtime": 3.7962, |
| "eval_samples_per_second": 0.79, |
| "eval_steps_per_second": 0.263, |
| "step": 13 |
| }, |
| { |
| "checkpoint_runtime": 73.8347 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 1.3270924091339111, |
| "learning_rate": 6.524777069483526e-07, |
| "loss": 0.8797, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "eval_loss": 0.9687883853912354, |
| "eval_runtime": 4.1861, |
| "eval_samples_per_second": 0.717, |
| "eval_steps_per_second": 0.239, |
| "step": 14 |
| }, |
| { |
| "checkpoint_runtime": 76.1419 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 1.2872895002365112, |
| "learning_rate": 3.7445716067596506e-07, |
| "loss": 0.8989, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "eval_loss": 0.9653826355934143, |
| "eval_runtime": 5.1398, |
| "eval_samples_per_second": 0.584, |
| "eval_steps_per_second": 0.195, |
| "step": 15 |
| }, |
| { |
| "checkpoint_runtime": 77.0676 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 1.2387118339538574, |
| "learning_rate": 1.6881942648911077e-07, |
| "loss": 0.8853, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "eval_loss": 0.9634800553321838, |
| "eval_runtime": 4.8803, |
| "eval_samples_per_second": 0.615, |
| "eval_steps_per_second": 0.205, |
| "step": 16 |
| }, |
| { |
| "checkpoint_runtime": 79.8227 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.144616961479187, |
| "learning_rate": 4.256725079024554e-08, |
| "loss": 0.802, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.9631189703941345, |
| "eval_runtime": 4.9196, |
| "eval_samples_per_second": 0.61, |
| "eval_steps_per_second": 0.203, |
| "step": 17 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 17, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0006846020937318e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|