| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 10160, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.984251968503937, | |
| "grad_norm": 11363.6455078125, | |
| "learning_rate": 0.0005993999999999999, | |
| "loss": 1.4164, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.1411797595750629, | |
| "eval_loss": 5.487102031707764, | |
| "eval_runtime": 4.0212, | |
| "eval_samples_per_second": 53.964, | |
| "eval_steps_per_second": 1.741, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.968503937007874, | |
| "grad_norm": 10689.5205078125, | |
| "learning_rate": 0.0005345633187772926, | |
| "loss": 1.259, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.15497758979862383, | |
| "eval_loss": 5.215289115905762, | |
| "eval_runtime": 3.6094, | |
| "eval_samples_per_second": 60.12, | |
| "eval_steps_per_second": 1.939, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.952755905511811, | |
| "grad_norm": 10506.4365234375, | |
| "learning_rate": 0.000469061135371179, | |
| "loss": 1.2097, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.16820727407180283, | |
| "eval_loss": 5.055620193481445, | |
| "eval_runtime": 3.6003, | |
| "eval_samples_per_second": 60.272, | |
| "eval_steps_per_second": 1.944, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 3.937007874015748, | |
| "grad_norm": 10437.658203125, | |
| "learning_rate": 0.00040355895196506547, | |
| "loss": 1.1584, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.1963981350383724, | |
| "eval_loss": 4.808844566345215, | |
| "eval_runtime": 5.4678, | |
| "eval_samples_per_second": 39.687, | |
| "eval_steps_per_second": 1.28, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 4.921259842519685, | |
| "grad_norm": 9832.3310546875, | |
| "learning_rate": 0.000338056768558952, | |
| "loss": 1.0942, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.22212703021995364, | |
| "eval_loss": 4.599267482757568, | |
| "eval_runtime": 5.6349, | |
| "eval_samples_per_second": 38.51, | |
| "eval_steps_per_second": 1.242, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 5.905511811023622, | |
| "grad_norm": 10148.5263671875, | |
| "learning_rate": 0.0002725545851528384, | |
| "loss": 1.0487, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.2408578102031798, | |
| "eval_loss": 4.4708170890808105, | |
| "eval_runtime": 5.523, | |
| "eval_samples_per_second": 39.29, | |
| "eval_steps_per_second": 1.267, | |
| "step": 6096 | |
| }, | |
| { | |
| "epoch": 6.889763779527559, | |
| "grad_norm": 10588.759765625, | |
| "learning_rate": 0.00020705240174672486, | |
| "loss": 1.0156, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.25204036541704616, | |
| "eval_loss": 4.3842902183532715, | |
| "eval_runtime": 3.6379, | |
| "eval_samples_per_second": 59.649, | |
| "eval_steps_per_second": 1.924, | |
| "step": 7112 | |
| }, | |
| { | |
| "epoch": 7.874015748031496, | |
| "grad_norm": 11235.5517578125, | |
| "learning_rate": 0.00014155021834061135, | |
| "loss": 0.9895, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.26269084743928506, | |
| "eval_loss": 4.315659999847412, | |
| "eval_runtime": 3.6224, | |
| "eval_samples_per_second": 59.905, | |
| "eval_steps_per_second": 1.932, | |
| "step": 8128 | |
| }, | |
| { | |
| "epoch": 8.858267716535433, | |
| "grad_norm": 11518.4150390625, | |
| "learning_rate": 7.604803493449781e-05, | |
| "loss": 0.9678, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.26984227186234633, | |
| "eval_loss": 4.2638258934021, | |
| "eval_runtime": 3.5903, | |
| "eval_samples_per_second": 60.44, | |
| "eval_steps_per_second": 1.95, | |
| "step": 9144 | |
| }, | |
| { | |
| "epoch": 9.84251968503937, | |
| "grad_norm": 11941.603515625, | |
| "learning_rate": 1.054585152838428e-05, | |
| "loss": 0.9507, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.2735848205831162, | |
| "eval_loss": 4.236743450164795, | |
| "eval_runtime": 3.5683, | |
| "eval_samples_per_second": 60.813, | |
| "eval_steps_per_second": 1.962, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 10160, | |
| "total_flos": 8.490945871872e+16, | |
| "train_loss": 1.1083988550141102, | |
| "train_runtime": 9862.8215, | |
| "train_samples_per_second": 32.948, | |
| "train_steps_per_second": 1.03 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 10160, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.490945871872e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |