{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 100,
  "global_step": 90,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 1.3848826885223389,
      "learning_rate": 0.0002985402103112355,
      "loss": 2.723,
      "num_input_tokens_seen": 175104,
      "step": 5,
      "train_runtime": 83.0375,
      "train_tokens_per_second": 2108.734
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 0.6895682215690613,
      "learning_rate": 0.00029265847744427303,
      "loss": 2.3916,
      "num_input_tokens_seen": 354944,
      "step": 10,
      "train_runtime": 164.5936,
      "train_tokens_per_second": 2156.487
    },
    {
      "epoch": 0.5084745762711864,
      "grad_norm": 0.6077523827552795,
      "learning_rate": 0.000282442138928839,
      "loss": 2.2298,
      "num_input_tokens_seen": 540288,
      "step": 15,
      "train_runtime": 235.3105,
      "train_tokens_per_second": 2296.064
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.6539971828460693,
      "learning_rate": 0.00026820161304100823,
      "loss": 2.1544,
      "num_input_tokens_seen": 724032,
      "step": 20,
      "train_runtime": 314.4139,
      "train_tokens_per_second": 2302.799
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 0.5982179045677185,
      "learning_rate": 0.0002503695909538287,
      "loss": 2.093,
      "num_input_tokens_seen": 915328,
      "step": 25,
      "train_runtime": 621.4047,
      "train_tokens_per_second": 1472.998
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7921583652496338,
      "learning_rate": 0.0002294878896349807,
      "loss": 1.9914,
      "num_input_tokens_seen": 1076448,
      "step": 30,
      "train_runtime": 945.877,
      "train_tokens_per_second": 1138.042
    },
    {
      "epoch": 1.1694915254237288,
      "grad_norm": 0.6233165264129639,
      "learning_rate": 0.0002061909890123868,
      "loss": 1.8562,
      "num_input_tokens_seen": 1255264,
      "step": 35,
      "train_runtime": 1311.2231,
      "train_tokens_per_second": 957.323
    },
    {
      "epoch": 1.3389830508474576,
      "grad_norm": 0.6240576505661011,
      "learning_rate": 0.00018118675362266385,
      "loss": 1.8674,
      "num_input_tokens_seen": 1441760,
      "step": 40,
      "train_runtime": 1702.9404,
      "train_tokens_per_second": 846.63
    },
    {
      "epoch": 1.5084745762711864,
      "grad_norm": 0.5960806608200073,
      "learning_rate": 0.00015523492450537517,
      "loss": 1.8146,
      "num_input_tokens_seen": 1629792,
      "step": 45,
      "train_runtime": 2106.6678,
      "train_tokens_per_second": 773.635
    },
    {
      "epoch": 1.6779661016949152,
      "grad_norm": 0.6159402132034302,
      "learning_rate": 0.0001291240348559902,
      "loss": 1.7993,
      "num_input_tokens_seen": 1810016,
      "step": 50,
      "train_runtime": 2464.8114,
      "train_tokens_per_second": 734.343
    },
    {
      "epoch": 1.847457627118644,
      "grad_norm": 0.6029852628707886,
      "learning_rate": 0.0001036474508437579,
      "loss": 1.7685,
      "num_input_tokens_seen": 1995104,
      "step": 55,
      "train_runtime": 2847.5911,
      "train_tokens_per_second": 700.629
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.8139386773109436,
      "learning_rate": 7.957926558211642e-05,
      "loss": 1.7569,
      "num_input_tokens_seen": 2155824,
      "step": 60,
      "train_runtime": 3157.3683,
      "train_tokens_per_second": 682.791
    },
    {
      "epoch": 2.169491525423729,
      "grad_norm": 0.603970468044281,
      "learning_rate": 5.765077870115125e-05,
      "loss": 1.6611,
      "num_input_tokens_seen": 2341104,
      "step": 65,
      "train_runtime": 3543.124,
      "train_tokens_per_second": 660.746
    },
    {
      "epoch": 2.3389830508474576,
      "grad_norm": 0.6044986248016357,
      "learning_rate": 3.852827617839084e-05,
      "loss": 1.6719,
      "num_input_tokens_seen": 2521200,
      "step": 70,
      "train_runtime": 3896.3924,
      "train_tokens_per_second": 647.06
    },
    {
      "epoch": 2.5084745762711864,
      "grad_norm": 0.5742617249488831,
      "learning_rate": 2.2792785576536105e-05,
      "loss": 1.6857,
      "num_input_tokens_seen": 2709040,
      "step": 75,
      "train_runtime": 4297.8213,
      "train_tokens_per_second": 630.329
    },
    {
      "epoch": 2.6779661016949152,
      "grad_norm": 0.5705291628837585,
      "learning_rate": 1.0922421814981901e-05,
      "loss": 1.6768,
      "num_input_tokens_seen": 2896048,
      "step": 80,
      "train_runtime": 4682.8232,
      "train_tokens_per_second": 618.441
    },
    {
      "epoch": 2.847457627118644,
      "grad_norm": 0.5796510577201843,
      "learning_rate": 3.2778598899291465e-06,
      "loss": 1.628,
      "num_input_tokens_seen": 3073200,
      "step": 85,
      "train_runtime": 5015.5353,
      "train_tokens_per_second": 612.736
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.8749147057533264,
      "learning_rate": 9.137594713563568e-08,
      "loss": 1.6688,
      "num_input_tokens_seen": 3234144,
      "step": 90,
      "train_runtime": 5323.9438,
      "train_tokens_per_second": 607.471
    },
    {
      "epoch": 3.0,
      "num_input_tokens_seen": 3234144,
      "step": 90,
      "total_flos": 5179803374518272.0,
      "train_loss": 1.9132584571838378,
      "train_runtime": 5325.3956,
      "train_samples_per_second": 1.062,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 5,
  "max_steps": 90,
  "num_input_tokens_seen": 3234144,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5179803374518272.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}