{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005, "grad_norm": 29.21416664123535, "learning_rate": 0.000199, "loss": 12.1369, "step": 10 }, { "epoch": 0.01, "grad_norm": 2.8678336143493652, "learning_rate": 0.00019800000000000002, "loss": 3.0885, "step": 20 }, { "epoch": 0.015, "grad_norm": 0.6990471482276917, "learning_rate": 0.00019700000000000002, "loss": 0.2248, "step": 30 }, { "epoch": 0.02, "grad_norm": 0.3682764768600464, "learning_rate": 0.000196, "loss": 0.1795, "step": 40 }, { "epoch": 0.025, "grad_norm": 0.20672959089279175, "learning_rate": 0.000195, "loss": 0.1333, "step": 50 }, { "epoch": 0.03, "grad_norm": 0.19836053252220154, "learning_rate": 0.000194, "loss": 0.1045, "step": 60 }, { "epoch": 0.035, "grad_norm": 0.3058789372444153, "learning_rate": 0.000193, "loss": 0.0776, "step": 70 }, { "epoch": 0.04, "grad_norm": 0.20461063086986542, "learning_rate": 0.000192, "loss": 0.0537, "step": 80 }, { "epoch": 0.045, "grad_norm": 0.320773184299469, "learning_rate": 0.000191, "loss": 0.0363, "step": 90 }, { "epoch": 0.05, "grad_norm": 0.2853151559829712, "learning_rate": 0.00019, "loss": 0.0251, "step": 100 }, { "epoch": 0.055, "grad_norm": 0.19527700543403625, "learning_rate": 0.00018899999999999999, "loss": 0.0164, "step": 110 }, { "epoch": 0.06, "grad_norm": 0.2739509344100952, "learning_rate": 0.000188, "loss": 0.0106, "step": 120 }, { "epoch": 0.065, "grad_norm": 0.1407923698425293, "learning_rate": 0.00018700000000000002, "loss": 0.0079, "step": 130 }, { "epoch": 0.07, "grad_norm": 0.10815149545669556, "learning_rate": 0.00018600000000000002, "loss": 0.0058, "step": 140 }, { "epoch": 0.075, "grad_norm": 0.25316667556762695, "learning_rate": 0.00018500000000000002, "loss": 0.005, "step": 150 }, { "epoch": 0.08, "grad_norm": 0.0536436066031456, "learning_rate": 0.00018400000000000003, "loss": 0.0042, "step": 160 }, { "epoch": 0.085, "grad_norm": 0.12960447371006012, "learning_rate": 0.000183, "loss": 0.0037, "step": 170 }, { "epoch": 0.09, "grad_norm": 0.19423653185367584, "learning_rate": 0.000182, "loss": 0.0039, "step": 180 }, { "epoch": 0.095, "grad_norm": 0.030633121728897095, "learning_rate": 0.000181, "loss": 0.0033, "step": 190 }, { "epoch": 0.1, "grad_norm": 0.0728762298822403, "learning_rate": 0.00018, "loss": 0.0034, "step": 200 } ], "logging_steps": 10, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 880899155558400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }