{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.6088799834251404, "learning_rate": 6.04062581035517e-06, "loss": 0.3063, "step": 5 }, { "epoch": 0.1, "grad_norm": 0.38557198643684387, "learning_rate": 1.3591408073299131e-05, "loss": 0.2715, "step": 10 }, { "epoch": 0.15, "grad_norm": 0.3368349075317383, "learning_rate": 2.1142190336243096e-05, "loss": 0.2602, "step": 15 }, { "epoch": 0.2, "grad_norm": 0.2673914134502411, "learning_rate": 2.8692972599187053e-05, "loss": 0.2493, "step": 20 }, { "epoch": 0.25, "grad_norm": 0.2763494849205017, "learning_rate": 3.624375486213102e-05, "loss": 0.2324, "step": 25 }, { "epoch": 0.3, "grad_norm": 0.26726964116096497, "learning_rate": 4.379453712507498e-05, "loss": 0.2447, "step": 30 }, { "epoch": 0.35, "grad_norm": 0.2522192597389221, "learning_rate": 5.1345319388018946e-05, "loss": 0.2434, "step": 35 }, { "epoch": 0.4, "grad_norm": 0.25608450174331665, "learning_rate": 5.283111510242651e-05, "loss": 0.2395, "step": 40 }, { "epoch": 0.45, "grad_norm": 0.24589276313781738, "learning_rate": 5.273224346336359e-05, "loss": 0.2422, "step": 45 }, { "epoch": 0.5, "grad_norm": 0.22992444038391113, "learning_rate": 5.255768540001372e-05, "loss": 0.2192, "step": 50 }, { "epoch": 0.55, "grad_norm": 0.21682168543338776, "learning_rate": 5.230805405394084e-05, "loss": 0.2239, "step": 55 }, { "epoch": 0.6, "grad_norm": 0.2279157191514969, "learning_rate": 5.19842262643524e-05, "loss": 0.228, "step": 60 }, { "epoch": 0.65, "grad_norm": 0.2328806221485138, "learning_rate": 5.1587339488169767e-05, "loss": 0.244, "step": 65 }, { "epoch": 0.7, "grad_norm": 0.22070817649364471, "learning_rate": 5.1118787804669286e-05, "loss": 0.2343, "step": 70 }, { "epoch": 0.75, "grad_norm": 0.20909719169139862, "learning_rate": 5.058021701872798e-05, "loss": 0.2185, "step": 75 }, { "epoch": 0.8, "grad_norm": 0.2322309911251068, "learning_rate": 4.997351887987385e-05, "loss": 0.2218, "step": 80 }, { "epoch": 0.85, "grad_norm": 0.23244695365428925, "learning_rate": 4.930082443744676e-05, "loss": 0.2163, "step": 85 }, { "epoch": 0.9, "grad_norm": 0.23916946351528168, "learning_rate": 4.856449655520976e-05, "loss": 0.2173, "step": 90 }, { "epoch": 0.95, "grad_norm": 0.21331144869327545, "learning_rate": 4.776712161170415e-05, "loss": 0.2092, "step": 95 }, { "epoch": 1.0, "grad_norm": 0.22136829793453217, "learning_rate": 4.691150041550029e-05, "loss": 0.2345, "step": 100 }, { "epoch": 1.0, "eval_loss": 0.21842744946479797, "eval_runtime": 5.3148, "eval_samples_per_second": 10.537, "eval_steps_per_second": 5.268, "step": 100 } ], "logging_steps": 5, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.0877297825153024e+17, "train_batch_size": 14, "trial_name": null, "trial_params": null }