{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 57, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2702702702702703, "grad_norm": 5.16129094151242, "learning_rate": 4.9394909565685894e-05, "loss": 0.3072, "num_input_tokens_seen": 588672, "step": 5, "train_runtime": 43.9847, "train_tokens_per_second": 13383.571 }, { "epoch": 0.5405405405405406, "grad_norm": 1.1352466919383923, "learning_rate": 4.698684378016222e-05, "loss": 0.2503, "num_input_tokens_seen": 1191152, "step": 10, "train_runtime": 84.6669, "train_tokens_per_second": 14068.69 }, { "epoch": 0.8108108108108109, "grad_norm": 0.9235059881629116, "learning_rate": 4.2919562829211283e-05, "loss": 0.1434, "num_input_tokens_seen": 1792128, "step": 15, "train_runtime": 124.6102, "train_tokens_per_second": 14381.876 }, { "epoch": 1.054054054054054, "grad_norm": 1.1350634950914822, "learning_rate": 3.7500000000000003e-05, "loss": 0.119, "num_input_tokens_seen": 2338720, "step": 20, "train_runtime": 160.5005, "train_tokens_per_second": 14571.423 }, { "epoch": 1.3243243243243243, "grad_norm": 0.6261741496657048, "learning_rate": 3.1137137178519985e-05, "loss": 0.0704, "num_input_tokens_seen": 2944992, "step": 25, "train_runtime": 201.2088, "train_tokens_per_second": 14636.494 }, { "epoch": 1.5945945945945947, "grad_norm": 0.6896041221746694, "learning_rate": 2.4311141440795953e-05, "loss": 0.0714, "num_input_tokens_seen": 3543936, "step": 30, "train_runtime": 240.9507, "train_tokens_per_second": 14708.137 }, { "epoch": 1.864864864864865, "grad_norm": 0.5393100198337258, "learning_rate": 1.7537129724957642e-05, "loss": 0.0651, "num_input_tokens_seen": 4133600, "step": 35, "train_runtime": 280.1915, "train_tokens_per_second": 14752.768 }, { "epoch": 2.108108108108108, "grad_norm": 0.4797863235648709, "learning_rate": 1.1326296046939333e-05, "loss": 0.0524, "num_input_tokens_seen": 4674192, "step": 40, "train_runtime": 316.3527, "train_tokens_per_second": 14775.256 }, { "epoch": 2.3783783783783785, "grad_norm": 0.28429321473085484, "learning_rate": 6.147334755577596e-06, "loss": 0.0349, "num_input_tokens_seen": 5271408, "step": 45, "train_runtime": 357.1448, "train_tokens_per_second": 14759.861 }, { "epoch": 2.6486486486486487, "grad_norm": 0.3086922220806531, "learning_rate": 2.391070982560564e-06, "loss": 0.028, "num_input_tokens_seen": 5854528, "step": 50, "train_runtime": 395.8893, "train_tokens_per_second": 14788.297 }, { "epoch": 2.918918918918919, "grad_norm": 0.3181234653618051, "learning_rate": 3.4096741493194197e-07, "loss": 0.0241, "num_input_tokens_seen": 6461216, "step": 55, "train_runtime": 437.0222, "train_tokens_per_second": 14784.639 }, { "epoch": 3.0, "num_input_tokens_seen": 6642400, "step": 57, "total_flos": 13254831570944.0, "train_loss": 0.10312069207429886, "train_runtime": 603.3131, "train_samples_per_second": 11.735, "train_steps_per_second": 0.094 } ], "logging_steps": 5, "max_steps": 57, "num_input_tokens_seen": 6642400, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 13254831570944.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }