{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9981417573666047,
  "eval_steps": 1000,
  "global_step": 235,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04247411733474914,
      "grad_norm": 1.3431863193141913,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.8327,
      "step": 10
    },
    {
      "epoch": 0.08494823466949827,
      "grad_norm": 0.41521534037052726,
      "learning_rate": 9.975707393083328e-06,
      "loss": 0.4694,
      "step": 20
    },
    {
      "epoch": 0.12742235200424742,
      "grad_norm": 0.38317108565393027,
      "learning_rate": 9.85729110226596e-06,
      "loss": 0.3993,
      "step": 30
    },
    {
      "epoch": 0.16989646933899655,
      "grad_norm": 0.3203704741083575,
      "learning_rate": 9.642632564563576e-06,
      "loss": 0.3737,
      "step": 40
    },
    {
      "epoch": 0.2123705866737457,
      "grad_norm": 0.35407140018594674,
      "learning_rate": 9.33598501898256e-06,
      "loss": 0.363,
      "step": 50
    },
    {
      "epoch": 0.25484470400849485,
      "grad_norm": 0.2987790081243625,
      "learning_rate": 8.943424372477455e-06,
      "loss": 0.3542,
      "step": 60
    },
    {
      "epoch": 0.29731882134324394,
      "grad_norm": 0.3251607042881122,
      "learning_rate": 8.472728812079436e-06,
      "loss": 0.3535,
      "step": 70
    },
    {
      "epoch": 0.3397929386779931,
      "grad_norm": 0.3322057447228592,
      "learning_rate": 7.933224688089059e-06,
      "loss": 0.3511,
      "step": 80
    },
    {
      "epoch": 0.38226705601274225,
      "grad_norm": 0.36862206101316886,
      "learning_rate": 7.3356017219999236e-06,
      "loss": 0.3576,
      "step": 90
    },
    {
      "epoch": 0.4247411733474914,
      "grad_norm": 0.3112927889650568,
      "learning_rate": 6.691701200618925e-06,
      "loss": 0.3515,
      "step": 100
    },
    {
      "epoch": 0.4672152906822405,
      "grad_norm": 0.31680745740449606,
      "learning_rate": 6.014281353099601e-06,
      "loss": 0.3528,
      "step": 110
    },
    {
      "epoch": 0.5096894080169897,
      "grad_norm": 0.3283258075639949,
      "learning_rate": 5.3167645597022855e-06,
      "loss": 0.3364,
      "step": 120
    },
    {
      "epoch": 0.5521635253517387,
      "grad_norm": 0.31364450191106397,
      "learning_rate": 4.612971401080521e-06,
      "loss": 0.3269,
      "step": 130
    },
    {
      "epoch": 0.5946376426864879,
      "grad_norm": 0.35535488707779117,
      "learning_rate": 3.916846817634618e-06,
      "loss": 0.3416,
      "step": 140
    },
    {
      "epoch": 0.637111760021237,
      "grad_norm": 0.35214971530781963,
      "learning_rate": 3.2421838048042516e-06,
      "loss": 0.3313,
      "step": 150
    },
    {
      "epoch": 0.6795858773559862,
      "grad_norm": 0.3667073918432754,
      "learning_rate": 2.602350118994782e-06,
      "loss": 0.3211,
      "step": 160
    },
    {
      "epoch": 0.7220599946907353,
      "grad_norm": 0.3249609307457873,
      "learning_rate": 2.0100234091793778e-06,
      "loss": 0.3331,
      "step": 170
    },
    {
      "epoch": 0.7645341120254845,
      "grad_norm": 0.35937052836931965,
      "learning_rate": 1.4769400222728974e-06,
      "loss": 0.3282,
      "step": 180
    },
    {
      "epoch": 0.8070082293602336,
      "grad_norm": 0.3201091913172408,
      "learning_rate": 1.0136624594416828e-06,
      "loss": 0.3198,
      "step": 190
    },
    {
      "epoch": 0.8494823466949828,
      "grad_norm": 0.32597141477518937,
      "learning_rate": 6.29370090964262e-07,
      "loss": 0.3194,
      "step": 200
    },
    {
      "epoch": 0.8919564640297318,
      "grad_norm": 0.34862063200061194,
      "learning_rate": 3.3167727641356064e-07,
      "loss": 0.3368,
      "step": 210
    },
    {
      "epoch": 0.934430581364481,
      "grad_norm": 0.3023880427720345,
      "learning_rate": 1.2648249392289925e-07,
      "loss": 0.3456,
      "step": 220
    },
    {
      "epoch": 0.9769046986992301,
      "grad_norm": 0.3059194197037955,
      "learning_rate": 1.785146788478298e-08,
      "loss": 0.3124,
      "step": 230
    },
    {
      "epoch": 0.9981417573666047,
      "step": 235,
      "total_flos": 486915353608192.0,
      "train_loss": 0.36814887523651124,
      "train_runtime": 13036.3047,
      "train_samples_per_second": 2.311,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 10,
  "max_steps": 235,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 486915353608192.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}