| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 25, |
| "global_step": 66, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.22988505747126436, |
| "grad_norm": 1.4753433465957642, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 4.5501, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.45977011494252873, |
| "grad_norm": 3.072629451751709, |
| "learning_rate": 0.00019322033898305085, |
| "loss": 3.6174, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6896551724137931, |
| "grad_norm": 4.133386611938477, |
| "learning_rate": 0.00017627118644067798, |
| "loss": 2.4791, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.9195402298850575, |
| "grad_norm": 2.3766379356384277, |
| "learning_rate": 0.00015932203389830508, |
| "loss": 1.7968, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.1379310344827587, |
| "grad_norm": 3.2854702472686768, |
| "learning_rate": 0.0001423728813559322, |
| "loss": 1.7796, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.367816091954023, |
| "grad_norm": 2.0210888385772705, |
| "learning_rate": 0.00012542372881355933, |
| "loss": 1.4906, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.5977011494252875, |
| "grad_norm": 2.1205132007598877, |
| "learning_rate": 0.00010847457627118644, |
| "loss": 1.4216, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.8275862068965516, |
| "grad_norm": 1.489486575126648, |
| "learning_rate": 9.152542372881357e-05, |
| "loss": 1.1526, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.045977011494253, |
| "grad_norm": 1.803501844406128, |
| "learning_rate": 7.457627118644068e-05, |
| "loss": 1.3507, |
| "step": 45 |
| }, |
| { |
| "epoch": 2.2758620689655173, |
| "grad_norm": 1.8296611309051514, |
| "learning_rate": 5.76271186440678e-05, |
| "loss": 1.1522, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.5057471264367814, |
| "grad_norm": 2.4213690757751465, |
| "learning_rate": 4.067796610169492e-05, |
| "loss": 1.0809, |
| "step": 55 |
| }, |
| { |
| "epoch": 2.735632183908046, |
| "grad_norm": 1.8211500644683838, |
| "learning_rate": 2.3728813559322036e-05, |
| "loss": 1.1041, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.9655172413793105, |
| "grad_norm": 1.997066617012024, |
| "learning_rate": 6.779661016949153e-06, |
| "loss": 1.0011, |
| "step": 65 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 66, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 399230325227520.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|