{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11904761904761904,
      "grad_norm": 0.895657479763031,
      "learning_rate": 9.642857142857143e-05,
      "loss": 2.7858,
      "step": 10
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 0.5870405435562134,
      "learning_rate": 9.246031746031747e-05,
      "loss": 1.5709,
      "step": 20
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.6833087801933289,
      "learning_rate": 8.849206349206349e-05,
      "loss": 1.372,
      "step": 30
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 0.7731186747550964,
      "learning_rate": 8.452380952380952e-05,
      "loss": 1.2796,
      "step": 40
    },
    {
      "epoch": 0.5952380952380952,
      "grad_norm": 0.8870206475257874,
      "learning_rate": 8.055555555555556e-05,
      "loss": 1.1928,
      "step": 50
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.9653159976005554,
      "learning_rate": 7.658730158730159e-05,
      "loss": 1.0905,
      "step": 60
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 1.034477710723877,
      "learning_rate": 7.261904761904762e-05,
      "loss": 0.9988,
      "step": 70
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.150348424911499,
      "learning_rate": 6.865079365079366e-05,
      "loss": 0.9214,
      "step": 80
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 1.0706491470336914,
      "learning_rate": 6.46825396825397e-05,
      "loss": 0.8236,
      "step": 90
    },
    {
      "epoch": 1.1904761904761905,
      "grad_norm": 1.3665771484375,
      "learning_rate": 6.0714285714285715e-05,
      "loss": 0.7343,
      "step": 100
    },
    {
      "epoch": 1.3095238095238095,
      "grad_norm": 1.350333333015442,
      "learning_rate": 5.674603174603175e-05,
      "loss": 0.7267,
      "step": 110
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 1.3638492822647095,
      "learning_rate": 5.2777777777777784e-05,
      "loss": 0.7088,
      "step": 120
    },
    {
      "epoch": 1.5476190476190477,
      "grad_norm": 1.357682228088379,
      "learning_rate": 4.880952380952381e-05,
      "loss": 0.6916,
      "step": 130
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 1.5811983346939087,
      "learning_rate": 4.4841269841269846e-05,
      "loss": 0.6879,
      "step": 140
    },
    {
      "epoch": 1.7857142857142856,
      "grad_norm": 1.407701015472412,
      "learning_rate": 4.0873015873015874e-05,
      "loss": 0.6336,
      "step": 150
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 1.427190899848938,
      "learning_rate": 3.690476190476191e-05,
      "loss": 0.6333,
      "step": 160
    },
    {
      "epoch": 2.0238095238095237,
      "grad_norm": 1.3829928636550903,
      "learning_rate": 3.2936507936507936e-05,
      "loss": 0.6057,
      "step": 170
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 1.3527870178222656,
      "learning_rate": 2.8968253968253974e-05,
      "loss": 0.522,
      "step": 180
    },
    {
      "epoch": 2.261904761904762,
      "grad_norm": 1.621706485748291,
      "learning_rate": 2.5e-05,
      "loss": 0.4988,
      "step": 190
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 1.5113567113876343,
      "learning_rate": 2.1031746031746032e-05,
      "loss": 0.5114,
      "step": 200
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.4099675416946411,
      "learning_rate": 1.7063492063492063e-05,
      "loss": 0.4697,
      "step": 210
    },
    {
      "epoch": 2.619047619047619,
      "grad_norm": 1.669247031211853,
      "learning_rate": 1.3095238095238096e-05,
      "loss": 0.4832,
      "step": 220
    },
    {
      "epoch": 2.738095238095238,
      "grad_norm": 1.6166075468063354,
      "learning_rate": 9.126984126984127e-06,
      "loss": 0.4741,
      "step": 230
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.547747254371643,
      "learning_rate": 5.158730158730159e-06,
      "loss": 0.4545,
      "step": 240
    },
    {
      "epoch": 2.9761904761904763,
      "grad_norm": 1.6597768068313599,
      "learning_rate": 1.1904761904761904e-06,
      "loss": 0.5079,
      "step": 250
    }
  ],
  "logging_steps": 10,
  "max_steps": 252,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1454376593850368e+18,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}