| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100.0, | |
| "global_step": 35, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 0.04918956756591797, | |
| "learning_rate": 0.0, | |
| "loss": 0.5625, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 0.04941160976886749, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5623, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 0.05087224394083023, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5009, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 0.053648702800273895, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.5467, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.056984659284353256, | |
| "learning_rate": 0.0002, | |
| "loss": 0.5003, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 0.06416453421115875, | |
| "learning_rate": 0.00019948693233918952, | |
| "loss": 0.4514, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.07367783039808273, | |
| "learning_rate": 0.00019795299412524945, | |
| "loss": 0.4195, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.09809113293886185, | |
| "learning_rate": 0.00019541392564000488, | |
| "loss": 0.3534, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.2571428571428571, | |
| "grad_norm": 0.2686680257320404, | |
| "learning_rate": 0.00019189578116202307, | |
| "loss": 0.3339, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.12331166863441467, | |
| "learning_rate": 0.00018743466161445823, | |
| "loss": 0.2941, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.3142857142857143, | |
| "grad_norm": 0.13653112947940826, | |
| "learning_rate": 0.00018207634412072764, | |
| "loss": 0.2648, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 0.12992143630981445, | |
| "learning_rate": 0.0001758758122692791, | |
| "loss": 0.2015, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.37142857142857144, | |
| "grad_norm": 0.13980498909950256, | |
| "learning_rate": 0.00016889669190756868, | |
| "loss": 0.1789, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.1361037641763687, | |
| "learning_rate": 0.0001612105982547663, | |
| "loss": 0.1397, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.11609825491905212, | |
| "learning_rate": 0.00015289640103269625, | |
| "loss": 0.1201, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 0.099339060485363, | |
| "learning_rate": 0.00014403941515576344, | |
| "loss": 0.0973, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.4857142857142857, | |
| "grad_norm": 0.09612716734409332, | |
| "learning_rate": 0.00013473052528448201, | |
| "loss": 0.0855, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.5142857142857142, | |
| "grad_norm": 0.10055588185787201, | |
| "learning_rate": 0.00012506525322587207, | |
| "loss": 0.0849, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.5428571428571428, | |
| "grad_norm": 0.08217591792345047, | |
| "learning_rate": 0.00011514277775045768, | |
| "loss": 0.0725, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.07315342128276825, | |
| "learning_rate": 0.00010506491688387127, | |
| "loss": 0.0684, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.08092162758111954, | |
| "learning_rate": 9.493508311612874e-05, | |
| "loss": 0.0573, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.6285714285714286, | |
| "grad_norm": 0.06385105103254318, | |
| "learning_rate": 8.485722224954237e-05, | |
| "loss": 0.0594, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.6571428571428571, | |
| "grad_norm": 0.05351648107171059, | |
| "learning_rate": 7.493474677412794e-05, | |
| "loss": 0.0552, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 0.05012252926826477, | |
| "learning_rate": 6.526947471551798e-05, | |
| "loss": 0.0607, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.04566913843154907, | |
| "learning_rate": 5.596058484423656e-05, | |
| "loss": 0.0547, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.7428571428571429, | |
| "grad_norm": 0.044832587242126465, | |
| "learning_rate": 4.710359896730379e-05, | |
| "loss": 0.0483, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.7714285714285715, | |
| "grad_norm": 0.05177149549126625, | |
| "learning_rate": 3.878940174523371e-05, | |
| "loss": 0.0544, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.04932279884815216, | |
| "learning_rate": 3.110330809243134e-05, | |
| "loss": 0.0483, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.8285714285714286, | |
| "grad_norm": 0.05275353044271469, | |
| "learning_rate": 2.4124187730720917e-05, | |
| "loss": 0.0479, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.04073500633239746, | |
| "learning_rate": 1.7923655879272393e-05, | |
| "loss": 0.0525, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.8857142857142857, | |
| "grad_norm": 0.04079524427652359, | |
| "learning_rate": 1.2565338385541792e-05, | |
| "loss": 0.0469, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 0.06132270023226738, | |
| "learning_rate": 8.10421883797694e-06, | |
| "loss": 0.0421, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.9428571428571428, | |
| "grad_norm": 0.04404183104634285, | |
| "learning_rate": 4.586074359995119e-06, | |
| "loss": 0.0467, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.9714285714285714, | |
| "grad_norm": 0.03794850781559944, | |
| "learning_rate": 2.0470058747505516e-06, | |
| "loss": 0.0467, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.04732782021164894, | |
| "learning_rate": 5.130676608104845e-07, | |
| "loss": 0.0425, | |
| "step": 35 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 35, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.2046170486276096e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |