| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 36, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.027777777777777776, |
| "grad_norm": 22.67836931373644, |
| "learning_rate": 1e-05, |
| "loss": 19.3158, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 25.528554454653026, |
| "learning_rate": 9.982876141412857e-06, |
| "loss": 16.4902, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 172.9631001519472, |
| "learning_rate": 9.931634888554937e-06, |
| "loss": 16.5242, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 28.11231293384171, |
| "learning_rate": 9.846666218300808e-06, |
| "loss": 14.0419, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 90.23398804316926, |
| "learning_rate": 9.728616793536588e-06, |
| "loss": 14.8229, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 146.4150512008885, |
| "learning_rate": 9.578385041664926e-06, |
| "loss": 14.3093, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.19444444444444445, |
| "grad_norm": 29.876775310695155, |
| "learning_rate": 9.397114317029975e-06, |
| "loss": 12.2314, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 20.704280611190857, |
| "learning_rate": 9.186184199300464e-06, |
| "loss": 11.6216, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 15.561815859465199, |
| "learning_rate": 8.947199994035402e-06, |
| "loss": 11.1165, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 9.255900465441924, |
| "learning_rate": 8.681980515339464e-06, |
| "loss": 10.2489, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3055555555555556, |
| "grad_norm": 9.141237857626225, |
| "learning_rate": 8.392544243589428e-06, |
| "loss": 9.6206, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 7.367404565747214, |
| "learning_rate": 8.081093963579707e-06, |
| "loss": 8.9421, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.3611111111111111, |
| "grad_norm": 7.276080472587209, |
| "learning_rate": 7.75e-06, |
| "loss": 8.3113, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 7.4332326669293005, |
| "learning_rate": 7.401782177833148e-06, |
| "loss": 7.9473, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 6.227852054297136, |
| "learning_rate": 7.0390906449655104e-06, |
| "loss": 7.4265, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 9.533857403224232, |
| "learning_rate": 6.664685702961344e-06, |
| "loss": 7.0564, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.4722222222222222, |
| "grad_norm": 11.027416694984288, |
| "learning_rate": 6.281416799501188e-06, |
| "loss": 6.6986, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 8.015569378629083, |
| "learning_rate": 5.892200842364463e-06, |
| "loss": 6.4416, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5277777777777778, |
| "grad_norm": 10.173585030611866, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 6.313, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 6.910614565786783, |
| "learning_rate": 5.107799157635538e-06, |
| "loss": 5.8678, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 9.582544535267417, |
| "learning_rate": 4.718583200498814e-06, |
| "loss": 5.656, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 5.662411790388903, |
| "learning_rate": 4.335314297038656e-06, |
| "loss": 5.4231, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6388888888888888, |
| "grad_norm": 5.28340071917807, |
| "learning_rate": 3.960909355034491e-06, |
| "loss": 5.2888, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 6.812817877166794, |
| "learning_rate": 3.598217822166854e-06, |
| "loss": 5.2311, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 4.939213318761914, |
| "learning_rate": 3.2500000000000015e-06, |
| "loss": 5.0425, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 5.861252411712965, |
| "learning_rate": 2.9189060364202944e-06, |
| "loss": 4.9106, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.913591223534713, |
| "learning_rate": 2.607455756410573e-06, |
| "loss": 4.6825, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 4.526799704205077, |
| "learning_rate": 2.3180194846605367e-06, |
| "loss": 4.5745, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8055555555555556, |
| "grad_norm": 4.561348704304984, |
| "learning_rate": 2.0528000059646e-06, |
| "loss": 4.498, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 3.7034148333699637, |
| "learning_rate": 1.8138158006995366e-06, |
| "loss": 4.439, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8611111111111112, |
| "grad_norm": 3.474665263821162, |
| "learning_rate": 1.602885682970026e-06, |
| "loss": 4.1679, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 3.4083753091607747, |
| "learning_rate": 1.4216149583350756e-06, |
| "loss": 4.4125, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 3.214452041424971, |
| "learning_rate": 1.2713832064634127e-06, |
| "loss": 4.1909, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 2.9121418083489643, |
| "learning_rate": 1.1533337816991932e-06, |
| "loss": 4.2952, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 2.9431831966910993, |
| "learning_rate": 1.0683651114450641e-06, |
| "loss": 4.0494, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.464172024031719, |
| "learning_rate": 1.0171238585871451e-06, |
| "loss": 4.0939, |
| "step": 36 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 36, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 74680891342848.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|