| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.96969696969697, |
| "eval_steps": 500, |
| "global_step": 44, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12121212121212122, |
| "grad_norm": 1.8328726291656494, |
| "learning_rate": 0.0, |
| "loss": 2.5573, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 2.0915732383728027, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 2.1189, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 1.0810253620147705, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 1.7979, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 1.1273187398910522, |
| "learning_rate": 0.0001, |
| "loss": 2.0746, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 1.1481099128723145, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 1.9376, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 1.120679259300232, |
| "learning_rate": 0.00016666666666666666, |
| "loss": 1.9616, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 0.42956575751304626, |
| "learning_rate": 0.0002, |
| "loss": 1.5583, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.5541684031486511, |
| "learning_rate": 0.00023333333333333333, |
| "loss": 1.6371, |
| "step": 8 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8354697823524475, |
| "learning_rate": 0.0002666666666666667, |
| "loss": 1.9758, |
| "step": 9 |
| }, |
| { |
| "epoch": 1.121212121212121, |
| "grad_norm": 0.8120695352554321, |
| "learning_rate": 0.0003, |
| "loss": 1.8198, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.2424242424242424, |
| "grad_norm": 0.595156729221344, |
| "learning_rate": 0.0003333333333333333, |
| "loss": 1.649, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.4005779027938843, |
| "learning_rate": 0.00036666666666666667, |
| "loss": 1.3083, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.4848484848484849, |
| "grad_norm": 0.8254678845405579, |
| "learning_rate": 0.0004, |
| "loss": 1.2479, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.606060606060606, |
| "grad_norm": 0.582369863986969, |
| "learning_rate": 0.00043333333333333337, |
| "loss": 1.4063, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.7945718765258789, |
| "learning_rate": 0.00046666666666666666, |
| "loss": 1.4941, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.8484848484848486, |
| "grad_norm": 0.7387524843215942, |
| "learning_rate": 0.0005, |
| "loss": 1.3542, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 0.6516833305358887, |
| "learning_rate": 0.0004986304738420684, |
| "loss": 1.8561, |
| "step": 17 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.1795907020568848, |
| "learning_rate": 0.0004945369001834514, |
| "loss": 1.1684, |
| "step": 18 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 2.144760847091675, |
| "learning_rate": 0.0004877641290737884, |
| "loss": 1.3242, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.242424242424242, |
| "grad_norm": 1.0639714002609253, |
| "learning_rate": 0.0004783863644106502, |
| "loss": 1.116, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 2.3435370922088623, |
| "learning_rate": 0.00046650635094610973, |
| "loss": 1.4533, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.484848484848485, |
| "grad_norm": 0.9590378999710083, |
| "learning_rate": 0.0004522542485937369, |
| "loss": 1.0565, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.606060606060606, |
| "grad_norm": 0.8274413347244263, |
| "learning_rate": 0.00043578620636934855, |
| "loss": 1.0777, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.7866584062576294, |
| "learning_rate": 0.0004172826515897146, |
| "loss": 0.9818, |
| "step": 24 |
| }, |
| { |
| "epoch": 2.8484848484848486, |
| "grad_norm": 1.3257405757904053, |
| "learning_rate": 0.0003969463130731183, |
| "loss": 0.9458, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.9696969696969697, |
| "grad_norm": 0.7941730618476868, |
| "learning_rate": 0.000375, |
| "loss": 0.8782, |
| "step": 26 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.2293105125427246, |
| "learning_rate": 0.0003516841607689501, |
| "loss": 0.9678, |
| "step": 27 |
| }, |
| { |
| "epoch": 3.121212121212121, |
| "grad_norm": 0.9322613477706909, |
| "learning_rate": 0.00032725424859373687, |
| "loss": 0.5318, |
| "step": 28 |
| }, |
| { |
| "epoch": 3.242424242424242, |
| "grad_norm": 0.8166446089744568, |
| "learning_rate": 0.0003019779227044398, |
| "loss": 0.9006, |
| "step": 29 |
| }, |
| { |
| "epoch": 3.3636363636363638, |
| "grad_norm": 1.2094213962554932, |
| "learning_rate": 0.0002761321158169134, |
| "loss": 0.6391, |
| "step": 30 |
| }, |
| { |
| "epoch": 3.484848484848485, |
| "grad_norm": 0.9135984778404236, |
| "learning_rate": 0.00025, |
| "loss": 0.6285, |
| "step": 31 |
| }, |
| { |
| "epoch": 3.606060606060606, |
| "grad_norm": 0.9478852152824402, |
| "learning_rate": 0.00022386788418308668, |
| "loss": 0.6269, |
| "step": 32 |
| }, |
| { |
| "epoch": 3.7272727272727275, |
| "grad_norm": 0.5533197522163391, |
| "learning_rate": 0.0001980220772955602, |
| "loss": 0.5646, |
| "step": 33 |
| }, |
| { |
| "epoch": 3.8484848484848486, |
| "grad_norm": 1.0226417779922485, |
| "learning_rate": 0.00017274575140626317, |
| "loss": 0.5521, |
| "step": 34 |
| }, |
| { |
| "epoch": 3.9696969696969697, |
| "grad_norm": 1.2138278484344482, |
| "learning_rate": 0.00014831583923105, |
| "loss": 0.8734, |
| "step": 35 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 2.8926355838775635, |
| "learning_rate": 0.00012500000000000006, |
| "loss": 0.8668, |
| "step": 36 |
| }, |
| { |
| "epoch": 4.121212121212121, |
| "grad_norm": 0.9145299792289734, |
| "learning_rate": 0.00010305368692688174, |
| "loss": 0.2851, |
| "step": 37 |
| }, |
| { |
| "epoch": 4.242424242424242, |
| "grad_norm": 0.7148826718330383, |
| "learning_rate": 8.271734841028553e-05, |
| "loss": 0.6555, |
| "step": 38 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 1.015910267829895, |
| "learning_rate": 6.421379363065141e-05, |
| "loss": 0.3664, |
| "step": 39 |
| }, |
| { |
| "epoch": 4.484848484848484, |
| "grad_norm": 0.9201410412788391, |
| "learning_rate": 4.7745751406263163e-05, |
| "loss": 0.575, |
| "step": 40 |
| }, |
| { |
| "epoch": 4.606060606060606, |
| "grad_norm": 0.8212230801582336, |
| "learning_rate": 3.3493649053890325e-05, |
| "loss": 0.4084, |
| "step": 41 |
| }, |
| { |
| "epoch": 4.7272727272727275, |
| "grad_norm": 0.8163782358169556, |
| "learning_rate": 2.1613635589349755e-05, |
| "loss": 0.3754, |
| "step": 42 |
| }, |
| { |
| "epoch": 4.848484848484849, |
| "grad_norm": 0.7215772867202759, |
| "learning_rate": 1.2235870926211617e-05, |
| "loss": 0.2209, |
| "step": 43 |
| }, |
| { |
| "epoch": 4.96969696969697, |
| "grad_norm": 1.07026207447052, |
| "learning_rate": 5.463099816548578e-06, |
| "loss": 0.3138, |
| "step": 44 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 45, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2267168340344832.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|