{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9587628865979383,
  "eval_steps": 500,
  "global_step": 64,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.030927835051546393,
      "grad_norm": 31.330896377563477,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 4.8446,
      "step": 1
    },
    {
      "epoch": 0.061855670103092786,
      "grad_norm": 31.82550811767578,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 4.7928,
      "step": 2
    },
    {
      "epoch": 0.09278350515463918,
      "grad_norm": 30.859010696411133,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 4.6291,
      "step": 3
    },
    {
      "epoch": 0.12371134020618557,
      "grad_norm": 31.643869400024414,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 4.713,
      "step": 4
    },
    {
      "epoch": 0.15463917525773196,
      "grad_norm": 33.41943359375,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 4.9265,
      "step": 5
    },
    {
      "epoch": 0.18556701030927836,
      "grad_norm": 32.8159065246582,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 4.832,
      "step": 6
    },
    {
      "epoch": 0.21649484536082475,
      "grad_norm": 31.771827697753906,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 4.6801,
      "step": 7
    },
    {
      "epoch": 0.24742268041237114,
      "grad_norm": 33.52857971191406,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 4.9088,
      "step": 8
    },
    {
      "epoch": 0.27835051546391754,
      "grad_norm": 31.47663688659668,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 4.5767,
      "step": 9
    },
    {
      "epoch": 0.30927835051546393,
      "grad_norm": 34.30753707885742,
      "learning_rate": 5.000000000000001e-07,
      "loss": 4.961,
      "step": 10
    },
    {
      "epoch": 0.3402061855670103,
      "grad_norm": 31.3031005859375,
      "learning_rate": 5.5e-07,
      "loss": 4.7181,
      "step": 11
    },
    {
      "epoch": 0.3711340206185567,
      "grad_norm": 32.63737106323242,
      "learning_rate": 6.000000000000001e-07,
      "loss": 4.7796,
      "step": 12
    },
    {
      "epoch": 0.4020618556701031,
      "grad_norm": 31.14106559753418,
      "learning_rate": 6.5e-07,
      "loss": 4.5596,
      "step": 13
    },
    {
      "epoch": 0.4329896907216495,
      "grad_norm": 30.995929718017578,
      "learning_rate": 7.000000000000001e-07,
      "loss": 4.5709,
      "step": 14
    },
    {
      "epoch": 0.4639175257731959,
      "grad_norm": 29.113582611083984,
      "learning_rate": 7.5e-07,
      "loss": 4.4329,
      "step": 15
    },
    {
      "epoch": 0.4948453608247423,
      "grad_norm": 27.933208465576172,
      "learning_rate": 8.000000000000001e-07,
      "loss": 4.337,
      "step": 16
    },
    {
      "epoch": 0.5257731958762887,
      "grad_norm": 27.216915130615234,
      "learning_rate": 8.500000000000001e-07,
      "loss": 4.3253,
      "step": 17
    },
    {
      "epoch": 0.5567010309278351,
      "grad_norm": 25.551725387573242,
      "learning_rate": 9.000000000000001e-07,
      "loss": 4.3318,
      "step": 18
    },
    {
      "epoch": 0.5876288659793815,
      "grad_norm": 22.587017059326172,
      "learning_rate": 9.500000000000001e-07,
      "loss": 4.0206,
      "step": 19
    },
    {
      "epoch": 0.6185567010309279,
      "grad_norm": 21.570398330688477,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 3.8186,
      "step": 20
    },
    {
      "epoch": 0.6494845360824743,
      "grad_norm": 20.41568946838379,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 3.771,
      "step": 21
    },
    {
      "epoch": 0.6804123711340206,
      "grad_norm": 19.1239070892334,
      "learning_rate": 1.1e-06,
      "loss": 3.5997,
      "step": 22
    },
    {
      "epoch": 0.711340206185567,
      "grad_norm": 19.375389099121094,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 3.4865,
      "step": 23
    },
    {
      "epoch": 0.7422680412371134,
      "grad_norm": 19.06153106689453,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 3.3836,
      "step": 24
    },
    {
      "epoch": 0.7731958762886598,
      "grad_norm": 17.918527603149414,
      "learning_rate": 1.25e-06,
      "loss": 3.141,
      "step": 25
    },
    {
      "epoch": 0.8041237113402062,
      "grad_norm": 18.42892837524414,
      "learning_rate": 1.3e-06,
      "loss": 3.0393,
      "step": 26
    },
    {
      "epoch": 0.8350515463917526,
      "grad_norm": 18.298789978027344,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 3.0098,
      "step": 27
    },
    {
      "epoch": 0.865979381443299,
      "grad_norm": 16.423397064208984,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 2.786,
      "step": 28
    },
    {
      "epoch": 0.8969072164948454,
      "grad_norm": 14.053132057189941,
      "learning_rate": 1.45e-06,
      "loss": 2.5791,
      "step": 29
    },
    {
      "epoch": 0.9278350515463918,
      "grad_norm": 14.001585006713867,
      "learning_rate": 1.5e-06,
      "loss": 2.5818,
      "step": 30
    },
    {
      "epoch": 0.9587628865979382,
      "grad_norm": 13.626611709594727,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 2.3909,
      "step": 31
    },
    {
      "epoch": 0.9896907216494846,
      "grad_norm": 13.853208541870117,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 2.2437,
      "step": 32
    },
    {
      "epoch": 1.0,
      "grad_norm": 13.853208541870117,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 2.2363,
      "step": 33
    },
    {
      "epoch": 1.0309278350515463,
      "grad_norm": 24.948280334472656,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 2.1462,
      "step": 34
    },
    {
      "epoch": 1.0618556701030928,
      "grad_norm": 15.244880676269531,
      "learning_rate": 1.75e-06,
      "loss": 1.9498,
      "step": 35
    },
    {
      "epoch": 1.0927835051546393,
      "grad_norm": 15.106721878051758,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 1.7473,
      "step": 36
    },
    {
      "epoch": 1.1237113402061856,
      "grad_norm": 14.083307266235352,
      "learning_rate": 1.85e-06,
      "loss": 1.6159,
      "step": 37
    },
    {
      "epoch": 1.1546391752577319,
      "grad_norm": 15.086730003356934,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 1.4901,
      "step": 38
    },
    {
      "epoch": 1.1855670103092784,
      "grad_norm": 13.394895553588867,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 1.2992,
      "step": 39
    },
    {
      "epoch": 1.2164948453608249,
      "grad_norm": 13.134459495544434,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1725,
      "step": 40
    },
    {
      "epoch": 1.2474226804123711,
      "grad_norm": 13.013129234313965,
      "learning_rate": 2.05e-06,
      "loss": 1.056,
      "step": 41
    },
    {
      "epoch": 1.2783505154639174,
      "grad_norm": 12.309725761413574,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.8779,
      "step": 42
    },
    {
      "epoch": 1.309278350515464,
      "grad_norm": 11.717962265014648,
      "learning_rate": 2.15e-06,
      "loss": 0.7724,
      "step": 43
    },
    {
      "epoch": 1.3402061855670104,
      "grad_norm": 10.531135559082031,
      "learning_rate": 2.2e-06,
      "loss": 0.6223,
      "step": 44
    },
    {
      "epoch": 1.3711340206185567,
      "grad_norm": 9.451440811157227,
      "learning_rate": 2.25e-06,
      "loss": 0.5095,
      "step": 45
    },
    {
      "epoch": 1.402061855670103,
      "grad_norm": 7.987078666687012,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.3841,
      "step": 46
    },
    {
      "epoch": 1.4329896907216495,
      "grad_norm": 6.726036071777344,
      "learning_rate": 2.35e-06,
      "loss": 0.3333,
      "step": 47
    },
    {
      "epoch": 1.463917525773196,
      "grad_norm": 4.108280658721924,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.2602,
      "step": 48
    },
    {
      "epoch": 1.4948453608247423,
      "grad_norm": 2.6678011417388916,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.2251,
      "step": 49
    },
    {
      "epoch": 1.5257731958762886,
      "grad_norm": 2.8046882152557373,
      "learning_rate": 2.5e-06,
      "loss": 0.1984,
      "step": 50
    },
    {
      "epoch": 1.556701030927835,
      "grad_norm": 2.6669921875,
      "learning_rate": 2.55e-06,
      "loss": 0.2084,
      "step": 51
    },
    {
      "epoch": 1.5876288659793816,
      "grad_norm": 1.8236641883850098,
      "learning_rate": 2.6e-06,
      "loss": 0.169,
      "step": 52
    },
    {
      "epoch": 1.6185567010309279,
      "grad_norm": 1.405070185661316,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.1635,
      "step": 53
    },
    {
      "epoch": 1.6494845360824741,
      "grad_norm": 1.869597315788269,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.1572,
      "step": 54
    },
    {
      "epoch": 1.6804123711340206,
      "grad_norm": 1.2718607187271118,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.1411,
      "step": 55
    },
    {
      "epoch": 1.7113402061855671,
      "grad_norm": 0.9617771506309509,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.1427,
      "step": 56
    },
    {
      "epoch": 1.7422680412371134,
      "grad_norm": 0.6258248090744019,
      "learning_rate": 2.85e-06,
      "loss": 0.1241,
      "step": 57
    },
    {
      "epoch": 1.7731958762886597,
      "grad_norm": 0.5329176187515259,
      "learning_rate": 2.9e-06,
      "loss": 0.1214,
      "step": 58
    },
    {
      "epoch": 1.8041237113402062,
      "grad_norm": 0.8642020225524902,
      "learning_rate": 2.95e-06,
      "loss": 0.1336,
      "step": 59
    },
    {
      "epoch": 1.8350515463917527,
      "grad_norm": 0.627882182598114,
      "learning_rate": 3e-06,
      "loss": 0.121,
      "step": 60
    },
    {
      "epoch": 1.865979381443299,
      "grad_norm": 0.7841255068778992,
      "learning_rate": 3.05e-06,
      "loss": 0.1163,
      "step": 61
    },
    {
      "epoch": 1.8969072164948453,
      "grad_norm": 0.5962179899215698,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.1117,
      "step": 62
    },
    {
      "epoch": 1.9278350515463918,
      "grad_norm": 0.5344879627227783,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.1162,
      "step": 63
    },
    {
      "epoch": 1.9587628865979383,
      "grad_norm": 0.4333738386631012,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.1152,
      "step": 64
    }
  ],
  "logging_steps": 1,
  "max_steps": 192,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 32,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4150236643105178e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}