| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 94, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 0.010590367854915593, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.1926, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 0.012578046075256057, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.1918, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 0.01783746648173419, |
| "learning_rate": 2.4e-05, |
| "loss": 0.1886, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 0.011420689161964937, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.1882, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 0.013423415695331824, |
| "learning_rate": 4e-05, |
| "loss": 0.187, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 0.01152432232223242, |
| "learning_rate": 4.8e-05, |
| "loss": 0.2075, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 0.015072513285239043, |
| "learning_rate": 5.6e-05, |
| "loss": 0.183, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 0.015315464416221777, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.1874, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 0.018399336292684178, |
| "learning_rate": 7.2e-05, |
| "loss": 0.1718, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 0.014954736005891176, |
| "learning_rate": 8e-05, |
| "loss": 0.1774, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 0.01479471763787684, |
| "learning_rate": 7.997202819153595e-05, |
| "loss": 0.1777, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.01862812665496443, |
| "learning_rate": 7.988815188724721e-05, |
| "loss": 0.1849, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 0.02410221937010479, |
| "learning_rate": 7.974848839572971e-05, |
| "loss": 0.1788, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 0.021232213070075102, |
| "learning_rate": 7.955323304900514e-05, |
| "loss": 0.1747, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 0.01725885242932473, |
| "learning_rate": 7.930265892933154e-05, |
| "loss": 0.1688, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.025692596523345317, |
| "learning_rate": 7.899711648727294e-05, |
| "loss": 0.1477, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 0.01718245851051447, |
| "learning_rate": 7.863703305156273e-05, |
| "loss": 0.16, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 0.011160309300696343, |
| "learning_rate": 7.822291223144564e-05, |
| "loss": 0.1531, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 0.01107708936078071, |
| "learning_rate": 7.775533321233471e-05, |
| "loss": 0.1509, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.010576674973397301, |
| "learning_rate": 7.723494994576818e-05, |
| "loss": 0.1431, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 0.0105518464049501, |
| "learning_rate": 7.666249023479905e-05, |
| "loss": 0.1453, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.010711155060558916, |
| "learning_rate": 7.603875471609677e-05, |
| "loss": 0.138, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 0.011893252910486476, |
| "learning_rate": 7.536461574018439e-05, |
| "loss": 0.1408, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.01141199299709111, |
| "learning_rate": 7.464101615137756e-05, |
| "loss": 0.1394, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 0.01139466276815542, |
| "learning_rate": 7.386896796913137e-05, |
| "loss": 0.1232, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.010588063613807498, |
| "learning_rate": 7.30495509726398e-05, |
| "loss": 0.1103, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 0.009514650300091422, |
| "learning_rate": 7.218391119066674e-05, |
| "loss": 0.1166, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.009355160096341924, |
| "learning_rate": 7.12732592987212e-05, |
| "loss": 0.1118, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 0.008616859607025575, |
| "learning_rate": 7.031886892581813e-05, |
| "loss": 0.1177, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.008631624572027048, |
| "learning_rate": 6.932207487319305e-05, |
| "loss": 0.0997, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 0.008903768115760128, |
| "learning_rate": 6.828427124746191e-05, |
| "loss": 0.1159, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.00879605417805913, |
| "learning_rate": 6.720690951083678e-05, |
| "loss": 0.0942, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 0.007750175176445602, |
| "learning_rate": 6.609149645112485e-05, |
| "loss": 0.1025, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 0.007082352739867091, |
| "learning_rate": 6.493959207434934e-05, |
| "loss": 0.102, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 0.007627139601877899, |
| "learning_rate": 6.375280742294007e-05, |
| "loss": 0.107, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.007426561201267845, |
| "learning_rate": 6.253280232254489e-05, |
| "loss": 0.1018, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 0.007882109839878693, |
| "learning_rate": 6.128128306061347e-05, |
| "loss": 0.0892, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.013030069958457369, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.0898, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 0.009784583072582934, |
| "learning_rate": 5.869074513092249e-05, |
| "loss": 0.1085, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.008732072983394946, |
| "learning_rate": 5.735534956470233e-05, |
| "loss": 0.1145, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 0.008429338310314638, |
| "learning_rate": 5.5995680972789634e-05, |
| "loss": 0.0878, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 0.007466407512896108, |
| "learning_rate": 5.461364097465581e-05, |
| "loss": 0.0966, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 0.00846237817917418, |
| "learning_rate": 5.321116247820669e-05, |
| "loss": 0.0875, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.008633522389774042, |
| "learning_rate": 5.179020697643618e-05, |
| "loss": 0.097, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 0.009377024238313315, |
| "learning_rate": 5.0352761804100835e-05, |
| "loss": 0.1073, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 0.008304089582340315, |
| "learning_rate": 4.890083735825258e-05, |
| "loss": 0.0848, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.008892842589324183, |
| "learning_rate": 4.743646428651659e-05, |
| "loss": 0.0958, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 0.008548911922600459, |
| "learning_rate": 4.5961690647046974e-05, |
| "loss": 0.0806, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.0425531914893618, |
| "grad_norm": 0.009102783969006578, |
| "learning_rate": 4.4478579044132314e-05, |
| "loss": 0.0929, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 0.0098303537895855, |
| "learning_rate": 4.298920374345698e-05, |
| "loss": 0.1007, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0851063829787233, |
| "grad_norm": 0.008733968304832005, |
| "learning_rate": 4.149564777105304e-05, |
| "loss": 0.1022, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.1063829787234043, |
| "grad_norm": 0.00963321041491538, |
| "learning_rate": 4e-05, |
| "loss": 0.1007, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.127659574468085, |
| "grad_norm": 0.009732084621038351, |
| "learning_rate": 3.850435222894698e-05, |
| "loss": 0.0912, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.148936170212766, |
| "grad_norm": 0.007844561628680342, |
| "learning_rate": 3.7010796256543034e-05, |
| "loss": 0.0833, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.1702127659574468, |
| "grad_norm": 0.00900048645607817, |
| "learning_rate": 3.552142095586769e-05, |
| "loss": 0.0854, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.1914893617021276, |
| "grad_norm": 0.010829388692212415, |
| "learning_rate": 3.403830935295302e-05, |
| "loss": 0.0997, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.2127659574468086, |
| "grad_norm": 0.008444877808242452, |
| "learning_rate": 3.256353571348342e-05, |
| "loss": 0.0891, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.2340425531914894, |
| "grad_norm": 0.01641554525408798, |
| "learning_rate": 3.109916264174743e-05, |
| "loss": 0.0831, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.2553191489361701, |
| "grad_norm": 0.009669606083368287, |
| "learning_rate": 2.9647238195899168e-05, |
| "loss": 0.0884, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.00925986379152417, |
| "learning_rate": 2.8209793023563833e-05, |
| "loss": 0.0819, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.297872340425532, |
| "grad_norm": 0.009309633393499708, |
| "learning_rate": 2.678883752179333e-05, |
| "loss": 0.1053, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.3191489361702127, |
| "grad_norm": 0.009580822751866055, |
| "learning_rate": 2.53863590253442e-05, |
| "loss": 0.0886, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.3404255319148937, |
| "grad_norm": 0.009599560162292626, |
| "learning_rate": 2.400431902721037e-05, |
| "loss": 0.1032, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 0.008364225946026623, |
| "learning_rate": 2.264465043529768e-05, |
| "loss": 0.0799, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.3829787234042552, |
| "grad_norm": 0.008677790719447326, |
| "learning_rate": 2.130925486907752e-05, |
| "loss": 0.0875, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.4042553191489362, |
| "grad_norm": 0.009664434602952684, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.0753, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.425531914893617, |
| "grad_norm": 0.00866555310383692, |
| "learning_rate": 1.8718716939386543e-05, |
| "loss": 0.0823, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.4468085106382977, |
| "grad_norm": 0.008270414259650704, |
| "learning_rate": 1.7467197677455118e-05, |
| "loss": 0.0831, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.4680851063829787, |
| "grad_norm": 0.008404646837006514, |
| "learning_rate": 1.6247192577059943e-05, |
| "loss": 0.083, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 0.009997727625367256, |
| "learning_rate": 1.5060407925650662e-05, |
| "loss": 0.0785, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.5106382978723403, |
| "grad_norm": 0.011232957430370928, |
| "learning_rate": 1.3908503548875167e-05, |
| "loss": 0.0902, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.5319148936170213, |
| "grad_norm": 0.009428808213513893, |
| "learning_rate": 1.2793090489163218e-05, |
| "loss": 0.0805, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.5531914893617023, |
| "grad_norm": 0.00889198463482995, |
| "learning_rate": 1.1715728752538103e-05, |
| "loss": 0.0814, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.574468085106383, |
| "grad_norm": 0.008520858215678556, |
| "learning_rate": 1.0677925126806956e-05, |
| "loss": 0.0933, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.5957446808510638, |
| "grad_norm": 0.007804506720698448, |
| "learning_rate": 9.681131074181876e-06, |
| "loss": 0.0789, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.6170212765957448, |
| "grad_norm": 0.008072405841664344, |
| "learning_rate": 8.72674070127881e-06, |
| "loss": 0.0907, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.6382978723404256, |
| "grad_norm": 0.009915778132331446, |
| "learning_rate": 7.816088809333266e-06, |
| "loss": 0.0748, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.6595744680851063, |
| "grad_norm": 0.008087540877090103, |
| "learning_rate": 6.950449027360213e-06, |
| "loss": 0.0892, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.6808510638297873, |
| "grad_norm": 0.008266341982185095, |
| "learning_rate": 6.1310320308686354e-06, |
| "loss": 0.0842, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 0.007890851491411855, |
| "learning_rate": 5.358983848622452e-06, |
| "loss": 0.0936, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.7234042553191489, |
| "grad_norm": 0.007735808802877532, |
| "learning_rate": 4.635384259815614e-06, |
| "loss": 0.0815, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.7446808510638299, |
| "grad_norm": 0.009091176030478536, |
| "learning_rate": 3.961245283903239e-06, |
| "loss": 0.0734, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.7659574468085106, |
| "grad_norm": 0.0077500216055024895, |
| "learning_rate": 3.3375097652009526e-06, |
| "loss": 0.0937, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.7872340425531914, |
| "grad_norm": 0.008391041950434233, |
| "learning_rate": 2.765050054231835e-06, |
| "loss": 0.094, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.8085106382978724, |
| "grad_norm": 0.007544446450642002, |
| "learning_rate": 2.244666787665297e-06, |
| "loss": 0.0867, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.8297872340425532, |
| "grad_norm": 0.008127848994774332, |
| "learning_rate": 1.7770877685543687e-06, |
| "loss": 0.085, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.851063829787234, |
| "grad_norm": 0.00802121733098184, |
| "learning_rate": 1.3629669484372722e-06, |
| "loss": 0.0743, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.872340425531915, |
| "grad_norm": 0.007730575027671854, |
| "learning_rate": 1.0028835127270553e-06, |
| "loss": 0.0789, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.8936170212765957, |
| "grad_norm": 0.008395969340272261, |
| "learning_rate": 6.973410706684691e-07, |
| "loss": 0.0776, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 0.0072060175209531195, |
| "learning_rate": 4.467669509948591e-07, |
| "loss": 0.0875, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.9361702127659575, |
| "grad_norm": 0.008227761304066706, |
| "learning_rate": 2.5151160427029584e-07, |
| "loss": 0.0741, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.9574468085106385, |
| "grad_norm": 0.008008937858933658, |
| "learning_rate": 1.1184811275279483e-07, |
| "loss": 0.0843, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.978723404255319, |
| "grad_norm": 0.007849256333358338, |
| "learning_rate": 2.797180846405567e-08, |
| "loss": 0.0812, |
| "step": 93 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.010227461018199118, |
| "learning_rate": 0.0, |
| "loss": 0.0772, |
| "step": 94 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 94, |
| "total_flos": 58264877989888.0, |
| "train_loss": 0.11161886060491522, |
| "train_runtime": 645.3428, |
| "train_samples_per_second": 1.153, |
| "train_steps_per_second": 0.146 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 94, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 58264877989888.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|