| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9898386132695756, |
| "eval_steps": 500, |
| "global_step": 278, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007172743574417215, |
| "grad_norm": 1.7706772687409793, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 1.8258, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03586371787208607, |
| "grad_norm": 1.3979460761770042, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 1.7889, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07172743574417215, |
| "grad_norm": 0.8391387150686271, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.4752, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10759115361625822, |
| "grad_norm": 0.29622114410594524, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 0.8542, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1434548714883443, |
| "grad_norm": 0.1867024790344403, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.5935, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17931858936043035, |
| "grad_norm": 0.1397115741987044, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 0.4806, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.21518230723251644, |
| "grad_norm": 0.11207900324892817, |
| "learning_rate": 9.9984209464165e-06, |
| "loss": 0.3543, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2510460251046025, |
| "grad_norm": 0.12471379118682165, |
| "learning_rate": 9.980668045715864e-06, |
| "loss": 0.3099, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2869097429766886, |
| "grad_norm": 0.13942124773513623, |
| "learning_rate": 9.94325872368957e-06, |
| "loss": 0.2671, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3227734608487747, |
| "grad_norm": 0.08633657901146753, |
| "learning_rate": 9.886340617840968e-06, |
| "loss": 0.2409, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3586371787208607, |
| "grad_norm": 0.09525370649740562, |
| "learning_rate": 9.81013835793043e-06, |
| "loss": 0.2193, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3945008965929468, |
| "grad_norm": 0.08224573044279379, |
| "learning_rate": 9.714952679464324e-06, |
| "loss": 0.1873, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.4303646144650329, |
| "grad_norm": 0.0960148670109882, |
| "learning_rate": 9.601159236829353e-06, |
| "loss": 0.1986, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46622833233711897, |
| "grad_norm": 0.08468885213082336, |
| "learning_rate": 9.46920712075632e-06, |
| "loss": 0.181, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.502092050209205, |
| "grad_norm": 0.06651705476346571, |
| "learning_rate": 9.319617085964177e-06, |
| "loss": 0.1776, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5379557680812911, |
| "grad_norm": 0.07914440346820356, |
| "learning_rate": 9.152979495979064e-06, |
| "loss": 0.1729, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5738194859533772, |
| "grad_norm": 0.06318757579211376, |
| "learning_rate": 8.969951993239177e-06, |
| "loss": 0.1544, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6096832038254633, |
| "grad_norm": 0.06806462149173399, |
| "learning_rate": 8.77125690368052e-06, |
| "loss": 0.1452, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6455469216975493, |
| "grad_norm": 0.09204580355773379, |
| "learning_rate": 8.557678386046429e-06, |
| "loss": 0.148, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6814106395696354, |
| "grad_norm": 0.07404375144145035, |
| "learning_rate": 8.33005933717126e-06, |
| "loss": 0.1537, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7172743574417214, |
| "grad_norm": 0.06673338321385565, |
| "learning_rate": 8.089298065451673e-06, |
| "loss": 0.154, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7531380753138075, |
| "grad_norm": 0.07248494023312937, |
| "learning_rate": 7.836344745633785e-06, |
| "loss": 0.1415, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7890017931858936, |
| "grad_norm": 0.06250391175809607, |
| "learning_rate": 7.572197668907533e-06, |
| "loss": 0.132, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8248655110579797, |
| "grad_norm": 0.056578659740463, |
| "learning_rate": 7.297899303107441e-06, |
| "loss": 0.112, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8607292289300658, |
| "grad_norm": 0.06486223998598174, |
| "learning_rate": 7.014532178568314e-06, |
| "loss": 0.121, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8965929468021518, |
| "grad_norm": 0.056771834577397436, |
| "learning_rate": 6.723214615872585e-06, |
| "loss": 0.1134, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.9324566646742379, |
| "grad_norm": 0.06319732340937169, |
| "learning_rate": 6.425096312349881e-06, |
| "loss": 0.1166, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.968320382546324, |
| "grad_norm": 0.0649079486366395, |
| "learning_rate": 6.121353804746907e-06, |
| "loss": 0.1122, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.07566991126398058, |
| "learning_rate": 5.813185825974419e-06, |
| "loss": 0.1192, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.1111445426940918, |
| "eval_runtime": 3.972, |
| "eval_samples_per_second": 17.12, |
| "eval_steps_per_second": 4.28, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.035863717872086, |
| "grad_norm": 0.06962658936027795, |
| "learning_rate": 5.5018085742560745e-06, |
| "loss": 0.0911, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0717274357441722, |
| "grad_norm": 0.05916257421087811, |
| "learning_rate": 5.188450913349674e-06, |
| "loss": 0.0905, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1075911536162582, |
| "grad_norm": 0.06400686740251811, |
| "learning_rate": 4.874349522783313e-06, |
| "loss": 0.0977, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1434548714883443, |
| "grad_norm": 0.05927545440254994, |
| "learning_rate": 4.560744017246284e-06, |
| "loss": 0.0834, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1793185893604303, |
| "grad_norm": 0.06649126661802829, |
| "learning_rate": 4.248872054396215e-06, |
| "loss": 0.0953, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.2151823072325165, |
| "grad_norm": 0.06680856093283863, |
| "learning_rate": 3.939964450389728e-06, |
| "loss": 0.096, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2510460251046025, |
| "grad_norm": 0.07655116396488153, |
| "learning_rate": 3.635240322413375e-06, |
| "loss": 0.0843, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.2869097429766887, |
| "grad_norm": 0.0679118797019963, |
| "learning_rate": 3.3359022773850673e-06, |
| "loss": 0.0933, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3227734608487747, |
| "grad_norm": 0.06352669282993445, |
| "learning_rate": 3.043131665813988e-06, |
| "loss": 0.0869, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.3586371787208606, |
| "grad_norm": 0.05614250533642248, |
| "learning_rate": 2.7580839195498397e-06, |
| "loss": 0.0784, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3945008965929468, |
| "grad_norm": 0.06268476802798255, |
| "learning_rate": 2.4818839918211963e-06, |
| "loss": 0.0966, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4303646144650328, |
| "grad_norm": 0.09016849271972376, |
| "learning_rate": 2.2156219175590623e-06, |
| "loss": 0.0861, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.466228332337119, |
| "grad_norm": 0.05616043304073107, |
| "learning_rate": 1.9603485115269743e-06, |
| "loss": 0.0821, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.502092050209205, |
| "grad_norm": 0.056894314885253386, |
| "learning_rate": 1.7170712212352187e-06, |
| "loss": 0.0759, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5379557680812912, |
| "grad_norm": 0.05749554956523113, |
| "learning_rate": 1.4867501510057548e-06, |
| "loss": 0.0779, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.5738194859533772, |
| "grad_norm": 0.06101506302740175, |
| "learning_rate": 1.2702942728790897e-06, |
| "loss": 0.0812, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6096832038254631, |
| "grad_norm": 0.052956740115013924, |
| "learning_rate": 1.0685578393169054e-06, |
| "loss": 0.0878, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.6455469216975493, |
| "grad_norm": 0.05444893484606994, |
| "learning_rate": 8.823370118578628e-07, |
| "loss": 0.0808, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6814106395696355, |
| "grad_norm": 0.05577402210270524, |
| "learning_rate": 7.123667190317396e-07, |
| "loss": 0.0835, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.7172743574417213, |
| "grad_norm": 0.060795662497347525, |
| "learning_rate": 5.593177559322776e-07, |
| "loss": 0.0755, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7531380753138075, |
| "grad_norm": 0.05146247489312187, |
| "learning_rate": 4.237941368954124e-07, |
| "loss": 0.0808, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.7890017931858937, |
| "grad_norm": 0.05742062783254797, |
| "learning_rate": 3.0633071173062966e-07, |
| "loss": 0.0772, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8248655110579797, |
| "grad_norm": 0.06159979933911591, |
| "learning_rate": 2.0739105491312028e-07, |
| "loss": 0.0835, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.8607292289300656, |
| "grad_norm": 0.05865983987282493, |
| "learning_rate": 1.2736563606711384e-07, |
| "loss": 0.0752, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8965929468021518, |
| "grad_norm": 0.06382021063164005, |
| "learning_rate": 6.657027896065982e-08, |
| "loss": 0.077, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.932456664674238, |
| "grad_norm": 0.04725385570056433, |
| "learning_rate": 2.5244915093499134e-08, |
| "loss": 0.0731, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.968320382546324, |
| "grad_norm": 0.053647864100340815, |
| "learning_rate": 3.5526367970539765e-09, |
| "loss": 0.0751, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.9898386132695756, |
| "eval_loss": 0.08560756593942642, |
| "eval_runtime": 3.7696, |
| "eval_samples_per_second": 18.039, |
| "eval_steps_per_second": 4.51, |
| "step": 278 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 278, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.430395390385193e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|