| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.783166904422254, | |
| "eval_steps": 500, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001783166904422254, | |
| "grad_norm": 2.0930111408233643, | |
| "learning_rate": 4.999995641358869e-05, | |
| "loss": 0.7967, | |
| "num_input_tokens_seen": 63024, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.003566333808844508, | |
| "grad_norm": 1.2970882654190063, | |
| "learning_rate": 4.999982565450674e-05, | |
| "loss": 0.7382, | |
| "num_input_tokens_seen": 126336, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005349500713266762, | |
| "grad_norm": 0.8319762349128723, | |
| "learning_rate": 4.999960772321009e-05, | |
| "loss": 0.6823, | |
| "num_input_tokens_seen": 184688, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.007132667617689016, | |
| "grad_norm": 0.9985227584838867, | |
| "learning_rate": 4.999930262045865e-05, | |
| "loss": 0.6836, | |
| "num_input_tokens_seen": 245808, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.00891583452211127, | |
| "grad_norm": 1.065556287765503, | |
| "learning_rate": 4.9998910347316286e-05, | |
| "loss": 0.7561, | |
| "num_input_tokens_seen": 306944, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010699001426533523, | |
| "grad_norm": 1.066805362701416, | |
| "learning_rate": 4.9998430905150826e-05, | |
| "loss": 0.7299, | |
| "num_input_tokens_seen": 371616, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012482168330955777, | |
| "grad_norm": 1.2590147256851196, | |
| "learning_rate": 4.999786429563404e-05, | |
| "loss": 0.6834, | |
| "num_input_tokens_seen": 435536, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.014265335235378032, | |
| "grad_norm": 1.0066215991973877, | |
| "learning_rate": 4.999721052074164e-05, | |
| "loss": 0.6511, | |
| "num_input_tokens_seen": 499328, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.016048502139800285, | |
| "grad_norm": 1.0162546634674072, | |
| "learning_rate": 4.99964695827533e-05, | |
| "loss": 0.5992, | |
| "num_input_tokens_seen": 557504, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01783166904422254, | |
| "grad_norm": 0.9829245209693909, | |
| "learning_rate": 4.999564148425258e-05, | |
| "loss": 0.6245, | |
| "num_input_tokens_seen": 621440, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019614835948644792, | |
| "grad_norm": 0.9447645545005798, | |
| "learning_rate": 4.999472622812701e-05, | |
| "loss": 0.6444, | |
| "num_input_tokens_seen": 685856, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.021398002853067047, | |
| "grad_norm": 1.0958608388900757, | |
| "learning_rate": 4.9993723817567996e-05, | |
| "loss": 0.5194, | |
| "num_input_tokens_seen": 748112, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.023181169757489302, | |
| "grad_norm": 0.9865729808807373, | |
| "learning_rate": 4.999263425607086e-05, | |
| "loss": 0.5021, | |
| "num_input_tokens_seen": 811008, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.024964336661911554, | |
| "grad_norm": 1.2535978555679321, | |
| "learning_rate": 4.9991457547434805e-05, | |
| "loss": 0.6641, | |
| "num_input_tokens_seen": 878272, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02674750356633381, | |
| "grad_norm": 1.6020156145095825, | |
| "learning_rate": 4.9990193695762914e-05, | |
| "loss": 0.5479, | |
| "num_input_tokens_seen": 942608, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.028530670470756064, | |
| "grad_norm": 1.1668367385864258, | |
| "learning_rate": 4.998884270546214e-05, | |
| "loss": 0.6181, | |
| "num_input_tokens_seen": 1005776, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.030313837375178315, | |
| "grad_norm": 1.1580744981765747, | |
| "learning_rate": 4.998740458124324e-05, | |
| "loss": 0.6266, | |
| "num_input_tokens_seen": 1068192, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03209700427960057, | |
| "grad_norm": 0.9773775339126587, | |
| "learning_rate": 4.9985879328120846e-05, | |
| "loss": 0.5088, | |
| "num_input_tokens_seen": 1128592, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.033880171184022825, | |
| "grad_norm": 1.4142199754714966, | |
| "learning_rate": 4.9984266951413396e-05, | |
| "loss": 0.5199, | |
| "num_input_tokens_seen": 1194592, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03566333808844508, | |
| "grad_norm": 1.459350347518921, | |
| "learning_rate": 4.998256745674308e-05, | |
| "loss": 0.5855, | |
| "num_input_tokens_seen": 1257744, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.037446504992867335, | |
| "grad_norm": 1.118642807006836, | |
| "learning_rate": 4.99807808500359e-05, | |
| "loss": 0.6148, | |
| "num_input_tokens_seen": 1320944, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.039229671897289584, | |
| "grad_norm": 1.1180983781814575, | |
| "learning_rate": 4.99789071375216e-05, | |
| "loss": 0.5517, | |
| "num_input_tokens_seen": 1382928, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04101283880171184, | |
| "grad_norm": 1.2651177644729614, | |
| "learning_rate": 4.9976946325733654e-05, | |
| "loss": 0.5959, | |
| "num_input_tokens_seen": 1449408, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.042796005706134094, | |
| "grad_norm": 0.9860583543777466, | |
| "learning_rate": 4.997489842150924e-05, | |
| "loss": 0.4779, | |
| "num_input_tokens_seen": 1510752, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04457917261055635, | |
| "grad_norm": 1.0358836650848389, | |
| "learning_rate": 4.997276343198922e-05, | |
| "loss": 0.5474, | |
| "num_input_tokens_seen": 1568928, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.046362339514978604, | |
| "grad_norm": 1.3108216524124146, | |
| "learning_rate": 4.997054136461811e-05, | |
| "loss": 0.4624, | |
| "num_input_tokens_seen": 1631872, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04814550641940086, | |
| "grad_norm": 1.0577709674835205, | |
| "learning_rate": 4.996823222714408e-05, | |
| "loss": 0.558, | |
| "num_input_tokens_seen": 1694000, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04992867332382311, | |
| "grad_norm": 0.9583589434623718, | |
| "learning_rate": 4.996583602761887e-05, | |
| "loss": 0.535, | |
| "num_input_tokens_seen": 1752208, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05171184022824536, | |
| "grad_norm": 1.1273239850997925, | |
| "learning_rate": 4.9963352774397845e-05, | |
| "loss": 0.581, | |
| "num_input_tokens_seen": 1809968, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.05349500713266762, | |
| "grad_norm": 0.9180589914321899, | |
| "learning_rate": 4.9960782476139875e-05, | |
| "loss": 0.5853, | |
| "num_input_tokens_seen": 1875584, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05527817403708987, | |
| "grad_norm": 0.9368972778320312, | |
| "learning_rate": 4.9958125141807376e-05, | |
| "loss": 0.5655, | |
| "num_input_tokens_seen": 1936544, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.05706134094151213, | |
| "grad_norm": 1.093083143234253, | |
| "learning_rate": 4.9955380780666233e-05, | |
| "loss": 0.5248, | |
| "num_input_tokens_seen": 1997312, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05884450784593438, | |
| "grad_norm": 1.0452104806900024, | |
| "learning_rate": 4.99525494022858e-05, | |
| "loss": 0.5912, | |
| "num_input_tokens_seen": 2058400, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.06062767475035663, | |
| "grad_norm": 1.655479073524475, | |
| "learning_rate": 4.9949631016538845e-05, | |
| "loss": 0.5465, | |
| "num_input_tokens_seen": 2123584, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.062410841654778886, | |
| "grad_norm": 1.295340895652771, | |
| "learning_rate": 4.994662563360152e-05, | |
| "loss": 0.6319, | |
| "num_input_tokens_seen": 2187776, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.06419400855920114, | |
| "grad_norm": 1.1385325193405151, | |
| "learning_rate": 4.994353326395334e-05, | |
| "loss": 0.6121, | |
| "num_input_tokens_seen": 2248592, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06597717546362339, | |
| "grad_norm": 1.2202588319778442, | |
| "learning_rate": 4.994035391837713e-05, | |
| "loss": 0.5926, | |
| "num_input_tokens_seen": 2311472, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.06776034236804565, | |
| "grad_norm": 1.1300709247589111, | |
| "learning_rate": 4.9937087607958987e-05, | |
| "loss": 0.5075, | |
| "num_input_tokens_seen": 2374240, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0695435092724679, | |
| "grad_norm": 1.0753881931304932, | |
| "learning_rate": 4.993373434408825e-05, | |
| "loss": 0.5187, | |
| "num_input_tokens_seen": 2434864, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07132667617689016, | |
| "grad_norm": 1.0271146297454834, | |
| "learning_rate": 4.993029413845746e-05, | |
| "loss": 0.5777, | |
| "num_input_tokens_seen": 2495712, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07310984308131241, | |
| "grad_norm": 1.7475312948226929, | |
| "learning_rate": 4.9926767003062316e-05, | |
| "loss": 0.5091, | |
| "num_input_tokens_seen": 2555184, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.07489300998573467, | |
| "grad_norm": 1.1732685565948486, | |
| "learning_rate": 4.992315295020163e-05, | |
| "loss": 0.5594, | |
| "num_input_tokens_seen": 2616736, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07667617689015692, | |
| "grad_norm": 1.1418745517730713, | |
| "learning_rate": 4.991945199247728e-05, | |
| "loss": 0.633, | |
| "num_input_tokens_seen": 2679568, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.07845934379457917, | |
| "grad_norm": 1.5812561511993408, | |
| "learning_rate": 4.991566414279421e-05, | |
| "loss": 0.5361, | |
| "num_input_tokens_seen": 2741888, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08024251069900143, | |
| "grad_norm": 1.2565455436706543, | |
| "learning_rate": 4.99117894143603e-05, | |
| "loss": 0.5128, | |
| "num_input_tokens_seen": 2804736, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.08202567760342368, | |
| "grad_norm": 1.081152081489563, | |
| "learning_rate": 4.990782782068639e-05, | |
| "loss": 0.4925, | |
| "num_input_tokens_seen": 2864768, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08380884450784594, | |
| "grad_norm": 1.157086730003357, | |
| "learning_rate": 4.9903779375586224e-05, | |
| "loss": 0.5091, | |
| "num_input_tokens_seen": 2925776, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.08559201141226819, | |
| "grad_norm": 1.496232032775879, | |
| "learning_rate": 4.989964409317637e-05, | |
| "loss": 0.5611, | |
| "num_input_tokens_seen": 2984032, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08737517831669044, | |
| "grad_norm": 1.5581008195877075, | |
| "learning_rate": 4.989542198787619e-05, | |
| "loss": 0.4574, | |
| "num_input_tokens_seen": 3047024, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.0891583452211127, | |
| "grad_norm": 1.1673293113708496, | |
| "learning_rate": 4.9891113074407816e-05, | |
| "loss": 0.4982, | |
| "num_input_tokens_seen": 3105552, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09094151212553495, | |
| "grad_norm": 1.1178501844406128, | |
| "learning_rate": 4.988671736779604e-05, | |
| "loss": 0.5412, | |
| "num_input_tokens_seen": 3165632, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.09272467902995721, | |
| "grad_norm": 1.1773957014083862, | |
| "learning_rate": 4.988223488336832e-05, | |
| "loss": 0.5028, | |
| "num_input_tokens_seen": 3229392, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09450784593437946, | |
| "grad_norm": 1.1285181045532227, | |
| "learning_rate": 4.987766563675467e-05, | |
| "loss": 0.5414, | |
| "num_input_tokens_seen": 3287616, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.09629101283880172, | |
| "grad_norm": 1.630057454109192, | |
| "learning_rate": 4.9873009643887666e-05, | |
| "loss": 0.5512, | |
| "num_input_tokens_seen": 3346496, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.09807417974322397, | |
| "grad_norm": 2.1637048721313477, | |
| "learning_rate": 4.986826692100236e-05, | |
| "loss": 0.4881, | |
| "num_input_tokens_seen": 3409312, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.09985734664764621, | |
| "grad_norm": 1.9481849670410156, | |
| "learning_rate": 4.98634374846362e-05, | |
| "loss": 0.4716, | |
| "num_input_tokens_seen": 3472752, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10164051355206848, | |
| "grad_norm": 1.3725030422210693, | |
| "learning_rate": 4.9858521351629005e-05, | |
| "loss": 0.5286, | |
| "num_input_tokens_seen": 3534032, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.10342368045649072, | |
| "grad_norm": 1.5664440393447876, | |
| "learning_rate": 4.985351853912292e-05, | |
| "loss": 0.4985, | |
| "num_input_tokens_seen": 3598336, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.10520684736091299, | |
| "grad_norm": 1.3553557395935059, | |
| "learning_rate": 4.984842906456231e-05, | |
| "loss": 0.5768, | |
| "num_input_tokens_seen": 3662144, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.10699001426533523, | |
| "grad_norm": 1.2202125787734985, | |
| "learning_rate": 4.984325294569372e-05, | |
| "loss": 0.4933, | |
| "num_input_tokens_seen": 3724048, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.10877318116975748, | |
| "grad_norm": 1.0455083847045898, | |
| "learning_rate": 4.9837990200565834e-05, | |
| "loss": 0.5675, | |
| "num_input_tokens_seen": 3784320, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.11055634807417974, | |
| "grad_norm": 1.5656300783157349, | |
| "learning_rate": 4.983264084752939e-05, | |
| "loss": 0.5315, | |
| "num_input_tokens_seen": 3849040, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11233951497860199, | |
| "grad_norm": 1.4153857231140137, | |
| "learning_rate": 4.98272049052371e-05, | |
| "loss": 0.5444, | |
| "num_input_tokens_seen": 3909552, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.11412268188302425, | |
| "grad_norm": 1.9048830270767212, | |
| "learning_rate": 4.982168239264364e-05, | |
| "loss": 0.4808, | |
| "num_input_tokens_seen": 3969120, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1159058487874465, | |
| "grad_norm": 1.0821411609649658, | |
| "learning_rate": 4.981607332900552e-05, | |
| "loss": 0.4829, | |
| "num_input_tokens_seen": 4029360, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.11768901569186876, | |
| "grad_norm": 1.2863287925720215, | |
| "learning_rate": 4.9810377733881065e-05, | |
| "loss": 0.5273, | |
| "num_input_tokens_seen": 4091296, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.11947218259629101, | |
| "grad_norm": 1.3957486152648926, | |
| "learning_rate": 4.98045956271303e-05, | |
| "loss": 0.5443, | |
| "num_input_tokens_seen": 4154304, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.12125534950071326, | |
| "grad_norm": 1.1562933921813965, | |
| "learning_rate": 4.979872702891495e-05, | |
| "loss": 0.5046, | |
| "num_input_tokens_seen": 4220400, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.12303851640513552, | |
| "grad_norm": 1.1498775482177734, | |
| "learning_rate": 4.979277195969829e-05, | |
| "loss": 0.5393, | |
| "num_input_tokens_seen": 4279408, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.12482168330955777, | |
| "grad_norm": 1.2570199966430664, | |
| "learning_rate": 4.978673044024514e-05, | |
| "loss": 0.451, | |
| "num_input_tokens_seen": 4339392, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12660485021398002, | |
| "grad_norm": 1.3947458267211914, | |
| "learning_rate": 4.978060249162175e-05, | |
| "loss": 0.5715, | |
| "num_input_tokens_seen": 4399424, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.12838801711840228, | |
| "grad_norm": 1.1799883842468262, | |
| "learning_rate": 4.977438813519574e-05, | |
| "loss": 0.5409, | |
| "num_input_tokens_seen": 4460992, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13017118402282454, | |
| "grad_norm": 0.9736462831497192, | |
| "learning_rate": 4.976808739263602e-05, | |
| "loss": 0.5298, | |
| "num_input_tokens_seen": 4525664, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.13195435092724678, | |
| "grad_norm": 1.1682716608047485, | |
| "learning_rate": 4.976170028591274e-05, | |
| "loss": 0.481, | |
| "num_input_tokens_seen": 4582160, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.13373751783166904, | |
| "grad_norm": 1.3871419429779053, | |
| "learning_rate": 4.975522683729719e-05, | |
| "loss": 0.5021, | |
| "num_input_tokens_seen": 4649328, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1355206847360913, | |
| "grad_norm": 1.1554944515228271, | |
| "learning_rate": 4.9748667069361715e-05, | |
| "loss": 0.5064, | |
| "num_input_tokens_seen": 4711088, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.13730385164051356, | |
| "grad_norm": 1.3844372034072876, | |
| "learning_rate": 4.9742021004979656e-05, | |
| "loss": 0.5516, | |
| "num_input_tokens_seen": 4774864, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.1390870185449358, | |
| "grad_norm": 1.4874283075332642, | |
| "learning_rate": 4.9735288667325257e-05, | |
| "loss": 0.4712, | |
| "num_input_tokens_seen": 4834944, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.14087018544935806, | |
| "grad_norm": 1.195500373840332, | |
| "learning_rate": 4.97284700798736e-05, | |
| "loss": 0.5326, | |
| "num_input_tokens_seen": 4897264, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.14265335235378032, | |
| "grad_norm": 1.1240135431289673, | |
| "learning_rate": 4.97215652664005e-05, | |
| "loss": 0.5958, | |
| "num_input_tokens_seen": 4962208, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14443651925820256, | |
| "grad_norm": 0.8974002599716187, | |
| "learning_rate": 4.971457425098244e-05, | |
| "loss": 0.5536, | |
| "num_input_tokens_seen": 5027264, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.14621968616262482, | |
| "grad_norm": 1.0974167585372925, | |
| "learning_rate": 4.970749705799649e-05, | |
| "loss": 0.4721, | |
| "num_input_tokens_seen": 5093216, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.14800285306704708, | |
| "grad_norm": 1.3087302446365356, | |
| "learning_rate": 4.9700333712120195e-05, | |
| "loss": 0.4383, | |
| "num_input_tokens_seen": 5155296, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.14978601997146934, | |
| "grad_norm": 5.880493640899658, | |
| "learning_rate": 4.969308423833152e-05, | |
| "loss": 0.5098, | |
| "num_input_tokens_seen": 5216416, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.15156918687589158, | |
| "grad_norm": 1.2446019649505615, | |
| "learning_rate": 4.9685748661908756e-05, | |
| "loss": 0.494, | |
| "num_input_tokens_seen": 5278816, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.15335235378031384, | |
| "grad_norm": 1.1921520233154297, | |
| "learning_rate": 4.967832700843041e-05, | |
| "loss": 0.5728, | |
| "num_input_tokens_seen": 5344896, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1551355206847361, | |
| "grad_norm": 1.161622166633606, | |
| "learning_rate": 4.967081930377515e-05, | |
| "loss": 0.5036, | |
| "num_input_tokens_seen": 5400960, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.15691868758915833, | |
| "grad_norm": 1.0513135194778442, | |
| "learning_rate": 4.966322557412168e-05, | |
| "loss": 0.4347, | |
| "num_input_tokens_seen": 5462928, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1587018544935806, | |
| "grad_norm": 1.2251578569412231, | |
| "learning_rate": 4.965554584594868e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 5525296, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.16048502139800286, | |
| "grad_norm": 1.2554380893707275, | |
| "learning_rate": 4.9647780146034695e-05, | |
| "loss": 0.511, | |
| "num_input_tokens_seen": 5590640, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.16226818830242512, | |
| "grad_norm": 2.3998403549194336, | |
| "learning_rate": 4.9639928501458035e-05, | |
| "loss": 0.5376, | |
| "num_input_tokens_seen": 5652912, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.16405135520684735, | |
| "grad_norm": 1.3643852472305298, | |
| "learning_rate": 4.963199093959671e-05, | |
| "loss": 0.5668, | |
| "num_input_tokens_seen": 5711952, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.16583452211126962, | |
| "grad_norm": 1.4717122316360474, | |
| "learning_rate": 4.96239674881283e-05, | |
| "loss": 0.4877, | |
| "num_input_tokens_seen": 5773968, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.16761768901569188, | |
| "grad_norm": 1.8179185390472412, | |
| "learning_rate": 4.9615858175029884e-05, | |
| "loss": 0.4669, | |
| "num_input_tokens_seen": 5836064, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1694008559201141, | |
| "grad_norm": 2.963438034057617, | |
| "learning_rate": 4.960766302857793e-05, | |
| "loss": 0.4766, | |
| "num_input_tokens_seen": 5897600, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.17118402282453637, | |
| "grad_norm": 2.9000422954559326, | |
| "learning_rate": 4.9599382077348205e-05, | |
| "loss": 0.542, | |
| "num_input_tokens_seen": 5959856, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.17296718972895864, | |
| "grad_norm": 1.1453759670257568, | |
| "learning_rate": 4.959101535021566e-05, | |
| "loss": 0.5482, | |
| "num_input_tokens_seen": 6016128, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.17475035663338087, | |
| "grad_norm": 1.1614904403686523, | |
| "learning_rate": 4.9582562876354346e-05, | |
| "loss": 0.5361, | |
| "num_input_tokens_seen": 6079664, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.17653352353780313, | |
| "grad_norm": 1.3136591911315918, | |
| "learning_rate": 4.95740246852373e-05, | |
| "loss": 0.5131, | |
| "num_input_tokens_seen": 6137568, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.1783166904422254, | |
| "grad_norm": 1.0961729288101196, | |
| "learning_rate": 4.9565400806636447e-05, | |
| "loss": 0.431, | |
| "num_input_tokens_seen": 6199280, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18009985734664766, | |
| "grad_norm": 1.3530110120773315, | |
| "learning_rate": 4.9556691270622515e-05, | |
| "loss": 0.526, | |
| "num_input_tokens_seen": 6262272, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.1818830242510699, | |
| "grad_norm": 1.2133769989013672, | |
| "learning_rate": 4.9547896107564886e-05, | |
| "loss": 0.5082, | |
| "num_input_tokens_seen": 6324144, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.18366619115549215, | |
| "grad_norm": 1.2528913021087646, | |
| "learning_rate": 4.9539015348131526e-05, | |
| "loss": 0.5343, | |
| "num_input_tokens_seen": 6386096, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.18544935805991442, | |
| "grad_norm": 1.4908058643341064, | |
| "learning_rate": 4.953004902328887e-05, | |
| "loss": 0.5408, | |
| "num_input_tokens_seen": 6450704, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.18723252496433665, | |
| "grad_norm": 1.0931016206741333, | |
| "learning_rate": 4.9520997164301726e-05, | |
| "loss": 0.53, | |
| "num_input_tokens_seen": 6512512, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.1890156918687589, | |
| "grad_norm": 1.317772626876831, | |
| "learning_rate": 4.951185980273312e-05, | |
| "loss": 0.4741, | |
| "num_input_tokens_seen": 6572848, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.19079885877318117, | |
| "grad_norm": 1.114240288734436, | |
| "learning_rate": 4.9502636970444246e-05, | |
| "loss": 0.5021, | |
| "num_input_tokens_seen": 6634064, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.19258202567760344, | |
| "grad_norm": 1.1686744689941406, | |
| "learning_rate": 4.949332869959432e-05, | |
| "loss": 0.5557, | |
| "num_input_tokens_seen": 6698560, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.19436519258202567, | |
| "grad_norm": 1.2107973098754883, | |
| "learning_rate": 4.948393502264046e-05, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 6758000, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.19614835948644793, | |
| "grad_norm": 1.067867398262024, | |
| "learning_rate": 4.9474455972337607e-05, | |
| "loss": 0.4712, | |
| "num_input_tokens_seen": 6823616, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1979315263908702, | |
| "grad_norm": 1.0068106651306152, | |
| "learning_rate": 4.946489158173838e-05, | |
| "loss": 0.4854, | |
| "num_input_tokens_seen": 6883376, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.19971469329529243, | |
| "grad_norm": 1.490473747253418, | |
| "learning_rate": 4.945524188419298e-05, | |
| "loss": 0.5664, | |
| "num_input_tokens_seen": 6943808, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2014978601997147, | |
| "grad_norm": 1.0813665390014648, | |
| "learning_rate": 4.9445506913349063e-05, | |
| "loss": 0.6241, | |
| "num_input_tokens_seen": 7005728, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.20328102710413695, | |
| "grad_norm": 1.3641761541366577, | |
| "learning_rate": 4.943568670315162e-05, | |
| "loss": 0.4916, | |
| "num_input_tokens_seen": 7068608, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.20506419400855921, | |
| "grad_norm": 1.0902137756347656, | |
| "learning_rate": 4.942578128784287e-05, | |
| "loss": 0.4833, | |
| "num_input_tokens_seen": 7127008, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.20684736091298145, | |
| "grad_norm": 1.430445909500122, | |
| "learning_rate": 4.941579070196214e-05, | |
| "loss": 0.422, | |
| "num_input_tokens_seen": 7191776, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2086305278174037, | |
| "grad_norm": 1.6088680028915405, | |
| "learning_rate": 4.940571498034572e-05, | |
| "loss": 0.4913, | |
| "num_input_tokens_seen": 7251536, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.21041369472182597, | |
| "grad_norm": 1.3081697225570679, | |
| "learning_rate": 4.939555415812678e-05, | |
| "loss": 0.451, | |
| "num_input_tokens_seen": 7315696, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2121968616262482, | |
| "grad_norm": 1.3625929355621338, | |
| "learning_rate": 4.938530827073522e-05, | |
| "loss": 0.5694, | |
| "num_input_tokens_seen": 7373792, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.21398002853067047, | |
| "grad_norm": 1.1833407878875732, | |
| "learning_rate": 4.9374977353897566e-05, | |
| "loss": 0.5647, | |
| "num_input_tokens_seen": 7434464, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21576319543509273, | |
| "grad_norm": 1.3193016052246094, | |
| "learning_rate": 4.936456144363681e-05, | |
| "loss": 0.5739, | |
| "num_input_tokens_seen": 7497328, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.21754636233951496, | |
| "grad_norm": 1.4671732187271118, | |
| "learning_rate": 4.935406057627234e-05, | |
| "loss": 0.5399, | |
| "num_input_tokens_seen": 7560816, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.21932952924393723, | |
| "grad_norm": 1.0455771684646606, | |
| "learning_rate": 4.9343474788419767e-05, | |
| "loss": 0.4423, | |
| "num_input_tokens_seen": 7623280, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.2211126961483595, | |
| "grad_norm": 1.2360905408859253, | |
| "learning_rate": 4.9332804116990795e-05, | |
| "loss": 0.4595, | |
| "num_input_tokens_seen": 7685264, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.22289586305278175, | |
| "grad_norm": 1.3082692623138428, | |
| "learning_rate": 4.9322048599193124e-05, | |
| "loss": 0.5022, | |
| "num_input_tokens_seen": 7748000, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.22467902995720399, | |
| "grad_norm": 1.306279182434082, | |
| "learning_rate": 4.931120827253033e-05, | |
| "loss": 0.4287, | |
| "num_input_tokens_seen": 7812992, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.22646219686162625, | |
| "grad_norm": 1.3158313035964966, | |
| "learning_rate": 4.930028317480167e-05, | |
| "loss": 0.4895, | |
| "num_input_tokens_seen": 7876416, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2282453637660485, | |
| "grad_norm": 1.1636604070663452, | |
| "learning_rate": 4.9289273344102014e-05, | |
| "loss": 0.4975, | |
| "num_input_tokens_seen": 7940544, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.23002853067047074, | |
| "grad_norm": 1.23000168800354, | |
| "learning_rate": 4.927817881882169e-05, | |
| "loss": 0.4295, | |
| "num_input_tokens_seen": 7999472, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.231811697574893, | |
| "grad_norm": 1.54082453250885, | |
| "learning_rate": 4.9266999637646326e-05, | |
| "loss": 0.5753, | |
| "num_input_tokens_seen": 8061168, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.23359486447931527, | |
| "grad_norm": 2.485759973526001, | |
| "learning_rate": 4.925573583955676e-05, | |
| "loss": 0.443, | |
| "num_input_tokens_seen": 8118944, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.23537803138373753, | |
| "grad_norm": 1.284912347793579, | |
| "learning_rate": 4.9244387463828876e-05, | |
| "loss": 0.5421, | |
| "num_input_tokens_seen": 8185072, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.23716119828815976, | |
| "grad_norm": 3.4624996185302734, | |
| "learning_rate": 4.9232954550033484e-05, | |
| "loss": 0.4099, | |
| "num_input_tokens_seen": 8247616, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.23894436519258203, | |
| "grad_norm": 1.1022762060165405, | |
| "learning_rate": 4.922143713803613e-05, | |
| "loss": 0.4784, | |
| "num_input_tokens_seen": 8312240, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2407275320970043, | |
| "grad_norm": 1.1634345054626465, | |
| "learning_rate": 4.920983526799705e-05, | |
| "loss": 0.3882, | |
| "num_input_tokens_seen": 8371088, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.24251069900142652, | |
| "grad_norm": 1.4921728372573853, | |
| "learning_rate": 4.919814898037095e-05, | |
| "loss": 0.5662, | |
| "num_input_tokens_seen": 8435264, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.24429386590584878, | |
| "grad_norm": 1.2474942207336426, | |
| "learning_rate": 4.918637831590689e-05, | |
| "loss": 0.4169, | |
| "num_input_tokens_seen": 8498960, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.24607703281027105, | |
| "grad_norm": 0.9692139625549316, | |
| "learning_rate": 4.917452331564816e-05, | |
| "loss": 0.4681, | |
| "num_input_tokens_seen": 8561168, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2478601997146933, | |
| "grad_norm": 1.57968008518219, | |
| "learning_rate": 4.9162584020932114e-05, | |
| "loss": 0.4668, | |
| "num_input_tokens_seen": 8624528, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.24964336661911554, | |
| "grad_norm": 1.7983195781707764, | |
| "learning_rate": 4.915056047339002e-05, | |
| "loss": 0.5366, | |
| "num_input_tokens_seen": 8684608, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2514265335235378, | |
| "grad_norm": 1.3157538175582886, | |
| "learning_rate": 4.913845271494695e-05, | |
| "loss": 0.4451, | |
| "num_input_tokens_seen": 8747216, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.25320970042796004, | |
| "grad_norm": 1.193604588508606, | |
| "learning_rate": 4.91262607878216e-05, | |
| "loss": 0.5626, | |
| "num_input_tokens_seen": 8807392, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2549928673323823, | |
| "grad_norm": 1.0445785522460938, | |
| "learning_rate": 4.911398473452616e-05, | |
| "loss": 0.4848, | |
| "num_input_tokens_seen": 8868496, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.25677603423680456, | |
| "grad_norm": 1.8069995641708374, | |
| "learning_rate": 4.910162459786617e-05, | |
| "loss": 0.4672, | |
| "num_input_tokens_seen": 8929056, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2585592011412268, | |
| "grad_norm": 1.1339744329452515, | |
| "learning_rate": 4.908918042094033e-05, | |
| "loss": 0.399, | |
| "num_input_tokens_seen": 8991968, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.2603423680456491, | |
| "grad_norm": 1.2230961322784424, | |
| "learning_rate": 4.907665224714042e-05, | |
| "loss": 0.5477, | |
| "num_input_tokens_seen": 9053408, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.26212553495007135, | |
| "grad_norm": 1.2331055402755737, | |
| "learning_rate": 4.906404012015108e-05, | |
| "loss": 0.4485, | |
| "num_input_tokens_seen": 9115920, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.26390870185449355, | |
| "grad_norm": 1.8696657419204712, | |
| "learning_rate": 4.905134408394969e-05, | |
| "loss": 0.4714, | |
| "num_input_tokens_seen": 9184576, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2656918687589158, | |
| "grad_norm": 1.9693909883499146, | |
| "learning_rate": 4.9038564182806234e-05, | |
| "loss": 0.516, | |
| "num_input_tokens_seen": 9247872, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.2674750356633381, | |
| "grad_norm": 1.0184056758880615, | |
| "learning_rate": 4.902570046128312e-05, | |
| "loss": 0.4914, | |
| "num_input_tokens_seen": 9310976, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.26925820256776034, | |
| "grad_norm": 1.165300726890564, | |
| "learning_rate": 4.9012752964235014e-05, | |
| "loss": 0.4695, | |
| "num_input_tokens_seen": 9372016, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.2710413694721826, | |
| "grad_norm": 1.0303696393966675, | |
| "learning_rate": 4.8999721736808714e-05, | |
| "loss": 0.4741, | |
| "num_input_tokens_seen": 9432624, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.27282453637660486, | |
| "grad_norm": 1.2935962677001953, | |
| "learning_rate": 4.898660682444297e-05, | |
| "loss": 0.5044, | |
| "num_input_tokens_seen": 9493360, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.2746077032810271, | |
| "grad_norm": 1.3259665966033936, | |
| "learning_rate": 4.8973408272868347e-05, | |
| "loss": 0.4618, | |
| "num_input_tokens_seen": 9555136, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.27639087018544933, | |
| "grad_norm": 4.303719520568848, | |
| "learning_rate": 4.896012612810704e-05, | |
| "loss": 0.3954, | |
| "num_input_tokens_seen": 9616896, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.2781740370898716, | |
| "grad_norm": 1.2892228364944458, | |
| "learning_rate": 4.894676043647274e-05, | |
| "loss": 0.3872, | |
| "num_input_tokens_seen": 9674752, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.27995720399429386, | |
| "grad_norm": 1.360479474067688, | |
| "learning_rate": 4.8933311244570434e-05, | |
| "loss": 0.4713, | |
| "num_input_tokens_seen": 9736976, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.2817403708987161, | |
| "grad_norm": 1.3140631914138794, | |
| "learning_rate": 4.8919778599296293e-05, | |
| "loss": 0.3917, | |
| "num_input_tokens_seen": 9797136, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2835235378031384, | |
| "grad_norm": 1.0723479986190796, | |
| "learning_rate": 4.890616254783748e-05, | |
| "loss": 0.4911, | |
| "num_input_tokens_seen": 9858928, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.28530670470756064, | |
| "grad_norm": 1.4321138858795166, | |
| "learning_rate": 4.8892463137671963e-05, | |
| "loss": 0.4682, | |
| "num_input_tokens_seen": 9917776, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2870898716119829, | |
| "grad_norm": 1.2900583744049072, | |
| "learning_rate": 4.887868041656839e-05, | |
| "loss": 0.4978, | |
| "num_input_tokens_seen": 9982464, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2888730385164051, | |
| "grad_norm": 1.1396691799163818, | |
| "learning_rate": 4.886481443258594e-05, | |
| "loss": 0.4178, | |
| "num_input_tokens_seen": 10044208, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2906562054208274, | |
| "grad_norm": 1.40047025680542, | |
| "learning_rate": 4.885086523407405e-05, | |
| "loss": 0.455, | |
| "num_input_tokens_seen": 10105968, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.29243937232524964, | |
| "grad_norm": 1.263271689414978, | |
| "learning_rate": 4.88368328696724e-05, | |
| "loss": 0.4933, | |
| "num_input_tokens_seen": 10166992, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2942225392296719, | |
| "grad_norm": 1.3891979455947876, | |
| "learning_rate": 4.882271738831059e-05, | |
| "loss": 0.5043, | |
| "num_input_tokens_seen": 10232144, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.29600570613409416, | |
| "grad_norm": 1.4529082775115967, | |
| "learning_rate": 4.880851883920809e-05, | |
| "loss": 0.5188, | |
| "num_input_tokens_seen": 10292944, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2977888730385164, | |
| "grad_norm": 1.1358407735824585, | |
| "learning_rate": 4.879423727187401e-05, | |
| "loss": 0.5159, | |
| "num_input_tokens_seen": 10354256, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.2995720399429387, | |
| "grad_norm": 1.7224394083023071, | |
| "learning_rate": 4.8779872736106916e-05, | |
| "loss": 0.5063, | |
| "num_input_tokens_seen": 10416688, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3013552068473609, | |
| "grad_norm": 1.557830810546875, | |
| "learning_rate": 4.8765425281994704e-05, | |
| "loss": 0.44, | |
| "num_input_tokens_seen": 10477712, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.30313837375178315, | |
| "grad_norm": 1.207537293434143, | |
| "learning_rate": 4.8750894959914377e-05, | |
| "loss": 0.457, | |
| "num_input_tokens_seen": 10539120, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3049215406562054, | |
| "grad_norm": 1.2876931428909302, | |
| "learning_rate": 4.873628182053191e-05, | |
| "loss": 0.4583, | |
| "num_input_tokens_seen": 10602400, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.3067047075606277, | |
| "grad_norm": 1.5556424856185913, | |
| "learning_rate": 4.872158591480206e-05, | |
| "loss": 0.4462, | |
| "num_input_tokens_seen": 10665920, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.30848787446504994, | |
| "grad_norm": 1.2405084371566772, | |
| "learning_rate": 4.870680729396815e-05, | |
| "loss": 0.4229, | |
| "num_input_tokens_seen": 10732768, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.3102710413694722, | |
| "grad_norm": 1.3671534061431885, | |
| "learning_rate": 4.869194600956195e-05, | |
| "loss": 0.5017, | |
| "num_input_tokens_seen": 10794368, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.31205420827389446, | |
| "grad_norm": 1.0638670921325684, | |
| "learning_rate": 4.867700211340347e-05, | |
| "loss": 0.4751, | |
| "num_input_tokens_seen": 10853408, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.31383737517831667, | |
| "grad_norm": 1.2563133239746094, | |
| "learning_rate": 4.8661975657600765e-05, | |
| "loss": 0.4873, | |
| "num_input_tokens_seen": 10918576, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.31562054208273893, | |
| "grad_norm": 1.0638364553451538, | |
| "learning_rate": 4.8646866694549795e-05, | |
| "loss": 0.4572, | |
| "num_input_tokens_seen": 10980976, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.3174037089871612, | |
| "grad_norm": 1.3460172414779663, | |
| "learning_rate": 4.863167527693417e-05, | |
| "loss": 0.4758, | |
| "num_input_tokens_seen": 11040448, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.31918687589158345, | |
| "grad_norm": 1.210242509841919, | |
| "learning_rate": 4.861640145772507e-05, | |
| "loss": 0.5092, | |
| "num_input_tokens_seen": 11104160, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.3209700427960057, | |
| "grad_norm": 1.0002316236495972, | |
| "learning_rate": 4.8601045290180946e-05, | |
| "loss": 0.4447, | |
| "num_input_tokens_seen": 11164224, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.322753209700428, | |
| "grad_norm": 1.332479476928711, | |
| "learning_rate": 4.858560682784744e-05, | |
| "loss": 0.4335, | |
| "num_input_tokens_seen": 11227376, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.32453637660485024, | |
| "grad_norm": 1.223310112953186, | |
| "learning_rate": 4.8570086124557116e-05, | |
| "loss": 0.4156, | |
| "num_input_tokens_seen": 11284704, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.32631954350927245, | |
| "grad_norm": 1.439526915550232, | |
| "learning_rate": 4.85544832344293e-05, | |
| "loss": 0.431, | |
| "num_input_tokens_seen": 11348448, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.3281027104136947, | |
| "grad_norm": 1.27132248878479, | |
| "learning_rate": 4.853879821186993e-05, | |
| "loss": 0.4941, | |
| "num_input_tokens_seen": 11406160, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.32988587731811697, | |
| "grad_norm": 1.6706770658493042, | |
| "learning_rate": 4.8523031111571316e-05, | |
| "loss": 0.4718, | |
| "num_input_tokens_seen": 11467088, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.33166904422253923, | |
| "grad_norm": 1.131922960281372, | |
| "learning_rate": 4.850718198851195e-05, | |
| "loss": 0.4172, | |
| "num_input_tokens_seen": 11532768, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3334522111269615, | |
| "grad_norm": 1.1946320533752441, | |
| "learning_rate": 4.849125089795634e-05, | |
| "loss": 0.3736, | |
| "num_input_tokens_seen": 11591984, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.33523537803138376, | |
| "grad_norm": 1.2938627004623413, | |
| "learning_rate": 4.8475237895454833e-05, | |
| "loss": 0.462, | |
| "num_input_tokens_seen": 11656624, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.33701854493580596, | |
| "grad_norm": 1.3345882892608643, | |
| "learning_rate": 4.845914303684336e-05, | |
| "loss": 0.4584, | |
| "num_input_tokens_seen": 11718256, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.3388017118402282, | |
| "grad_norm": 0.8923389315605164, | |
| "learning_rate": 4.844296637824329e-05, | |
| "loss": 0.5339, | |
| "num_input_tokens_seen": 11776080, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3405848787446505, | |
| "grad_norm": 1.2611216306686401, | |
| "learning_rate": 4.8426707976061226e-05, | |
| "loss": 0.5625, | |
| "num_input_tokens_seen": 11840768, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.34236804564907275, | |
| "grad_norm": 1.1760461330413818, | |
| "learning_rate": 4.84103678869888e-05, | |
| "loss": 0.5043, | |
| "num_input_tokens_seen": 11904208, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.344151212553495, | |
| "grad_norm": 1.19206964969635, | |
| "learning_rate": 4.8393946168002477e-05, | |
| "loss": 0.4183, | |
| "num_input_tokens_seen": 11967952, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.3459343794579173, | |
| "grad_norm": 1.1648989915847778, | |
| "learning_rate": 4.8377442876363364e-05, | |
| "loss": 0.4095, | |
| "num_input_tokens_seen": 12033136, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.34771754636233954, | |
| "grad_norm": 1.2076241970062256, | |
| "learning_rate": 4.8360858069617006e-05, | |
| "loss": 0.4537, | |
| "num_input_tokens_seen": 12097584, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.34950071326676174, | |
| "grad_norm": 1.0648747682571411, | |
| "learning_rate": 4.834419180559317e-05, | |
| "loss": 0.3932, | |
| "num_input_tokens_seen": 12156320, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.351283880171184, | |
| "grad_norm": 1.228440523147583, | |
| "learning_rate": 4.832744414240567e-05, | |
| "loss": 0.4313, | |
| "num_input_tokens_seen": 12218384, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.35306704707560627, | |
| "grad_norm": 1.4440958499908447, | |
| "learning_rate": 4.8310615138452156e-05, | |
| "loss": 0.4685, | |
| "num_input_tokens_seen": 12281856, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.35485021398002853, | |
| "grad_norm": 1.0754982233047485, | |
| "learning_rate": 4.829370485241388e-05, | |
| "loss": 0.4623, | |
| "num_input_tokens_seen": 12343904, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.3566333808844508, | |
| "grad_norm": 1.3745994567871094, | |
| "learning_rate": 4.827671334325556e-05, | |
| "loss": 0.4334, | |
| "num_input_tokens_seen": 12402256, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.35841654778887305, | |
| "grad_norm": 1.0649508237838745, | |
| "learning_rate": 4.82596406702251e-05, | |
| "loss": 0.4728, | |
| "num_input_tokens_seen": 12465536, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.3601997146932953, | |
| "grad_norm": 1.061046838760376, | |
| "learning_rate": 4.8242486892853424e-05, | |
| "loss": 0.421, | |
| "num_input_tokens_seen": 12530464, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3619828815977175, | |
| "grad_norm": 1.7068672180175781, | |
| "learning_rate": 4.822525207095425e-05, | |
| "loss": 0.4843, | |
| "num_input_tokens_seen": 12593216, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.3637660485021398, | |
| "grad_norm": 1.2018966674804688, | |
| "learning_rate": 4.820793626462391e-05, | |
| "loss": 0.4604, | |
| "num_input_tokens_seen": 12655248, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.36554921540656204, | |
| "grad_norm": 1.2888667583465576, | |
| "learning_rate": 4.819053953424112e-05, | |
| "loss": 0.427, | |
| "num_input_tokens_seen": 12718048, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.3673323823109843, | |
| "grad_norm": 1.3183050155639648, | |
| "learning_rate": 4.817306194046675e-05, | |
| "loss": 0.4415, | |
| "num_input_tokens_seen": 12781536, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.36911554921540657, | |
| "grad_norm": 1.7154110670089722, | |
| "learning_rate": 4.815550354424365e-05, | |
| "loss": 0.5193, | |
| "num_input_tokens_seen": 12844336, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.37089871611982883, | |
| "grad_norm": 1.3131228685379028, | |
| "learning_rate": 4.813786440679642e-05, | |
| "loss": 0.4078, | |
| "num_input_tokens_seen": 12906288, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3726818830242511, | |
| "grad_norm": 1.1588881015777588, | |
| "learning_rate": 4.81201445896312e-05, | |
| "loss": 0.3672, | |
| "num_input_tokens_seen": 12965200, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.3744650499286733, | |
| "grad_norm": 1.5113669633865356, | |
| "learning_rate": 4.810234415453545e-05, | |
| "loss": 0.4896, | |
| "num_input_tokens_seen": 13033248, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.37624821683309556, | |
| "grad_norm": 1.4646553993225098, | |
| "learning_rate": 4.808446316357773e-05, | |
| "loss": 0.4772, | |
| "num_input_tokens_seen": 13096752, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.3780313837375178, | |
| "grad_norm": 1.9652795791625977, | |
| "learning_rate": 4.80665016791075e-05, | |
| "loss": 0.4468, | |
| "num_input_tokens_seen": 13158992, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.3798145506419401, | |
| "grad_norm": 3.033592700958252, | |
| "learning_rate": 4.804845976375489e-05, | |
| "loss": 0.3997, | |
| "num_input_tokens_seen": 13222064, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.38159771754636235, | |
| "grad_norm": 1.2086786031723022, | |
| "learning_rate": 4.8030337480430496e-05, | |
| "loss": 0.4966, | |
| "num_input_tokens_seen": 13286112, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3833808844507846, | |
| "grad_norm": 1.7219619750976562, | |
| "learning_rate": 4.801213489232514e-05, | |
| "loss": 0.4918, | |
| "num_input_tokens_seen": 13346832, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.38516405135520687, | |
| "grad_norm": 1.256044864654541, | |
| "learning_rate": 4.799385206290965e-05, | |
| "loss": 0.4734, | |
| "num_input_tokens_seen": 13408992, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3869472182596291, | |
| "grad_norm": 1.150932788848877, | |
| "learning_rate": 4.7975489055934666e-05, | |
| "loss": 0.3703, | |
| "num_input_tokens_seen": 13469280, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.38873038516405134, | |
| "grad_norm": 1.4256497621536255, | |
| "learning_rate": 4.79570459354304e-05, | |
| "loss": 0.5076, | |
| "num_input_tokens_seen": 13533536, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3905135520684736, | |
| "grad_norm": 1.1593137979507446, | |
| "learning_rate": 4.79385227657064e-05, | |
| "loss": 0.4351, | |
| "num_input_tokens_seen": 13594304, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.39229671897289586, | |
| "grad_norm": 0.9239110350608826, | |
| "learning_rate": 4.791991961135135e-05, | |
| "loss": 0.4984, | |
| "num_input_tokens_seen": 13657328, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3940798858773181, | |
| "grad_norm": 0.999727189540863, | |
| "learning_rate": 4.790123653723282e-05, | |
| "loss": 0.4598, | |
| "num_input_tokens_seen": 13720224, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.3958630527817404, | |
| "grad_norm": 1.0658410787582397, | |
| "learning_rate": 4.788247360849708e-05, | |
| "loss": 0.4409, | |
| "num_input_tokens_seen": 13782656, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.39764621968616265, | |
| "grad_norm": 1.2038542032241821, | |
| "learning_rate": 4.786363089056881e-05, | |
| "loss": 0.4719, | |
| "num_input_tokens_seen": 13849120, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.39942938659058486, | |
| "grad_norm": 1.1782008409500122, | |
| "learning_rate": 4.784470844915093e-05, | |
| "loss": 0.4147, | |
| "num_input_tokens_seen": 13910944, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4012125534950071, | |
| "grad_norm": 0.9827120304107666, | |
| "learning_rate": 4.782570635022436e-05, | |
| "loss": 0.3883, | |
| "num_input_tokens_seen": 13969248, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.4029957203994294, | |
| "grad_norm": 1.0276070833206177, | |
| "learning_rate": 4.7806624660047744e-05, | |
| "loss": 0.4337, | |
| "num_input_tokens_seen": 14028112, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.40477888730385164, | |
| "grad_norm": 1.923315167427063, | |
| "learning_rate": 4.7787463445157286e-05, | |
| "loss": 0.5135, | |
| "num_input_tokens_seen": 14090320, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.4065620542082739, | |
| "grad_norm": 1.3430618047714233, | |
| "learning_rate": 4.7768222772366466e-05, | |
| "loss": 0.5111, | |
| "num_input_tokens_seen": 14151840, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.40834522111269617, | |
| "grad_norm": 1.5225883722305298, | |
| "learning_rate": 4.774890270876584e-05, | |
| "loss": 0.5005, | |
| "num_input_tokens_seen": 14213824, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.41012838801711843, | |
| "grad_norm": 1.0013866424560547, | |
| "learning_rate": 4.772950332172279e-05, | |
| "loss": 0.6018, | |
| "num_input_tokens_seen": 14278736, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.41191155492154063, | |
| "grad_norm": 1.0078413486480713, | |
| "learning_rate": 4.771002467888128e-05, | |
| "loss": 0.3879, | |
| "num_input_tokens_seen": 14339408, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.4136947218259629, | |
| "grad_norm": 1.1650017499923706, | |
| "learning_rate": 4.769046684816165e-05, | |
| "loss": 0.4924, | |
| "num_input_tokens_seen": 14399008, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.41547788873038516, | |
| "grad_norm": 1.351217269897461, | |
| "learning_rate": 4.767082989776034e-05, | |
| "loss": 0.4104, | |
| "num_input_tokens_seen": 14462656, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.4172610556348074, | |
| "grad_norm": 1.3392795324325562, | |
| "learning_rate": 4.76511138961497e-05, | |
| "loss": 0.4629, | |
| "num_input_tokens_seen": 14527568, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4190442225392297, | |
| "grad_norm": 1.3544095754623413, | |
| "learning_rate": 4.763131891207771e-05, | |
| "loss": 0.486, | |
| "num_input_tokens_seen": 14590944, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.42082738944365194, | |
| "grad_norm": 1.1842771768569946, | |
| "learning_rate": 4.761144501456773e-05, | |
| "loss": 0.4529, | |
| "num_input_tokens_seen": 14651104, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4226105563480742, | |
| "grad_norm": 0.9588406085968018, | |
| "learning_rate": 4.7591492272918344e-05, | |
| "loss": 0.3739, | |
| "num_input_tokens_seen": 14711344, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.4243937232524964, | |
| "grad_norm": 1.1637108325958252, | |
| "learning_rate": 4.7571460756703e-05, | |
| "loss": 0.4772, | |
| "num_input_tokens_seen": 14772656, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4261768901569187, | |
| "grad_norm": 1.225539207458496, | |
| "learning_rate": 4.755135053576987e-05, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 14833840, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.42796005706134094, | |
| "grad_norm": 1.4021525382995605, | |
| "learning_rate": 4.753116168024153e-05, | |
| "loss": 0.4168, | |
| "num_input_tokens_seen": 14896544, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4297432239657632, | |
| "grad_norm": 1.615051507949829, | |
| "learning_rate": 4.751089426051476e-05, | |
| "loss": 0.4156, | |
| "num_input_tokens_seen": 14956432, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.43152639087018546, | |
| "grad_norm": 3.979645252227783, | |
| "learning_rate": 4.749054834726029e-05, | |
| "loss": 0.5188, | |
| "num_input_tokens_seen": 15021296, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4333095577746077, | |
| "grad_norm": 1.3276335000991821, | |
| "learning_rate": 4.7470124011422555e-05, | |
| "loss": 0.4941, | |
| "num_input_tokens_seen": 15080688, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.43509272467902993, | |
| "grad_norm": 1.2513278722763062, | |
| "learning_rate": 4.744962132421943e-05, | |
| "loss": 0.4719, | |
| "num_input_tokens_seen": 15141456, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4368758915834522, | |
| "grad_norm": 1.1449891328811646, | |
| "learning_rate": 4.742904035714199e-05, | |
| "loss": 0.4811, | |
| "num_input_tokens_seen": 15202768, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.43865905848787445, | |
| "grad_norm": 1.0668220520019531, | |
| "learning_rate": 4.7408381181954284e-05, | |
| "loss": 0.4801, | |
| "num_input_tokens_seen": 15266416, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4404422253922967, | |
| "grad_norm": 1.576777458190918, | |
| "learning_rate": 4.7387643870693055e-05, | |
| "loss": 0.4551, | |
| "num_input_tokens_seen": 15328416, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.442225392296719, | |
| "grad_norm": 1.0677021741867065, | |
| "learning_rate": 4.736682849566751e-05, | |
| "loss": 0.3682, | |
| "num_input_tokens_seen": 15387392, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.44400855920114124, | |
| "grad_norm": 1.105083703994751, | |
| "learning_rate": 4.734593512945904e-05, | |
| "loss": 0.4721, | |
| "num_input_tokens_seen": 15444928, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.4457917261055635, | |
| "grad_norm": 1.1016100645065308, | |
| "learning_rate": 4.7324963844920986e-05, | |
| "loss": 0.4568, | |
| "num_input_tokens_seen": 15505488, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4475748930099857, | |
| "grad_norm": 1.4010059833526611, | |
| "learning_rate": 4.7303914715178396e-05, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 15566336, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.44935805991440797, | |
| "grad_norm": 1.149238109588623, | |
| "learning_rate": 4.728278781362777e-05, | |
| "loss": 0.3965, | |
| "num_input_tokens_seen": 15632768, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.45114122681883023, | |
| "grad_norm": 1.4296883344650269, | |
| "learning_rate": 4.7261583213936746e-05, | |
| "loss": 0.5366, | |
| "num_input_tokens_seen": 15694944, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.4529243937232525, | |
| "grad_norm": 1.2786849737167358, | |
| "learning_rate": 4.7240300990043926e-05, | |
| "loss": 0.4339, | |
| "num_input_tokens_seen": 15756496, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.45470756062767476, | |
| "grad_norm": 1.1299382448196411, | |
| "learning_rate": 4.721894121615859e-05, | |
| "loss": 0.4866, | |
| "num_input_tokens_seen": 15821200, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.456490727532097, | |
| "grad_norm": 1.1465532779693604, | |
| "learning_rate": 4.7197503966760375e-05, | |
| "loss": 0.4288, | |
| "num_input_tokens_seen": 15882736, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4582738944365193, | |
| "grad_norm": 1.4677292108535767, | |
| "learning_rate": 4.717598931659913e-05, | |
| "loss": 0.443, | |
| "num_input_tokens_seen": 15944560, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.4600570613409415, | |
| "grad_norm": 1.8437912464141846, | |
| "learning_rate": 4.7154397340694556e-05, | |
| "loss": 0.4923, | |
| "num_input_tokens_seen": 16006784, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.46184022824536375, | |
| "grad_norm": 1.5408210754394531, | |
| "learning_rate": 4.713272811433599e-05, | |
| "loss": 0.4868, | |
| "num_input_tokens_seen": 16068896, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.463623395149786, | |
| "grad_norm": 1.1977325677871704, | |
| "learning_rate": 4.711098171308214e-05, | |
| "loss": 0.4781, | |
| "num_input_tokens_seen": 16128640, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4654065620542083, | |
| "grad_norm": 1.470975399017334, | |
| "learning_rate": 4.708915821276082e-05, | |
| "loss": 0.4748, | |
| "num_input_tokens_seen": 16192800, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.46718972895863053, | |
| "grad_norm": 1.460138201713562, | |
| "learning_rate": 4.706725768946866e-05, | |
| "loss": 0.5107, | |
| "num_input_tokens_seen": 16251248, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.4689728958630528, | |
| "grad_norm": 1.2103915214538574, | |
| "learning_rate": 4.7045280219570896e-05, | |
| "loss": 0.4768, | |
| "num_input_tokens_seen": 16314704, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.47075606276747506, | |
| "grad_norm": 1.1669901609420776, | |
| "learning_rate": 4.702322587970104e-05, | |
| "loss": 0.4624, | |
| "num_input_tokens_seen": 16375792, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.47253922967189727, | |
| "grad_norm": 1.1790727376937866, | |
| "learning_rate": 4.700109474676064e-05, | |
| "loss": 0.4735, | |
| "num_input_tokens_seen": 16438672, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.4743223965763195, | |
| "grad_norm": 1.019875407218933, | |
| "learning_rate": 4.697888689791906e-05, | |
| "loss": 0.3809, | |
| "num_input_tokens_seen": 16498896, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4761055634807418, | |
| "grad_norm": 1.2999383211135864, | |
| "learning_rate": 4.6956602410613115e-05, | |
| "loss": 0.4421, | |
| "num_input_tokens_seen": 16566736, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.47788873038516405, | |
| "grad_norm": 1.4289456605911255, | |
| "learning_rate": 4.6934241362546874e-05, | |
| "loss": 0.5083, | |
| "num_input_tokens_seen": 16630480, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4796718972895863, | |
| "grad_norm": 1.2647002935409546, | |
| "learning_rate": 4.691180383169137e-05, | |
| "loss": 0.5118, | |
| "num_input_tokens_seen": 16688832, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.4814550641940086, | |
| "grad_norm": 1.1783503293991089, | |
| "learning_rate": 4.688928989628431e-05, | |
| "loss": 0.4128, | |
| "num_input_tokens_seen": 16752432, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.48323823109843084, | |
| "grad_norm": 1.2250187397003174, | |
| "learning_rate": 4.686669963482983e-05, | |
| "loss": 0.3974, | |
| "num_input_tokens_seen": 16814912, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.48502139800285304, | |
| "grad_norm": 1.5874429941177368, | |
| "learning_rate": 4.6844033126098206e-05, | |
| "loss": 0.5244, | |
| "num_input_tokens_seen": 16875696, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.4868045649072753, | |
| "grad_norm": 1.5424435138702393, | |
| "learning_rate": 4.682129044912558e-05, | |
| "loss": 0.3909, | |
| "num_input_tokens_seen": 16934768, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.48858773181169757, | |
| "grad_norm": 1.395538568496704, | |
| "learning_rate": 4.679847168321368e-05, | |
| "loss": 0.4208, | |
| "num_input_tokens_seen": 16994192, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.49037089871611983, | |
| "grad_norm": 1.3311400413513184, | |
| "learning_rate": 4.677557690792956e-05, | |
| "loss": 0.5148, | |
| "num_input_tokens_seen": 17055952, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.4921540656205421, | |
| "grad_norm": 1.0483784675598145, | |
| "learning_rate": 4.6752606203105314e-05, | |
| "loss": 0.4838, | |
| "num_input_tokens_seen": 17118352, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.49393723252496435, | |
| "grad_norm": 1.4240469932556152, | |
| "learning_rate": 4.6729559648837777e-05, | |
| "loss": 0.4676, | |
| "num_input_tokens_seen": 17181856, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.4957203994293866, | |
| "grad_norm": 1.1497527360916138, | |
| "learning_rate": 4.6706437325488285e-05, | |
| "loss": 0.4607, | |
| "num_input_tokens_seen": 17239040, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.4975035663338088, | |
| "grad_norm": 1.324589490890503, | |
| "learning_rate": 4.6683239313682356e-05, | |
| "loss": 0.3867, | |
| "num_input_tokens_seen": 17300096, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.4992867332382311, | |
| "grad_norm": 1.401481032371521, | |
| "learning_rate": 4.6659965694309446e-05, | |
| "loss": 0.477, | |
| "num_input_tokens_seen": 17367088, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5010699001426534, | |
| "grad_norm": 1.0556763410568237, | |
| "learning_rate": 4.6636616548522637e-05, | |
| "loss": 0.4092, | |
| "num_input_tokens_seen": 17427648, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.5028530670470756, | |
| "grad_norm": 1.5187320709228516, | |
| "learning_rate": 4.661319195773837e-05, | |
| "loss": 0.4266, | |
| "num_input_tokens_seen": 17491664, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5046362339514978, | |
| "grad_norm": 1.2626229524612427, | |
| "learning_rate": 4.658969200363614e-05, | |
| "loss": 0.5192, | |
| "num_input_tokens_seen": 17553312, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.5064194008559201, | |
| "grad_norm": 1.3596255779266357, | |
| "learning_rate": 4.6566116768158254e-05, | |
| "loss": 0.4983, | |
| "num_input_tokens_seen": 17614656, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5082025677603423, | |
| "grad_norm": 1.131866455078125, | |
| "learning_rate": 4.6542466333509496e-05, | |
| "loss": 0.4593, | |
| "num_input_tokens_seen": 17673104, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.5099857346647646, | |
| "grad_norm": 1.1720597743988037, | |
| "learning_rate": 4.651874078215688e-05, | |
| "loss": 0.3885, | |
| "num_input_tokens_seen": 17733920, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5117689015691869, | |
| "grad_norm": 1.1201550960540771, | |
| "learning_rate": 4.6494940196829326e-05, | |
| "loss": 0.4661, | |
| "num_input_tokens_seen": 17795024, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.5135520684736091, | |
| "grad_norm": 1.4359281063079834, | |
| "learning_rate": 4.647106466051741e-05, | |
| "loss": 0.4327, | |
| "num_input_tokens_seen": 17856080, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5153352353780314, | |
| "grad_norm": 1.2126119136810303, | |
| "learning_rate": 4.644711425647305e-05, | |
| "loss": 0.4281, | |
| "num_input_tokens_seen": 17918592, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.5171184022824536, | |
| "grad_norm": 1.1998052597045898, | |
| "learning_rate": 4.642308906820921e-05, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 17985056, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5189015691868759, | |
| "grad_norm": 1.2513782978057861, | |
| "learning_rate": 4.6398989179499635e-05, | |
| "loss": 0.4952, | |
| "num_input_tokens_seen": 18047856, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.5206847360912982, | |
| "grad_norm": 1.5451606512069702, | |
| "learning_rate": 4.637481467437854e-05, | |
| "loss": 0.4061, | |
| "num_input_tokens_seen": 18110608, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5224679029957204, | |
| "grad_norm": 1.280383586883545, | |
| "learning_rate": 4.635056563714031e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 18170192, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.5242510699001427, | |
| "grad_norm": 1.536872386932373, | |
| "learning_rate": 4.632624215233924e-05, | |
| "loss": 0.5166, | |
| "num_input_tokens_seen": 18234512, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.526034236804565, | |
| "grad_norm": 1.1344192028045654, | |
| "learning_rate": 4.6301844304789185e-05, | |
| "loss": 0.4313, | |
| "num_input_tokens_seen": 18297872, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.5278174037089871, | |
| "grad_norm": 1.2558397054672241, | |
| "learning_rate": 4.6277372179563336e-05, | |
| "loss": 0.4426, | |
| "num_input_tokens_seen": 18360688, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5296005706134094, | |
| "grad_norm": 1.3379613161087036, | |
| "learning_rate": 4.625282586199384e-05, | |
| "loss": 0.4684, | |
| "num_input_tokens_seen": 18421600, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.5313837375178316, | |
| "grad_norm": 1.471182942390442, | |
| "learning_rate": 4.622820543767159e-05, | |
| "loss": 0.3746, | |
| "num_input_tokens_seen": 18482608, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5331669044222539, | |
| "grad_norm": 1.147135853767395, | |
| "learning_rate": 4.6203510992445844e-05, | |
| "loss": 0.3896, | |
| "num_input_tokens_seen": 18542720, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.5349500713266762, | |
| "grad_norm": 1.6015293598175049, | |
| "learning_rate": 4.617874261242399e-05, | |
| "loss": 0.4613, | |
| "num_input_tokens_seen": 18604304, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5367332382310984, | |
| "grad_norm": 1.1261463165283203, | |
| "learning_rate": 4.615390038397121e-05, | |
| "loss": 0.4636, | |
| "num_input_tokens_seen": 18666336, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.5385164051355207, | |
| "grad_norm": 1.1836202144622803, | |
| "learning_rate": 4.612898439371019e-05, | |
| "loss": 0.4072, | |
| "num_input_tokens_seen": 18724912, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5402995720399429, | |
| "grad_norm": 1.108585238456726, | |
| "learning_rate": 4.6103994728520815e-05, | |
| "loss": 0.3483, | |
| "num_input_tokens_seen": 18786352, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.5420827389443652, | |
| "grad_norm": 1.3794957399368286, | |
| "learning_rate": 4.607893147553989e-05, | |
| "loss": 0.4259, | |
| "num_input_tokens_seen": 18851488, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5438659058487875, | |
| "grad_norm": 1.4083433151245117, | |
| "learning_rate": 4.605379472216076e-05, | |
| "loss": 0.4364, | |
| "num_input_tokens_seen": 18915008, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.5456490727532097, | |
| "grad_norm": 1.3088963031768799, | |
| "learning_rate": 4.602858455603313e-05, | |
| "loss": 0.4098, | |
| "num_input_tokens_seen": 18976256, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.547432239657632, | |
| "grad_norm": 1.3022725582122803, | |
| "learning_rate": 4.600330106506263e-05, | |
| "loss": 0.4449, | |
| "num_input_tokens_seen": 19036560, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.5492154065620543, | |
| "grad_norm": 1.7286397218704224, | |
| "learning_rate": 4.597794433741061e-05, | |
| "loss": 0.5088, | |
| "num_input_tokens_seen": 19097568, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5509985734664765, | |
| "grad_norm": 1.4286762475967407, | |
| "learning_rate": 4.5952514461493754e-05, | |
| "loss": 0.445, | |
| "num_input_tokens_seen": 19158592, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.5527817403708987, | |
| "grad_norm": 1.2713367938995361, | |
| "learning_rate": 4.5927011525983824e-05, | |
| "loss": 0.3791, | |
| "num_input_tokens_seen": 19215600, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5545649072753209, | |
| "grad_norm": 1.3422623872756958, | |
| "learning_rate": 4.590143561980736e-05, | |
| "loss": 0.4897, | |
| "num_input_tokens_seen": 19277184, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.5563480741797432, | |
| "grad_norm": 1.278333306312561, | |
| "learning_rate": 4.5875786832145287e-05, | |
| "loss": 0.4426, | |
| "num_input_tokens_seen": 19338032, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5581312410841655, | |
| "grad_norm": 1.4938713312149048, | |
| "learning_rate": 4.5850065252432706e-05, | |
| "loss": 0.4246, | |
| "num_input_tokens_seen": 19397040, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.5599144079885877, | |
| "grad_norm": 2.4364399909973145, | |
| "learning_rate": 4.582427097035854e-05, | |
| "loss": 0.4777, | |
| "num_input_tokens_seen": 19456144, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.56169757489301, | |
| "grad_norm": 3.5539422035217285, | |
| "learning_rate": 4.579840407586517e-05, | |
| "loss": 0.4894, | |
| "num_input_tokens_seen": 19518176, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.5634807417974322, | |
| "grad_norm": 1.4036399126052856, | |
| "learning_rate": 4.577246465914825e-05, | |
| "loss": 0.4704, | |
| "num_input_tokens_seen": 19581024, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5652639087018545, | |
| "grad_norm": 0.9552262425422668, | |
| "learning_rate": 4.5746452810656225e-05, | |
| "loss": 0.4527, | |
| "num_input_tokens_seen": 19643104, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.5670470756062768, | |
| "grad_norm": 1.2145711183547974, | |
| "learning_rate": 4.572036862109017e-05, | |
| "loss": 0.4612, | |
| "num_input_tokens_seen": 19702528, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.568830242510699, | |
| "grad_norm": 1.0046789646148682, | |
| "learning_rate": 4.5694212181403374e-05, | |
| "loss": 0.4235, | |
| "num_input_tokens_seen": 19763424, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.5706134094151213, | |
| "grad_norm": 1.3540983200073242, | |
| "learning_rate": 4.5667983582801064e-05, | |
| "loss": 0.3833, | |
| "num_input_tokens_seen": 19823200, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5723965763195435, | |
| "grad_norm": 1.2544758319854736, | |
| "learning_rate": 4.5641682916740084e-05, | |
| "loss": 0.4586, | |
| "num_input_tokens_seen": 19883888, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.5741797432239658, | |
| "grad_norm": 1.1667801141738892, | |
| "learning_rate": 4.5615310274928556e-05, | |
| "loss": 0.5969, | |
| "num_input_tokens_seen": 19949840, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.5759629101283881, | |
| "grad_norm": 0.9844037294387817, | |
| "learning_rate": 4.5588865749325594e-05, | |
| "loss": 0.3798, | |
| "num_input_tokens_seen": 20014640, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.5777460770328102, | |
| "grad_norm": 1.3161027431488037, | |
| "learning_rate": 4.556234943214095e-05, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 20077008, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.5795292439372325, | |
| "grad_norm": 1.1113629341125488, | |
| "learning_rate": 4.5535761415834724e-05, | |
| "loss": 0.4714, | |
| "num_input_tokens_seen": 20141488, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.5813124108416547, | |
| "grad_norm": 1.3117053508758545, | |
| "learning_rate": 4.550910179311699e-05, | |
| "loss": 0.5514, | |
| "num_input_tokens_seen": 20206016, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.583095577746077, | |
| "grad_norm": 1.151132345199585, | |
| "learning_rate": 4.5482370656947554e-05, | |
| "loss": 0.4626, | |
| "num_input_tokens_seen": 20270880, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.5848787446504993, | |
| "grad_norm": 2.0122318267822266, | |
| "learning_rate": 4.5455568100535545e-05, | |
| "loss": 0.4758, | |
| "num_input_tokens_seen": 20334448, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.5866619115549215, | |
| "grad_norm": 1.6800963878631592, | |
| "learning_rate": 4.542869421733915e-05, | |
| "loss": 0.4178, | |
| "num_input_tokens_seen": 20398480, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.5884450784593438, | |
| "grad_norm": 1.4573643207550049, | |
| "learning_rate": 4.540174910106526e-05, | |
| "loss": 0.4314, | |
| "num_input_tokens_seen": 20458128, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5902282453637661, | |
| "grad_norm": 1.1499691009521484, | |
| "learning_rate": 4.537473284566914e-05, | |
| "loss": 0.4182, | |
| "num_input_tokens_seen": 20521840, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.5920114122681883, | |
| "grad_norm": 1.1684014797210693, | |
| "learning_rate": 4.5347645545354136e-05, | |
| "loss": 0.3945, | |
| "num_input_tokens_seen": 20582304, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5937945791726106, | |
| "grad_norm": 1.358035683631897, | |
| "learning_rate": 4.532048729457128e-05, | |
| "loss": 0.4674, | |
| "num_input_tokens_seen": 20642656, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.5955777460770328, | |
| "grad_norm": 1.285057783126831, | |
| "learning_rate": 4.5293258188019055e-05, | |
| "loss": 0.4027, | |
| "num_input_tokens_seen": 20709664, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5973609129814551, | |
| "grad_norm": 1.0107051134109497, | |
| "learning_rate": 4.526595832064296e-05, | |
| "loss": 0.4402, | |
| "num_input_tokens_seen": 20769888, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.5991440798858774, | |
| "grad_norm": 1.144665241241455, | |
| "learning_rate": 4.523858778763528e-05, | |
| "loss": 0.4725, | |
| "num_input_tokens_seen": 20834912, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6009272467902995, | |
| "grad_norm": 1.5452603101730347, | |
| "learning_rate": 4.521114668443464e-05, | |
| "loss": 0.4413, | |
| "num_input_tokens_seen": 20896784, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.6027104136947218, | |
| "grad_norm": 1.0601692199707031, | |
| "learning_rate": 4.518363510672583e-05, | |
| "loss": 0.4758, | |
| "num_input_tokens_seen": 20954224, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.604493580599144, | |
| "grad_norm": 1.6150104999542236, | |
| "learning_rate": 4.515605315043928e-05, | |
| "loss": 0.4027, | |
| "num_input_tokens_seen": 21019760, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.6062767475035663, | |
| "grad_norm": 1.3952018022537231, | |
| "learning_rate": 4.512840091175089e-05, | |
| "loss": 0.4497, | |
| "num_input_tokens_seen": 21081952, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6080599144079886, | |
| "grad_norm": 1.6579699516296387, | |
| "learning_rate": 4.5100678487081614e-05, | |
| "loss": 0.4343, | |
| "num_input_tokens_seen": 21145680, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.6098430813124108, | |
| "grad_norm": 1.5067193508148193, | |
| "learning_rate": 4.507288597309711e-05, | |
| "loss": 0.4142, | |
| "num_input_tokens_seen": 21206048, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6116262482168331, | |
| "grad_norm": 1.2458901405334473, | |
| "learning_rate": 4.504502346670748e-05, | |
| "loss": 0.5092, | |
| "num_input_tokens_seen": 21269520, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.6134094151212554, | |
| "grad_norm": 1.33489990234375, | |
| "learning_rate": 4.5017091065066837e-05, | |
| "loss": 0.4563, | |
| "num_input_tokens_seen": 21331136, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6151925820256776, | |
| "grad_norm": 1.4016698598861694, | |
| "learning_rate": 4.4989088865573035e-05, | |
| "loss": 0.3743, | |
| "num_input_tokens_seen": 21392496, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.6169757489300999, | |
| "grad_norm": 1.5638152360916138, | |
| "learning_rate": 4.496101696586732e-05, | |
| "loss": 0.4823, | |
| "num_input_tokens_seen": 21455504, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6187589158345221, | |
| "grad_norm": 1.2184085845947266, | |
| "learning_rate": 4.4932875463833944e-05, | |
| "loss": 0.4219, | |
| "num_input_tokens_seen": 21518800, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.6205420827389444, | |
| "grad_norm": 1.5745280981063843, | |
| "learning_rate": 4.490466445759988e-05, | |
| "loss": 0.506, | |
| "num_input_tokens_seen": 21579120, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6223252496433667, | |
| "grad_norm": 1.4783879518508911, | |
| "learning_rate": 4.487638404553445e-05, | |
| "loss": 0.4638, | |
| "num_input_tokens_seen": 21638528, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.6241084165477889, | |
| "grad_norm": 1.4319891929626465, | |
| "learning_rate": 4.484803432624899e-05, | |
| "loss": 0.434, | |
| "num_input_tokens_seen": 21703664, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6258915834522111, | |
| "grad_norm": 1.3542821407318115, | |
| "learning_rate": 4.48196153985965e-05, | |
| "loss": 0.4472, | |
| "num_input_tokens_seen": 21764336, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.6276747503566333, | |
| "grad_norm": 1.1602082252502441, | |
| "learning_rate": 4.4791127361671304e-05, | |
| "loss": 0.3541, | |
| "num_input_tokens_seen": 21825392, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6294579172610556, | |
| "grad_norm": 1.6145776510238647, | |
| "learning_rate": 4.476257031480871e-05, | |
| "loss": 0.4401, | |
| "num_input_tokens_seen": 21886848, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.6312410841654779, | |
| "grad_norm": 1.1257821321487427, | |
| "learning_rate": 4.4733944357584644e-05, | |
| "loss": 0.5242, | |
| "num_input_tokens_seen": 21951680, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6330242510699001, | |
| "grad_norm": 1.4322980642318726, | |
| "learning_rate": 4.470524958981534e-05, | |
| "loss": 0.4926, | |
| "num_input_tokens_seen": 22016624, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.6348074179743224, | |
| "grad_norm": 1.255799651145935, | |
| "learning_rate": 4.4676486111556936e-05, | |
| "loss": 0.4128, | |
| "num_input_tokens_seen": 22079040, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6365905848787446, | |
| "grad_norm": 1.157120943069458, | |
| "learning_rate": 4.46476540231052e-05, | |
| "loss": 0.3521, | |
| "num_input_tokens_seen": 22142400, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.6383737517831669, | |
| "grad_norm": 1.5262624025344849, | |
| "learning_rate": 4.461875342499509e-05, | |
| "loss": 0.4028, | |
| "num_input_tokens_seen": 22199136, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6401569186875892, | |
| "grad_norm": 1.7937567234039307, | |
| "learning_rate": 4.458978441800048e-05, | |
| "loss": 0.4126, | |
| "num_input_tokens_seen": 22260608, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.6419400855920114, | |
| "grad_norm": 1.3475735187530518, | |
| "learning_rate": 4.456074710313378e-05, | |
| "loss": 0.4692, | |
| "num_input_tokens_seen": 22322272, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6437232524964337, | |
| "grad_norm": 1.2804908752441406, | |
| "learning_rate": 4.4531641581645576e-05, | |
| "loss": 0.4931, | |
| "num_input_tokens_seen": 22384368, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.645506419400856, | |
| "grad_norm": 1.2529658079147339, | |
| "learning_rate": 4.4502467955024294e-05, | |
| "loss": 0.386, | |
| "num_input_tokens_seen": 22447888, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6472895863052782, | |
| "grad_norm": 1.3398923873901367, | |
| "learning_rate": 4.447322632499581e-05, | |
| "loss": 0.4522, | |
| "num_input_tokens_seen": 22514704, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.6490727532097005, | |
| "grad_norm": 1.320273518562317, | |
| "learning_rate": 4.444391679352315e-05, | |
| "loss": 0.4082, | |
| "num_input_tokens_seen": 22573024, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6508559201141226, | |
| "grad_norm": 1.2203108072280884, | |
| "learning_rate": 4.441453946280612e-05, | |
| "loss": 0.4551, | |
| "num_input_tokens_seen": 22632080, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.6526390870185449, | |
| "grad_norm": 1.1191906929016113, | |
| "learning_rate": 4.4385094435280873e-05, | |
| "loss": 0.3873, | |
| "num_input_tokens_seen": 22692192, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6544222539229672, | |
| "grad_norm": 1.249611496925354, | |
| "learning_rate": 4.435558181361969e-05, | |
| "loss": 0.398, | |
| "num_input_tokens_seen": 22754624, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.6562054208273894, | |
| "grad_norm": 1.4326295852661133, | |
| "learning_rate": 4.432600170073048e-05, | |
| "loss": 0.4159, | |
| "num_input_tokens_seen": 22819616, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6579885877318117, | |
| "grad_norm": 1.2453666925430298, | |
| "learning_rate": 4.429635419975655e-05, | |
| "loss": 0.4343, | |
| "num_input_tokens_seen": 22879136, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.6597717546362339, | |
| "grad_norm": 1.1724647283554077, | |
| "learning_rate": 4.426663941407614e-05, | |
| "loss": 0.4287, | |
| "num_input_tokens_seen": 22940528, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6615549215406562, | |
| "grad_norm": 1.185964822769165, | |
| "learning_rate": 4.423685744730213e-05, | |
| "loss": 0.3901, | |
| "num_input_tokens_seen": 23004128, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.6633380884450785, | |
| "grad_norm": 1.167861819267273, | |
| "learning_rate": 4.420700840328162e-05, | |
| "loss": 0.512, | |
| "num_input_tokens_seen": 23066240, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6651212553495007, | |
| "grad_norm": 1.6327167749404907, | |
| "learning_rate": 4.417709238609566e-05, | |
| "loss": 0.4102, | |
| "num_input_tokens_seen": 23126128, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.666904422253923, | |
| "grad_norm": 1.0951687097549438, | |
| "learning_rate": 4.4147109500058776e-05, | |
| "loss": 0.4767, | |
| "num_input_tokens_seen": 23182704, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6686875891583453, | |
| "grad_norm": 1.1051822900772095, | |
| "learning_rate": 4.411705984971868e-05, | |
| "loss": 0.4009, | |
| "num_input_tokens_seen": 23244816, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.6704707560627675, | |
| "grad_norm": 1.4562581777572632, | |
| "learning_rate": 4.408694353985589e-05, | |
| "loss": 0.5083, | |
| "num_input_tokens_seen": 23307776, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6722539229671898, | |
| "grad_norm": 1.4651310443878174, | |
| "learning_rate": 4.4056760675483356e-05, | |
| "loss": 0.5302, | |
| "num_input_tokens_seen": 23370368, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.6740370898716119, | |
| "grad_norm": 1.1008446216583252, | |
| "learning_rate": 4.402651136184609e-05, | |
| "loss": 0.3035, | |
| "num_input_tokens_seen": 23436192, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.6758202567760342, | |
| "grad_norm": 1.7820332050323486, | |
| "learning_rate": 4.3996195704420826e-05, | |
| "loss": 0.3972, | |
| "num_input_tokens_seen": 23501408, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.6776034236804565, | |
| "grad_norm": 1.2907474040985107, | |
| "learning_rate": 4.396581380891562e-05, | |
| "loss": 0.4644, | |
| "num_input_tokens_seen": 23561072, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6793865905848787, | |
| "grad_norm": 1.3212149143218994, | |
| "learning_rate": 4.3935365781269476e-05, | |
| "loss": 0.5038, | |
| "num_input_tokens_seen": 23622784, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.681169757489301, | |
| "grad_norm": 1.5679349899291992, | |
| "learning_rate": 4.390485172765204e-05, | |
| "loss": 0.4286, | |
| "num_input_tokens_seen": 23682096, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.6829529243937232, | |
| "grad_norm": 1.3519368171691895, | |
| "learning_rate": 4.387427175446315e-05, | |
| "loss": 0.395, | |
| "num_input_tokens_seen": 23742608, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.6847360912981455, | |
| "grad_norm": 1.419407844543457, | |
| "learning_rate": 4.38436259683325e-05, | |
| "loss": 0.4882, | |
| "num_input_tokens_seen": 23805008, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6865192582025678, | |
| "grad_norm": 1.1762245893478394, | |
| "learning_rate": 4.3812914476119293e-05, | |
| "loss": 0.3958, | |
| "num_input_tokens_seen": 23868960, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.68830242510699, | |
| "grad_norm": 1.192785382270813, | |
| "learning_rate": 4.378213738491182e-05, | |
| "loss": 0.4351, | |
| "num_input_tokens_seen": 23933808, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.6900855920114123, | |
| "grad_norm": 1.0715630054473877, | |
| "learning_rate": 4.375129480202711e-05, | |
| "loss": 0.365, | |
| "num_input_tokens_seen": 23990160, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.6918687589158345, | |
| "grad_norm": 1.3897589445114136, | |
| "learning_rate": 4.372038683501057e-05, | |
| "loss": 0.5394, | |
| "num_input_tokens_seen": 24056944, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.6936519258202568, | |
| "grad_norm": 1.3039186000823975, | |
| "learning_rate": 4.36894135916356e-05, | |
| "loss": 0.4806, | |
| "num_input_tokens_seen": 24120016, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.6954350927246791, | |
| "grad_norm": 1.101616621017456, | |
| "learning_rate": 4.3658375179903185e-05, | |
| "loss": 0.5264, | |
| "num_input_tokens_seen": 24181376, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6972182596291013, | |
| "grad_norm": 1.0111804008483887, | |
| "learning_rate": 4.3627271708041565e-05, | |
| "loss": 0.4425, | |
| "num_input_tokens_seen": 24243440, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.6990014265335235, | |
| "grad_norm": 1.257667899131775, | |
| "learning_rate": 4.3596103284505854e-05, | |
| "loss": 0.4541, | |
| "num_input_tokens_seen": 24301264, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7007845934379457, | |
| "grad_norm": 1.5398532152175903, | |
| "learning_rate": 4.35648700179776e-05, | |
| "loss": 0.4665, | |
| "num_input_tokens_seen": 24362208, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.702567760342368, | |
| "grad_norm": 1.2097506523132324, | |
| "learning_rate": 4.353357201736452e-05, | |
| "loss": 0.409, | |
| "num_input_tokens_seen": 24426144, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7043509272467903, | |
| "grad_norm": 1.243800163269043, | |
| "learning_rate": 4.3502209391799985e-05, | |
| "loss": 0.3783, | |
| "num_input_tokens_seen": 24489120, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.7061340941512125, | |
| "grad_norm": 1.3025860786437988, | |
| "learning_rate": 4.347078225064276e-05, | |
| "loss": 0.3957, | |
| "num_input_tokens_seen": 24550720, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7079172610556348, | |
| "grad_norm": 1.6858657598495483, | |
| "learning_rate": 4.343929070347653e-05, | |
| "loss": 0.4329, | |
| "num_input_tokens_seen": 24614608, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.7097004279600571, | |
| "grad_norm": 1.7270207405090332, | |
| "learning_rate": 4.34077348601096e-05, | |
| "loss": 0.4347, | |
| "num_input_tokens_seen": 24679136, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7114835948644793, | |
| "grad_norm": 1.3973181247711182, | |
| "learning_rate": 4.337611483057443e-05, | |
| "loss": 0.4126, | |
| "num_input_tokens_seen": 24738160, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.7132667617689016, | |
| "grad_norm": 1.5383044481277466, | |
| "learning_rate": 4.3344430725127315e-05, | |
| "loss": 0.4086, | |
| "num_input_tokens_seen": 24803728, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7150499286733238, | |
| "grad_norm": 1.096655249595642, | |
| "learning_rate": 4.331268265424797e-05, | |
| "loss": 0.4139, | |
| "num_input_tokens_seen": 24868144, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.7168330955777461, | |
| "grad_norm": 1.1997692584991455, | |
| "learning_rate": 4.328087072863915e-05, | |
| "loss": 0.4188, | |
| "num_input_tokens_seen": 24930448, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7186162624821684, | |
| "grad_norm": 1.1563403606414795, | |
| "learning_rate": 4.3248995059226284e-05, | |
| "loss": 0.5091, | |
| "num_input_tokens_seen": 24991664, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.7203994293865906, | |
| "grad_norm": 1.050632119178772, | |
| "learning_rate": 4.321705575715703e-05, | |
| "loss": 0.4012, | |
| "num_input_tokens_seen": 25055840, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7221825962910129, | |
| "grad_norm": 1.3263276815414429, | |
| "learning_rate": 4.318505293380097e-05, | |
| "loss": 0.4003, | |
| "num_input_tokens_seen": 25117968, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.723965763195435, | |
| "grad_norm": 1.3196632862091064, | |
| "learning_rate": 4.3152986700749165e-05, | |
| "loss": 0.5408, | |
| "num_input_tokens_seen": 25183168, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7257489300998573, | |
| "grad_norm": 1.1344470977783203, | |
| "learning_rate": 4.3120857169813766e-05, | |
| "loss": 0.4827, | |
| "num_input_tokens_seen": 25245424, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.7275320970042796, | |
| "grad_norm": 0.9428476691246033, | |
| "learning_rate": 4.308866445302766e-05, | |
| "loss": 0.4728, | |
| "num_input_tokens_seen": 25307296, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7293152639087018, | |
| "grad_norm": 1.3459362983703613, | |
| "learning_rate": 4.3056408662644024e-05, | |
| "loss": 0.4732, | |
| "num_input_tokens_seen": 25368656, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.7310984308131241, | |
| "grad_norm": 1.1690179109573364, | |
| "learning_rate": 4.302408991113601e-05, | |
| "loss": 0.409, | |
| "num_input_tokens_seen": 25429792, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7328815977175464, | |
| "grad_norm": 1.6628319025039673, | |
| "learning_rate": 4.2991708311196285e-05, | |
| "loss": 0.4224, | |
| "num_input_tokens_seen": 25494064, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.7346647646219686, | |
| "grad_norm": 1.2542611360549927, | |
| "learning_rate": 4.2959263975736676e-05, | |
| "loss": 0.4836, | |
| "num_input_tokens_seen": 25559856, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7364479315263909, | |
| "grad_norm": 1.2287793159484863, | |
| "learning_rate": 4.292675701788774e-05, | |
| "loss": 0.3773, | |
| "num_input_tokens_seen": 25622160, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.7382310984308131, | |
| "grad_norm": 1.0096776485443115, | |
| "learning_rate": 4.289418755099841e-05, | |
| "loss": 0.4791, | |
| "num_input_tokens_seen": 25685120, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7400142653352354, | |
| "grad_norm": 1.1361216306686401, | |
| "learning_rate": 4.28615556886356e-05, | |
| "loss": 0.4616, | |
| "num_input_tokens_seen": 25748256, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.7417974322396577, | |
| "grad_norm": 1.6110931634902954, | |
| "learning_rate": 4.2828861544583746e-05, | |
| "loss": 0.47, | |
| "num_input_tokens_seen": 25810016, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7435805991440799, | |
| "grad_norm": 1.3852949142456055, | |
| "learning_rate": 4.279610523284449e-05, | |
| "loss": 0.4363, | |
| "num_input_tokens_seen": 25875264, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.7453637660485022, | |
| "grad_norm": 1.6177430152893066, | |
| "learning_rate": 4.2763286867636244e-05, | |
| "loss": 0.3818, | |
| "num_input_tokens_seen": 25936032, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7471469329529244, | |
| "grad_norm": 1.313751220703125, | |
| "learning_rate": 4.2730406563393777e-05, | |
| "loss": 0.3704, | |
| "num_input_tokens_seen": 25997696, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.7489300998573466, | |
| "grad_norm": 1.566335678100586, | |
| "learning_rate": 4.269746443476787e-05, | |
| "loss": 0.4764, | |
| "num_input_tokens_seen": 26060400, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7507132667617689, | |
| "grad_norm": 1.0574077367782593, | |
| "learning_rate": 4.266446059662482e-05, | |
| "loss": 0.3958, | |
| "num_input_tokens_seen": 26117744, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.7524964336661911, | |
| "grad_norm": 1.2322158813476562, | |
| "learning_rate": 4.2631395164046165e-05, | |
| "loss": 0.3588, | |
| "num_input_tokens_seen": 26179472, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7542796005706134, | |
| "grad_norm": 1.3826179504394531, | |
| "learning_rate": 4.259826825232819e-05, | |
| "loss": 0.4088, | |
| "num_input_tokens_seen": 26241296, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.7560627674750356, | |
| "grad_norm": 1.3250257968902588, | |
| "learning_rate": 4.256507997698152e-05, | |
| "loss": 0.4987, | |
| "num_input_tokens_seen": 26304048, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.7578459343794579, | |
| "grad_norm": 1.3705068826675415, | |
| "learning_rate": 4.2531830453730824e-05, | |
| "loss": 0.4284, | |
| "num_input_tokens_seen": 26366240, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.7596291012838802, | |
| "grad_norm": 1.335214376449585, | |
| "learning_rate": 4.249851979851426e-05, | |
| "loss": 0.427, | |
| "num_input_tokens_seen": 26427888, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.7614122681883024, | |
| "grad_norm": 1.2708473205566406, | |
| "learning_rate": 4.24651481274832e-05, | |
| "loss": 0.3807, | |
| "num_input_tokens_seen": 26489536, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.7631954350927247, | |
| "grad_norm": 1.567751169204712, | |
| "learning_rate": 4.243171555700174e-05, | |
| "loss": 0.4036, | |
| "num_input_tokens_seen": 26552608, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.764978601997147, | |
| "grad_norm": 1.4113706350326538, | |
| "learning_rate": 4.2398222203646355e-05, | |
| "loss": 0.4032, | |
| "num_input_tokens_seen": 26617392, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.7667617689015692, | |
| "grad_norm": 1.4144871234893799, | |
| "learning_rate": 4.2364668184205445e-05, | |
| "loss": 0.408, | |
| "num_input_tokens_seen": 26677216, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7685449358059915, | |
| "grad_norm": 1.2482500076293945, | |
| "learning_rate": 4.233105361567894e-05, | |
| "loss": 0.3975, | |
| "num_input_tokens_seen": 26739344, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.7703281027104137, | |
| "grad_norm": 1.270215630531311, | |
| "learning_rate": 4.2297378615277935e-05, | |
| "loss": 0.416, | |
| "num_input_tokens_seen": 26801200, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.7721112696148359, | |
| "grad_norm": 1.1123536825180054, | |
| "learning_rate": 4.226364330042422e-05, | |
| "loss": 0.3925, | |
| "num_input_tokens_seen": 26863440, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.7738944365192582, | |
| "grad_norm": 1.3251960277557373, | |
| "learning_rate": 4.2229847788749886e-05, | |
| "loss": 0.3339, | |
| "num_input_tokens_seen": 26925632, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7756776034236804, | |
| "grad_norm": 1.4706435203552246, | |
| "learning_rate": 4.2195992198096956e-05, | |
| "loss": 0.4934, | |
| "num_input_tokens_seen": 26989776, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.7774607703281027, | |
| "grad_norm": 1.3785024881362915, | |
| "learning_rate": 4.216207664651691e-05, | |
| "loss": 0.4337, | |
| "num_input_tokens_seen": 27051392, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7792439372325249, | |
| "grad_norm": 1.1321138143539429, | |
| "learning_rate": 4.2128101252270335e-05, | |
| "loss": 0.3584, | |
| "num_input_tokens_seen": 27117712, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.7810271041369472, | |
| "grad_norm": 1.3333991765975952, | |
| "learning_rate": 4.2094066133826457e-05, | |
| "loss": 0.4337, | |
| "num_input_tokens_seen": 27181344, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7828102710413695, | |
| "grad_norm": 1.4429007768630981, | |
| "learning_rate": 4.2059971409862785e-05, | |
| "loss": 0.4527, | |
| "num_input_tokens_seen": 27245440, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.7845934379457917, | |
| "grad_norm": 0.9634119868278503, | |
| "learning_rate": 4.202581719926465e-05, | |
| "loss": 0.3812, | |
| "num_input_tokens_seen": 27310976, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.786376604850214, | |
| "grad_norm": 1.2908122539520264, | |
| "learning_rate": 4.1991603621124796e-05, | |
| "loss": 0.3835, | |
| "num_input_tokens_seen": 27377088, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.7881597717546363, | |
| "grad_norm": 1.4514962434768677, | |
| "learning_rate": 4.195733079474301e-05, | |
| "loss": 0.4294, | |
| "num_input_tokens_seen": 27440160, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.7899429386590585, | |
| "grad_norm": 1.1848522424697876, | |
| "learning_rate": 4.192299883962564e-05, | |
| "loss": 0.4469, | |
| "num_input_tokens_seen": 27497936, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.7917261055634808, | |
| "grad_norm": 1.6799744367599487, | |
| "learning_rate": 4.188860787548522e-05, | |
| "loss": 0.517, | |
| "num_input_tokens_seen": 27558608, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.793509272467903, | |
| "grad_norm": 1.2593986988067627, | |
| "learning_rate": 4.185415802224005e-05, | |
| "loss": 0.4464, | |
| "num_input_tokens_seen": 27618704, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.7952924393723253, | |
| "grad_norm": 0.8842148184776306, | |
| "learning_rate": 4.181964940001378e-05, | |
| "loss": 0.3391, | |
| "num_input_tokens_seen": 27678272, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.7970756062767475, | |
| "grad_norm": 1.397862434387207, | |
| "learning_rate": 4.1785082129134955e-05, | |
| "loss": 0.4106, | |
| "num_input_tokens_seen": 27736064, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.7988587731811697, | |
| "grad_norm": 1.234437346458435, | |
| "learning_rate": 4.175045633013665e-05, | |
| "loss": 0.4695, | |
| "num_input_tokens_seen": 27797456, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.800641940085592, | |
| "grad_norm": 1.6073672771453857, | |
| "learning_rate": 4.1715772123755994e-05, | |
| "loss": 0.4356, | |
| "num_input_tokens_seen": 27860640, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.8024251069900142, | |
| "grad_norm": 1.7200384140014648, | |
| "learning_rate": 4.1681029630933804e-05, | |
| "loss": 0.4549, | |
| "num_input_tokens_seen": 27920992, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8042082738944365, | |
| "grad_norm": 1.1777783632278442, | |
| "learning_rate": 4.1646228972814126e-05, | |
| "loss": 0.4374, | |
| "num_input_tokens_seen": 27984960, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.8059914407988588, | |
| "grad_norm": 0.9882134795188904, | |
| "learning_rate": 4.1611370270743826e-05, | |
| "loss": 0.4478, | |
| "num_input_tokens_seen": 28046064, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.807774607703281, | |
| "grad_norm": 1.7729289531707764, | |
| "learning_rate": 4.157645364627216e-05, | |
| "loss": 0.4987, | |
| "num_input_tokens_seen": 28106368, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.8095577746077033, | |
| "grad_norm": 1.2028993368148804, | |
| "learning_rate": 4.154147922115036e-05, | |
| "loss": 0.4149, | |
| "num_input_tokens_seen": 28170784, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8113409415121255, | |
| "grad_norm": 1.251348853111267, | |
| "learning_rate": 4.1506447117331195e-05, | |
| "loss": 0.4042, | |
| "num_input_tokens_seen": 28231216, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.8131241084165478, | |
| "grad_norm": 1.2988697290420532, | |
| "learning_rate": 4.1471357456968556e-05, | |
| "loss": 0.4622, | |
| "num_input_tokens_seen": 28291392, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8149072753209701, | |
| "grad_norm": 1.257813811302185, | |
| "learning_rate": 4.143621036241705e-05, | |
| "loss": 0.427, | |
| "num_input_tokens_seen": 28354848, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.8166904422253923, | |
| "grad_norm": 1.110034465789795, | |
| "learning_rate": 4.1401005956231506e-05, | |
| "loss": 0.4259, | |
| "num_input_tokens_seen": 28415808, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8184736091298146, | |
| "grad_norm": 2.1352555751800537, | |
| "learning_rate": 4.136574436116665e-05, | |
| "loss": 0.4289, | |
| "num_input_tokens_seen": 28479648, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.8202567760342369, | |
| "grad_norm": 1.8342629671096802, | |
| "learning_rate": 4.1330425700176586e-05, | |
| "loss": 0.4511, | |
| "num_input_tokens_seen": 28539184, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.822039942938659, | |
| "grad_norm": 1.5662261247634888, | |
| "learning_rate": 4.12950500964144e-05, | |
| "loss": 0.4681, | |
| "num_input_tokens_seen": 28599488, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.8238231098430813, | |
| "grad_norm": 1.5573149919509888, | |
| "learning_rate": 4.125961767323174e-05, | |
| "loss": 0.4644, | |
| "num_input_tokens_seen": 28661568, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8256062767475035, | |
| "grad_norm": 1.1155126094818115, | |
| "learning_rate": 4.1224128554178394e-05, | |
| "loss": 0.4353, | |
| "num_input_tokens_seen": 28720656, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.8273894436519258, | |
| "grad_norm": 1.0635948181152344, | |
| "learning_rate": 4.118858286300182e-05, | |
| "loss": 0.3924, | |
| "num_input_tokens_seen": 28783488, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.829172610556348, | |
| "grad_norm": 1.1664304733276367, | |
| "learning_rate": 4.1152980723646745e-05, | |
| "loss": 0.4228, | |
| "num_input_tokens_seen": 28843568, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.8309557774607703, | |
| "grad_norm": 1.4405851364135742, | |
| "learning_rate": 4.111732226025473e-05, | |
| "loss": 0.5034, | |
| "num_input_tokens_seen": 28907856, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.8327389443651926, | |
| "grad_norm": 1.6694755554199219, | |
| "learning_rate": 4.108160759716373e-05, | |
| "loss": 0.4676, | |
| "num_input_tokens_seen": 28973616, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.8345221112696148, | |
| "grad_norm": 1.5665149688720703, | |
| "learning_rate": 4.1045836858907676e-05, | |
| "loss": 0.4959, | |
| "num_input_tokens_seen": 29037232, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8363052781740371, | |
| "grad_norm": 1.4531359672546387, | |
| "learning_rate": 4.1010010170216e-05, | |
| "loss": 0.407, | |
| "num_input_tokens_seen": 29096032, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.8380884450784594, | |
| "grad_norm": 1.453543782234192, | |
| "learning_rate": 4.097412765601326e-05, | |
| "loss": 0.3479, | |
| "num_input_tokens_seen": 29158688, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8398716119828816, | |
| "grad_norm": 1.0374047756195068, | |
| "learning_rate": 4.093818944141865e-05, | |
| "loss": 0.4006, | |
| "num_input_tokens_seen": 29221328, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.8416547788873039, | |
| "grad_norm": 0.9795634746551514, | |
| "learning_rate": 4.090219565174559e-05, | |
| "loss": 0.3167, | |
| "num_input_tokens_seen": 29281968, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8434379457917262, | |
| "grad_norm": 1.4658923149108887, | |
| "learning_rate": 4.08661464125013e-05, | |
| "loss": 0.3598, | |
| "num_input_tokens_seen": 29343616, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.8452211126961484, | |
| "grad_norm": 1.236444354057312, | |
| "learning_rate": 4.083004184938633e-05, | |
| "loss": 0.4541, | |
| "num_input_tokens_seen": 29407664, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8470042796005706, | |
| "grad_norm": 1.3159562349319458, | |
| "learning_rate": 4.079388208829415e-05, | |
| "loss": 0.4256, | |
| "num_input_tokens_seen": 29476880, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.8487874465049928, | |
| "grad_norm": 1.3893544673919678, | |
| "learning_rate": 4.075766725531069e-05, | |
| "loss": 0.3899, | |
| "num_input_tokens_seen": 29536800, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8505706134094151, | |
| "grad_norm": 1.5271923542022705, | |
| "learning_rate": 4.072139747671394e-05, | |
| "loss": 0.4808, | |
| "num_input_tokens_seen": 29595504, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.8523537803138374, | |
| "grad_norm": 0.9985124468803406, | |
| "learning_rate": 4.068507287897343e-05, | |
| "loss": 0.4354, | |
| "num_input_tokens_seen": 29658432, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8541369472182596, | |
| "grad_norm": 1.6747127771377563, | |
| "learning_rate": 4.06486935887499e-05, | |
| "loss": 0.4542, | |
| "num_input_tokens_seen": 29717312, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.8559201141226819, | |
| "grad_norm": 1.523199439048767, | |
| "learning_rate": 4.061225973289473e-05, | |
| "loss": 0.4326, | |
| "num_input_tokens_seen": 29780544, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8577032810271041, | |
| "grad_norm": 1.2455990314483643, | |
| "learning_rate": 4.057577143844964e-05, | |
| "loss": 0.4117, | |
| "num_input_tokens_seen": 29845408, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.8594864479315264, | |
| "grad_norm": 1.330797553062439, | |
| "learning_rate": 4.05392288326461e-05, | |
| "loss": 0.4742, | |
| "num_input_tokens_seen": 29909472, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.8612696148359487, | |
| "grad_norm": 1.3107638359069824, | |
| "learning_rate": 4.050263204290502e-05, | |
| "loss": 0.4182, | |
| "num_input_tokens_seen": 29972656, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.8630527817403709, | |
| "grad_norm": 1.050984501838684, | |
| "learning_rate": 4.046598119683621e-05, | |
| "loss": 0.3584, | |
| "num_input_tokens_seen": 30033440, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.8648359486447932, | |
| "grad_norm": 1.2663660049438477, | |
| "learning_rate": 4.042927642223798e-05, | |
| "loss": 0.3989, | |
| "num_input_tokens_seen": 30096256, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.8666191155492154, | |
| "grad_norm": 1.2052220106124878, | |
| "learning_rate": 4.039251784709667e-05, | |
| "loss": 0.4054, | |
| "num_input_tokens_seen": 30159776, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.8684022824536377, | |
| "grad_norm": 1.296916127204895, | |
| "learning_rate": 4.035570559958624e-05, | |
| "loss": 0.4812, | |
| "num_input_tokens_seen": 30220400, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.8701854493580599, | |
| "grad_norm": 1.4973492622375488, | |
| "learning_rate": 4.0318839808067796e-05, | |
| "loss": 0.4122, | |
| "num_input_tokens_seen": 30280144, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.8719686162624821, | |
| "grad_norm": 1.5270328521728516, | |
| "learning_rate": 4.028192060108914e-05, | |
| "loss": 0.4983, | |
| "num_input_tokens_seen": 30338336, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.8737517831669044, | |
| "grad_norm": 0.987087070941925, | |
| "learning_rate": 4.024494810738432e-05, | |
| "loss": 0.4126, | |
| "num_input_tokens_seen": 30401520, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.8755349500713266, | |
| "grad_norm": 1.3958648443222046, | |
| "learning_rate": 4.0207922455873205e-05, | |
| "loss": 0.4404, | |
| "num_input_tokens_seen": 30463280, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.8773181169757489, | |
| "grad_norm": 1.5073976516723633, | |
| "learning_rate": 4.0170843775661025e-05, | |
| "loss": 0.3789, | |
| "num_input_tokens_seen": 30525376, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.8791012838801712, | |
| "grad_norm": 1.4449955224990845, | |
| "learning_rate": 4.0133712196037906e-05, | |
| "loss": 0.4454, | |
| "num_input_tokens_seen": 30589792, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.8808844507845934, | |
| "grad_norm": 1.3354792594909668, | |
| "learning_rate": 4.009652784647843e-05, | |
| "loss": 0.4149, | |
| "num_input_tokens_seen": 30652512, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.8826676176890157, | |
| "grad_norm": 1.1937047243118286, | |
| "learning_rate": 4.005929085664119e-05, | |
| "loss": 0.4385, | |
| "num_input_tokens_seen": 30715456, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.884450784593438, | |
| "grad_norm": 1.794368863105774, | |
| "learning_rate": 4.002200135636832e-05, | |
| "loss": 0.4807, | |
| "num_input_tokens_seen": 30779760, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8862339514978602, | |
| "grad_norm": 1.3825370073318481, | |
| "learning_rate": 3.998465947568506e-05, | |
| "loss": 0.4214, | |
| "num_input_tokens_seen": 30841680, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.8880171184022825, | |
| "grad_norm": 1.2612850666046143, | |
| "learning_rate": 3.9947265344799304e-05, | |
| "loss": 0.3469, | |
| "num_input_tokens_seen": 30904864, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8898002853067047, | |
| "grad_norm": 1.769665002822876, | |
| "learning_rate": 3.990981909410112e-05, | |
| "loss": 0.4782, | |
| "num_input_tokens_seen": 30966048, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.891583452211127, | |
| "grad_norm": 1.4009034633636475, | |
| "learning_rate": 3.9872320854162324e-05, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 31031488, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8933666191155493, | |
| "grad_norm": 1.411489725112915, | |
| "learning_rate": 3.9834770755736014e-05, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 31093488, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.8951497860199714, | |
| "grad_norm": 1.448851227760315, | |
| "learning_rate": 3.979716892975612e-05, | |
| "loss": 0.4946, | |
| "num_input_tokens_seen": 31156816, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.8969329529243937, | |
| "grad_norm": 1.247158169746399, | |
| "learning_rate": 3.975951550733693e-05, | |
| "loss": 0.4141, | |
| "num_input_tokens_seen": 31220592, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.8987161198288159, | |
| "grad_norm": 1.0128424167633057, | |
| "learning_rate": 3.9721810619772636e-05, | |
| "loss": 0.4432, | |
| "num_input_tokens_seen": 31283920, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9004992867332382, | |
| "grad_norm": 1.1553157567977905, | |
| "learning_rate": 3.968405439853691e-05, | |
| "loss": 0.4436, | |
| "num_input_tokens_seen": 31348656, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.9022824536376605, | |
| "grad_norm": 2.350649356842041, | |
| "learning_rate": 3.964624697528241e-05, | |
| "loss": 0.4768, | |
| "num_input_tokens_seen": 31407968, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9040656205420827, | |
| "grad_norm": 1.277190089225769, | |
| "learning_rate": 3.960838848184032e-05, | |
| "loss": 0.4741, | |
| "num_input_tokens_seen": 31473280, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.905848787446505, | |
| "grad_norm": 1.0319541692733765, | |
| "learning_rate": 3.957047905021991e-05, | |
| "loss": 0.4863, | |
| "num_input_tokens_seen": 31536784, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9076319543509273, | |
| "grad_norm": 1.1045851707458496, | |
| "learning_rate": 3.9532518812608075e-05, | |
| "loss": 0.442, | |
| "num_input_tokens_seen": 31599904, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.9094151212553495, | |
| "grad_norm": 1.2741860151290894, | |
| "learning_rate": 3.949450790136885e-05, | |
| "loss": 0.4147, | |
| "num_input_tokens_seen": 31660832, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9111982881597718, | |
| "grad_norm": 1.344022274017334, | |
| "learning_rate": 3.9456446449042967e-05, | |
| "loss": 0.4988, | |
| "num_input_tokens_seen": 31725776, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.912981455064194, | |
| "grad_norm": 1.1035910844802856, | |
| "learning_rate": 3.9418334588347406e-05, | |
| "loss": 0.3968, | |
| "num_input_tokens_seen": 31785440, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9147646219686163, | |
| "grad_norm": 1.2287249565124512, | |
| "learning_rate": 3.9380172452174894e-05, | |
| "loss": 0.4313, | |
| "num_input_tokens_seen": 31847136, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.9165477888730386, | |
| "grad_norm": 1.5301395654678345, | |
| "learning_rate": 3.9341960173593495e-05, | |
| "loss": 0.4506, | |
| "num_input_tokens_seen": 31909488, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9183309557774608, | |
| "grad_norm": 1.2145062685012817, | |
| "learning_rate": 3.930369788584607e-05, | |
| "loss": 0.3616, | |
| "num_input_tokens_seen": 31971488, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.920114122681883, | |
| "grad_norm": 1.5201282501220703, | |
| "learning_rate": 3.926538572234991e-05, | |
| "loss": 0.4509, | |
| "num_input_tokens_seen": 32033920, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9218972895863052, | |
| "grad_norm": 1.2658700942993164, | |
| "learning_rate": 3.9227023816696176e-05, | |
| "loss": 0.4035, | |
| "num_input_tokens_seen": 32095680, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.9236804564907275, | |
| "grad_norm": 2.191631555557251, | |
| "learning_rate": 3.91886123026495e-05, | |
| "loss": 0.5551, | |
| "num_input_tokens_seen": 32153184, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9254636233951498, | |
| "grad_norm": 1.0200012922286987, | |
| "learning_rate": 3.9150151314147474e-05, | |
| "loss": 0.4772, | |
| "num_input_tokens_seen": 32213536, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.927246790299572, | |
| "grad_norm": 1.1653907299041748, | |
| "learning_rate": 3.911164098530023e-05, | |
| "loss": 0.374, | |
| "num_input_tokens_seen": 32269760, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9290299572039943, | |
| "grad_norm": 1.4243419170379639, | |
| "learning_rate": 3.907308145038993e-05, | |
| "loss": 0.4263, | |
| "num_input_tokens_seen": 32332112, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.9308131241084165, | |
| "grad_norm": 1.1571168899536133, | |
| "learning_rate": 3.903447284387029e-05, | |
| "loss": 0.3365, | |
| "num_input_tokens_seen": 32392352, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9325962910128388, | |
| "grad_norm": 1.3121576309204102, | |
| "learning_rate": 3.899581530036619e-05, | |
| "loss": 0.3868, | |
| "num_input_tokens_seen": 32455136, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.9343794579172611, | |
| "grad_norm": 1.7881336212158203, | |
| "learning_rate": 3.89571089546731e-05, | |
| "loss": 0.4519, | |
| "num_input_tokens_seen": 32516496, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9361626248216833, | |
| "grad_norm": 1.2598882913589478, | |
| "learning_rate": 3.8918353941756684e-05, | |
| "loss": 0.4071, | |
| "num_input_tokens_seen": 32576432, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.9379457917261056, | |
| "grad_norm": 1.1581662893295288, | |
| "learning_rate": 3.8879550396752295e-05, | |
| "loss": 0.4122, | |
| "num_input_tokens_seen": 32639328, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9397289586305279, | |
| "grad_norm": 1.4587301015853882, | |
| "learning_rate": 3.8840698454964507e-05, | |
| "loss": 0.39, | |
| "num_input_tokens_seen": 32700896, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.9415121255349501, | |
| "grad_norm": 1.2084256410598755, | |
| "learning_rate": 3.880179825186667e-05, | |
| "loss": 0.4211, | |
| "num_input_tokens_seen": 32767616, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9432952924393724, | |
| "grad_norm": 1.432020664215088, | |
| "learning_rate": 3.8762849923100384e-05, | |
| "loss": 0.3842, | |
| "num_input_tokens_seen": 32829216, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.9450784593437945, | |
| "grad_norm": 0.9488567113876343, | |
| "learning_rate": 3.8723853604475104e-05, | |
| "loss": 0.4145, | |
| "num_input_tokens_seen": 32894624, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9468616262482168, | |
| "grad_norm": 1.3725416660308838, | |
| "learning_rate": 3.8684809431967576e-05, | |
| "loss": 0.4174, | |
| "num_input_tokens_seen": 32957296, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.948644793152639, | |
| "grad_norm": 1.5407421588897705, | |
| "learning_rate": 3.864571754172144e-05, | |
| "loss": 0.4154, | |
| "num_input_tokens_seen": 33020944, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.9504279600570613, | |
| "grad_norm": 0.9749571681022644, | |
| "learning_rate": 3.8606578070046715e-05, | |
| "loss": 0.3845, | |
| "num_input_tokens_seen": 33083056, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.9522111269614836, | |
| "grad_norm": 0.9800275564193726, | |
| "learning_rate": 3.856739115341933e-05, | |
| "loss": 0.3894, | |
| "num_input_tokens_seen": 33144688, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.9539942938659058, | |
| "grad_norm": 1.2863636016845703, | |
| "learning_rate": 3.852815692848064e-05, | |
| "loss": 0.4649, | |
| "num_input_tokens_seen": 33208288, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.9557774607703281, | |
| "grad_norm": 1.2792158126831055, | |
| "learning_rate": 3.8488875532036975e-05, | |
| "loss": 0.452, | |
| "num_input_tokens_seen": 33264672, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.9575606276747504, | |
| "grad_norm": 1.3720332384109497, | |
| "learning_rate": 3.8449547101059135e-05, | |
| "loss": 0.4364, | |
| "num_input_tokens_seen": 33323904, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.9593437945791726, | |
| "grad_norm": 1.337188482284546, | |
| "learning_rate": 3.8410171772681955e-05, | |
| "loss": 0.3704, | |
| "num_input_tokens_seen": 33385568, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.9611269614835949, | |
| "grad_norm": 1.5518020391464233, | |
| "learning_rate": 3.837074968420376e-05, | |
| "loss": 0.4792, | |
| "num_input_tokens_seen": 33448144, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.9629101283880172, | |
| "grad_norm": 1.4631222486495972, | |
| "learning_rate": 3.833128097308594e-05, | |
| "loss": 0.3936, | |
| "num_input_tokens_seen": 33508960, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.9646932952924394, | |
| "grad_norm": 1.750189185142517, | |
| "learning_rate": 3.829176577695246e-05, | |
| "loss": 0.4272, | |
| "num_input_tokens_seen": 33572384, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.9664764621968617, | |
| "grad_norm": 1.2178962230682373, | |
| "learning_rate": 3.825220423358936e-05, | |
| "loss": 0.4352, | |
| "num_input_tokens_seen": 33631776, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.9682596291012838, | |
| "grad_norm": 1.262508749961853, | |
| "learning_rate": 3.8212596480944294e-05, | |
| "loss": 0.4359, | |
| "num_input_tokens_seen": 33695792, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.9700427960057061, | |
| "grad_norm": 1.2347853183746338, | |
| "learning_rate": 3.817294265712606e-05, | |
| "loss": 0.3832, | |
| "num_input_tokens_seen": 33759456, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.9718259629101283, | |
| "grad_norm": 1.334446907043457, | |
| "learning_rate": 3.813324290040408e-05, | |
| "loss": 0.4191, | |
| "num_input_tokens_seen": 33820352, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.9736091298145506, | |
| "grad_norm": 1.4268221855163574, | |
| "learning_rate": 3.809349734920793e-05, | |
| "loss": 0.4254, | |
| "num_input_tokens_seen": 33882016, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.9753922967189729, | |
| "grad_norm": 1.6015069484710693, | |
| "learning_rate": 3.805370614212692e-05, | |
| "loss": 0.4804, | |
| "num_input_tokens_seen": 33943184, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.9771754636233951, | |
| "grad_norm": 1.2011687755584717, | |
| "learning_rate": 3.8013869417909496e-05, | |
| "loss": 0.5089, | |
| "num_input_tokens_seen": 34009888, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.9789586305278174, | |
| "grad_norm": 1.290658712387085, | |
| "learning_rate": 3.797398731546286e-05, | |
| "loss": 0.4389, | |
| "num_input_tokens_seen": 34070576, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.9807417974322397, | |
| "grad_norm": 1.0900776386260986, | |
| "learning_rate": 3.793405997385242e-05, | |
| "loss": 0.3871, | |
| "num_input_tokens_seen": 34129760, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.9825249643366619, | |
| "grad_norm": 1.4768859148025513, | |
| "learning_rate": 3.789408753230135e-05, | |
| "loss": 0.3302, | |
| "num_input_tokens_seen": 34193408, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.9843081312410842, | |
| "grad_norm": 1.168250322341919, | |
| "learning_rate": 3.785407013019006e-05, | |
| "loss": 0.4042, | |
| "num_input_tokens_seen": 34255024, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.9860912981455064, | |
| "grad_norm": 1.1951143741607666, | |
| "learning_rate": 3.781400790705576e-05, | |
| "loss": 0.4567, | |
| "num_input_tokens_seen": 34319424, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.9878744650499287, | |
| "grad_norm": 1.4566255807876587, | |
| "learning_rate": 3.777390100259192e-05, | |
| "loss": 0.4034, | |
| "num_input_tokens_seen": 34384912, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.989657631954351, | |
| "grad_norm": 1.7778925895690918, | |
| "learning_rate": 3.773374955664782e-05, | |
| "loss": 0.4801, | |
| "num_input_tokens_seen": 34445488, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.9914407988587732, | |
| "grad_norm": 1.0619663000106812, | |
| "learning_rate": 3.769355370922807e-05, | |
| "loss": 0.3374, | |
| "num_input_tokens_seen": 34504400, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.9932239657631954, | |
| "grad_norm": 1.3074612617492676, | |
| "learning_rate": 3.765331360049208e-05, | |
| "loss": 0.5528, | |
| "num_input_tokens_seen": 34568560, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.9950071326676176, | |
| "grad_norm": 1.1420252323150635, | |
| "learning_rate": 3.761302937075361e-05, | |
| "loss": 0.4356, | |
| "num_input_tokens_seen": 34631808, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.9967902995720399, | |
| "grad_norm": 1.2096112966537476, | |
| "learning_rate": 3.7572701160480254e-05, | |
| "loss": 0.4348, | |
| "num_input_tokens_seen": 34692560, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.9985734664764622, | |
| "grad_norm": 1.4141584634780884, | |
| "learning_rate": 3.7532329110292966e-05, | |
| "loss": 0.383, | |
| "num_input_tokens_seen": 34754128, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0003566333808844, | |
| "grad_norm": 1.2436716556549072, | |
| "learning_rate": 3.749191336096558e-05, | |
| "loss": 0.4626, | |
| "num_input_tokens_seen": 34817616, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.0021398002853068, | |
| "grad_norm": 1.2323740720748901, | |
| "learning_rate": 3.745145405342429e-05, | |
| "loss": 0.4048, | |
| "num_input_tokens_seen": 34878544, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.003922967189729, | |
| "grad_norm": 1.0738112926483154, | |
| "learning_rate": 3.741095132874718e-05, | |
| "loss": 0.2812, | |
| "num_input_tokens_seen": 34940416, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.005706134094151, | |
| "grad_norm": 1.168542742729187, | |
| "learning_rate": 3.73704053281637e-05, | |
| "loss": 0.4261, | |
| "num_input_tokens_seen": 35004160, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.0074893009985735, | |
| "grad_norm": 1.366598129272461, | |
| "learning_rate": 3.7329816193054265e-05, | |
| "loss": 0.4108, | |
| "num_input_tokens_seen": 35066304, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.0092724679029956, | |
| "grad_norm": 1.1343327760696411, | |
| "learning_rate": 3.728918406494962e-05, | |
| "loss": 0.3122, | |
| "num_input_tokens_seen": 35128512, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.011055634807418, | |
| "grad_norm": 1.558720588684082, | |
| "learning_rate": 3.7248509085530465e-05, | |
| "loss": 0.358, | |
| "num_input_tokens_seen": 35188704, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.0128388017118402, | |
| "grad_norm": 1.211642861366272, | |
| "learning_rate": 3.720779139662691e-05, | |
| "loss": 0.4065, | |
| "num_input_tokens_seen": 35254032, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.0146219686162625, | |
| "grad_norm": 1.1981390714645386, | |
| "learning_rate": 3.7167031140218e-05, | |
| "loss": 0.4427, | |
| "num_input_tokens_seen": 35319520, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.0164051355206847, | |
| "grad_norm": 1.1277800798416138, | |
| "learning_rate": 3.712622845843119e-05, | |
| "loss": 0.3631, | |
| "num_input_tokens_seen": 35377936, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.018188302425107, | |
| "grad_norm": 1.461593508720398, | |
| "learning_rate": 3.708538349354189e-05, | |
| "loss": 0.4219, | |
| "num_input_tokens_seen": 35443072, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.0199714693295292, | |
| "grad_norm": 1.250154733657837, | |
| "learning_rate": 3.7044496387972914e-05, | |
| "loss": 0.3502, | |
| "num_input_tokens_seen": 35504144, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.0217546362339516, | |
| "grad_norm": 1.126944661140442, | |
| "learning_rate": 3.700356728429405e-05, | |
| "loss": 0.3805, | |
| "num_input_tokens_seen": 35566224, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.0235378031383737, | |
| "grad_norm": 1.158125638961792, | |
| "learning_rate": 3.696259632522152e-05, | |
| "loss": 0.4136, | |
| "num_input_tokens_seen": 35626384, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.025320970042796, | |
| "grad_norm": 1.3660660982131958, | |
| "learning_rate": 3.6921583653617476e-05, | |
| "loss": 0.3744, | |
| "num_input_tokens_seen": 35691424, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.0271041369472182, | |
| "grad_norm": 1.327812671661377, | |
| "learning_rate": 3.688052941248956e-05, | |
| "loss": 0.3597, | |
| "num_input_tokens_seen": 35752480, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.0288873038516406, | |
| "grad_norm": 1.474350929260254, | |
| "learning_rate": 3.683943374499031e-05, | |
| "loss": 0.3859, | |
| "num_input_tokens_seen": 35814640, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.0306704707560628, | |
| "grad_norm": 1.1447596549987793, | |
| "learning_rate": 3.679829679441674e-05, | |
| "loss": 0.314, | |
| "num_input_tokens_seen": 35874480, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.032453637660485, | |
| "grad_norm": 1.4844218492507935, | |
| "learning_rate": 3.675711870420983e-05, | |
| "loss": 0.4063, | |
| "num_input_tokens_seen": 35936528, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.0342368045649073, | |
| "grad_norm": 1.397241234779358, | |
| "learning_rate": 3.671589961795399e-05, | |
| "loss": 0.3333, | |
| "num_input_tokens_seen": 35999232, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.0360199714693294, | |
| "grad_norm": 1.4940799474716187, | |
| "learning_rate": 3.667463967937657e-05, | |
| "loss": 0.4014, | |
| "num_input_tokens_seen": 36060480, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.0378031383737518, | |
| "grad_norm": 1.0337133407592773, | |
| "learning_rate": 3.663333903234739e-05, | |
| "loss": 0.3575, | |
| "num_input_tokens_seen": 36125664, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.039586305278174, | |
| "grad_norm": 1.4721628427505493, | |
| "learning_rate": 3.659199782087821e-05, | |
| "loss": 0.4124, | |
| "num_input_tokens_seen": 36192256, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.0413694721825963, | |
| "grad_norm": 1.5297164916992188, | |
| "learning_rate": 3.655061618912224e-05, | |
| "loss": 0.3779, | |
| "num_input_tokens_seen": 36254208, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.0431526390870185, | |
| "grad_norm": 1.5214314460754395, | |
| "learning_rate": 3.650919428137362e-05, | |
| "loss": 0.4273, | |
| "num_input_tokens_seen": 36314944, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.0449358059914409, | |
| "grad_norm": 1.232298731803894, | |
| "learning_rate": 3.6467732242066936e-05, | |
| "loss": 0.4427, | |
| "num_input_tokens_seen": 36376832, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.046718972895863, | |
| "grad_norm": 1.0849212408065796, | |
| "learning_rate": 3.64262302157767e-05, | |
| "loss": 0.3674, | |
| "num_input_tokens_seen": 36440064, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.0485021398002854, | |
| "grad_norm": 1.261112928390503, | |
| "learning_rate": 3.6384688347216875e-05, | |
| "loss": 0.3893, | |
| "num_input_tokens_seen": 36501520, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.0502853067047075, | |
| "grad_norm": 1.5491517782211304, | |
| "learning_rate": 3.634310678124033e-05, | |
| "loss": 0.4026, | |
| "num_input_tokens_seen": 36563360, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.05206847360913, | |
| "grad_norm": 1.3838887214660645, | |
| "learning_rate": 3.630148566283837e-05, | |
| "loss": 0.376, | |
| "num_input_tokens_seen": 36624576, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.053851640513552, | |
| "grad_norm": 1.4949077367782593, | |
| "learning_rate": 3.6259825137140214e-05, | |
| "loss": 0.46, | |
| "num_input_tokens_seen": 36688448, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.0556348074179742, | |
| "grad_norm": 1.5406221151351929, | |
| "learning_rate": 3.621812534941246e-05, | |
| "loss": 0.4085, | |
| "num_input_tokens_seen": 36749520, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.0574179743223966, | |
| "grad_norm": 1.39425528049469, | |
| "learning_rate": 3.6176386445058666e-05, | |
| "loss": 0.3876, | |
| "num_input_tokens_seen": 36816208, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.0592011412268187, | |
| "grad_norm": 1.120383858680725, | |
| "learning_rate": 3.6134608569618754e-05, | |
| "loss": 0.3333, | |
| "num_input_tokens_seen": 36877008, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.0609843081312411, | |
| "grad_norm": 2.9805078506469727, | |
| "learning_rate": 3.609279186876853e-05, | |
| "loss": 0.3594, | |
| "num_input_tokens_seen": 36939824, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.0627674750356633, | |
| "grad_norm": 1.165252685546875, | |
| "learning_rate": 3.605093648831917e-05, | |
| "loss": 0.3132, | |
| "num_input_tokens_seen": 37002496, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.0645506419400856, | |
| "grad_norm": 1.2931095361709595, | |
| "learning_rate": 3.600904257421677e-05, | |
| "loss": 0.3781, | |
| "num_input_tokens_seen": 37064128, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.0663338088445078, | |
| "grad_norm": 1.4180676937103271, | |
| "learning_rate": 3.5967110272541745e-05, | |
| "loss": 0.383, | |
| "num_input_tokens_seen": 37124272, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.0681169757489302, | |
| "grad_norm": 1.0311819314956665, | |
| "learning_rate": 3.592513972950837e-05, | |
| "loss": 0.3219, | |
| "num_input_tokens_seen": 37184720, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.0699001426533523, | |
| "grad_norm": 1.1849727630615234, | |
| "learning_rate": 3.58831310914643e-05, | |
| "loss": 0.3799, | |
| "num_input_tokens_seen": 37248832, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0716833095577747, | |
| "grad_norm": 1.2104076147079468, | |
| "learning_rate": 3.5841084504889974e-05, | |
| "loss": 0.3072, | |
| "num_input_tokens_seen": 37310960, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.0734664764621968, | |
| "grad_norm": 1.3508752584457397, | |
| "learning_rate": 3.5799000116398184e-05, | |
| "loss": 0.3319, | |
| "num_input_tokens_seen": 37368320, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.0752496433666192, | |
| "grad_norm": 1.4133416414260864, | |
| "learning_rate": 3.575687807273352e-05, | |
| "loss": 0.3714, | |
| "num_input_tokens_seen": 37428336, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.0770328102710414, | |
| "grad_norm": 1.8722141981124878, | |
| "learning_rate": 3.5714718520771904e-05, | |
| "loss": 0.4047, | |
| "num_input_tokens_seen": 37493216, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.0788159771754637, | |
| "grad_norm": 1.4663044214248657, | |
| "learning_rate": 3.5672521607519994e-05, | |
| "loss": 0.4123, | |
| "num_input_tokens_seen": 37555504, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.0805991440798859, | |
| "grad_norm": 1.102156162261963, | |
| "learning_rate": 3.563028748011476e-05, | |
| "loss": 0.3994, | |
| "num_input_tokens_seen": 37617728, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.082382310984308, | |
| "grad_norm": 1.8379590511322021, | |
| "learning_rate": 3.5588016285822936e-05, | |
| "loss": 0.3766, | |
| "num_input_tokens_seen": 37680176, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.0841654778887304, | |
| "grad_norm": 1.6983126401901245, | |
| "learning_rate": 3.554570817204048e-05, | |
| "loss": 0.4254, | |
| "num_input_tokens_seen": 37744656, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.0859486447931526, | |
| "grad_norm": 1.4322010278701782, | |
| "learning_rate": 3.550336328629211e-05, | |
| "loss": 0.3099, | |
| "num_input_tokens_seen": 37802912, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.087731811697575, | |
| "grad_norm": 1.4063708782196045, | |
| "learning_rate": 3.546098177623075e-05, | |
| "loss": 0.3445, | |
| "num_input_tokens_seen": 37863504, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.089514978601997, | |
| "grad_norm": 1.2764664888381958, | |
| "learning_rate": 3.541856378963704e-05, | |
| "loss": 0.375, | |
| "num_input_tokens_seen": 37922832, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.0912981455064195, | |
| "grad_norm": 1.6607260704040527, | |
| "learning_rate": 3.53761094744188e-05, | |
| "loss": 0.4187, | |
| "num_input_tokens_seen": 37985536, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.0930813124108416, | |
| "grad_norm": 1.5022599697113037, | |
| "learning_rate": 3.533361897861053e-05, | |
| "loss": 0.454, | |
| "num_input_tokens_seen": 38045824, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.094864479315264, | |
| "grad_norm": 1.4386935234069824, | |
| "learning_rate": 3.529109245037289e-05, | |
| "loss": 0.4023, | |
| "num_input_tokens_seen": 38104704, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.0966476462196861, | |
| "grad_norm": 1.9962483644485474, | |
| "learning_rate": 3.524853003799218e-05, | |
| "loss": 0.3831, | |
| "num_input_tokens_seen": 38165632, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.0984308131241085, | |
| "grad_norm": 1.5470753908157349, | |
| "learning_rate": 3.520593188987982e-05, | |
| "loss": 0.4213, | |
| "num_input_tokens_seen": 38228720, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.1002139800285307, | |
| "grad_norm": 0.9549476504325867, | |
| "learning_rate": 3.516329815457184e-05, | |
| "loss": 0.4021, | |
| "num_input_tokens_seen": 38291936, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.1019971469329528, | |
| "grad_norm": 1.1868444681167603, | |
| "learning_rate": 3.512062898072838e-05, | |
| "loss": 0.3664, | |
| "num_input_tokens_seen": 38353984, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1037803138373752, | |
| "grad_norm": 1.822995901107788, | |
| "learning_rate": 3.5077924517133114e-05, | |
| "loss": 0.3468, | |
| "num_input_tokens_seen": 38416352, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.1055634807417973, | |
| "grad_norm": 1.369612455368042, | |
| "learning_rate": 3.503518491269279e-05, | |
| "loss": 0.3248, | |
| "num_input_tokens_seen": 38479152, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1073466476462197, | |
| "grad_norm": 1.3822919130325317, | |
| "learning_rate": 3.49924103164367e-05, | |
| "loss": 0.4058, | |
| "num_input_tokens_seen": 38541776, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.1091298145506419, | |
| "grad_norm": 1.54763925075531, | |
| "learning_rate": 3.4949600877516126e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 38604704, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1109129814550642, | |
| "grad_norm": 1.6321046352386475, | |
| "learning_rate": 3.490675674520385e-05, | |
| "loss": 0.3429, | |
| "num_input_tokens_seen": 38669408, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.1126961483594864, | |
| "grad_norm": 1.2626839876174927, | |
| "learning_rate": 3.4863878068893625e-05, | |
| "loss": 0.3938, | |
| "num_input_tokens_seen": 38732240, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.1144793152639088, | |
| "grad_norm": 1.724424123764038, | |
| "learning_rate": 3.482096499809967e-05, | |
| "loss": 0.3934, | |
| "num_input_tokens_seen": 38796240, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.116262482168331, | |
| "grad_norm": 1.5761538743972778, | |
| "learning_rate": 3.477801768245614e-05, | |
| "loss": 0.3706, | |
| "num_input_tokens_seen": 38859568, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.1180456490727533, | |
| "grad_norm": 1.656040906906128, | |
| "learning_rate": 3.473503627171655e-05, | |
| "loss": 0.4048, | |
| "num_input_tokens_seen": 38919472, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.1198288159771754, | |
| "grad_norm": 1.6139919757843018, | |
| "learning_rate": 3.469202091575337e-05, | |
| "loss": 0.3609, | |
| "num_input_tokens_seen": 38981744, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.1216119828815978, | |
| "grad_norm": 1.5200504064559937, | |
| "learning_rate": 3.464897176455737e-05, | |
| "loss": 0.3309, | |
| "num_input_tokens_seen": 39040848, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.12339514978602, | |
| "grad_norm": 1.6206810474395752, | |
| "learning_rate": 3.460588896823721e-05, | |
| "loss": 0.4394, | |
| "num_input_tokens_seen": 39100560, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1251783166904423, | |
| "grad_norm": 1.4544352293014526, | |
| "learning_rate": 3.456277267701884e-05, | |
| "loss": 0.3477, | |
| "num_input_tokens_seen": 39163216, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.1269614835948645, | |
| "grad_norm": 1.2673510313034058, | |
| "learning_rate": 3.4519623041245026e-05, | |
| "loss": 0.4006, | |
| "num_input_tokens_seen": 39228880, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.1287446504992866, | |
| "grad_norm": 1.2291406393051147, | |
| "learning_rate": 3.447644021137477e-05, | |
| "loss": 0.4547, | |
| "num_input_tokens_seen": 39290176, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.130527817403709, | |
| "grad_norm": 1.1247378587722778, | |
| "learning_rate": 3.443322433798285e-05, | |
| "loss": 0.361, | |
| "num_input_tokens_seen": 39353136, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.1323109843081312, | |
| "grad_norm": 1.3517816066741943, | |
| "learning_rate": 3.438997557175925e-05, | |
| "loss": 0.3889, | |
| "num_input_tokens_seen": 39414672, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.1340941512125535, | |
| "grad_norm": 1.177815318107605, | |
| "learning_rate": 3.434669406350866e-05, | |
| "loss": 0.4059, | |
| "num_input_tokens_seen": 39474240, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.1358773181169757, | |
| "grad_norm": 1.392850399017334, | |
| "learning_rate": 3.430337996414991e-05, | |
| "loss": 0.339, | |
| "num_input_tokens_seen": 39537376, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.137660485021398, | |
| "grad_norm": 1.5260528326034546, | |
| "learning_rate": 3.4260033424715504e-05, | |
| "loss": 0.4236, | |
| "num_input_tokens_seen": 39597264, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.1394436519258202, | |
| "grad_norm": 1.3453905582427979, | |
| "learning_rate": 3.421665459635105e-05, | |
| "loss": 0.3432, | |
| "num_input_tokens_seen": 39659008, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.1412268188302426, | |
| "grad_norm": 1.3098467588424683, | |
| "learning_rate": 3.4173243630314754e-05, | |
| "loss": 0.3199, | |
| "num_input_tokens_seen": 39719792, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.1430099857346647, | |
| "grad_norm": 1.5861715078353882, | |
| "learning_rate": 3.4129800677976846e-05, | |
| "loss": 0.4334, | |
| "num_input_tokens_seen": 39781200, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.144793152639087, | |
| "grad_norm": 1.5333304405212402, | |
| "learning_rate": 3.408632589081915e-05, | |
| "loss": 0.4005, | |
| "num_input_tokens_seen": 39843616, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.1465763195435092, | |
| "grad_norm": 1.2974801063537598, | |
| "learning_rate": 3.4042819420434437e-05, | |
| "loss": 0.4372, | |
| "num_input_tokens_seen": 39909440, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.1483594864479316, | |
| "grad_norm": 1.7935795783996582, | |
| "learning_rate": 3.399928141852599e-05, | |
| "loss": 0.4579, | |
| "num_input_tokens_seen": 39972400, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.1501426533523538, | |
| "grad_norm": 1.1913748979568481, | |
| "learning_rate": 3.395571203690703e-05, | |
| "loss": 0.4283, | |
| "num_input_tokens_seen": 40035248, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.1519258202567761, | |
| "grad_norm": 1.4842363595962524, | |
| "learning_rate": 3.3912111427500205e-05, | |
| "loss": 0.4128, | |
| "num_input_tokens_seen": 40094368, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.1537089871611983, | |
| "grad_norm": 1.7173250913619995, | |
| "learning_rate": 3.3868479742337024e-05, | |
| "loss": 0.3723, | |
| "num_input_tokens_seen": 40156592, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.1554921540656204, | |
| "grad_norm": 1.351295828819275, | |
| "learning_rate": 3.382481713355738e-05, | |
| "loss": 0.3311, | |
| "num_input_tokens_seen": 40221712, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.1572753209700428, | |
| "grad_norm": 1.3807501792907715, | |
| "learning_rate": 3.3781123753409e-05, | |
| "loss": 0.3431, | |
| "num_input_tokens_seen": 40284224, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.159058487874465, | |
| "grad_norm": 1.1675392389297485, | |
| "learning_rate": 3.3737399754246875e-05, | |
| "loss": 0.4291, | |
| "num_input_tokens_seen": 40347328, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.1608416547788873, | |
| "grad_norm": 1.3195664882659912, | |
| "learning_rate": 3.36936452885328e-05, | |
| "loss": 0.3906, | |
| "num_input_tokens_seen": 40414544, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.1626248216833095, | |
| "grad_norm": 1.4810302257537842, | |
| "learning_rate": 3.364986050883476e-05, | |
| "loss": 0.2888, | |
| "num_input_tokens_seen": 40476528, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.1644079885877319, | |
| "grad_norm": 1.142104983329773, | |
| "learning_rate": 3.360604556782649e-05, | |
| "loss": 0.3662, | |
| "num_input_tokens_seen": 40539248, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.166191155492154, | |
| "grad_norm": 1.2931478023529053, | |
| "learning_rate": 3.356220061828689e-05, | |
| "loss": 0.3909, | |
| "num_input_tokens_seen": 40600832, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.1679743223965764, | |
| "grad_norm": 1.4515559673309326, | |
| "learning_rate": 3.351832581309944e-05, | |
| "loss": 0.3883, | |
| "num_input_tokens_seen": 40663600, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.1697574893009985, | |
| "grad_norm": 1.3096765279769897, | |
| "learning_rate": 3.3474421305251785e-05, | |
| "loss": 0.4206, | |
| "num_input_tokens_seen": 40727616, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.171540656205421, | |
| "grad_norm": 1.3618152141571045, | |
| "learning_rate": 3.343048724783512e-05, | |
| "loss": 0.4285, | |
| "num_input_tokens_seen": 40793168, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.173323823109843, | |
| "grad_norm": 1.4577500820159912, | |
| "learning_rate": 3.3386523794043677e-05, | |
| "loss": 0.3513, | |
| "num_input_tokens_seen": 40856128, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.1751069900142652, | |
| "grad_norm": 1.5820814371109009, | |
| "learning_rate": 3.334253109717419e-05, | |
| "loss": 0.3188, | |
| "num_input_tokens_seen": 40921200, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.1768901569186876, | |
| "grad_norm": 1.4901388883590698, | |
| "learning_rate": 3.3298509310625363e-05, | |
| "loss": 0.3945, | |
| "num_input_tokens_seen": 40981808, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.17867332382311, | |
| "grad_norm": 1.420837640762329, | |
| "learning_rate": 3.325445858789732e-05, | |
| "loss": 0.3768, | |
| "num_input_tokens_seen": 41042096, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.1804564907275321, | |
| "grad_norm": 1.5397422313690186, | |
| "learning_rate": 3.321037908259111e-05, | |
| "loss": 0.3753, | |
| "num_input_tokens_seen": 41105520, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.1822396576319543, | |
| "grad_norm": 1.6058024168014526, | |
| "learning_rate": 3.3166270948408126e-05, | |
| "loss": 0.3827, | |
| "num_input_tokens_seen": 41168112, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.1840228245363766, | |
| "grad_norm": 1.4254966974258423, | |
| "learning_rate": 3.3122134339149585e-05, | |
| "loss": 0.4157, | |
| "num_input_tokens_seen": 41231664, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.1858059914407988, | |
| "grad_norm": 1.437872290611267, | |
| "learning_rate": 3.3077969408715995e-05, | |
| "loss": 0.3757, | |
| "num_input_tokens_seen": 41293296, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.1875891583452212, | |
| "grad_norm": 1.2461625337600708, | |
| "learning_rate": 3.3033776311106626e-05, | |
| "loss": 0.409, | |
| "num_input_tokens_seen": 41355840, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.1893723252496433, | |
| "grad_norm": 1.0907572507858276, | |
| "learning_rate": 3.2989555200418977e-05, | |
| "loss": 0.3948, | |
| "num_input_tokens_seen": 41415680, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.1911554921540657, | |
| "grad_norm": 1.3984030485153198, | |
| "learning_rate": 3.2945306230848185e-05, | |
| "loss": 0.3062, | |
| "num_input_tokens_seen": 41476864, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.1929386590584878, | |
| "grad_norm": 1.6210757493972778, | |
| "learning_rate": 3.2901029556686555e-05, | |
| "loss": 0.436, | |
| "num_input_tokens_seen": 41541712, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.1947218259629102, | |
| "grad_norm": 1.393109917640686, | |
| "learning_rate": 3.285672533232301e-05, | |
| "loss": 0.4281, | |
| "num_input_tokens_seen": 41606016, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.1965049928673324, | |
| "grad_norm": 1.9764735698699951, | |
| "learning_rate": 3.281239371224252e-05, | |
| "loss": 0.3698, | |
| "num_input_tokens_seen": 41666896, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.1982881597717547, | |
| "grad_norm": 1.2083278894424438, | |
| "learning_rate": 3.276803485102557e-05, | |
| "loss": 0.4166, | |
| "num_input_tokens_seen": 41730864, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.2000713266761769, | |
| "grad_norm": 1.567348599433899, | |
| "learning_rate": 3.2723648903347646e-05, | |
| "loss": 0.3745, | |
| "num_input_tokens_seen": 41792336, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.201854493580599, | |
| "grad_norm": 1.2875126600265503, | |
| "learning_rate": 3.267923602397869e-05, | |
| "loss": 0.3444, | |
| "num_input_tokens_seen": 41850800, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2036376604850214, | |
| "grad_norm": 1.5980690717697144, | |
| "learning_rate": 3.263479636778255e-05, | |
| "loss": 0.3312, | |
| "num_input_tokens_seen": 41914192, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.2054208273894436, | |
| "grad_norm": 1.565764307975769, | |
| "learning_rate": 3.259033008971642e-05, | |
| "loss": 0.3275, | |
| "num_input_tokens_seen": 41978608, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.207203994293866, | |
| "grad_norm": 1.3292726278305054, | |
| "learning_rate": 3.2545837344830356e-05, | |
| "loss": 0.4084, | |
| "num_input_tokens_seen": 42039376, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.208987161198288, | |
| "grad_norm": 1.6631255149841309, | |
| "learning_rate": 3.2501318288266667e-05, | |
| "loss": 0.3749, | |
| "num_input_tokens_seen": 42103296, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.2107703281027105, | |
| "grad_norm": 2.20603609085083, | |
| "learning_rate": 3.2456773075259437e-05, | |
| "loss": 0.4643, | |
| "num_input_tokens_seen": 42166896, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.2125534950071326, | |
| "grad_norm": 1.4111806154251099, | |
| "learning_rate": 3.241220186113394e-05, | |
| "loss": 0.3692, | |
| "num_input_tokens_seen": 42226960, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.214336661911555, | |
| "grad_norm": 1.2245135307312012, | |
| "learning_rate": 3.236760480130612e-05, | |
| "loss": 0.3737, | |
| "num_input_tokens_seen": 42286720, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.2161198288159771, | |
| "grad_norm": 1.8092842102050781, | |
| "learning_rate": 3.2322982051282044e-05, | |
| "loss": 0.423, | |
| "num_input_tokens_seen": 42349920, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.2179029957203995, | |
| "grad_norm": 1.93547785282135, | |
| "learning_rate": 3.227833376665734e-05, | |
| "loss": 0.3346, | |
| "num_input_tokens_seen": 42413520, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.2196861626248217, | |
| "grad_norm": 1.9991748332977295, | |
| "learning_rate": 3.223366010311671e-05, | |
| "loss": 0.4164, | |
| "num_input_tokens_seen": 42477488, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.221469329529244, | |
| "grad_norm": 1.5855672359466553, | |
| "learning_rate": 3.218896121643331e-05, | |
| "loss": 0.3737, | |
| "num_input_tokens_seen": 42538032, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.2232524964336662, | |
| "grad_norm": 1.3733339309692383, | |
| "learning_rate": 3.214423726246828e-05, | |
| "loss": 0.3339, | |
| "num_input_tokens_seen": 42602288, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.2250356633380886, | |
| "grad_norm": 1.2213727235794067, | |
| "learning_rate": 3.209948839717014e-05, | |
| "loss": 0.3601, | |
| "num_input_tokens_seen": 42662464, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.2268188302425107, | |
| "grad_norm": 1.5007941722869873, | |
| "learning_rate": 3.205471477657428e-05, | |
| "loss": 0.2879, | |
| "num_input_tokens_seen": 42724832, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.2286019971469329, | |
| "grad_norm": 1.0848278999328613, | |
| "learning_rate": 3.200991655680243e-05, | |
| "loss": 0.3755, | |
| "num_input_tokens_seen": 42784336, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.2303851640513552, | |
| "grad_norm": 1.4045600891113281, | |
| "learning_rate": 3.1965093894062084e-05, | |
| "loss": 0.4618, | |
| "num_input_tokens_seen": 42845744, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.2321683309557774, | |
| "grad_norm": 1.537743330001831, | |
| "learning_rate": 3.1920246944645945e-05, | |
| "loss": 0.3932, | |
| "num_input_tokens_seen": 42909840, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.2339514978601998, | |
| "grad_norm": 1.8205797672271729, | |
| "learning_rate": 3.1875375864931426e-05, | |
| "loss": 0.4308, | |
| "num_input_tokens_seen": 42971504, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.235734664764622, | |
| "grad_norm": 1.714099407196045, | |
| "learning_rate": 3.183048081138009e-05, | |
| "loss": 0.4523, | |
| "num_input_tokens_seen": 43033200, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.2375178316690443, | |
| "grad_norm": 1.4274886846542358, | |
| "learning_rate": 3.178556194053706e-05, | |
| "loss": 0.4437, | |
| "num_input_tokens_seen": 43097680, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.2393009985734664, | |
| "grad_norm": 1.469146728515625, | |
| "learning_rate": 3.174061940903053e-05, | |
| "loss": 0.4199, | |
| "num_input_tokens_seen": 43158240, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.2410841654778888, | |
| "grad_norm": 1.4748458862304688, | |
| "learning_rate": 3.1695653373571196e-05, | |
| "loss": 0.417, | |
| "num_input_tokens_seen": 43220000, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.242867332382311, | |
| "grad_norm": 1.4848557710647583, | |
| "learning_rate": 3.16506639909517e-05, | |
| "loss": 0.3459, | |
| "num_input_tokens_seen": 43284592, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.2446504992867333, | |
| "grad_norm": 1.5085371732711792, | |
| "learning_rate": 3.160565141804611e-05, | |
| "loss": 0.4546, | |
| "num_input_tokens_seen": 43343088, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.2464336661911555, | |
| "grad_norm": 1.1072133779525757, | |
| "learning_rate": 3.156061581180936e-05, | |
| "loss": 0.3337, | |
| "num_input_tokens_seen": 43405888, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.2482168330955776, | |
| "grad_norm": 1.4966447353363037, | |
| "learning_rate": 3.1515557329276654e-05, | |
| "loss": 0.3346, | |
| "num_input_tokens_seen": 43468000, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.5697176456451416, | |
| "learning_rate": 3.147047612756302e-05, | |
| "loss": 0.3192, | |
| "num_input_tokens_seen": 43531968, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.2517831669044224, | |
| "grad_norm": 1.3662910461425781, | |
| "learning_rate": 3.1425372363862676e-05, | |
| "loss": 0.3736, | |
| "num_input_tokens_seen": 43592880, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.2535663338088445, | |
| "grad_norm": 1.7950197458267212, | |
| "learning_rate": 3.1380246195448516e-05, | |
| "loss": 0.4962, | |
| "num_input_tokens_seen": 43659280, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.2553495007132667, | |
| "grad_norm": 1.6455252170562744, | |
| "learning_rate": 3.1335097779671564e-05, | |
| "loss": 0.3387, | |
| "num_input_tokens_seen": 43714592, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.257132667617689, | |
| "grad_norm": 1.551438808441162, | |
| "learning_rate": 3.128992727396041e-05, | |
| "loss": 0.3563, | |
| "num_input_tokens_seen": 43773584, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.2589158345221112, | |
| "grad_norm": 2.015979051589966, | |
| "learning_rate": 3.1244734835820666e-05, | |
| "loss": 0.425, | |
| "num_input_tokens_seen": 43834848, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.2606990014265336, | |
| "grad_norm": 1.8502330780029297, | |
| "learning_rate": 3.119952062283444e-05, | |
| "loss": 0.3942, | |
| "num_input_tokens_seen": 43896768, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.2624821683309557, | |
| "grad_norm": 1.9823004007339478, | |
| "learning_rate": 3.115428479265975e-05, | |
| "loss": 0.3489, | |
| "num_input_tokens_seen": 43956416, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.264265335235378, | |
| "grad_norm": 1.3106993436813354, | |
| "learning_rate": 3.1109027503029994e-05, | |
| "loss": 0.3622, | |
| "num_input_tokens_seen": 44018848, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.2660485021398002, | |
| "grad_norm": 1.3623470067977905, | |
| "learning_rate": 3.10637489117534e-05, | |
| "loss": 0.383, | |
| "num_input_tokens_seen": 44081952, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.2678316690442226, | |
| "grad_norm": 2.47135853767395, | |
| "learning_rate": 3.1018449176712474e-05, | |
| "loss": 0.3496, | |
| "num_input_tokens_seen": 44146544, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.2696148359486448, | |
| "grad_norm": 1.5469810962677002, | |
| "learning_rate": 3.097312845586345e-05, | |
| "loss": 0.3219, | |
| "num_input_tokens_seen": 44207664, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.2713980028530671, | |
| "grad_norm": 1.3339722156524658, | |
| "learning_rate": 3.0927786907235727e-05, | |
| "loss": 0.3731, | |
| "num_input_tokens_seen": 44268848, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.2731811697574893, | |
| "grad_norm": 1.4878593683242798, | |
| "learning_rate": 3.088242468893135e-05, | |
| "loss": 0.4539, | |
| "num_input_tokens_seen": 44329824, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.2749643366619114, | |
| "grad_norm": 1.579049825668335, | |
| "learning_rate": 3.083704195912439e-05, | |
| "loss": 0.3579, | |
| "num_input_tokens_seen": 44389872, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.2767475035663338, | |
| "grad_norm": 1.2362189292907715, | |
| "learning_rate": 3.079163887606051e-05, | |
| "loss": 0.3178, | |
| "num_input_tokens_seen": 44451040, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.2785306704707562, | |
| "grad_norm": 1.586029052734375, | |
| "learning_rate": 3.074621559805629e-05, | |
| "loss": 0.37, | |
| "num_input_tokens_seen": 44514000, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.2803138373751783, | |
| "grad_norm": 2.0480854511260986, | |
| "learning_rate": 3.070077228349875e-05, | |
| "loss": 0.3557, | |
| "num_input_tokens_seen": 44577760, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.2820970042796005, | |
| "grad_norm": 1.724202036857605, | |
| "learning_rate": 3.065530909084477e-05, | |
| "loss": 0.4098, | |
| "num_input_tokens_seen": 44638880, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.2838801711840229, | |
| "grad_norm": 1.251970887184143, | |
| "learning_rate": 3.060982617862053e-05, | |
| "loss": 0.389, | |
| "num_input_tokens_seen": 44705040, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.285663338088445, | |
| "grad_norm": 1.652723789215088, | |
| "learning_rate": 3.0564323705420996e-05, | |
| "loss": 0.3512, | |
| "num_input_tokens_seen": 44769344, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.2874465049928674, | |
| "grad_norm": 1.1588093042373657, | |
| "learning_rate": 3.051880182990932e-05, | |
| "loss": 0.4157, | |
| "num_input_tokens_seen": 44830528, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.2892296718972895, | |
| "grad_norm": 1.424497365951538, | |
| "learning_rate": 3.0473260710816333e-05, | |
| "loss": 0.3897, | |
| "num_input_tokens_seen": 44890592, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.291012838801712, | |
| "grad_norm": 1.8615883588790894, | |
| "learning_rate": 3.042770050693994e-05, | |
| "loss": 0.3806, | |
| "num_input_tokens_seen": 44955440, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.292796005706134, | |
| "grad_norm": 1.5784142017364502, | |
| "learning_rate": 3.0382121377144597e-05, | |
| "loss": 0.3707, | |
| "num_input_tokens_seen": 45015792, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.2945791726105562, | |
| "grad_norm": 1.1894679069519043, | |
| "learning_rate": 3.033652348036078e-05, | |
| "loss": 0.3357, | |
| "num_input_tokens_seen": 45075840, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.2963623395149786, | |
| "grad_norm": 1.3909114599227905, | |
| "learning_rate": 3.0290906975584364e-05, | |
| "loss": 0.3775, | |
| "num_input_tokens_seen": 45138448, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.298145506419401, | |
| "grad_norm": 1.9987061023712158, | |
| "learning_rate": 3.0245272021876144e-05, | |
| "loss": 0.4345, | |
| "num_input_tokens_seen": 45200016, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.2999286733238231, | |
| "grad_norm": 1.595581293106079, | |
| "learning_rate": 3.0199618778361205e-05, | |
| "loss": 0.3906, | |
| "num_input_tokens_seen": 45258832, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.3017118402282453, | |
| "grad_norm": 1.3167765140533447, | |
| "learning_rate": 3.015394740422846e-05, | |
| "loss": 0.3751, | |
| "num_input_tokens_seen": 45317744, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.3034950071326676, | |
| "grad_norm": 1.4946184158325195, | |
| "learning_rate": 3.0108258058730005e-05, | |
| "loss": 0.3753, | |
| "num_input_tokens_seen": 45382832, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.3052781740370898, | |
| "grad_norm": 1.1609739065170288, | |
| "learning_rate": 3.006255090118059e-05, | |
| "loss": 0.3884, | |
| "num_input_tokens_seen": 45445168, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.3070613409415122, | |
| "grad_norm": 1.3701294660568237, | |
| "learning_rate": 3.0016826090957106e-05, | |
| "loss": 0.3847, | |
| "num_input_tokens_seen": 45509216, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.3088445078459343, | |
| "grad_norm": 1.580486536026001, | |
| "learning_rate": 2.9971083787497988e-05, | |
| "loss": 0.3808, | |
| "num_input_tokens_seen": 45572064, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.3106276747503567, | |
| "grad_norm": 1.3107916116714478, | |
| "learning_rate": 2.9925324150302665e-05, | |
| "loss": 0.3296, | |
| "num_input_tokens_seen": 45634000, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.3124108416547788, | |
| "grad_norm": 1.739546298980713, | |
| "learning_rate": 2.9879547338930997e-05, | |
| "loss": 0.4001, | |
| "num_input_tokens_seen": 45695632, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.3141940085592012, | |
| "grad_norm": 1.9113649129867554, | |
| "learning_rate": 2.9833753513002743e-05, | |
| "loss": 0.3726, | |
| "num_input_tokens_seen": 45755824, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.3159771754636234, | |
| "grad_norm": 1.4150328636169434, | |
| "learning_rate": 2.978794283219698e-05, | |
| "loss": 0.3018, | |
| "num_input_tokens_seen": 45817024, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.3177603423680457, | |
| "grad_norm": 1.5835505723953247, | |
| "learning_rate": 2.9742115456251575e-05, | |
| "loss": 0.4116, | |
| "num_input_tokens_seen": 45883600, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.3195435092724679, | |
| "grad_norm": 1.714812159538269, | |
| "learning_rate": 2.9696271544962583e-05, | |
| "loss": 0.4026, | |
| "num_input_tokens_seen": 45946768, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.32132667617689, | |
| "grad_norm": 1.2335606813430786, | |
| "learning_rate": 2.965041125818374e-05, | |
| "loss": 0.3238, | |
| "num_input_tokens_seen": 46003360, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.3231098430813124, | |
| "grad_norm": 1.5695058107376099, | |
| "learning_rate": 2.9604534755825863e-05, | |
| "loss": 0.4024, | |
| "num_input_tokens_seen": 46064304, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.3248930099857348, | |
| "grad_norm": 1.2431243658065796, | |
| "learning_rate": 2.9558642197856322e-05, | |
| "loss": 0.3557, | |
| "num_input_tokens_seen": 46125168, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.326676176890157, | |
| "grad_norm": 1.5153807401657104, | |
| "learning_rate": 2.9512733744298482e-05, | |
| "loss": 0.4022, | |
| "num_input_tokens_seen": 46185408, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.328459343794579, | |
| "grad_norm": 1.2606337070465088, | |
| "learning_rate": 2.9466809555231112e-05, | |
| "loss": 0.3489, | |
| "num_input_tokens_seen": 46249776, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.3302425106990015, | |
| "grad_norm": 1.4757767915725708, | |
| "learning_rate": 2.9420869790787852e-05, | |
| "loss": 0.4108, | |
| "num_input_tokens_seen": 46312512, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.3320256776034236, | |
| "grad_norm": 1.3754016160964966, | |
| "learning_rate": 2.9374914611156668e-05, | |
| "loss": 0.4006, | |
| "num_input_tokens_seen": 46375680, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.333808844507846, | |
| "grad_norm": 1.6682347059249878, | |
| "learning_rate": 2.932894417657927e-05, | |
| "loss": 0.4147, | |
| "num_input_tokens_seen": 46440688, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.3355920114122681, | |
| "grad_norm": 1.5695980787277222, | |
| "learning_rate": 2.928295864735056e-05, | |
| "loss": 0.3782, | |
| "num_input_tokens_seen": 46502272, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.3373751783166905, | |
| "grad_norm": 1.4191399812698364, | |
| "learning_rate": 2.9236958183818076e-05, | |
| "loss": 0.3895, | |
| "num_input_tokens_seen": 46565120, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.3391583452211127, | |
| "grad_norm": 1.4917359352111816, | |
| "learning_rate": 2.9190942946381418e-05, | |
| "loss": 0.4084, | |
| "num_input_tokens_seen": 46628752, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.340941512125535, | |
| "grad_norm": 1.8590489625930786, | |
| "learning_rate": 2.914491309549171e-05, | |
| "loss": 0.3762, | |
| "num_input_tokens_seen": 46690560, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.3427246790299572, | |
| "grad_norm": 1.7186845541000366, | |
| "learning_rate": 2.9098868791651046e-05, | |
| "loss": 0.4157, | |
| "num_input_tokens_seen": 46755712, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.3445078459343796, | |
| "grad_norm": 1.2674869298934937, | |
| "learning_rate": 2.90528101954119e-05, | |
| "loss": 0.3688, | |
| "num_input_tokens_seen": 46821440, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.3462910128388017, | |
| "grad_norm": 1.3083473443984985, | |
| "learning_rate": 2.9006737467376577e-05, | |
| "loss": 0.4088, | |
| "num_input_tokens_seen": 46882768, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.3480741797432239, | |
| "grad_norm": 1.3852232694625854, | |
| "learning_rate": 2.8960650768196672e-05, | |
| "loss": 0.4017, | |
| "num_input_tokens_seen": 46945088, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.3498573466476462, | |
| "grad_norm": 1.9241416454315186, | |
| "learning_rate": 2.8914550258572487e-05, | |
| "loss": 0.362, | |
| "num_input_tokens_seen": 47013312, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.3516405135520686, | |
| "grad_norm": 1.380786657333374, | |
| "learning_rate": 2.8868436099252503e-05, | |
| "loss": 0.3919, | |
| "num_input_tokens_seen": 47078800, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.3534236804564908, | |
| "grad_norm": 1.7454177141189575, | |
| "learning_rate": 2.8822308451032754e-05, | |
| "loss": 0.3647, | |
| "num_input_tokens_seen": 47139856, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.355206847360913, | |
| "grad_norm": 1.9598338603973389, | |
| "learning_rate": 2.877616747475634e-05, | |
| "loss": 0.3123, | |
| "num_input_tokens_seen": 47202944, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.3569900142653353, | |
| "grad_norm": 1.3957338333129883, | |
| "learning_rate": 2.873001333131282e-05, | |
| "loss": 0.3588, | |
| "num_input_tokens_seen": 47262080, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.3587731811697574, | |
| "grad_norm": 1.6384849548339844, | |
| "learning_rate": 2.8683846181637685e-05, | |
| "loss": 0.4088, | |
| "num_input_tokens_seen": 47320400, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.3605563480741798, | |
| "grad_norm": 1.4136253595352173, | |
| "learning_rate": 2.863766618671177e-05, | |
| "loss": 0.3077, | |
| "num_input_tokens_seen": 47383184, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.362339514978602, | |
| "grad_norm": 2.322763681411743, | |
| "learning_rate": 2.8591473507560667e-05, | |
| "loss": 0.4329, | |
| "num_input_tokens_seen": 47444144, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.3641226818830243, | |
| "grad_norm": 1.4467617273330688, | |
| "learning_rate": 2.8545268305254254e-05, | |
| "loss": 0.424, | |
| "num_input_tokens_seen": 47509584, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.3659058487874465, | |
| "grad_norm": 1.751305341720581, | |
| "learning_rate": 2.8499050740906037e-05, | |
| "loss": 0.3705, | |
| "num_input_tokens_seen": 47573696, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.3676890156918686, | |
| "grad_norm": 1.503512978553772, | |
| "learning_rate": 2.8452820975672628e-05, | |
| "loss": 0.3882, | |
| "num_input_tokens_seen": 47635168, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.369472182596291, | |
| "grad_norm": 1.5444144010543823, | |
| "learning_rate": 2.8406579170753205e-05, | |
| "loss": 0.3555, | |
| "num_input_tokens_seen": 47693984, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.3712553495007134, | |
| "grad_norm": 1.5627723932266235, | |
| "learning_rate": 2.8360325487388913e-05, | |
| "loss": 0.3481, | |
| "num_input_tokens_seen": 47758704, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.3730385164051355, | |
| "grad_norm": 1.991705298423767, | |
| "learning_rate": 2.8314060086862308e-05, | |
| "loss": 0.3879, | |
| "num_input_tokens_seen": 47822832, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.3748216833095577, | |
| "grad_norm": 1.3864610195159912, | |
| "learning_rate": 2.8267783130496817e-05, | |
| "loss": 0.4234, | |
| "num_input_tokens_seen": 47887488, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.37660485021398, | |
| "grad_norm": 1.6995244026184082, | |
| "learning_rate": 2.822149477965617e-05, | |
| "loss": 0.4011, | |
| "num_input_tokens_seen": 47951888, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.3783880171184024, | |
| "grad_norm": 1.4929567575454712, | |
| "learning_rate": 2.8175195195743792e-05, | |
| "loss": 0.351, | |
| "num_input_tokens_seen": 48014128, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.3801711840228246, | |
| "grad_norm": 1.6061722040176392, | |
| "learning_rate": 2.8128884540202317e-05, | |
| "loss": 0.4725, | |
| "num_input_tokens_seen": 48072048, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.3819543509272467, | |
| "grad_norm": 1.555999517440796, | |
| "learning_rate": 2.8082562974512948e-05, | |
| "loss": 0.3323, | |
| "num_input_tokens_seen": 48134720, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.383737517831669, | |
| "grad_norm": 1.188208818435669, | |
| "learning_rate": 2.8036230660194972e-05, | |
| "loss": 0.3187, | |
| "num_input_tokens_seen": 48197488, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.3855206847360912, | |
| "grad_norm": 1.965627670288086, | |
| "learning_rate": 2.7989887758805134e-05, | |
| "loss": 0.458, | |
| "num_input_tokens_seen": 48262272, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.3873038516405136, | |
| "grad_norm": 1.364537000656128, | |
| "learning_rate": 2.794353443193707e-05, | |
| "loss": 0.3833, | |
| "num_input_tokens_seen": 48322592, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.3890870185449358, | |
| "grad_norm": 1.5128754377365112, | |
| "learning_rate": 2.789717084122081e-05, | |
| "loss": 0.3946, | |
| "num_input_tokens_seen": 48378000, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.3908701854493581, | |
| "grad_norm": 1.3390058279037476, | |
| "learning_rate": 2.785079714832216e-05, | |
| "loss": 0.4381, | |
| "num_input_tokens_seen": 48434736, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.3926533523537803, | |
| "grad_norm": 1.3086851835250854, | |
| "learning_rate": 2.7804413514942147e-05, | |
| "loss": 0.3576, | |
| "num_input_tokens_seen": 48494288, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.3944365192582024, | |
| "grad_norm": 1.5228943824768066, | |
| "learning_rate": 2.7758020102816456e-05, | |
| "loss": 0.3978, | |
| "num_input_tokens_seen": 48554784, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.3962196861626248, | |
| "grad_norm": 1.810608983039856, | |
| "learning_rate": 2.7711617073714872e-05, | |
| "loss": 0.336, | |
| "num_input_tokens_seen": 48620192, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.3980028530670472, | |
| "grad_norm": 2.060600519180298, | |
| "learning_rate": 2.766520458944073e-05, | |
| "loss": 0.4053, | |
| "num_input_tokens_seen": 48683392, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.3997860199714693, | |
| "grad_norm": 1.435867190361023, | |
| "learning_rate": 2.76187828118303e-05, | |
| "loss": 0.3841, | |
| "num_input_tokens_seen": 48752192, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.4015691868758915, | |
| "grad_norm": 2.4842238426208496, | |
| "learning_rate": 2.7572351902752296e-05, | |
| "loss": 0.4873, | |
| "num_input_tokens_seen": 48815952, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.4033523537803139, | |
| "grad_norm": 1.696571707725525, | |
| "learning_rate": 2.7525912024107242e-05, | |
| "loss": 0.431, | |
| "num_input_tokens_seen": 48880176, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.405135520684736, | |
| "grad_norm": 1.5973232984542847, | |
| "learning_rate": 2.747946333782696e-05, | |
| "loss": 0.4383, | |
| "num_input_tokens_seen": 48943680, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.4069186875891584, | |
| "grad_norm": 1.621713638305664, | |
| "learning_rate": 2.7433006005873956e-05, | |
| "loss": 0.3977, | |
| "num_input_tokens_seen": 49004224, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.4087018544935805, | |
| "grad_norm": 1.6473205089569092, | |
| "learning_rate": 2.738654019024093e-05, | |
| "loss": 0.3486, | |
| "num_input_tokens_seen": 49064512, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.410485021398003, | |
| "grad_norm": 1.304693341255188, | |
| "learning_rate": 2.7340066052950103e-05, | |
| "loss": 0.4181, | |
| "num_input_tokens_seen": 49126544, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.412268188302425, | |
| "grad_norm": 1.6275643110275269, | |
| "learning_rate": 2.7293583756052755e-05, | |
| "loss": 0.3857, | |
| "num_input_tokens_seen": 49191056, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.4140513552068474, | |
| "grad_norm": 1.4851253032684326, | |
| "learning_rate": 2.7247093461628616e-05, | |
| "loss": 0.4096, | |
| "num_input_tokens_seen": 49249936, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.4158345221112696, | |
| "grad_norm": 1.1423369646072388, | |
| "learning_rate": 2.720059533178529e-05, | |
| "loss": 0.3315, | |
| "num_input_tokens_seen": 49309968, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.417617689015692, | |
| "grad_norm": 1.5465352535247803, | |
| "learning_rate": 2.71540895286577e-05, | |
| "loss": 0.3942, | |
| "num_input_tokens_seen": 49368576, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.4194008559201141, | |
| "grad_norm": 1.611109733581543, | |
| "learning_rate": 2.710757621440753e-05, | |
| "loss": 0.3589, | |
| "num_input_tokens_seen": 49427888, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.4211840228245363, | |
| "grad_norm": 1.5170013904571533, | |
| "learning_rate": 2.7061055551222663e-05, | |
| "loss": 0.3942, | |
| "num_input_tokens_seen": 49491648, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.4229671897289586, | |
| "grad_norm": 1.4070842266082764, | |
| "learning_rate": 2.70145277013166e-05, | |
| "loss": 0.3997, | |
| "num_input_tokens_seen": 49557296, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.424750356633381, | |
| "grad_norm": 1.52862548828125, | |
| "learning_rate": 2.6967992826927897e-05, | |
| "loss": 0.4189, | |
| "num_input_tokens_seen": 49620208, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.4265335235378032, | |
| "grad_norm": 1.0965608358383179, | |
| "learning_rate": 2.6921451090319603e-05, | |
| "loss": 0.3903, | |
| "num_input_tokens_seen": 49683888, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.4283166904422253, | |
| "grad_norm": 1.8186748027801514, | |
| "learning_rate": 2.6874902653778712e-05, | |
| "loss": 0.3987, | |
| "num_input_tokens_seen": 49741728, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.4300998573466477, | |
| "grad_norm": 1.3632290363311768, | |
| "learning_rate": 2.6828347679615558e-05, | |
| "loss": 0.377, | |
| "num_input_tokens_seen": 49804832, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.4318830242510698, | |
| "grad_norm": 1.560187816619873, | |
| "learning_rate": 2.6781786330163282e-05, | |
| "loss": 0.3524, | |
| "num_input_tokens_seen": 49867744, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.4336661911554922, | |
| "grad_norm": 1.8583840131759644, | |
| "learning_rate": 2.673521876777727e-05, | |
| "loss": 0.344, | |
| "num_input_tokens_seen": 49932304, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.4354493580599144, | |
| "grad_norm": 1.5638411045074463, | |
| "learning_rate": 2.6688645154834537e-05, | |
| "loss": 0.4498, | |
| "num_input_tokens_seen": 49994560, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.4372325249643367, | |
| "grad_norm": 1.6732484102249146, | |
| "learning_rate": 2.6642065653733213e-05, | |
| "loss": 0.3513, | |
| "num_input_tokens_seen": 50053584, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.4390156918687589, | |
| "grad_norm": 1.5469917058944702, | |
| "learning_rate": 2.6595480426891976e-05, | |
| "loss": 0.354, | |
| "num_input_tokens_seen": 50111824, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.440798858773181, | |
| "grad_norm": 1.4826576709747314, | |
| "learning_rate": 2.654888963674945e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 50175024, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.4425820256776034, | |
| "grad_norm": 1.5195841789245605, | |
| "learning_rate": 2.650229344576367e-05, | |
| "loss": 0.3669, | |
| "num_input_tokens_seen": 50236896, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.4443651925820258, | |
| "grad_norm": 1.735239863395691, | |
| "learning_rate": 2.6455692016411476e-05, | |
| "loss": 0.3854, | |
| "num_input_tokens_seen": 50297360, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.446148359486448, | |
| "grad_norm": 1.5575016736984253, | |
| "learning_rate": 2.640908551118801e-05, | |
| "loss": 0.4022, | |
| "num_input_tokens_seen": 50357200, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.44793152639087, | |
| "grad_norm": 1.480210781097412, | |
| "learning_rate": 2.6362474092606088e-05, | |
| "loss": 0.3373, | |
| "num_input_tokens_seen": 50422896, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.4497146932952925, | |
| "grad_norm": 1.5694204568862915, | |
| "learning_rate": 2.631585792319567e-05, | |
| "loss": 0.3583, | |
| "num_input_tokens_seen": 50483168, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.4514978601997148, | |
| "grad_norm": 1.4493743181228638, | |
| "learning_rate": 2.626923716550328e-05, | |
| "loss": 0.3734, | |
| "num_input_tokens_seen": 50546688, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.453281027104137, | |
| "grad_norm": 1.8895164728164673, | |
| "learning_rate": 2.622261198209143e-05, | |
| "loss": 0.4142, | |
| "num_input_tokens_seen": 50611280, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.4550641940085591, | |
| "grad_norm": 1.3316305875778198, | |
| "learning_rate": 2.6175982535538098e-05, | |
| "loss": 0.3354, | |
| "num_input_tokens_seen": 50675728, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.4568473609129815, | |
| "grad_norm": 1.2798820734024048, | |
| "learning_rate": 2.6129348988436074e-05, | |
| "loss": 0.4042, | |
| "num_input_tokens_seen": 50735520, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.4586305278174037, | |
| "grad_norm": 1.5085526704788208, | |
| "learning_rate": 2.6082711503392494e-05, | |
| "loss": 0.371, | |
| "num_input_tokens_seen": 50797520, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.460413694721826, | |
| "grad_norm": 1.8046773672103882, | |
| "learning_rate": 2.60360702430282e-05, | |
| "loss": 0.4519, | |
| "num_input_tokens_seen": 50857456, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.4621968616262482, | |
| "grad_norm": 4.003551006317139, | |
| "learning_rate": 2.5989425369977195e-05, | |
| "loss": 0.4206, | |
| "num_input_tokens_seen": 50917760, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.4639800285306706, | |
| "grad_norm": 1.346306324005127, | |
| "learning_rate": 2.5942777046886108e-05, | |
| "loss": 0.2947, | |
| "num_input_tokens_seen": 50980272, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.4657631954350927, | |
| "grad_norm": 1.428459882736206, | |
| "learning_rate": 2.589612543641357e-05, | |
| "loss": 0.3589, | |
| "num_input_tokens_seen": 51044768, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.4675463623395149, | |
| "grad_norm": 1.2181662321090698, | |
| "learning_rate": 2.5849470701229685e-05, | |
| "loss": 0.2415, | |
| "num_input_tokens_seen": 51102032, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.4693295292439372, | |
| "grad_norm": 1.5910693407058716, | |
| "learning_rate": 2.5802813004015443e-05, | |
| "loss": 0.3508, | |
| "num_input_tokens_seen": 51164672, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.4711126961483596, | |
| "grad_norm": 1.6249642372131348, | |
| "learning_rate": 2.5756152507462177e-05, | |
| "loss": 0.3944, | |
| "num_input_tokens_seen": 51228176, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.4728958630527818, | |
| "grad_norm": 2.0845768451690674, | |
| "learning_rate": 2.5709489374270983e-05, | |
| "loss": 0.3756, | |
| "num_input_tokens_seen": 51289072, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.474679029957204, | |
| "grad_norm": 1.638650894165039, | |
| "learning_rate": 2.5662823767152127e-05, | |
| "loss": 0.3464, | |
| "num_input_tokens_seen": 51348944, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.4764621968616263, | |
| "grad_norm": 1.555907130241394, | |
| "learning_rate": 2.561615584882453e-05, | |
| "loss": 0.3685, | |
| "num_input_tokens_seen": 51408432, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.4782453637660484, | |
| "grad_norm": 1.6691559553146362, | |
| "learning_rate": 2.5569485782015144e-05, | |
| "loss": 0.3658, | |
| "num_input_tokens_seen": 51472704, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.4800285306704708, | |
| "grad_norm": 1.5996575355529785, | |
| "learning_rate": 2.5522813729458443e-05, | |
| "loss": 0.3806, | |
| "num_input_tokens_seen": 51533440, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.481811697574893, | |
| "grad_norm": 2.0772979259490967, | |
| "learning_rate": 2.5476139853895796e-05, | |
| "loss": 0.4304, | |
| "num_input_tokens_seen": 51597872, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.4835948644793153, | |
| "grad_norm": 1.6055898666381836, | |
| "learning_rate": 2.5429464318074952e-05, | |
| "loss": 0.3482, | |
| "num_input_tokens_seen": 51658736, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.4853780313837375, | |
| "grad_norm": 1.9511430263519287, | |
| "learning_rate": 2.538278728474944e-05, | |
| "loss": 0.4127, | |
| "num_input_tokens_seen": 51720672, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.4871611982881598, | |
| "grad_norm": 1.5314128398895264, | |
| "learning_rate": 2.5336108916677986e-05, | |
| "loss": 0.362, | |
| "num_input_tokens_seen": 51781760, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.488944365192582, | |
| "grad_norm": 2.31260085105896, | |
| "learning_rate": 2.528942937662403e-05, | |
| "loss": 0.3639, | |
| "num_input_tokens_seen": 51845840, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.4907275320970044, | |
| "grad_norm": 1.428371548652649, | |
| "learning_rate": 2.5242748827355046e-05, | |
| "loss": 0.3494, | |
| "num_input_tokens_seen": 51909856, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.4925106990014265, | |
| "grad_norm": 1.799333930015564, | |
| "learning_rate": 2.519606743164204e-05, | |
| "loss": 0.3494, | |
| "num_input_tokens_seen": 51970080, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.4942938659058487, | |
| "grad_norm": 1.3890087604522705, | |
| "learning_rate": 2.514938535225897e-05, | |
| "loss": 0.3798, | |
| "num_input_tokens_seen": 52034608, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.496077032810271, | |
| "grad_norm": 1.6815882921218872, | |
| "learning_rate": 2.5102702751982188e-05, | |
| "loss": 0.3498, | |
| "num_input_tokens_seen": 52100032, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.4978601997146934, | |
| "grad_norm": 1.7529703378677368, | |
| "learning_rate": 2.5056019793589858e-05, | |
| "loss": 0.3873, | |
| "num_input_tokens_seen": 52162800, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.4996433666191156, | |
| "grad_norm": 1.700079321861267, | |
| "learning_rate": 2.500933663986139e-05, | |
| "loss": 0.3284, | |
| "num_input_tokens_seen": 52222560, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.5014265335235377, | |
| "grad_norm": 1.6496635675430298, | |
| "learning_rate": 2.496265345357687e-05, | |
| "loss": 0.3805, | |
| "num_input_tokens_seen": 52285328, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.50320970042796, | |
| "grad_norm": 1.5684199333190918, | |
| "learning_rate": 2.49159703975165e-05, | |
| "loss": 0.4581, | |
| "num_input_tokens_seen": 52347456, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.5049928673323825, | |
| "grad_norm": 1.2051026821136475, | |
| "learning_rate": 2.4869287634460045e-05, | |
| "loss": 0.3323, | |
| "num_input_tokens_seen": 52409680, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.5067760342368046, | |
| "grad_norm": 1.540032982826233, | |
| "learning_rate": 2.4822605327186217e-05, | |
| "loss": 0.3204, | |
| "num_input_tokens_seen": 52471376, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.5085592011412268, | |
| "grad_norm": 1.0840933322906494, | |
| "learning_rate": 2.4775923638472172e-05, | |
| "loss": 0.3218, | |
| "num_input_tokens_seen": 52532256, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.5103423680456491, | |
| "grad_norm": 1.5128995180130005, | |
| "learning_rate": 2.472924273109287e-05, | |
| "loss": 0.3686, | |
| "num_input_tokens_seen": 52592928, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.5121255349500713, | |
| "grad_norm": 1.375869870185852, | |
| "learning_rate": 2.4682562767820587e-05, | |
| "loss": 0.3571, | |
| "num_input_tokens_seen": 52655968, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.5139087018544934, | |
| "grad_norm": 1.3699325323104858, | |
| "learning_rate": 2.4635883911424293e-05, | |
| "loss": 0.4165, | |
| "num_input_tokens_seen": 52716096, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.5156918687589158, | |
| "grad_norm": 1.4494869709014893, | |
| "learning_rate": 2.4589206324669082e-05, | |
| "loss": 0.4172, | |
| "num_input_tokens_seen": 52780176, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.5174750356633382, | |
| "grad_norm": 1.4189242124557495, | |
| "learning_rate": 2.4542530170315635e-05, | |
| "loss": 0.3887, | |
| "num_input_tokens_seen": 52841968, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.5192582025677603, | |
| "grad_norm": 1.198378086090088, | |
| "learning_rate": 2.449585561111965e-05, | |
| "loss": 0.3482, | |
| "num_input_tokens_seen": 52904544, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.5210413694721825, | |
| "grad_norm": 1.5662075281143188, | |
| "learning_rate": 2.4449182809831227e-05, | |
| "loss": 0.3348, | |
| "num_input_tokens_seen": 52964960, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.5228245363766049, | |
| "grad_norm": 2.213198184967041, | |
| "learning_rate": 2.4402511929194383e-05, | |
| "loss": 0.4413, | |
| "num_input_tokens_seen": 53026528, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.5246077032810272, | |
| "grad_norm": 1.652597188949585, | |
| "learning_rate": 2.4355843131946407e-05, | |
| "loss": 0.3929, | |
| "num_input_tokens_seen": 53091424, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.5263908701854494, | |
| "grad_norm": 1.3295167684555054, | |
| "learning_rate": 2.4309176580817318e-05, | |
| "loss": 0.4019, | |
| "num_input_tokens_seen": 53153552, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.5281740370898715, | |
| "grad_norm": 1.6449896097183228, | |
| "learning_rate": 2.426251243852932e-05, | |
| "loss": 0.3761, | |
| "num_input_tokens_seen": 53210416, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.529957203994294, | |
| "grad_norm": 1.336366891860962, | |
| "learning_rate": 2.421585086779623e-05, | |
| "loss": 0.3722, | |
| "num_input_tokens_seen": 53272864, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.5317403708987163, | |
| "grad_norm": 2.004012107849121, | |
| "learning_rate": 2.4169192031322865e-05, | |
| "loss": 0.4091, | |
| "num_input_tokens_seen": 53331424, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.5335235378031382, | |
| "grad_norm": 1.515665054321289, | |
| "learning_rate": 2.412253609180453e-05, | |
| "loss": 0.4092, | |
| "num_input_tokens_seen": 53392848, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.5353067047075606, | |
| "grad_norm": 1.5377110242843628, | |
| "learning_rate": 2.4075883211926415e-05, | |
| "loss": 0.3362, | |
| "num_input_tokens_seen": 53453392, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.537089871611983, | |
| "grad_norm": 1.5057114362716675, | |
| "learning_rate": 2.4029233554363047e-05, | |
| "loss": 0.3732, | |
| "num_input_tokens_seen": 53516752, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.5388730385164051, | |
| "grad_norm": 1.4880399703979492, | |
| "learning_rate": 2.3982587281777742e-05, | |
| "loss": 0.3358, | |
| "num_input_tokens_seen": 53580576, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.5406562054208273, | |
| "grad_norm": 2.0042483806610107, | |
| "learning_rate": 2.3935944556821966e-05, | |
| "loss": 0.409, | |
| "num_input_tokens_seen": 53644272, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.5424393723252496, | |
| "grad_norm": 1.3616559505462646, | |
| "learning_rate": 2.388930554213484e-05, | |
| "loss": 0.3688, | |
| "num_input_tokens_seen": 53705232, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.544222539229672, | |
| "grad_norm": 1.4289284944534302, | |
| "learning_rate": 2.3842670400342566e-05, | |
| "loss": 0.3526, | |
| "num_input_tokens_seen": 53760288, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.5460057061340942, | |
| "grad_norm": 1.3744745254516602, | |
| "learning_rate": 2.3796039294057795e-05, | |
| "loss": 0.3635, | |
| "num_input_tokens_seen": 53816640, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.5477888730385163, | |
| "grad_norm": 1.7196354866027832, | |
| "learning_rate": 2.3749412385879154e-05, | |
| "loss": 0.3233, | |
| "num_input_tokens_seen": 53876544, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.5495720399429387, | |
| "grad_norm": 1.4938087463378906, | |
| "learning_rate": 2.370278983839061e-05, | |
| "loss": 0.389, | |
| "num_input_tokens_seen": 53938928, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.551355206847361, | |
| "grad_norm": 1.674802541732788, | |
| "learning_rate": 2.3656171814160906e-05, | |
| "loss": 0.3819, | |
| "num_input_tokens_seen": 54000640, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.5531383737517832, | |
| "grad_norm": 1.393610954284668, | |
| "learning_rate": 2.3609558475743048e-05, | |
| "loss": 0.4104, | |
| "num_input_tokens_seen": 54062448, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.5549215406562054, | |
| "grad_norm": 1.6902203559875488, | |
| "learning_rate": 2.356294998567369e-05, | |
| "loss": 0.3618, | |
| "num_input_tokens_seen": 54123424, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.5567047075606277, | |
| "grad_norm": 1.4336259365081787, | |
| "learning_rate": 2.351634650647257e-05, | |
| "loss": 0.37, | |
| "num_input_tokens_seen": 54188112, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.5584878744650499, | |
| "grad_norm": 1.1472690105438232, | |
| "learning_rate": 2.3469748200641967e-05, | |
| "loss": 0.3926, | |
| "num_input_tokens_seen": 54252208, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.560271041369472, | |
| "grad_norm": 1.2778717279434204, | |
| "learning_rate": 2.34231552306661e-05, | |
| "loss": 0.449, | |
| "num_input_tokens_seen": 54316784, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.5620542082738944, | |
| "grad_norm": 2.114806652069092, | |
| "learning_rate": 2.3376567759010614e-05, | |
| "loss": 0.4046, | |
| "num_input_tokens_seen": 54376256, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.5638373751783168, | |
| "grad_norm": 1.5421040058135986, | |
| "learning_rate": 2.3329985948121963e-05, | |
| "loss": 0.3858, | |
| "num_input_tokens_seen": 54436576, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.565620542082739, | |
| "grad_norm": 1.365997076034546, | |
| "learning_rate": 2.3283409960426857e-05, | |
| "loss": 0.3647, | |
| "num_input_tokens_seen": 54497760, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.567403708987161, | |
| "grad_norm": 1.4641869068145752, | |
| "learning_rate": 2.323683995833171e-05, | |
| "loss": 0.3754, | |
| "num_input_tokens_seen": 54559504, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.5691868758915835, | |
| "grad_norm": 1.8695045709609985, | |
| "learning_rate": 2.3190276104222073e-05, | |
| "loss": 0.4224, | |
| "num_input_tokens_seen": 54623280, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.5709700427960058, | |
| "grad_norm": 1.825547456741333, | |
| "learning_rate": 2.3143718560462042e-05, | |
| "loss": 0.3847, | |
| "num_input_tokens_seen": 54688800, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.572753209700428, | |
| "grad_norm": 1.671181321144104, | |
| "learning_rate": 2.3097167489393705e-05, | |
| "loss": 0.4252, | |
| "num_input_tokens_seen": 54751120, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.5745363766048501, | |
| "grad_norm": 1.5111570358276367, | |
| "learning_rate": 2.3050623053336623e-05, | |
| "loss": 0.3571, | |
| "num_input_tokens_seen": 54814640, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.5763195435092725, | |
| "grad_norm": 1.5124739408493042, | |
| "learning_rate": 2.300408541458716e-05, | |
| "loss": 0.3436, | |
| "num_input_tokens_seen": 54873728, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.5781027104136949, | |
| "grad_norm": 1.5977612733840942, | |
| "learning_rate": 2.2957554735418023e-05, | |
| "loss": 0.3866, | |
| "num_input_tokens_seen": 54935536, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.579885877318117, | |
| "grad_norm": 1.5673508644104004, | |
| "learning_rate": 2.2911031178077648e-05, | |
| "loss": 0.4096, | |
| "num_input_tokens_seen": 54995776, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.5816690442225392, | |
| "grad_norm": 1.5844247341156006, | |
| "learning_rate": 2.2864514904789606e-05, | |
| "loss": 0.4905, | |
| "num_input_tokens_seen": 55053984, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.5834522111269616, | |
| "grad_norm": 1.5645432472229004, | |
| "learning_rate": 2.281800607775211e-05, | |
| "loss": 0.3688, | |
| "num_input_tokens_seen": 55115472, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.5852353780313837, | |
| "grad_norm": 1.758135437965393, | |
| "learning_rate": 2.2771504859137365e-05, | |
| "loss": 0.4062, | |
| "num_input_tokens_seen": 55177392, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 1.5870185449358059, | |
| "grad_norm": 1.7348984479904175, | |
| "learning_rate": 2.2725011411091097e-05, | |
| "loss": 0.3988, | |
| "num_input_tokens_seen": 55239744, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.5888017118402282, | |
| "grad_norm": 1.2819241285324097, | |
| "learning_rate": 2.26785258957319e-05, | |
| "loss": 0.409, | |
| "num_input_tokens_seen": 55300416, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 1.5905848787446506, | |
| "grad_norm": 1.5527747869491577, | |
| "learning_rate": 2.2632048475150705e-05, | |
| "loss": 0.3621, | |
| "num_input_tokens_seen": 55360480, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.5923680456490727, | |
| "grad_norm": 1.163960576057434, | |
| "learning_rate": 2.2585579311410242e-05, | |
| "loss": 0.3758, | |
| "num_input_tokens_seen": 55419840, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 1.594151212553495, | |
| "grad_norm": 1.756473183631897, | |
| "learning_rate": 2.2539118566544443e-05, | |
| "loss": 0.3922, | |
| "num_input_tokens_seen": 55479024, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.5959343794579173, | |
| "grad_norm": 1.3992432355880737, | |
| "learning_rate": 2.2492666402557873e-05, | |
| "loss": 0.4174, | |
| "num_input_tokens_seen": 55539360, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.5977175463623396, | |
| "grad_norm": 1.5347425937652588, | |
| "learning_rate": 2.244622298142517e-05, | |
| "loss": 0.3443, | |
| "num_input_tokens_seen": 55602384, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.5995007132667618, | |
| "grad_norm": 1.5150680541992188, | |
| "learning_rate": 2.239978846509052e-05, | |
| "loss": 0.3496, | |
| "num_input_tokens_seen": 55661456, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 1.601283880171184, | |
| "grad_norm": 1.6969490051269531, | |
| "learning_rate": 2.235336301546702e-05, | |
| "loss": 0.3593, | |
| "num_input_tokens_seen": 55727984, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.6030670470756063, | |
| "grad_norm": 1.6206620931625366, | |
| "learning_rate": 2.230694679443618e-05, | |
| "loss": 0.3449, | |
| "num_input_tokens_seen": 55790240, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 1.6048502139800287, | |
| "grad_norm": 1.4925209283828735, | |
| "learning_rate": 2.2260539963847317e-05, | |
| "loss": 0.3625, | |
| "num_input_tokens_seen": 55850592, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.6066333808844506, | |
| "grad_norm": 1.5853619575500488, | |
| "learning_rate": 2.2214142685517005e-05, | |
| "loss": 0.3858, | |
| "num_input_tokens_seen": 55911552, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 1.608416547788873, | |
| "grad_norm": 1.5945818424224854, | |
| "learning_rate": 2.2167755121228516e-05, | |
| "loss": 0.3834, | |
| "num_input_tokens_seen": 55974224, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.6101997146932954, | |
| "grad_norm": 1.4650936126708984, | |
| "learning_rate": 2.212137743273124e-05, | |
| "loss": 0.3578, | |
| "num_input_tokens_seen": 56035440, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 1.6119828815977175, | |
| "grad_norm": 1.6041439771652222, | |
| "learning_rate": 2.2075009781740144e-05, | |
| "loss": 0.3204, | |
| "num_input_tokens_seen": 56096256, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.6137660485021397, | |
| "grad_norm": 1.5402308702468872, | |
| "learning_rate": 2.2028652329935196e-05, | |
| "loss": 0.3659, | |
| "num_input_tokens_seen": 56158160, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.615549215406562, | |
| "grad_norm": 1.6089669466018677, | |
| "learning_rate": 2.198230523896077e-05, | |
| "loss": 0.3354, | |
| "num_input_tokens_seen": 56219072, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.6173323823109844, | |
| "grad_norm": 1.728310465812683, | |
| "learning_rate": 2.193596867042515e-05, | |
| "loss": 0.3414, | |
| "num_input_tokens_seen": 56280416, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 1.6191155492154066, | |
| "grad_norm": 1.711105465888977, | |
| "learning_rate": 2.1889642785899926e-05, | |
| "loss": 0.4517, | |
| "num_input_tokens_seen": 56341792, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.6208987161198287, | |
| "grad_norm": 1.99299955368042, | |
| "learning_rate": 2.1843327746919405e-05, | |
| "loss": 0.3987, | |
| "num_input_tokens_seen": 56400304, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 1.622681883024251, | |
| "grad_norm": 1.4943881034851074, | |
| "learning_rate": 2.1797023714980092e-05, | |
| "loss": 0.4308, | |
| "num_input_tokens_seen": 56463152, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.6244650499286735, | |
| "grad_norm": 1.8707401752471924, | |
| "learning_rate": 2.1750730851540135e-05, | |
| "loss": 0.4295, | |
| "num_input_tokens_seen": 56527024, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 1.6262482168330956, | |
| "grad_norm": 1.4292758703231812, | |
| "learning_rate": 2.1704449318018692e-05, | |
| "loss": 0.3562, | |
| "num_input_tokens_seen": 56588288, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.6280313837375178, | |
| "grad_norm": 1.6914817094802856, | |
| "learning_rate": 2.1658179275795457e-05, | |
| "loss": 0.3525, | |
| "num_input_tokens_seen": 56649728, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 1.6298145506419401, | |
| "grad_norm": 1.3131465911865234, | |
| "learning_rate": 2.1611920886210034e-05, | |
| "loss": 0.335, | |
| "num_input_tokens_seen": 56712144, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.6315977175463623, | |
| "grad_norm": 2.274732828140259, | |
| "learning_rate": 2.156567431056139e-05, | |
| "loss": 0.3433, | |
| "num_input_tokens_seen": 56774944, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.6333808844507844, | |
| "grad_norm": 1.2653104066848755, | |
| "learning_rate": 2.151943971010732e-05, | |
| "loss": 0.3342, | |
| "num_input_tokens_seen": 56839024, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.6351640513552068, | |
| "grad_norm": 1.7100783586502075, | |
| "learning_rate": 2.1473217246063833e-05, | |
| "loss": 0.2926, | |
| "num_input_tokens_seen": 56900368, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 1.6369472182596292, | |
| "grad_norm": 1.8248704671859741, | |
| "learning_rate": 2.1427007079604643e-05, | |
| "loss": 0.3742, | |
| "num_input_tokens_seen": 56962480, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.6387303851640513, | |
| "grad_norm": 1.7061128616333008, | |
| "learning_rate": 2.1380809371860588e-05, | |
| "loss": 0.4454, | |
| "num_input_tokens_seen": 57025088, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 1.6405135520684735, | |
| "grad_norm": 2.0661263465881348, | |
| "learning_rate": 2.1334624283919026e-05, | |
| "loss": 0.3228, | |
| "num_input_tokens_seen": 57087760, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.6422967189728959, | |
| "grad_norm": 2.6758852005004883, | |
| "learning_rate": 2.1288451976823352e-05, | |
| "loss": 0.3899, | |
| "num_input_tokens_seen": 57150864, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 1.6440798858773182, | |
| "grad_norm": 1.6372278928756714, | |
| "learning_rate": 2.1242292611572387e-05, | |
| "loss": 0.4195, | |
| "num_input_tokens_seen": 57213760, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.6458630527817404, | |
| "grad_norm": 1.4883116483688354, | |
| "learning_rate": 2.1196146349119802e-05, | |
| "loss": 0.3935, | |
| "num_input_tokens_seen": 57275264, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 1.6476462196861625, | |
| "grad_norm": 1.9058313369750977, | |
| "learning_rate": 2.1150013350373594e-05, | |
| "loss": 0.4095, | |
| "num_input_tokens_seen": 57335248, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.649429386590585, | |
| "grad_norm": 1.4819669723510742, | |
| "learning_rate": 2.110389377619553e-05, | |
| "loss": 0.3567, | |
| "num_input_tokens_seen": 57397200, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.6512125534950073, | |
| "grad_norm": 1.2163094282150269, | |
| "learning_rate": 2.1057787787400528e-05, | |
| "loss": 0.3625, | |
| "num_input_tokens_seen": 57456560, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.6529957203994294, | |
| "grad_norm": 1.4048594236373901, | |
| "learning_rate": 2.1011695544756172e-05, | |
| "loss": 0.278, | |
| "num_input_tokens_seen": 57515072, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 1.6547788873038516, | |
| "grad_norm": 1.4936057329177856, | |
| "learning_rate": 2.096561720898209e-05, | |
| "loss": 0.3555, | |
| "num_input_tokens_seen": 57576816, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.656562054208274, | |
| "grad_norm": 1.3369996547698975, | |
| "learning_rate": 2.0919552940749415e-05, | |
| "loss": 0.373, | |
| "num_input_tokens_seen": 57638784, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 1.658345221112696, | |
| "grad_norm": 1.3848330974578857, | |
| "learning_rate": 2.087350290068026e-05, | |
| "loss": 0.32, | |
| "num_input_tokens_seen": 57700128, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.6601283880171183, | |
| "grad_norm": 2.041731595993042, | |
| "learning_rate": 2.0827467249347085e-05, | |
| "loss": 0.4344, | |
| "num_input_tokens_seen": 57763072, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 1.6619115549215406, | |
| "grad_norm": 1.5269125699996948, | |
| "learning_rate": 2.078144614727221e-05, | |
| "loss": 0.4432, | |
| "num_input_tokens_seen": 57825312, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.663694721825963, | |
| "grad_norm": 1.7696996927261353, | |
| "learning_rate": 2.0735439754927206e-05, | |
| "loss": 0.359, | |
| "num_input_tokens_seen": 57887616, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 1.6654778887303852, | |
| "grad_norm": 1.252975583076477, | |
| "learning_rate": 2.0689448232732345e-05, | |
| "loss": 0.3115, | |
| "num_input_tokens_seen": 57945744, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.6672610556348073, | |
| "grad_norm": 1.894482970237732, | |
| "learning_rate": 2.0643471741056075e-05, | |
| "loss": 0.4102, | |
| "num_input_tokens_seen": 58006464, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 1.6690442225392297, | |
| "grad_norm": 1.2879408597946167, | |
| "learning_rate": 2.059751044021441e-05, | |
| "loss": 0.4138, | |
| "num_input_tokens_seen": 58068400, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.670827389443652, | |
| "grad_norm": 1.8357077836990356, | |
| "learning_rate": 2.055156449047041e-05, | |
| "loss": 0.3768, | |
| "num_input_tokens_seen": 58127664, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 1.6726105563480742, | |
| "grad_norm": 1.647601842880249, | |
| "learning_rate": 2.0505634052033585e-05, | |
| "loss": 0.3792, | |
| "num_input_tokens_seen": 58192000, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.6743937232524964, | |
| "grad_norm": 1.852842092514038, | |
| "learning_rate": 2.0459719285059396e-05, | |
| "loss": 0.3663, | |
| "num_input_tokens_seen": 58252192, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 1.6761768901569187, | |
| "grad_norm": 1.6066093444824219, | |
| "learning_rate": 2.041382034964862e-05, | |
| "loss": 0.3448, | |
| "num_input_tokens_seen": 58312416, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.677960057061341, | |
| "grad_norm": 2.437854766845703, | |
| "learning_rate": 2.0367937405846844e-05, | |
| "loss": 0.3217, | |
| "num_input_tokens_seen": 58373728, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 1.679743223965763, | |
| "grad_norm": 1.6272107362747192, | |
| "learning_rate": 2.0322070613643913e-05, | |
| "loss": 0.4379, | |
| "num_input_tokens_seen": 58436144, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.6815263908701854, | |
| "grad_norm": 1.2958059310913086, | |
| "learning_rate": 2.0276220132973316e-05, | |
| "loss": 0.3759, | |
| "num_input_tokens_seen": 58500384, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 1.6833095577746078, | |
| "grad_norm": 1.5485390424728394, | |
| "learning_rate": 2.0230386123711714e-05, | |
| "loss": 0.3597, | |
| "num_input_tokens_seen": 58560080, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.68509272467903, | |
| "grad_norm": 1.8313623666763306, | |
| "learning_rate": 2.0184568745678278e-05, | |
| "loss": 0.3011, | |
| "num_input_tokens_seen": 58624320, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.686875891583452, | |
| "grad_norm": 1.8504736423492432, | |
| "learning_rate": 2.0138768158634224e-05, | |
| "loss": 0.4347, | |
| "num_input_tokens_seen": 58688080, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.6886590584878745, | |
| "grad_norm": 1.6756471395492554, | |
| "learning_rate": 2.009298452228222e-05, | |
| "loss": 0.4311, | |
| "num_input_tokens_seen": 58753808, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 1.6904422253922968, | |
| "grad_norm": 4.543945789337158, | |
| "learning_rate": 2.00472179962658e-05, | |
| "loss": 0.3863, | |
| "num_input_tokens_seen": 58815120, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.692225392296719, | |
| "grad_norm": 1.8706589937210083, | |
| "learning_rate": 2.0001468740168872e-05, | |
| "loss": 0.3552, | |
| "num_input_tokens_seen": 58876960, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 1.6940085592011411, | |
| "grad_norm": 1.59261953830719, | |
| "learning_rate": 1.99557369135151e-05, | |
| "loss": 0.3894, | |
| "num_input_tokens_seen": 58940672, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.6957917261055635, | |
| "grad_norm": 1.4362701177597046, | |
| "learning_rate": 1.9910022675767376e-05, | |
| "loss": 0.2959, | |
| "num_input_tokens_seen": 58998544, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 1.6975748930099859, | |
| "grad_norm": 1.9743127822875977, | |
| "learning_rate": 1.9864326186327265e-05, | |
| "loss": 0.3498, | |
| "num_input_tokens_seen": 59059648, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.699358059914408, | |
| "grad_norm": 1.3991342782974243, | |
| "learning_rate": 1.9818647604534464e-05, | |
| "loss": 0.3219, | |
| "num_input_tokens_seen": 59122176, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 1.7011412268188302, | |
| "grad_norm": 1.8187235593795776, | |
| "learning_rate": 1.977298708966619e-05, | |
| "loss": 0.3669, | |
| "num_input_tokens_seen": 59186784, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.7029243937232525, | |
| "grad_norm": 1.8813676834106445, | |
| "learning_rate": 1.9727344800936683e-05, | |
| "loss": 0.3834, | |
| "num_input_tokens_seen": 59247216, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 1.7047075606276747, | |
| "grad_norm": 2.3406333923339844, | |
| "learning_rate": 1.968172089749664e-05, | |
| "loss": 0.3516, | |
| "num_input_tokens_seen": 59307904, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.7064907275320969, | |
| "grad_norm": 1.5572633743286133, | |
| "learning_rate": 1.963611553843262e-05, | |
| "loss": 0.3393, | |
| "num_input_tokens_seen": 59371232, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 1.7082738944365192, | |
| "grad_norm": 1.5522667169570923, | |
| "learning_rate": 1.9590528882766565e-05, | |
| "loss": 0.3647, | |
| "num_input_tokens_seen": 59431552, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.7100570613409416, | |
| "grad_norm": 1.8206260204315186, | |
| "learning_rate": 1.954496108945515e-05, | |
| "loss": 0.3945, | |
| "num_input_tokens_seen": 59493632, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 1.7118402282453637, | |
| "grad_norm": 1.764434814453125, | |
| "learning_rate": 1.9499412317389305e-05, | |
| "loss": 0.3307, | |
| "num_input_tokens_seen": 59555664, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.713623395149786, | |
| "grad_norm": 1.5242831707000732, | |
| "learning_rate": 1.9453882725393647e-05, | |
| "loss": 0.3776, | |
| "num_input_tokens_seen": 59618688, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 1.7154065620542083, | |
| "grad_norm": 1.5517514944076538, | |
| "learning_rate": 1.940837247222587e-05, | |
| "loss": 0.3579, | |
| "num_input_tokens_seen": 59678528, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.7171897289586306, | |
| "grad_norm": 1.5298833847045898, | |
| "learning_rate": 1.936288171657629e-05, | |
| "loss": 0.3753, | |
| "num_input_tokens_seen": 59741216, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 1.7189728958630528, | |
| "grad_norm": 2.073880672454834, | |
| "learning_rate": 1.93174106170672e-05, | |
| "loss": 0.4658, | |
| "num_input_tokens_seen": 59800048, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.720756062767475, | |
| "grad_norm": 2.046309232711792, | |
| "learning_rate": 1.927195933225236e-05, | |
| "loss": 0.3769, | |
| "num_input_tokens_seen": 59863248, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 1.7225392296718973, | |
| "grad_norm": 2.1719560623168945, | |
| "learning_rate": 1.922652802061644e-05, | |
| "loss": 0.3404, | |
| "num_input_tokens_seen": 59923552, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.7243223965763197, | |
| "grad_norm": 1.5875083208084106, | |
| "learning_rate": 1.9181116840574482e-05, | |
| "loss": 0.4051, | |
| "num_input_tokens_seen": 59985472, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 1.7261055634807418, | |
| "grad_norm": 1.585699200630188, | |
| "learning_rate": 1.91357259504713e-05, | |
| "loss": 0.4092, | |
| "num_input_tokens_seen": 60049280, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.727888730385164, | |
| "grad_norm": 1.9710477590560913, | |
| "learning_rate": 1.909035550858097e-05, | |
| "loss": 0.4739, | |
| "num_input_tokens_seen": 60117088, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 1.7296718972895864, | |
| "grad_norm": 1.366571307182312, | |
| "learning_rate": 1.9045005673106294e-05, | |
| "loss": 0.3841, | |
| "num_input_tokens_seen": 60182864, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.7314550641940085, | |
| "grad_norm": 1.3404649496078491, | |
| "learning_rate": 1.8999676602178177e-05, | |
| "loss": 0.3849, | |
| "num_input_tokens_seen": 60243184, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 1.7332382310984307, | |
| "grad_norm": 1.3966970443725586, | |
| "learning_rate": 1.895436845385516e-05, | |
| "loss": 0.4027, | |
| "num_input_tokens_seen": 60303088, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.735021398002853, | |
| "grad_norm": 1.9772543907165527, | |
| "learning_rate": 1.8909081386122794e-05, | |
| "loss": 0.4104, | |
| "num_input_tokens_seen": 60364368, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 1.7368045649072754, | |
| "grad_norm": 1.3881804943084717, | |
| "learning_rate": 1.886381555689314e-05, | |
| "loss": 0.3492, | |
| "num_input_tokens_seen": 60426320, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.7385877318116976, | |
| "grad_norm": 1.6630189418792725, | |
| "learning_rate": 1.8818571124004218e-05, | |
| "loss": 0.4027, | |
| "num_input_tokens_seen": 60488864, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.7403708987161197, | |
| "grad_norm": 1.3820875883102417, | |
| "learning_rate": 1.8773348245219403e-05, | |
| "loss": 0.413, | |
| "num_input_tokens_seen": 60551408, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.742154065620542, | |
| "grad_norm": 1.9947419166564941, | |
| "learning_rate": 1.8728147078226955e-05, | |
| "loss": 0.3397, | |
| "num_input_tokens_seen": 60616480, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 1.7439372325249645, | |
| "grad_norm": 1.653064489364624, | |
| "learning_rate": 1.8682967780639398e-05, | |
| "loss": 0.3454, | |
| "num_input_tokens_seen": 60679840, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.7457203994293866, | |
| "grad_norm": 1.5282491445541382, | |
| "learning_rate": 1.8637810509993002e-05, | |
| "loss": 0.3116, | |
| "num_input_tokens_seen": 60744928, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 1.7475035663338088, | |
| "grad_norm": 1.984711766242981, | |
| "learning_rate": 1.859267542374724e-05, | |
| "loss": 0.3609, | |
| "num_input_tokens_seen": 60807424, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.7492867332382311, | |
| "grad_norm": 1.3897349834442139, | |
| "learning_rate": 1.8547562679284243e-05, | |
| "loss": 0.3705, | |
| "num_input_tokens_seen": 60870304, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 1.7510699001426535, | |
| "grad_norm": 1.379233479499817, | |
| "learning_rate": 1.8502472433908197e-05, | |
| "loss": 0.3989, | |
| "num_input_tokens_seen": 60930880, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.7528530670470754, | |
| "grad_norm": 1.5175212621688843, | |
| "learning_rate": 1.8457404844844883e-05, | |
| "loss": 0.3492, | |
| "num_input_tokens_seen": 60995040, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 1.7546362339514978, | |
| "grad_norm": 1.4529441595077515, | |
| "learning_rate": 1.8412360069241034e-05, | |
| "loss": 0.3915, | |
| "num_input_tokens_seen": 61059296, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.7564194008559202, | |
| "grad_norm": 1.897772192955017, | |
| "learning_rate": 1.836733826416387e-05, | |
| "loss": 0.3891, | |
| "num_input_tokens_seen": 61116064, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 1.7582025677603423, | |
| "grad_norm": 1.2248293161392212, | |
| "learning_rate": 1.8322339586600524e-05, | |
| "loss": 0.3858, | |
| "num_input_tokens_seen": 61179536, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.7599857346647645, | |
| "grad_norm": 1.7476718425750732, | |
| "learning_rate": 1.8277364193457436e-05, | |
| "loss": 0.4066, | |
| "num_input_tokens_seen": 61239040, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 1.7617689015691869, | |
| "grad_norm": 1.3817017078399658, | |
| "learning_rate": 1.8232412241559896e-05, | |
| "loss": 0.3426, | |
| "num_input_tokens_seen": 61302576, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.7635520684736092, | |
| "grad_norm": 1.6009461879730225, | |
| "learning_rate": 1.818748388765146e-05, | |
| "loss": 0.4506, | |
| "num_input_tokens_seen": 61367472, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 1.7653352353780314, | |
| "grad_norm": 1.4665969610214233, | |
| "learning_rate": 1.8142579288393354e-05, | |
| "loss": 0.4153, | |
| "num_input_tokens_seen": 61428000, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.7671184022824535, | |
| "grad_norm": 1.4972290992736816, | |
| "learning_rate": 1.8097698600364026e-05, | |
| "loss": 0.4038, | |
| "num_input_tokens_seen": 61492752, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 1.768901569186876, | |
| "grad_norm": 1.6141380071640015, | |
| "learning_rate": 1.8052841980058533e-05, | |
| "loss": 0.3921, | |
| "num_input_tokens_seen": 61554016, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.7706847360912983, | |
| "grad_norm": 2.656374931335449, | |
| "learning_rate": 1.8008009583887982e-05, | |
| "loss": 0.3052, | |
| "num_input_tokens_seen": 61613216, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 1.7724679029957204, | |
| "grad_norm": 1.6948812007904053, | |
| "learning_rate": 1.7963201568179046e-05, | |
| "loss": 0.3735, | |
| "num_input_tokens_seen": 61675680, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.7742510699001426, | |
| "grad_norm": 1.326504111289978, | |
| "learning_rate": 1.791841808917338e-05, | |
| "loss": 0.3857, | |
| "num_input_tokens_seen": 61739056, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 1.776034236804565, | |
| "grad_norm": 1.384664535522461, | |
| "learning_rate": 1.7873659303027052e-05, | |
| "loss": 0.3468, | |
| "num_input_tokens_seen": 61801136, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.777817403708987, | |
| "grad_norm": 1.8413240909576416, | |
| "learning_rate": 1.7828925365810077e-05, | |
| "loss": 0.4141, | |
| "num_input_tokens_seen": 61862192, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 1.7796005706134093, | |
| "grad_norm": 1.7805628776550293, | |
| "learning_rate": 1.778421643350578e-05, | |
| "loss": 0.3474, | |
| "num_input_tokens_seen": 61923792, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.7813837375178316, | |
| "grad_norm": 1.673789620399475, | |
| "learning_rate": 1.7739532662010322e-05, | |
| "loss": 0.4318, | |
| "num_input_tokens_seen": 61984768, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 1.783166904422254, | |
| "grad_norm": 1.5332224369049072, | |
| "learning_rate": 1.7694874207132127e-05, | |
| "loss": 0.464, | |
| "num_input_tokens_seen": 62048432, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 8412, | |
| "num_input_tokens_seen": 62048432, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.9126461037889126e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |