| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.18053800324968405, | |
| "eval_steps": 500, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00045134500812421015, | |
| "grad_norm": 4.092976093292236, | |
| "learning_rate": 4.9977884094601916e-05, | |
| "loss": 5.2971, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0009026900162484203, | |
| "grad_norm": 3.788925886154175, | |
| "learning_rate": 4.99553168441957e-05, | |
| "loss": 5.2741, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0013540350243726304, | |
| "grad_norm": 5.105148792266846, | |
| "learning_rate": 4.9932749593789494e-05, | |
| "loss": 5.0557, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0018053800324968406, | |
| "grad_norm": 4.261813163757324, | |
| "learning_rate": 4.991018234338328e-05, | |
| "loss": 5.155, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0022567250406210506, | |
| "grad_norm": 6.269433498382568, | |
| "learning_rate": 4.988761509297707e-05, | |
| "loss": 5.098, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.002708070048745261, | |
| "grad_norm": 4.494116306304932, | |
| "learning_rate": 4.986504784257086e-05, | |
| "loss": 5.0972, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.003159415056869471, | |
| "grad_norm": 3.811136484146118, | |
| "learning_rate": 4.984248059216465e-05, | |
| "loss": 5.0968, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.003610760064993681, | |
| "grad_norm": 5.116394996643066, | |
| "learning_rate": 4.9819913341758444e-05, | |
| "loss": 5.0053, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.004062105073117891, | |
| "grad_norm": 3.4574902057647705, | |
| "learning_rate": 4.979734609135223e-05, | |
| "loss": 5.0443, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.004513450081242101, | |
| "grad_norm": 3.924276113510132, | |
| "learning_rate": 4.977477884094602e-05, | |
| "loss": 4.9834, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.004964795089366311, | |
| "grad_norm": 3.120497226715088, | |
| "learning_rate": 4.975221159053981e-05, | |
| "loss": 4.9859, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.005416140097490522, | |
| "grad_norm": 5.467548847198486, | |
| "learning_rate": 4.97296443401336e-05, | |
| "loss": 5.0014, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.005867485105614732, | |
| "grad_norm": 4.165292739868164, | |
| "learning_rate": 4.970707708972739e-05, | |
| "loss": 4.9381, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.006318830113738942, | |
| "grad_norm": 5.348793029785156, | |
| "learning_rate": 4.968450983932118e-05, | |
| "loss": 4.8988, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.006770175121863152, | |
| "grad_norm": 5.445329189300537, | |
| "learning_rate": 4.9661942588914965e-05, | |
| "loss": 4.882, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.007221520129987362, | |
| "grad_norm": 3.731977939605713, | |
| "learning_rate": 4.963937533850876e-05, | |
| "loss": 4.8879, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.007672865138111573, | |
| "grad_norm": 4.9821343421936035, | |
| "learning_rate": 4.9616808088102544e-05, | |
| "loss": 4.8933, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.008124210146235782, | |
| "grad_norm": 6.4130401611328125, | |
| "learning_rate": 4.9594240837696337e-05, | |
| "loss": 4.8942, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.008575555154359992, | |
| "grad_norm": 5.44791841506958, | |
| "learning_rate": 4.957167358729013e-05, | |
| "loss": 4.816, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.009026900162484202, | |
| "grad_norm": 4.63847541809082, | |
| "learning_rate": 4.9549106336883915e-05, | |
| "loss": 4.8797, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.009478245170608413, | |
| "grad_norm": 4.616076946258545, | |
| "learning_rate": 4.952653908647771e-05, | |
| "loss": 4.8622, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.009929590178732623, | |
| "grad_norm": 4.847900390625, | |
| "learning_rate": 4.9503971836071494e-05, | |
| "loss": 4.8765, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.010380935186856833, | |
| "grad_norm": 3.968596935272217, | |
| "learning_rate": 4.9481404585665286e-05, | |
| "loss": 4.7834, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.010832280194981043, | |
| "grad_norm": 3.6416995525360107, | |
| "learning_rate": 4.945883733525907e-05, | |
| "loss": 4.8674, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.011283625203105253, | |
| "grad_norm": 6.88565731048584, | |
| "learning_rate": 4.9436270084852865e-05, | |
| "loss": 4.809, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.011734970211229464, | |
| "grad_norm": 5.064456462860107, | |
| "learning_rate": 4.941370283444665e-05, | |
| "loss": 4.8121, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.012186315219353674, | |
| "grad_norm": 4.556407451629639, | |
| "learning_rate": 4.939113558404044e-05, | |
| "loss": 4.799, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.012637660227477884, | |
| "grad_norm": 4.071566581726074, | |
| "learning_rate": 4.936856833363423e-05, | |
| "loss": 4.7471, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.013089005235602094, | |
| "grad_norm": 4.647943019866943, | |
| "learning_rate": 4.934600108322802e-05, | |
| "loss": 4.7783, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.013540350243726304, | |
| "grad_norm": 4.131853103637695, | |
| "learning_rate": 4.9323433832821814e-05, | |
| "loss": 4.8507, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.013991695251850515, | |
| "grad_norm": 6.93862771987915, | |
| "learning_rate": 4.93008665824156e-05, | |
| "loss": 4.8, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.014443040259974725, | |
| "grad_norm": 4.366854190826416, | |
| "learning_rate": 4.927829933200939e-05, | |
| "loss": 4.8794, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.014894385268098935, | |
| "grad_norm": 3.989370822906494, | |
| "learning_rate": 4.925573208160318e-05, | |
| "loss": 4.7972, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.015345730276223145, | |
| "grad_norm": 4.402428150177002, | |
| "learning_rate": 4.923316483119697e-05, | |
| "loss": 4.9, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.015797075284347355, | |
| "grad_norm": 4.536413192749023, | |
| "learning_rate": 4.921059758079076e-05, | |
| "loss": 4.7663, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.016248420292471564, | |
| "grad_norm": 6.875385284423828, | |
| "learning_rate": 4.918803033038455e-05, | |
| "loss": 4.8557, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.016699765300595776, | |
| "grad_norm": 2.8038690090179443, | |
| "learning_rate": 4.9165463079978336e-05, | |
| "loss": 4.8403, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.017151110308719984, | |
| "grad_norm": 4.83705997467041, | |
| "learning_rate": 4.914289582957213e-05, | |
| "loss": 4.8451, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.017602455316844196, | |
| "grad_norm": 3.359116315841675, | |
| "learning_rate": 4.9120328579165914e-05, | |
| "loss": 4.8559, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.018053800324968405, | |
| "grad_norm": 6.140733242034912, | |
| "learning_rate": 4.909776132875971e-05, | |
| "loss": 4.8633, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.018505145333092617, | |
| "grad_norm": 4.224785327911377, | |
| "learning_rate": 4.90751940783535e-05, | |
| "loss": 4.835, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.018956490341216825, | |
| "grad_norm": 3.613844394683838, | |
| "learning_rate": 4.9052626827947285e-05, | |
| "loss": 4.7293, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.019407835349341037, | |
| "grad_norm": 5.848568439483643, | |
| "learning_rate": 4.903005957754108e-05, | |
| "loss": 4.6969, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.019859180357465245, | |
| "grad_norm": 3.9656293392181396, | |
| "learning_rate": 4.9007492327134864e-05, | |
| "loss": 4.7284, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.020310525365589457, | |
| "grad_norm": 5.00789213180542, | |
| "learning_rate": 4.898492507672866e-05, | |
| "loss": 4.865, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.020761870373713666, | |
| "grad_norm": 4.17151403427124, | |
| "learning_rate": 4.896235782632244e-05, | |
| "loss": 4.7261, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.021213215381837878, | |
| "grad_norm": 3.966817617416382, | |
| "learning_rate": 4.8939790575916235e-05, | |
| "loss": 4.7437, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.021664560389962086, | |
| "grad_norm": 4.516706943511963, | |
| "learning_rate": 4.891722332551002e-05, | |
| "loss": 4.7352, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.022115905398086298, | |
| "grad_norm": 4.184154033660889, | |
| "learning_rate": 4.8894656075103814e-05, | |
| "loss": 4.6622, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.022567250406210507, | |
| "grad_norm": 6.0985188484191895, | |
| "learning_rate": 4.8872088824697606e-05, | |
| "loss": 4.7659, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02301859541433472, | |
| "grad_norm": 4.630510330200195, | |
| "learning_rate": 4.884952157429139e-05, | |
| "loss": 4.8424, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.023469940422458927, | |
| "grad_norm": 4.261359214782715, | |
| "learning_rate": 4.8826954323885185e-05, | |
| "loss": 4.7576, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02392128543058314, | |
| "grad_norm": 4.511416435241699, | |
| "learning_rate": 4.880438707347897e-05, | |
| "loss": 4.7644, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.024372630438707348, | |
| "grad_norm": 3.6945180892944336, | |
| "learning_rate": 4.878181982307276e-05, | |
| "loss": 4.7893, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02482397544683156, | |
| "grad_norm": 5.0419511795043945, | |
| "learning_rate": 4.875925257266655e-05, | |
| "loss": 4.6466, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.025275320454955768, | |
| "grad_norm": 3.80349063873291, | |
| "learning_rate": 4.873668532226034e-05, | |
| "loss": 4.778, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02572666546307998, | |
| "grad_norm": 3.5543832778930664, | |
| "learning_rate": 4.871411807185413e-05, | |
| "loss": 4.6788, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.02617801047120419, | |
| "grad_norm": 3.064133405685425, | |
| "learning_rate": 4.869155082144792e-05, | |
| "loss": 4.6796, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.0266293554793284, | |
| "grad_norm": 3.449727773666382, | |
| "learning_rate": 4.8668983571041706e-05, | |
| "loss": 4.7513, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.02708070048745261, | |
| "grad_norm": 3.831252098083496, | |
| "learning_rate": 4.86464163206355e-05, | |
| "loss": 4.6886, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.027532045495576817, | |
| "grad_norm": 6.98654842376709, | |
| "learning_rate": 4.862384907022929e-05, | |
| "loss": 4.7468, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.02798339050370103, | |
| "grad_norm": 3.842249870300293, | |
| "learning_rate": 4.860128181982308e-05, | |
| "loss": 4.6317, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.028434735511825238, | |
| "grad_norm": 8.266908645629883, | |
| "learning_rate": 4.857871456941686e-05, | |
| "loss": 4.7426, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.02888608051994945, | |
| "grad_norm": 5.496558666229248, | |
| "learning_rate": 4.8556147319010656e-05, | |
| "loss": 4.5784, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.029337425528073658, | |
| "grad_norm": 4.078311920166016, | |
| "learning_rate": 4.853358006860444e-05, | |
| "loss": 4.6739, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.02978877053619787, | |
| "grad_norm": 3.8962206840515137, | |
| "learning_rate": 4.8511012818198234e-05, | |
| "loss": 4.7384, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03024011554432208, | |
| "grad_norm": 3.655855178833008, | |
| "learning_rate": 4.848844556779202e-05, | |
| "loss": 4.7782, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.03069146055244629, | |
| "grad_norm": 3.840287446975708, | |
| "learning_rate": 4.846587831738581e-05, | |
| "loss": 4.6969, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.0311428055605705, | |
| "grad_norm": 3.54238224029541, | |
| "learning_rate": 4.84433110669796e-05, | |
| "loss": 4.7998, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.03159415056869471, | |
| "grad_norm": 6.432263374328613, | |
| "learning_rate": 4.842074381657339e-05, | |
| "loss": 4.7554, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03204549557681892, | |
| "grad_norm": 4.151718616485596, | |
| "learning_rate": 4.839817656616718e-05, | |
| "loss": 4.7455, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.03249684058494313, | |
| "grad_norm": 3.6925272941589355, | |
| "learning_rate": 4.837560931576097e-05, | |
| "loss": 4.7143, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03294818559306734, | |
| "grad_norm": 5.515355110168457, | |
| "learning_rate": 4.8353042065354756e-05, | |
| "loss": 4.6842, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.03339953060119155, | |
| "grad_norm": 4.059805393218994, | |
| "learning_rate": 4.833047481494855e-05, | |
| "loss": 4.6799, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03385087560931576, | |
| "grad_norm": 4.311253547668457, | |
| "learning_rate": 4.8307907564542334e-05, | |
| "loss": 4.7937, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.03430222061743997, | |
| "grad_norm": 3.7470786571502686, | |
| "learning_rate": 4.828534031413613e-05, | |
| "loss": 4.6706, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.034753565625564184, | |
| "grad_norm": 3.432297468185425, | |
| "learning_rate": 4.826277306372991e-05, | |
| "loss": 4.6627, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.03520491063368839, | |
| "grad_norm": 2.6612203121185303, | |
| "learning_rate": 4.8240205813323706e-05, | |
| "loss": 4.6027, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.0356562556418126, | |
| "grad_norm": 5.329100131988525, | |
| "learning_rate": 4.821763856291749e-05, | |
| "loss": 4.7471, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.03610760064993681, | |
| "grad_norm": 3.7262275218963623, | |
| "learning_rate": 4.8195071312511284e-05, | |
| "loss": 4.6032, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.036558945658061025, | |
| "grad_norm": 4.605144500732422, | |
| "learning_rate": 4.817250406210507e-05, | |
| "loss": 4.8456, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.03701029066618523, | |
| "grad_norm": 3.8024492263793945, | |
| "learning_rate": 4.814993681169886e-05, | |
| "loss": 4.7256, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.03746163567430944, | |
| "grad_norm": 7.693057060241699, | |
| "learning_rate": 4.8127369561292655e-05, | |
| "loss": 4.6224, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.03791298068243365, | |
| "grad_norm": 4.100279808044434, | |
| "learning_rate": 4.810480231088644e-05, | |
| "loss": 4.7119, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.038364325690557866, | |
| "grad_norm": 4.8026347160339355, | |
| "learning_rate": 4.8082235060480234e-05, | |
| "loss": 4.57, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.038815670698682074, | |
| "grad_norm": 5.2641119956970215, | |
| "learning_rate": 4.805966781007402e-05, | |
| "loss": 4.5887, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.03926701570680628, | |
| "grad_norm": 2.8225934505462646, | |
| "learning_rate": 4.803710055966781e-05, | |
| "loss": 4.6981, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.03971836071493049, | |
| "grad_norm": 3.3784983158111572, | |
| "learning_rate": 4.80145333092616e-05, | |
| "loss": 4.6752, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.040169705723054706, | |
| "grad_norm": 5.6406426429748535, | |
| "learning_rate": 4.799196605885539e-05, | |
| "loss": 4.6396, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.040621050731178915, | |
| "grad_norm": 4.564062595367432, | |
| "learning_rate": 4.7969398808449177e-05, | |
| "loss": 4.6158, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04107239573930312, | |
| "grad_norm": 3.6431472301483154, | |
| "learning_rate": 4.794683155804297e-05, | |
| "loss": 4.6883, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.04152374074742733, | |
| "grad_norm": 5.026195526123047, | |
| "learning_rate": 4.792426430763676e-05, | |
| "loss": 4.7225, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04197508575555154, | |
| "grad_norm": 4.776146411895752, | |
| "learning_rate": 4.790169705723055e-05, | |
| "loss": 4.6596, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.042426430763675756, | |
| "grad_norm": 4.838674545288086, | |
| "learning_rate": 4.787912980682434e-05, | |
| "loss": 4.6576, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.042877775771799964, | |
| "grad_norm": 4.529509544372559, | |
| "learning_rate": 4.7856562556418126e-05, | |
| "loss": 4.6721, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.04332912077992417, | |
| "grad_norm": 4.392935752868652, | |
| "learning_rate": 4.783399530601192e-05, | |
| "loss": 4.701, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04378046578804838, | |
| "grad_norm": 4.331223011016846, | |
| "learning_rate": 4.7811428055605705e-05, | |
| "loss": 4.6795, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.044231810796172596, | |
| "grad_norm": 4.109352111816406, | |
| "learning_rate": 4.77888608051995e-05, | |
| "loss": 4.5997, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.044683155804296805, | |
| "grad_norm": 3.7418441772460938, | |
| "learning_rate": 4.776629355479328e-05, | |
| "loss": 4.6427, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.04513450081242101, | |
| "grad_norm": 3.0237081050872803, | |
| "learning_rate": 4.7743726304387076e-05, | |
| "loss": 4.7359, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04558584582054522, | |
| "grad_norm": 3.9886231422424316, | |
| "learning_rate": 4.772115905398086e-05, | |
| "loss": 4.5842, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.04603719082866944, | |
| "grad_norm": 4.597533226013184, | |
| "learning_rate": 4.7698591803574654e-05, | |
| "loss": 4.7202, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.046488535836793646, | |
| "grad_norm": 4.520393371582031, | |
| "learning_rate": 4.767602455316845e-05, | |
| "loss": 4.5774, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.046939880844917854, | |
| "grad_norm": 3.2824018001556396, | |
| "learning_rate": 4.765345730276223e-05, | |
| "loss": 4.6084, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04739122585304206, | |
| "grad_norm": 6.290219783782959, | |
| "learning_rate": 4.7630890052356026e-05, | |
| "loss": 4.6361, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.04784257086116628, | |
| "grad_norm": 4.844172954559326, | |
| "learning_rate": 4.760832280194981e-05, | |
| "loss": 4.6252, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.04829391586929049, | |
| "grad_norm": 4.8328962326049805, | |
| "learning_rate": 4.7585755551543604e-05, | |
| "loss": 4.5557, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.048745260877414695, | |
| "grad_norm": 4.386012077331543, | |
| "learning_rate": 4.756318830113739e-05, | |
| "loss": 4.6911, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.049196605885538904, | |
| "grad_norm": 4.393270969390869, | |
| "learning_rate": 4.754062105073118e-05, | |
| "loss": 4.4869, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.04964795089366312, | |
| "grad_norm": 3.9346606731414795, | |
| "learning_rate": 4.751805380032497e-05, | |
| "loss": 4.608, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.05009929590178733, | |
| "grad_norm": 5.140569686889648, | |
| "learning_rate": 4.749548654991876e-05, | |
| "loss": 4.6262, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.050550640909911536, | |
| "grad_norm": 3.2936654090881348, | |
| "learning_rate": 4.747291929951255e-05, | |
| "loss": 4.6565, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.051001985918035744, | |
| "grad_norm": 3.5564124584198, | |
| "learning_rate": 4.745035204910634e-05, | |
| "loss": 4.5727, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.05145333092615996, | |
| "grad_norm": 3.9385626316070557, | |
| "learning_rate": 4.742778479870013e-05, | |
| "loss": 4.7165, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.05190467593428417, | |
| "grad_norm": 3.736527681350708, | |
| "learning_rate": 4.740521754829392e-05, | |
| "loss": 4.6504, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.05235602094240838, | |
| "grad_norm": 3.3729724884033203, | |
| "learning_rate": 4.738265029788771e-05, | |
| "loss": 4.7029, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.052807365950532585, | |
| "grad_norm": 2.953383445739746, | |
| "learning_rate": 4.73600830474815e-05, | |
| "loss": 4.5483, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.0532587109586568, | |
| "grad_norm": 4.406127452850342, | |
| "learning_rate": 4.733751579707529e-05, | |
| "loss": 4.6443, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05371005596678101, | |
| "grad_norm": 2.935302495956421, | |
| "learning_rate": 4.7314948546669075e-05, | |
| "loss": 4.61, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.05416140097490522, | |
| "grad_norm": 4.362770080566406, | |
| "learning_rate": 4.729238129626287e-05, | |
| "loss": 4.5821, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.054612745983029426, | |
| "grad_norm": 3.588181972503662, | |
| "learning_rate": 4.7269814045856654e-05, | |
| "loss": 4.6317, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.055064090991153634, | |
| "grad_norm": 2.7238504886627197, | |
| "learning_rate": 4.7247246795450446e-05, | |
| "loss": 4.6867, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.05551543599927785, | |
| "grad_norm": 3.66497802734375, | |
| "learning_rate": 4.722467954504423e-05, | |
| "loss": 4.557, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.05596678100740206, | |
| "grad_norm": 3.9344165325164795, | |
| "learning_rate": 4.7202112294638025e-05, | |
| "loss": 4.5099, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05641812601552627, | |
| "grad_norm": 3.919712781906128, | |
| "learning_rate": 4.717954504423182e-05, | |
| "loss": 4.6521, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.056869471023650475, | |
| "grad_norm": 6.165071964263916, | |
| "learning_rate": 4.71569777938256e-05, | |
| "loss": 4.6656, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.05732081603177469, | |
| "grad_norm": 3.976167917251587, | |
| "learning_rate": 4.7134410543419396e-05, | |
| "loss": 4.6891, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.0577721610398989, | |
| "grad_norm": 3.4293136596679688, | |
| "learning_rate": 4.711184329301318e-05, | |
| "loss": 4.6213, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05822350604802311, | |
| "grad_norm": 3.062398910522461, | |
| "learning_rate": 4.7089276042606975e-05, | |
| "loss": 4.6794, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.058674851056147316, | |
| "grad_norm": 3.9836747646331787, | |
| "learning_rate": 4.706670879220076e-05, | |
| "loss": 4.6722, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.05912619606427153, | |
| "grad_norm": 4.0859246253967285, | |
| "learning_rate": 4.704414154179455e-05, | |
| "loss": 4.6909, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.05957754107239574, | |
| "grad_norm": 4.478472709655762, | |
| "learning_rate": 4.702157429138834e-05, | |
| "loss": 4.4942, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.06002888608051995, | |
| "grad_norm": 5.508967399597168, | |
| "learning_rate": 4.699900704098213e-05, | |
| "loss": 4.6658, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.06048023108864416, | |
| "grad_norm": 3.933199644088745, | |
| "learning_rate": 4.697643979057592e-05, | |
| "loss": 4.5696, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.06093157609676837, | |
| "grad_norm": 3.0764100551605225, | |
| "learning_rate": 4.695387254016971e-05, | |
| "loss": 4.7047, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.06138292110489258, | |
| "grad_norm": 3.0718812942504883, | |
| "learning_rate": 4.69313052897635e-05, | |
| "loss": 4.6213, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.06183426611301679, | |
| "grad_norm": 3.2949626445770264, | |
| "learning_rate": 4.690873803935729e-05, | |
| "loss": 4.5174, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.062285611121141, | |
| "grad_norm": 3.5119667053222656, | |
| "learning_rate": 4.688617078895108e-05, | |
| "loss": 4.6313, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.0627369561292652, | |
| "grad_norm": 3.8293747901916504, | |
| "learning_rate": 4.686360353854487e-05, | |
| "loss": 4.6896, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.06318830113738942, | |
| "grad_norm": 3.223698139190674, | |
| "learning_rate": 4.684103628813866e-05, | |
| "loss": 4.6462, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06363964614551364, | |
| "grad_norm": 3.7061171531677246, | |
| "learning_rate": 4.6818469037732446e-05, | |
| "loss": 4.5423, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.06409099115363784, | |
| "grad_norm": 3.9031214714050293, | |
| "learning_rate": 4.679590178732624e-05, | |
| "loss": 4.6688, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.06454233616176205, | |
| "grad_norm": 11.581488609313965, | |
| "learning_rate": 4.6773334536920024e-05, | |
| "loss": 4.6832, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.06499368116988626, | |
| "grad_norm": 3.9187841415405273, | |
| "learning_rate": 4.675076728651382e-05, | |
| "loss": 4.6451, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06544502617801047, | |
| "grad_norm": 3.8191521167755127, | |
| "learning_rate": 4.67282000361076e-05, | |
| "loss": 4.5677, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.06589637118613469, | |
| "grad_norm": 3.5511984825134277, | |
| "learning_rate": 4.6705632785701395e-05, | |
| "loss": 4.5412, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.06634771619425889, | |
| "grad_norm": 4.853089809417725, | |
| "learning_rate": 4.668306553529518e-05, | |
| "loss": 4.6636, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.0667990612023831, | |
| "grad_norm": 2.9507358074188232, | |
| "learning_rate": 4.6660498284888974e-05, | |
| "loss": 4.6232, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.06725040621050732, | |
| "grad_norm": 4.20766019821167, | |
| "learning_rate": 4.663793103448276e-05, | |
| "loss": 4.6429, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.06770175121863152, | |
| "grad_norm": 2.9639532566070557, | |
| "learning_rate": 4.6615363784076546e-05, | |
| "loss": 4.5613, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.06815309622675574, | |
| "grad_norm": 4.452625751495361, | |
| "learning_rate": 4.659279653367034e-05, | |
| "loss": 4.7034, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.06860444123487994, | |
| "grad_norm": 4.076809883117676, | |
| "learning_rate": 4.6570229283264124e-05, | |
| "loss": 4.6244, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06905578624300415, | |
| "grad_norm": 3.361752510070801, | |
| "learning_rate": 4.654766203285792e-05, | |
| "loss": 4.6122, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.06950713125112837, | |
| "grad_norm": 2.9916162490844727, | |
| "learning_rate": 4.65250947824517e-05, | |
| "loss": 4.5939, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.06995847625925257, | |
| "grad_norm": 4.1875200271606445, | |
| "learning_rate": 4.6502527532045495e-05, | |
| "loss": 4.5255, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.07040982126737678, | |
| "grad_norm": 2.9376866817474365, | |
| "learning_rate": 4.647996028163929e-05, | |
| "loss": 4.6222, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.07086116627550099, | |
| "grad_norm": 3.77079176902771, | |
| "learning_rate": 4.6457393031233074e-05, | |
| "loss": 4.5671, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.0713125112836252, | |
| "grad_norm": 6.709794044494629, | |
| "learning_rate": 4.6434825780826866e-05, | |
| "loss": 4.493, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.07176385629174942, | |
| "grad_norm": 4.273845195770264, | |
| "learning_rate": 4.641225853042065e-05, | |
| "loss": 4.6503, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.07221520129987362, | |
| "grad_norm": 3.1263434886932373, | |
| "learning_rate": 4.6389691280014445e-05, | |
| "loss": 4.6878, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07266654630799783, | |
| "grad_norm": 4.049619674682617, | |
| "learning_rate": 4.636712402960823e-05, | |
| "loss": 4.6728, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.07311789131612205, | |
| "grad_norm": 4.419615745544434, | |
| "learning_rate": 4.6344556779202023e-05, | |
| "loss": 4.5648, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.07356923632424625, | |
| "grad_norm": 4.067174911499023, | |
| "learning_rate": 4.632198952879581e-05, | |
| "loss": 4.6475, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.07402058133237047, | |
| "grad_norm": 3.8273239135742188, | |
| "learning_rate": 4.62994222783896e-05, | |
| "loss": 4.4821, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.07447192634049467, | |
| "grad_norm": 2.988802433013916, | |
| "learning_rate": 4.627685502798339e-05, | |
| "loss": 4.4786, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.07492327134861888, | |
| "grad_norm": 4.000159740447998, | |
| "learning_rate": 4.625428777757718e-05, | |
| "loss": 4.6493, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.0753746163567431, | |
| "grad_norm": 4.026582717895508, | |
| "learning_rate": 4.623172052717097e-05, | |
| "loss": 4.6096, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.0758259613648673, | |
| "grad_norm": 3.3265931606292725, | |
| "learning_rate": 4.620915327676476e-05, | |
| "loss": 4.5148, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07627730637299152, | |
| "grad_norm": 3.2252328395843506, | |
| "learning_rate": 4.618658602635855e-05, | |
| "loss": 4.6038, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.07672865138111573, | |
| "grad_norm": 3.4897453784942627, | |
| "learning_rate": 4.616401877595234e-05, | |
| "loss": 4.6121, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.07717999638923993, | |
| "grad_norm": 3.3298215866088867, | |
| "learning_rate": 4.614145152554613e-05, | |
| "loss": 4.5457, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.07763134139736415, | |
| "grad_norm": 3.875998020172119, | |
| "learning_rate": 4.6118884275139916e-05, | |
| "loss": 4.5236, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07808268640548835, | |
| "grad_norm": 3.5962016582489014, | |
| "learning_rate": 4.609631702473371e-05, | |
| "loss": 4.528, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.07853403141361257, | |
| "grad_norm": 2.4850423336029053, | |
| "learning_rate": 4.6073749774327494e-05, | |
| "loss": 4.5441, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.07898537642173678, | |
| "grad_norm": 2.6482949256896973, | |
| "learning_rate": 4.605118252392129e-05, | |
| "loss": 4.5235, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.07943672142986098, | |
| "grad_norm": 3.3628525733947754, | |
| "learning_rate": 4.602861527351507e-05, | |
| "loss": 4.6373, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.0798880664379852, | |
| "grad_norm": 3.0251526832580566, | |
| "learning_rate": 4.6006048023108866e-05, | |
| "loss": 4.3972, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.08033941144610941, | |
| "grad_norm": 3.8248074054718018, | |
| "learning_rate": 4.598348077270266e-05, | |
| "loss": 4.5324, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.08079075645423361, | |
| "grad_norm": 3.5319507122039795, | |
| "learning_rate": 4.5960913522296444e-05, | |
| "loss": 4.6163, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.08124210146235783, | |
| "grad_norm": 5.563832759857178, | |
| "learning_rate": 4.593834627189024e-05, | |
| "loss": 4.5826, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.08169344647048203, | |
| "grad_norm": 3.98085355758667, | |
| "learning_rate": 4.591577902148402e-05, | |
| "loss": 4.6515, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.08214479147860625, | |
| "grad_norm": 6.063210964202881, | |
| "learning_rate": 4.5893211771077815e-05, | |
| "loss": 4.5158, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.08259613648673046, | |
| "grad_norm": 3.957599401473999, | |
| "learning_rate": 4.58706445206716e-05, | |
| "loss": 4.5528, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.08304748149485466, | |
| "grad_norm": 3.1111884117126465, | |
| "learning_rate": 4.5848077270265394e-05, | |
| "loss": 4.5484, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.08349882650297888, | |
| "grad_norm": 4.1915059089660645, | |
| "learning_rate": 4.582551001985918e-05, | |
| "loss": 4.595, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.08395017151110308, | |
| "grad_norm": 4.1448140144348145, | |
| "learning_rate": 4.580294276945297e-05, | |
| "loss": 4.6259, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.0844015165192273, | |
| "grad_norm": 3.6308369636535645, | |
| "learning_rate": 4.5780375519046765e-05, | |
| "loss": 4.6703, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.08485286152735151, | |
| "grad_norm": 6.079587459564209, | |
| "learning_rate": 4.575780826864055e-05, | |
| "loss": 4.6145, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.08530420653547571, | |
| "grad_norm": 3.5566651821136475, | |
| "learning_rate": 4.5735241018234343e-05, | |
| "loss": 4.5084, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.08575555154359993, | |
| "grad_norm": 4.733799934387207, | |
| "learning_rate": 4.571267376782813e-05, | |
| "loss": 4.5918, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.08620689655172414, | |
| "grad_norm": 3.1966097354888916, | |
| "learning_rate": 4.569010651742192e-05, | |
| "loss": 4.4592, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.08665824155984835, | |
| "grad_norm": 3.9291093349456787, | |
| "learning_rate": 4.566753926701571e-05, | |
| "loss": 4.5673, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.08710958656797256, | |
| "grad_norm": 5.446611404418945, | |
| "learning_rate": 4.56449720166095e-05, | |
| "loss": 4.6176, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.08756093157609676, | |
| "grad_norm": 3.054124355316162, | |
| "learning_rate": 4.5622404766203286e-05, | |
| "loss": 4.6921, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.08801227658422098, | |
| "grad_norm": 3.27416729927063, | |
| "learning_rate": 4.559983751579708e-05, | |
| "loss": 4.5667, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.08846362159234519, | |
| "grad_norm": 3.577589273452759, | |
| "learning_rate": 4.5577270265390865e-05, | |
| "loss": 4.442, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.0889149666004694, | |
| "grad_norm": 3.566028118133545, | |
| "learning_rate": 4.555470301498466e-05, | |
| "loss": 4.6025, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.08936631160859361, | |
| "grad_norm": 4.064197540283203, | |
| "learning_rate": 4.553213576457845e-05, | |
| "loss": 4.5812, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.08981765661671783, | |
| "grad_norm": 4.237987041473389, | |
| "learning_rate": 4.5509568514172236e-05, | |
| "loss": 4.6083, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.09026900162484203, | |
| "grad_norm": 3.0101680755615234, | |
| "learning_rate": 4.548700126376603e-05, | |
| "loss": 4.6099, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.09072034663296624, | |
| "grad_norm": 3.5102596282958984, | |
| "learning_rate": 4.5464434013359815e-05, | |
| "loss": 4.5643, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.09117169164109044, | |
| "grad_norm": 4.774995803833008, | |
| "learning_rate": 4.544186676295361e-05, | |
| "loss": 4.5663, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.09162303664921466, | |
| "grad_norm": 3.963777780532837, | |
| "learning_rate": 4.541929951254739e-05, | |
| "loss": 4.5603, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.09207438165733887, | |
| "grad_norm": 2.888615846633911, | |
| "learning_rate": 4.5396732262141186e-05, | |
| "loss": 4.5396, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.09252572666546308, | |
| "grad_norm": 4.281205177307129, | |
| "learning_rate": 4.537416501173497e-05, | |
| "loss": 4.5725, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.09297707167358729, | |
| "grad_norm": 4.1528472900390625, | |
| "learning_rate": 4.5351597761328764e-05, | |
| "loss": 4.6262, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.0934284166817115, | |
| "grad_norm": 3.966341972351074, | |
| "learning_rate": 4.532903051092255e-05, | |
| "loss": 4.5871, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.09387976168983571, | |
| "grad_norm": 3.1821911334991455, | |
| "learning_rate": 4.530646326051634e-05, | |
| "loss": 4.4732, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.09433110669795992, | |
| "grad_norm": 5.116222858428955, | |
| "learning_rate": 4.5283896010110135e-05, | |
| "loss": 4.6084, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.09478245170608413, | |
| "grad_norm": 5.254827976226807, | |
| "learning_rate": 4.526132875970392e-05, | |
| "loss": 4.5696, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.09523379671420834, | |
| "grad_norm": 3.6102991104125977, | |
| "learning_rate": 4.5238761509297714e-05, | |
| "loss": 4.5971, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.09568514172233256, | |
| "grad_norm": 3.348236322402954, | |
| "learning_rate": 4.52161942588915e-05, | |
| "loss": 4.5048, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.09613648673045676, | |
| "grad_norm": 3.1192493438720703, | |
| "learning_rate": 4.519362700848529e-05, | |
| "loss": 4.571, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.09658783173858097, | |
| "grad_norm": 2.9626996517181396, | |
| "learning_rate": 4.517105975807908e-05, | |
| "loss": 4.4975, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.09703917674670517, | |
| "grad_norm": 3.4130876064300537, | |
| "learning_rate": 4.514849250767287e-05, | |
| "loss": 4.6279, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.09749052175482939, | |
| "grad_norm": 2.4458179473876953, | |
| "learning_rate": 4.512592525726666e-05, | |
| "loss": 4.511, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.0979418667629536, | |
| "grad_norm": 5.223287105560303, | |
| "learning_rate": 4.510335800686045e-05, | |
| "loss": 4.5435, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.09839321177107781, | |
| "grad_norm": 4.481621742248535, | |
| "learning_rate": 4.5080790756454235e-05, | |
| "loss": 4.5177, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.09884455677920202, | |
| "grad_norm": 2.959305763244629, | |
| "learning_rate": 4.505822350604803e-05, | |
| "loss": 4.5746, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.09929590178732624, | |
| "grad_norm": 6.753904342651367, | |
| "learning_rate": 4.5035656255641814e-05, | |
| "loss": 4.5424, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09974724679545044, | |
| "grad_norm": 3.4904415607452393, | |
| "learning_rate": 4.5013089005235606e-05, | |
| "loss": 4.5661, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.10019859180357465, | |
| "grad_norm": 3.316413164138794, | |
| "learning_rate": 4.499052175482939e-05, | |
| "loss": 4.5242, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.10064993681169886, | |
| "grad_norm": 3.974198579788208, | |
| "learning_rate": 4.4967954504423185e-05, | |
| "loss": 4.5251, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.10110128181982307, | |
| "grad_norm": 4.306400775909424, | |
| "learning_rate": 4.494538725401697e-05, | |
| "loss": 4.5209, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.10155262682794729, | |
| "grad_norm": 4.841123104095459, | |
| "learning_rate": 4.4922820003610764e-05, | |
| "loss": 4.5895, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.10200397183607149, | |
| "grad_norm": 3.6396520137786865, | |
| "learning_rate": 4.490025275320455e-05, | |
| "loss": 4.5788, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.1024553168441957, | |
| "grad_norm": 3.500455379486084, | |
| "learning_rate": 4.487768550279834e-05, | |
| "loss": 4.4844, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.10290666185231992, | |
| "grad_norm": 4.19438362121582, | |
| "learning_rate": 4.485511825239213e-05, | |
| "loss": 4.5723, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.10335800686044412, | |
| "grad_norm": 3.513514995574951, | |
| "learning_rate": 4.483255100198592e-05, | |
| "loss": 4.6457, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.10380935186856834, | |
| "grad_norm": 3.5381104946136475, | |
| "learning_rate": 4.4809983751579706e-05, | |
| "loss": 4.4716, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.10426069687669254, | |
| "grad_norm": 4.183605194091797, | |
| "learning_rate": 4.47874165011735e-05, | |
| "loss": 4.5554, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.10471204188481675, | |
| "grad_norm": 3.838669538497925, | |
| "learning_rate": 4.4764849250767285e-05, | |
| "loss": 4.5354, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.10516338689294097, | |
| "grad_norm": 3.651357889175415, | |
| "learning_rate": 4.474228200036108e-05, | |
| "loss": 4.5784, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.10561473190106517, | |
| "grad_norm": 3.6753928661346436, | |
| "learning_rate": 4.4719714749954863e-05, | |
| "loss": 4.5484, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.10606607690918939, | |
| "grad_norm": 4.5028228759765625, | |
| "learning_rate": 4.4697147499548656e-05, | |
| "loss": 4.5781, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.1065174219173136, | |
| "grad_norm": 7.304862022399902, | |
| "learning_rate": 4.467458024914244e-05, | |
| "loss": 4.5041, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.1069687669254378, | |
| "grad_norm": 4.280136585235596, | |
| "learning_rate": 4.4652012998736235e-05, | |
| "loss": 4.6027, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.10742011193356202, | |
| "grad_norm": 3.6763241291046143, | |
| "learning_rate": 4.462944574833002e-05, | |
| "loss": 4.5593, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.10787145694168622, | |
| "grad_norm": 3.8541440963745117, | |
| "learning_rate": 4.460687849792381e-05, | |
| "loss": 4.5144, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.10832280194981043, | |
| "grad_norm": 2.8991189002990723, | |
| "learning_rate": 4.4584311247517606e-05, | |
| "loss": 4.6064, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.10877414695793465, | |
| "grad_norm": 2.928452491760254, | |
| "learning_rate": 4.456174399711139e-05, | |
| "loss": 4.5631, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.10922549196605885, | |
| "grad_norm": 3.3975236415863037, | |
| "learning_rate": 4.4539176746705184e-05, | |
| "loss": 4.5839, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.10967683697418307, | |
| "grad_norm": 3.4614107608795166, | |
| "learning_rate": 4.451660949629897e-05, | |
| "loss": 4.5176, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.11012818198230727, | |
| "grad_norm": 3.582960605621338, | |
| "learning_rate": 4.449404224589276e-05, | |
| "loss": 4.6017, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.11057952699043148, | |
| "grad_norm": 5.049736499786377, | |
| "learning_rate": 4.447147499548655e-05, | |
| "loss": 4.5375, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.1110308719985557, | |
| "grad_norm": 4.15340518951416, | |
| "learning_rate": 4.444890774508034e-05, | |
| "loss": 4.6302, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.1114822170066799, | |
| "grad_norm": 3.0118372440338135, | |
| "learning_rate": 4.442634049467413e-05, | |
| "loss": 4.6187, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.11193356201480412, | |
| "grad_norm": 3.5457749366760254, | |
| "learning_rate": 4.440377324426792e-05, | |
| "loss": 4.5201, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.11238490702292833, | |
| "grad_norm": 3.9251248836517334, | |
| "learning_rate": 4.4381205993861706e-05, | |
| "loss": 4.6261, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.11283625203105253, | |
| "grad_norm": 3.2046866416931152, | |
| "learning_rate": 4.43586387434555e-05, | |
| "loss": 4.5479, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.11328759703917675, | |
| "grad_norm": 3.1684064865112305, | |
| "learning_rate": 4.433607149304929e-05, | |
| "loss": 4.5823, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.11373894204730095, | |
| "grad_norm": 4.124698638916016, | |
| "learning_rate": 4.431350424264308e-05, | |
| "loss": 4.6002, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.11419028705542517, | |
| "grad_norm": 3.9625906944274902, | |
| "learning_rate": 4.429093699223687e-05, | |
| "loss": 4.5571, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.11464163206354938, | |
| "grad_norm": 4.684337139129639, | |
| "learning_rate": 4.4268369741830655e-05, | |
| "loss": 4.5483, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.11509297707167358, | |
| "grad_norm": 4.30114221572876, | |
| "learning_rate": 4.424580249142445e-05, | |
| "loss": 4.5534, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.1155443220797978, | |
| "grad_norm": 3.673405647277832, | |
| "learning_rate": 4.4223235241018234e-05, | |
| "loss": 4.5645, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.11599566708792201, | |
| "grad_norm": 4.129467964172363, | |
| "learning_rate": 4.4200667990612027e-05, | |
| "loss": 4.5647, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.11644701209604622, | |
| "grad_norm": 2.8640856742858887, | |
| "learning_rate": 4.417810074020581e-05, | |
| "loss": 4.475, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.11689835710417043, | |
| "grad_norm": 3.1711478233337402, | |
| "learning_rate": 4.4155533489799605e-05, | |
| "loss": 4.5395, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.11734970211229463, | |
| "grad_norm": 4.4645586013793945, | |
| "learning_rate": 4.413296623939339e-05, | |
| "loss": 4.5149, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.11780104712041885, | |
| "grad_norm": 4.081081867218018, | |
| "learning_rate": 4.4110398988987184e-05, | |
| "loss": 4.3781, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.11825239212854306, | |
| "grad_norm": 3.4459915161132812, | |
| "learning_rate": 4.4087831738580976e-05, | |
| "loss": 4.4447, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.11870373713666726, | |
| "grad_norm": 4.382139205932617, | |
| "learning_rate": 4.406526448817476e-05, | |
| "loss": 4.5698, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.11915508214479148, | |
| "grad_norm": 3.9767699241638184, | |
| "learning_rate": 4.4042697237768555e-05, | |
| "loss": 4.7358, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1196064271529157, | |
| "grad_norm": 2.903264284133911, | |
| "learning_rate": 4.402012998736234e-05, | |
| "loss": 4.4043, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.1200577721610399, | |
| "grad_norm": 3.7580466270446777, | |
| "learning_rate": 4.399756273695613e-05, | |
| "loss": 4.4899, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.12050911716916411, | |
| "grad_norm": 3.086916446685791, | |
| "learning_rate": 4.397499548654992e-05, | |
| "loss": 4.494, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.12096046217728831, | |
| "grad_norm": 3.9137027263641357, | |
| "learning_rate": 4.395242823614371e-05, | |
| "loss": 4.5933, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.12141180718541253, | |
| "grad_norm": 3.615917205810547, | |
| "learning_rate": 4.39298609857375e-05, | |
| "loss": 4.4373, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.12186315219353674, | |
| "grad_norm": 2.4744229316711426, | |
| "learning_rate": 4.390729373533129e-05, | |
| "loss": 4.6075, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.12231449720166095, | |
| "grad_norm": 3.469045639038086, | |
| "learning_rate": 4.3884726484925076e-05, | |
| "loss": 4.5208, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.12276584220978516, | |
| "grad_norm": 4.882166385650635, | |
| "learning_rate": 4.386215923451887e-05, | |
| "loss": 4.4989, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.12321718721790936, | |
| "grad_norm": 4.610581398010254, | |
| "learning_rate": 4.383959198411266e-05, | |
| "loss": 4.5306, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.12366853222603358, | |
| "grad_norm": 3.6969921588897705, | |
| "learning_rate": 4.381702473370645e-05, | |
| "loss": 4.5978, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.1241198772341578, | |
| "grad_norm": 4.886890888214111, | |
| "learning_rate": 4.379445748330024e-05, | |
| "loss": 4.4131, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.124571222242282, | |
| "grad_norm": 2.121551513671875, | |
| "learning_rate": 4.3771890232894026e-05, | |
| "loss": 4.5148, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.1250225672504062, | |
| "grad_norm": 3.1213953495025635, | |
| "learning_rate": 4.374932298248782e-05, | |
| "loss": 4.6455, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.1254739122585304, | |
| "grad_norm": 3.9660770893096924, | |
| "learning_rate": 4.3726755732081604e-05, | |
| "loss": 4.4807, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.12592525726665463, | |
| "grad_norm": 2.980980396270752, | |
| "learning_rate": 4.37041884816754e-05, | |
| "loss": 4.6299, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.12637660227477884, | |
| "grad_norm": 3.5488901138305664, | |
| "learning_rate": 4.368162123126918e-05, | |
| "loss": 4.5797, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.12682794728290306, | |
| "grad_norm": 2.9502065181732178, | |
| "learning_rate": 4.3659053980862975e-05, | |
| "loss": 4.5491, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.12727929229102727, | |
| "grad_norm": 2.8409996032714844, | |
| "learning_rate": 4.363648673045677e-05, | |
| "loss": 4.5958, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.12773063729915146, | |
| "grad_norm": 5.0700907707214355, | |
| "learning_rate": 4.3613919480050554e-05, | |
| "loss": 4.6128, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.12818198230727568, | |
| "grad_norm": 3.55629301071167, | |
| "learning_rate": 4.3591352229644347e-05, | |
| "loss": 4.492, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.1286333273153999, | |
| "grad_norm": 3.631505250930786, | |
| "learning_rate": 4.356878497923813e-05, | |
| "loss": 4.5023, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.1290846723235241, | |
| "grad_norm": 3.8898086547851562, | |
| "learning_rate": 4.3546217728831925e-05, | |
| "loss": 4.5478, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.12953601733164832, | |
| "grad_norm": 3.2403228282928467, | |
| "learning_rate": 4.352365047842571e-05, | |
| "loss": 4.4829, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.1299873623397725, | |
| "grad_norm": 3.5314269065856934, | |
| "learning_rate": 4.3501083228019504e-05, | |
| "loss": 4.5186, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.13043870734789673, | |
| "grad_norm": 3.769017457962036, | |
| "learning_rate": 4.347851597761329e-05, | |
| "loss": 4.4798, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.13089005235602094, | |
| "grad_norm": 3.4731597900390625, | |
| "learning_rate": 4.345594872720708e-05, | |
| "loss": 4.6109, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.13134139736414516, | |
| "grad_norm": 4.540064811706543, | |
| "learning_rate": 4.343338147680087e-05, | |
| "loss": 4.5537, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.13179274237226937, | |
| "grad_norm": 4.51099157333374, | |
| "learning_rate": 4.341081422639466e-05, | |
| "loss": 4.4937, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.13224408738039356, | |
| "grad_norm": 5.07973051071167, | |
| "learning_rate": 4.338824697598845e-05, | |
| "loss": 4.5471, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.13269543238851778, | |
| "grad_norm": 4.9902753829956055, | |
| "learning_rate": 4.336567972558224e-05, | |
| "loss": 4.5544, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.133146777396642, | |
| "grad_norm": 5.2365031242370605, | |
| "learning_rate": 4.334311247517603e-05, | |
| "loss": 4.4559, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.1335981224047662, | |
| "grad_norm": 4.138045787811279, | |
| "learning_rate": 4.332054522476982e-05, | |
| "loss": 4.3979, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.13404946741289042, | |
| "grad_norm": 3.637258529663086, | |
| "learning_rate": 4.329797797436361e-05, | |
| "loss": 4.5282, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.13450081242101464, | |
| "grad_norm": 3.374943256378174, | |
| "learning_rate": 4.3275410723957396e-05, | |
| "loss": 4.5585, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.13495215742913882, | |
| "grad_norm": 4.198739051818848, | |
| "learning_rate": 4.325284347355119e-05, | |
| "loss": 4.4996, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.13540350243726304, | |
| "grad_norm": 3.0009047985076904, | |
| "learning_rate": 4.3230276223144975e-05, | |
| "loss": 4.5361, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.13585484744538726, | |
| "grad_norm": 3.28633975982666, | |
| "learning_rate": 4.320770897273877e-05, | |
| "loss": 4.5367, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.13630619245351147, | |
| "grad_norm": 3.2945947647094727, | |
| "learning_rate": 4.318514172233255e-05, | |
| "loss": 4.5113, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.1367575374616357, | |
| "grad_norm": 5.111336708068848, | |
| "learning_rate": 4.3162574471926346e-05, | |
| "loss": 4.5238, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.13720888246975987, | |
| "grad_norm": 2.328876256942749, | |
| "learning_rate": 4.314000722152013e-05, | |
| "loss": 4.5512, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.1376602274778841, | |
| "grad_norm": 3.703890323638916, | |
| "learning_rate": 4.311743997111392e-05, | |
| "loss": 4.5192, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.1381115724860083, | |
| "grad_norm": 2.8396573066711426, | |
| "learning_rate": 4.309487272070771e-05, | |
| "loss": 4.5641, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.13856291749413252, | |
| "grad_norm": 3.3222029209136963, | |
| "learning_rate": 4.3072305470301496e-05, | |
| "loss": 4.5332, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.13901426250225674, | |
| "grad_norm": 3.652606725692749, | |
| "learning_rate": 4.304973821989529e-05, | |
| "loss": 4.5056, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.13946560751038092, | |
| "grad_norm": 7.847742080688477, | |
| "learning_rate": 4.3027170969489075e-05, | |
| "loss": 4.4884, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.13991695251850514, | |
| "grad_norm": 3.6494662761688232, | |
| "learning_rate": 4.300460371908287e-05, | |
| "loss": 4.4938, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.14036829752662935, | |
| "grad_norm": 4.544933795928955, | |
| "learning_rate": 4.298203646867665e-05, | |
| "loss": 4.5045, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.14081964253475357, | |
| "grad_norm": 3.429764986038208, | |
| "learning_rate": 4.2959469218270446e-05, | |
| "loss": 4.5033, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.14127098754287779, | |
| "grad_norm": 3.790017604827881, | |
| "learning_rate": 4.293690196786423e-05, | |
| "loss": 4.4775, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.14172233255100197, | |
| "grad_norm": 3.4987452030181885, | |
| "learning_rate": 4.2914334717458024e-05, | |
| "loss": 4.5988, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.1421736775591262, | |
| "grad_norm": 2.5895438194274902, | |
| "learning_rate": 4.289176746705182e-05, | |
| "loss": 4.5285, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.1426250225672504, | |
| "grad_norm": 4.709017276763916, | |
| "learning_rate": 4.28692002166456e-05, | |
| "loss": 4.4137, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.14307636757537462, | |
| "grad_norm": 3.9760525226593018, | |
| "learning_rate": 4.2846632966239395e-05, | |
| "loss": 4.5278, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.14352771258349883, | |
| "grad_norm": 4.445188045501709, | |
| "learning_rate": 4.282406571583318e-05, | |
| "loss": 4.5362, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.14397905759162305, | |
| "grad_norm": 4.021897792816162, | |
| "learning_rate": 4.2801498465426974e-05, | |
| "loss": 4.458, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.14443040259974724, | |
| "grad_norm": 4.263660907745361, | |
| "learning_rate": 4.277893121502076e-05, | |
| "loss": 4.4782, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.14488174760787145, | |
| "grad_norm": 3.184115171432495, | |
| "learning_rate": 4.275636396461455e-05, | |
| "loss": 4.4877, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.14533309261599567, | |
| "grad_norm": 3.6419224739074707, | |
| "learning_rate": 4.273379671420834e-05, | |
| "loss": 4.5329, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.14578443762411988, | |
| "grad_norm": 5.209333896636963, | |
| "learning_rate": 4.271122946380213e-05, | |
| "loss": 4.5252, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.1462357826322441, | |
| "grad_norm": 2.9980499744415283, | |
| "learning_rate": 4.2688662213395924e-05, | |
| "loss": 4.4491, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.1466871276403683, | |
| "grad_norm": 2.8836166858673096, | |
| "learning_rate": 4.266609496298971e-05, | |
| "loss": 4.524, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.1471384726484925, | |
| "grad_norm": 3.24406099319458, | |
| "learning_rate": 4.26435277125835e-05, | |
| "loss": 4.5629, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.14758981765661672, | |
| "grad_norm": 3.78409743309021, | |
| "learning_rate": 4.262096046217729e-05, | |
| "loss": 4.5051, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.14804116266474093, | |
| "grad_norm": 3.738863229751587, | |
| "learning_rate": 4.259839321177108e-05, | |
| "loss": 4.3699, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.14849250767286515, | |
| "grad_norm": 3.1949925422668457, | |
| "learning_rate": 4.2575825961364867e-05, | |
| "loss": 4.4681, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.14894385268098934, | |
| "grad_norm": 3.774017810821533, | |
| "learning_rate": 4.255325871095866e-05, | |
| "loss": 4.4382, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.14939519768911355, | |
| "grad_norm": 3.903379201889038, | |
| "learning_rate": 4.2530691460552445e-05, | |
| "loss": 4.4229, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.14984654269723777, | |
| "grad_norm": 2.8182575702667236, | |
| "learning_rate": 4.250812421014624e-05, | |
| "loss": 4.4755, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.15029788770536198, | |
| "grad_norm": 3.8375935554504395, | |
| "learning_rate": 4.2485556959740024e-05, | |
| "loss": 4.5113, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.1507492327134862, | |
| "grad_norm": 3.6683831214904785, | |
| "learning_rate": 4.2462989709333816e-05, | |
| "loss": 4.5386, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.15120057772161039, | |
| "grad_norm": 4.0321431159973145, | |
| "learning_rate": 4.244042245892761e-05, | |
| "loss": 4.4977, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.1516519227297346, | |
| "grad_norm": 3.8294458389282227, | |
| "learning_rate": 4.2417855208521395e-05, | |
| "loss": 4.4926, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.15210326773785882, | |
| "grad_norm": 3.6209237575531006, | |
| "learning_rate": 4.239528795811519e-05, | |
| "loss": 4.5028, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.15255461274598303, | |
| "grad_norm": 3.8138227462768555, | |
| "learning_rate": 4.237272070770897e-05, | |
| "loss": 4.4808, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.15300595775410725, | |
| "grad_norm": 4.5005927085876465, | |
| "learning_rate": 4.2350153457302766e-05, | |
| "loss": 4.4702, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.15345730276223146, | |
| "grad_norm": 3.48544979095459, | |
| "learning_rate": 4.232758620689655e-05, | |
| "loss": 4.4993, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.15390864777035565, | |
| "grad_norm": 3.5820982456207275, | |
| "learning_rate": 4.2305018956490344e-05, | |
| "loss": 4.5032, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.15435999277847987, | |
| "grad_norm": 4.8123555183410645, | |
| "learning_rate": 4.228245170608413e-05, | |
| "loss": 4.5196, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.15481133778660408, | |
| "grad_norm": 3.8024814128875732, | |
| "learning_rate": 4.225988445567792e-05, | |
| "loss": 4.5327, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.1552626827947283, | |
| "grad_norm": 5.407778263092041, | |
| "learning_rate": 4.223731720527171e-05, | |
| "loss": 4.5355, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.1557140278028525, | |
| "grad_norm": 3.6917614936828613, | |
| "learning_rate": 4.22147499548655e-05, | |
| "loss": 4.4072, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.1561653728109767, | |
| "grad_norm": 3.9421164989471436, | |
| "learning_rate": 4.2192182704459294e-05, | |
| "loss": 4.4812, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.15661671781910091, | |
| "grad_norm": 4.172101974487305, | |
| "learning_rate": 4.216961545405308e-05, | |
| "loss": 4.4737, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.15706806282722513, | |
| "grad_norm": 3.308185577392578, | |
| "learning_rate": 4.214704820364687e-05, | |
| "loss": 4.5789, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.15751940783534935, | |
| "grad_norm": 4.956492900848389, | |
| "learning_rate": 4.212448095324066e-05, | |
| "loss": 4.565, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.15797075284347356, | |
| "grad_norm": 3.411794900894165, | |
| "learning_rate": 4.210191370283445e-05, | |
| "loss": 4.5473, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.15842209785159775, | |
| "grad_norm": 4.067993640899658, | |
| "learning_rate": 4.207934645242824e-05, | |
| "loss": 4.4836, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.15887344285972196, | |
| "grad_norm": 2.9520280361175537, | |
| "learning_rate": 4.205677920202203e-05, | |
| "loss": 4.4962, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.15932478786784618, | |
| "grad_norm": 4.387596130371094, | |
| "learning_rate": 4.2034211951615815e-05, | |
| "loss": 4.513, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.1597761328759704, | |
| "grad_norm": 3.250239849090576, | |
| "learning_rate": 4.201164470120961e-05, | |
| "loss": 4.5496, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.1602274778840946, | |
| "grad_norm": 3.867882013320923, | |
| "learning_rate": 4.1989077450803394e-05, | |
| "loss": 4.5849, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.16067882289221883, | |
| "grad_norm": 3.7500853538513184, | |
| "learning_rate": 4.196651020039719e-05, | |
| "loss": 4.4585, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.161130167900343, | |
| "grad_norm": 3.8945131301879883, | |
| "learning_rate": 4.194394294999098e-05, | |
| "loss": 4.4149, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.16158151290846723, | |
| "grad_norm": 8.667535781860352, | |
| "learning_rate": 4.1921375699584765e-05, | |
| "loss": 4.44, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.16203285791659144, | |
| "grad_norm": 4.284276485443115, | |
| "learning_rate": 4.189880844917856e-05, | |
| "loss": 4.4561, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.16248420292471566, | |
| "grad_norm": 2.9393467903137207, | |
| "learning_rate": 4.1876241198772344e-05, | |
| "loss": 4.5887, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.16293554793283987, | |
| "grad_norm": 3.012742519378662, | |
| "learning_rate": 4.1853673948366136e-05, | |
| "loss": 4.4513, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.16338689294096406, | |
| "grad_norm": 5.467082500457764, | |
| "learning_rate": 4.183110669795992e-05, | |
| "loss": 4.5611, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.16383823794908828, | |
| "grad_norm": 3.46402907371521, | |
| "learning_rate": 4.1808539447553715e-05, | |
| "loss": 4.5312, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.1642895829572125, | |
| "grad_norm": 3.8491625785827637, | |
| "learning_rate": 4.17859721971475e-05, | |
| "loss": 4.4916, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.1647409279653367, | |
| "grad_norm": 5.8692450523376465, | |
| "learning_rate": 4.176340494674129e-05, | |
| "loss": 4.4869, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.16519227297346092, | |
| "grad_norm": 3.2287988662719727, | |
| "learning_rate": 4.174083769633508e-05, | |
| "loss": 4.4431, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.1656436179815851, | |
| "grad_norm": 4.350259304046631, | |
| "learning_rate": 4.171827044592887e-05, | |
| "loss": 4.4968, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.16609496298970933, | |
| "grad_norm": 3.7243659496307373, | |
| "learning_rate": 4.1695703195522664e-05, | |
| "loss": 4.4738, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.16654630799783354, | |
| "grad_norm": 4.834224224090576, | |
| "learning_rate": 4.167313594511645e-05, | |
| "loss": 4.5754, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.16699765300595776, | |
| "grad_norm": 6.014001846313477, | |
| "learning_rate": 4.165056869471024e-05, | |
| "loss": 4.5449, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.16744899801408197, | |
| "grad_norm": 3.7950220108032227, | |
| "learning_rate": 4.162800144430403e-05, | |
| "loss": 4.4007, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.16790034302220616, | |
| "grad_norm": 4.019992828369141, | |
| "learning_rate": 4.160543419389782e-05, | |
| "loss": 4.4231, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.16835168803033038, | |
| "grad_norm": 4.363696575164795, | |
| "learning_rate": 4.158286694349161e-05, | |
| "loss": 4.4445, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.1688030330384546, | |
| "grad_norm": 4.168088912963867, | |
| "learning_rate": 4.15602996930854e-05, | |
| "loss": 4.5064, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.1692543780465788, | |
| "grad_norm": 3.3574249744415283, | |
| "learning_rate": 4.1537732442679186e-05, | |
| "loss": 4.5161, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.16970572305470302, | |
| "grad_norm": 4.255382061004639, | |
| "learning_rate": 4.151516519227298e-05, | |
| "loss": 4.4809, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.17015706806282724, | |
| "grad_norm": 3.896949291229248, | |
| "learning_rate": 4.1492597941866764e-05, | |
| "loss": 4.488, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.17060841307095143, | |
| "grad_norm": 4.572742938995361, | |
| "learning_rate": 4.147003069146056e-05, | |
| "loss": 4.5692, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.17105975807907564, | |
| "grad_norm": 4.25124454498291, | |
| "learning_rate": 4.144746344105434e-05, | |
| "loss": 4.5341, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.17151110308719986, | |
| "grad_norm": 3.2986035346984863, | |
| "learning_rate": 4.1424896190648136e-05, | |
| "loss": 4.4489, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.17196244809532407, | |
| "grad_norm": 3.633592367172241, | |
| "learning_rate": 4.140232894024192e-05, | |
| "loss": 4.5124, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.1724137931034483, | |
| "grad_norm": 3.3687500953674316, | |
| "learning_rate": 4.1379761689835714e-05, | |
| "loss": 4.5165, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.17286513811157247, | |
| "grad_norm": 4.958398342132568, | |
| "learning_rate": 4.13571944394295e-05, | |
| "loss": 4.5043, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.1733164831196967, | |
| "grad_norm": 4.127295017242432, | |
| "learning_rate": 4.133462718902329e-05, | |
| "loss": 4.4783, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.1737678281278209, | |
| "grad_norm": 3.3556175231933594, | |
| "learning_rate": 4.131205993861708e-05, | |
| "loss": 4.4185, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.17421917313594512, | |
| "grad_norm": 4.382410049438477, | |
| "learning_rate": 4.128949268821087e-05, | |
| "loss": 4.5009, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.17467051814406934, | |
| "grad_norm": 3.7760777473449707, | |
| "learning_rate": 4.126692543780466e-05, | |
| "loss": 4.3572, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.17512186315219352, | |
| "grad_norm": 4.594768524169922, | |
| "learning_rate": 4.124435818739845e-05, | |
| "loss": 4.4793, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.17557320816031774, | |
| "grad_norm": 4.605646133422852, | |
| "learning_rate": 4.1221790936992235e-05, | |
| "loss": 4.4462, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.17602455316844196, | |
| "grad_norm": 3.358002185821533, | |
| "learning_rate": 4.119922368658603e-05, | |
| "loss": 4.4986, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.17647589817656617, | |
| "grad_norm": 3.7644100189208984, | |
| "learning_rate": 4.1176656436179814e-05, | |
| "loss": 4.5314, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.17692724318469039, | |
| "grad_norm": 4.109899044036865, | |
| "learning_rate": 4.115408918577361e-05, | |
| "loss": 4.5382, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.1773785881928146, | |
| "grad_norm": 8.232100486755371, | |
| "learning_rate": 4.113152193536739e-05, | |
| "loss": 4.5095, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.1778299332009388, | |
| "grad_norm": 3.442411422729492, | |
| "learning_rate": 4.1108954684961185e-05, | |
| "loss": 4.5861, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.178281278209063, | |
| "grad_norm": 2.404611825942993, | |
| "learning_rate": 4.108638743455497e-05, | |
| "loss": 4.4563, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.17873262321718722, | |
| "grad_norm": 3.3895816802978516, | |
| "learning_rate": 4.1063820184148764e-05, | |
| "loss": 4.4434, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.17918396822531144, | |
| "grad_norm": 2.9194042682647705, | |
| "learning_rate": 4.104125293374255e-05, | |
| "loss": 4.5463, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.17963531323343565, | |
| "grad_norm": 2.6337718963623047, | |
| "learning_rate": 4.101868568333634e-05, | |
| "loss": 4.4246, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.18008665824155984, | |
| "grad_norm": 5.921742916107178, | |
| "learning_rate": 4.0996118432930135e-05, | |
| "loss": 4.3955, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.18053800324968405, | |
| "grad_norm": 3.9008045196533203, | |
| "learning_rate": 4.097355118252392e-05, | |
| "loss": 4.6028, | |
| "step": 20000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 110780, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4772471541951488e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |