{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 610,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01639344262295082,
      "grad_norm": 1.4445847272872925,
      "learning_rate": 1.5584415584415587e-06,
      "loss": 1.2372,
      "step": 5
    },
    {
      "epoch": 0.03278688524590164,
      "grad_norm": 0.7953591346740723,
      "learning_rate": 3.5064935064935066e-06,
      "loss": 1.2195,
      "step": 10
    },
    {
      "epoch": 0.04918032786885246,
      "grad_norm": 2.0549397468566895,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 1.2416,
      "step": 15
    },
    {
      "epoch": 0.06557377049180328,
      "grad_norm": 0.5585774183273315,
      "learning_rate": 7.402597402597403e-06,
      "loss": 1.2289,
      "step": 20
    },
    {
      "epoch": 0.08196721311475409,
      "grad_norm": 0.6144344210624695,
      "learning_rate": 9.35064935064935e-06,
      "loss": 1.2251,
      "step": 25
    },
    {
      "epoch": 0.09836065573770492,
      "grad_norm": 0.5836143493652344,
      "learning_rate": 1.12987012987013e-05,
      "loss": 1.1654,
      "step": 30
    },
    {
      "epoch": 0.11475409836065574,
      "grad_norm": 0.5931638479232788,
      "learning_rate": 1.3246753246753247e-05,
      "loss": 1.1853,
      "step": 35
    },
    {
      "epoch": 0.13114754098360656,
      "grad_norm": 0.4809166193008423,
      "learning_rate": 1.5194805194805194e-05,
      "loss": 1.1453,
      "step": 40
    },
    {
      "epoch": 0.14754098360655737,
      "grad_norm": 0.4646892845630646,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 1.0871,
      "step": 45
    },
    {
      "epoch": 0.16393442622950818,
      "grad_norm": 0.5099896192550659,
      "learning_rate": 1.909090909090909e-05,
      "loss": 1.1506,
      "step": 50
    },
    {
      "epoch": 0.18032786885245902,
      "grad_norm": 0.501319169998169,
      "learning_rate": 2.103896103896104e-05,
      "loss": 1.1107,
      "step": 55
    },
    {
      "epoch": 0.19672131147540983,
      "grad_norm": 0.43489858508110046,
      "learning_rate": 2.298701298701299e-05,
      "loss": 1.118,
      "step": 60
    },
    {
      "epoch": 0.21311475409836064,
      "grad_norm": 0.5342327356338501,
      "learning_rate": 2.493506493506494e-05,
      "loss": 1.1286,
      "step": 65
    },
    {
      "epoch": 0.22950819672131148,
      "grad_norm": 0.4543209969997406,
      "learning_rate": 2.6883116883116883e-05,
      "loss": 1.0512,
      "step": 70
    },
    {
      "epoch": 0.2459016393442623,
      "grad_norm": 0.45664912462234497,
      "learning_rate": 2.8831168831168832e-05,
      "loss": 1.0923,
      "step": 75
    },
    {
      "epoch": 0.26229508196721313,
      "grad_norm": 0.5187792181968689,
      "learning_rate": 2.999985878423113e-05,
      "loss": 1.0963,
      "step": 80
    },
    {
      "epoch": 0.2786885245901639,
      "grad_norm": 0.6245763897895813,
      "learning_rate": 2.999827013736722e-05,
      "loss": 1.0923,
      "step": 85
    },
    {
      "epoch": 0.29508196721311475,
      "grad_norm": 0.5452545285224915,
      "learning_rate": 2.9994916511501615e-05,
      "loss": 1.0095,
      "step": 90
    },
    {
      "epoch": 0.3114754098360656,
      "grad_norm": 0.584423303604126,
      "learning_rate": 2.9989798301285106e-05,
      "loss": 1.0663,
      "step": 95
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 0.5853920578956604,
      "learning_rate": 2.9982916109022723e-05,
      "loss": 0.9832,
      "step": 100
    },
    {
      "epoch": 0.3442622950819672,
      "grad_norm": 0.5287838578224182,
      "learning_rate": 2.9974270744602862e-05,
      "loss": 1.0344,
      "step": 105
    },
    {
      "epoch": 0.36065573770491804,
      "grad_norm": 0.7069889903068542,
      "learning_rate": 2.9963863225401957e-05,
      "loss": 0.9816,
      "step": 110
    },
    {
      "epoch": 0.3770491803278688,
      "grad_norm": 0.6540791392326355,
      "learning_rate": 2.9951694776164774e-05,
      "loss": 0.9736,
      "step": 115
    },
    {
      "epoch": 0.39344262295081966,
      "grad_norm": 0.5601514577865601,
      "learning_rate": 2.9937766828860283e-05,
      "loss": 0.9369,
      "step": 120
    },
    {
      "epoch": 0.4098360655737705,
      "grad_norm": 0.6108849048614502,
      "learning_rate": 2.9922081022513134e-05,
      "loss": 0.9638,
      "step": 125
    },
    {
      "epoch": 0.4262295081967213,
      "grad_norm": 0.5754315853118896,
      "learning_rate": 2.9904639203010794e-05,
      "loss": 0.9342,
      "step": 130
    },
    {
      "epoch": 0.4426229508196721,
      "grad_norm": 0.6295889616012573,
      "learning_rate": 2.988544342288631e-05,
      "loss": 0.97,
      "step": 135
    },
    {
      "epoch": 0.45901639344262296,
      "grad_norm": 0.6687360405921936,
      "learning_rate": 2.9864495941076784e-05,
      "loss": 0.9167,
      "step": 140
    },
    {
      "epoch": 0.47540983606557374,
      "grad_norm": 0.6689046621322632,
      "learning_rate": 2.984179922265754e-05,
      "loss": 0.8656,
      "step": 145
    },
    {
      "epoch": 0.4918032786885246,
      "grad_norm": 0.8281727433204651,
      "learning_rate": 2.981735593855202e-05,
      "loss": 0.8761,
      "step": 150
    },
    {
      "epoch": 0.5081967213114754,
      "grad_norm": 0.8986142873764038,
      "learning_rate": 2.9791168965217495e-05,
      "loss": 0.8899,
      "step": 155
    },
    {
      "epoch": 0.5245901639344263,
      "grad_norm": 0.7329938411712646,
      "learning_rate": 2.976324138430656e-05,
      "loss": 0.8305,
      "step": 160
    },
    {
      "epoch": 0.5409836065573771,
      "grad_norm": 0.8260165452957153,
      "learning_rate": 2.9733576482304487e-05,
      "loss": 0.8892,
      "step": 165
    },
    {
      "epoch": 0.5573770491803278,
      "grad_norm": 0.8703463077545166,
      "learning_rate": 2.970217775014247e-05,
      "loss": 0.8721,
      "step": 170
    },
    {
      "epoch": 0.5737704918032787,
      "grad_norm": 0.9152560234069824,
      "learning_rate": 2.9669048882786823e-05,
      "loss": 0.8314,
      "step": 175
    },
    {
      "epoch": 0.5901639344262295,
      "grad_norm": 0.8130025863647461,
      "learning_rate": 2.9634193778804165e-05,
      "loss": 0.847,
      "step": 180
    },
    {
      "epoch": 0.6065573770491803,
      "grad_norm": 0.8214168548583984,
      "learning_rate": 2.9597616539902622e-05,
      "loss": 0.8146,
      "step": 185
    },
    {
      "epoch": 0.6229508196721312,
      "grad_norm": 0.8313847780227661,
      "learning_rate": 2.9559321470449178e-05,
      "loss": 0.8047,
      "step": 190
    },
    {
      "epoch": 0.639344262295082,
      "grad_norm": 0.8505757451057434,
      "learning_rate": 2.951931307696309e-05,
      "loss": 0.7699,
      "step": 195
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.8449229598045349,
      "learning_rate": 2.9477596067585633e-05,
      "loss": 0.7856,
      "step": 200
    },
    {
      "epoch": 0.6721311475409836,
      "grad_norm": 0.8714151978492737,
      "learning_rate": 2.9434175351525983e-05,
      "loss": 0.7814,
      "step": 205
    },
    {
      "epoch": 0.6885245901639344,
      "grad_norm": 0.7892358303070068,
      "learning_rate": 2.938905603848355e-05,
      "loss": 0.7404,
      "step": 210
    },
    {
      "epoch": 0.7049180327868853,
      "grad_norm": 0.8768689036369324,
      "learning_rate": 2.9342243438046666e-05,
      "loss": 0.7516,
      "step": 215
    },
    {
      "epoch": 0.7213114754098361,
      "grad_norm": 0.9508657455444336,
      "learning_rate": 2.9293743059067763e-05,
      "loss": 0.7152,
      "step": 220
    },
    {
      "epoch": 0.7377049180327869,
      "grad_norm": 0.9115400910377502,
      "learning_rate": 2.924356060901507e-05,
      "loss": 0.7078,
      "step": 225
    },
    {
      "epoch": 0.7540983606557377,
      "grad_norm": 0.9279278516769409,
      "learning_rate": 2.9191701993301e-05,
      "loss": 0.8145,
      "step": 230
    },
    {
      "epoch": 0.7704918032786885,
      "grad_norm": 0.8984402418136597,
      "learning_rate": 2.913817331458719e-05,
      "loss": 0.7177,
      "step": 235
    },
    {
      "epoch": 0.7868852459016393,
      "grad_norm": 1.000724196434021,
      "learning_rate": 2.9082980872066353e-05,
      "loss": 0.7077,
      "step": 240
    },
    {
      "epoch": 0.8032786885245902,
      "grad_norm": 0.968752920627594,
      "learning_rate": 2.9026131160720993e-05,
      "loss": 0.7409,
      "step": 245
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 1.0022119283676147,
      "learning_rate": 2.8967630870559073e-05,
      "loss": 0.6708,
      "step": 250
    },
    {
      "epoch": 0.8360655737704918,
      "grad_norm": 0.9408950805664062,
      "learning_rate": 2.890748688582677e-05,
      "loss": 0.6916,
      "step": 255
    },
    {
      "epoch": 0.8524590163934426,
      "grad_norm": 0.8202570080757141,
      "learning_rate": 2.8845706284198314e-05,
      "loss": 0.7148,
      "step": 260
    },
    {
      "epoch": 0.8688524590163934,
      "grad_norm": 1.015702486038208,
      "learning_rate": 2.8782296335943116e-05,
      "loss": 0.6521,
      "step": 265
    },
    {
      "epoch": 0.8852459016393442,
      "grad_norm": 1.0327638387680054,
      "learning_rate": 2.871726450307021e-05,
      "loss": 0.6667,
      "step": 270
    },
    {
      "epoch": 0.9016393442622951,
      "grad_norm": 0.929453432559967,
      "learning_rate": 2.865061843845011e-05,
      "loss": 0.6014,
      "step": 275
    },
    {
      "epoch": 0.9180327868852459,
      "grad_norm": 0.8924345970153809,
      "learning_rate": 2.8582365984914265e-05,
      "loss": 0.6254,
      "step": 280
    },
    {
      "epoch": 0.9344262295081968,
      "grad_norm": 1.004958152770996,
      "learning_rate": 2.85125151743321e-05,
      "loss": 0.6633,
      "step": 285
    },
    {
      "epoch": 0.9508196721311475,
      "grad_norm": 0.932119607925415,
      "learning_rate": 2.8441074226665834e-05,
      "loss": 0.619,
      "step": 290
    },
    {
      "epoch": 0.9672131147540983,
      "grad_norm": 0.8984085917472839,
      "learning_rate": 2.836805154900317e-05,
      "loss": 0.6414,
      "step": 295
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 1.0108938217163086,
      "learning_rate": 2.8293455734567964e-05,
      "loss": 0.6646,
      "step": 300
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9058620929718018,
      "learning_rate": 2.8217295561708986e-05,
      "loss": 0.6386,
      "step": 305
    },
    {
      "epoch": 1.0163934426229508,
      "grad_norm": 0.9871558547019958,
      "learning_rate": 2.813957999286687e-05,
      "loss": 0.5756,
      "step": 310
    },
    {
      "epoch": 1.0327868852459017,
      "grad_norm": 1.0473088026046753,
      "learning_rate": 2.8060318173519455e-05,
      "loss": 0.5358,
      "step": 315
    },
    {
      "epoch": 1.0491803278688525,
      "grad_norm": 1.2112137079238892,
      "learning_rate": 2.7979519431105537e-05,
      "loss": 0.5603,
      "step": 320
    },
    {
      "epoch": 1.0655737704918034,
      "grad_norm": 1.0239282846450806,
      "learning_rate": 2.7897193273927242e-05,
      "loss": 0.5272,
      "step": 325
    },
    {
      "epoch": 1.0819672131147542,
      "grad_norm": 1.1301106214523315,
      "learning_rate": 2.7813349390031077e-05,
      "loss": 0.5162,
      "step": 330
    },
    {
      "epoch": 1.098360655737705,
      "grad_norm": 0.9581446051597595,
      "learning_rate": 2.772799764606787e-05,
      "loss": 0.5285,
      "step": 335
    },
    {
      "epoch": 1.1147540983606556,
      "grad_norm": 1.120554804801941,
      "learning_rate": 2.764114808613167e-05,
      "loss": 0.5497,
      "step": 340
    },
    {
      "epoch": 1.1311475409836065,
      "grad_norm": 1.1846710443496704,
      "learning_rate": 2.755281093057778e-05,
      "loss": 0.4792,
      "step": 345
    },
    {
      "epoch": 1.1475409836065573,
      "grad_norm": 1.0247408151626587,
      "learning_rate": 2.7462996574820014e-05,
      "loss": 0.5178,
      "step": 350
    },
    {
      "epoch": 1.1639344262295082,
      "grad_norm": 0.9996801614761353,
      "learning_rate": 2.7371715588107388e-05,
      "loss": 0.4987,
      "step": 355
    },
    {
      "epoch": 1.180327868852459,
      "grad_norm": 1.0238093137741089,
      "learning_rate": 2.7278978712280353e-05,
      "loss": 0.456,
      "step": 360
    },
    {
      "epoch": 1.1967213114754098,
      "grad_norm": 1.2357455492019653,
      "learning_rate": 2.7184796860506697e-05,
      "loss": 0.5292,
      "step": 365
    },
    {
      "epoch": 1.2131147540983607,
      "grad_norm": 1.1693018674850464,
      "learning_rate": 2.7089181115997292e-05,
      "loss": 0.4745,
      "step": 370
    },
    {
      "epoch": 1.2295081967213115,
      "grad_norm": 0.9463955760002136,
      "learning_rate": 2.699214273070185e-05,
      "loss": 0.4803,
      "step": 375
    },
    {
      "epoch": 1.2459016393442623,
      "grad_norm": 1.2919942140579224,
      "learning_rate": 2.689369312398479e-05,
      "loss": 0.4721,
      "step": 380
    },
    {
      "epoch": 1.2622950819672132,
      "grad_norm": 1.0499731302261353,
      "learning_rate": 2.6793843881281435e-05,
      "loss": 0.5081,
      "step": 385
    },
    {
      "epoch": 1.278688524590164,
      "grad_norm": 1.1290088891983032,
      "learning_rate": 2.6692606752734642e-05,
      "loss": 0.4626,
      "step": 390
    },
    {
      "epoch": 1.2950819672131146,
      "grad_norm": 1.0272178649902344,
      "learning_rate": 2.6589993651812055e-05,
      "loss": 0.4824,
      "step": 395
    },
    {
      "epoch": 1.3114754098360657,
      "grad_norm": 1.1490875482559204,
      "learning_rate": 2.648601665390416e-05,
      "loss": 0.467,
      "step": 400
    },
    {
      "epoch": 1.3278688524590163,
      "grad_norm": 1.0278301239013672,
      "learning_rate": 2.638068799490324e-05,
      "loss": 0.4236,
      "step": 405
    },
    {
      "epoch": 1.3442622950819672,
      "grad_norm": 1.011580228805542,
      "learning_rate": 2.6274020069763505e-05,
      "loss": 0.4595,
      "step": 410
    },
    {
      "epoch": 1.360655737704918,
      "grad_norm": 1.1207678318023682,
      "learning_rate": 2.6166025431042427e-05,
      "loss": 0.4877,
      "step": 415
    },
    {
      "epoch": 1.3770491803278688,
      "grad_norm": 1.0334429740905762,
      "learning_rate": 2.605671678742361e-05,
      "loss": 0.416,
      "step": 420
    },
    {
      "epoch": 1.3934426229508197,
      "grad_norm": 1.1720657348632812,
      "learning_rate": 2.5946107002221206e-05,
      "loss": 0.4295,
      "step": 425
    },
    {
      "epoch": 1.4098360655737705,
      "grad_norm": 1.1176438331604004,
      "learning_rate": 2.583420909186622e-05,
      "loss": 0.4133,
      "step": 430
    },
    {
      "epoch": 1.4262295081967213,
      "grad_norm": 1.1450058221817017,
      "learning_rate": 2.572103622437471e-05,
      "loss": 0.4412,
      "step": 435
    },
    {
      "epoch": 1.4426229508196722,
      "grad_norm": 1.0903273820877075,
      "learning_rate": 2.5606601717798212e-05,
      "loss": 0.3953,
      "step": 440
    },
    {
      "epoch": 1.459016393442623,
      "grad_norm": 1.2657668590545654,
      "learning_rate": 2.5490919038656495e-05,
      "loss": 0.4089,
      "step": 445
    },
    {
      "epoch": 1.4754098360655736,
      "grad_norm": 1.1453659534454346,
      "learning_rate": 2.537400180035281e-05,
      "loss": 0.4526,
      "step": 450
    },
    {
      "epoch": 1.4918032786885247,
      "grad_norm": 1.1551433801651,
      "learning_rate": 2.525586376157191e-05,
      "loss": 0.4247,
      "step": 455
    },
    {
      "epoch": 1.5081967213114753,
      "grad_norm": 1.0655601024627686,
      "learning_rate": 2.513651882466091e-05,
      "loss": 0.4207,
      "step": 460
    },
    {
      "epoch": 1.5245901639344264,
      "grad_norm": 1.0673174858093262,
      "learning_rate": 2.5015981033993313e-05,
      "loss": 0.3831,
      "step": 465
    },
    {
      "epoch": 1.540983606557377,
      "grad_norm": 1.0724608898162842,
      "learning_rate": 2.4894264574316258e-05,
      "loss": 0.3997,
      "step": 470
    },
    {
      "epoch": 1.5573770491803278,
      "grad_norm": 1.037182092666626,
      "learning_rate": 2.4771383769081286e-05,
      "loss": 0.3979,
      "step": 475
    },
    {
      "epoch": 1.5737704918032787,
      "grad_norm": 1.1553537845611572,
      "learning_rate": 2.4647353078758772e-05,
      "loss": 0.3831,
      "step": 480
    },
    {
      "epoch": 1.5901639344262295,
      "grad_norm": 1.154228687286377,
      "learning_rate": 2.4522187099136223e-05,
      "loss": 0.3843,
      "step": 485
    },
    {
      "epoch": 1.6065573770491803,
      "grad_norm": 1.0977567434310913,
      "learning_rate": 2.4395900559600676e-05,
      "loss": 0.3558,
      "step": 490
    },
    {
      "epoch": 1.6229508196721312,
      "grad_norm": 1.1280032396316528,
      "learning_rate": 2.4268508321405363e-05,
      "loss": 0.394,
      "step": 495
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 1.16530442237854,
      "learning_rate": 2.414002537592084e-05,
      "loss": 0.361,
      "step": 500
    },
    {
      "epoch": 1.6557377049180326,
      "grad_norm": 1.2421867847442627,
      "learning_rate": 2.401046684287084e-05,
      "loss": 0.3906,
      "step": 505
    },
    {
      "epoch": 1.6721311475409837,
      "grad_norm": 1.2887860536575317,
      "learning_rate": 2.3879847968552983e-05,
      "loss": 0.4039,
      "step": 510
    },
    {
      "epoch": 1.6885245901639343,
      "grad_norm": 1.034765362739563,
      "learning_rate": 2.3748184124044614e-05,
      "loss": 0.3515,
      "step": 515
    },
    {
      "epoch": 1.7049180327868854,
      "grad_norm": 1.0902974605560303,
      "learning_rate": 2.3615490803393963e-05,
      "loss": 0.393,
      "step": 520
    },
    {
      "epoch": 1.721311475409836,
      "grad_norm": 1.1227500438690186,
      "learning_rate": 2.3481783621796817e-05,
      "loss": 0.3554,
      "step": 525
    },
    {
      "epoch": 1.737704918032787,
      "grad_norm": 1.0987911224365234,
      "learning_rate": 2.3347078313758932e-05,
      "loss": 0.3646,
      "step": 530
    },
    {
      "epoch": 1.7540983606557377,
      "grad_norm": 1.0380116701126099,
      "learning_rate": 2.3211390731244437e-05,
      "loss": 0.3497,
      "step": 535
    },
    {
      "epoch": 1.7704918032786885,
      "grad_norm": 1.0511633157730103,
      "learning_rate": 2.3074736841810365e-05,
      "loss": 0.385,
      "step": 540
    },
    {
      "epoch": 1.7868852459016393,
      "grad_norm": 1.018215537071228,
      "learning_rate": 2.2937132726727633e-05,
      "loss": 0.348,
      "step": 545
    },
    {
      "epoch": 1.8032786885245902,
      "grad_norm": 1.085090160369873,
      "learning_rate": 2.279859457908862e-05,
      "loss": 0.3675,
      "step": 550
    },
    {
      "epoch": 1.819672131147541,
      "grad_norm": 1.0416409969329834,
      "learning_rate": 2.265913870190154e-05,
      "loss": 0.3742,
      "step": 555
    },
    {
      "epoch": 1.8360655737704918,
      "grad_norm": 1.280327558517456,
      "learning_rate": 2.2518781506171975e-05,
      "loss": 0.3629,
      "step": 560
    },
    {
      "epoch": 1.8524590163934427,
      "grad_norm": 1.1138688325881958,
      "learning_rate": 2.237753950897164e-05,
      "loss": 0.3433,
      "step": 565
    },
    {
      "epoch": 1.8688524590163933,
      "grad_norm": 1.2459837198257446,
      "learning_rate": 2.2235429331494618e-05,
      "loss": 0.3369,
      "step": 570
    },
    {
      "epoch": 1.8852459016393444,
      "grad_norm": 1.0113780498504639,
      "learning_rate": 2.209246769710148e-05,
      "loss": 0.3501,
      "step": 575
    },
    {
      "epoch": 1.901639344262295,
      "grad_norm": 1.1586419343948364,
      "learning_rate": 2.1948671429351232e-05,
      "loss": 0.3459,
      "step": 580
    },
    {
      "epoch": 1.918032786885246,
      "grad_norm": 1.0399360656738281,
      "learning_rate": 2.1804057450021564e-05,
      "loss": 0.3283,
      "step": 585
    },
    {
      "epoch": 1.9344262295081966,
      "grad_norm": 1.1013191938400269,
      "learning_rate": 2.1658642777117534e-05,
      "loss": 0.3214,
      "step": 590
    },
    {
      "epoch": 1.9508196721311475,
      "grad_norm": 1.1633572578430176,
      "learning_rate": 2.151244452286887e-05,
      "loss": 0.3399,
      "step": 595
    },
    {
      "epoch": 1.9672131147540983,
      "grad_norm": 0.9520934224128723,
      "learning_rate": 2.136547989171625e-05,
      "loss": 0.3468,
      "step": 600
    },
    {
      "epoch": 1.9836065573770492,
      "grad_norm": 1.1227986812591553,
      "learning_rate": 2.1217766178286696e-05,
      "loss": 0.2954,
      "step": 605
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.099863886833191,
      "learning_rate": 2.1069320765358354e-05,
      "loss": 0.2882,
      "step": 610
    }
  ],
  "logging_steps": 5,
  "max_steps": 1525,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.493903381060649e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}