{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.997830802603037,
  "eval_steps": 500,
  "global_step": 691,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0028922631959508315,
      "grad_norm": 48.71332931518555,
      "learning_rate": 0.0,
      "loss": 3.3684,
      "step": 1
    },
    {
      "epoch": 0.005784526391901663,
      "grad_norm": 45.838565826416016,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 3.2845,
      "step": 2
    },
    {
      "epoch": 0.008676789587852495,
      "grad_norm": 56.195335388183594,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 3.5006,
      "step": 3
    },
    {
      "epoch": 0.011569052783803326,
      "grad_norm": 21.180103302001953,
      "learning_rate": 4.285714285714286e-06,
      "loss": 3.0654,
      "step": 4
    },
    {
      "epoch": 0.014461315979754157,
      "grad_norm": 21.839435577392578,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 2.9674,
      "step": 5
    },
    {
      "epoch": 0.01735357917570499,
      "grad_norm": 9.650607109069824,
      "learning_rate": 7.142857142857143e-06,
      "loss": 2.9594,
      "step": 6
    },
    {
      "epoch": 0.02024584237165582,
      "grad_norm": 6.312131881713867,
      "learning_rate": 8.571428571428573e-06,
      "loss": 2.8589,
      "step": 7
    },
    {
      "epoch": 0.023138105567606652,
      "grad_norm": 6.745247840881348,
      "learning_rate": 1e-05,
      "loss": 2.6967,
      "step": 8
    },
    {
      "epoch": 0.026030368763557483,
      "grad_norm": 5.504076957702637,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 2.446,
      "step": 9
    },
    {
      "epoch": 0.028922631959508314,
      "grad_norm": 4.069777011871338,
      "learning_rate": 1.2857142857142857e-05,
      "loss": 2.4143,
      "step": 10
    },
    {
      "epoch": 0.03181489515545915,
      "grad_norm": 3.7189438343048096,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 2.1749,
      "step": 11
    },
    {
      "epoch": 0.03470715835140998,
      "grad_norm": 4.501105308532715,
      "learning_rate": 1.5714285714285715e-05,
      "loss": 2.1738,
      "step": 12
    },
    {
      "epoch": 0.03759942154736081,
      "grad_norm": 5.211763858795166,
      "learning_rate": 1.7142857142857145e-05,
      "loss": 2.0335,
      "step": 13
    },
    {
      "epoch": 0.04049168474331164,
      "grad_norm": 2.67036509513855,
      "learning_rate": 1.8571428571428572e-05,
      "loss": 1.9522,
      "step": 14
    },
    {
      "epoch": 0.04338394793926247,
      "grad_norm": 2.7940988540649414,
      "learning_rate": 2e-05,
      "loss": 1.9142,
      "step": 15
    },
    {
      "epoch": 0.046276211135213303,
      "grad_norm": 3.4890847206115723,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 1.8412,
      "step": 16
    },
    {
      "epoch": 0.049168474331164135,
      "grad_norm": 5.002918720245361,
      "learning_rate": 2.2857142857142858e-05,
      "loss": 1.8031,
      "step": 17
    },
    {
      "epoch": 0.052060737527114966,
      "grad_norm": 4.0725226402282715,
      "learning_rate": 2.4285714285714288e-05,
      "loss": 1.8587,
      "step": 18
    },
    {
      "epoch": 0.0549530007230658,
      "grad_norm": 2.988891124725342,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 1.6742,
      "step": 19
    },
    {
      "epoch": 0.05784526391901663,
      "grad_norm": 2.679062843322754,
      "learning_rate": 2.714285714285714e-05,
      "loss": 1.563,
      "step": 20
    },
    {
      "epoch": 0.06073752711496746,
      "grad_norm": 1.9652676582336426,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.6146,
      "step": 21
    },
    {
      "epoch": 0.0636297903109183,
      "grad_norm": 3.5487523078918457,
      "learning_rate": 3e-05,
      "loss": 1.6091,
      "step": 22
    },
    {
      "epoch": 0.06652205350686913,
      "grad_norm": 3.5734827518463135,
      "learning_rate": 3.142857142857143e-05,
      "loss": 1.5872,
      "step": 23
    },
    {
      "epoch": 0.06941431670281996,
      "grad_norm": 2.6711552143096924,
      "learning_rate": 3.285714285714286e-05,
      "loss": 1.5964,
      "step": 24
    },
    {
      "epoch": 0.07230657989877079,
      "grad_norm": 2.6824355125427246,
      "learning_rate": 3.428571428571429e-05,
      "loss": 1.6661,
      "step": 25
    },
    {
      "epoch": 0.07519884309472162,
      "grad_norm": 2.8385238647460938,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.6069,
      "step": 26
    },
    {
      "epoch": 0.07809110629067245,
      "grad_norm": 2.863154172897339,
      "learning_rate": 3.7142857142857143e-05,
      "loss": 1.6074,
      "step": 27
    },
    {
      "epoch": 0.08098336948662328,
      "grad_norm": 2.5264947414398193,
      "learning_rate": 3.857142857142858e-05,
      "loss": 1.5442,
      "step": 28
    },
    {
      "epoch": 0.08387563268257411,
      "grad_norm": 2.4073829650878906,
      "learning_rate": 4e-05,
      "loss": 1.4696,
      "step": 29
    },
    {
      "epoch": 0.08676789587852494,
      "grad_norm": 1.2896760702133179,
      "learning_rate": 4.1428571428571437e-05,
      "loss": 1.4876,
      "step": 30
    },
    {
      "epoch": 0.08966015907447578,
      "grad_norm": 1.3128914833068848,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 1.5028,
      "step": 31
    },
    {
      "epoch": 0.09255242227042661,
      "grad_norm": 1.6972280740737915,
      "learning_rate": 4.428571428571428e-05,
      "loss": 1.5156,
      "step": 32
    },
    {
      "epoch": 0.09544468546637744,
      "grad_norm": 1.735119104385376,
      "learning_rate": 4.5714285714285716e-05,
      "loss": 1.3899,
      "step": 33
    },
    {
      "epoch": 0.09833694866232827,
      "grad_norm": 1.6684017181396484,
      "learning_rate": 4.714285714285714e-05,
      "loss": 1.4309,
      "step": 34
    },
    {
      "epoch": 0.1012292118582791,
      "grad_norm": 1.60593581199646,
      "learning_rate": 4.8571428571428576e-05,
      "loss": 1.4821,
      "step": 35
    },
    {
      "epoch": 0.10412147505422993,
      "grad_norm": 1.369852066040039,
      "learning_rate": 5e-05,
      "loss": 1.4606,
      "step": 36
    },
    {
      "epoch": 0.10701373825018076,
      "grad_norm": 1.0815776586532593,
      "learning_rate": 4.992378048780488e-05,
      "loss": 1.3166,
      "step": 37
    },
    {
      "epoch": 0.1099060014461316,
      "grad_norm": 1.1393859386444092,
      "learning_rate": 4.984756097560976e-05,
      "loss": 1.3596,
      "step": 38
    },
    {
      "epoch": 0.11279826464208242,
      "grad_norm": 1.293410062789917,
      "learning_rate": 4.977134146341464e-05,
      "loss": 1.4997,
      "step": 39
    },
    {
      "epoch": 0.11569052783803326,
      "grad_norm": 1.5961534976959229,
      "learning_rate": 4.969512195121951e-05,
      "loss": 1.3908,
      "step": 40
    },
    {
      "epoch": 0.11858279103398409,
      "grad_norm": 1.6708180904388428,
      "learning_rate": 4.961890243902439e-05,
      "loss": 1.4434,
      "step": 41
    },
    {
      "epoch": 0.12147505422993492,
      "grad_norm": 1.3898653984069824,
      "learning_rate": 4.954268292682927e-05,
      "loss": 1.3746,
      "step": 42
    },
    {
      "epoch": 0.12436731742588576,
      "grad_norm": 1.1497617959976196,
      "learning_rate": 4.946646341463415e-05,
      "loss": 1.3683,
      "step": 43
    },
    {
      "epoch": 0.1272595806218366,
      "grad_norm": 0.9966000318527222,
      "learning_rate": 4.9390243902439024e-05,
      "loss": 1.4588,
      "step": 44
    },
    {
      "epoch": 0.1301518438177874,
      "grad_norm": 1.0601434707641602,
      "learning_rate": 4.931402439024391e-05,
      "loss": 1.3757,
      "step": 45
    },
    {
      "epoch": 0.13304410701373826,
      "grad_norm": 1.2142244577407837,
      "learning_rate": 4.923780487804878e-05,
      "loss": 1.419,
      "step": 46
    },
    {
      "epoch": 0.13593637020968907,
      "grad_norm": 1.2789775133132935,
      "learning_rate": 4.916158536585366e-05,
      "loss": 1.3221,
      "step": 47
    },
    {
      "epoch": 0.13882863340563992,
      "grad_norm": 1.2200745344161987,
      "learning_rate": 4.908536585365854e-05,
      "loss": 1.4087,
      "step": 48
    },
    {
      "epoch": 0.14172089660159073,
      "grad_norm": 1.0769251585006714,
      "learning_rate": 4.900914634146342e-05,
      "loss": 1.3794,
      "step": 49
    },
    {
      "epoch": 0.14461315979754158,
      "grad_norm": 0.9566358923912048,
      "learning_rate": 4.893292682926829e-05,
      "loss": 1.3159,
      "step": 50
    },
    {
      "epoch": 0.1475054229934924,
      "grad_norm": 1.0282989740371704,
      "learning_rate": 4.885670731707317e-05,
      "loss": 1.3803,
      "step": 51
    },
    {
      "epoch": 0.15039768618944324,
      "grad_norm": 1.0863200426101685,
      "learning_rate": 4.878048780487805e-05,
      "loss": 1.3548,
      "step": 52
    },
    {
      "epoch": 0.15328994938539406,
      "grad_norm": 1.0302592515945435,
      "learning_rate": 4.870426829268293e-05,
      "loss": 1.4204,
      "step": 53
    },
    {
      "epoch": 0.1561822125813449,
      "grad_norm": 1.0147430896759033,
      "learning_rate": 4.86280487804878e-05,
      "loss": 1.4,
      "step": 54
    },
    {
      "epoch": 0.15907447577729572,
      "grad_norm": 0.9125880599021912,
      "learning_rate": 4.855182926829269e-05,
      "loss": 1.3568,
      "step": 55
    },
    {
      "epoch": 0.16196673897324657,
      "grad_norm": 0.8917691707611084,
      "learning_rate": 4.847560975609756e-05,
      "loss": 1.3547,
      "step": 56
    },
    {
      "epoch": 0.1648590021691974,
      "grad_norm": 1.03391432762146,
      "learning_rate": 4.839939024390244e-05,
      "loss": 1.2731,
      "step": 57
    },
    {
      "epoch": 0.16775126536514823,
      "grad_norm": 1.0735812187194824,
      "learning_rate": 4.832317073170732e-05,
      "loss": 1.2944,
      "step": 58
    },
    {
      "epoch": 0.17064352856109907,
      "grad_norm": 1.028361439704895,
      "learning_rate": 4.82469512195122e-05,
      "loss": 1.2617,
      "step": 59
    },
    {
      "epoch": 0.1735357917570499,
      "grad_norm": 0.9899557828903198,
      "learning_rate": 4.817073170731707e-05,
      "loss": 1.3891,
      "step": 60
    },
    {
      "epoch": 0.17642805495300073,
      "grad_norm": 6.040285110473633,
      "learning_rate": 4.809451219512195e-05,
      "loss": 1.3886,
      "step": 61
    },
    {
      "epoch": 0.17932031814895155,
      "grad_norm": 1.1661717891693115,
      "learning_rate": 4.801829268292683e-05,
      "loss": 1.2352,
      "step": 62
    },
    {
      "epoch": 0.1822125813449024,
      "grad_norm": 3.124387502670288,
      "learning_rate": 4.794207317073171e-05,
      "loss": 1.2965,
      "step": 63
    },
    {
      "epoch": 0.18510484454085321,
      "grad_norm": 1.1827131509780884,
      "learning_rate": 4.786585365853658e-05,
      "loss": 1.349,
      "step": 64
    },
    {
      "epoch": 0.18799710773680406,
      "grad_norm": 1.027674674987793,
      "learning_rate": 4.778963414634147e-05,
      "loss": 1.2165,
      "step": 65
    },
    {
      "epoch": 0.19088937093275488,
      "grad_norm": 0.9438247084617615,
      "learning_rate": 4.771341463414634e-05,
      "loss": 1.2538,
      "step": 66
    },
    {
      "epoch": 0.19378163412870572,
      "grad_norm": 0.9163101315498352,
      "learning_rate": 4.763719512195122e-05,
      "loss": 1.2914,
      "step": 67
    },
    {
      "epoch": 0.19667389732465654,
      "grad_norm": 0.9787700176239014,
      "learning_rate": 4.75609756097561e-05,
      "loss": 1.2013,
      "step": 68
    },
    {
      "epoch": 0.19956616052060738,
      "grad_norm": 0.9685674905776978,
      "learning_rate": 4.748475609756098e-05,
      "loss": 1.2933,
      "step": 69
    },
    {
      "epoch": 0.2024584237165582,
      "grad_norm": 0.8412639498710632,
      "learning_rate": 4.740853658536585e-05,
      "loss": 1.262,
      "step": 70
    },
    {
      "epoch": 0.20535068691250905,
      "grad_norm": 0.9766181707382202,
      "learning_rate": 4.733231707317073e-05,
      "loss": 1.225,
      "step": 71
    },
    {
      "epoch": 0.20824295010845986,
      "grad_norm": 0.990614116191864,
      "learning_rate": 4.725609756097561e-05,
      "loss": 1.192,
      "step": 72
    },
    {
      "epoch": 0.2111352133044107,
      "grad_norm": 0.8069394826889038,
      "learning_rate": 4.717987804878049e-05,
      "loss": 1.2127,
      "step": 73
    },
    {
      "epoch": 0.21402747650036152,
      "grad_norm": 1.022425889968872,
      "learning_rate": 4.710365853658536e-05,
      "loss": 1.1593,
      "step": 74
    },
    {
      "epoch": 0.21691973969631237,
      "grad_norm": 0.9153020977973938,
      "learning_rate": 4.702743902439025e-05,
      "loss": 1.1794,
      "step": 75
    },
    {
      "epoch": 0.2198120028922632,
      "grad_norm": 0.7978305816650391,
      "learning_rate": 4.695121951219512e-05,
      "loss": 1.2791,
      "step": 76
    },
    {
      "epoch": 0.22270426608821403,
      "grad_norm": 0.8948712348937988,
      "learning_rate": 4.6875e-05,
      "loss": 1.2227,
      "step": 77
    },
    {
      "epoch": 0.22559652928416485,
      "grad_norm": 0.9704264998435974,
      "learning_rate": 4.679878048780488e-05,
      "loss": 1.186,
      "step": 78
    },
    {
      "epoch": 0.2284887924801157,
      "grad_norm": 0.8205945491790771,
      "learning_rate": 4.672256097560976e-05,
      "loss": 1.1719,
      "step": 79
    },
    {
      "epoch": 0.2313810556760665,
      "grad_norm": 0.9167234897613525,
      "learning_rate": 4.664634146341464e-05,
      "loss": 1.2479,
      "step": 80
    },
    {
      "epoch": 0.23427331887201736,
      "grad_norm": 0.8766996264457703,
      "learning_rate": 4.657012195121951e-05,
      "loss": 1.2073,
      "step": 81
    },
    {
      "epoch": 0.23716558206796817,
      "grad_norm": 0.8327258229255676,
      "learning_rate": 4.64939024390244e-05,
      "loss": 1.2963,
      "step": 82
    },
    {
      "epoch": 0.24005784526391902,
      "grad_norm": 0.9994452595710754,
      "learning_rate": 4.641768292682927e-05,
      "loss": 1.1831,
      "step": 83
    },
    {
      "epoch": 0.24295010845986983,
      "grad_norm": 0.7853651642799377,
      "learning_rate": 4.634146341463415e-05,
      "loss": 1.2727,
      "step": 84
    },
    {
      "epoch": 0.24584237165582068,
      "grad_norm": 0.783089816570282,
      "learning_rate": 4.626524390243903e-05,
      "loss": 1.2149,
      "step": 85
    },
    {
      "epoch": 0.24873463485177152,
      "grad_norm": 0.9224200248718262,
      "learning_rate": 4.618902439024391e-05,
      "loss": 1.1902,
      "step": 86
    },
    {
      "epoch": 0.25162689804772237,
      "grad_norm": 0.7504012584686279,
      "learning_rate": 4.611280487804878e-05,
      "loss": 1.2568,
      "step": 87
    },
    {
      "epoch": 0.2545191612436732,
      "grad_norm": 0.8345561027526855,
      "learning_rate": 4.603658536585366e-05,
      "loss": 1.1642,
      "step": 88
    },
    {
      "epoch": 0.257411424439624,
      "grad_norm": 0.8287318348884583,
      "learning_rate": 4.596036585365854e-05,
      "loss": 1.2115,
      "step": 89
    },
    {
      "epoch": 0.2603036876355748,
      "grad_norm": 0.7950981259346008,
      "learning_rate": 4.588414634146342e-05,
      "loss": 1.1439,
      "step": 90
    },
    {
      "epoch": 0.2631959508315257,
      "grad_norm": 0.8269981741905212,
      "learning_rate": 4.580792682926829e-05,
      "loss": 1.226,
      "step": 91
    },
    {
      "epoch": 0.2660882140274765,
      "grad_norm": 0.7990830540657043,
      "learning_rate": 4.573170731707318e-05,
      "loss": 1.2059,
      "step": 92
    },
    {
      "epoch": 0.26898047722342733,
      "grad_norm": 0.746702253818512,
      "learning_rate": 4.565548780487805e-05,
      "loss": 1.272,
      "step": 93
    },
    {
      "epoch": 0.27187274041937814,
      "grad_norm": 0.7808762192726135,
      "learning_rate": 4.557926829268293e-05,
      "loss": 1.2135,
      "step": 94
    },
    {
      "epoch": 0.274765003615329,
      "grad_norm": 0.8141624331474304,
      "learning_rate": 4.550304878048781e-05,
      "loss": 1.1549,
      "step": 95
    },
    {
      "epoch": 0.27765726681127983,
      "grad_norm": 0.7702810168266296,
      "learning_rate": 4.542682926829269e-05,
      "loss": 1.1471,
      "step": 96
    },
    {
      "epoch": 0.28054953000723065,
      "grad_norm": 0.7874007821083069,
      "learning_rate": 4.535060975609756e-05,
      "loss": 1.2672,
      "step": 97
    },
    {
      "epoch": 0.28344179320318147,
      "grad_norm": 0.7983161211013794,
      "learning_rate": 4.527439024390244e-05,
      "loss": 1.099,
      "step": 98
    },
    {
      "epoch": 0.28633405639913234,
      "grad_norm": 0.8033881783485413,
      "learning_rate": 4.519817073170732e-05,
      "loss": 1.1549,
      "step": 99
    },
    {
      "epoch": 0.28922631959508316,
      "grad_norm": 0.8222156167030334,
      "learning_rate": 4.51219512195122e-05,
      "loss": 1.1527,
      "step": 100
    },
    {
      "epoch": 0.292118582791034,
      "grad_norm": 0.7592807412147522,
      "learning_rate": 4.504573170731707e-05,
      "loss": 1.2142,
      "step": 101
    },
    {
      "epoch": 0.2950108459869848,
      "grad_norm": 0.7466637492179871,
      "learning_rate": 4.496951219512196e-05,
      "loss": 1.2232,
      "step": 102
    },
    {
      "epoch": 0.29790310918293567,
      "grad_norm": 0.7532088756561279,
      "learning_rate": 4.489329268292683e-05,
      "loss": 1.1717,
      "step": 103
    },
    {
      "epoch": 0.3007953723788865,
      "grad_norm": 0.766828715801239,
      "learning_rate": 4.481707317073171e-05,
      "loss": 1.2218,
      "step": 104
    },
    {
      "epoch": 0.3036876355748373,
      "grad_norm": 0.6948519349098206,
      "learning_rate": 4.474085365853659e-05,
      "loss": 1.1116,
      "step": 105
    },
    {
      "epoch": 0.3065798987707881,
      "grad_norm": 0.7532397508621216,
      "learning_rate": 4.466463414634147e-05,
      "loss": 1.1451,
      "step": 106
    },
    {
      "epoch": 0.309472161966739,
      "grad_norm": 0.7384987473487854,
      "learning_rate": 4.458841463414634e-05,
      "loss": 1.2043,
      "step": 107
    },
    {
      "epoch": 0.3123644251626898,
      "grad_norm": 0.7876350283622742,
      "learning_rate": 4.451219512195122e-05,
      "loss": 1.3315,
      "step": 108
    },
    {
      "epoch": 0.3152566883586406,
      "grad_norm": 0.7799772024154663,
      "learning_rate": 4.44359756097561e-05,
      "loss": 1.2367,
      "step": 109
    },
    {
      "epoch": 0.31814895155459144,
      "grad_norm": 0.802836537361145,
      "learning_rate": 4.435975609756098e-05,
      "loss": 1.1859,
      "step": 110
    },
    {
      "epoch": 0.3210412147505423,
      "grad_norm": 0.7658648490905762,
      "learning_rate": 4.428353658536585e-05,
      "loss": 1.1554,
      "step": 111
    },
    {
      "epoch": 0.32393347794649313,
      "grad_norm": 0.7552660703659058,
      "learning_rate": 4.420731707317074e-05,
      "loss": 1.1773,
      "step": 112
    },
    {
      "epoch": 0.32682574114244395,
      "grad_norm": 0.7944100499153137,
      "learning_rate": 4.413109756097561e-05,
      "loss": 1.1369,
      "step": 113
    },
    {
      "epoch": 0.3297180043383948,
      "grad_norm": 0.79727703332901,
      "learning_rate": 4.405487804878049e-05,
      "loss": 1.1515,
      "step": 114
    },
    {
      "epoch": 0.33261026753434564,
      "grad_norm": 0.7767285704612732,
      "learning_rate": 4.397865853658537e-05,
      "loss": 1.2823,
      "step": 115
    },
    {
      "epoch": 0.33550253073029646,
      "grad_norm": 0.8018892407417297,
      "learning_rate": 4.390243902439025e-05,
      "loss": 1.1792,
      "step": 116
    },
    {
      "epoch": 0.3383947939262473,
      "grad_norm": 0.7893505692481995,
      "learning_rate": 4.382621951219512e-05,
      "loss": 1.2078,
      "step": 117
    },
    {
      "epoch": 0.34128705712219815,
      "grad_norm": 0.7643678784370422,
      "learning_rate": 4.375e-05,
      "loss": 1.1172,
      "step": 118
    },
    {
      "epoch": 0.34417932031814896,
      "grad_norm": 0.7227766513824463,
      "learning_rate": 4.3673780487804886e-05,
      "loss": 1.2424,
      "step": 119
    },
    {
      "epoch": 0.3470715835140998,
      "grad_norm": 0.7557047009468079,
      "learning_rate": 4.359756097560976e-05,
      "loss": 1.1996,
      "step": 120
    },
    {
      "epoch": 0.3499638467100506,
      "grad_norm": 0.75395667552948,
      "learning_rate": 4.352134146341464e-05,
      "loss": 1.2023,
      "step": 121
    },
    {
      "epoch": 0.35285610990600147,
      "grad_norm": 0.7078515291213989,
      "learning_rate": 4.344512195121952e-05,
      "loss": 1.2086,
      "step": 122
    },
    {
      "epoch": 0.3557483731019523,
      "grad_norm": 0.7395102381706238,
      "learning_rate": 4.3368902439024396e-05,
      "loss": 1.1106,
      "step": 123
    },
    {
      "epoch": 0.3586406362979031,
      "grad_norm": 0.819173276424408,
      "learning_rate": 4.329268292682927e-05,
      "loss": 1.1037,
      "step": 124
    },
    {
      "epoch": 0.3615328994938539,
      "grad_norm": 0.7435188889503479,
      "learning_rate": 4.321646341463415e-05,
      "loss": 1.1914,
      "step": 125
    },
    {
      "epoch": 0.3644251626898048,
      "grad_norm": 0.8237520456314087,
      "learning_rate": 4.314024390243903e-05,
      "loss": 1.1724,
      "step": 126
    },
    {
      "epoch": 0.3673174258857556,
      "grad_norm": 0.7931056022644043,
      "learning_rate": 4.306402439024391e-05,
      "loss": 1.1706,
      "step": 127
    },
    {
      "epoch": 0.37020968908170643,
      "grad_norm": 0.7253796458244324,
      "learning_rate": 4.298780487804878e-05,
      "loss": 1.1297,
      "step": 128
    },
    {
      "epoch": 0.37310195227765725,
      "grad_norm": 0.7788090705871582,
      "learning_rate": 4.2911585365853665e-05,
      "loss": 1.1685,
      "step": 129
    },
    {
      "epoch": 0.3759942154736081,
      "grad_norm": 0.7236787676811218,
      "learning_rate": 4.283536585365854e-05,
      "loss": 1.2329,
      "step": 130
    },
    {
      "epoch": 0.37888647866955893,
      "grad_norm": 0.7436123490333557,
      "learning_rate": 4.275914634146342e-05,
      "loss": 1.0825,
      "step": 131
    },
    {
      "epoch": 0.38177874186550975,
      "grad_norm": 0.7631476521492004,
      "learning_rate": 4.26829268292683e-05,
      "loss": 1.1648,
      "step": 132
    },
    {
      "epoch": 0.38467100506146057,
      "grad_norm": 0.7813283801078796,
      "learning_rate": 4.2606707317073176e-05,
      "loss": 1.1475,
      "step": 133
    },
    {
      "epoch": 0.38756326825741144,
      "grad_norm": 0.7633726000785828,
      "learning_rate": 4.253048780487805e-05,
      "loss": 1.1771,
      "step": 134
    },
    {
      "epoch": 0.39045553145336226,
      "grad_norm": 0.7443217039108276,
      "learning_rate": 4.245426829268293e-05,
      "loss": 1.0879,
      "step": 135
    },
    {
      "epoch": 0.3933477946493131,
      "grad_norm": 0.7620945572853088,
      "learning_rate": 4.237804878048781e-05,
      "loss": 1.1515,
      "step": 136
    },
    {
      "epoch": 0.3962400578452639,
      "grad_norm": 0.7569906711578369,
      "learning_rate": 4.230182926829269e-05,
      "loss": 1.1857,
      "step": 137
    },
    {
      "epoch": 0.39913232104121477,
      "grad_norm": 0.754265546798706,
      "learning_rate": 4.222560975609756e-05,
      "loss": 1.2235,
      "step": 138
    },
    {
      "epoch": 0.4020245842371656,
      "grad_norm": 0.8115909695625305,
      "learning_rate": 4.2149390243902445e-05,
      "loss": 1.1533,
      "step": 139
    },
    {
      "epoch": 0.4049168474331164,
      "grad_norm": 0.7119144201278687,
      "learning_rate": 4.207317073170732e-05,
      "loss": 1.0566,
      "step": 140
    },
    {
      "epoch": 0.4078091106290672,
      "grad_norm": 0.745745062828064,
      "learning_rate": 4.19969512195122e-05,
      "loss": 1.1801,
      "step": 141
    },
    {
      "epoch": 0.4107013738250181,
      "grad_norm": 0.7318696975708008,
      "learning_rate": 4.1920731707317077e-05,
      "loss": 1.0448,
      "step": 142
    },
    {
      "epoch": 0.4135936370209689,
      "grad_norm": 0.691558837890625,
      "learning_rate": 4.1844512195121956e-05,
      "loss": 1.118,
      "step": 143
    },
    {
      "epoch": 0.4164859002169197,
      "grad_norm": 0.7404938340187073,
      "learning_rate": 4.176829268292683e-05,
      "loss": 1.0795,
      "step": 144
    },
    {
      "epoch": 0.4193781634128706,
      "grad_norm": 0.7128071188926697,
      "learning_rate": 4.169207317073171e-05,
      "loss": 1.1663,
      "step": 145
    },
    {
      "epoch": 0.4222704266088214,
      "grad_norm": 0.8010504245758057,
      "learning_rate": 4.161585365853659e-05,
      "loss": 1.2375,
      "step": 146
    },
    {
      "epoch": 0.42516268980477223,
      "grad_norm": 0.7428746819496155,
      "learning_rate": 4.1539634146341466e-05,
      "loss": 1.0991,
      "step": 147
    },
    {
      "epoch": 0.42805495300072305,
      "grad_norm": 0.7510153651237488,
      "learning_rate": 4.146341463414634e-05,
      "loss": 1.1386,
      "step": 148
    },
    {
      "epoch": 0.4309472161966739,
      "grad_norm": 0.7697402834892273,
      "learning_rate": 4.1387195121951225e-05,
      "loss": 1.06,
      "step": 149
    },
    {
      "epoch": 0.43383947939262474,
      "grad_norm": 0.7100762128829956,
      "learning_rate": 4.13109756097561e-05,
      "loss": 1.1578,
      "step": 150
    },
    {
      "epoch": 0.43673174258857556,
      "grad_norm": 0.7327350974082947,
      "learning_rate": 4.123475609756098e-05,
      "loss": 1.1994,
      "step": 151
    },
    {
      "epoch": 0.4396240057845264,
      "grad_norm": 0.7481423020362854,
      "learning_rate": 4.1158536585365856e-05,
      "loss": 1.1554,
      "step": 152
    },
    {
      "epoch": 0.44251626898047725,
      "grad_norm": 0.7060924768447876,
      "learning_rate": 4.1082317073170736e-05,
      "loss": 1.1712,
      "step": 153
    },
    {
      "epoch": 0.44540853217642806,
      "grad_norm": 0.7289426326751709,
      "learning_rate": 4.100609756097561e-05,
      "loss": 1.0854,
      "step": 154
    },
    {
      "epoch": 0.4483007953723789,
      "grad_norm": 0.7729988694190979,
      "learning_rate": 4.092987804878049e-05,
      "loss": 1.1535,
      "step": 155
    },
    {
      "epoch": 0.4511930585683297,
      "grad_norm": 0.7460820078849792,
      "learning_rate": 4.085365853658537e-05,
      "loss": 1.1083,
      "step": 156
    },
    {
      "epoch": 0.45408532176428057,
      "grad_norm": 0.7617100477218628,
      "learning_rate": 4.0777439024390246e-05,
      "loss": 1.0703,
      "step": 157
    },
    {
      "epoch": 0.4569775849602314,
      "grad_norm": 0.7420201897621155,
      "learning_rate": 4.070121951219512e-05,
      "loss": 1.1499,
      "step": 158
    },
    {
      "epoch": 0.4598698481561822,
      "grad_norm": 0.7645936608314514,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 1.1024,
      "step": 159
    },
    {
      "epoch": 0.462762111352133,
      "grad_norm": 0.7603924870491028,
      "learning_rate": 4.0548780487804884e-05,
      "loss": 1.0113,
      "step": 160
    },
    {
      "epoch": 0.4656543745480839,
      "grad_norm": 0.7942943572998047,
      "learning_rate": 4.047256097560976e-05,
      "loss": 1.1814,
      "step": 161
    },
    {
      "epoch": 0.4685466377440347,
      "grad_norm": 0.7691872715950012,
      "learning_rate": 4.0396341463414636e-05,
      "loss": 1.1274,
      "step": 162
    },
    {
      "epoch": 0.47143890093998553,
      "grad_norm": 0.7765952348709106,
      "learning_rate": 4.0320121951219515e-05,
      "loss": 1.1215,
      "step": 163
    },
    {
      "epoch": 0.47433116413593635,
      "grad_norm": 0.7291862368583679,
      "learning_rate": 4.0243902439024395e-05,
      "loss": 1.0973,
      "step": 164
    },
    {
      "epoch": 0.4772234273318872,
      "grad_norm": 0.7589432597160339,
      "learning_rate": 4.016768292682927e-05,
      "loss": 1.1347,
      "step": 165
    },
    {
      "epoch": 0.48011569052783803,
      "grad_norm": 0.7447579503059387,
      "learning_rate": 4.0091463414634153e-05,
      "loss": 1.2361,
      "step": 166
    },
    {
      "epoch": 0.48300795372378885,
      "grad_norm": 0.7255765199661255,
      "learning_rate": 4.0015243902439026e-05,
      "loss": 1.1495,
      "step": 167
    },
    {
      "epoch": 0.48590021691973967,
      "grad_norm": 0.7621276378631592,
      "learning_rate": 3.9939024390243905e-05,
      "loss": 1.1568,
      "step": 168
    },
    {
      "epoch": 0.48879248011569054,
      "grad_norm": 0.7537471055984497,
      "learning_rate": 3.9862804878048785e-05,
      "loss": 1.1004,
      "step": 169
    },
    {
      "epoch": 0.49168474331164136,
      "grad_norm": 0.7859211564064026,
      "learning_rate": 3.9786585365853664e-05,
      "loss": 1.1401,
      "step": 170
    },
    {
      "epoch": 0.4945770065075922,
      "grad_norm": 0.7351391911506653,
      "learning_rate": 3.971036585365854e-05,
      "loss": 1.1076,
      "step": 171
    },
    {
      "epoch": 0.49746926970354305,
      "grad_norm": 0.7664011716842651,
      "learning_rate": 3.9634146341463416e-05,
      "loss": 1.0421,
      "step": 172
    },
    {
      "epoch": 0.5003615328994938,
      "grad_norm": 0.7682709693908691,
      "learning_rate": 3.9557926829268295e-05,
      "loss": 1.1002,
      "step": 173
    },
    {
      "epoch": 0.5032537960954447,
      "grad_norm": 0.7599637508392334,
      "learning_rate": 3.9481707317073175e-05,
      "loss": 1.1453,
      "step": 174
    },
    {
      "epoch": 0.5061460592913956,
      "grad_norm": 0.8105545043945312,
      "learning_rate": 3.940548780487805e-05,
      "loss": 1.1733,
      "step": 175
    },
    {
      "epoch": 0.5090383224873464,
      "grad_norm": 0.7692773938179016,
      "learning_rate": 3.932926829268293e-05,
      "loss": 1.1658,
      "step": 176
    },
    {
      "epoch": 0.5119305856832972,
      "grad_norm": 0.7400121092796326,
      "learning_rate": 3.9253048780487806e-05,
      "loss": 1.1037,
      "step": 177
    },
    {
      "epoch": 0.514822848879248,
      "grad_norm": 0.7246294021606445,
      "learning_rate": 3.9176829268292685e-05,
      "loss": 1.1829,
      "step": 178
    },
    {
      "epoch": 0.5177151120751988,
      "grad_norm": 0.7318651676177979,
      "learning_rate": 3.9100609756097565e-05,
      "loss": 0.9872,
      "step": 179
    },
    {
      "epoch": 0.5206073752711496,
      "grad_norm": 0.7589302659034729,
      "learning_rate": 3.9024390243902444e-05,
      "loss": 1.1624,
      "step": 180
    },
    {
      "epoch": 0.5234996384671005,
      "grad_norm": 0.7625978589057922,
      "learning_rate": 3.8948170731707316e-05,
      "loss": 1.1147,
      "step": 181
    },
    {
      "epoch": 0.5263919016630514,
      "grad_norm": 0.7786478400230408,
      "learning_rate": 3.8871951219512196e-05,
      "loss": 1.0524,
      "step": 182
    },
    {
      "epoch": 0.5292841648590022,
      "grad_norm": 0.7591277956962585,
      "learning_rate": 3.8795731707317075e-05,
      "loss": 1.0672,
      "step": 183
    },
    {
      "epoch": 0.532176428054953,
      "grad_norm": 0.806042492389679,
      "learning_rate": 3.8719512195121954e-05,
      "loss": 1.0742,
      "step": 184
    },
    {
      "epoch": 0.5350686912509038,
      "grad_norm": 0.7718027830123901,
      "learning_rate": 3.864329268292683e-05,
      "loss": 1.1326,
      "step": 185
    },
    {
      "epoch": 0.5379609544468547,
      "grad_norm": 0.7538328766822815,
      "learning_rate": 3.856707317073171e-05,
      "loss": 1.15,
      "step": 186
    },
    {
      "epoch": 0.5408532176428055,
      "grad_norm": 0.7316940426826477,
      "learning_rate": 3.8490853658536586e-05,
      "loss": 1.0463,
      "step": 187
    },
    {
      "epoch": 0.5437454808387563,
      "grad_norm": 0.7699999809265137,
      "learning_rate": 3.8414634146341465e-05,
      "loss": 1.183,
      "step": 188
    },
    {
      "epoch": 0.5466377440347071,
      "grad_norm": 0.7050356268882751,
      "learning_rate": 3.8338414634146344e-05,
      "loss": 1.1208,
      "step": 189
    },
    {
      "epoch": 0.549530007230658,
      "grad_norm": 0.7819121479988098,
      "learning_rate": 3.8262195121951224e-05,
      "loss": 1.1622,
      "step": 190
    },
    {
      "epoch": 0.5524222704266089,
      "grad_norm": 0.700554370880127,
      "learning_rate": 3.8185975609756096e-05,
      "loss": 1.1104,
      "step": 191
    },
    {
      "epoch": 0.5553145336225597,
      "grad_norm": 0.7335946559906006,
      "learning_rate": 3.8109756097560976e-05,
      "loss": 1.0856,
      "step": 192
    },
    {
      "epoch": 0.5582067968185105,
      "grad_norm": 0.7291987538337708,
      "learning_rate": 3.8033536585365855e-05,
      "loss": 1.1158,
      "step": 193
    },
    {
      "epoch": 0.5610990600144613,
      "grad_norm": 0.7313510775566101,
      "learning_rate": 3.7957317073170734e-05,
      "loss": 1.1907,
      "step": 194
    },
    {
      "epoch": 0.5639913232104121,
      "grad_norm": 0.7727324366569519,
      "learning_rate": 3.788109756097561e-05,
      "loss": 1.1681,
      "step": 195
    },
    {
      "epoch": 0.5668835864063629,
      "grad_norm": 0.7505455613136292,
      "learning_rate": 3.780487804878049e-05,
      "loss": 1.0712,
      "step": 196
    },
    {
      "epoch": 0.5697758496023138,
      "grad_norm": 0.7288169860839844,
      "learning_rate": 3.7728658536585365e-05,
      "loss": 1.113,
      "step": 197
    },
    {
      "epoch": 0.5726681127982647,
      "grad_norm": 0.8041896820068359,
      "learning_rate": 3.7652439024390245e-05,
      "loss": 1.056,
      "step": 198
    },
    {
      "epoch": 0.5755603759942155,
      "grad_norm": 0.7612701058387756,
      "learning_rate": 3.7576219512195124e-05,
      "loss": 1.0862,
      "step": 199
    },
    {
      "epoch": 0.5784526391901663,
      "grad_norm": 0.7867717742919922,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 1.1025,
      "step": 200
    },
    {
      "epoch": 0.5813449023861171,
      "grad_norm": 0.7869510054588318,
      "learning_rate": 3.742378048780488e-05,
      "loss": 1.1034,
      "step": 201
    },
    {
      "epoch": 0.584237165582068,
      "grad_norm": 0.7608320713043213,
      "learning_rate": 3.7347560975609755e-05,
      "loss": 1.1001,
      "step": 202
    },
    {
      "epoch": 0.5871294287780188,
      "grad_norm": 0.7015742063522339,
      "learning_rate": 3.727134146341464e-05,
      "loss": 1.1404,
      "step": 203
    },
    {
      "epoch": 0.5900216919739696,
      "grad_norm": 0.7741048336029053,
      "learning_rate": 3.7195121951219514e-05,
      "loss": 1.1069,
      "step": 204
    },
    {
      "epoch": 0.5929139551699205,
      "grad_norm": 0.7461472749710083,
      "learning_rate": 3.7118902439024393e-05,
      "loss": 1.1364,
      "step": 205
    },
    {
      "epoch": 0.5958062183658713,
      "grad_norm": 0.7453926205635071,
      "learning_rate": 3.704268292682927e-05,
      "loss": 1.1352,
      "step": 206
    },
    {
      "epoch": 0.5986984815618221,
      "grad_norm": 0.7385006546974182,
      "learning_rate": 3.696646341463415e-05,
      "loss": 1.1277,
      "step": 207
    },
    {
      "epoch": 0.601590744757773,
      "grad_norm": 0.755822479724884,
      "learning_rate": 3.6890243902439025e-05,
      "loss": 1.2321,
      "step": 208
    },
    {
      "epoch": 0.6044830079537238,
      "grad_norm": 0.7634955048561096,
      "learning_rate": 3.6814024390243904e-05,
      "loss": 1.0884,
      "step": 209
    },
    {
      "epoch": 0.6073752711496746,
      "grad_norm": 0.739771842956543,
      "learning_rate": 3.673780487804878e-05,
      "loss": 1.0753,
      "step": 210
    },
    {
      "epoch": 0.6102675343456254,
      "grad_norm": 0.7629417777061462,
      "learning_rate": 3.666158536585366e-05,
      "loss": 1.1352,
      "step": 211
    },
    {
      "epoch": 0.6131597975415762,
      "grad_norm": 0.7024405002593994,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 1.0872,
      "step": 212
    },
    {
      "epoch": 0.6160520607375272,
      "grad_norm": 0.778109610080719,
      "learning_rate": 3.650914634146342e-05,
      "loss": 1.1214,
      "step": 213
    },
    {
      "epoch": 0.618944323933478,
      "grad_norm": 0.8042979836463928,
      "learning_rate": 3.6432926829268294e-05,
      "loss": 1.0782,
      "step": 214
    },
    {
      "epoch": 0.6218365871294288,
      "grad_norm": 0.7753491997718811,
      "learning_rate": 3.635670731707317e-05,
      "loss": 1.1299,
      "step": 215
    },
    {
      "epoch": 0.6247288503253796,
      "grad_norm": 0.7622751593589783,
      "learning_rate": 3.628048780487805e-05,
      "loss": 1.1382,
      "step": 216
    },
    {
      "epoch": 0.6276211135213304,
      "grad_norm": 0.7174673080444336,
      "learning_rate": 3.620426829268293e-05,
      "loss": 1.0331,
      "step": 217
    },
    {
      "epoch": 0.6305133767172812,
      "grad_norm": 0.7472246885299683,
      "learning_rate": 3.6128048780487804e-05,
      "loss": 1.1305,
      "step": 218
    },
    {
      "epoch": 0.6334056399132321,
      "grad_norm": 0.7711846232414246,
      "learning_rate": 3.6051829268292684e-05,
      "loss": 1.0612,
      "step": 219
    },
    {
      "epoch": 0.6362979031091829,
      "grad_norm": 0.6961559653282166,
      "learning_rate": 3.597560975609756e-05,
      "loss": 1.1416,
      "step": 220
    },
    {
      "epoch": 0.6391901663051338,
      "grad_norm": 0.7391098141670227,
      "learning_rate": 3.589939024390244e-05,
      "loss": 1.1794,
      "step": 221
    },
    {
      "epoch": 0.6420824295010846,
      "grad_norm": 0.7802613973617554,
      "learning_rate": 3.5823170731707315e-05,
      "loss": 1.1799,
      "step": 222
    },
    {
      "epoch": 0.6449746926970354,
      "grad_norm": 0.7498157620429993,
      "learning_rate": 3.57469512195122e-05,
      "loss": 1.0472,
      "step": 223
    },
    {
      "epoch": 0.6478669558929863,
      "grad_norm": 0.7516718506813049,
      "learning_rate": 3.5670731707317074e-05,
      "loss": 1.1124,
      "step": 224
    },
    {
      "epoch": 0.6507592190889371,
      "grad_norm": 0.7159478068351746,
      "learning_rate": 3.559451219512195e-05,
      "loss": 1.1552,
      "step": 225
    },
    {
      "epoch": 0.6536514822848879,
      "grad_norm": 0.7362671494483948,
      "learning_rate": 3.551829268292683e-05,
      "loss": 1.0227,
      "step": 226
    },
    {
      "epoch": 0.6565437454808387,
      "grad_norm": 0.7803052663803101,
      "learning_rate": 3.544207317073171e-05,
      "loss": 1.0371,
      "step": 227
    },
    {
      "epoch": 0.6594360086767896,
      "grad_norm": 0.7725502252578735,
      "learning_rate": 3.5365853658536584e-05,
      "loss": 1.0399,
      "step": 228
    },
    {
      "epoch": 0.6623282718727405,
      "grad_norm": 0.7670521140098572,
      "learning_rate": 3.5289634146341464e-05,
      "loss": 1.1018,
      "step": 229
    },
    {
      "epoch": 0.6652205350686913,
      "grad_norm": 0.8205684423446655,
      "learning_rate": 3.521341463414634e-05,
      "loss": 1.0543,
      "step": 230
    },
    {
      "epoch": 0.6681127982646421,
      "grad_norm": 0.7324901223182678,
      "learning_rate": 3.513719512195122e-05,
      "loss": 1.036,
      "step": 231
    },
    {
      "epoch": 0.6710050614605929,
      "grad_norm": 0.7967138886451721,
      "learning_rate": 3.5060975609756095e-05,
      "loss": 1.1261,
      "step": 232
    },
    {
      "epoch": 0.6738973246565437,
      "grad_norm": 0.7588431239128113,
      "learning_rate": 3.498475609756098e-05,
      "loss": 1.14,
      "step": 233
    },
    {
      "epoch": 0.6767895878524945,
      "grad_norm": 0.778581440448761,
      "learning_rate": 3.4908536585365853e-05,
      "loss": 1.1164,
      "step": 234
    },
    {
      "epoch": 0.6796818510484454,
      "grad_norm": 0.7511733174324036,
      "learning_rate": 3.483231707317073e-05,
      "loss": 1.0948,
      "step": 235
    },
    {
      "epoch": 0.6825741142443963,
      "grad_norm": 0.7637711763381958,
      "learning_rate": 3.475609756097561e-05,
      "loss": 1.0335,
      "step": 236
    },
    {
      "epoch": 0.6854663774403471,
      "grad_norm": 0.7194728851318359,
      "learning_rate": 3.467987804878049e-05,
      "loss": 1.1091,
      "step": 237
    },
    {
      "epoch": 0.6883586406362979,
      "grad_norm": 1.0010840892791748,
      "learning_rate": 3.4603658536585364e-05,
      "loss": 1.1292,
      "step": 238
    },
    {
      "epoch": 0.6912509038322487,
      "grad_norm": 0.767515242099762,
      "learning_rate": 3.4527439024390243e-05,
      "loss": 1.0865,
      "step": 239
    },
    {
      "epoch": 0.6941431670281996,
      "grad_norm": 0.7898090481758118,
      "learning_rate": 3.445121951219512e-05,
      "loss": 1.0899,
      "step": 240
    },
    {
      "epoch": 0.6970354302241504,
      "grad_norm": 0.7335265874862671,
      "learning_rate": 3.4375e-05,
      "loss": 1.033,
      "step": 241
    },
    {
      "epoch": 0.6999276934201012,
      "grad_norm": 0.8223006129264832,
      "learning_rate": 3.429878048780488e-05,
      "loss": 1.195,
      "step": 242
    },
    {
      "epoch": 0.702819956616052,
      "grad_norm": 0.8035436868667603,
      "learning_rate": 3.422256097560976e-05,
      "loss": 1.0006,
      "step": 243
    },
    {
      "epoch": 0.7057122198120029,
      "grad_norm": 0.7428767681121826,
      "learning_rate": 3.414634146341464e-05,
      "loss": 1.092,
      "step": 244
    },
    {
      "epoch": 0.7086044830079538,
      "grad_norm": 0.7584668397903442,
      "learning_rate": 3.407012195121951e-05,
      "loss": 1.1246,
      "step": 245
    },
    {
      "epoch": 0.7114967462039046,
      "grad_norm": 0.7379582524299622,
      "learning_rate": 3.399390243902439e-05,
      "loss": 1.1107,
      "step": 246
    },
    {
      "epoch": 0.7143890093998554,
      "grad_norm": 0.7565631866455078,
      "learning_rate": 3.391768292682927e-05,
      "loss": 1.1014,
      "step": 247
    },
    {
      "epoch": 0.7172812725958062,
      "grad_norm": 0.78312748670578,
      "learning_rate": 3.384146341463415e-05,
      "loss": 1.0298,
      "step": 248
    },
    {
      "epoch": 0.720173535791757,
      "grad_norm": 0.7658934593200684,
      "learning_rate": 3.376524390243902e-05,
      "loss": 1.0314,
      "step": 249
    },
    {
      "epoch": 0.7230657989877078,
      "grad_norm": 0.7525564432144165,
      "learning_rate": 3.368902439024391e-05,
      "loss": 1.0405,
      "step": 250
    },
    {
      "epoch": 0.7259580621836587,
      "grad_norm": 0.7480136752128601,
      "learning_rate": 3.361280487804878e-05,
      "loss": 1.0912,
      "step": 251
    },
    {
      "epoch": 0.7288503253796096,
      "grad_norm": 0.7331277132034302,
      "learning_rate": 3.353658536585366e-05,
      "loss": 1.102,
      "step": 252
    },
    {
      "epoch": 0.7317425885755604,
      "grad_norm": 0.7302331924438477,
      "learning_rate": 3.346036585365854e-05,
      "loss": 1.0178,
      "step": 253
    },
    {
      "epoch": 0.7346348517715112,
      "grad_norm": 0.7028475999832153,
      "learning_rate": 3.338414634146342e-05,
      "loss": 0.997,
      "step": 254
    },
    {
      "epoch": 0.737527114967462,
      "grad_norm": 0.7154017090797424,
      "learning_rate": 3.330792682926829e-05,
      "loss": 1.0404,
      "step": 255
    },
    {
      "epoch": 0.7404193781634129,
      "grad_norm": 0.7640696167945862,
      "learning_rate": 3.323170731707317e-05,
      "loss": 1.1071,
      "step": 256
    },
    {
      "epoch": 0.7433116413593637,
      "grad_norm": 0.7853246331214905,
      "learning_rate": 3.315548780487805e-05,
      "loss": 1.0511,
      "step": 257
    },
    {
      "epoch": 0.7462039045553145,
      "grad_norm": 0.7854739427566528,
      "learning_rate": 3.307926829268293e-05,
      "loss": 1.0859,
      "step": 258
    },
    {
      "epoch": 0.7490961677512654,
      "grad_norm": 0.7378141283988953,
      "learning_rate": 3.30030487804878e-05,
      "loss": 1.0932,
      "step": 259
    },
    {
      "epoch": 0.7519884309472162,
      "grad_norm": 0.7881212830543518,
      "learning_rate": 3.292682926829269e-05,
      "loss": 1.0923,
      "step": 260
    },
    {
      "epoch": 0.754880694143167,
      "grad_norm": 0.7434545755386353,
      "learning_rate": 3.285060975609756e-05,
      "loss": 1.0612,
      "step": 261
    },
    {
      "epoch": 0.7577729573391179,
      "grad_norm": 0.7590733766555786,
      "learning_rate": 3.277439024390244e-05,
      "loss": 1.099,
      "step": 262
    },
    {
      "epoch": 0.7606652205350687,
      "grad_norm": 0.809688925743103,
      "learning_rate": 3.269817073170732e-05,
      "loss": 1.1674,
      "step": 263
    },
    {
      "epoch": 0.7635574837310195,
      "grad_norm": 0.7180957198143005,
      "learning_rate": 3.26219512195122e-05,
      "loss": 1.1196,
      "step": 264
    },
    {
      "epoch": 0.7664497469269703,
      "grad_norm": 0.7526130676269531,
      "learning_rate": 3.254573170731707e-05,
      "loss": 0.9961,
      "step": 265
    },
    {
      "epoch": 0.7693420101229211,
      "grad_norm": 0.8099539279937744,
      "learning_rate": 3.246951219512195e-05,
      "loss": 1.0742,
      "step": 266
    },
    {
      "epoch": 0.7722342733188721,
      "grad_norm": 0.7374089360237122,
      "learning_rate": 3.239329268292683e-05,
      "loss": 1.0845,
      "step": 267
    },
    {
      "epoch": 0.7751265365148229,
      "grad_norm": 0.6704961061477661,
      "learning_rate": 3.231707317073171e-05,
      "loss": 0.9631,
      "step": 268
    },
    {
      "epoch": 0.7780187997107737,
      "grad_norm": 0.7654604315757751,
      "learning_rate": 3.224085365853658e-05,
      "loss": 1.0663,
      "step": 269
    },
    {
      "epoch": 0.7809110629067245,
      "grad_norm": 0.7672616243362427,
      "learning_rate": 3.216463414634147e-05,
      "loss": 1.0802,
      "step": 270
    },
    {
      "epoch": 0.7838033261026753,
      "grad_norm": 0.7247093915939331,
      "learning_rate": 3.208841463414634e-05,
      "loss": 1.0921,
      "step": 271
    },
    {
      "epoch": 0.7866955892986262,
      "grad_norm": 0.75218266248703,
      "learning_rate": 3.201219512195122e-05,
      "loss": 1.1062,
      "step": 272
    },
    {
      "epoch": 0.789587852494577,
      "grad_norm": 0.7745797038078308,
      "learning_rate": 3.19359756097561e-05,
      "loss": 1.1105,
      "step": 273
    },
    {
      "epoch": 0.7924801156905278,
      "grad_norm": 0.7872446179389954,
      "learning_rate": 3.185975609756098e-05,
      "loss": 1.0644,
      "step": 274
    },
    {
      "epoch": 0.7953723788864787,
      "grad_norm": 0.8333762884140015,
      "learning_rate": 3.178353658536585e-05,
      "loss": 1.065,
      "step": 275
    },
    {
      "epoch": 0.7982646420824295,
      "grad_norm": 0.7147220969200134,
      "learning_rate": 3.170731707317073e-05,
      "loss": 1.1217,
      "step": 276
    },
    {
      "epoch": 0.8011569052783803,
      "grad_norm": 0.7681723237037659,
      "learning_rate": 3.163109756097561e-05,
      "loss": 1.0033,
      "step": 277
    },
    {
      "epoch": 0.8040491684743312,
      "grad_norm": 0.7502139210700989,
      "learning_rate": 3.155487804878049e-05,
      "loss": 1.0245,
      "step": 278
    },
    {
      "epoch": 0.806941431670282,
      "grad_norm": 0.7371497750282288,
      "learning_rate": 3.147865853658536e-05,
      "loss": 0.9599,
      "step": 279
    },
    {
      "epoch": 0.8098336948662328,
      "grad_norm": 0.7861061692237854,
      "learning_rate": 3.140243902439025e-05,
      "loss": 1.0698,
      "step": 280
    },
    {
      "epoch": 0.8127259580621836,
      "grad_norm": 0.7982838749885559,
      "learning_rate": 3.132621951219512e-05,
      "loss": 1.093,
      "step": 281
    },
    {
      "epoch": 0.8156182212581344,
      "grad_norm": 0.7698132991790771,
      "learning_rate": 3.125e-05,
      "loss": 0.9996,
      "step": 282
    },
    {
      "epoch": 0.8185104844540854,
      "grad_norm": 0.7293528914451599,
      "learning_rate": 3.117378048780488e-05,
      "loss": 1.0981,
      "step": 283
    },
    {
      "epoch": 0.8214027476500362,
      "grad_norm": 0.7758128643035889,
      "learning_rate": 3.109756097560976e-05,
      "loss": 1.1089,
      "step": 284
    },
    {
      "epoch": 0.824295010845987,
      "grad_norm": 0.7410516738891602,
      "learning_rate": 3.102134146341464e-05,
      "loss": 1.0829,
      "step": 285
    },
    {
      "epoch": 0.8271872740419378,
      "grad_norm": 0.7614254355430603,
      "learning_rate": 3.094512195121951e-05,
      "loss": 1.0397,
      "step": 286
    },
    {
      "epoch": 0.8300795372378886,
      "grad_norm": 0.7554497718811035,
      "learning_rate": 3.08689024390244e-05,
      "loss": 1.0749,
      "step": 287
    },
    {
      "epoch": 0.8329718004338394,
      "grad_norm": 0.7554106116294861,
      "learning_rate": 3.079268292682927e-05,
      "loss": 1.0298,
      "step": 288
    },
    {
      "epoch": 0.8358640636297903,
      "grad_norm": 0.7850284576416016,
      "learning_rate": 3.071646341463415e-05,
      "loss": 1.0809,
      "step": 289
    },
    {
      "epoch": 0.8387563268257412,
      "grad_norm": 0.7142320275306702,
      "learning_rate": 3.064024390243903e-05,
      "loss": 1.0664,
      "step": 290
    },
    {
      "epoch": 0.841648590021692,
      "grad_norm": 0.7595747113227844,
      "learning_rate": 3.056402439024391e-05,
      "loss": 1.0581,
      "step": 291
    },
    {
      "epoch": 0.8445408532176428,
      "grad_norm": 0.8003636598587036,
      "learning_rate": 3.048780487804878e-05,
      "loss": 1.0977,
      "step": 292
    },
    {
      "epoch": 0.8474331164135936,
      "grad_norm": 0.7981911301612854,
      "learning_rate": 3.0411585365853663e-05,
      "loss": 1.0425,
      "step": 293
    },
    {
      "epoch": 0.8503253796095445,
      "grad_norm": 0.7293020486831665,
      "learning_rate": 3.0335365853658536e-05,
      "loss": 1.086,
      "step": 294
    },
    {
      "epoch": 0.8532176428054953,
      "grad_norm": 0.7135725617408752,
      "learning_rate": 3.025914634146342e-05,
      "loss": 1.1027,
      "step": 295
    },
    {
      "epoch": 0.8561099060014461,
      "grad_norm": 0.7151292562484741,
      "learning_rate": 3.0182926829268294e-05,
      "loss": 1.1234,
      "step": 296
    },
    {
      "epoch": 0.8590021691973969,
      "grad_norm": 0.7805321216583252,
      "learning_rate": 3.0106707317073174e-05,
      "loss": 1.0833,
      "step": 297
    },
    {
      "epoch": 0.8618944323933478,
      "grad_norm": 0.7318261861801147,
      "learning_rate": 3.003048780487805e-05,
      "loss": 1.0742,
      "step": 298
    },
    {
      "epoch": 0.8647866955892987,
      "grad_norm": 0.7618130445480347,
      "learning_rate": 2.995426829268293e-05,
      "loss": 1.0974,
      "step": 299
    },
    {
      "epoch": 0.8676789587852495,
      "grad_norm": 0.7759801745414734,
      "learning_rate": 2.9878048780487805e-05,
      "loss": 1.0236,
      "step": 300
    },
    {
      "epoch": 0.8705712219812003,
      "grad_norm": 0.7935881614685059,
      "learning_rate": 2.9801829268292684e-05,
      "loss": 1.0511,
      "step": 301
    },
    {
      "epoch": 0.8734634851771511,
      "grad_norm": 0.7859032154083252,
      "learning_rate": 2.972560975609756e-05,
      "loss": 1.0469,
      "step": 302
    },
    {
      "epoch": 0.8763557483731019,
      "grad_norm": 0.7812406420707703,
      "learning_rate": 2.9649390243902443e-05,
      "loss": 1.0289,
      "step": 303
    },
    {
      "epoch": 0.8792480115690527,
      "grad_norm": 0.7637215256690979,
      "learning_rate": 2.9573170731707316e-05,
      "loss": 0.9902,
      "step": 304
    },
    {
      "epoch": 0.8821402747650036,
      "grad_norm": 0.7497740983963013,
      "learning_rate": 2.9496951219512198e-05,
      "loss": 1.0487,
      "step": 305
    },
    {
      "epoch": 0.8850325379609545,
      "grad_norm": 0.7327484488487244,
      "learning_rate": 2.9420731707317074e-05,
      "loss": 1.1966,
      "step": 306
    },
    {
      "epoch": 0.8879248011569053,
      "grad_norm": 0.7829355597496033,
      "learning_rate": 2.9344512195121954e-05,
      "loss": 1.0982,
      "step": 307
    },
    {
      "epoch": 0.8908170643528561,
      "grad_norm": 0.7765836119651794,
      "learning_rate": 2.926829268292683e-05,
      "loss": 0.9476,
      "step": 308
    },
    {
      "epoch": 0.8937093275488069,
      "grad_norm": 0.7646698951721191,
      "learning_rate": 2.919207317073171e-05,
      "loss": 1.1214,
      "step": 309
    },
    {
      "epoch": 0.8966015907447578,
      "grad_norm": 0.7531141638755798,
      "learning_rate": 2.9115853658536585e-05,
      "loss": 1.0438,
      "step": 310
    },
    {
      "epoch": 0.8994938539407086,
      "grad_norm": 0.7788392305374146,
      "learning_rate": 2.9039634146341464e-05,
      "loss": 1.0591,
      "step": 311
    },
    {
      "epoch": 0.9023861171366594,
      "grad_norm": 0.7006287574768066,
      "learning_rate": 2.896341463414634e-05,
      "loss": 1.0351,
      "step": 312
    },
    {
      "epoch": 0.9052783803326103,
      "grad_norm": 0.8054205775260925,
      "learning_rate": 2.8887195121951223e-05,
      "loss": 1.1357,
      "step": 313
    },
    {
      "epoch": 0.9081706435285611,
      "grad_norm": 0.7643339037895203,
      "learning_rate": 2.8810975609756095e-05,
      "loss": 1.073,
      "step": 314
    },
    {
      "epoch": 0.911062906724512,
      "grad_norm": 0.7552357316017151,
      "learning_rate": 2.8734756097560978e-05,
      "loss": 1.0199,
      "step": 315
    },
    {
      "epoch": 0.9139551699204628,
      "grad_norm": 0.7398456931114197,
      "learning_rate": 2.8658536585365854e-05,
      "loss": 1.0532,
      "step": 316
    },
    {
      "epoch": 0.9168474331164136,
      "grad_norm": 0.7522266507148743,
      "learning_rate": 2.8582317073170733e-05,
      "loss": 1.0824,
      "step": 317
    },
    {
      "epoch": 0.9197396963123644,
      "grad_norm": 0.7729273438453674,
      "learning_rate": 2.850609756097561e-05,
      "loss": 1.0282,
      "step": 318
    },
    {
      "epoch": 0.9226319595083152,
      "grad_norm": 0.7700569033622742,
      "learning_rate": 2.842987804878049e-05,
      "loss": 1.0654,
      "step": 319
    },
    {
      "epoch": 0.925524222704266,
      "grad_norm": 0.7540171146392822,
      "learning_rate": 2.8353658536585365e-05,
      "loss": 1.0615,
      "step": 320
    },
    {
      "epoch": 0.928416485900217,
      "grad_norm": 0.7484927773475647,
      "learning_rate": 2.8277439024390244e-05,
      "loss": 1.0276,
      "step": 321
    },
    {
      "epoch": 0.9313087490961678,
      "grad_norm": 0.793731153011322,
      "learning_rate": 2.820121951219512e-05,
      "loss": 1.0536,
      "step": 322
    },
    {
      "epoch": 0.9342010122921186,
      "grad_norm": 0.7182806134223938,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 1.0135,
      "step": 323
    },
    {
      "epoch": 0.9370932754880694,
      "grad_norm": 0.7177212834358215,
      "learning_rate": 2.8048780487804882e-05,
      "loss": 1.02,
      "step": 324
    },
    {
      "epoch": 0.9399855386840202,
      "grad_norm": 0.7477127909660339,
      "learning_rate": 2.7972560975609758e-05,
      "loss": 1.09,
      "step": 325
    },
    {
      "epoch": 0.9428778018799711,
      "grad_norm": 0.7824453115463257,
      "learning_rate": 2.7896341463414637e-05,
      "loss": 0.9806,
      "step": 326
    },
    {
      "epoch": 0.9457700650759219,
      "grad_norm": 0.7952285408973694,
      "learning_rate": 2.7820121951219513e-05,
      "loss": 1.0417,
      "step": 327
    },
    {
      "epoch": 0.9486623282718727,
      "grad_norm": 0.8422231674194336,
      "learning_rate": 2.7743902439024393e-05,
      "loss": 1.0552,
      "step": 328
    },
    {
      "epoch": 0.9515545914678236,
      "grad_norm": 0.8023759722709656,
      "learning_rate": 2.766768292682927e-05,
      "loss": 1.0695,
      "step": 329
    },
    {
      "epoch": 0.9544468546637744,
      "grad_norm": 0.7767244577407837,
      "learning_rate": 2.759146341463415e-05,
      "loss": 1.1414,
      "step": 330
    },
    {
      "epoch": 0.9573391178597253,
      "grad_norm": 0.7687296271324158,
      "learning_rate": 2.7515243902439024e-05,
      "loss": 1.0518,
      "step": 331
    },
    {
      "epoch": 0.9602313810556761,
      "grad_norm": 0.76921147108078,
      "learning_rate": 2.7439024390243906e-05,
      "loss": 1.0616,
      "step": 332
    },
    {
      "epoch": 0.9631236442516269,
      "grad_norm": 0.7176332473754883,
      "learning_rate": 2.7362804878048782e-05,
      "loss": 1.0969,
      "step": 333
    },
    {
      "epoch": 0.9660159074475777,
      "grad_norm": 0.7853028774261475,
      "learning_rate": 2.7286585365853662e-05,
      "loss": 1.0375,
      "step": 334
    },
    {
      "epoch": 0.9689081706435285,
      "grad_norm": 0.7683706879615784,
      "learning_rate": 2.7210365853658538e-05,
      "loss": 0.9734,
      "step": 335
    },
    {
      "epoch": 0.9718004338394793,
      "grad_norm": 0.8103812336921692,
      "learning_rate": 2.7134146341463417e-05,
      "loss": 1.0579,
      "step": 336
    },
    {
      "epoch": 0.9746926970354303,
      "grad_norm": 0.7865802049636841,
      "learning_rate": 2.7057926829268293e-05,
      "loss": 1.019,
      "step": 337
    },
    {
      "epoch": 0.9775849602313811,
      "grad_norm": 0.7285350561141968,
      "learning_rate": 2.6981707317073172e-05,
      "loss": 1.0886,
      "step": 338
    },
    {
      "epoch": 0.9804772234273319,
      "grad_norm": 0.7790278196334839,
      "learning_rate": 2.6905487804878048e-05,
      "loss": 1.03,
      "step": 339
    },
    {
      "epoch": 0.9833694866232827,
      "grad_norm": 0.8020289540290833,
      "learning_rate": 2.682926829268293e-05,
      "loss": 1.0997,
      "step": 340
    },
    {
      "epoch": 0.9862617498192335,
      "grad_norm": 0.7671722173690796,
      "learning_rate": 2.6753048780487804e-05,
| "loss": 1.1381, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9891540130151844, | |
| "grad_norm": 0.8592469096183777, | |
| "learning_rate": 2.6676829268292686e-05, | |
| "loss": 1.0825, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9920462762111352, | |
| "grad_norm": 0.7508606910705566, | |
| "learning_rate": 2.6600609756097562e-05, | |
| "loss": 1.0808, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9949385394070861, | |
| "grad_norm": 0.7976868152618408, | |
| "learning_rate": 2.652439024390244e-05, | |
| "loss": 1.0345, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9978308026030369, | |
| "grad_norm": 0.7527894973754883, | |
| "learning_rate": 2.6448170731707318e-05, | |
| "loss": 0.9788, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7162013053894043, | |
| "learning_rate": 2.6371951219512197e-05, | |
| "loss": 0.6875, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.002892263195951, | |
| "grad_norm": 0.7367959022521973, | |
| "learning_rate": 2.6295731707317073e-05, | |
| "loss": 0.8764, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.0057845263919016, | |
| "grad_norm": 0.7669069170951843, | |
| "learning_rate": 2.6219512195121952e-05, | |
| "loss": 0.8188, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.0086767895878526, | |
| "grad_norm": 0.7791001200675964, | |
| "learning_rate": 2.6143292682926828e-05, | |
| "loss": 0.9138, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.0115690527838033, | |
| "grad_norm": 0.7576078772544861, | |
| "learning_rate": 2.606707317073171e-05, | |
| "loss": 0.7908, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.0144613159797542, | |
| "grad_norm": 0.7850218415260315, | |
| "learning_rate": 2.5990853658536583e-05, | |
| "loss": 0.9152, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.017353579175705, | |
| "grad_norm": 0.9033083319664001, | |
| "learning_rate": 2.5914634146341466e-05, | |
| "loss": 0.8214, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.0202458423716558, | |
| "grad_norm": 0.91056889295578, | |
| "learning_rate": 2.5838414634146342e-05, | |
| "loss": 0.873, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.0231381055676068, | |
| "grad_norm": 0.9178743958473206, | |
| "learning_rate": 2.576219512195122e-05, | |
| "loss": 0.8357, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.0260303687635575, | |
| "grad_norm": 0.9112760424613953, | |
| "learning_rate": 2.5685975609756097e-05, | |
| "loss": 0.8381, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.0289226319595084, | |
| "grad_norm": 0.874699056148529, | |
| "learning_rate": 2.5609756097560977e-05, | |
| "loss": 0.8443, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.031814895155459, | |
| "grad_norm": 0.866185188293457, | |
| "learning_rate": 2.5533536585365853e-05, | |
| "loss": 0.8651, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.03470715835141, | |
| "grad_norm": 0.8335126042366028, | |
| "learning_rate": 2.5457317073170732e-05, | |
| "loss": 0.7468, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.0375994215473607, | |
| "grad_norm": 0.8364746570587158, | |
| "learning_rate": 2.5381097560975608e-05, | |
| "loss": 0.8368, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.0404916847433117, | |
| "grad_norm": 0.887727677822113, | |
| "learning_rate": 2.530487804878049e-05, | |
| "loss": 0.8161, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0433839479392624, | |
| "grad_norm": 0.8570895791053772, | |
| "learning_rate": 2.5228658536585363e-05, | |
| "loss": 0.7743, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.0462762111352133, | |
| "grad_norm": 0.8758525252342224, | |
| "learning_rate": 2.5152439024390246e-05, | |
| "loss": 0.7668, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0491684743311642, | |
| "grad_norm": 0.9433422088623047, | |
| "learning_rate": 2.5076219512195122e-05, | |
| "loss": 0.8556, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.052060737527115, | |
| "grad_norm": 0.957084596157074, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.859, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0549530007230659, | |
| "grad_norm": 0.9015299677848816, | |
| "learning_rate": 2.492378048780488e-05, | |
| "loss": 0.7513, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.0578452639190166, | |
| "grad_norm": 0.8645225763320923, | |
| "learning_rate": 2.4847560975609756e-05, | |
| "loss": 0.7758, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.0607375271149675, | |
| "grad_norm": 0.8781758546829224, | |
| "learning_rate": 2.4771341463414636e-05, | |
| "loss": 0.7608, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.0636297903109182, | |
| "grad_norm": 0.9088943600654602, | |
| "learning_rate": 2.4695121951219512e-05, | |
| "loss": 0.8187, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.0665220535068691, | |
| "grad_norm": 0.8699431419372559, | |
| "learning_rate": 2.461890243902439e-05, | |
| "loss": 0.885, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.06941431670282, | |
| "grad_norm": 0.8766498565673828, | |
| "learning_rate": 2.454268292682927e-05, | |
| "loss": 0.8439, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0723065798987708, | |
| "grad_norm": 0.9093021154403687, | |
| "learning_rate": 2.4466463414634146e-05, | |
| "loss": 0.8731, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0751988430947217, | |
| "grad_norm": 0.9020785689353943, | |
| "learning_rate": 2.4390243902439026e-05, | |
| "loss": 0.8291, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0780911062906724, | |
| "grad_norm": 0.8650471568107605, | |
| "learning_rate": 2.43140243902439e-05, | |
| "loss": 0.8439, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.0809833694866233, | |
| "grad_norm": 0.9382796883583069, | |
| "learning_rate": 2.423780487804878e-05, | |
| "loss": 0.8312, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.083875632682574, | |
| "grad_norm": 0.8890308737754822, | |
| "learning_rate": 2.416158536585366e-05, | |
| "loss": 0.8552, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.086767895878525, | |
| "grad_norm": 0.9097614884376526, | |
| "learning_rate": 2.4085365853658536e-05, | |
| "loss": 0.8513, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.0896601590744757, | |
| "grad_norm": 0.9238763451576233, | |
| "learning_rate": 2.4009146341463416e-05, | |
| "loss": 0.7782, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.0925524222704266, | |
| "grad_norm": 0.917517364025116, | |
| "learning_rate": 2.393292682926829e-05, | |
| "loss": 0.7853, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.0954446854663775, | |
| "grad_norm": 0.954457700252533, | |
| "learning_rate": 2.385670731707317e-05, | |
| "loss": 0.8102, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.0983369486623282, | |
| "grad_norm": 0.9540069699287415, | |
| "learning_rate": 2.378048780487805e-05, | |
| "loss": 0.8117, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.1012292118582792, | |
| "grad_norm": 0.8629953265190125, | |
| "learning_rate": 2.3704268292682926e-05, | |
| "loss": 0.8483, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.1041214750542299, | |
| "grad_norm": 0.9152767658233643, | |
| "learning_rate": 2.3628048780487806e-05, | |
| "loss": 0.7391, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.1070137382501808, | |
| "grad_norm": 0.9119929671287537, | |
| "learning_rate": 2.355182926829268e-05, | |
| "loss": 0.8084, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.1099060014461315, | |
| "grad_norm": 0.9688836932182312, | |
| "learning_rate": 2.347560975609756e-05, | |
| "loss": 0.8794, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.1127982646420824, | |
| "grad_norm": 0.8734216094017029, | |
| "learning_rate": 2.339939024390244e-05, | |
| "loss": 0.771, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.1156905278380334, | |
| "grad_norm": 0.936385452747345, | |
| "learning_rate": 2.332317073170732e-05, | |
| "loss": 0.843, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.118582791033984, | |
| "grad_norm": 0.8708637356758118, | |
| "learning_rate": 2.32469512195122e-05, | |
| "loss": 0.8005, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.121475054229935, | |
| "grad_norm": 0.9174913167953491, | |
| "learning_rate": 2.3170731707317075e-05, | |
| "loss": 0.7858, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.1243673174258857, | |
| "grad_norm": 0.8793891668319702, | |
| "learning_rate": 2.3094512195121954e-05, | |
| "loss": 0.7827, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.1272595806218366, | |
| "grad_norm": 0.9375653266906738, | |
| "learning_rate": 2.301829268292683e-05, | |
| "loss": 0.8587, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.1301518438177873, | |
| "grad_norm": 0.9476063251495361, | |
| "learning_rate": 2.294207317073171e-05, | |
| "loss": 0.8222, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.1330441070137383, | |
| "grad_norm": 0.8776272535324097, | |
| "learning_rate": 2.286585365853659e-05, | |
| "loss": 0.8089, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.1359363702096892, | |
| "grad_norm": 0.8908610343933105, | |
| "learning_rate": 2.2789634146341465e-05, | |
| "loss": 0.8531, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.13882863340564, | |
| "grad_norm": 0.9270078539848328, | |
| "learning_rate": 2.2713414634146344e-05, | |
| "loss": 0.8842, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.1417208966015908, | |
| "grad_norm": 0.9019871354103088, | |
| "learning_rate": 2.263719512195122e-05, | |
| "loss": 0.7006, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.1446131597975415, | |
| "grad_norm": 0.9170034527778625, | |
| "learning_rate": 2.25609756097561e-05, | |
| "loss": 0.8055, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.1475054229934925, | |
| "grad_norm": 0.9285536408424377, | |
| "learning_rate": 2.248475609756098e-05, | |
| "loss": 0.8192, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.1503976861894432, | |
| "grad_norm": 0.9291247725486755, | |
| "learning_rate": 2.2408536585365855e-05, | |
| "loss": 0.7733, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.153289949385394, | |
| "grad_norm": 0.893548846244812, | |
| "learning_rate": 2.2332317073170734e-05, | |
| "loss": 0.8112, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.1561822125813448, | |
| "grad_norm": 0.933894693851471, | |
| "learning_rate": 2.225609756097561e-05, | |
| "loss": 0.8244, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1590744757772957, | |
| "grad_norm": 0.8933086395263672, | |
| "learning_rate": 2.217987804878049e-05, | |
| "loss": 0.799, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.1619667389732466, | |
| "grad_norm": 0.8862596750259399, | |
| "learning_rate": 2.210365853658537e-05, | |
| "loss": 0.7522, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1648590021691974, | |
| "grad_norm": 0.9892849922180176, | |
| "learning_rate": 2.2027439024390244e-05, | |
| "loss": 0.8144, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.1677512653651483, | |
| "grad_norm": 0.8950841426849365, | |
| "learning_rate": 2.1951219512195124e-05, | |
| "loss": 0.8498, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.170643528561099, | |
| "grad_norm": 0.9264621734619141, | |
| "learning_rate": 2.1875e-05, | |
| "loss": 0.8619, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.17353579175705, | |
| "grad_norm": 0.9350318908691406, | |
| "learning_rate": 2.179878048780488e-05, | |
| "loss": 0.901, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1764280549530008, | |
| "grad_norm": 0.8909422755241394, | |
| "learning_rate": 2.172256097560976e-05, | |
| "loss": 0.7969, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.1793203181489516, | |
| "grad_norm": 0.9076801538467407, | |
| "learning_rate": 2.1646341463414634e-05, | |
| "loss": 0.8102, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.1822125813449025, | |
| "grad_norm": 0.9365906715393066, | |
| "learning_rate": 2.1570121951219514e-05, | |
| "loss": 0.8216, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.1851048445408532, | |
| "grad_norm": 0.9423839449882507, | |
| "learning_rate": 2.149390243902439e-05, | |
| "loss": 0.8007, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1879971077368041, | |
| "grad_norm": 0.9760177135467529, | |
| "learning_rate": 2.141768292682927e-05, | |
| "loss": 0.7394, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.1908893709327548, | |
| "grad_norm": 0.9895643591880798, | |
| "learning_rate": 2.134146341463415e-05, | |
| "loss": 0.8613, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1937816341287057, | |
| "grad_norm": 0.9074323177337646, | |
| "learning_rate": 2.1265243902439024e-05, | |
| "loss": 0.7996, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.1966738973246565, | |
| "grad_norm": 0.9774613380432129, | |
| "learning_rate": 2.1189024390243904e-05, | |
| "loss": 0.7982, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1995661605206074, | |
| "grad_norm": 0.9536191821098328, | |
| "learning_rate": 2.111280487804878e-05, | |
| "loss": 0.8498, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.2024584237165583, | |
| "grad_norm": 0.9640031456947327, | |
| "learning_rate": 2.103658536585366e-05, | |
| "loss": 0.7995, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.205350686912509, | |
| "grad_norm": 0.9486613869667053, | |
| "learning_rate": 2.0960365853658538e-05, | |
| "loss": 0.8277, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.20824295010846, | |
| "grad_norm": 0.9539316296577454, | |
| "learning_rate": 2.0884146341463414e-05, | |
| "loss": 0.8163, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.2111352133044107, | |
| "grad_norm": 0.9421859383583069, | |
| "learning_rate": 2.0807926829268294e-05, | |
| "loss": 0.8645, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.2140274765003616, | |
| "grad_norm": 0.9420467615127563, | |
| "learning_rate": 2.073170731707317e-05, | |
| "loss": 0.7646, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.2169197396963123, | |
| "grad_norm": 0.8715965151786804, | |
| "learning_rate": 2.065548780487805e-05, | |
| "loss": 0.819, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.2198120028922632, | |
| "grad_norm": 0.8634954690933228, | |
| "learning_rate": 2.0579268292682928e-05, | |
| "loss": 0.8478, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.222704266088214, | |
| "grad_norm": 0.9214886426925659, | |
| "learning_rate": 2.0503048780487804e-05, | |
| "loss": 0.8249, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.2255965292841648, | |
| "grad_norm": 0.9319393634796143, | |
| "learning_rate": 2.0426829268292683e-05, | |
| "loss": 0.8251, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.2284887924801158, | |
| "grad_norm": 0.9580456018447876, | |
| "learning_rate": 2.035060975609756e-05, | |
| "loss": 0.8139, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.2313810556760665, | |
| "grad_norm": 0.9004295468330383, | |
| "learning_rate": 2.0274390243902442e-05, | |
| "loss": 0.7768, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.2342733188720174, | |
| "grad_norm": 0.9250595569610596, | |
| "learning_rate": 2.0198170731707318e-05, | |
| "loss": 0.7709, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.2371655820679681, | |
| "grad_norm": 0.9740453362464905, | |
| "learning_rate": 2.0121951219512197e-05, | |
| "loss": 0.8407, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.240057845263919, | |
| "grad_norm": 0.9681423306465149, | |
| "learning_rate": 2.0045731707317077e-05, | |
| "loss": 0.7929, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.2429501084598698, | |
| "grad_norm": 0.9964022040367126, | |
| "learning_rate": 1.9969512195121953e-05, | |
| "loss": 0.7823, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.2458423716558207, | |
| "grad_norm": 1.0318474769592285, | |
| "learning_rate": 1.9893292682926832e-05, | |
| "loss": 0.8579, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.2487346348517716, | |
| "grad_norm": 0.9292550086975098, | |
| "learning_rate": 1.9817073170731708e-05, | |
| "loss": 0.815, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.2516268980477223, | |
| "grad_norm": 0.9619131088256836, | |
| "learning_rate": 1.9740853658536587e-05, | |
| "loss": 0.8136, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.2545191612436732, | |
| "grad_norm": 0.9113368391990662, | |
| "learning_rate": 1.9664634146341467e-05, | |
| "loss": 0.7857, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.257411424439624, | |
| "grad_norm": 0.9458669424057007, | |
| "learning_rate": 1.9588414634146343e-05, | |
| "loss": 0.8051, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.2603036876355749, | |
| "grad_norm": 0.9174255132675171, | |
| "learning_rate": 1.9512195121951222e-05, | |
| "loss": 0.8014, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.2631959508315256, | |
| "grad_norm": 0.961124837398529, | |
| "learning_rate": 1.9435975609756098e-05, | |
| "loss": 0.8441, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.2660882140274765, | |
| "grad_norm": 1.0305391550064087, | |
| "learning_rate": 1.9359756097560977e-05, | |
| "loss": 0.8183, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2689804772234274, | |
| "grad_norm": 0.939954936504364, | |
| "learning_rate": 1.9283536585365857e-05, | |
| "loss": 0.7894, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.2718727404193781, | |
| "grad_norm": 0.921103835105896, | |
| "learning_rate": 1.9207317073170733e-05, | |
| "loss": 0.7405, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.274765003615329, | |
| "grad_norm": 0.926176130771637, | |
| "learning_rate": 1.9131097560975612e-05, | |
| "loss": 0.7853, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.2776572668112798, | |
| "grad_norm": 0.9235204458236694, | |
| "learning_rate": 1.9054878048780488e-05, | |
| "loss": 0.8532, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2805495300072307, | |
| "grad_norm": 0.9539816975593567, | |
| "learning_rate": 1.8978658536585367e-05, | |
| "loss": 0.7904, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.2834417932031814, | |
| "grad_norm": 0.9811721444129944, | |
| "learning_rate": 1.8902439024390246e-05, | |
| "loss": 0.824, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2863340563991323, | |
| "grad_norm": 0.900104284286499, | |
| "learning_rate": 1.8826219512195122e-05, | |
| "loss": 0.762, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.289226319595083, | |
| "grad_norm": 0.9972739815711975, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.8043, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.292118582791034, | |
| "grad_norm": 0.9787886738777161, | |
| "learning_rate": 1.8673780487804878e-05, | |
| "loss": 0.8379, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.295010845986985, | |
| "grad_norm": 1.0129365921020508, | |
| "learning_rate": 1.8597560975609757e-05, | |
| "loss": 0.8211, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2979031091829356, | |
| "grad_norm": 0.9614445567131042, | |
| "learning_rate": 1.8521341463414636e-05, | |
| "loss": 0.811, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.3007953723788865, | |
| "grad_norm": 0.9432827830314636, | |
| "learning_rate": 1.8445121951219512e-05, | |
| "loss": 0.8049, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.3036876355748372, | |
| "grad_norm": 0.9323035478591919, | |
| "learning_rate": 1.836890243902439e-05, | |
| "loss": 0.8285, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.3065798987707882, | |
| "grad_norm": 0.979387640953064, | |
| "learning_rate": 1.8292682926829268e-05, | |
| "loss": 0.833, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.309472161966739, | |
| "grad_norm": 0.9406694173812866, | |
| "learning_rate": 1.8216463414634147e-05, | |
| "loss": 0.823, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.3123644251626898, | |
| "grad_norm": 0.9428540468215942, | |
| "learning_rate": 1.8140243902439026e-05, | |
| "loss": 0.7691, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.3152566883586405, | |
| "grad_norm": 0.9734871983528137, | |
| "learning_rate": 1.8064024390243902e-05, | |
| "loss": 0.7952, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.3181489515545914, | |
| "grad_norm": 0.9358460307121277, | |
| "learning_rate": 1.798780487804878e-05, | |
| "loss": 0.7799, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.3210412147505424, | |
| "grad_norm": 0.9847381711006165, | |
| "learning_rate": 1.7911585365853658e-05, | |
| "loss": 0.8272, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.323933477946493, | |
| "grad_norm": 1.0185282230377197, | |
| "learning_rate": 1.7835365853658537e-05, | |
| "loss": 0.7397, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.326825741142444, | |
| "grad_norm": 1.019514560699463, | |
| "learning_rate": 1.7759146341463416e-05, | |
| "loss": 0.8922, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.3297180043383947, | |
| "grad_norm": 1.0088555812835693, | |
| "learning_rate": 1.7682926829268292e-05, | |
| "loss": 0.8657, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.3326102675343456, | |
| "grad_norm": 0.9719268679618835, | |
| "learning_rate": 1.760670731707317e-05, | |
| "loss": 0.8074, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.3355025307302966, | |
| "grad_norm": 0.9707063436508179, | |
| "learning_rate": 1.7530487804878047e-05, | |
| "loss": 0.7983, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.3383947939262473, | |
| "grad_norm": 1.0087740421295166, | |
| "learning_rate": 1.7454268292682927e-05, | |
| "loss": 0.8205, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.3412870571221982, | |
| "grad_norm": 0.957075297832489, | |
| "learning_rate": 1.7378048780487806e-05, | |
| "loss": 0.8248, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.344179320318149, | |
| "grad_norm": 0.9987917542457581, | |
| "learning_rate": 1.7301829268292682e-05, | |
| "loss": 0.8194, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.3470715835140998, | |
| "grad_norm": 0.959826648235321, | |
| "learning_rate": 1.722560975609756e-05, | |
| "loss": 0.754, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.3499638467100505, | |
| "grad_norm": 0.9746386408805847, | |
| "learning_rate": 1.714939024390244e-05, | |
| "loss": 0.7998, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.3528561099060015, | |
| "grad_norm": 0.9507508873939514, | |
| "learning_rate": 1.707317073170732e-05, | |
| "loss": 0.7447, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.3557483731019522, | |
| "grad_norm": 1.0092105865478516, | |
| "learning_rate": 1.6996951219512196e-05, | |
| "loss": 0.8063, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.358640636297903, | |
| "grad_norm": 0.973320484161377, | |
| "learning_rate": 1.6920731707317075e-05, | |
| "loss": 0.7818, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.361532899493854, | |
| "grad_norm": 0.9913963675498962, | |
| "learning_rate": 1.6844512195121955e-05, | |
| "loss": 0.8006, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.3644251626898047, | |
| "grad_norm": 1.0580593347549438, | |
| "learning_rate": 1.676829268292683e-05, | |
| "loss": 0.8488, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.3673174258857557, | |
| "grad_norm": 0.9785270094871521, | |
| "learning_rate": 1.669207317073171e-05, | |
| "loss": 0.8249, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.3702096890817064, | |
| "grad_norm": 0.981171727180481, | |
| "learning_rate": 1.6615853658536586e-05, | |
| "loss": 0.7762, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3731019522776573, | |
| "grad_norm": 1.0523923635482788, | |
| "learning_rate": 1.6539634146341465e-05, | |
| "loss": 0.7582, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.3759942154736082, | |
| "grad_norm": 1.0290507078170776, | |
| "learning_rate": 1.6463414634146345e-05, | |
| "loss": 0.7927, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.378886478669559, | |
| "grad_norm": 0.9900729060173035, | |
| "learning_rate": 1.638719512195122e-05, | |
| "loss": 0.7436, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.3817787418655096, | |
| "grad_norm": 0.9794175028800964, | |
| "learning_rate": 1.63109756097561e-05, | |
| "loss": 0.7744, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.3846710050614606, | |
| "grad_norm": 1.0114864110946655, | |
| "learning_rate": 1.6234756097560976e-05, | |
| "loss": 0.8683, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.3875632682574115, | |
| "grad_norm": 1.026435375213623, | |
| "learning_rate": 1.6158536585365855e-05, | |
| "loss": 0.8049, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3904555314533622, | |
| "grad_norm": 1.0069879293441772, | |
| "learning_rate": 1.6082317073170734e-05, | |
| "loss": 0.9052, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.3933477946493131, | |
| "grad_norm": 0.9856945276260376, | |
| "learning_rate": 1.600609756097561e-05, | |
| "loss": 0.8129, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.3962400578452638, | |
| "grad_norm": 0.9632019400596619, | |
| "learning_rate": 1.592987804878049e-05, | |
| "loss": 0.7651, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.3991323210412148, | |
| "grad_norm": 0.9180967807769775, | |
| "learning_rate": 1.5853658536585366e-05, | |
| "loss": 0.7798, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.4020245842371657, | |
| "grad_norm": 0.9854956269264221, | |
| "learning_rate": 1.5777439024390245e-05, | |
| "loss": 0.7869, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.4049168474331164, | |
| "grad_norm": 0.9699094891548157, | |
| "learning_rate": 1.5701219512195124e-05, | |
| "loss": 0.7424, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.407809110629067, | |
| "grad_norm": 1.0167737007141113, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 0.8369, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.410701373825018, | |
| "grad_norm": 0.9676855802536011, | |
| "learning_rate": 1.554878048780488e-05, | |
| "loss": 0.8397, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.413593637020969, | |
| "grad_norm": 0.974721372127533, | |
| "learning_rate": 1.5472560975609756e-05, | |
| "loss": 0.7772, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.4164859002169197, | |
| "grad_norm": 0.981971800327301, | |
| "learning_rate": 1.5396341463414635e-05, | |
| "loss": 0.8626, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.4193781634128706, | |
| "grad_norm": 1.004634976387024, | |
| "learning_rate": 1.5320121951219514e-05, | |
| "loss": 0.7482, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.4222704266088213, | |
| "grad_norm": 0.995227575302124, | |
| "learning_rate": 1.524390243902439e-05, | |
| "loss": 0.7898, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.4251626898047722, | |
| "grad_norm": 0.9808421730995178, | |
| "learning_rate": 1.5167682926829268e-05, | |
| "loss": 0.7677, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.4280549530007232, | |
| "grad_norm": 0.9480452537536621, | |
| "learning_rate": 1.5091463414634147e-05, | |
| "loss": 0.7852, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.4309472161966739, | |
| "grad_norm": 0.9107538461685181, | |
| "learning_rate": 1.5015243902439025e-05, | |
| "loss": 0.8798, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.4338394793926248, | |
| "grad_norm": 0.9696621894836426, | |
| "learning_rate": 1.4939024390243902e-05, | |
| "loss": 0.8056, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.4367317425885755, | |
| "grad_norm": 1.025511384010315, | |
| "learning_rate": 1.486280487804878e-05, | |
| "loss": 0.8319, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.4396240057845264, | |
| "grad_norm": 0.9872826337814331, | |
| "learning_rate": 1.4786585365853658e-05, | |
| "loss": 0.7518, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.4425162689804774, | |
| "grad_norm": 0.9867232441902161, | |
| "learning_rate": 1.4710365853658537e-05, | |
| "loss": 0.7372, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.445408532176428, | |
| "grad_norm": 1.0221909284591675, | |
| "learning_rate": 1.4634146341463415e-05, | |
| "loss": 0.764, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.4483007953723788, | |
| "grad_norm": 0.9744577407836914, | |
| "learning_rate": 1.4557926829268292e-05, | |
| "loss": 0.7816, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.4511930585683297, | |
| "grad_norm": 0.9650794267654419, | |
| "learning_rate": 1.448170731707317e-05, | |
| "loss": 0.7687, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.4540853217642806, | |
| "grad_norm": 1.067771077156067, | |
| "learning_rate": 1.4405487804878048e-05, | |
| "loss": 0.7803, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.4569775849602313, | |
| "grad_norm": 1.0217148065567017, | |
| "learning_rate": 1.4329268292682927e-05, | |
| "loss": 0.8766, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.4598698481561823, | |
| "grad_norm": 0.9869562983512878, | |
| "learning_rate": 1.4253048780487805e-05, | |
| "loss": 0.7447, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.462762111352133, | |
| "grad_norm": 1.004603385925293, | |
| "learning_rate": 1.4176829268292682e-05, | |
| "loss": 0.7725, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.465654374548084, | |
| "grad_norm": 1.0009071826934814, | |
| "learning_rate": 1.410060975609756e-05, | |
| "loss": 0.8871, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.4685466377440348, | |
| "grad_norm": 1.0561660528182983, | |
| "learning_rate": 1.4024390243902441e-05, | |
| "loss": 0.7484, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.4714389009399855, | |
| "grad_norm": 0.9575408101081848, | |
| "learning_rate": 1.3948170731707319e-05, | |
| "loss": 0.7578, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.4743311641359362, | |
| "grad_norm": 1.0391199588775635, | |
| "learning_rate": 1.3871951219512196e-05, | |
| "loss": 0.8065, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.4772234273318872, | |
| "grad_norm": 1.00908625125885, | |
| "learning_rate": 1.3795731707317076e-05, | |
| "loss": 0.7456, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.480115690527838, | |
| "grad_norm": 0.9751247763633728, | |
| "learning_rate": 1.3719512195121953e-05, | |
| "loss": 0.6815, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.4830079537237888, | |
| "grad_norm": 1.007405161857605, | |
| "learning_rate": 1.3643292682926831e-05, | |
| "loss": 0.7728, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.4859002169197397, | |
| "grad_norm": 0.9923568964004517, | |
| "learning_rate": 1.3567073170731709e-05, | |
| "loss": 0.7887, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4887924801156904, | |
| "grad_norm": 0.9783514142036438, | |
| "learning_rate": 1.3490853658536586e-05, | |
| "loss": 0.8677, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.4916847433116414, | |
| "grad_norm": 0.9877396821975708, | |
| "learning_rate": 1.3414634146341466e-05, | |
| "loss": 0.8264, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.4945770065075923, | |
| "grad_norm": 0.973827600479126, | |
| "learning_rate": 1.3338414634146343e-05, | |
| "loss": 0.8344, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.497469269703543, | |
| "grad_norm": 0.9245984554290771, | |
| "learning_rate": 1.326219512195122e-05, | |
| "loss": 0.7671, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.5003615328994937, | |
| "grad_norm": 1.0020720958709717, | |
| "learning_rate": 1.3185975609756098e-05, | |
| "loss": 0.794, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.5032537960954446, | |
| "grad_norm": 0.9446883797645569, | |
| "learning_rate": 1.3109756097560976e-05, | |
| "loss": 0.7783, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.5061460592913956, | |
| "grad_norm": 0.9875244498252869, | |
| "learning_rate": 1.3033536585365855e-05, | |
| "loss": 0.8469, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.5090383224873465, | |
| "grad_norm": 1.0033190250396729, | |
| "learning_rate": 1.2957317073170733e-05, | |
| "loss": 0.8749, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.5119305856832972, | |
| "grad_norm": 0.9534813165664673, | |
| "learning_rate": 1.288109756097561e-05, | |
| "loss": 0.8684, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.514822848879248, | |
| "grad_norm": 0.9435486793518066, | |
| "learning_rate": 1.2804878048780488e-05, | |
| "loss": 0.8012, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.5177151120751988, | |
| "grad_norm": 1.0029319524765015, | |
| "learning_rate": 1.2728658536585366e-05, | |
| "loss": 0.762, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.5206073752711498, | |
| "grad_norm": 1.0000132322311401, | |
| "learning_rate": 1.2652439024390245e-05, | |
| "loss": 0.7812, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.5234996384671005, | |
| "grad_norm": 0.9410236477851868, | |
| "learning_rate": 1.2576219512195123e-05, | |
| "loss": 0.775, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.5263919016630514, | |
| "grad_norm": 0.9614347815513611, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.7783, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.529284164859002, | |
| "grad_norm": 0.9015387296676636, | |
| "learning_rate": 1.2423780487804878e-05, | |
| "loss": 0.7767, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.532176428054953, | |
| "grad_norm": 0.9506531357765198, | |
| "learning_rate": 1.2347560975609756e-05, | |
| "loss": 0.7928, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.535068691250904, | |
| "grad_norm": 1.0034101009368896, | |
| "learning_rate": 1.2271341463414635e-05, | |
| "loss": 0.794, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.5379609544468547, | |
| "grad_norm": 1.0089356899261475, | |
| "learning_rate": 1.2195121951219513e-05, | |
| "loss": 0.7306, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.5408532176428054, | |
| "grad_norm": 1.0234556198120117, | |
| "learning_rate": 1.211890243902439e-05, | |
| "loss": 0.7613, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.5437454808387563, | |
| "grad_norm": 0.9771298170089722, | |
| "learning_rate": 1.2042682926829268e-05, | |
| "loss": 0.7869, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.5466377440347072, | |
| "grad_norm": 1.019014835357666, | |
| "learning_rate": 1.1966463414634146e-05, | |
| "loss": 0.8096, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.5495300072306581, | |
| "grad_norm": 0.95261150598526, | |
| "learning_rate": 1.1890243902439025e-05, | |
| "loss": 0.843, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.5524222704266089, | |
| "grad_norm": 0.9801099300384521, | |
| "learning_rate": 1.1814024390243903e-05, | |
| "loss": 0.7219, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.5553145336225596, | |
| "grad_norm": 1.0174713134765625, | |
| "learning_rate": 1.173780487804878e-05, | |
| "loss": 0.787, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.5582067968185105, | |
| "grad_norm": 1.119850754737854, | |
| "learning_rate": 1.166158536585366e-05, | |
| "loss": 0.8341, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.5610990600144614, | |
| "grad_norm": 0.996792733669281, | |
| "learning_rate": 1.1585365853658537e-05, | |
| "loss": 0.8291, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5639913232104121, | |
| "grad_norm": 1.0276952981948853, | |
| "learning_rate": 1.1509146341463415e-05, | |
| "loss": 0.7911, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.5668835864063628, | |
| "grad_norm": 0.9893227815628052, | |
| "learning_rate": 1.1432926829268294e-05, | |
| "loss": 0.8017, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.5697758496023138, | |
| "grad_norm": 1.0083463191986084, | |
| "learning_rate": 1.1356707317073172e-05, | |
| "loss": 0.8681, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.5726681127982647, | |
| "grad_norm": 1.0352839231491089, | |
| "learning_rate": 1.128048780487805e-05, | |
| "loss": 0.7451, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.5755603759942156, | |
| "grad_norm": 1.0231815576553345, | |
| "learning_rate": 1.1204268292682927e-05, | |
| "loss": 0.7971, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.5784526391901663, | |
| "grad_norm": 0.9740004539489746, | |
| "learning_rate": 1.1128048780487805e-05, | |
| "loss": 0.7174, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.581344902386117, | |
| "grad_norm": 0.9921448826789856, | |
| "learning_rate": 1.1051829268292684e-05, | |
| "loss": 0.7669, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.584237165582068, | |
| "grad_norm": 0.9635536670684814, | |
| "learning_rate": 1.0975609756097562e-05, | |
| "loss": 0.7851, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5871294287780189, | |
| "grad_norm": 0.9930370450019836, | |
| "learning_rate": 1.089939024390244e-05, | |
| "loss": 0.749, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.5900216919739696, | |
| "grad_norm": 1.0188409090042114, | |
| "learning_rate": 1.0823170731707317e-05, | |
| "loss": 0.8287, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5929139551699205, | |
| "grad_norm": 0.9855648875236511, | |
| "learning_rate": 1.0746951219512195e-05, | |
| "loss": 0.7985, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.5958062183658712, | |
| "grad_norm": 1.0312644243240356, | |
| "learning_rate": 1.0670731707317074e-05, | |
| "loss": 0.824, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5986984815618221, | |
| "grad_norm": 0.9914786219596863, | |
| "learning_rate": 1.0594512195121952e-05, | |
| "loss": 0.8491, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.601590744757773, | |
| "grad_norm": 1.0038225650787354, | |
| "learning_rate": 1.051829268292683e-05, | |
| "loss": 0.8882, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.6044830079537238, | |
| "grad_norm": 1.0336111783981323, | |
| "learning_rate": 1.0442073170731707e-05, | |
| "loss": 0.7973, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.6073752711496745, | |
| "grad_norm": 0.9833325743675232, | |
| "learning_rate": 1.0365853658536585e-05, | |
| "loss": 0.7918, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.6102675343456254, | |
| "grad_norm": 1.0113708972930908, | |
| "learning_rate": 1.0289634146341464e-05, | |
| "loss": 0.803, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.6131597975415763, | |
| "grad_norm": 1.0248537063598633, | |
| "learning_rate": 1.0213414634146342e-05, | |
| "loss": 0.8015, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.6160520607375273, | |
| "grad_norm": 0.9835037589073181, | |
| "learning_rate": 1.0137195121951221e-05, | |
| "loss": 0.7493, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.618944323933478, | |
| "grad_norm": 0.9587700963020325, | |
| "learning_rate": 1.0060975609756099e-05, | |
| "loss": 0.7041, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.6218365871294287, | |
| "grad_norm": 1.0020424127578735, | |
| "learning_rate": 9.984756097560976e-06, | |
| "loss": 0.7743, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.6247288503253796, | |
| "grad_norm": 1.0215778350830078, | |
| "learning_rate": 9.908536585365854e-06, | |
| "loss": 0.9143, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.6276211135213305, | |
| "grad_norm": 1.05181086063385, | |
| "learning_rate": 9.832317073170733e-06, | |
| "loss": 0.7612, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.6305133767172812, | |
| "grad_norm": 0.9703447222709656, | |
| "learning_rate": 9.756097560975611e-06, | |
| "loss": 0.7819, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.633405639913232, | |
| "grad_norm": 1.0287517309188843, | |
| "learning_rate": 9.679878048780489e-06, | |
| "loss": 0.8443, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.6362979031091829, | |
| "grad_norm": 1.0159296989440918, | |
| "learning_rate": 9.603658536585366e-06, | |
| "loss": 0.7781, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.6391901663051338, | |
| "grad_norm": 1.0067027807235718, | |
| "learning_rate": 9.527439024390244e-06, | |
| "loss": 0.7417, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.6420824295010847, | |
| "grad_norm": 1.067325472831726, | |
| "learning_rate": 9.451219512195123e-06, | |
| "loss": 0.856, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.6449746926970354, | |
| "grad_norm": 1.0160930156707764, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.856, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.6478669558929862, | |
| "grad_norm": 0.9937707781791687, | |
| "learning_rate": 9.298780487804879e-06, | |
| "loss": 0.7341, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.650759219088937, | |
| "grad_norm": 1.0597978830337524, | |
| "learning_rate": 9.222560975609756e-06, | |
| "loss": 0.7363, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.653651482284888, | |
| "grad_norm": 1.0080229043960571, | |
| "learning_rate": 9.146341463414634e-06, | |
| "loss": 0.7734, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.6565437454808387, | |
| "grad_norm": 1.0394561290740967, | |
| "learning_rate": 9.070121951219513e-06, | |
| "loss": 0.8179, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.6594360086767896, | |
| "grad_norm": 1.0613329410552979, | |
| "learning_rate": 8.99390243902439e-06, | |
| "loss": 0.8376, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.6623282718727403, | |
| "grad_norm": 1.0188164710998535, | |
| "learning_rate": 8.917682926829268e-06, | |
| "loss": 0.7931, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.6652205350686913, | |
| "grad_norm": 0.9689257740974426, | |
| "learning_rate": 8.841463414634146e-06, | |
| "loss": 0.8066, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.6681127982646422, | |
| "grad_norm": 0.9878205060958862, | |
| "learning_rate": 8.765243902439024e-06, | |
| "loss": 0.7386, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.671005061460593, | |
| "grad_norm": 0.9607040286064148, | |
| "learning_rate": 8.689024390243903e-06, | |
| "loss": 0.7762, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.6738973246565436, | |
| "grad_norm": 0.934492290019989, | |
| "learning_rate": 8.61280487804878e-06, | |
| "loss": 0.8317, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.6767895878524945, | |
| "grad_norm": 1.0009124279022217, | |
| "learning_rate": 8.53658536585366e-06, | |
| "loss": 0.7755, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6796818510484455, | |
| "grad_norm": 0.9868451952934265, | |
| "learning_rate": 8.460365853658538e-06, | |
| "loss": 0.7688, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.6825741142443964, | |
| "grad_norm": 1.0356996059417725, | |
| "learning_rate": 8.384146341463415e-06, | |
| "loss": 0.7601, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.685466377440347, | |
| "grad_norm": 1.0577391386032104, | |
| "learning_rate": 8.307926829268293e-06, | |
| "loss": 0.7847, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.6883586406362978, | |
| "grad_norm": 1.0306715965270996, | |
| "learning_rate": 8.231707317073172e-06, | |
| "loss": 0.8193, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6912509038322487, | |
| "grad_norm": 1.04917311668396, | |
| "learning_rate": 8.15548780487805e-06, | |
| "loss": 0.7714, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.6941431670281997, | |
| "grad_norm": 0.9596878290176392, | |
| "learning_rate": 8.079268292682928e-06, | |
| "loss": 0.8267, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.6970354302241504, | |
| "grad_norm": 1.041686773300171, | |
| "learning_rate": 8.003048780487805e-06, | |
| "loss": 0.7706, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.699927693420101, | |
| "grad_norm": 1.0023382902145386, | |
| "learning_rate": 7.926829268292683e-06, | |
| "loss": 0.8456, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.702819956616052, | |
| "grad_norm": 1.009926438331604, | |
| "learning_rate": 7.850609756097562e-06, | |
| "loss": 0.7796, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.705712219812003, | |
| "grad_norm": 1.0054479837417603, | |
| "learning_rate": 7.77439024390244e-06, | |
| "loss": 0.7221, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.7086044830079539, | |
| "grad_norm": 0.9531407952308655, | |
| "learning_rate": 7.698170731707317e-06, | |
| "loss": 0.7801, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.7114967462039046, | |
| "grad_norm": 1.0707489252090454, | |
| "learning_rate": 7.621951219512195e-06, | |
| "loss": 0.8474, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.7143890093998553, | |
| "grad_norm": 1.0391806364059448, | |
| "learning_rate": 7.545731707317074e-06, | |
| "loss": 0.8122, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.7172812725958062, | |
| "grad_norm": 0.9896015524864197, | |
| "learning_rate": 7.469512195121951e-06, | |
| "loss": 0.8505, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.7201735357917571, | |
| "grad_norm": 1.122521162033081, | |
| "learning_rate": 7.393292682926829e-06, | |
| "loss": 0.878, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.7230657989877078, | |
| "grad_norm": 1.0091516971588135, | |
| "learning_rate": 7.317073170731707e-06, | |
| "loss": 0.7846, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.7259580621836585, | |
| "grad_norm": 0.9725529551506042, | |
| "learning_rate": 7.240853658536585e-06, | |
| "loss": 0.8274, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.7288503253796095, | |
| "grad_norm": 1.0169364213943481, | |
| "learning_rate": 7.1646341463414635e-06, | |
| "loss": 0.9092, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.7317425885755604, | |
| "grad_norm": 0.9752337336540222, | |
| "learning_rate": 7.088414634146341e-06, | |
| "loss": 0.7489, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.7346348517715113, | |
| "grad_norm": 1.0482772588729858, | |
| "learning_rate": 7.0121951219512205e-06, | |
| "loss": 0.7379, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.737527114967462, | |
| "grad_norm": 0.9847067594528198, | |
| "learning_rate": 6.935975609756098e-06, | |
| "loss": 0.7102, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.7404193781634127, | |
| "grad_norm": 0.9766717553138733, | |
| "learning_rate": 6.859756097560977e-06, | |
| "loss": 0.8012, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.7433116413593637, | |
| "grad_norm": 0.9498171806335449, | |
| "learning_rate": 6.783536585365854e-06, | |
| "loss": 0.7409, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.7462039045553146, | |
| "grad_norm": 1.0003339052200317, | |
| "learning_rate": 6.707317073170733e-06, | |
| "loss": 0.7585, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.7490961677512655, | |
| "grad_norm": 1.0416187047958374, | |
| "learning_rate": 6.63109756097561e-06, | |
| "loss": 0.7591, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.7519884309472162, | |
| "grad_norm": 0.9981351494789124, | |
| "learning_rate": 6.554878048780488e-06, | |
| "loss": 0.741, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.754880694143167, | |
| "grad_norm": 0.998756468296051, | |
| "learning_rate": 6.4786585365853665e-06, | |
| "loss": 0.8408, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.7577729573391179, | |
| "grad_norm": 1.0053471326828003, | |
| "learning_rate": 6.402439024390244e-06, | |
| "loss": 0.7636, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.7606652205350688, | |
| "grad_norm": 1.0228371620178223, | |
| "learning_rate": 6.326219512195123e-06, | |
| "loss": 0.7811, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.7635574837310195, | |
| "grad_norm": 1.0302461385726929, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.7339, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.7664497469269702, | |
| "grad_norm": 1.0541510581970215, | |
| "learning_rate": 6.173780487804878e-06, | |
| "loss": 0.7718, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.7693420101229211, | |
| "grad_norm": 0.9746615290641785, | |
| "learning_rate": 6.0975609756097564e-06, | |
| "loss": 0.849, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.772234273318872, | |
| "grad_norm": 0.9652546048164368, | |
| "learning_rate": 6.021341463414634e-06, | |
| "loss": 0.8287, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.775126536514823, | |
| "grad_norm": 1.0296525955200195, | |
| "learning_rate": 5.9451219512195126e-06, | |
| "loss": 0.7493, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.7780187997107737, | |
| "grad_norm": 1.045018196105957, | |
| "learning_rate": 5.86890243902439e-06, | |
| "loss": 0.7284, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.7809110629067244, | |
| "grad_norm": 1.0308400392532349, | |
| "learning_rate": 5.792682926829269e-06, | |
| "loss": 0.8641, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.7838033261026753, | |
| "grad_norm": 1.0580596923828125, | |
| "learning_rate": 5.716463414634147e-06, | |
| "loss": 0.8282, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.7866955892986263, | |
| "grad_norm": 1.0240721702575684, | |
| "learning_rate": 5.640243902439025e-06, | |
| "loss": 0.765, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.789587852494577, | |
| "grad_norm": 1.0127959251403809, | |
| "learning_rate": 5.5640243902439025e-06, | |
| "loss": 0.7923, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.7924801156905277, | |
| "grad_norm": 1.1011825799942017, | |
| "learning_rate": 5.487804878048781e-06, | |
| "loss": 0.7251, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.7953723788864786, | |
| "grad_norm": 1.0520384311676025, | |
| "learning_rate": 5.411585365853659e-06, | |
| "loss": 0.7217, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.7982646420824295, | |
| "grad_norm": 1.0805737972259521, | |
| "learning_rate": 5.335365853658537e-06, | |
| "loss": 0.8411, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.8011569052783805, | |
| "grad_norm": 1.0442290306091309, | |
| "learning_rate": 5.259146341463415e-06, | |
| "loss": 0.7386, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.8040491684743312, | |
| "grad_norm": 1.0919840335845947, | |
| "learning_rate": 5.182926829268292e-06, | |
| "loss": 0.7858, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.8069414316702819, | |
| "grad_norm": 0.9759023785591125, | |
| "learning_rate": 5.106707317073171e-06, | |
| "loss": 0.697, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.8098336948662328, | |
| "grad_norm": 1.017999291419983, | |
| "learning_rate": 5.030487804878049e-06, | |
| "loss": 0.8095, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.8127259580621837, | |
| "grad_norm": 1.0746080875396729, | |
| "learning_rate": 4.954268292682927e-06, | |
| "loss": 0.7828, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.8156182212581344, | |
| "grad_norm": 1.0229034423828125, | |
| "learning_rate": 4.8780487804878055e-06, | |
| "loss": 0.8028, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.8185104844540854, | |
| "grad_norm": 1.0520620346069336, | |
| "learning_rate": 4.801829268292683e-06, | |
| "loss": 0.7629, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.821402747650036, | |
| "grad_norm": 1.0495305061340332, | |
| "learning_rate": 4.725609756097562e-06, | |
| "loss": 0.7609, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.824295010845987, | |
| "grad_norm": 0.9548224806785583, | |
| "learning_rate": 4.649390243902439e-06, | |
| "loss": 0.752, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.827187274041938, | |
| "grad_norm": 1.0313746929168701, | |
| "learning_rate": 4.573170731707317e-06, | |
| "loss": 0.8528, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.8300795372378886, | |
| "grad_norm": 1.0014350414276123, | |
| "learning_rate": 4.496951219512195e-06, | |
| "loss": 0.7587, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.8329718004338393, | |
| "grad_norm": 1.069353461265564, | |
| "learning_rate": 4.420731707317073e-06, | |
| "loss": 0.8193, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.8358640636297903, | |
| "grad_norm": 1.085693120956421, | |
| "learning_rate": 4.3445121951219515e-06, | |
| "loss": 0.799, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.8387563268257412, | |
| "grad_norm": 0.97664475440979, | |
| "learning_rate": 4.26829268292683e-06, | |
| "loss": 0.7018, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.8416485900216921, | |
| "grad_norm": 1.0830881595611572, | |
| "learning_rate": 4.192073170731708e-06, | |
| "loss": 0.7851, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.8445408532176428, | |
| "grad_norm": 0.9672832489013672, | |
| "learning_rate": 4.115853658536586e-06, | |
| "loss": 0.7542, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.8474331164135935, | |
| "grad_norm": 1.0837608575820923, | |
| "learning_rate": 4.039634146341464e-06, | |
| "loss": 0.8329, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.8503253796095445, | |
| "grad_norm": 1.0772196054458618, | |
| "learning_rate": 3.9634146341463414e-06, | |
| "loss": 0.7884, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.8532176428054954, | |
| "grad_norm": 1.1313399076461792, | |
| "learning_rate": 3.88719512195122e-06, | |
| "loss": 0.7771, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.856109906001446, | |
| "grad_norm": 1.0799105167388916, | |
| "learning_rate": 3.8109756097560976e-06, | |
| "loss": 0.8173, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.8590021691973968, | |
| "grad_norm": 1.035786509513855, | |
| "learning_rate": 3.7347560975609756e-06, | |
| "loss": 0.7445, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.8618944323933477, | |
| "grad_norm": 1.0022109746932983, | |
| "learning_rate": 3.6585365853658537e-06, | |
| "loss": 0.7441, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.8647866955892987, | |
| "grad_norm": 1.0012871026992798, | |
| "learning_rate": 3.5823170731707318e-06, | |
| "loss": 0.7731, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.8676789587852496, | |
| "grad_norm": 1.0303922891616821, | |
| "learning_rate": 3.5060975609756102e-06, | |
| "loss": 0.7432, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.8705712219812003, | |
| "grad_norm": 0.9990852475166321, | |
| "learning_rate": 3.4298780487804883e-06, | |
| "loss": 0.7346, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.873463485177151, | |
| "grad_norm": 1.0499917268753052, | |
| "learning_rate": 3.3536585365853664e-06, | |
| "loss": 0.8286, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.876355748373102, | |
| "grad_norm": 0.9858948588371277, | |
| "learning_rate": 3.277439024390244e-06, | |
| "loss": 0.7513, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.8792480115690529, | |
| "grad_norm": 1.020816445350647, | |
| "learning_rate": 3.201219512195122e-06, | |
| "loss": 0.7283, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.8821402747650036, | |
| "grad_norm": 1.0142725706100464, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.8384, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.8850325379609545, | |
| "grad_norm": 1.0734213590621948, | |
| "learning_rate": 3.0487804878048782e-06, | |
| "loss": 0.7657, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.8879248011569052, | |
| "grad_norm": 0.9841848611831665, | |
| "learning_rate": 2.9725609756097563e-06, | |
| "loss": 0.7097, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.8908170643528561, | |
| "grad_norm": 1.4696120023727417, | |
| "learning_rate": 2.8963414634146343e-06, | |
| "loss": 0.6966, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.893709327548807, | |
| "grad_norm": 1.0753856897354126, | |
| "learning_rate": 2.8201219512195124e-06, | |
| "loss": 0.7836, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.8966015907447578, | |
| "grad_norm": 1.058305025100708, | |
| "learning_rate": 2.7439024390243905e-06, | |
| "loss": 0.7982, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.8994938539407085, | |
| "grad_norm": 1.0660943984985352, | |
| "learning_rate": 2.6676829268292685e-06, | |
| "loss": 0.7404, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.9023861171366594, | |
| "grad_norm": 1.0167231559753418, | |
| "learning_rate": 2.591463414634146e-06, | |
| "loss": 0.6959, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.9052783803326103, | |
| "grad_norm": 0.9782930016517639, | |
| "learning_rate": 2.5152439024390247e-06, | |
| "loss": 0.7038, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.9081706435285612, | |
| "grad_norm": 1.0442514419555664, | |
| "learning_rate": 2.4390243902439027e-06, | |
| "loss": 0.8573, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.911062906724512, | |
| "grad_norm": 1.0171256065368652, | |
| "learning_rate": 2.362804878048781e-06, | |
| "loss": 0.7684, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.9139551699204627, | |
| "grad_norm": 1.020768165588379, | |
| "learning_rate": 2.2865853658536584e-06, | |
| "loss": 0.8061, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.9168474331164136, | |
| "grad_norm": 0.9942306876182556, | |
| "learning_rate": 2.2103658536585365e-06, | |
| "loss": 0.7691, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.9197396963123645, | |
| "grad_norm": 0.9986061453819275, | |
| "learning_rate": 2.134146341463415e-06, | |
| "loss": 0.7012, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.9226319595083152, | |
| "grad_norm": 1.0474562644958496, | |
| "learning_rate": 2.057926829268293e-06, | |
| "loss": 0.728, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.925524222704266, | |
| "grad_norm": 1.0567129850387573, | |
| "learning_rate": 1.9817073170731707e-06, | |
| "loss": 0.7762, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.9284164859002169, | |
| "grad_norm": 1.0257785320281982, | |
| "learning_rate": 1.9054878048780488e-06, | |
| "loss": 0.7986, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.9313087490961678, | |
| "grad_norm": 0.9999968409538269, | |
| "learning_rate": 1.8292682926829268e-06, | |
| "loss": 0.7539, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.9342010122921187, | |
| "grad_norm": 1.082047462463379, | |
| "learning_rate": 1.7530487804878051e-06, | |
| "loss": 0.7971, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.9370932754880694, | |
| "grad_norm": 0.994654655456543, | |
| "learning_rate": 1.6768292682926832e-06, | |
| "loss": 0.7363, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.9399855386840201, | |
| "grad_norm": 1.0056068897247314, | |
| "learning_rate": 1.600609756097561e-06, | |
| "loss": 0.7643, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.942877801879971, | |
| "grad_norm": 1.015271782875061, | |
| "learning_rate": 1.5243902439024391e-06, | |
| "loss": 0.7108, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.945770065075922, | |
| "grad_norm": 0.9946292042732239, | |
| "learning_rate": 1.4481707317073172e-06, | |
| "loss": 0.8213, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.9486623282718727, | |
| "grad_norm": 0.9914453625679016, | |
| "learning_rate": 1.3719512195121952e-06, | |
| "loss": 0.7917, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.9515545914678236, | |
| "grad_norm": 1.062779426574707, | |
| "learning_rate": 1.295731707317073e-06, | |
| "loss": 0.725, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.9544468546637743, | |
| "grad_norm": 1.0502513647079468, | |
| "learning_rate": 1.2195121951219514e-06, | |
| "loss": 0.7978, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.9573391178597253, | |
| "grad_norm": 1.0494405031204224, | |
| "learning_rate": 1.1432926829268292e-06, | |
| "loss": 0.7927, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.9602313810556762, | |
| "grad_norm": 1.054677128791809, | |
| "learning_rate": 1.0670731707317075e-06, | |
| "loss": 0.7595, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.9631236442516269, | |
| "grad_norm": 1.0292917490005493, | |
| "learning_rate": 9.908536585365854e-07, | |
| "loss": 0.8302, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.9660159074475776, | |
| "grad_norm": 1.1083894968032837, | |
| "learning_rate": 9.146341463414634e-07, | |
| "loss": 0.8153, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.9689081706435285, | |
| "grad_norm": 1.086378574371338, | |
| "learning_rate": 8.384146341463416e-07, | |
| "loss": 0.7676, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.9718004338394794, | |
| "grad_norm": 1.0098559856414795, | |
| "learning_rate": 7.621951219512196e-07, | |
| "loss": 0.7764, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.9746926970354304, | |
| "grad_norm": 1.0091646909713745, | |
| "learning_rate": 6.859756097560976e-07, | |
| "loss": 0.8242, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.977584960231381, | |
| "grad_norm": 1.0496336221694946, | |
| "learning_rate": 6.097560975609757e-07, | |
| "loss": 0.7758, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.9804772234273318, | |
| "grad_norm": 1.0282728672027588, | |
| "learning_rate": 5.335365853658538e-07, | |
| "loss": 0.7421, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.9833694866232827, | |
| "grad_norm": 1.0808695554733276, | |
| "learning_rate": 4.573170731707317e-07, | |
| "loss": 0.7813, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.9862617498192336, | |
| "grad_norm": 1.0309821367263794, | |
| "learning_rate": 3.810975609756098e-07, | |
| "loss": 0.7839, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.9891540130151844, | |
| "grad_norm": 1.0294197797775269, | |
| "learning_rate": 3.0487804878048784e-07, | |
| "loss": 0.697, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.992046276211135, | |
| "grad_norm": 1.0775706768035889, | |
| "learning_rate": 2.2865853658536586e-07, | |
| "loss": 0.7508, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.994938539407086, | |
| "grad_norm": 1.0518558025360107, | |
| "learning_rate": 1.5243902439024392e-07, | |
| "loss": 0.7384, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.997830802603037, | |
| "grad_norm": 1.0389012098312378, | |
| "learning_rate": 7.621951219512196e-08, | |
| "loss": 0.7942, | |
| "step": 691 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 691, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.687294393884475e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |