| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1359, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002207505518763797, | |
| "grad_norm": 2.7993483543395996, | |
| "learning_rate": 7.352941176470589e-08, | |
| "loss": 0.7615, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004415011037527594, | |
| "grad_norm": 2.814159393310547, | |
| "learning_rate": 1.4705882352941178e-07, | |
| "loss": 0.7759, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006622516556291391, | |
| "grad_norm": 2.932206392288208, | |
| "learning_rate": 2.2058823529411768e-07, | |
| "loss": 0.7897, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008830022075055188, | |
| "grad_norm": 2.909721851348877, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 0.7698, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.011037527593818985, | |
| "grad_norm": 2.855281114578247, | |
| "learning_rate": 3.6764705882352943e-07, | |
| "loss": 0.7673, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.013245033112582781, | |
| "grad_norm": 2.930614709854126, | |
| "learning_rate": 4.4117647058823536e-07, | |
| "loss": 0.7914, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01545253863134658, | |
| "grad_norm": 2.8653368949890137, | |
| "learning_rate": 5.147058823529412e-07, | |
| "loss": 0.7713, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017660044150110375, | |
| "grad_norm": 2.7474284172058105, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 0.7713, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.019867549668874173, | |
| "grad_norm": 2.6992416381835938, | |
| "learning_rate": 6.61764705882353e-07, | |
| "loss": 0.7549, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.02207505518763797, | |
| "grad_norm": 2.718104362487793, | |
| "learning_rate": 7.352941176470589e-07, | |
| "loss": 0.757, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024282560706401765, | |
| "grad_norm": 2.670048952102661, | |
| "learning_rate": 8.088235294117648e-07, | |
| "loss": 0.7367, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.026490066225165563, | |
| "grad_norm": 2.205970287322998, | |
| "learning_rate": 8.823529411764707e-07, | |
| "loss": 0.7397, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02869757174392936, | |
| "grad_norm": 2.2243752479553223, | |
| "learning_rate": 9.558823529411764e-07, | |
| "loss": 0.7565, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03090507726269316, | |
| "grad_norm": 2.2006163597106934, | |
| "learning_rate": 1.0294117647058825e-06, | |
| "loss": 0.744, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.033112582781456956, | |
| "grad_norm": 2.0692009925842285, | |
| "learning_rate": 1.1029411764705884e-06, | |
| "loss": 0.7282, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03532008830022075, | |
| "grad_norm": 1.755692958831787, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 0.7027, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.037527593818984545, | |
| "grad_norm": 1.429651141166687, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.7028, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.039735099337748346, | |
| "grad_norm": 1.4149061441421509, | |
| "learning_rate": 1.323529411764706e-06, | |
| "loss": 0.7027, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04194260485651214, | |
| "grad_norm": 1.4027491807937622, | |
| "learning_rate": 1.3970588235294119e-06, | |
| "loss": 0.7034, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.04415011037527594, | |
| "grad_norm": 1.3382784128189087, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 0.6838, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046357615894039736, | |
| "grad_norm": 1.2848469018936157, | |
| "learning_rate": 1.5441176470588238e-06, | |
| "loss": 0.6904, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.04856512141280353, | |
| "grad_norm": 1.0581680536270142, | |
| "learning_rate": 1.6176470588235297e-06, | |
| "loss": 0.6658, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05077262693156733, | |
| "grad_norm": 1.0082190036773682, | |
| "learning_rate": 1.6911764705882356e-06, | |
| "loss": 0.653, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.052980132450331126, | |
| "grad_norm": 0.979928195476532, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 0.6501, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05518763796909492, | |
| "grad_norm": 0.9646239876747131, | |
| "learning_rate": 1.8382352941176473e-06, | |
| "loss": 0.6415, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05739514348785872, | |
| "grad_norm": 0.8932761549949646, | |
| "learning_rate": 1.9117647058823528e-06, | |
| "loss": 0.6523, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.059602649006622516, | |
| "grad_norm": 0.8707468509674072, | |
| "learning_rate": 1.985294117647059e-06, | |
| "loss": 0.6434, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06181015452538632, | |
| "grad_norm": 0.8291523456573486, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 0.6333, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0640176600441501, | |
| "grad_norm": 0.7280705571174622, | |
| "learning_rate": 2.132352941176471e-06, | |
| "loss": 0.6198, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06622516556291391, | |
| "grad_norm": 0.6107202172279358, | |
| "learning_rate": 2.2058823529411767e-06, | |
| "loss": 0.6057, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0684326710816777, | |
| "grad_norm": 0.6679992079734802, | |
| "learning_rate": 2.2794117647058826e-06, | |
| "loss": 0.5983, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0706401766004415, | |
| "grad_norm": 0.6695526242256165, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 0.5946, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0728476821192053, | |
| "grad_norm": 0.6435050964355469, | |
| "learning_rate": 2.4264705882352943e-06, | |
| "loss": 0.5975, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.07505518763796909, | |
| "grad_norm": 0.6036580204963684, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.592, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0772626931567329, | |
| "grad_norm": 0.5135894417762756, | |
| "learning_rate": 2.5735294117647057e-06, | |
| "loss": 0.596, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07947019867549669, | |
| "grad_norm": 0.4571033716201782, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 0.5764, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08167770419426049, | |
| "grad_norm": 0.48609447479248047, | |
| "learning_rate": 2.720588235294118e-06, | |
| "loss": 0.5593, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08388520971302428, | |
| "grad_norm": 0.49421966075897217, | |
| "learning_rate": 2.7941176470588237e-06, | |
| "loss": 0.5634, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08609271523178808, | |
| "grad_norm": 0.48713281750679016, | |
| "learning_rate": 2.8676470588235296e-06, | |
| "loss": 0.5577, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08830022075055188, | |
| "grad_norm": 0.42998674511909485, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 0.5602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09050772626931568, | |
| "grad_norm": 0.39199528098106384, | |
| "learning_rate": 3.0147058823529413e-06, | |
| "loss": 0.5509, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09271523178807947, | |
| "grad_norm": 0.3977169692516327, | |
| "learning_rate": 3.0882352941176476e-06, | |
| "loss": 0.5408, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09492273730684327, | |
| "grad_norm": 0.3659592866897583, | |
| "learning_rate": 3.161764705882353e-06, | |
| "loss": 0.5468, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09713024282560706, | |
| "grad_norm": 0.41232988238334656, | |
| "learning_rate": 3.2352941176470594e-06, | |
| "loss": 0.5312, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09933774834437085, | |
| "grad_norm": 0.41818928718566895, | |
| "learning_rate": 3.308823529411765e-06, | |
| "loss": 0.542, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10154525386313466, | |
| "grad_norm": 0.38174012303352356, | |
| "learning_rate": 3.382352941176471e-06, | |
| "loss": 0.5341, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10375275938189846, | |
| "grad_norm": 0.3903500735759735, | |
| "learning_rate": 3.4558823529411766e-06, | |
| "loss": 0.5215, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10596026490066225, | |
| "grad_norm": 0.3658589720726013, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 0.5277, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10816777041942605, | |
| "grad_norm": 0.3160182237625122, | |
| "learning_rate": 3.6029411764705883e-06, | |
| "loss": 0.5193, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.11037527593818984, | |
| "grad_norm": 0.29906004667282104, | |
| "learning_rate": 3.6764705882352946e-06, | |
| "loss": 0.525, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11258278145695365, | |
| "grad_norm": 0.28197285532951355, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.5136, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.11479028697571744, | |
| "grad_norm": 0.2733807861804962, | |
| "learning_rate": 3.8235294117647055e-06, | |
| "loss": 0.5126, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11699779249448124, | |
| "grad_norm": 0.2581369876861572, | |
| "learning_rate": 3.897058823529412e-06, | |
| "loss": 0.5102, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11920529801324503, | |
| "grad_norm": 0.24509315192699432, | |
| "learning_rate": 3.970588235294118e-06, | |
| "loss": 0.4959, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12141280353200883, | |
| "grad_norm": 0.23736661672592163, | |
| "learning_rate": 4.044117647058824e-06, | |
| "loss": 0.5024, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12362030905077263, | |
| "grad_norm": 0.2297072857618332, | |
| "learning_rate": 4.11764705882353e-06, | |
| "loss": 0.516, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12582781456953643, | |
| "grad_norm": 0.21641261875629425, | |
| "learning_rate": 4.191176470588236e-06, | |
| "loss": 0.5045, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.1280353200883002, | |
| "grad_norm": 0.2174406796693802, | |
| "learning_rate": 4.264705882352942e-06, | |
| "loss": 0.4954, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13024282560706402, | |
| "grad_norm": 0.2182844579219818, | |
| "learning_rate": 4.3382352941176475e-06, | |
| "loss": 0.5073, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.13245033112582782, | |
| "grad_norm": 0.18668220937252045, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 0.4927, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1346578366445916, | |
| "grad_norm": 0.20196162164211273, | |
| "learning_rate": 4.485294117647059e-06, | |
| "loss": 0.4962, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.1368653421633554, | |
| "grad_norm": 0.20648355782032013, | |
| "learning_rate": 4.558823529411765e-06, | |
| "loss": 0.4965, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1390728476821192, | |
| "grad_norm": 0.1998893916606903, | |
| "learning_rate": 4.632352941176471e-06, | |
| "loss": 0.4857, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.141280353200883, | |
| "grad_norm": 0.20052312314510345, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 0.4919, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.1434878587196468, | |
| "grad_norm": 0.18653374910354614, | |
| "learning_rate": 4.779411764705883e-06, | |
| "loss": 0.4895, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1456953642384106, | |
| "grad_norm": 0.17638395726680756, | |
| "learning_rate": 4.852941176470589e-06, | |
| "loss": 0.4831, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1479028697571744, | |
| "grad_norm": 0.17102564871311188, | |
| "learning_rate": 4.9264705882352945e-06, | |
| "loss": 0.4817, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.15011037527593818, | |
| "grad_norm": 0.15633539855480194, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4882, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.152317880794702, | |
| "grad_norm": 0.17420779168605804, | |
| "learning_rate": 5.073529411764706e-06, | |
| "loss": 0.4853, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.1545253863134658, | |
| "grad_norm": 0.1659373641014099, | |
| "learning_rate": 5.147058823529411e-06, | |
| "loss": 0.492, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15673289183222958, | |
| "grad_norm": 0.17147059738636017, | |
| "learning_rate": 5.220588235294118e-06, | |
| "loss": 0.4744, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.15894039735099338, | |
| "grad_norm": 0.15868496894836426, | |
| "learning_rate": 5.294117647058824e-06, | |
| "loss": 0.4845, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.16114790286975716, | |
| "grad_norm": 0.16405610740184784, | |
| "learning_rate": 5.36764705882353e-06, | |
| "loss": 0.4806, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.16335540838852097, | |
| "grad_norm": 0.16762660443782806, | |
| "learning_rate": 5.441176470588236e-06, | |
| "loss": 0.465, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16556291390728478, | |
| "grad_norm": 0.209846630692482, | |
| "learning_rate": 5.514705882352942e-06, | |
| "loss": 0.486, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16777041942604856, | |
| "grad_norm": 0.17751334607601166, | |
| "learning_rate": 5.588235294117647e-06, | |
| "loss": 0.4761, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16997792494481237, | |
| "grad_norm": 0.16121278703212738, | |
| "learning_rate": 5.661764705882353e-06, | |
| "loss": 0.4774, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.17218543046357615, | |
| "grad_norm": 0.2625029385089874, | |
| "learning_rate": 5.735294117647059e-06, | |
| "loss": 0.4663, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17439293598233996, | |
| "grad_norm": 0.15058760344982147, | |
| "learning_rate": 5.808823529411766e-06, | |
| "loss": 0.4644, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17660044150110377, | |
| "grad_norm": 0.16306105256080627, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.4745, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17880794701986755, | |
| "grad_norm": 0.1497834473848343, | |
| "learning_rate": 5.955882352941177e-06, | |
| "loss": 0.4614, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.18101545253863136, | |
| "grad_norm": 0.15127182006835938, | |
| "learning_rate": 6.029411764705883e-06, | |
| "loss": 0.468, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18322295805739514, | |
| "grad_norm": 0.14110144972801208, | |
| "learning_rate": 6.102941176470589e-06, | |
| "loss": 0.4709, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18543046357615894, | |
| "grad_norm": 0.150424063205719, | |
| "learning_rate": 6.176470588235295e-06, | |
| "loss": 0.4614, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18763796909492272, | |
| "grad_norm": 0.14802858233451843, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.4549, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.18984547461368653, | |
| "grad_norm": 0.1567091941833496, | |
| "learning_rate": 6.323529411764706e-06, | |
| "loss": 0.4487, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.19205298013245034, | |
| "grad_norm": 0.14341039955615997, | |
| "learning_rate": 6.397058823529412e-06, | |
| "loss": 0.4453, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.19426048565121412, | |
| "grad_norm": 0.20370961725711823, | |
| "learning_rate": 6.470588235294119e-06, | |
| "loss": 0.4626, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19646799116997793, | |
| "grad_norm": 0.14623787999153137, | |
| "learning_rate": 6.544117647058824e-06, | |
| "loss": 0.4559, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1986754966887417, | |
| "grad_norm": 0.1428503692150116, | |
| "learning_rate": 6.61764705882353e-06, | |
| "loss": 0.4464, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20088300220750552, | |
| "grad_norm": 0.14742067456245422, | |
| "learning_rate": 6.6911764705882356e-06, | |
| "loss": 0.4663, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.20309050772626933, | |
| "grad_norm": 0.16871479153633118, | |
| "learning_rate": 6.764705882352942e-06, | |
| "loss": 0.4549, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2052980132450331, | |
| "grad_norm": 0.14448532462120056, | |
| "learning_rate": 6.838235294117648e-06, | |
| "loss": 0.4544, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.20750551876379691, | |
| "grad_norm": 0.17373333871364594, | |
| "learning_rate": 6.911764705882353e-06, | |
| "loss": 0.4557, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2097130242825607, | |
| "grad_norm": 0.15801453590393066, | |
| "learning_rate": 6.985294117647059e-06, | |
| "loss": 0.4507, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2119205298013245, | |
| "grad_norm": 0.15145470201969147, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 0.4499, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2141280353200883, | |
| "grad_norm": 0.14600904285907745, | |
| "learning_rate": 7.132352941176472e-06, | |
| "loss": 0.4548, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2163355408388521, | |
| "grad_norm": 0.15834525227546692, | |
| "learning_rate": 7.205882352941177e-06, | |
| "loss": 0.4505, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2185430463576159, | |
| "grad_norm": 0.15612734854221344, | |
| "learning_rate": 7.2794117647058826e-06, | |
| "loss": 0.4598, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.22075055187637968, | |
| "grad_norm": 0.18165510892868042, | |
| "learning_rate": 7.352941176470589e-06, | |
| "loss": 0.4479, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2229580573951435, | |
| "grad_norm": 0.16820134222507477, | |
| "learning_rate": 7.426470588235295e-06, | |
| "loss": 0.4497, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2251655629139073, | |
| "grad_norm": 0.16453172266483307, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.4532, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.22737306843267108, | |
| "grad_norm": 0.15813149511814117, | |
| "learning_rate": 7.573529411764706e-06, | |
| "loss": 0.4513, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.22958057395143489, | |
| "grad_norm": 0.1652165949344635, | |
| "learning_rate": 7.647058823529411e-06, | |
| "loss": 0.445, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23178807947019867, | |
| "grad_norm": 0.14849768579006195, | |
| "learning_rate": 7.720588235294119e-06, | |
| "loss": 0.435, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23399558498896247, | |
| "grad_norm": 0.17478714883327484, | |
| "learning_rate": 7.794117647058825e-06, | |
| "loss": 0.4343, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.23620309050772628, | |
| "grad_norm": 0.16727301478385925, | |
| "learning_rate": 7.86764705882353e-06, | |
| "loss": 0.4483, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.23841059602649006, | |
| "grad_norm": 0.1674540638923645, | |
| "learning_rate": 7.941176470588236e-06, | |
| "loss": 0.4452, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24061810154525387, | |
| "grad_norm": 0.16588866710662842, | |
| "learning_rate": 8.014705882352942e-06, | |
| "loss": 0.4517, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.24282560706401765, | |
| "grad_norm": 0.176283061504364, | |
| "learning_rate": 8.088235294117648e-06, | |
| "loss": 0.4434, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24503311258278146, | |
| "grad_norm": 0.17021676898002625, | |
| "learning_rate": 8.161764705882354e-06, | |
| "loss": 0.442, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.24724061810154527, | |
| "grad_norm": 0.15938061475753784, | |
| "learning_rate": 8.23529411764706e-06, | |
| "loss": 0.4436, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24944812362030905, | |
| "grad_norm": 0.18990886211395264, | |
| "learning_rate": 8.308823529411766e-06, | |
| "loss": 0.439, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.25165562913907286, | |
| "grad_norm": 0.16168883442878723, | |
| "learning_rate": 8.382352941176472e-06, | |
| "loss": 0.4392, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.25386313465783666, | |
| "grad_norm": 0.2176610231399536, | |
| "learning_rate": 8.455882352941177e-06, | |
| "loss": 0.4488, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2560706401766004, | |
| "grad_norm": 0.19106176495552063, | |
| "learning_rate": 8.529411764705883e-06, | |
| "loss": 0.436, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2582781456953642, | |
| "grad_norm": 0.1480225920677185, | |
| "learning_rate": 8.60294117647059e-06, | |
| "loss": 0.4386, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.26048565121412803, | |
| "grad_norm": 0.20528610050678253, | |
| "learning_rate": 8.676470588235295e-06, | |
| "loss": 0.4441, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.26269315673289184, | |
| "grad_norm": 0.18629467487335205, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 0.4322, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.26490066225165565, | |
| "grad_norm": 0.1764117330312729, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 0.4261, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2671081677704194, | |
| "grad_norm": 0.1975659281015396, | |
| "learning_rate": 8.897058823529413e-06, | |
| "loss": 0.4401, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.2693156732891832, | |
| "grad_norm": 0.2136935442686081, | |
| "learning_rate": 8.970588235294119e-06, | |
| "loss": 0.4416, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.271523178807947, | |
| "grad_norm": 0.18505676090717316, | |
| "learning_rate": 9.044117647058824e-06, | |
| "loss": 0.4423, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2737306843267108, | |
| "grad_norm": 0.176743283867836, | |
| "learning_rate": 9.11764705882353e-06, | |
| "loss": 0.4435, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.27593818984547464, | |
| "grad_norm": 0.17542196810245514, | |
| "learning_rate": 9.191176470588236e-06, | |
| "loss": 0.4255, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2781456953642384, | |
| "grad_norm": 0.15908770263195038, | |
| "learning_rate": 9.264705882352942e-06, | |
| "loss": 0.438, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2803532008830022, | |
| "grad_norm": 0.16695120930671692, | |
| "learning_rate": 9.338235294117648e-06, | |
| "loss": 0.4418, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.282560706401766, | |
| "grad_norm": 0.1714697927236557, | |
| "learning_rate": 9.411764705882354e-06, | |
| "loss": 0.4293, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2847682119205298, | |
| "grad_norm": 0.1601938009262085, | |
| "learning_rate": 9.48529411764706e-06, | |
| "loss": 0.4402, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.2869757174392936, | |
| "grad_norm": 0.16425947844982147, | |
| "learning_rate": 9.558823529411766e-06, | |
| "loss": 0.4328, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2891832229580574, | |
| "grad_norm": 0.1931643933057785, | |
| "learning_rate": 9.632352941176471e-06, | |
| "loss": 0.4307, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.2913907284768212, | |
| "grad_norm": 0.18675902485847473, | |
| "learning_rate": 9.705882352941177e-06, | |
| "loss": 0.4554, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.293598233995585, | |
| "grad_norm": 0.16391406953334808, | |
| "learning_rate": 9.779411764705883e-06, | |
| "loss": 0.4272, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2958057395143488, | |
| "grad_norm": 0.20189572870731354, | |
| "learning_rate": 9.852941176470589e-06, | |
| "loss": 0.4258, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2980132450331126, | |
| "grad_norm": 0.16828037798404694, | |
| "learning_rate": 9.926470588235295e-06, | |
| "loss": 0.4287, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.30022075055187636, | |
| "grad_norm": 0.1638776957988739, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4326, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.30242825607064017, | |
| "grad_norm": 0.20775483548641205, | |
| "learning_rate": 9.999983503697906e-06, | |
| "loss": 0.4274, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.304635761589404, | |
| "grad_norm": 0.17668417096138, | |
| "learning_rate": 9.999934014900475e-06, | |
| "loss": 0.4277, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3068432671081678, | |
| "grad_norm": 0.20949822664260864, | |
| "learning_rate": 9.999851533934259e-06, | |
| "loss": 0.4277, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3090507726269316, | |
| "grad_norm": 0.13797180354595184, | |
| "learning_rate": 9.999736061343512e-06, | |
| "loss": 0.4072, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31125827814569534, | |
| "grad_norm": 0.20374132692813873, | |
| "learning_rate": 9.99958759789018e-06, | |
| "loss": 0.4302, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.31346578366445915, | |
| "grad_norm": 0.1946762204170227, | |
| "learning_rate": 9.999406144553905e-06, | |
| "loss": 0.4213, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.31567328918322296, | |
| "grad_norm": 0.21811267733573914, | |
| "learning_rate": 9.999191702532008e-06, | |
| "loss": 0.4285, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.31788079470198677, | |
| "grad_norm": 0.20533326268196106, | |
| "learning_rate": 9.99894427323949e-06, | |
| "loss": 0.4251, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3200883002207506, | |
| "grad_norm": 0.20235486328601837, | |
| "learning_rate": 9.99866385830902e-06, | |
| "loss": 0.4237, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.32229580573951433, | |
| "grad_norm": 0.20962080359458923, | |
| "learning_rate": 9.99835045959092e-06, | |
| "loss": 0.4266, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.32450331125827814, | |
| "grad_norm": 0.22475510835647583, | |
| "learning_rate": 9.998004079153156e-06, | |
| "loss": 0.4263, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.32671081677704195, | |
| "grad_norm": 0.20724737644195557, | |
| "learning_rate": 9.997624719281332e-06, | |
| "loss": 0.416, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.32891832229580575, | |
| "grad_norm": 0.1801760494709015, | |
| "learning_rate": 9.997212382478658e-06, | |
| "loss": 0.4233, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.33112582781456956, | |
| "grad_norm": 0.23205707967281342, | |
| "learning_rate": 9.996767071465947e-06, | |
| "loss": 0.4277, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.18537338078022003, | |
| "learning_rate": 9.996288789181595e-06, | |
| "loss": 0.4317, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3355408388520971, | |
| "grad_norm": 0.17498917877674103, | |
| "learning_rate": 9.995777538781556e-06, | |
| "loss": 0.4288, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.33774834437086093, | |
| "grad_norm": 0.18248897790908813, | |
| "learning_rate": 9.995233323639326e-06, | |
| "loss": 0.4261, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.33995584988962474, | |
| "grad_norm": 0.1849048137664795, | |
| "learning_rate": 9.994656147345922e-06, | |
| "loss": 0.4216, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.34216335540838855, | |
| "grad_norm": 0.19169744849205017, | |
| "learning_rate": 9.994046013709852e-06, | |
| "loss": 0.423, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3443708609271523, | |
| "grad_norm": 0.18524128198623657, | |
| "learning_rate": 9.993402926757098e-06, | |
| "loss": 0.4213, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3465783664459161, | |
| "grad_norm": 0.16218866407871246, | |
| "learning_rate": 9.99272689073108e-06, | |
| "loss": 0.4252, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3487858719646799, | |
| "grad_norm": 0.17451681196689606, | |
| "learning_rate": 9.992017910092636e-06, | |
| "loss": 0.4251, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3509933774834437, | |
| "grad_norm": 0.15944437682628632, | |
| "learning_rate": 9.991275989519991e-06, | |
| "loss": 0.4123, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.35320088300220753, | |
| "grad_norm": 0.17371642589569092, | |
| "learning_rate": 9.990501133908722e-06, | |
| "loss": 0.4234, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3554083885209713, | |
| "grad_norm": 0.1783660650253296, | |
| "learning_rate": 9.98969334837173e-06, | |
| "loss": 0.4234, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.3576158940397351, | |
| "grad_norm": 0.1762082278728485, | |
| "learning_rate": 9.988852638239206e-06, | |
| "loss": 0.418, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3598233995584989, | |
| "grad_norm": 0.16491912305355072, | |
| "learning_rate": 9.987979009058593e-06, | |
| "loss": 0.4248, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3620309050772627, | |
| "grad_norm": 0.2407284379005432, | |
| "learning_rate": 9.98707246659455e-06, | |
| "loss": 0.4307, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36423841059602646, | |
| "grad_norm": 0.19042451679706573, | |
| "learning_rate": 9.986133016828916e-06, | |
| "loss": 0.4231, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.36644591611479027, | |
| "grad_norm": 0.18169504404067993, | |
| "learning_rate": 9.985160665960672e-06, | |
| "loss": 0.4266, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3686534216335541, | |
| "grad_norm": 0.18646620213985443, | |
| "learning_rate": 9.984155420405895e-06, | |
| "loss": 0.4231, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3708609271523179, | |
| "grad_norm": 0.19154079258441925, | |
| "learning_rate": 9.983117286797718e-06, | |
| "loss": 0.4308, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3730684326710817, | |
| "grad_norm": 0.17594484984874725, | |
| "learning_rate": 9.982046271986287e-06, | |
| "loss": 0.4115, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.37527593818984545, | |
| "grad_norm": 0.18167531490325928, | |
| "learning_rate": 9.980942383038717e-06, | |
| "loss": 0.424, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37748344370860926, | |
| "grad_norm": 0.1535561978816986, | |
| "learning_rate": 9.97980562723904e-06, | |
| "loss": 0.4296, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.37969094922737306, | |
| "grad_norm": 0.15756377577781677, | |
| "learning_rate": 9.978636012088165e-06, | |
| "loss": 0.4169, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3818984547461369, | |
| "grad_norm": 0.15670788288116455, | |
| "learning_rate": 9.97743354530382e-06, | |
| "loss": 0.4394, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3841059602649007, | |
| "grad_norm": 0.16224409639835358, | |
| "learning_rate": 9.976198234820509e-06, | |
| "loss": 0.4228, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.38631346578366443, | |
| "grad_norm": 0.14743737876415253, | |
| "learning_rate": 9.974930088789452e-06, | |
| "loss": 0.4144, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.38852097130242824, | |
| "grad_norm": 0.1594422310590744, | |
| "learning_rate": 9.97362911557854e-06, | |
| "loss": 0.4168, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.39072847682119205, | |
| "grad_norm": 0.16173714399337769, | |
| "learning_rate": 9.972295323772268e-06, | |
| "loss": 0.4166, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.39293598233995586, | |
| "grad_norm": 0.1668204814195633, | |
| "learning_rate": 9.970928722171691e-06, | |
| "loss": 0.4252, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.39514348785871967, | |
| "grad_norm": 0.15836164355278015, | |
| "learning_rate": 9.96952931979436e-06, | |
| "loss": 0.4209, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.3973509933774834, | |
| "grad_norm": 0.1634080708026886, | |
| "learning_rate": 9.968097125874258e-06, | |
| "loss": 0.4076, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3995584988962472, | |
| "grad_norm": 0.16134855151176453, | |
| "learning_rate": 9.966632149861748e-06, | |
| "loss": 0.4276, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.40176600441501104, | |
| "grad_norm": 0.15993578732013702, | |
| "learning_rate": 9.965134401423503e-06, | |
| "loss": 0.4308, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.40397350993377484, | |
| "grad_norm": 0.19045297801494598, | |
| "learning_rate": 9.963603890442448e-06, | |
| "loss": 0.4185, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.40618101545253865, | |
| "grad_norm": 0.16455209255218506, | |
| "learning_rate": 9.962040627017693e-06, | |
| "loss": 0.4232, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4083885209713024, | |
| "grad_norm": 0.16577620804309845, | |
| "learning_rate": 9.960444621464462e-06, | |
| "loss": 0.4149, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4105960264900662, | |
| "grad_norm": 0.25177431106567383, | |
| "learning_rate": 9.958815884314033e-06, | |
| "loss": 0.4172, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41280353200883, | |
| "grad_norm": 0.18712477385997772, | |
| "learning_rate": 9.957154426313662e-06, | |
| "loss": 0.4205, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.41501103752759383, | |
| "grad_norm": 0.1667563021183014, | |
| "learning_rate": 9.955460258426512e-06, | |
| "loss": 0.4207, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.41721854304635764, | |
| "grad_norm": 0.22865413129329681, | |
| "learning_rate": 9.953733391831586e-06, | |
| "loss": 0.4109, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4194260485651214, | |
| "grad_norm": 0.18536990880966187, | |
| "learning_rate": 9.951973837923652e-06, | |
| "loss": 0.4187, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4216335540838852, | |
| "grad_norm": 0.19504587352275848, | |
| "learning_rate": 9.950181608313158e-06, | |
| "loss": 0.4142, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.423841059602649, | |
| "grad_norm": 0.1892482340335846, | |
| "learning_rate": 9.948356714826172e-06, | |
| "loss": 0.4142, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4260485651214128, | |
| "grad_norm": 0.1839127093553543, | |
| "learning_rate": 9.946499169504294e-06, | |
| "loss": 0.4161, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.4282560706401766, | |
| "grad_norm": 0.20385828614234924, | |
| "learning_rate": 9.944608984604569e-06, | |
| "loss": 0.4124, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4304635761589404, | |
| "grad_norm": 0.1948205530643463, | |
| "learning_rate": 9.942686172599425e-06, | |
| "loss": 0.4251, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4326710816777042, | |
| "grad_norm": 0.19438982009887695, | |
| "learning_rate": 9.940730746176578e-06, | |
| "loss": 0.4158, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.434878587196468, | |
| "grad_norm": 0.17213338613510132, | |
| "learning_rate": 9.93874271823895e-06, | |
| "loss": 0.4175, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4370860927152318, | |
| "grad_norm": 0.22870118916034698, | |
| "learning_rate": 9.936722101904582e-06, | |
| "loss": 0.4267, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4392935982339956, | |
| "grad_norm": 0.20383016765117645, | |
| "learning_rate": 9.934668910506555e-06, | |
| "loss": 0.422, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.44150110375275936, | |
| "grad_norm": 0.16936808824539185, | |
| "learning_rate": 9.932583157592896e-06, | |
| "loss": 0.4144, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44370860927152317, | |
| "grad_norm": 0.19149592518806458, | |
| "learning_rate": 9.930464856926488e-06, | |
| "loss": 0.4077, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.445916114790287, | |
| "grad_norm": 0.15204112231731415, | |
| "learning_rate": 9.928314022484982e-06, | |
| "loss": 0.4207, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4481236203090508, | |
| "grad_norm": 0.19697798788547516, | |
| "learning_rate": 9.926130668460702e-06, | |
| "loss": 0.4159, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.4503311258278146, | |
| "grad_norm": 0.1751161813735962, | |
| "learning_rate": 9.92391480926056e-06, | |
| "loss": 0.4179, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.45253863134657835, | |
| "grad_norm": 0.17122775316238403, | |
| "learning_rate": 9.921666459505944e-06, | |
| "loss": 0.4104, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.45474613686534215, | |
| "grad_norm": 0.18898116052150726, | |
| "learning_rate": 9.91938563403264e-06, | |
| "loss": 0.4183, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.45695364238410596, | |
| "grad_norm": 0.17628274857997894, | |
| "learning_rate": 9.917072347890721e-06, | |
| "loss": 0.411, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.45916114790286977, | |
| "grad_norm": 0.19427300989627838, | |
| "learning_rate": 9.914726616344454e-06, | |
| "loss": 0.4144, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4613686534216336, | |
| "grad_norm": 0.19164274632930756, | |
| "learning_rate": 9.912348454872196e-06, | |
| "loss": 0.4067, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.46357615894039733, | |
| "grad_norm": 0.17385149002075195, | |
| "learning_rate": 9.909937879166298e-06, | |
| "loss": 0.408, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.46578366445916114, | |
| "grad_norm": 0.18648236989974976, | |
| "learning_rate": 9.907494905132994e-06, | |
| "loss": 0.4273, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.46799116997792495, | |
| "grad_norm": 0.1911754459142685, | |
| "learning_rate": 9.905019548892296e-06, | |
| "loss": 0.4127, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.47019867549668876, | |
| "grad_norm": 0.15749873220920563, | |
| "learning_rate": 9.902511826777895e-06, | |
| "loss": 0.4174, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.47240618101545256, | |
| "grad_norm": 0.19831134378910065, | |
| "learning_rate": 9.899971755337049e-06, | |
| "loss": 0.4045, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4746136865342163, | |
| "grad_norm": 0.18536338210105896, | |
| "learning_rate": 9.897399351330471e-06, | |
| "loss": 0.4134, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4768211920529801, | |
| "grad_norm": 0.17010532319545746, | |
| "learning_rate": 9.894794631732223e-06, | |
| "loss": 0.4076, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.47902869757174393, | |
| "grad_norm": 0.17602422833442688, | |
| "learning_rate": 9.8921576137296e-06, | |
| "loss": 0.4079, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.48123620309050774, | |
| "grad_norm": 0.1983213573694229, | |
| "learning_rate": 9.889488314723024e-06, | |
| "loss": 0.413, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.48344370860927155, | |
| "grad_norm": 0.17187613248825073, | |
| "learning_rate": 9.886786752325917e-06, | |
| "loss": 0.4229, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4856512141280353, | |
| "grad_norm": 0.2029353827238083, | |
| "learning_rate": 9.884052944364595e-06, | |
| "loss": 0.4242, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4878587196467991, | |
| "grad_norm": 0.18107262253761292, | |
| "learning_rate": 9.881286908878148e-06, | |
| "loss": 0.4157, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4900662251655629, | |
| "grad_norm": 0.18853497505187988, | |
| "learning_rate": 9.878488664118316e-06, | |
| "loss": 0.4153, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4922737306843267, | |
| "grad_norm": 0.1775682419538498, | |
| "learning_rate": 9.875658228549379e-06, | |
| "loss": 0.4109, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.49448123620309054, | |
| "grad_norm": 0.18237102031707764, | |
| "learning_rate": 9.872795620848024e-06, | |
| "loss": 0.407, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4966887417218543, | |
| "grad_norm": 0.20489446818828583, | |
| "learning_rate": 9.869900859903225e-06, | |
| "loss": 0.4198, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4988962472406181, | |
| "grad_norm": 0.1481681913137436, | |
| "learning_rate": 9.866973964816126e-06, | |
| "loss": 0.4085, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5011037527593819, | |
| "grad_norm": 0.18315470218658447, | |
| "learning_rate": 9.864014954899905e-06, | |
| "loss": 0.4079, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5033112582781457, | |
| "grad_norm": 0.18866921961307526, | |
| "learning_rate": 9.861023849679648e-06, | |
| "loss": 0.4249, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5055187637969095, | |
| "grad_norm": 0.15121376514434814, | |
| "learning_rate": 9.858000668892226e-06, | |
| "loss": 0.4179, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5077262693156733, | |
| "grad_norm": 0.19818085432052612, | |
| "learning_rate": 9.85494543248616e-06, | |
| "loss": 0.4218, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5099337748344371, | |
| "grad_norm": 0.15964657068252563, | |
| "learning_rate": 9.851858160621496e-06, | |
| "loss": 0.4074, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5121412803532008, | |
| "grad_norm": 0.18371723592281342, | |
| "learning_rate": 9.848738873669653e-06, | |
| "loss": 0.4103, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5143487858719646, | |
| "grad_norm": 0.1854197084903717, | |
| "learning_rate": 9.845587592213318e-06, | |
| "loss": 0.4092, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5165562913907285, | |
| "grad_norm": 0.15952259302139282, | |
| "learning_rate": 9.842404337046284e-06, | |
| "loss": 0.4139, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5187637969094923, | |
| "grad_norm": 0.1830589473247528, | |
| "learning_rate": 9.839189129173328e-06, | |
| "loss": 0.4143, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5209713024282561, | |
| "grad_norm": 0.1684870719909668, | |
| "learning_rate": 9.835941989810065e-06, | |
| "loss": 0.4088, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5231788079470199, | |
| "grad_norm": 0.17676231265068054, | |
| "learning_rate": 9.832662940382813e-06, | |
| "loss": 0.4123, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5253863134657837, | |
| "grad_norm": 0.17995555698871613, | |
| "learning_rate": 9.829352002528449e-06, | |
| "loss": 0.4086, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5275938189845475, | |
| "grad_norm": 0.1694101095199585, | |
| "learning_rate": 9.826009198094262e-06, | |
| "loss": 0.4137, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 0.16819199919700623, | |
| "learning_rate": 9.822634549137819e-06, | |
| "loss": 0.4072, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5320088300220751, | |
| "grad_norm": 0.17408284544944763, | |
| "learning_rate": 9.81922807792681e-06, | |
| "loss": 0.4052, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5342163355408388, | |
| "grad_norm": 0.1843319982290268, | |
| "learning_rate": 9.815789806938909e-06, | |
| "loss": 0.3966, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5364238410596026, | |
| "grad_norm": 0.18125468492507935, | |
| "learning_rate": 9.812319758861616e-06, | |
| "loss": 0.4044, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5386313465783664, | |
| "grad_norm": 0.16861899197101593, | |
| "learning_rate": 9.808817956592115e-06, | |
| "loss": 0.4092, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5408388520971302, | |
| "grad_norm": 0.22598163783550262, | |
| "learning_rate": 9.805284423237126e-06, | |
| "loss": 0.4137, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.543046357615894, | |
| "grad_norm": 0.22511421144008636, | |
| "learning_rate": 9.801719182112738e-06, | |
| "loss": 0.4073, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5452538631346578, | |
| "grad_norm": 0.1690152883529663, | |
| "learning_rate": 9.798122256744269e-06, | |
| "loss": 0.4097, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5474613686534217, | |
| "grad_norm": 0.22893239557743073, | |
| "learning_rate": 9.794493670866108e-06, | |
| "loss": 0.4054, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5496688741721855, | |
| "grad_norm": 0.17257164418697357, | |
| "learning_rate": 9.790833448421554e-06, | |
| "loss": 0.4109, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5518763796909493, | |
| "grad_norm": 0.1636303812265396, | |
| "learning_rate": 9.787141613562661e-06, | |
| "loss": 0.3995, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5540838852097131, | |
| "grad_norm": 0.17622527480125427, | |
| "learning_rate": 9.783418190650079e-06, | |
| "loss": 0.409, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5562913907284768, | |
| "grad_norm": 0.20090465247631073, | |
| "learning_rate": 9.779663204252887e-06, | |
| "loss": 0.4015, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5584988962472406, | |
| "grad_norm": 0.15368807315826416, | |
| "learning_rate": 9.775876679148449e-06, | |
| "loss": 0.4009, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.5607064017660044, | |
| "grad_norm": 0.2135976403951645, | |
| "learning_rate": 9.772058640322221e-06, | |
| "loss": 0.407, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5629139072847682, | |
| "grad_norm": 0.17839573323726654, | |
| "learning_rate": 9.768209112967619e-06, | |
| "loss": 0.404, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.565121412803532, | |
| "grad_norm": 0.1953095942735672, | |
| "learning_rate": 9.764328122485827e-06, | |
| "loss": 0.402, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5673289183222958, | |
| "grad_norm": 0.16529282927513123, | |
| "learning_rate": 9.76041569448564e-06, | |
| "loss": 0.4117, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5695364238410596, | |
| "grad_norm": 0.17458973824977875, | |
| "learning_rate": 9.756471854783297e-06, | |
| "loss": 0.4144, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5717439293598234, | |
| "grad_norm": 0.1776653230190277, | |
| "learning_rate": 9.752496629402307e-06, | |
| "loss": 0.4123, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5739514348785872, | |
| "grad_norm": 0.17428044974803925, | |
| "learning_rate": 9.748490044573275e-06, | |
| "loss": 0.3975, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5761589403973509, | |
| "grad_norm": 0.17150405049324036, | |
| "learning_rate": 9.744452126733739e-06, | |
| "loss": 0.4023, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5783664459161147, | |
| "grad_norm": 0.1668493002653122, | |
| "learning_rate": 9.740382902527981e-06, | |
| "loss": 0.409, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5805739514348786, | |
| "grad_norm": 0.17727597057819366, | |
| "learning_rate": 9.736282398806862e-06, | |
| "loss": 0.4061, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5827814569536424, | |
| "grad_norm": 0.15944141149520874, | |
| "learning_rate": 9.73215064262764e-06, | |
| "loss": 0.402, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5849889624724062, | |
| "grad_norm": 0.17232947051525116, | |
| "learning_rate": 9.727987661253796e-06, | |
| "loss": 0.4102, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.58719646799117, | |
| "grad_norm": 0.2054961770772934, | |
| "learning_rate": 9.72379348215485e-06, | |
| "loss": 0.3994, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5894039735099338, | |
| "grad_norm": 0.1817290335893631, | |
| "learning_rate": 9.719568133006177e-06, | |
| "loss": 0.4033, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5916114790286976, | |
| "grad_norm": 0.16904965043067932, | |
| "learning_rate": 9.715311641688835e-06, | |
| "loss": 0.4082, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5938189845474614, | |
| "grad_norm": 0.17628441751003265, | |
| "learning_rate": 9.71102403628937e-06, | |
| "loss": 0.3969, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5960264900662252, | |
| "grad_norm": 0.20128114521503448, | |
| "learning_rate": 9.706705345099632e-06, | |
| "loss": 0.4081, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5982339955849889, | |
| "grad_norm": 0.17412817478179932, | |
| "learning_rate": 9.7023555966166e-06, | |
| "loss": 0.4076, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6004415011037527, | |
| "grad_norm": 0.18813695013523102, | |
| "learning_rate": 9.697974819542178e-06, | |
| "loss": 0.4042, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6026490066225165, | |
| "grad_norm": 0.17176660895347595, | |
| "learning_rate": 9.693563042783011e-06, | |
| "loss": 0.4099, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6048565121412803, | |
| "grad_norm": 0.17056933045387268, | |
| "learning_rate": 9.689120295450308e-06, | |
| "loss": 0.4045, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6070640176600441, | |
| "grad_norm": 0.20970946550369263, | |
| "learning_rate": 9.684646606859621e-06, | |
| "loss": 0.3944, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.609271523178808, | |
| "grad_norm": 0.19319604337215424, | |
| "learning_rate": 9.680142006530684e-06, | |
| "loss": 0.4062, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6114790286975718, | |
| "grad_norm": 0.17683015763759613, | |
| "learning_rate": 9.675606524187192e-06, | |
| "loss": 0.4077, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6136865342163356, | |
| "grad_norm": 0.16389036178588867, | |
| "learning_rate": 9.671040189756623e-06, | |
| "loss": 0.412, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6158940397350994, | |
| "grad_norm": 0.15970537066459656, | |
| "learning_rate": 9.666443033370026e-06, | |
| "loss": 0.4068, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6181015452538632, | |
| "grad_norm": 0.17420579493045807, | |
| "learning_rate": 9.661815085361836e-06, | |
| "loss": 0.4046, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6203090507726269, | |
| "grad_norm": 0.16548947989940643, | |
| "learning_rate": 9.657156376269665e-06, | |
| "loss": 0.4122, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6225165562913907, | |
| "grad_norm": 0.147056445479393, | |
| "learning_rate": 9.652466936834101e-06, | |
| "loss": 0.4085, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6247240618101545, | |
| "grad_norm": 0.17280390858650208, | |
| "learning_rate": 9.647746797998508e-06, | |
| "loss": 0.4142, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6269315673289183, | |
| "grad_norm": 0.16386403143405914, | |
| "learning_rate": 9.642995990908817e-06, | |
| "loss": 0.4103, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6291390728476821, | |
| "grad_norm": 0.16252438724040985, | |
| "learning_rate": 9.638214546913333e-06, | |
| "loss": 0.4023, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6313465783664459, | |
| "grad_norm": 0.16047868132591248, | |
| "learning_rate": 9.633402497562512e-06, | |
| "loss": 0.4032, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6335540838852097, | |
| "grad_norm": 0.18797720968723297, | |
| "learning_rate": 9.628559874608761e-06, | |
| "loss": 0.4014, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6357615894039735, | |
| "grad_norm": 0.15865157544612885, | |
| "learning_rate": 9.62368671000623e-06, | |
| "loss": 0.4022, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6379690949227373, | |
| "grad_norm": 0.15757879614830017, | |
| "learning_rate": 9.618783035910596e-06, | |
| "loss": 0.4019, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6401766004415012, | |
| "grad_norm": 0.15986581146717072, | |
| "learning_rate": 9.613848884678851e-06, | |
| "loss": 0.3981, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6423841059602649, | |
| "grad_norm": 0.1824173629283905, | |
| "learning_rate": 9.608884288869103e-06, | |
| "loss": 0.407, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6445916114790287, | |
| "grad_norm": 0.185842826962471, | |
| "learning_rate": 9.603889281240334e-06, | |
| "loss": 0.4016, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6467991169977925, | |
| "grad_norm": 0.17280805110931396, | |
| "learning_rate": 9.59886389475221e-06, | |
| "loss": 0.4081, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6490066225165563, | |
| "grad_norm": 0.1815565824508667, | |
| "learning_rate": 9.593808162564845e-06, | |
| "loss": 0.4141, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6512141280353201, | |
| "grad_norm": 0.19602055847644806, | |
| "learning_rate": 9.588722118038595e-06, | |
| "loss": 0.4048, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6534216335540839, | |
| "grad_norm": 0.1916995495557785, | |
| "learning_rate": 9.583605794733833e-06, | |
| "loss": 0.3953, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6556291390728477, | |
| "grad_norm": 0.17578016221523285, | |
| "learning_rate": 9.578459226410722e-06, | |
| "loss": 0.4133, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6578366445916115, | |
| "grad_norm": 0.18694248795509338, | |
| "learning_rate": 9.573282447029e-06, | |
| "loss": 0.4023, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6600441501103753, | |
| "grad_norm": 0.20029006898403168, | |
| "learning_rate": 9.568075490747756e-06, | |
| "loss": 0.3967, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 0.15852802991867065, | |
| "learning_rate": 9.562838391925197e-06, | |
| "loss": 0.4053, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6644591611479028, | |
| "grad_norm": 0.18394002318382263, | |
| "learning_rate": 9.557571185118431e-06, | |
| "loss": 0.4001, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.1864984780550003, | |
| "learning_rate": 9.55227390508323e-06, | |
| "loss": 0.3964, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6688741721854304, | |
| "grad_norm": 0.15841950476169586, | |
| "learning_rate": 9.546946586773808e-06, | |
| "loss": 0.4045, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6710816777041942, | |
| "grad_norm": 0.19293592870235443, | |
| "learning_rate": 9.541589265342585e-06, | |
| "loss": 0.405, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.673289183222958, | |
| "grad_norm": 0.16064338386058807, | |
| "learning_rate": 9.536201976139958e-06, | |
| "loss": 0.4098, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6754966887417219, | |
| "grad_norm": 0.16002054512500763, | |
| "learning_rate": 9.530784754714069e-06, | |
| "loss": 0.3837, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6777041942604857, | |
| "grad_norm": 0.16032235324382782, | |
| "learning_rate": 9.525337636810564e-06, | |
| "loss": 0.4095, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6799116997792495, | |
| "grad_norm": 0.19734519720077515, | |
| "learning_rate": 9.519860658372364e-06, | |
| "loss": 0.4049, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6821192052980133, | |
| "grad_norm": 0.1894775629043579, | |
| "learning_rate": 9.514353855539428e-06, | |
| "loss": 0.3926, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6843267108167771, | |
| "grad_norm": 0.2021895796060562, | |
| "learning_rate": 9.508817264648506e-06, | |
| "loss": 0.3983, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6865342163355408, | |
| "grad_norm": 0.20270881056785583, | |
| "learning_rate": 9.503250922232911e-06, | |
| "loss": 0.4105, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.6887417218543046, | |
| "grad_norm": 0.17310801148414612, | |
| "learning_rate": 9.497654865022268e-06, | |
| "loss": 0.4026, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6909492273730684, | |
| "grad_norm": 0.22883597016334534, | |
| "learning_rate": 9.492029129942277e-06, | |
| "loss": 0.4062, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6931567328918322, | |
| "grad_norm": 0.1955188363790512, | |
| "learning_rate": 9.48637375411447e-06, | |
| "loss": 0.4044, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.695364238410596, | |
| "grad_norm": 0.18373191356658936, | |
| "learning_rate": 9.48068877485596e-06, | |
| "loss": 0.4032, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6975717439293598, | |
| "grad_norm": 0.21621759235858917, | |
| "learning_rate": 9.474974229679201e-06, | |
| "loss": 0.3904, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6997792494481236, | |
| "grad_norm": 0.17569488286972046, | |
| "learning_rate": 9.469230156291742e-06, | |
| "loss": 0.4087, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7019867549668874, | |
| "grad_norm": 0.2064937800168991, | |
| "learning_rate": 9.463456592595966e-06, | |
| "loss": 0.396, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7041942604856513, | |
| "grad_norm": 0.24051177501678467, | |
| "learning_rate": 9.457653576688857e-06, | |
| "loss": 0.4062, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7064017660044151, | |
| "grad_norm": 0.16446927189826965, | |
| "learning_rate": 9.451821146861734e-06, | |
| "loss": 0.3987, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7086092715231788, | |
| "grad_norm": 0.20139139890670776, | |
| "learning_rate": 9.445959341600009e-06, | |
| "loss": 0.4036, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7108167770419426, | |
| "grad_norm": 0.18157215416431427, | |
| "learning_rate": 9.440068199582923e-06, | |
| "loss": 0.4124, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7130242825607064, | |
| "grad_norm": 0.15641391277313232, | |
| "learning_rate": 9.434147759683303e-06, | |
| "loss": 0.405, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7152317880794702, | |
| "grad_norm": 0.16391552984714508, | |
| "learning_rate": 9.428198060967294e-06, | |
| "loss": 0.4005, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.717439293598234, | |
| "grad_norm": 0.179102823138237, | |
| "learning_rate": 9.422219142694104e-06, | |
| "loss": 0.3968, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7196467991169978, | |
| "grad_norm": 0.17109854519367218, | |
| "learning_rate": 9.416211044315754e-06, | |
| "loss": 0.4049, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7218543046357616, | |
| "grad_norm": 0.20250354707241058, | |
| "learning_rate": 9.410173805476804e-06, | |
| "loss": 0.4186, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7240618101545254, | |
| "grad_norm": 0.18576852977275848, | |
| "learning_rate": 9.404107466014101e-06, | |
| "loss": 0.4022, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7262693156732892, | |
| "grad_norm": 0.16930828988552094, | |
| "learning_rate": 9.398012065956512e-06, | |
| "loss": 0.3949, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7284768211920529, | |
| "grad_norm": 0.1966543048620224, | |
| "learning_rate": 9.39188764552466e-06, | |
| "loss": 0.4103, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7306843267108167, | |
| "grad_norm": 0.18573778867721558, | |
| "learning_rate": 9.385734245130664e-06, | |
| "loss": 0.4069, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7328918322295805, | |
| "grad_norm": 0.16225308179855347, | |
| "learning_rate": 9.379551905377863e-06, | |
| "loss": 0.4049, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7350993377483444, | |
| "grad_norm": 0.1815934032201767, | |
| "learning_rate": 9.373340667060553e-06, | |
| "loss": 0.3927, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7373068432671082, | |
| "grad_norm": 0.1688205897808075, | |
| "learning_rate": 9.367100571163722e-06, | |
| "loss": 0.4019, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.739514348785872, | |
| "grad_norm": 0.18315370380878448, | |
| "learning_rate": 9.360831658862774e-06, | |
| "loss": 0.3989, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7417218543046358, | |
| "grad_norm": 0.17102378606796265, | |
| "learning_rate": 9.354533971523253e-06, | |
| "loss": 0.4122, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7439293598233996, | |
| "grad_norm": 0.17149822413921356, | |
| "learning_rate": 9.348207550700584e-06, | |
| "loss": 0.4087, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7461368653421634, | |
| "grad_norm": 0.19260001182556152, | |
| "learning_rate": 9.341852438139784e-06, | |
| "loss": 0.4064, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7483443708609272, | |
| "grad_norm": 0.18973691761493683, | |
| "learning_rate": 9.335468675775196e-06, | |
| "loss": 0.399, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7505518763796909, | |
| "grad_norm": 0.19365577399730682, | |
| "learning_rate": 9.329056305730211e-06, | |
| "loss": 0.3959, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7527593818984547, | |
| "grad_norm": 0.21033021807670593, | |
| "learning_rate": 9.322615370316986e-06, | |
| "loss": 0.4027, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7549668874172185, | |
| "grad_norm": 0.1825421005487442, | |
| "learning_rate": 9.316145912036165e-06, | |
| "loss": 0.3971, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7571743929359823, | |
| "grad_norm": 0.1825910061597824, | |
| "learning_rate": 9.309647973576605e-06, | |
| "loss": 0.3959, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7593818984547461, | |
| "grad_norm": 0.1806352436542511, | |
| "learning_rate": 9.30312159781509e-06, | |
| "loss": 0.4047, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7615894039735099, | |
| "grad_norm": 0.15455348789691925, | |
| "learning_rate": 9.296566827816044e-06, | |
| "loss": 0.3958, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7637969094922737, | |
| "grad_norm": 0.19140276312828064, | |
| "learning_rate": 9.289983706831254e-06, | |
| "loss": 0.3955, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7660044150110376, | |
| "grad_norm": 0.1777879148721695, | |
| "learning_rate": 9.28337227829958e-06, | |
| "loss": 0.3971, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7682119205298014, | |
| "grad_norm": 0.16399559378623962, | |
| "learning_rate": 9.276732585846673e-06, | |
| "loss": 0.3998, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7704194260485652, | |
| "grad_norm": 0.1594182848930359, | |
| "learning_rate": 9.270064673284681e-06, | |
| "loss": 0.3898, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7726269315673289, | |
| "grad_norm": 0.16792874038219452, | |
| "learning_rate": 9.263368584611965e-06, | |
| "loss": 0.4021, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7748344370860927, | |
| "grad_norm": 0.18502795696258545, | |
| "learning_rate": 9.256644364012803e-06, | |
| "loss": 0.3987, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7770419426048565, | |
| "grad_norm": 0.18240559101104736, | |
| "learning_rate": 9.249892055857107e-06, | |
| "loss": 0.4074, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7792494481236203, | |
| "grad_norm": 0.15263855457305908, | |
| "learning_rate": 9.243111704700126e-06, | |
| "loss": 0.3928, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7814569536423841, | |
| "grad_norm": 0.18149858713150024, | |
| "learning_rate": 9.236303355282142e-06, | |
| "loss": 0.4015, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7836644591611479, | |
| "grad_norm": 0.1617615818977356, | |
| "learning_rate": 9.229467052528191e-06, | |
| "loss": 0.4024, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7858719646799117, | |
| "grad_norm": 0.19588352739810944, | |
| "learning_rate": 9.222602841547766e-06, | |
| "loss": 0.3946, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7880794701986755, | |
| "grad_norm": 0.1633402556180954, | |
| "learning_rate": 9.2157107676345e-06, | |
| "loss": 0.4027, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7902869757174393, | |
| "grad_norm": 0.16059032082557678, | |
| "learning_rate": 9.208790876265887e-06, | |
| "loss": 0.3981, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7924944812362031, | |
| "grad_norm": 0.1558162271976471, | |
| "learning_rate": 9.201843213102976e-06, | |
| "loss": 0.3969, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7947019867549668, | |
| "grad_norm": 0.1607562005519867, | |
| "learning_rate": 9.194867823990069e-06, | |
| "loss": 0.3972, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7969094922737306, | |
| "grad_norm": 0.1428382843732834, | |
| "learning_rate": 9.187864754954412e-06, | |
| "loss": 0.3928, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7991169977924945, | |
| "grad_norm": 0.15147483348846436, | |
| "learning_rate": 9.180834052205903e-06, | |
| "loss": 0.392, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8013245033112583, | |
| "grad_norm": 0.14845693111419678, | |
| "learning_rate": 9.173775762136783e-06, | |
| "loss": 0.3989, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.8035320088300221, | |
| "grad_norm": 0.1665990948677063, | |
| "learning_rate": 9.166689931321326e-06, | |
| "loss": 0.3928, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.8057395143487859, | |
| "grad_norm": 0.14729240536689758, | |
| "learning_rate": 9.159576606515532e-06, | |
| "loss": 0.3953, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8079470198675497, | |
| "grad_norm": 0.14690490067005157, | |
| "learning_rate": 9.152435834656823e-06, | |
| "loss": 0.4023, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8101545253863135, | |
| "grad_norm": 0.15939104557037354, | |
| "learning_rate": 9.145267662863732e-06, | |
| "loss": 0.4013, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8123620309050773, | |
| "grad_norm": 0.1510556936264038, | |
| "learning_rate": 9.13807213843559e-06, | |
| "loss": 0.3992, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8145695364238411, | |
| "grad_norm": 0.1646500676870346, | |
| "learning_rate": 9.130849308852217e-06, | |
| "loss": 0.3925, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.8167770419426048, | |
| "grad_norm": 0.17254091799259186, | |
| "learning_rate": 9.123599221773601e-06, | |
| "loss": 0.4071, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8189845474613686, | |
| "grad_norm": 0.1497507095336914, | |
| "learning_rate": 9.116321925039591e-06, | |
| "loss": 0.3883, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8211920529801324, | |
| "grad_norm": 0.16002339124679565, | |
| "learning_rate": 9.109017466669587e-06, | |
| "loss": 0.3953, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8233995584988962, | |
| "grad_norm": 0.14812716841697693, | |
| "learning_rate": 9.101685894862206e-06, | |
| "loss": 0.4021, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.82560706401766, | |
| "grad_norm": 0.15898163616657257, | |
| "learning_rate": 9.094327257994978e-06, | |
| "loss": 0.4102, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8278145695364238, | |
| "grad_norm": 0.15488837659358978, | |
| "learning_rate": 9.086941604624022e-06, | |
| "loss": 0.3912, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8300220750551877, | |
| "grad_norm": 0.1613994836807251, | |
| "learning_rate": 9.079528983483726e-06, | |
| "loss": 0.4029, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8322295805739515, | |
| "grad_norm": 0.1536788046360016, | |
| "learning_rate": 9.072089443486425e-06, | |
| "loss": 0.3956, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8344370860927153, | |
| "grad_norm": 0.1693611443042755, | |
| "learning_rate": 9.064623033722077e-06, | |
| "loss": 0.3984, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8366445916114791, | |
| "grad_norm": 0.18005254864692688, | |
| "learning_rate": 9.057129803457943e-06, | |
| "loss": 0.4022, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8388520971302428, | |
| "grad_norm": 0.15852075815200806, | |
| "learning_rate": 9.049609802138262e-06, | |
| "loss": 0.3816, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8410596026490066, | |
| "grad_norm": 0.1933150738477707, | |
| "learning_rate": 9.042063079383916e-06, | |
| "loss": 0.4028, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8432671081677704, | |
| "grad_norm": 0.18579082190990448, | |
| "learning_rate": 9.034489684992112e-06, | |
| "loss": 0.4057, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8454746136865342, | |
| "grad_norm": 0.1803402453660965, | |
| "learning_rate": 9.026889668936054e-06, | |
| "loss": 0.3976, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.847682119205298, | |
| "grad_norm": 0.17872066795825958, | |
| "learning_rate": 9.019263081364605e-06, | |
| "loss": 0.3908, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8498896247240618, | |
| "grad_norm": 0.18206505477428436, | |
| "learning_rate": 9.01160997260196e-06, | |
| "loss": 0.4072, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8520971302428256, | |
| "grad_norm": 0.1718129813671112, | |
| "learning_rate": 9.00393039314732e-06, | |
| "loss": 0.4036, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8543046357615894, | |
| "grad_norm": 0.21254006028175354, | |
| "learning_rate": 8.996224393674545e-06, | |
| "loss": 0.4097, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8565121412803532, | |
| "grad_norm": 0.15457090735435486, | |
| "learning_rate": 8.988492025031838e-06, | |
| "loss": 0.4001, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8587196467991169, | |
| "grad_norm": 0.18473166227340698, | |
| "learning_rate": 8.980733338241395e-06, | |
| "loss": 0.3965, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8609271523178808, | |
| "grad_norm": 0.19067919254302979, | |
| "learning_rate": 8.972948384499068e-06, | |
| "loss": 0.3996, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8631346578366446, | |
| "grad_norm": 0.199691504240036, | |
| "learning_rate": 8.965137215174037e-06, | |
| "loss": 0.4003, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8653421633554084, | |
| "grad_norm": 0.22586044669151306, | |
| "learning_rate": 8.957299881808471e-06, | |
| "loss": 0.393, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8675496688741722, | |
| "grad_norm": 0.1807902604341507, | |
| "learning_rate": 8.949436436117172e-06, | |
| "loss": 0.4039, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.869757174392936, | |
| "grad_norm": 0.22098712623119354, | |
| "learning_rate": 8.941546929987253e-06, | |
| "loss": 0.39, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8719646799116998, | |
| "grad_norm": 0.19912059605121613, | |
| "learning_rate": 8.933631415477785e-06, | |
| "loss": 0.396, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8741721854304636, | |
| "grad_norm": 0.16147476434707642, | |
| "learning_rate": 8.925689944819452e-06, | |
| "loss": 0.4011, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8763796909492274, | |
| "grad_norm": 0.19136802852153778, | |
| "learning_rate": 8.917722570414217e-06, | |
| "loss": 0.3896, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8785871964679912, | |
| "grad_norm": 0.20598876476287842, | |
| "learning_rate": 8.909729344834965e-06, | |
| "loss": 0.3995, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8807947019867549, | |
| "grad_norm": 0.1776047945022583, | |
| "learning_rate": 8.901710320825161e-06, | |
| "loss": 0.4001, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8830022075055187, | |
| "grad_norm": 0.18616576492786407, | |
| "learning_rate": 8.893665551298502e-06, | |
| "loss": 0.3995, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8852097130242825, | |
| "grad_norm": 0.18072794377803802, | |
| "learning_rate": 8.885595089338567e-06, | |
| "loss": 0.3978, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8874172185430463, | |
| "grad_norm": 0.181128591299057, | |
| "learning_rate": 8.877498988198471e-06, | |
| "loss": 0.4112, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8896247240618101, | |
| "grad_norm": 0.17399437725543976, | |
| "learning_rate": 8.869377301300501e-06, | |
| "loss": 0.3937, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.891832229580574, | |
| "grad_norm": 0.17823876440525055, | |
| "learning_rate": 8.86123008223578e-06, | |
| "loss": 0.4055, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8940397350993378, | |
| "grad_norm": 0.176737442612648, | |
| "learning_rate": 8.853057384763904e-06, | |
| "loss": 0.3991, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8962472406181016, | |
| "grad_norm": 0.1630028337240219, | |
| "learning_rate": 8.844859262812584e-06, | |
| "loss": 0.3903, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8984547461368654, | |
| "grad_norm": 0.18285702168941498, | |
| "learning_rate": 8.8366357704773e-06, | |
| "loss": 0.3904, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.9006622516556292, | |
| "grad_norm": 0.1716790795326233, | |
| "learning_rate": 8.82838696202094e-06, | |
| "loss": 0.4033, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9028697571743929, | |
| "grad_norm": 0.15402917563915253, | |
| "learning_rate": 8.820112891873433e-06, | |
| "loss": 0.39, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.9050772626931567, | |
| "grad_norm": 0.16390980780124664, | |
| "learning_rate": 8.811813614631411e-06, | |
| "loss": 0.3993, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9072847682119205, | |
| "grad_norm": 0.15821807086467743, | |
| "learning_rate": 8.803489185057822e-06, | |
| "loss": 0.3953, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.9094922737306843, | |
| "grad_norm": 0.17032112181186676, | |
| "learning_rate": 8.795139658081586e-06, | |
| "loss": 0.4035, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9116997792494481, | |
| "grad_norm": 0.15845684707164764, | |
| "learning_rate": 8.786765088797238e-06, | |
| "loss": 0.4013, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.9139072847682119, | |
| "grad_norm": 0.17293021082878113, | |
| "learning_rate": 8.778365532464543e-06, | |
| "loss": 0.3965, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9161147902869757, | |
| "grad_norm": 0.14891566336154938, | |
| "learning_rate": 8.76994104450815e-06, | |
| "loss": 0.392, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9183222958057395, | |
| "grad_norm": 0.15123558044433594, | |
| "learning_rate": 8.761491680517218e-06, | |
| "loss": 0.396, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9205298013245033, | |
| "grad_norm": 0.1682642549276352, | |
| "learning_rate": 8.75301749624505e-06, | |
| "loss": 0.3838, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9227373068432672, | |
| "grad_norm": 0.1535079926252365, | |
| "learning_rate": 8.744518547608732e-06, | |
| "loss": 0.3921, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9249448123620309, | |
| "grad_norm": 0.16250640153884888, | |
| "learning_rate": 8.735994890688749e-06, | |
| "loss": 0.3896, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9271523178807947, | |
| "grad_norm": 0.16567584872245789, | |
| "learning_rate": 8.72744658172863e-06, | |
| "loss": 0.3928, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9293598233995585, | |
| "grad_norm": 0.16492141783237457, | |
| "learning_rate": 8.718873677134569e-06, | |
| "loss": 0.405, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9315673289183223, | |
| "grad_norm": 0.14573417603969574, | |
| "learning_rate": 8.710276233475058e-06, | |
| "loss": 0.3955, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9337748344370861, | |
| "grad_norm": 0.15780314803123474, | |
| "learning_rate": 8.701654307480508e-06, | |
| "loss": 0.3868, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9359823399558499, | |
| "grad_norm": 0.14735905826091766, | |
| "learning_rate": 8.693007956042874e-06, | |
| "loss": 0.3924, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9381898454746137, | |
| "grad_norm": 0.16305825114250183, | |
| "learning_rate": 8.684337236215289e-06, | |
| "loss": 0.3866, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9403973509933775, | |
| "grad_norm": 0.1656455248594284, | |
| "learning_rate": 8.675642205211679e-06, | |
| "loss": 0.3965, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9426048565121413, | |
| "grad_norm": 0.15034431219100952, | |
| "learning_rate": 8.666922920406384e-06, | |
| "loss": 0.3982, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9448123620309051, | |
| "grad_norm": 0.161673903465271, | |
| "learning_rate": 8.65817943933379e-06, | |
| "loss": 0.3933, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9470198675496688, | |
| "grad_norm": 0.1589784324169159, | |
| "learning_rate": 8.649411819687936e-06, | |
| "loss": 0.3976, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9492273730684326, | |
| "grad_norm": 0.166986882686615, | |
| "learning_rate": 8.640620119322146e-06, | |
| "loss": 0.4003, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9514348785871964, | |
| "grad_norm": 0.15880529582500458, | |
| "learning_rate": 8.631804396248637e-06, | |
| "loss": 0.3926, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9536423841059603, | |
| "grad_norm": 0.1741640269756317, | |
| "learning_rate": 8.62296470863814e-06, | |
| "loss": 0.3978, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9558498896247241, | |
| "grad_norm": 0.15339982509613037, | |
| "learning_rate": 8.61410111481952e-06, | |
| "loss": 0.4059, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9580573951434879, | |
| "grad_norm": 0.1605585813522339, | |
| "learning_rate": 8.605213673279382e-06, | |
| "loss": 0.3897, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9602649006622517, | |
| "grad_norm": 0.154588520526886, | |
| "learning_rate": 8.5963024426617e-06, | |
| "loss": 0.3853, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9624724061810155, | |
| "grad_norm": 0.16428066790103912, | |
| "learning_rate": 8.587367481767409e-06, | |
| "loss": 0.3944, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9646799116997793, | |
| "grad_norm": 0.15117953717708588, | |
| "learning_rate": 8.578408849554037e-06, | |
| "loss": 0.3924, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9668874172185431, | |
| "grad_norm": 0.15928852558135986, | |
| "learning_rate": 8.569426605135307e-06, | |
| "loss": 0.399, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9690949227373068, | |
| "grad_norm": 0.16322654485702515, | |
| "learning_rate": 8.560420807780742e-06, | |
| "loss": 0.3863, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9713024282560706, | |
| "grad_norm": 0.1749914139509201, | |
| "learning_rate": 8.551391516915288e-06, | |
| "loss": 0.4018, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9735099337748344, | |
| "grad_norm": 0.15128456056118011, | |
| "learning_rate": 8.542338792118907e-06, | |
| "loss": 0.3784, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9757174392935982, | |
| "grad_norm": 0.19126002490520477, | |
| "learning_rate": 8.533262693126191e-06, | |
| "loss": 0.3923, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.977924944812362, | |
| "grad_norm": 0.16031384468078613, | |
| "learning_rate": 8.52416327982597e-06, | |
| "loss": 0.4036, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9801324503311258, | |
| "grad_norm": 0.16994954645633698, | |
| "learning_rate": 8.515040612260912e-06, | |
| "loss": 0.3865, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9823399558498896, | |
| "grad_norm": 0.17667335271835327, | |
| "learning_rate": 8.505894750627128e-06, | |
| "loss": 0.3884, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9845474613686535, | |
| "grad_norm": 0.1745615154504776, | |
| "learning_rate": 8.496725755273778e-06, | |
| "loss": 0.3926, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9867549668874173, | |
| "grad_norm": 0.16568712890148163, | |
| "learning_rate": 8.487533686702668e-06, | |
| "loss": 0.393, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9889624724061811, | |
| "grad_norm": 0.1762251853942871, | |
| "learning_rate": 8.478318605567853e-06, | |
| "loss": 0.3868, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9911699779249448, | |
| "grad_norm": 0.18370142579078674, | |
| "learning_rate": 8.46908057267524e-06, | |
| "loss": 0.3939, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9933774834437086, | |
| "grad_norm": 0.1815333366394043, | |
| "learning_rate": 8.459819648982182e-06, | |
| "loss": 0.3849, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9955849889624724, | |
| "grad_norm": 0.19721707701683044, | |
| "learning_rate": 8.450535895597074e-06, | |
| "loss": 0.3953, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9977924944812362, | |
| "grad_norm": 0.16212257742881775, | |
| "learning_rate": 8.441229373778957e-06, | |
| "loss": 0.3933, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2004193216562271, | |
| "learning_rate": 8.43190014493711e-06, | |
| "loss": 0.3914, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.0022075055187638, | |
| "grad_norm": 0.15856173634529114, | |
| "learning_rate": 8.422548270630646e-06, | |
| "loss": 0.3806, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0044150110375276, | |
| "grad_norm": 0.18225938081741333, | |
| "learning_rate": 8.413173812568099e-06, | |
| "loss": 0.3714, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.0066225165562914, | |
| "grad_norm": 0.17915207147598267, | |
| "learning_rate": 8.403776832607028e-06, | |
| "loss": 0.3834, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0088300220750552, | |
| "grad_norm": 0.1916593611240387, | |
| "learning_rate": 8.394357392753599e-06, | |
| "loss": 0.3666, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.011037527593819, | |
| "grad_norm": 0.166362464427948, | |
| "learning_rate": 8.384915555162183e-06, | |
| "loss": 0.3839, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0132450331125828, | |
| "grad_norm": 0.20503659546375275, | |
| "learning_rate": 8.375451382134942e-06, | |
| "loss": 0.381, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0154525386313467, | |
| "grad_norm": 0.18665875494480133, | |
| "learning_rate": 8.365964936121422e-06, | |
| "loss": 0.3893, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0176600441501105, | |
| "grad_norm": 0.15370848774909973, | |
| "learning_rate": 8.35645627971813e-06, | |
| "loss": 0.3796, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.0198675496688743, | |
| "grad_norm": 0.17442859709262848, | |
| "learning_rate": 8.346925475668138e-06, | |
| "loss": 0.3633, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.022075055187638, | |
| "grad_norm": 0.18989215791225433, | |
| "learning_rate": 8.337372586860651e-06, | |
| "loss": 0.3743, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0242825607064017, | |
| "grad_norm": 0.16941452026367188, | |
| "learning_rate": 8.327797676330604e-06, | |
| "loss": 0.369, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0264900662251655, | |
| "grad_norm": 0.14053016901016235, | |
| "learning_rate": 8.31820080725825e-06, | |
| "loss": 0.3724, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0286975717439293, | |
| "grad_norm": 0.17336276173591614, | |
| "learning_rate": 8.308582042968726e-06, | |
| "loss": 0.3903, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.030905077262693, | |
| "grad_norm": 0.15840588510036469, | |
| "learning_rate": 8.298941446931646e-06, | |
| "loss": 0.3847, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.033112582781457, | |
| "grad_norm": 0.16392916440963745, | |
| "learning_rate": 8.289279082760685e-06, | |
| "loss": 0.3893, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0353200883002207, | |
| "grad_norm": 0.17757326364517212, | |
| "learning_rate": 8.279595014213158e-06, | |
| "loss": 0.3876, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0375275938189845, | |
| "grad_norm": 0.1557520627975464, | |
| "learning_rate": 8.26988930518959e-06, | |
| "loss": 0.3782, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0397350993377483, | |
| "grad_norm": 0.16154958307743073, | |
| "learning_rate": 8.260162019733305e-06, | |
| "loss": 0.3933, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.0419426048565121, | |
| "grad_norm": 0.18913333117961884, | |
| "learning_rate": 8.250413222029997e-06, | |
| "loss": 0.3888, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.044150110375276, | |
| "grad_norm": 0.1763067990541458, | |
| "learning_rate": 8.240642976407313e-06, | |
| "loss": 0.3875, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0463576158940397, | |
| "grad_norm": 0.16598297655582428, | |
| "learning_rate": 8.230851347334424e-06, | |
| "loss": 0.3894, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0485651214128036, | |
| "grad_norm": 0.17947614192962646, | |
| "learning_rate": 8.221038399421592e-06, | |
| "loss": 0.3853, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0507726269315674, | |
| "grad_norm": 0.16084261238574982, | |
| "learning_rate": 8.211204197419766e-06, | |
| "loss": 0.3762, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0529801324503312, | |
| "grad_norm": 0.21188747882843018, | |
| "learning_rate": 8.201348806220127e-06, | |
| "loss": 0.3791, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.055187637969095, | |
| "grad_norm": 0.15903352200984955, | |
| "learning_rate": 8.191472290853683e-06, | |
| "loss": 0.3799, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0573951434878588, | |
| "grad_norm": 0.2036534547805786, | |
| "learning_rate": 8.181574716490823e-06, | |
| "loss": 0.3909, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0596026490066226, | |
| "grad_norm": 0.16643071174621582, | |
| "learning_rate": 8.171656148440902e-06, | |
| "loss": 0.3842, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0618101545253864, | |
| "grad_norm": 0.16434621810913086, | |
| "learning_rate": 8.161716652151795e-06, | |
| "loss": 0.3748, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0640176600441502, | |
| "grad_norm": 0.18316681683063507, | |
| "learning_rate": 8.151756293209476e-06, | |
| "loss": 0.3795, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0662251655629138, | |
| "grad_norm": 0.14608311653137207, | |
| "learning_rate": 8.14177513733758e-06, | |
| "loss": 0.3749, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0684326710816776, | |
| "grad_norm": 0.1656610518693924, | |
| "learning_rate": 8.131773250396973e-06, | |
| "loss": 0.3794, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0706401766004414, | |
| "grad_norm": 0.14669524133205414, | |
| "learning_rate": 8.121750698385315e-06, | |
| "loss": 0.3825, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0728476821192052, | |
| "grad_norm": 0.17891530692577362, | |
| "learning_rate": 8.111707547436623e-06, | |
| "loss": 0.373, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.075055187637969, | |
| "grad_norm": 0.15400467813014984, | |
| "learning_rate": 8.10164386382084e-06, | |
| "loss": 0.3792, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0772626931567328, | |
| "grad_norm": 0.17551693320274353, | |
| "learning_rate": 8.091559713943388e-06, | |
| "loss": 0.3908, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0794701986754967, | |
| "grad_norm": 0.15460826456546783, | |
| "learning_rate": 8.081455164344745e-06, | |
| "loss": 0.3838, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0816777041942605, | |
| "grad_norm": 0.15716975927352905, | |
| "learning_rate": 8.071330281699989e-06, | |
| "loss": 0.3934, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0838852097130243, | |
| "grad_norm": 0.15411889553070068, | |
| "learning_rate": 8.06118513281837e-06, | |
| "loss": 0.3792, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.086092715231788, | |
| "grad_norm": 0.18962320685386658, | |
| "learning_rate": 8.051019784642864e-06, | |
| "loss": 0.3888, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0883002207505519, | |
| "grad_norm": 0.15896014869213104, | |
| "learning_rate": 8.040834304249733e-06, | |
| "loss": 0.3861, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.0905077262693157, | |
| "grad_norm": 0.1523059904575348, | |
| "learning_rate": 8.03062875884808e-06, | |
| "loss": 0.3727, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0927152317880795, | |
| "grad_norm": 0.13958971202373505, | |
| "learning_rate": 8.02040321577941e-06, | |
| "loss": 0.3718, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0949227373068433, | |
| "grad_norm": 0.1423843950033188, | |
| "learning_rate": 8.010157742517185e-06, | |
| "loss": 0.3746, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0971302428256071, | |
| "grad_norm": 0.13685709238052368, | |
| "learning_rate": 7.99989240666637e-06, | |
| "loss": 0.3727, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.099337748344371, | |
| "grad_norm": 0.15423151850700378, | |
| "learning_rate": 7.989607275963e-06, | |
| "loss": 0.3778, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1015452538631347, | |
| "grad_norm": 0.15014959871768951, | |
| "learning_rate": 7.979302418273723e-06, | |
| "loss": 0.3747, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.1037527593818985, | |
| "grad_norm": 0.16919955611228943, | |
| "learning_rate": 7.968977901595355e-06, | |
| "loss": 0.3782, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1059602649006623, | |
| "grad_norm": 0.1481235921382904, | |
| "learning_rate": 7.958633794054439e-06, | |
| "loss": 0.3808, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.108167770419426, | |
| "grad_norm": 0.17575468122959137, | |
| "learning_rate": 7.94827016390678e-06, | |
| "loss": 0.3708, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1103752759381897, | |
| "grad_norm": 0.139452263712883, | |
| "learning_rate": 7.93788707953701e-06, | |
| "loss": 0.376, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.1125827814569536, | |
| "grad_norm": 0.14127641916275024, | |
| "learning_rate": 7.927484609458128e-06, | |
| "loss": 0.3847, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1147902869757174, | |
| "grad_norm": 0.15063825249671936, | |
| "learning_rate": 7.917062822311047e-06, | |
| "loss": 0.3842, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1169977924944812, | |
| "grad_norm": 0.15780918300151825, | |
| "learning_rate": 7.90662178686415e-06, | |
| "loss": 0.3737, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.119205298013245, | |
| "grad_norm": 0.14620929956436157, | |
| "learning_rate": 7.896161572012824e-06, | |
| "loss": 0.3862, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.1214128035320088, | |
| "grad_norm": 0.1493159383535385, | |
| "learning_rate": 7.885682246779016e-06, | |
| "loss": 0.3809, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1236203090507726, | |
| "grad_norm": 0.1471249759197235, | |
| "learning_rate": 7.875183880310772e-06, | |
| "loss": 0.3885, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.1258278145695364, | |
| "grad_norm": 0.1509741246700287, | |
| "learning_rate": 7.86466654188178e-06, | |
| "loss": 0.3778, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1280353200883002, | |
| "grad_norm": 0.14812204241752625, | |
| "learning_rate": 7.854130300890921e-06, | |
| "loss": 0.3731, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.130242825607064, | |
| "grad_norm": 0.1415746510028839, | |
| "learning_rate": 7.843575226861795e-06, | |
| "loss": 0.3798, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1324503311258278, | |
| "grad_norm": 0.14002346992492676, | |
| "learning_rate": 7.833001389442283e-06, | |
| "loss": 0.3833, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.1346578366445916, | |
| "grad_norm": 0.15462878346443176, | |
| "learning_rate": 7.82240885840407e-06, | |
| "loss": 0.3785, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1368653421633554, | |
| "grad_norm": 0.15146006643772125, | |
| "learning_rate": 7.811797703642193e-06, | |
| "loss": 0.3767, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1390728476821192, | |
| "grad_norm": 0.1545468419790268, | |
| "learning_rate": 7.801167995174575e-06, | |
| "loss": 0.3739, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.141280353200883, | |
| "grad_norm": 0.1515691876411438, | |
| "learning_rate": 7.790519803141572e-06, | |
| "loss": 0.394, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1434878587196469, | |
| "grad_norm": 0.1403704136610031, | |
| "learning_rate": 7.7798531978055e-06, | |
| "loss": 0.3844, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1456953642384107, | |
| "grad_norm": 0.12919290363788605, | |
| "learning_rate": 7.769168249550176e-06, | |
| "loss": 0.3746, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.1479028697571745, | |
| "grad_norm": 0.137840136885643, | |
| "learning_rate": 7.758465028880455e-06, | |
| "loss": 0.3748, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.150110375275938, | |
| "grad_norm": 0.13867942988872528, | |
| "learning_rate": 7.747743606421761e-06, | |
| "loss": 0.3813, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.152317880794702, | |
| "grad_norm": 0.1563754677772522, | |
| "learning_rate": 7.737004052919623e-06, | |
| "loss": 0.3673, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1545253863134657, | |
| "grad_norm": 0.15125791728496552, | |
| "learning_rate": 7.726246439239209e-06, | |
| "loss": 0.3814, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1567328918322295, | |
| "grad_norm": 0.15860167145729065, | |
| "learning_rate": 7.715470836364857e-06, | |
| "loss": 0.3962, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1589403973509933, | |
| "grad_norm": 0.14636121690273285, | |
| "learning_rate": 7.704677315399607e-06, | |
| "loss": 0.3786, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.161147902869757, | |
| "grad_norm": 0.1441173404455185, | |
| "learning_rate": 7.693865947564733e-06, | |
| "loss": 0.3737, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.163355408388521, | |
| "grad_norm": 0.1591232568025589, | |
| "learning_rate": 7.68303680419927e-06, | |
| "loss": 0.3721, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1655629139072847, | |
| "grad_norm": 0.1598597913980484, | |
| "learning_rate": 7.672189956759546e-06, | |
| "loss": 0.3793, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1677704194260485, | |
| "grad_norm": 0.16569367051124573, | |
| "learning_rate": 7.661325476818708e-06, | |
| "loss": 0.3799, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1699779249448123, | |
| "grad_norm": 0.1879713088274002, | |
| "learning_rate": 7.65044343606626e-06, | |
| "loss": 0.3831, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1721854304635762, | |
| "grad_norm": 0.14313288033008575, | |
| "learning_rate": 7.639543906307565e-06, | |
| "loss": 0.3774, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.17439293598234, | |
| "grad_norm": 0.20557354390621185, | |
| "learning_rate": 7.628626959463405e-06, | |
| "loss": 0.3766, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1766004415011038, | |
| "grad_norm": 0.14320634305477142, | |
| "learning_rate": 7.6176926675694786e-06, | |
| "loss": 0.3754, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1788079470198676, | |
| "grad_norm": 0.18957629799842834, | |
| "learning_rate": 7.606741102775936e-06, | |
| "loss": 0.3799, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1810154525386314, | |
| "grad_norm": 0.15119552612304688, | |
| "learning_rate": 7.595772337346912e-06, | |
| "loss": 0.3863, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1832229580573952, | |
| "grad_norm": 0.16690769791603088, | |
| "learning_rate": 7.584786443660028e-06, | |
| "loss": 0.371, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.185430463576159, | |
| "grad_norm": 0.1685992032289505, | |
| "learning_rate": 7.573783494205936e-06, | |
| "loss": 0.3741, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1876379690949228, | |
| "grad_norm": 0.14501997828483582, | |
| "learning_rate": 7.562763561587824e-06, | |
| "loss": 0.3701, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1898454746136866, | |
| "grad_norm": 0.16112159192562103, | |
| "learning_rate": 7.55172671852095e-06, | |
| "loss": 0.3781, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1920529801324504, | |
| "grad_norm": 0.13081905245780945, | |
| "learning_rate": 7.5406730378321506e-06, | |
| "loss": 0.39, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1942604856512142, | |
| "grad_norm": 0.15677790343761444, | |
| "learning_rate": 7.5296025924593705e-06, | |
| "loss": 0.3814, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.1964679911699778, | |
| "grad_norm": 0.1494779884815216, | |
| "learning_rate": 7.518515455451172e-06, | |
| "loss": 0.3757, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1986754966887416, | |
| "grad_norm": 0.1597052365541458, | |
| "learning_rate": 7.50741169996626e-06, | |
| "loss": 0.3772, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.2008830022075054, | |
| "grad_norm": 0.1445734202861786, | |
| "learning_rate": 7.496291399273e-06, | |
| "loss": 0.3779, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2030905077262692, | |
| "grad_norm": 0.14188243448734283, | |
| "learning_rate": 7.485154626748924e-06, | |
| "loss": 0.3801, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.205298013245033, | |
| "grad_norm": 0.15295171737670898, | |
| "learning_rate": 7.474001455880258e-06, | |
| "loss": 0.3786, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2075055187637969, | |
| "grad_norm": 0.12851738929748535, | |
| "learning_rate": 7.4628319602614315e-06, | |
| "loss": 0.3759, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.2097130242825607, | |
| "grad_norm": 0.12803953886032104, | |
| "learning_rate": 7.451646213594597e-06, | |
| "loss": 0.3807, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2119205298013245, | |
| "grad_norm": 0.15474575757980347, | |
| "learning_rate": 7.440444289689135e-06, | |
| "loss": 0.3761, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.2141280353200883, | |
| "grad_norm": 0.14079326391220093, | |
| "learning_rate": 7.429226262461175e-06, | |
| "loss": 0.3744, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.216335540838852, | |
| "grad_norm": 0.13553716242313385, | |
| "learning_rate": 7.417992205933104e-06, | |
| "loss": 0.3813, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.218543046357616, | |
| "grad_norm": 0.14058908820152283, | |
| "learning_rate": 7.406742194233074e-06, | |
| "loss": 0.3722, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2207505518763797, | |
| "grad_norm": 0.15452724695205688, | |
| "learning_rate": 7.3954763015945266e-06, | |
| "loss": 0.3628, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2229580573951435, | |
| "grad_norm": 0.14715242385864258, | |
| "learning_rate": 7.384194602355685e-06, | |
| "loss": 0.3779, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2251655629139073, | |
| "grad_norm": 0.15119391679763794, | |
| "learning_rate": 7.37289717095908e-06, | |
| "loss": 0.3787, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.2273730684326711, | |
| "grad_norm": 0.1800910234451294, | |
| "learning_rate": 7.361584081951046e-06, | |
| "loss": 0.3781, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.229580573951435, | |
| "grad_norm": 0.1489747166633606, | |
| "learning_rate": 7.350255409981237e-06, | |
| "loss": 0.3843, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.2317880794701987, | |
| "grad_norm": 0.14379991590976715, | |
| "learning_rate": 7.338911229802133e-06, | |
| "loss": 0.3832, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2339955849889626, | |
| "grad_norm": 0.13160887360572815, | |
| "learning_rate": 7.327551616268541e-06, | |
| "loss": 0.3858, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2362030905077264, | |
| "grad_norm": 0.1353141814470291, | |
| "learning_rate": 7.316176644337107e-06, | |
| "loss": 0.3688, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.23841059602649, | |
| "grad_norm": 0.16255126893520355, | |
| "learning_rate": 7.304786389065823e-06, | |
| "loss": 0.377, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.240618101545254, | |
| "grad_norm": 0.13924263417720795, | |
| "learning_rate": 7.293380925613524e-06, | |
| "loss": 0.376, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2428256070640176, | |
| "grad_norm": 0.1347806751728058, | |
| "learning_rate": 7.281960329239398e-06, | |
| "loss": 0.3881, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.2450331125827814, | |
| "grad_norm": 0.1494341343641281, | |
| "learning_rate": 7.270524675302491e-06, | |
| "loss": 0.3843, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2472406181015452, | |
| "grad_norm": 0.16562990844249725, | |
| "learning_rate": 7.259074039261199e-06, | |
| "loss": 0.3843, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.249448123620309, | |
| "grad_norm": 0.1605864018201828, | |
| "learning_rate": 7.247608496672786e-06, | |
| "loss": 0.3741, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2516556291390728, | |
| "grad_norm": 0.1619981825351715, | |
| "learning_rate": 7.2361281231928725e-06, | |
| "loss": 0.392, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2538631346578366, | |
| "grad_norm": 0.13447479903697968, | |
| "learning_rate": 7.2246329945749425e-06, | |
| "loss": 0.3755, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2560706401766004, | |
| "grad_norm": 0.1463773399591446, | |
| "learning_rate": 7.213123186669842e-06, | |
| "loss": 0.3913, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2582781456953642, | |
| "grad_norm": 0.14680610597133636, | |
| "learning_rate": 7.201598775425278e-06, | |
| "loss": 0.3776, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.260485651214128, | |
| "grad_norm": 0.14192719757556915, | |
| "learning_rate": 7.190059836885318e-06, | |
| "loss": 0.3829, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2626931567328918, | |
| "grad_norm": 0.1412820965051651, | |
| "learning_rate": 7.178506447189887e-06, | |
| "loss": 0.3782, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2649006622516556, | |
| "grad_norm": 0.1515144258737564, | |
| "learning_rate": 7.166938682574272e-06, | |
| "loss": 0.3779, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2671081677704195, | |
| "grad_norm": 0.148550346493721, | |
| "learning_rate": 7.155356619368604e-06, | |
| "loss": 0.376, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2693156732891833, | |
| "grad_norm": 0.14767727255821228, | |
| "learning_rate": 7.14376033399737e-06, | |
| "loss": 0.3768, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.271523178807947, | |
| "grad_norm": 0.1577431708574295, | |
| "learning_rate": 7.132149902978902e-06, | |
| "loss": 0.373, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2737306843267109, | |
| "grad_norm": 0.15225905179977417, | |
| "learning_rate": 7.120525402924871e-06, | |
| "loss": 0.365, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2759381898454747, | |
| "grad_norm": 0.16508492827415466, | |
| "learning_rate": 7.108886910539781e-06, | |
| "loss": 0.3866, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2781456953642385, | |
| "grad_norm": 0.16345573961734772, | |
| "learning_rate": 7.097234502620468e-06, | |
| "loss": 0.3819, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.280353200883002, | |
| "grad_norm": 0.15904562175273895, | |
| "learning_rate": 7.085568256055589e-06, | |
| "loss": 0.3877, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2825607064017661, | |
| "grad_norm": 0.17447535693645477, | |
| "learning_rate": 7.073888247825111e-06, | |
| "loss": 0.3902, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.2847682119205297, | |
| "grad_norm": 0.1613752692937851, | |
| "learning_rate": 7.062194554999817e-06, | |
| "loss": 0.3765, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2869757174392937, | |
| "grad_norm": 0.1705675572156906, | |
| "learning_rate": 7.05048725474078e-06, | |
| "loss": 0.3699, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2891832229580573, | |
| "grad_norm": 0.20963701605796814, | |
| "learning_rate": 7.038766424298865e-06, | |
| "loss": 0.3909, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2913907284768211, | |
| "grad_norm": 0.16403751075267792, | |
| "learning_rate": 7.027032141014216e-06, | |
| "loss": 0.3753, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.293598233995585, | |
| "grad_norm": 0.18310756981372833, | |
| "learning_rate": 7.0152844823157474e-06, | |
| "loss": 0.3807, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2958057395143487, | |
| "grad_norm": 0.16576433181762695, | |
| "learning_rate": 7.003523525720626e-06, | |
| "loss": 0.3659, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.2980132450331126, | |
| "grad_norm": 0.1746741682291031, | |
| "learning_rate": 6.991749348833773e-06, | |
| "loss": 0.3783, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3002207505518764, | |
| "grad_norm": 0.16022977232933044, | |
| "learning_rate": 6.979962029347338e-06, | |
| "loss": 0.3745, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.3024282560706402, | |
| "grad_norm": 0.1607898473739624, | |
| "learning_rate": 6.9681616450401936e-06, | |
| "loss": 0.3732, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.304635761589404, | |
| "grad_norm": 0.1624252200126648, | |
| "learning_rate": 6.956348273777424e-06, | |
| "loss": 0.3815, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.3068432671081678, | |
| "grad_norm": 0.14729204773902893, | |
| "learning_rate": 6.944521993509803e-06, | |
| "loss": 0.3734, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3090507726269316, | |
| "grad_norm": 0.18559885025024414, | |
| "learning_rate": 6.9326828822732885e-06, | |
| "loss": 0.386, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.3112582781456954, | |
| "grad_norm": 0.15120132267475128, | |
| "learning_rate": 6.920831018188502e-06, | |
| "loss": 0.3702, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3134657836644592, | |
| "grad_norm": 0.18332970142364502, | |
| "learning_rate": 6.908966479460219e-06, | |
| "loss": 0.3807, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.315673289183223, | |
| "grad_norm": 0.17234960198402405, | |
| "learning_rate": 6.89708934437684e-06, | |
| "loss": 0.3644, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3178807947019868, | |
| "grad_norm": 0.18331332504749298, | |
| "learning_rate": 6.885199691309892e-06, | |
| "loss": 0.3841, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.3200883002207506, | |
| "grad_norm": 0.15819165110588074, | |
| "learning_rate": 6.873297598713497e-06, | |
| "loss": 0.3725, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3222958057395142, | |
| "grad_norm": 0.1651681512594223, | |
| "learning_rate": 6.8613831451238636e-06, | |
| "loss": 0.3762, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.3245033112582782, | |
| "grad_norm": 0.1659359186887741, | |
| "learning_rate": 6.84945640915876e-06, | |
| "loss": 0.3757, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3267108167770418, | |
| "grad_norm": 0.15379931032657623, | |
| "learning_rate": 6.837517469517001e-06, | |
| "loss": 0.3735, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.3289183222958059, | |
| "grad_norm": 0.1771930605173111, | |
| "learning_rate": 6.82556640497793e-06, | |
| "loss": 0.3798, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3311258278145695, | |
| "grad_norm": 0.16866551339626312, | |
| "learning_rate": 6.813603294400895e-06, | |
| "loss": 0.3724, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.16047048568725586, | |
| "learning_rate": 6.8016282167247325e-06, | |
| "loss": 0.3744, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.335540838852097, | |
| "grad_norm": 0.13133041560649872, | |
| "learning_rate": 6.7896412509672385e-06, | |
| "loss": 0.3775, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3377483443708609, | |
| "grad_norm": 0.16929350793361664, | |
| "learning_rate": 6.777642476224658e-06, | |
| "loss": 0.3724, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3399558498896247, | |
| "grad_norm": 0.14370004832744598, | |
| "learning_rate": 6.765631971671156e-06, | |
| "loss": 0.3795, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3421633554083885, | |
| "grad_norm": 0.18519333004951477, | |
| "learning_rate": 6.753609816558297e-06, | |
| "loss": 0.3789, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3443708609271523, | |
| "grad_norm": 0.15061011910438538, | |
| "learning_rate": 6.741576090214526e-06, | |
| "loss": 0.3779, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.346578366445916, | |
| "grad_norm": 0.16702063381671906, | |
| "learning_rate": 6.729530872044629e-06, | |
| "loss": 0.3791, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.34878587196468, | |
| "grad_norm": 0.1492423117160797, | |
| "learning_rate": 6.717474241529235e-06, | |
| "loss": 0.3821, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3509933774834437, | |
| "grad_norm": 0.15660499036312103, | |
| "learning_rate": 6.705406278224269e-06, | |
| "loss": 0.3764, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3532008830022075, | |
| "grad_norm": 0.13613760471343994, | |
| "learning_rate": 6.69332706176044e-06, | |
| "loss": 0.3842, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3554083885209713, | |
| "grad_norm": 0.1655375063419342, | |
| "learning_rate": 6.681236671842709e-06, | |
| "loss": 0.3861, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3576158940397351, | |
| "grad_norm": 0.15369166433811188, | |
| "learning_rate": 6.669135188249767e-06, | |
| "loss": 0.3753, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.359823399558499, | |
| "grad_norm": 0.1611892580986023, | |
| "learning_rate": 6.657022690833503e-06, | |
| "loss": 0.3732, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3620309050772628, | |
| "grad_norm": 0.15144729614257812, | |
| "learning_rate": 6.644899259518485e-06, | |
| "loss": 0.3756, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3642384105960264, | |
| "grad_norm": 0.1591017246246338, | |
| "learning_rate": 6.632764974301429e-06, | |
| "loss": 0.3643, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3664459161147904, | |
| "grad_norm": 0.15775880217552185, | |
| "learning_rate": 6.620619915250666e-06, | |
| "loss": 0.3773, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.368653421633554, | |
| "grad_norm": 0.14554527401924133, | |
| "learning_rate": 6.608464162505621e-06, | |
| "loss": 0.3788, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.370860927152318, | |
| "grad_norm": 0.15284626185894012, | |
| "learning_rate": 6.596297796276284e-06, | |
| "loss": 0.3743, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3730684326710816, | |
| "grad_norm": 0.14736317098140717, | |
| "learning_rate": 6.584120896842675e-06, | |
| "loss": 0.3763, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3752759381898454, | |
| "grad_norm": 0.15955647826194763, | |
| "learning_rate": 6.571933544554319e-06, | |
| "loss": 0.3664, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.3774834437086092, | |
| "grad_norm": 0.16919533908367157, | |
| "learning_rate": 6.559735819829713e-06, | |
| "loss": 0.3755, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.379690949227373, | |
| "grad_norm": 0.15306392312049866, | |
| "learning_rate": 6.5475278031558e-06, | |
| "loss": 0.3772, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3818984547461368, | |
| "grad_norm": 0.1504804790019989, | |
| "learning_rate": 6.5353095750874295e-06, | |
| "loss": 0.3709, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3841059602649006, | |
| "grad_norm": 0.14429201185703278, | |
| "learning_rate": 6.523081216246838e-06, | |
| "loss": 0.3709, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3863134657836644, | |
| "grad_norm": 0.14882300794124603, | |
| "learning_rate": 6.5108428073231e-06, | |
| "loss": 0.3768, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3885209713024282, | |
| "grad_norm": 0.1360517293214798, | |
| "learning_rate": 6.498594429071618e-06, | |
| "loss": 0.3768, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.390728476821192, | |
| "grad_norm": 0.13942170143127441, | |
| "learning_rate": 6.486336162313568e-06, | |
| "loss": 0.3796, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3929359823399559, | |
| "grad_norm": 0.14323705434799194, | |
| "learning_rate": 6.474068087935379e-06, | |
| "loss": 0.379, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3951434878587197, | |
| "grad_norm": 0.15375126898288727, | |
| "learning_rate": 6.461790286888196e-06, | |
| "loss": 0.3721, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3973509933774835, | |
| "grad_norm": 0.15478521585464478, | |
| "learning_rate": 6.449502840187344e-06, | |
| "loss": 0.3726, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.3995584988962473, | |
| "grad_norm": 0.16403432190418243, | |
| "learning_rate": 6.437205828911797e-06, | |
| "loss": 0.3765, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.401766004415011, | |
| "grad_norm": 0.1496008038520813, | |
| "learning_rate": 6.424899334203641e-06, | |
| "loss": 0.3785, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.403973509933775, | |
| "grad_norm": 0.152191162109375, | |
| "learning_rate": 6.4125834372675355e-06, | |
| "loss": 0.3752, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4061810154525387, | |
| "grad_norm": 0.15051637589931488, | |
| "learning_rate": 6.400258219370183e-06, | |
| "loss": 0.3743, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.4083885209713025, | |
| "grad_norm": 0.15709619224071503, | |
| "learning_rate": 6.387923761839794e-06, | |
| "loss": 0.3697, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.410596026490066, | |
| "grad_norm": 0.14248456060886383, | |
| "learning_rate": 6.375580146065539e-06, | |
| "loss": 0.373, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.4128035320088301, | |
| "grad_norm": 0.1371423304080963, | |
| "learning_rate": 6.363227453497029e-06, | |
| "loss": 0.3819, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4150110375275937, | |
| "grad_norm": 0.16379839181900024, | |
| "learning_rate": 6.350865765643759e-06, | |
| "loss": 0.3793, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.4172185430463577, | |
| "grad_norm": 0.14373619854450226, | |
| "learning_rate": 6.3384951640745865e-06, | |
| "loss": 0.3779, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4194260485651213, | |
| "grad_norm": 0.14477743208408356, | |
| "learning_rate": 6.32611573041718e-06, | |
| "loss": 0.3786, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.4216335540838851, | |
| "grad_norm": 0.17920951545238495, | |
| "learning_rate": 6.313727546357498e-06, | |
| "loss": 0.3901, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.423841059602649, | |
| "grad_norm": 0.14486798644065857, | |
| "learning_rate": 6.301330693639224e-06, | |
| "loss": 0.3786, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.4260485651214128, | |
| "grad_norm": 0.13817182183265686, | |
| "learning_rate": 6.288925254063249e-06, | |
| "loss": 0.3725, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4282560706401766, | |
| "grad_norm": 0.14681169390678406, | |
| "learning_rate": 6.27651130948713e-06, | |
| "loss": 0.3851, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.4304635761589404, | |
| "grad_norm": 0.14171209931373596, | |
| "learning_rate": 6.264088941824534e-06, | |
| "loss": 0.3802, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4326710816777042, | |
| "grad_norm": 0.14188838005065918, | |
| "learning_rate": 6.251658233044715e-06, | |
| "loss": 0.3764, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.434878587196468, | |
| "grad_norm": 0.15146797895431519, | |
| "learning_rate": 6.239219265171964e-06, | |
| "loss": 0.3791, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4370860927152318, | |
| "grad_norm": 0.1602582484483719, | |
| "learning_rate": 6.22677212028507e-06, | |
| "loss": 0.3773, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4392935982339956, | |
| "grad_norm": 0.14558373391628265, | |
| "learning_rate": 6.214316880516775e-06, | |
| "loss": 0.3631, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4415011037527594, | |
| "grad_norm": 0.17893047630786896, | |
| "learning_rate": 6.201853628053243e-06, | |
| "loss": 0.374, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4437086092715232, | |
| "grad_norm": 0.14639593660831451, | |
| "learning_rate": 6.189382445133502e-06, | |
| "loss": 0.3753, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.445916114790287, | |
| "grad_norm": 0.14472749829292297, | |
| "learning_rate": 6.17690341404891e-06, | |
| "loss": 0.3796, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4481236203090508, | |
| "grad_norm": 0.1566278040409088, | |
| "learning_rate": 6.16441661714262e-06, | |
| "loss": 0.3741, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4503311258278146, | |
| "grad_norm": 0.14471283555030823, | |
| "learning_rate": 6.151922136809017e-06, | |
| "loss": 0.3808, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4525386313465782, | |
| "grad_norm": 0.15734338760375977, | |
| "learning_rate": 6.13942005549319e-06, | |
| "loss": 0.3781, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4547461368653423, | |
| "grad_norm": 0.13064594566822052, | |
| "learning_rate": 6.126910455690384e-06, | |
| "loss": 0.377, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4569536423841059, | |
| "grad_norm": 0.15152592957019806, | |
| "learning_rate": 6.114393419945455e-06, | |
| "loss": 0.3784, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4591611479028699, | |
| "grad_norm": 0.14837685227394104, | |
| "learning_rate": 6.101869030852321e-06, | |
| "loss": 0.3889, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4613686534216335, | |
| "grad_norm": 0.14178897440433502, | |
| "learning_rate": 6.089337371053429e-06, | |
| "loss": 0.3714, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4635761589403973, | |
| "grad_norm": 0.15140818059444427, | |
| "learning_rate": 6.076798523239194e-06, | |
| "loss": 0.3816, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.465783664459161, | |
| "grad_norm": 0.14195586740970612, | |
| "learning_rate": 6.064252570147464e-06, | |
| "loss": 0.3738, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.467991169977925, | |
| "grad_norm": 0.13242661952972412, | |
| "learning_rate": 6.051699594562976e-06, | |
| "loss": 0.3669, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4701986754966887, | |
| "grad_norm": 0.1379902958869934, | |
| "learning_rate": 6.039139679316797e-06, | |
| "loss": 0.362, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4724061810154525, | |
| "grad_norm": 0.1443612426519394, | |
| "learning_rate": 6.026572907285791e-06, | |
| "loss": 0.3711, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4746136865342163, | |
| "grad_norm": 0.14281077682971954, | |
| "learning_rate": 6.013999361392064e-06, | |
| "loss": 0.3753, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4768211920529801, | |
| "grad_norm": 0.14133213460445404, | |
| "learning_rate": 6.001419124602422e-06, | |
| "loss": 0.377, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.479028697571744, | |
| "grad_norm": 0.14672201871871948, | |
| "learning_rate": 5.988832279927818e-06, | |
| "loss": 0.3763, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4812362030905077, | |
| "grad_norm": 0.13457442820072174, | |
| "learning_rate": 5.976238910422811e-06, | |
| "loss": 0.381, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4834437086092715, | |
| "grad_norm": 0.15843527019023895, | |
| "learning_rate": 5.963639099185011e-06, | |
| "loss": 0.3773, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4856512141280354, | |
| "grad_norm": 0.15779507160186768, | |
| "learning_rate": 5.9510329293545356e-06, | |
| "loss": 0.3789, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.4878587196467992, | |
| "grad_norm": 0.14860793948173523, | |
| "learning_rate": 5.938420484113458e-06, | |
| "loss": 0.3765, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.490066225165563, | |
| "grad_norm": 0.13150885701179504, | |
| "learning_rate": 5.925801846685262e-06, | |
| "loss": 0.37, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4922737306843268, | |
| "grad_norm": 0.13564042747020721, | |
| "learning_rate": 5.913177100334287e-06, | |
| "loss": 0.3788, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4944812362030906, | |
| "grad_norm": 0.14382179081439972, | |
| "learning_rate": 5.900546328365189e-06, | |
| "loss": 0.381, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4966887417218544, | |
| "grad_norm": 0.14150989055633545, | |
| "learning_rate": 5.887909614122378e-06, | |
| "loss": 0.3683, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.498896247240618, | |
| "grad_norm": 0.13479964435100555, | |
| "learning_rate": 5.875267040989475e-06, | |
| "loss": 0.3651, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.501103752759382, | |
| "grad_norm": 0.14743578433990479, | |
| "learning_rate": 5.862618692388765e-06, | |
| "loss": 0.3676, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5033112582781456, | |
| "grad_norm": 0.1419716477394104, | |
| "learning_rate": 5.849964651780637e-06, | |
| "loss": 0.367, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.5055187637969096, | |
| "grad_norm": 0.15260837972164154, | |
| "learning_rate": 5.837305002663043e-06, | |
| "loss": 0.3814, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.5077262693156732, | |
| "grad_norm": 0.12932439148426056, | |
| "learning_rate": 5.824639828570943e-06, | |
| "loss": 0.3868, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.5099337748344372, | |
| "grad_norm": 0.15234258770942688, | |
| "learning_rate": 5.811969213075749e-06, | |
| "loss": 0.3695, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5121412803532008, | |
| "grad_norm": 0.1671096384525299, | |
| "learning_rate": 5.799293239784783e-06, | |
| "loss": 0.3821, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.5143487858719646, | |
| "grad_norm": 0.14161601662635803, | |
| "learning_rate": 5.786611992340719e-06, | |
| "loss": 0.3657, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5165562913907285, | |
| "grad_norm": 0.16359050571918488, | |
| "learning_rate": 5.773925554421033e-06, | |
| "loss": 0.3788, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.5187637969094923, | |
| "grad_norm": 0.14034205675125122, | |
| "learning_rate": 5.761234009737449e-06, | |
| "loss": 0.3779, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.520971302428256, | |
| "grad_norm": 0.14073529839515686, | |
| "learning_rate": 5.74853744203539e-06, | |
| "loss": 0.3756, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.5231788079470199, | |
| "grad_norm": 0.14516787230968475, | |
| "learning_rate": 5.735835935093421e-06, | |
| "loss": 0.3886, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5253863134657837, | |
| "grad_norm": 0.1415787935256958, | |
| "learning_rate": 5.723129572722701e-06, | |
| "loss": 0.375, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5275938189845475, | |
| "grad_norm": 0.13726189732551575, | |
| "learning_rate": 5.710418438766428e-06, | |
| "loss": 0.3735, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5298013245033113, | |
| "grad_norm": 0.14881715178489685, | |
| "learning_rate": 5.6977026170992834e-06, | |
| "loss": 0.3735, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.532008830022075, | |
| "grad_norm": 0.1551184505224228, | |
| "learning_rate": 5.68498219162688e-06, | |
| "loss": 0.3815, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.534216335540839, | |
| "grad_norm": 0.15148243308067322, | |
| "learning_rate": 5.672257246285212e-06, | |
| "loss": 0.3736, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5364238410596025, | |
| "grad_norm": 0.15206126868724823, | |
| "learning_rate": 5.659527865040097e-06, | |
| "loss": 0.3771, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5386313465783665, | |
| "grad_norm": 0.14559738337993622, | |
| "learning_rate": 5.6467941318866214e-06, | |
| "loss": 0.3742, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5408388520971301, | |
| "grad_norm": 0.13176165521144867, | |
| "learning_rate": 5.634056130848589e-06, | |
| "loss": 0.3715, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5430463576158941, | |
| "grad_norm": 0.14761999249458313, | |
| "learning_rate": 5.621313945977964e-06, | |
| "loss": 0.377, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5452538631346577, | |
| "grad_norm": 0.12837889790534973, | |
| "learning_rate": 5.60856766135432e-06, | |
| "loss": 0.371, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5474613686534218, | |
| "grad_norm": 0.1346467137336731, | |
| "learning_rate": 5.595817361084283e-06, | |
| "loss": 0.3757, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5496688741721854, | |
| "grad_norm": 0.129667267203331, | |
| "learning_rate": 5.583063129300971e-06, | |
| "loss": 0.3821, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5518763796909494, | |
| "grad_norm": 0.1456080973148346, | |
| "learning_rate": 5.570305050163453e-06, | |
| "loss": 0.3659, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.554083885209713, | |
| "grad_norm": 0.13144616782665253, | |
| "learning_rate": 5.557543207856175e-06, | |
| "loss": 0.3755, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5562913907284768, | |
| "grad_norm": 0.13695232570171356, | |
| "learning_rate": 5.544777686588424e-06, | |
| "loss": 0.3704, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5584988962472406, | |
| "grad_norm": 0.1521306186914444, | |
| "learning_rate": 5.532008570593756e-06, | |
| "loss": 0.3829, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5607064017660044, | |
| "grad_norm": 0.1419302225112915, | |
| "learning_rate": 5.519235944129448e-06, | |
| "loss": 0.371, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5629139072847682, | |
| "grad_norm": 0.14487622678279877, | |
| "learning_rate": 5.506459891475944e-06, | |
| "loss": 0.3785, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.565121412803532, | |
| "grad_norm": 0.15528716146945953, | |
| "learning_rate": 5.4936804969362924e-06, | |
| "loss": 0.3853, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5673289183222958, | |
| "grad_norm": 0.14845651388168335, | |
| "learning_rate": 5.480897844835595e-06, | |
| "loss": 0.3853, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5695364238410596, | |
| "grad_norm": 0.13595354557037354, | |
| "learning_rate": 5.4681120195204466e-06, | |
| "loss": 0.3718, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5717439293598234, | |
| "grad_norm": 0.15339161455631256, | |
| "learning_rate": 5.455323105358384e-06, | |
| "loss": 0.3791, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5739514348785872, | |
| "grad_norm": 0.1527385264635086, | |
| "learning_rate": 5.442531186737322e-06, | |
| "loss": 0.3703, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.576158940397351, | |
| "grad_norm": 0.13763274252414703, | |
| "learning_rate": 5.429736348065003e-06, | |
| "loss": 0.3638, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5783664459161146, | |
| "grad_norm": 0.16397210955619812, | |
| "learning_rate": 5.416938673768437e-06, | |
| "loss": 0.3761, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.5805739514348787, | |
| "grad_norm": 0.14585243165493011, | |
| "learning_rate": 5.404138248293346e-06, | |
| "loss": 0.3762, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5827814569536423, | |
| "grad_norm": 0.13906413316726685, | |
| "learning_rate": 5.3913351561036e-06, | |
| "loss": 0.3669, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.5849889624724063, | |
| "grad_norm": 0.13377633690834045, | |
| "learning_rate": 5.378529481680672e-06, | |
| "loss": 0.3721, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5871964679911699, | |
| "grad_norm": 0.14944018423557281, | |
| "learning_rate": 5.365721309523072e-06, | |
| "loss": 0.3739, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.589403973509934, | |
| "grad_norm": 0.14180952310562134, | |
| "learning_rate": 5.352910724145789e-06, | |
| "loss": 0.373, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5916114790286975, | |
| "grad_norm": 0.1462966352701187, | |
| "learning_rate": 5.340097810079741e-06, | |
| "loss": 0.3768, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5938189845474615, | |
| "grad_norm": 0.12976975739002228, | |
| "learning_rate": 5.327282651871205e-06, | |
| "loss": 0.3638, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.596026490066225, | |
| "grad_norm": 0.15462516248226166, | |
| "learning_rate": 5.3144653340812715e-06, | |
| "loss": 0.3854, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.598233995584989, | |
| "grad_norm": 0.13364708423614502, | |
| "learning_rate": 5.301645941285278e-06, | |
| "loss": 0.3862, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6004415011037527, | |
| "grad_norm": 0.14875876903533936, | |
| "learning_rate": 5.288824558072257e-06, | |
| "loss": 0.3847, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.6026490066225165, | |
| "grad_norm": 0.13750705122947693, | |
| "learning_rate": 5.276001269044369e-06, | |
| "loss": 0.3792, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6048565121412803, | |
| "grad_norm": 0.13552795350551605, | |
| "learning_rate": 5.263176158816355e-06, | |
| "loss": 0.3824, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.6070640176600441, | |
| "grad_norm": 0.13398821651935577, | |
| "learning_rate": 5.250349312014976e-06, | |
| "loss": 0.3728, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.609271523178808, | |
| "grad_norm": 0.14047983288764954, | |
| "learning_rate": 5.237520813278443e-06, | |
| "loss": 0.3833, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.6114790286975718, | |
| "grad_norm": 0.1419335901737213, | |
| "learning_rate": 5.224690747255875e-06, | |
| "loss": 0.3738, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.6136865342163356, | |
| "grad_norm": 0.1423855870962143, | |
| "learning_rate": 5.211859198606729e-06, | |
| "loss": 0.3702, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.6158940397350994, | |
| "grad_norm": 0.14920298755168915, | |
| "learning_rate": 5.199026252000245e-06, | |
| "loss": 0.3719, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6181015452538632, | |
| "grad_norm": 0.13715742528438568, | |
| "learning_rate": 5.186191992114892e-06, | |
| "loss": 0.3766, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6203090507726268, | |
| "grad_norm": 0.142774298787117, | |
| "learning_rate": 5.173356503637799e-06, | |
| "loss": 0.3693, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6225165562913908, | |
| "grad_norm": 0.12615923583507538, | |
| "learning_rate": 5.160519871264204e-06, | |
| "loss": 0.375, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.6247240618101544, | |
| "grad_norm": 0.14882232248783112, | |
| "learning_rate": 5.147682179696893e-06, | |
| "loss": 0.3746, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6269315673289184, | |
| "grad_norm": 0.12539036571979523, | |
| "learning_rate": 5.134843513645642e-06, | |
| "loss": 0.3786, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.629139072847682, | |
| "grad_norm": 0.14418825507164001, | |
| "learning_rate": 5.122003957826657e-06, | |
| "loss": 0.3693, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.631346578366446, | |
| "grad_norm": 0.14610478281974792, | |
| "learning_rate": 5.109163596962013e-06, | |
| "loss": 0.3759, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.6335540838852096, | |
| "grad_norm": 0.13532301783561707, | |
| "learning_rate": 5.096322515779101e-06, | |
| "loss": 0.373, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6357615894039736, | |
| "grad_norm": 0.14194665849208832, | |
| "learning_rate": 5.083480799010061e-06, | |
| "loss": 0.3762, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6379690949227372, | |
| "grad_norm": 0.12951509654521942, | |
| "learning_rate": 5.070638531391229e-06, | |
| "loss": 0.3723, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6401766004415013, | |
| "grad_norm": 0.14456267654895782, | |
| "learning_rate": 5.057795797662577e-06, | |
| "loss": 0.3666, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6423841059602649, | |
| "grad_norm": 0.13711507618427277, | |
| "learning_rate": 5.044952682567148e-06, | |
| "loss": 0.3749, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6445916114790287, | |
| "grad_norm": 0.12987838685512543, | |
| "learning_rate": 5.032109270850507e-06, | |
| "loss": 0.3779, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6467991169977925, | |
| "grad_norm": 0.1495376080274582, | |
| "learning_rate": 5.019265647260173e-06, | |
| "loss": 0.3704, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6490066225165563, | |
| "grad_norm": 0.13476036489009857, | |
| "learning_rate": 5.006421896545064e-06, | |
| "loss": 0.3663, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.65121412803532, | |
| "grad_norm": 0.1335316300392151, | |
| "learning_rate": 4.993578103454938e-06, | |
| "loss": 0.3843, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.653421633554084, | |
| "grad_norm": 0.13593193888664246, | |
| "learning_rate": 4.980734352739829e-06, | |
| "loss": 0.3707, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6556291390728477, | |
| "grad_norm": 0.13324475288391113, | |
| "learning_rate": 4.967890729149494e-06, | |
| "loss": 0.3746, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6578366445916115, | |
| "grad_norm": 0.12398661673069, | |
| "learning_rate": 4.955047317432854e-06, | |
| "loss": 0.3783, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6600441501103753, | |
| "grad_norm": 0.12614096701145172, | |
| "learning_rate": 4.942204202337425e-06, | |
| "loss": 0.3806, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6622516556291391, | |
| "grad_norm": 0.14331743121147156, | |
| "learning_rate": 4.929361468608773e-06, | |
| "loss": 0.3833, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.664459161147903, | |
| "grad_norm": 0.1342383772134781, | |
| "learning_rate": 4.9165192009899414e-06, | |
| "loss": 0.3799, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.13297194242477417, | |
| "learning_rate": 4.9036774842209e-06, | |
| "loss": 0.3702, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6688741721854305, | |
| "grad_norm": 0.12575410306453705, | |
| "learning_rate": 4.890836403037988e-06, | |
| "loss": 0.364, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6710816777041941, | |
| "grad_norm": 0.1317722052335739, | |
| "learning_rate": 4.877996042173345e-06, | |
| "loss": 0.3782, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.6732891832229582, | |
| "grad_norm": 0.13363225758075714, | |
| "learning_rate": 4.865156486354359e-06, | |
| "loss": 0.3804, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6754966887417218, | |
| "grad_norm": 0.14083701372146606, | |
| "learning_rate": 4.852317820303107e-06, | |
| "loss": 0.38, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.6777041942604858, | |
| "grad_norm": 0.1284651756286621, | |
| "learning_rate": 4.839480128735798e-06, | |
| "loss": 0.3655, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6799116997792494, | |
| "grad_norm": 0.1379299908876419, | |
| "learning_rate": 4.826643496362202e-06, | |
| "loss": 0.3779, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6821192052980134, | |
| "grad_norm": 0.13508039712905884, | |
| "learning_rate": 4.81380800788511e-06, | |
| "loss": 0.3785, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.684326710816777, | |
| "grad_norm": 0.14211615920066833, | |
| "learning_rate": 4.800973747999757e-06, | |
| "loss": 0.3648, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6865342163355408, | |
| "grad_norm": 0.1293489634990692, | |
| "learning_rate": 4.788140801393273e-06, | |
| "loss": 0.3663, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6887417218543046, | |
| "grad_norm": 0.13812755048274994, | |
| "learning_rate": 4.775309252744126e-06, | |
| "loss": 0.3643, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.6909492273730684, | |
| "grad_norm": 0.13112276792526245, | |
| "learning_rate": 4.762479186721559e-06, | |
| "loss": 0.3703, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6931567328918322, | |
| "grad_norm": 0.12749050557613373, | |
| "learning_rate": 4.7496506879850264e-06, | |
| "loss": 0.3844, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.695364238410596, | |
| "grad_norm": 0.1374199390411377, | |
| "learning_rate": 4.736823841183645e-06, | |
| "loss": 0.3689, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6975717439293598, | |
| "grad_norm": 0.12659025192260742, | |
| "learning_rate": 4.723998730955633e-06, | |
| "loss": 0.3606, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6997792494481236, | |
| "grad_norm": 0.11276005953550339, | |
| "learning_rate": 4.711175441927746e-06, | |
| "loss": 0.3722, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7019867549668874, | |
| "grad_norm": 0.12801696360111237, | |
| "learning_rate": 4.698354058714724e-06, | |
| "loss": 0.3789, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.7041942604856513, | |
| "grad_norm": 0.12910126149654388, | |
| "learning_rate": 4.685534665918731e-06, | |
| "loss": 0.3669, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.706401766004415, | |
| "grad_norm": 0.12605293095111847, | |
| "learning_rate": 4.672717348128796e-06, | |
| "loss": 0.377, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.7086092715231787, | |
| "grad_norm": 0.12655854225158691, | |
| "learning_rate": 4.65990218992026e-06, | |
| "loss": 0.3738, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7108167770419427, | |
| "grad_norm": 0.1326448917388916, | |
| "learning_rate": 4.647089275854212e-06, | |
| "loss": 0.3847, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.7130242825607063, | |
| "grad_norm": 0.12570975720882416, | |
| "learning_rate": 4.63427869047693e-06, | |
| "loss": 0.3652, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7152317880794703, | |
| "grad_norm": 0.13106000423431396, | |
| "learning_rate": 4.621470518319329e-06, | |
| "loss": 0.3781, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.7174392935982339, | |
| "grad_norm": 0.12707674503326416, | |
| "learning_rate": 4.608664843896402e-06, | |
| "loss": 0.367, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.719646799116998, | |
| "grad_norm": 0.1342884600162506, | |
| "learning_rate": 4.595861751706656e-06, | |
| "loss": 0.3827, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7218543046357615, | |
| "grad_norm": 0.15550127625465393, | |
| "learning_rate": 4.583061326231564e-06, | |
| "loss": 0.384, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7240618101545255, | |
| "grad_norm": 0.1267244517803192, | |
| "learning_rate": 4.570263651934998e-06, | |
| "loss": 0.38, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.7262693156732891, | |
| "grad_norm": 0.13444717228412628, | |
| "learning_rate": 4.557468813262678e-06, | |
| "loss": 0.3856, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.728476821192053, | |
| "grad_norm": 0.12585113942623138, | |
| "learning_rate": 4.544676894641617e-06, | |
| "loss": 0.3603, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.7306843267108167, | |
| "grad_norm": 0.14303074777126312, | |
| "learning_rate": 4.531887980479555e-06, | |
| "loss": 0.3769, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7328918322295805, | |
| "grad_norm": 0.14289291203022003, | |
| "learning_rate": 4.519102155164407e-06, | |
| "loss": 0.3679, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.7350993377483444, | |
| "grad_norm": 0.13349799811840057, | |
| "learning_rate": 4.506319503063708e-06, | |
| "loss": 0.3702, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7373068432671082, | |
| "grad_norm": 0.15247680246829987, | |
| "learning_rate": 4.493540108524057e-06, | |
| "loss": 0.3713, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.739514348785872, | |
| "grad_norm": 0.14266639947891235, | |
| "learning_rate": 4.480764055870553e-06, | |
| "loss": 0.3742, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7417218543046358, | |
| "grad_norm": 0.13838379085063934, | |
| "learning_rate": 4.467991429406247e-06, | |
| "loss": 0.3857, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.7439293598233996, | |
| "grad_norm": 0.13494673371315002, | |
| "learning_rate": 4.455222313411578e-06, | |
| "loss": 0.3783, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7461368653421634, | |
| "grad_norm": 0.12723763287067413, | |
| "learning_rate": 4.442456792143825e-06, | |
| "loss": 0.3657, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7483443708609272, | |
| "grad_norm": 0.1302330642938614, | |
| "learning_rate": 4.4296949498365496e-06, | |
| "loss": 0.3671, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7505518763796908, | |
| "grad_norm": 0.12404655665159225, | |
| "learning_rate": 4.416936870699031e-06, | |
| "loss": 0.3755, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7527593818984548, | |
| "grad_norm": 0.13801135122776031, | |
| "learning_rate": 4.40418263891572e-06, | |
| "loss": 0.377, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7549668874172184, | |
| "grad_norm": 0.1327562779188156, | |
| "learning_rate": 4.391432338645681e-06, | |
| "loss": 0.3649, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7571743929359824, | |
| "grad_norm": 0.13369016349315643, | |
| "learning_rate": 4.378686054022037e-06, | |
| "loss": 0.3882, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.759381898454746, | |
| "grad_norm": 0.14249202609062195, | |
| "learning_rate": 4.365943869151412e-06, | |
| "loss": 0.3763, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.76158940397351, | |
| "grad_norm": 0.1298934519290924, | |
| "learning_rate": 4.35320586811338e-06, | |
| "loss": 0.3742, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7637969094922736, | |
| "grad_norm": 0.14034999907016754, | |
| "learning_rate": 4.3404721349599044e-06, | |
| "loss": 0.3685, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7660044150110377, | |
| "grad_norm": 0.12729597091674805, | |
| "learning_rate": 4.327742753714788e-06, | |
| "loss": 0.3742, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7682119205298013, | |
| "grad_norm": 0.13787223398685455, | |
| "learning_rate": 4.315017808373121e-06, | |
| "loss": 0.3718, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.7704194260485653, | |
| "grad_norm": 0.13648386299610138, | |
| "learning_rate": 4.302297382900718e-06, | |
| "loss": 0.3722, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7726269315673289, | |
| "grad_norm": 0.12655648589134216, | |
| "learning_rate": 4.289581561233574e-06, | |
| "loss": 0.3667, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.7748344370860927, | |
| "grad_norm": 0.12654156982898712, | |
| "learning_rate": 4.276870427277299e-06, | |
| "loss": 0.3666, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7770419426048565, | |
| "grad_norm": 0.15781165659427643, | |
| "learning_rate": 4.264164064906581e-06, | |
| "loss": 0.3787, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.7792494481236203, | |
| "grad_norm": 0.12070343643426895, | |
| "learning_rate": 4.251462557964612e-06, | |
| "loss": 0.3897, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.781456953642384, | |
| "grad_norm": 0.1305495798587799, | |
| "learning_rate": 4.238765990262554e-06, | |
| "loss": 0.3622, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.783664459161148, | |
| "grad_norm": 0.12424585223197937, | |
| "learning_rate": 4.226074445578969e-06, | |
| "loss": 0.3732, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7858719646799117, | |
| "grad_norm": 0.12352015823125839, | |
| "learning_rate": 4.213388007659281e-06, | |
| "loss": 0.367, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7880794701986755, | |
| "grad_norm": 0.1388520896434784, | |
| "learning_rate": 4.200706760215219e-06, | |
| "loss": 0.3723, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7902869757174393, | |
| "grad_norm": 0.13200843334197998, | |
| "learning_rate": 4.188030786924252e-06, | |
| "loss": 0.3765, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.7924944812362031, | |
| "grad_norm": 0.11911409348249435, | |
| "learning_rate": 4.17536017142906e-06, | |
| "loss": 0.3694, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.794701986754967, | |
| "grad_norm": 0.1263428032398224, | |
| "learning_rate": 4.162694997336957e-06, | |
| "loss": 0.3656, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.7969094922737305, | |
| "grad_norm": 0.125799298286438, | |
| "learning_rate": 4.1500353482193646e-06, | |
| "loss": 0.3686, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7991169977924946, | |
| "grad_norm": 0.13606040179729462, | |
| "learning_rate": 4.137381307611236e-06, | |
| "loss": 0.3579, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.8013245033112582, | |
| "grad_norm": 0.13091742992401123, | |
| "learning_rate": 4.124732959010526e-06, | |
| "loss": 0.369, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8035320088300222, | |
| "grad_norm": 0.13721388578414917, | |
| "learning_rate": 4.112090385877624e-06, | |
| "loss": 0.3673, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.8057395143487858, | |
| "grad_norm": 0.12669645249843597, | |
| "learning_rate": 4.099453671634811e-06, | |
| "loss": 0.3778, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8079470198675498, | |
| "grad_norm": 0.12563163042068481, | |
| "learning_rate": 4.086822899665713e-06, | |
| "loss": 0.3687, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.8101545253863134, | |
| "grad_norm": 0.1383393555879593, | |
| "learning_rate": 4.07419815331474e-06, | |
| "loss": 0.3787, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8123620309050774, | |
| "grad_norm": 0.1277354508638382, | |
| "learning_rate": 4.061579515886544e-06, | |
| "loss": 0.3752, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.814569536423841, | |
| "grad_norm": 0.12579599022865295, | |
| "learning_rate": 4.048967070645465e-06, | |
| "loss": 0.3767, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8167770419426048, | |
| "grad_norm": 0.1388178914785385, | |
| "learning_rate": 4.03636090081499e-06, | |
| "loss": 0.3795, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.8189845474613686, | |
| "grad_norm": 0.13451789319515228, | |
| "learning_rate": 4.0237610895771895e-06, | |
| "loss": 0.3636, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8211920529801324, | |
| "grad_norm": 0.1298052817583084, | |
| "learning_rate": 4.011167720072183e-06, | |
| "loss": 0.3831, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.8233995584988962, | |
| "grad_norm": 0.13578468561172485, | |
| "learning_rate": 3.99858087539758e-06, | |
| "loss": 0.3787, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.82560706401766, | |
| "grad_norm": 0.1320340931415558, | |
| "learning_rate": 3.986000638607937e-06, | |
| "loss": 0.3697, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.8278145695364238, | |
| "grad_norm": 0.13334733247756958, | |
| "learning_rate": 3.973427092714212e-06, | |
| "loss": 0.361, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8300220750551877, | |
| "grad_norm": 0.12365536391735077, | |
| "learning_rate": 3.960860320683205e-06, | |
| "loss": 0.3716, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.8322295805739515, | |
| "grad_norm": 0.1276884227991104, | |
| "learning_rate": 3.948300405437026e-06, | |
| "loss": 0.3717, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8344370860927153, | |
| "grad_norm": 0.13205379247665405, | |
| "learning_rate": 3.9357474298525356e-06, | |
| "loss": 0.3632, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.836644591611479, | |
| "grad_norm": 0.12293750792741776, | |
| "learning_rate": 3.9232014767608065e-06, | |
| "loss": 0.3809, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8388520971302427, | |
| "grad_norm": 0.1293838620185852, | |
| "learning_rate": 3.910662628946573e-06, | |
| "loss": 0.3782, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8410596026490067, | |
| "grad_norm": 0.1325875222682953, | |
| "learning_rate": 3.89813096914768e-06, | |
| "loss": 0.3734, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8432671081677703, | |
| "grad_norm": 0.12951777875423431, | |
| "learning_rate": 3.8856065800545475e-06, | |
| "loss": 0.3828, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8454746136865343, | |
| "grad_norm": 0.12658190727233887, | |
| "learning_rate": 3.873089544309616e-06, | |
| "loss": 0.3753, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.847682119205298, | |
| "grad_norm": 0.12527629733085632, | |
| "learning_rate": 3.860579944506811e-06, | |
| "loss": 0.3716, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.849889624724062, | |
| "grad_norm": 0.12750259041786194, | |
| "learning_rate": 3.848077863190985e-06, | |
| "loss": 0.3727, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8520971302428255, | |
| "grad_norm": 0.13881564140319824, | |
| "learning_rate": 3.835583382857382e-06, | |
| "loss": 0.3743, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8543046357615895, | |
| "grad_norm": 0.12845860421657562, | |
| "learning_rate": 3.82309658595109e-06, | |
| "loss": 0.3692, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8565121412803531, | |
| "grad_norm": 0.12954264879226685, | |
| "learning_rate": 3.8106175548664994e-06, | |
| "loss": 0.3778, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.858719646799117, | |
| "grad_norm": 0.13028046488761902, | |
| "learning_rate": 3.7981463719467587e-06, | |
| "loss": 0.3825, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8609271523178808, | |
| "grad_norm": 0.1258542686700821, | |
| "learning_rate": 3.7856831194832262e-06, | |
| "loss": 0.3817, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8631346578366446, | |
| "grad_norm": 0.1192869246006012, | |
| "learning_rate": 3.7732278797149324e-06, | |
| "loss": 0.3706, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8653421633554084, | |
| "grad_norm": 0.13153821229934692, | |
| "learning_rate": 3.7607807348280364e-06, | |
| "loss": 0.3626, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8675496688741722, | |
| "grad_norm": 0.12072479724884033, | |
| "learning_rate": 3.7483417669552857e-06, | |
| "loss": 0.3557, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.869757174392936, | |
| "grad_norm": 0.12467250972986221, | |
| "learning_rate": 3.735911058175467e-06, | |
| "loss": 0.3621, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8719646799116998, | |
| "grad_norm": 0.12176632136106491, | |
| "learning_rate": 3.7234886905128724e-06, | |
| "loss": 0.3726, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8741721854304636, | |
| "grad_norm": 0.1291275918483734, | |
| "learning_rate": 3.711074745936751e-06, | |
| "loss": 0.3663, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8763796909492274, | |
| "grad_norm": 0.1401110738515854, | |
| "learning_rate": 3.698669306360778e-06, | |
| "loss": 0.3763, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8785871964679912, | |
| "grad_norm": 0.12075633555650711, | |
| "learning_rate": 3.6862724536425042e-06, | |
| "loss": 0.3761, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.8807947019867548, | |
| "grad_norm": 0.12913060188293457, | |
| "learning_rate": 3.67388426958282e-06, | |
| "loss": 0.3735, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8830022075055188, | |
| "grad_norm": 0.13538530468940735, | |
| "learning_rate": 3.6615048359254155e-06, | |
| "loss": 0.3737, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8852097130242824, | |
| "grad_norm": 0.1396217793226242, | |
| "learning_rate": 3.649134234356242e-06, | |
| "loss": 0.3765, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8874172185430464, | |
| "grad_norm": 0.12726326286792755, | |
| "learning_rate": 3.636772546502973e-06, | |
| "loss": 0.3706, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.88962472406181, | |
| "grad_norm": 0.12331200391054153, | |
| "learning_rate": 3.624419853934461e-06, | |
| "loss": 0.3698, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.891832229580574, | |
| "grad_norm": 0.12955856323242188, | |
| "learning_rate": 3.612076238160209e-06, | |
| "loss": 0.3779, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8940397350993377, | |
| "grad_norm": 0.1281704604625702, | |
| "learning_rate": 3.5997417806298172e-06, | |
| "loss": 0.3711, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8962472406181017, | |
| "grad_norm": 0.13094061613082886, | |
| "learning_rate": 3.5874165627324665e-06, | |
| "loss": 0.3706, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.8984547461368653, | |
| "grad_norm": 0.1244734600186348, | |
| "learning_rate": 3.575100665796362e-06, | |
| "loss": 0.3701, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9006622516556293, | |
| "grad_norm": 0.13025392591953278, | |
| "learning_rate": 3.5627941710882042e-06, | |
| "loss": 0.3774, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.9028697571743929, | |
| "grad_norm": 0.13566212356090546, | |
| "learning_rate": 3.550497159812658e-06, | |
| "loss": 0.376, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9050772626931567, | |
| "grad_norm": 0.13212575018405914, | |
| "learning_rate": 3.5382097131118052e-06, | |
| "loss": 0.3759, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.9072847682119205, | |
| "grad_norm": 0.13214178383350372, | |
| "learning_rate": 3.525931912064623e-06, | |
| "loss": 0.3706, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9094922737306843, | |
| "grad_norm": 0.1290368288755417, | |
| "learning_rate": 3.5136638376864333e-06, | |
| "loss": 0.3767, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.9116997792494481, | |
| "grad_norm": 0.1320725828409195, | |
| "learning_rate": 3.5014055709283847e-06, | |
| "loss": 0.3704, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.913907284768212, | |
| "grad_norm": 0.13395248353481293, | |
| "learning_rate": 3.4891571926769e-06, | |
| "loss": 0.3774, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.9161147902869757, | |
| "grad_norm": 0.12245456129312515, | |
| "learning_rate": 3.4769187837531637e-06, | |
| "loss": 0.3872, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9183222958057395, | |
| "grad_norm": 0.1327778398990631, | |
| "learning_rate": 3.4646904249125713e-06, | |
| "loss": 0.3666, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.9205298013245033, | |
| "grad_norm": 0.13339614868164062, | |
| "learning_rate": 3.4524721968442017e-06, | |
| "loss": 0.3782, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9227373068432672, | |
| "grad_norm": 0.12259134650230408, | |
| "learning_rate": 3.4402641801702883e-06, | |
| "loss": 0.38, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.924944812362031, | |
| "grad_norm": 0.12124523520469666, | |
| "learning_rate": 3.4280664554456823e-06, | |
| "loss": 0.3746, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9271523178807946, | |
| "grad_norm": 0.12291016429662704, | |
| "learning_rate": 3.415879103157327e-06, | |
| "loss": 0.3697, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9293598233995586, | |
| "grad_norm": 0.12792356312274933, | |
| "learning_rate": 3.403702203723718e-06, | |
| "loss": 0.3755, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.9315673289183222, | |
| "grad_norm": 0.1225101426243782, | |
| "learning_rate": 3.3915358374943813e-06, | |
| "loss": 0.3821, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.9337748344370862, | |
| "grad_norm": 0.1265992522239685, | |
| "learning_rate": 3.379380084749336e-06, | |
| "loss": 0.3672, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9359823399558498, | |
| "grad_norm": 0.12633873522281647, | |
| "learning_rate": 3.3672350256985727e-06, | |
| "loss": 0.3766, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.9381898454746138, | |
| "grad_norm": 0.1319660246372223, | |
| "learning_rate": 3.3551007404815162e-06, | |
| "loss": 0.374, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9403973509933774, | |
| "grad_norm": 0.12871594727039337, | |
| "learning_rate": 3.3429773091664985e-06, | |
| "loss": 0.3673, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9426048565121414, | |
| "grad_norm": 0.13455092906951904, | |
| "learning_rate": 3.330864811750235e-06, | |
| "loss": 0.3805, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.944812362030905, | |
| "grad_norm": 0.13253872096538544, | |
| "learning_rate": 3.3187633281572913e-06, | |
| "loss": 0.3695, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9470198675496688, | |
| "grad_norm": 0.11975626647472382, | |
| "learning_rate": 3.306672938239561e-06, | |
| "loss": 0.3767, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9492273730684326, | |
| "grad_norm": 0.12552706897258759, | |
| "learning_rate": 3.2945937217757324e-06, | |
| "loss": 0.363, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9514348785871964, | |
| "grad_norm": 0.13104894757270813, | |
| "learning_rate": 3.282525758470768e-06, | |
| "loss": 0.3699, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9536423841059603, | |
| "grad_norm": 0.12783929705619812, | |
| "learning_rate": 3.2704691279553725e-06, | |
| "loss": 0.3681, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.955849889624724, | |
| "grad_norm": 0.12647448480129242, | |
| "learning_rate": 3.2584239097854763e-06, | |
| "loss": 0.3873, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9580573951434879, | |
| "grad_norm": 0.11794891208410263, | |
| "learning_rate": 3.246390183441703e-06, | |
| "loss": 0.3689, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9602649006622517, | |
| "grad_norm": 0.1255064755678177, | |
| "learning_rate": 3.234368028328845e-06, | |
| "loss": 0.3874, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9624724061810155, | |
| "grad_norm": 0.12461333721876144, | |
| "learning_rate": 3.2223575237753448e-06, | |
| "loss": 0.3719, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9646799116997793, | |
| "grad_norm": 0.13075792789459229, | |
| "learning_rate": 3.2103587490327624e-06, | |
| "loss": 0.3746, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.966887417218543, | |
| "grad_norm": 0.13413941860198975, | |
| "learning_rate": 3.198371783275269e-06, | |
| "loss": 0.3723, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9690949227373067, | |
| "grad_norm": 0.13534380495548248, | |
| "learning_rate": 3.186396705599106e-06, | |
| "loss": 0.3762, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9713024282560707, | |
| "grad_norm": 0.12702573835849762, | |
| "learning_rate": 3.1744335950220716e-06, | |
| "loss": 0.3763, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9735099337748343, | |
| "grad_norm": 0.131902277469635, | |
| "learning_rate": 3.162482530483e-06, | |
| "loss": 0.3647, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9757174392935983, | |
| "grad_norm": 0.12826719880104065, | |
| "learning_rate": 3.1505435908412415e-06, | |
| "loss": 0.3705, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.977924944812362, | |
| "grad_norm": 0.1235160231590271, | |
| "learning_rate": 3.1386168548761377e-06, | |
| "loss": 0.3806, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.980132450331126, | |
| "grad_norm": 0.13026514649391174, | |
| "learning_rate": 3.126702401286503e-06, | |
| "loss": 0.3714, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.9823399558498895, | |
| "grad_norm": 0.12384995073080063, | |
| "learning_rate": 3.11480030869011e-06, | |
| "loss": 0.3775, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9845474613686536, | |
| "grad_norm": 0.11622393131256104, | |
| "learning_rate": 3.1029106556231615e-06, | |
| "loss": 0.3685, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9867549668874172, | |
| "grad_norm": 0.12467402219772339, | |
| "learning_rate": 3.0910335205397834e-06, | |
| "loss": 0.3753, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9889624724061812, | |
| "grad_norm": 0.12134755402803421, | |
| "learning_rate": 3.0791689818114988e-06, | |
| "loss": 0.3763, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.9911699779249448, | |
| "grad_norm": 0.118636354804039, | |
| "learning_rate": 3.067317117726715e-06, | |
| "loss": 0.3779, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9933774834437086, | |
| "grad_norm": 0.12621286511421204, | |
| "learning_rate": 3.055478006490199e-06, | |
| "loss": 0.3755, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9955849889624724, | |
| "grad_norm": 0.13427864015102386, | |
| "learning_rate": 3.043651726222578e-06, | |
| "loss": 0.3677, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9977924944812362, | |
| "grad_norm": 0.1349465250968933, | |
| "learning_rate": 3.0318383549598085e-06, | |
| "loss": 0.3821, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.1383267641067505, | |
| "learning_rate": 3.020037970652664e-06, | |
| "loss": 0.3605, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0022075055187636, | |
| "grad_norm": 0.1400349736213684, | |
| "learning_rate": 3.00825065116623e-06, | |
| "loss": 0.3587, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.0044150110375276, | |
| "grad_norm": 0.12129946798086166, | |
| "learning_rate": 2.996476474279375e-06, | |
| "loss": 0.3564, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.006622516556291, | |
| "grad_norm": 0.13566596806049347, | |
| "learning_rate": 2.9847155176842547e-06, | |
| "loss": 0.3628, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.0088300220750552, | |
| "grad_norm": 0.12823748588562012, | |
| "learning_rate": 2.9729678589857852e-06, | |
| "loss": 0.3589, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.011037527593819, | |
| "grad_norm": 0.1251477748155594, | |
| "learning_rate": 2.9612335757011362e-06, | |
| "loss": 0.3583, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.013245033112583, | |
| "grad_norm": 0.1308368593454361, | |
| "learning_rate": 2.94951274525922e-06, | |
| "loss": 0.354, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0154525386313464, | |
| "grad_norm": 0.1357547789812088, | |
| "learning_rate": 2.9378054450001836e-06, | |
| "loss": 0.3664, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.0176600441501105, | |
| "grad_norm": 0.13018201291561127, | |
| "learning_rate": 2.9261117521748904e-06, | |
| "loss": 0.3627, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.019867549668874, | |
| "grad_norm": 0.12550939619541168, | |
| "learning_rate": 2.914431743944414e-06, | |
| "loss": 0.3518, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.022075055187638, | |
| "grad_norm": 0.12784212827682495, | |
| "learning_rate": 2.902765497379534e-06, | |
| "loss": 0.3547, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0242825607064017, | |
| "grad_norm": 0.13502328097820282, | |
| "learning_rate": 2.8911130894602198e-06, | |
| "loss": 0.3648, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.0264900662251657, | |
| "grad_norm": 0.12056614458560944, | |
| "learning_rate": 2.8794745970751308e-06, | |
| "loss": 0.3612, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0286975717439293, | |
| "grad_norm": 0.12976831197738647, | |
| "learning_rate": 2.8678500970210977e-06, | |
| "loss": 0.3619, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.0309050772626933, | |
| "grad_norm": 0.13974756002426147, | |
| "learning_rate": 2.8562396660026304e-06, | |
| "loss": 0.3585, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.033112582781457, | |
| "grad_norm": 0.1298479437828064, | |
| "learning_rate": 2.8446433806313966e-06, | |
| "loss": 0.3533, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.035320088300221, | |
| "grad_norm": 0.13592906296253204, | |
| "learning_rate": 2.83306131742573e-06, | |
| "loss": 0.36, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.0375275938189845, | |
| "grad_norm": 0.13170795142650604, | |
| "learning_rate": 2.821493552810114e-06, | |
| "loss": 0.3535, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.0397350993377485, | |
| "grad_norm": 0.14562806487083435, | |
| "learning_rate": 2.809940163114685e-06, | |
| "loss": 0.3737, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.041942604856512, | |
| "grad_norm": 0.12969909608364105, | |
| "learning_rate": 2.7984012245747237e-06, | |
| "loss": 0.3597, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.044150110375276, | |
| "grad_norm": 0.13986288011074066, | |
| "learning_rate": 2.786876813330158e-06, | |
| "loss": 0.3608, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.0463576158940397, | |
| "grad_norm": 0.13949252665042877, | |
| "learning_rate": 2.7753670054250583e-06, | |
| "loss": 0.3507, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0485651214128033, | |
| "grad_norm": 0.12819348275661469, | |
| "learning_rate": 2.763871876807129e-06, | |
| "loss": 0.356, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0507726269315674, | |
| "grad_norm": 0.13626167178153992, | |
| "learning_rate": 2.7523915033272163e-06, | |
| "loss": 0.3534, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.052980132450331, | |
| "grad_norm": 0.14214330911636353, | |
| "learning_rate": 2.740925960738802e-06, | |
| "loss": 0.3627, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.055187637969095, | |
| "grad_norm": 0.1371086835861206, | |
| "learning_rate": 2.72947532469751e-06, | |
| "loss": 0.3594, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.0573951434878586, | |
| "grad_norm": 0.15132765471935272, | |
| "learning_rate": 2.7180396707606023e-06, | |
| "loss": 0.3487, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.0596026490066226, | |
| "grad_norm": 0.1387225240468979, | |
| "learning_rate": 2.7066190743864774e-06, | |
| "loss": 0.3625, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.061810154525386, | |
| "grad_norm": 0.13822317123413086, | |
| "learning_rate": 2.695213610934179e-06, | |
| "loss": 0.3542, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.06401766004415, | |
| "grad_norm": 0.1481810212135315, | |
| "learning_rate": 2.6838233556628932e-06, | |
| "loss": 0.3543, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.066225165562914, | |
| "grad_norm": 0.1475333869457245, | |
| "learning_rate": 2.672448383731461e-06, | |
| "loss": 0.3587, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.068432671081678, | |
| "grad_norm": 0.13259293138980865, | |
| "learning_rate": 2.661088770197868e-06, | |
| "loss": 0.3531, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.0706401766004414, | |
| "grad_norm": 0.129195436835289, | |
| "learning_rate": 2.6497445900187635e-06, | |
| "loss": 0.3557, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0728476821192054, | |
| "grad_norm": 0.12847961485385895, | |
| "learning_rate": 2.6384159180489542e-06, | |
| "loss": 0.3477, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.075055187637969, | |
| "grad_norm": 0.13196007907390594, | |
| "learning_rate": 2.6271028290409216e-06, | |
| "loss": 0.3584, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.077262693156733, | |
| "grad_norm": 0.1364007443189621, | |
| "learning_rate": 2.6158053976443164e-06, | |
| "loss": 0.3601, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.0794701986754967, | |
| "grad_norm": 0.1247749775648117, | |
| "learning_rate": 2.604523698405477e-06, | |
| "loss": 0.3647, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0816777041942607, | |
| "grad_norm": 0.12793247401714325, | |
| "learning_rate": 2.5932578057669273e-06, | |
| "loss": 0.3656, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.0838852097130243, | |
| "grad_norm": 0.12500843405723572, | |
| "learning_rate": 2.5820077940668975e-06, | |
| "loss": 0.3511, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.0860927152317883, | |
| "grad_norm": 0.13672660291194916, | |
| "learning_rate": 2.570773737538825e-06, | |
| "loss": 0.3709, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.088300220750552, | |
| "grad_norm": 0.1374640017747879, | |
| "learning_rate": 2.559555710310866e-06, | |
| "loss": 0.3528, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0905077262693155, | |
| "grad_norm": 0.13424260914325714, | |
| "learning_rate": 2.5483537864054055e-06, | |
| "loss": 0.354, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0927152317880795, | |
| "grad_norm": 0.13996686041355133, | |
| "learning_rate": 2.537168039738569e-06, | |
| "loss": 0.3655, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.094922737306843, | |
| "grad_norm": 0.12471663951873779, | |
| "learning_rate": 2.525998544119743e-06, | |
| "loss": 0.3606, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.097130242825607, | |
| "grad_norm": 0.13321642577648163, | |
| "learning_rate": 2.514845373251078e-06, | |
| "loss": 0.3477, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0993377483443707, | |
| "grad_norm": 0.1407744139432907, | |
| "learning_rate": 2.5037086007270017e-06, | |
| "loss": 0.3566, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.1015452538631347, | |
| "grad_norm": 0.1319989413022995, | |
| "learning_rate": 2.4925883000337407e-06, | |
| "loss": 0.3511, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.1037527593818983, | |
| "grad_norm": 0.12347118556499481, | |
| "learning_rate": 2.4814845445488288e-06, | |
| "loss": 0.3609, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.1059602649006623, | |
| "grad_norm": 0.15142813324928284, | |
| "learning_rate": 2.4703974075406316e-06, | |
| "loss": 0.3581, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.108167770419426, | |
| "grad_norm": 0.12996384501457214, | |
| "learning_rate": 2.4593269621678502e-06, | |
| "loss": 0.3586, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.11037527593819, | |
| "grad_norm": 0.12407524138689041, | |
| "learning_rate": 2.448273281479052e-06, | |
| "loss": 0.3561, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1125827814569536, | |
| "grad_norm": 0.12946733832359314, | |
| "learning_rate": 2.437236438412177e-06, | |
| "loss": 0.3637, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.1147902869757176, | |
| "grad_norm": 0.14091919362545013, | |
| "learning_rate": 2.426216505794066e-06, | |
| "loss": 0.3572, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.116997792494481, | |
| "grad_norm": 0.13770008087158203, | |
| "learning_rate": 2.4152135563399743e-06, | |
| "loss": 0.3617, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.119205298013245, | |
| "grad_norm": 0.1215134859085083, | |
| "learning_rate": 2.404227662653089e-06, | |
| "loss": 0.3613, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.121412803532009, | |
| "grad_norm": 0.13981877267360687, | |
| "learning_rate": 2.3932588972240642e-06, | |
| "loss": 0.3601, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.123620309050773, | |
| "grad_norm": 0.13256755471229553, | |
| "learning_rate": 2.3823073324305222e-06, | |
| "loss": 0.3551, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1258278145695364, | |
| "grad_norm": 0.14095091819763184, | |
| "learning_rate": 2.3713730405365965e-06, | |
| "loss": 0.3671, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.1280353200883004, | |
| "grad_norm": 0.11968285590410233, | |
| "learning_rate": 2.3604560936924363e-06, | |
| "loss": 0.3558, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.130242825607064, | |
| "grad_norm": 0.13734926283359528, | |
| "learning_rate": 2.3495565639337446e-06, | |
| "loss": 0.3653, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.1324503311258276, | |
| "grad_norm": 0.13910116255283356, | |
| "learning_rate": 2.3386745231812923e-06, | |
| "loss": 0.3559, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1346578366445916, | |
| "grad_norm": 0.13142208755016327, | |
| "learning_rate": 2.3278100432404554e-06, | |
| "loss": 0.3571, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.136865342163355, | |
| "grad_norm": 0.12986791133880615, | |
| "learning_rate": 2.3169631958007316e-06, | |
| "loss": 0.3613, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1390728476821192, | |
| "grad_norm": 0.12400130182504654, | |
| "learning_rate": 2.3061340524352687e-06, | |
| "loss": 0.3596, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.141280353200883, | |
| "grad_norm": 0.12488456070423126, | |
| "learning_rate": 2.295322684600395e-06, | |
| "loss": 0.3571, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.143487858719647, | |
| "grad_norm": 0.13525961339473724, | |
| "learning_rate": 2.284529163635144e-06, | |
| "loss": 0.352, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.1456953642384105, | |
| "grad_norm": 0.14086809754371643, | |
| "learning_rate": 2.273753560760793e-06, | |
| "loss": 0.3529, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1479028697571745, | |
| "grad_norm": 0.13002382218837738, | |
| "learning_rate": 2.262995947080378e-06, | |
| "loss": 0.3628, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.150110375275938, | |
| "grad_norm": 0.13714134693145752, | |
| "learning_rate": 2.252256393578241e-06, | |
| "loss": 0.3602, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.152317880794702, | |
| "grad_norm": 0.1353500783443451, | |
| "learning_rate": 2.2415349711195455e-06, | |
| "loss": 0.3562, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1545253863134657, | |
| "grad_norm": 0.14100465178489685, | |
| "learning_rate": 2.230831750449825e-06, | |
| "loss": 0.3562, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1567328918322297, | |
| "grad_norm": 0.11746090650558472, | |
| "learning_rate": 2.2201468021945024e-06, | |
| "loss": 0.3679, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.1589403973509933, | |
| "grad_norm": 0.13000161945819855, | |
| "learning_rate": 2.209480196858429e-06, | |
| "loss": 0.3579, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.1611479028697573, | |
| "grad_norm": 0.14291274547576904, | |
| "learning_rate": 2.198832004825427e-06, | |
| "loss": 0.366, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.163355408388521, | |
| "grad_norm": 0.1327233463525772, | |
| "learning_rate": 2.1882022963578087e-06, | |
| "loss": 0.3576, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.165562913907285, | |
| "grad_norm": 0.12592221796512604, | |
| "learning_rate": 2.177591141595931e-06, | |
| "loss": 0.3554, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.1677704194260485, | |
| "grad_norm": 0.13044218719005585, | |
| "learning_rate": 2.166998610557718e-06, | |
| "loss": 0.3489, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1699779249448126, | |
| "grad_norm": 0.14413036406040192, | |
| "learning_rate": 2.1564247731382063e-06, | |
| "loss": 0.3653, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.172185430463576, | |
| "grad_norm": 0.12830859422683716, | |
| "learning_rate": 2.145869699109081e-06, | |
| "loss": 0.3566, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.1743929359823397, | |
| "grad_norm": 0.12629762291908264, | |
| "learning_rate": 2.1353334581182193e-06, | |
| "loss": 0.3581, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1766004415011038, | |
| "grad_norm": 0.12835480272769928, | |
| "learning_rate": 2.1248161196892295e-06, | |
| "loss": 0.3655, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1788079470198674, | |
| "grad_norm": 0.12383104115724564, | |
| "learning_rate": 2.1143177532209855e-06, | |
| "loss": 0.3564, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.1810154525386314, | |
| "grad_norm": 0.1490948498249054, | |
| "learning_rate": 2.1038384279871786e-06, | |
| "loss": 0.37, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.183222958057395, | |
| "grad_norm": 0.12230035662651062, | |
| "learning_rate": 2.0933782131358516e-06, | |
| "loss": 0.3549, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.185430463576159, | |
| "grad_norm": 0.13511480391025543, | |
| "learning_rate": 2.082937177688952e-06, | |
| "loss": 0.3576, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1876379690949226, | |
| "grad_norm": 0.135064497590065, | |
| "learning_rate": 2.0725153905418726e-06, | |
| "loss": 0.3544, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.1898454746136866, | |
| "grad_norm": 0.12062691897153854, | |
| "learning_rate": 2.0621129204629907e-06, | |
| "loss": 0.3523, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.19205298013245, | |
| "grad_norm": 0.14147672057151794, | |
| "learning_rate": 2.0517298360932202e-06, | |
| "loss": 0.3583, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1942604856512142, | |
| "grad_norm": 0.12477768212556839, | |
| "learning_rate": 2.041366205945563e-06, | |
| "loss": 0.3604, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.196467991169978, | |
| "grad_norm": 0.14072567224502563, | |
| "learning_rate": 2.0310220984046467e-06, | |
| "loss": 0.3693, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.198675496688742, | |
| "grad_norm": 0.13882459700107574, | |
| "learning_rate": 2.020697581726279e-06, | |
| "loss": 0.3604, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.2008830022075054, | |
| "grad_norm": 0.12994110584259033, | |
| "learning_rate": 2.010392724037002e-06, | |
| "loss": 0.3589, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.2030905077262695, | |
| "grad_norm": 0.13540343940258026, | |
| "learning_rate": 2.0001075933336302e-06, | |
| "loss": 0.3664, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.205298013245033, | |
| "grad_norm": 0.12710383534431458, | |
| "learning_rate": 1.9898422574828163e-06, | |
| "loss": 0.3606, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.207505518763797, | |
| "grad_norm": 0.13436554372310638, | |
| "learning_rate": 1.979596784220591e-06, | |
| "loss": 0.3583, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.2097130242825607, | |
| "grad_norm": 0.1312997192144394, | |
| "learning_rate": 1.969371241151923e-06, | |
| "loss": 0.3485, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.2119205298013247, | |
| "grad_norm": 0.12719684839248657, | |
| "learning_rate": 1.9591656957502696e-06, | |
| "loss": 0.3646, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2141280353200883, | |
| "grad_norm": 0.11936060339212418, | |
| "learning_rate": 1.9489802153571373e-06, | |
| "loss": 0.3578, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.216335540838852, | |
| "grad_norm": 0.11757774651050568, | |
| "learning_rate": 1.938814867181632e-06, | |
| "loss": 0.3633, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.218543046357616, | |
| "grad_norm": 0.12061762064695358, | |
| "learning_rate": 1.928669718300013e-06, | |
| "loss": 0.3539, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.2207505518763795, | |
| "grad_norm": 0.11802493780851364, | |
| "learning_rate": 1.9185448356552575e-06, | |
| "loss": 0.3519, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.2229580573951435, | |
| "grad_norm": 0.12381359189748764, | |
| "learning_rate": 1.9084402860566128e-06, | |
| "loss": 0.3528, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.225165562913907, | |
| "grad_norm": 0.12773284316062927, | |
| "learning_rate": 1.8983561361791608e-06, | |
| "loss": 0.3618, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.227373068432671, | |
| "grad_norm": 0.1377752125263214, | |
| "learning_rate": 1.8882924525633778e-06, | |
| "loss": 0.36, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.2295805739514347, | |
| "grad_norm": 0.13043536245822906, | |
| "learning_rate": 1.8782493016146868e-06, | |
| "loss": 0.3588, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2317880794701987, | |
| "grad_norm": 0.12282190471887589, | |
| "learning_rate": 1.8682267496030276e-06, | |
| "loss": 0.3661, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.2339955849889623, | |
| "grad_norm": 0.11887135356664658, | |
| "learning_rate": 1.8582248626624217e-06, | |
| "loss": 0.3585, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.2362030905077264, | |
| "grad_norm": 0.13011059165000916, | |
| "learning_rate": 1.8482437067905268e-06, | |
| "loss": 0.3648, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.23841059602649, | |
| "grad_norm": 0.12632228434085846, | |
| "learning_rate": 1.8382833478482066e-06, | |
| "loss": 0.3711, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.240618101545254, | |
| "grad_norm": 0.13252626359462738, | |
| "learning_rate": 1.8283438515590996e-06, | |
| "loss": 0.3691, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.2428256070640176, | |
| "grad_norm": 0.12210851162672043, | |
| "learning_rate": 1.8184252835091764e-06, | |
| "loss": 0.36, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2450331125827816, | |
| "grad_norm": 0.1259019374847412, | |
| "learning_rate": 1.8085277091463188e-06, | |
| "loss": 0.3507, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.247240618101545, | |
| "grad_norm": 0.11658161133527756, | |
| "learning_rate": 1.798651193779875e-06, | |
| "loss": 0.3512, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.249448123620309, | |
| "grad_norm": 0.12686073780059814, | |
| "learning_rate": 1.788795802580236e-06, | |
| "loss": 0.3554, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.251655629139073, | |
| "grad_norm": 0.13531404733657837, | |
| "learning_rate": 1.7789616005784077e-06, | |
| "loss": 0.3569, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.253863134657837, | |
| "grad_norm": 0.12338366359472275, | |
| "learning_rate": 1.7691486526655782e-06, | |
| "loss": 0.361, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2560706401766004, | |
| "grad_norm": 0.1204574853181839, | |
| "learning_rate": 1.7593570235926883e-06, | |
| "loss": 0.3665, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.258278145695364, | |
| "grad_norm": 0.12831008434295654, | |
| "learning_rate": 1.7495867779700053e-06, | |
| "loss": 0.3617, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.260485651214128, | |
| "grad_norm": 0.1243155300617218, | |
| "learning_rate": 1.7398379802666993e-06, | |
| "loss": 0.3485, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.262693156732892, | |
| "grad_norm": 0.12370171397924423, | |
| "learning_rate": 1.7301106948104123e-06, | |
| "loss": 0.363, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.2649006622516556, | |
| "grad_norm": 0.12362192571163177, | |
| "learning_rate": 1.7204049857868433e-06, | |
| "loss": 0.3553, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.2671081677704192, | |
| "grad_norm": 0.1303797960281372, | |
| "learning_rate": 1.7107209172393158e-06, | |
| "loss": 0.3582, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.2693156732891833, | |
| "grad_norm": 0.12913931906223297, | |
| "learning_rate": 1.701058553068357e-06, | |
| "loss": 0.3621, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.271523178807947, | |
| "grad_norm": 0.13231627643108368, | |
| "learning_rate": 1.6914179570312767e-06, | |
| "loss": 0.3529, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.273730684326711, | |
| "grad_norm": 0.12977519631385803, | |
| "learning_rate": 1.6817991927417516e-06, | |
| "loss": 0.3619, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2759381898454745, | |
| "grad_norm": 0.12845906615257263, | |
| "learning_rate": 1.672202323669397e-06, | |
| "loss": 0.3613, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.2781456953642385, | |
| "grad_norm": 0.1263485550880432, | |
| "learning_rate": 1.662627413139351e-06, | |
| "loss": 0.3608, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.280353200883002, | |
| "grad_norm": 0.12105315178632736, | |
| "learning_rate": 1.6530745243318646e-06, | |
| "loss": 0.3694, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.282560706401766, | |
| "grad_norm": 0.12585538625717163, | |
| "learning_rate": 1.64354372028187e-06, | |
| "loss": 0.3674, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2847682119205297, | |
| "grad_norm": 0.13143764436244965, | |
| "learning_rate": 1.634035063878579e-06, | |
| "loss": 0.3654, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.2869757174392937, | |
| "grad_norm": 0.1330743134021759, | |
| "learning_rate": 1.6245486178650582e-06, | |
| "loss": 0.3579, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2891832229580573, | |
| "grad_norm": 0.12009730935096741, | |
| "learning_rate": 1.6150844448378178e-06, | |
| "loss": 0.3531, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.2913907284768213, | |
| "grad_norm": 0.13864392042160034, | |
| "learning_rate": 1.6056426072464015e-06, | |
| "loss": 0.3628, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.293598233995585, | |
| "grad_norm": 0.13580797612667084, | |
| "learning_rate": 1.5962231673929735e-06, | |
| "loss": 0.3722, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.295805739514349, | |
| "grad_norm": 0.12375160306692123, | |
| "learning_rate": 1.586826187431902e-06, | |
| "loss": 0.364, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.2980132450331126, | |
| "grad_norm": 0.12487831711769104, | |
| "learning_rate": 1.5774517293693558e-06, | |
| "loss": 0.3592, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.300220750551876, | |
| "grad_norm": 0.14687076210975647, | |
| "learning_rate": 1.5680998550628912e-06, | |
| "loss": 0.3506, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.30242825607064, | |
| "grad_norm": 0.12587517499923706, | |
| "learning_rate": 1.558770626221044e-06, | |
| "loss": 0.3577, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.304635761589404, | |
| "grad_norm": 0.13661305606365204, | |
| "learning_rate": 1.5494641044029268e-06, | |
| "loss": 0.3516, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.306843267108168, | |
| "grad_norm": 0.1377173811197281, | |
| "learning_rate": 1.5401803510178197e-06, | |
| "loss": 0.3603, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.3090507726269314, | |
| "grad_norm": 0.13497798144817352, | |
| "learning_rate": 1.5309194273247612e-06, | |
| "loss": 0.3545, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.3112582781456954, | |
| "grad_norm": 0.12721391022205353, | |
| "learning_rate": 1.5216813944321473e-06, | |
| "loss": 0.3447, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.313465783664459, | |
| "grad_norm": 0.12541592121124268, | |
| "learning_rate": 1.5124663132973338e-06, | |
| "loss": 0.3637, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.315673289183223, | |
| "grad_norm": 0.13365043699741364, | |
| "learning_rate": 1.5032742447262228e-06, | |
| "loss": 0.3535, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.3178807947019866, | |
| "grad_norm": 0.13111183047294617, | |
| "learning_rate": 1.4941052493728731e-06, | |
| "loss": 0.3528, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3200883002207506, | |
| "grad_norm": 0.1259627640247345, | |
| "learning_rate": 1.48495938773909e-06, | |
| "loss": 0.3599, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.322295805739514, | |
| "grad_norm": 0.12294916808605194, | |
| "learning_rate": 1.4758367201740303e-06, | |
| "loss": 0.3604, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.3245033112582782, | |
| "grad_norm": 0.12867586314678192, | |
| "learning_rate": 1.46673730687381e-06, | |
| "loss": 0.3649, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.326710816777042, | |
| "grad_norm": 0.12218500673770905, | |
| "learning_rate": 1.4576612078810953e-06, | |
| "loss": 0.3637, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.328918322295806, | |
| "grad_norm": 0.12343169003725052, | |
| "learning_rate": 1.448608483084713e-06, | |
| "loss": 0.3552, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.3311258278145695, | |
| "grad_norm": 0.12273656576871872, | |
| "learning_rate": 1.4395791922192575e-06, | |
| "loss": 0.3613, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.1312197744846344, | |
| "learning_rate": 1.430573394864695e-06, | |
| "loss": 0.3603, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.335540838852097, | |
| "grad_norm": 0.13838644325733185, | |
| "learning_rate": 1.4215911504459645e-06, | |
| "loss": 0.3642, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.337748344370861, | |
| "grad_norm": 0.11605527251958847, | |
| "learning_rate": 1.412632518232594e-06, | |
| "loss": 0.3626, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.3399558498896247, | |
| "grad_norm": 0.13165433704853058, | |
| "learning_rate": 1.4036975573383028e-06, | |
| "loss": 0.3636, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.3421633554083887, | |
| "grad_norm": 0.1277674436569214, | |
| "learning_rate": 1.3947863267206174e-06, | |
| "loss": 0.3517, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.3443708609271523, | |
| "grad_norm": 0.13928262889385223, | |
| "learning_rate": 1.3858988851804816e-06, | |
| "loss": 0.3649, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3465783664459163, | |
| "grad_norm": 0.13180677592754364, | |
| "learning_rate": 1.3770352913618613e-06, | |
| "loss": 0.3506, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.34878587196468, | |
| "grad_norm": 0.13746923208236694, | |
| "learning_rate": 1.3681956037513656e-06, | |
| "loss": 0.3583, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3509933774834435, | |
| "grad_norm": 0.13763363659381866, | |
| "learning_rate": 1.3593798806778546e-06, | |
| "loss": 0.354, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.3532008830022075, | |
| "grad_norm": 0.12797822058200836, | |
| "learning_rate": 1.3505881803120647e-06, | |
| "loss": 0.3719, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.355408388520971, | |
| "grad_norm": 0.12430882453918457, | |
| "learning_rate": 1.341820560666211e-06, | |
| "loss": 0.3622, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.357615894039735, | |
| "grad_norm": 0.13003040850162506, | |
| "learning_rate": 1.3330770795936172e-06, | |
| "loss": 0.3644, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3598233995584987, | |
| "grad_norm": 0.13834795355796814, | |
| "learning_rate": 1.3243577947883223e-06, | |
| "loss": 0.3539, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.3620309050772628, | |
| "grad_norm": 0.12837594747543335, | |
| "learning_rate": 1.315662763784712e-06, | |
| "loss": 0.366, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.3642384105960264, | |
| "grad_norm": 0.12611278891563416, | |
| "learning_rate": 1.3069920439571277e-06, | |
| "loss": 0.3567, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.3664459161147904, | |
| "grad_norm": 0.1504361927509308, | |
| "learning_rate": 1.2983456925194953e-06, | |
| "loss": 0.3539, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.368653421633554, | |
| "grad_norm": 0.13299378752708435, | |
| "learning_rate": 1.2897237665249429e-06, | |
| "loss": 0.3612, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.370860927152318, | |
| "grad_norm": 0.13157807290554047, | |
| "learning_rate": 1.2811263228654308e-06, | |
| "loss": 0.3652, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3730684326710816, | |
| "grad_norm": 0.11843698471784592, | |
| "learning_rate": 1.2725534182713717e-06, | |
| "loss": 0.3664, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.3752759381898456, | |
| "grad_norm": 0.1275041550397873, | |
| "learning_rate": 1.2640051093112532e-06, | |
| "loss": 0.3595, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.377483443708609, | |
| "grad_norm": 0.12319236993789673, | |
| "learning_rate": 1.25548145239127e-06, | |
| "loss": 0.3458, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.3796909492273732, | |
| "grad_norm": 0.1297508329153061, | |
| "learning_rate": 1.2469825037549493e-06, | |
| "loss": 0.3599, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.381898454746137, | |
| "grad_norm": 0.13835854828357697, | |
| "learning_rate": 1.2385083194827818e-06, | |
| "loss": 0.3631, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.384105960264901, | |
| "grad_norm": 0.14326325058937073, | |
| "learning_rate": 1.2300589554918502e-06, | |
| "loss": 0.3612, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3863134657836644, | |
| "grad_norm": 0.13561151921749115, | |
| "learning_rate": 1.221634467535458e-06, | |
| "loss": 0.3623, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.3885209713024285, | |
| "grad_norm": 0.12151821702718735, | |
| "learning_rate": 1.2132349112027636e-06, | |
| "loss": 0.347, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.390728476821192, | |
| "grad_norm": 0.12261461466550827, | |
| "learning_rate": 1.204860341918414e-06, | |
| "loss": 0.3615, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.3929359823399556, | |
| "grad_norm": 0.1293371468782425, | |
| "learning_rate": 1.1965108149421812e-06, | |
| "loss": 0.3611, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3951434878587197, | |
| "grad_norm": 0.12331968545913696, | |
| "learning_rate": 1.1881863853685904e-06, | |
| "loss": 0.3635, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.3973509933774833, | |
| "grad_norm": 0.13170567154884338, | |
| "learning_rate": 1.1798871081265672e-06, | |
| "loss": 0.3698, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.3995584988962473, | |
| "grad_norm": 0.1309044063091278, | |
| "learning_rate": 1.1716130379790613e-06, | |
| "loss": 0.3474, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.401766004415011, | |
| "grad_norm": 0.12268619239330292, | |
| "learning_rate": 1.1633642295227005e-06, | |
| "loss": 0.3634, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.403973509933775, | |
| "grad_norm": 0.12474343925714493, | |
| "learning_rate": 1.155140737187418e-06, | |
| "loss": 0.3651, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.4061810154525385, | |
| "grad_norm": 0.13622671365737915, | |
| "learning_rate": 1.1469426152360974e-06, | |
| "loss": 0.3626, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4083885209713025, | |
| "grad_norm": 0.1363033503293991, | |
| "learning_rate": 1.138769917764221e-06, | |
| "loss": 0.3608, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.410596026490066, | |
| "grad_norm": 0.12666958570480347, | |
| "learning_rate": 1.1306226986994989e-06, | |
| "loss": 0.3571, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.41280353200883, | |
| "grad_norm": 0.13340440392494202, | |
| "learning_rate": 1.1225010118015306e-06, | |
| "loss": 0.3602, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.4150110375275937, | |
| "grad_norm": 0.14224812388420105, | |
| "learning_rate": 1.1144049106614335e-06, | |
| "loss": 0.3525, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4172185430463577, | |
| "grad_norm": 0.13309240341186523, | |
| "learning_rate": 1.1063344487015e-06, | |
| "loss": 0.3595, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.4194260485651213, | |
| "grad_norm": 0.1310422122478485, | |
| "learning_rate": 1.098289679174841e-06, | |
| "loss": 0.3674, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.4216335540838854, | |
| "grad_norm": 0.12578153610229492, | |
| "learning_rate": 1.090270655165036e-06, | |
| "loss": 0.3543, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.423841059602649, | |
| "grad_norm": 0.13076968491077423, | |
| "learning_rate": 1.082277429585784e-06, | |
| "loss": 0.3724, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.426048565121413, | |
| "grad_norm": 0.14538316428661346, | |
| "learning_rate": 1.074310055180549e-06, | |
| "loss": 0.3539, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.4282560706401766, | |
| "grad_norm": 0.13270893692970276, | |
| "learning_rate": 1.0663685845222177e-06, | |
| "loss": 0.3637, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.4304635761589406, | |
| "grad_norm": 0.13810580968856812, | |
| "learning_rate": 1.0584530700127478e-06, | |
| "loss": 0.3526, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.432671081677704, | |
| "grad_norm": 0.1298057585954666, | |
| "learning_rate": 1.0505635638828288e-06, | |
| "loss": 0.3551, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4348785871964678, | |
| "grad_norm": 0.13741736114025116, | |
| "learning_rate": 1.0427001181915298e-06, | |
| "loss": 0.3512, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.437086092715232, | |
| "grad_norm": 0.1301170289516449, | |
| "learning_rate": 1.034862784825963e-06, | |
| "loss": 0.3568, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.439293598233996, | |
| "grad_norm": 0.12320411205291748, | |
| "learning_rate": 1.0270516155009336e-06, | |
| "loss": 0.3701, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.4415011037527594, | |
| "grad_norm": 0.1281418800354004, | |
| "learning_rate": 1.0192666617586072e-06, | |
| "loss": 0.3657, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.443708609271523, | |
| "grad_norm": 0.14360150694847107, | |
| "learning_rate": 1.0115079749681628e-06, | |
| "loss": 0.3598, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.445916114790287, | |
| "grad_norm": 0.13790085911750793, | |
| "learning_rate": 1.0037756063254555e-06, | |
| "loss": 0.3612, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4481236203090506, | |
| "grad_norm": 0.1285860389471054, | |
| "learning_rate": 9.960696068526826e-07, | |
| "loss": 0.3615, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.4503311258278146, | |
| "grad_norm": 0.1286444067955017, | |
| "learning_rate": 9.883900273980408e-07, | |
| "loss": 0.3587, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4525386313465782, | |
| "grad_norm": 0.12481298297643661, | |
| "learning_rate": 9.807369186353965e-07, | |
| "loss": 0.356, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.4547461368653423, | |
| "grad_norm": 0.1254754513502121, | |
| "learning_rate": 9.731103310639473e-07, | |
| "loss": 0.3575, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.456953642384106, | |
| "grad_norm": 0.12286875396966934, | |
| "learning_rate": 9.655103150078892e-07, | |
| "loss": 0.3608, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.45916114790287, | |
| "grad_norm": 0.11990831047296524, | |
| "learning_rate": 9.57936920616086e-07, | |
| "loss": 0.3586, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.4613686534216335, | |
| "grad_norm": 0.11828765273094177, | |
| "learning_rate": 9.503901978617392e-07, | |
| "loss": 0.3621, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.4635761589403975, | |
| "grad_norm": 0.13093435764312744, | |
| "learning_rate": 9.428701965420572e-07, | |
| "loss": 0.361, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.465783664459161, | |
| "grad_norm": 0.12574456632137299, | |
| "learning_rate": 9.353769662779249e-07, | |
| "loss": 0.3556, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.467991169977925, | |
| "grad_norm": 0.136729896068573, | |
| "learning_rate": 9.279105565135777e-07, | |
| "loss": 0.3661, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.4701986754966887, | |
| "grad_norm": 0.12174253910779953, | |
| "learning_rate": 9.204710165162751e-07, | |
| "loss": 0.3487, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.4724061810154527, | |
| "grad_norm": 0.13417866826057434, | |
| "learning_rate": 9.130583953759781e-07, | |
| "loss": 0.358, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4746136865342163, | |
| "grad_norm": 0.14360179007053375, | |
| "learning_rate": 9.056727420050227e-07, | |
| "loss": 0.3628, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.47682119205298, | |
| "grad_norm": 0.12731443345546722, | |
| "learning_rate": 8.983141051377953e-07, | |
| "loss": 0.3603, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.479028697571744, | |
| "grad_norm": 0.13204246759414673, | |
| "learning_rate": 8.909825333304134e-07, | |
| "loss": 0.355, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.481236203090508, | |
| "grad_norm": 0.1328221708536148, | |
| "learning_rate": 8.836780749604096e-07, | |
| "loss": 0.3541, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4834437086092715, | |
| "grad_norm": 0.13670921325683594, | |
| "learning_rate": 8.764007782264022e-07, | |
| "loss": 0.3649, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.485651214128035, | |
| "grad_norm": 0.124913290143013, | |
| "learning_rate": 8.691506911477848e-07, | |
| "loss": 0.3474, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.487858719646799, | |
| "grad_norm": 0.12466172128915787, | |
| "learning_rate": 8.619278615644106e-07, | |
| "loss": 0.3559, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.4900662251655628, | |
| "grad_norm": 0.13907819986343384, | |
| "learning_rate": 8.547323371362682e-07, | |
| "loss": 0.3544, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.492273730684327, | |
| "grad_norm": 0.13403339684009552, | |
| "learning_rate": 8.475641653431782e-07, | |
| "loss": 0.3621, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.4944812362030904, | |
| "grad_norm": 0.13107538223266602, | |
| "learning_rate": 8.404233934844707e-07, | |
| "loss": 0.3629, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4966887417218544, | |
| "grad_norm": 0.12382876127958298, | |
| "learning_rate": 8.333100686786766e-07, | |
| "loss": 0.3425, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.498896247240618, | |
| "grad_norm": 0.1340240240097046, | |
| "learning_rate": 8.262242378632179e-07, | |
| "loss": 0.3637, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.501103752759382, | |
| "grad_norm": 0.12560687959194183, | |
| "learning_rate": 8.191659477940972e-07, | |
| "loss": 0.3694, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.5033112582781456, | |
| "grad_norm": 0.13509927690029144, | |
| "learning_rate": 8.121352450455899e-07, | |
| "loss": 0.3588, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5055187637969096, | |
| "grad_norm": 0.13506671786308289, | |
| "learning_rate": 8.051321760099334e-07, | |
| "loss": 0.3657, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.507726269315673, | |
| "grad_norm": 0.12824004888534546, | |
| "learning_rate": 7.981567868970252e-07, | |
| "loss": 0.3541, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5099337748344372, | |
| "grad_norm": 0.12654347717761993, | |
| "learning_rate": 7.91209123734114e-07, | |
| "loss": 0.3628, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.512141280353201, | |
| "grad_norm": 0.13879314064979553, | |
| "learning_rate": 7.84289232365501e-07, | |
| "loss": 0.3631, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.514348785871965, | |
| "grad_norm": 0.11704026162624359, | |
| "learning_rate": 7.773971584522355e-07, | |
| "loss": 0.3589, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.5165562913907285, | |
| "grad_norm": 0.12439953535795212, | |
| "learning_rate": 7.705329474718093e-07, | |
| "loss": 0.3537, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.518763796909492, | |
| "grad_norm": 0.13436546921730042, | |
| "learning_rate": 7.636966447178601e-07, | |
| "loss": 0.356, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.520971302428256, | |
| "grad_norm": 0.12255270779132843, | |
| "learning_rate": 7.568882952998762e-07, | |
| "loss": 0.3568, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.52317880794702, | |
| "grad_norm": 0.12233200669288635, | |
| "learning_rate": 7.501079441428927e-07, | |
| "loss": 0.3538, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.5253863134657837, | |
| "grad_norm": 0.12473509460687637, | |
| "learning_rate": 7.433556359871968e-07, | |
| "loss": 0.3711, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.5275938189845473, | |
| "grad_norm": 0.13673733174800873, | |
| "learning_rate": 7.366314153880361e-07, | |
| "loss": 0.3613, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.5298013245033113, | |
| "grad_norm": 0.1345418095588684, | |
| "learning_rate": 7.299353267153192e-07, | |
| "loss": 0.3632, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5320088300220753, | |
| "grad_norm": 0.1333194524049759, | |
| "learning_rate": 7.232674141533274e-07, | |
| "loss": 0.3653, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.534216335540839, | |
| "grad_norm": 0.13697415590286255, | |
| "learning_rate": 7.166277217004214e-07, | |
| "loss": 0.3571, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.5364238410596025, | |
| "grad_norm": 0.13772854208946228, | |
| "learning_rate": 7.100162931687476e-07, | |
| "loss": 0.3737, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5386313465783665, | |
| "grad_norm": 0.1227809488773346, | |
| "learning_rate": 7.034331721839566e-07, | |
| "loss": 0.3622, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.54083885209713, | |
| "grad_norm": 0.12854185700416565, | |
| "learning_rate": 6.968784021849106e-07, | |
| "loss": 0.3612, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.543046357615894, | |
| "grad_norm": 0.11819867044687271, | |
| "learning_rate": 6.903520264233954e-07, | |
| "loss": 0.3625, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5452538631346577, | |
| "grad_norm": 0.1528642773628235, | |
| "learning_rate": 6.838540879638367e-07, | |
| "loss": 0.3644, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.5474613686534218, | |
| "grad_norm": 0.1466141641139984, | |
| "learning_rate": 6.773846296830167e-07, | |
| "loss": 0.3681, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5496688741721854, | |
| "grad_norm": 0.13698391616344452, | |
| "learning_rate": 6.7094369426979e-07, | |
| "loss": 0.3569, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.5518763796909494, | |
| "grad_norm": 0.1320303976535797, | |
| "learning_rate": 6.645313242248042e-07, | |
| "loss": 0.3531, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.554083885209713, | |
| "grad_norm": 0.14429907500743866, | |
| "learning_rate": 6.581475618602174e-07, | |
| "loss": 0.3587, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.556291390728477, | |
| "grad_norm": 0.12878787517547607, | |
| "learning_rate": 6.517924492994182e-07, | |
| "loss": 0.3648, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5584988962472406, | |
| "grad_norm": 0.13368941843509674, | |
| "learning_rate": 6.454660284767477e-07, | |
| "loss": 0.3569, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.560706401766004, | |
| "grad_norm": 0.13522258400917053, | |
| "learning_rate": 6.391683411372279e-07, | |
| "loss": 0.3624, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.562913907284768, | |
| "grad_norm": 0.1288643628358841, | |
| "learning_rate": 6.328994288362783e-07, | |
| "loss": 0.3571, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.5651214128035322, | |
| "grad_norm": 0.13528789579868317, | |
| "learning_rate": 6.266593329394471e-07, | |
| "loss": 0.3614, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.567328918322296, | |
| "grad_norm": 0.11771933734416962, | |
| "learning_rate": 6.204480946221386e-07, | |
| "loss": 0.3503, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.5695364238410594, | |
| "grad_norm": 0.13160669803619385, | |
| "learning_rate": 6.142657548693364e-07, | |
| "loss": 0.3662, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5717439293598234, | |
| "grad_norm": 0.13713331520557404, | |
| "learning_rate": 6.081123544753404e-07, | |
| "loss": 0.3474, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.5739514348785875, | |
| "grad_norm": 0.12216666340827942, | |
| "learning_rate": 6.019879340434904e-07, | |
| "loss": 0.3414, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.576158940397351, | |
| "grad_norm": 0.12497063726186752, | |
| "learning_rate": 5.958925339859001e-07, | |
| "loss": 0.3501, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.5783664459161146, | |
| "grad_norm": 0.1288682073354721, | |
| "learning_rate": 5.898261945231965e-07, | |
| "loss": 0.3518, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5805739514348787, | |
| "grad_norm": 0.13969705998897552, | |
| "learning_rate": 5.837889556842469e-07, | |
| "loss": 0.3595, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.5827814569536423, | |
| "grad_norm": 0.13971775770187378, | |
| "learning_rate": 5.777808573058969e-07, | |
| "loss": 0.3635, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.5849889624724063, | |
| "grad_norm": 0.12668545544147491, | |
| "learning_rate": 5.718019390327084e-07, | |
| "loss": 0.3519, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.58719646799117, | |
| "grad_norm": 0.13675172626972198, | |
| "learning_rate": 5.658522403166989e-07, | |
| "loss": 0.363, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.589403973509934, | |
| "grad_norm": 0.145385280251503, | |
| "learning_rate": 5.599318004170778e-07, | |
| "loss": 0.3561, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.5916114790286975, | |
| "grad_norm": 0.14911803603172302, | |
| "learning_rate": 5.540406583999925e-07, | |
| "loss": 0.3578, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.5938189845474615, | |
| "grad_norm": 0.12694287300109863, | |
| "learning_rate": 5.481788531382671e-07, | |
| "loss": 0.364, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.596026490066225, | |
| "grad_norm": 0.13262110948562622, | |
| "learning_rate": 5.423464233111448e-07, | |
| "loss": 0.3718, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.598233995584989, | |
| "grad_norm": 0.13582631945610046, | |
| "learning_rate": 5.365434074040343e-07, | |
| "loss": 0.3654, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.6004415011037527, | |
| "grad_norm": 0.13755717873573303, | |
| "learning_rate": 5.307698437082598e-07, | |
| "loss": 0.3522, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6026490066225163, | |
| "grad_norm": 0.1361360102891922, | |
| "learning_rate": 5.250257703207984e-07, | |
| "loss": 0.3683, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.6048565121412803, | |
| "grad_norm": 0.13162481784820557, | |
| "learning_rate": 5.193112251440407e-07, | |
| "loss": 0.3559, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6070640176600444, | |
| "grad_norm": 0.1336871236562729, | |
| "learning_rate": 5.136262458855312e-07, | |
| "loss": 0.3539, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.609271523178808, | |
| "grad_norm": 0.12142772227525711, | |
| "learning_rate": 5.079708700577229e-07, | |
| "loss": 0.3633, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6114790286975715, | |
| "grad_norm": 0.12398800998926163, | |
| "learning_rate": 5.023451349777331e-07, | |
| "loss": 0.3666, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.6136865342163356, | |
| "grad_norm": 0.1366734653711319, | |
| "learning_rate": 4.967490777670903e-07, | |
| "loss": 0.3552, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6158940397350996, | |
| "grad_norm": 0.13601034879684448, | |
| "learning_rate": 4.911827353514947e-07, | |
| "loss": 0.3606, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.618101545253863, | |
| "grad_norm": 0.13659435510635376, | |
| "learning_rate": 4.856461444605732e-07, | |
| "loss": 0.356, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6203090507726268, | |
| "grad_norm": 0.13636133074760437, | |
| "learning_rate": 4.801393416276368e-07, | |
| "loss": 0.3591, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.622516556291391, | |
| "grad_norm": 0.1251705437898636, | |
| "learning_rate": 4.7466236318943816e-07, | |
| "loss": 0.3557, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.6247240618101544, | |
| "grad_norm": 0.14416959881782532, | |
| "learning_rate": 4.692152452859333e-07, | |
| "loss": 0.3587, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.6269315673289184, | |
| "grad_norm": 0.15531152486801147, | |
| "learning_rate": 4.637980238600437e-07, | |
| "loss": 0.3494, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.629139072847682, | |
| "grad_norm": 0.1295311003923416, | |
| "learning_rate": 4.584107346574168e-07, | |
| "loss": 0.3595, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.631346578366446, | |
| "grad_norm": 0.13844533264636993, | |
| "learning_rate": 4.530534132261932e-07, | |
| "loss": 0.3589, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6335540838852096, | |
| "grad_norm": 0.13652655482292175, | |
| "learning_rate": 4.477260949167711e-07, | |
| "loss": 0.3492, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.6357615894039736, | |
| "grad_norm": 0.14151211082935333, | |
| "learning_rate": 4.4242881488157083e-07, | |
| "loss": 0.3565, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6379690949227372, | |
| "grad_norm": 0.13526977598667145, | |
| "learning_rate": 4.371616080748037e-07, | |
| "loss": 0.355, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.6401766004415013, | |
| "grad_norm": 0.13757188618183136, | |
| "learning_rate": 4.319245092522456e-07, | |
| "loss": 0.3547, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.642384105960265, | |
| "grad_norm": 0.1314619481563568, | |
| "learning_rate": 4.2671755297100047e-07, | |
| "loss": 0.36, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.6445916114790284, | |
| "grad_norm": 0.13309867680072784, | |
| "learning_rate": 4.215407735892796e-07, | |
| "loss": 0.3578, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6467991169977925, | |
| "grad_norm": 0.14237742125988007, | |
| "learning_rate": 4.1639420526616845e-07, | |
| "loss": 0.3567, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.6490066225165565, | |
| "grad_norm": 0.13557758927345276, | |
| "learning_rate": 4.1127788196140437e-07, | |
| "loss": 0.3627, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.65121412803532, | |
| "grad_norm": 0.14701923727989197, | |
| "learning_rate": 4.061918374351559e-07, | |
| "loss": 0.3518, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.6534216335540837, | |
| "grad_norm": 0.1429191380739212, | |
| "learning_rate": 4.0113610524779246e-07, | |
| "loss": 0.3579, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6556291390728477, | |
| "grad_norm": 0.13261815905570984, | |
| "learning_rate": 3.961107187596669e-07, | |
| "loss": 0.3558, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.6578366445916117, | |
| "grad_norm": 0.15215714275836945, | |
| "learning_rate": 3.911157111308983e-07, | |
| "loss": 0.3582, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6600441501103753, | |
| "grad_norm": 0.134977787733078, | |
| "learning_rate": 3.861511153211489e-07, | |
| "loss": 0.3484, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.662251655629139, | |
| "grad_norm": 0.13735589385032654, | |
| "learning_rate": 3.81216964089407e-07, | |
| "loss": 0.3633, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.664459161147903, | |
| "grad_norm": 0.13418880105018616, | |
| "learning_rate": 3.763132899937721e-07, | |
| "loss": 0.3646, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.13318879902362823, | |
| "learning_rate": 3.7144012539123973e-07, | |
| "loss": 0.3449, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.6688741721854305, | |
| "grad_norm": 0.14231936633586884, | |
| "learning_rate": 3.665975024374879e-07, | |
| "loss": 0.3661, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.671081677704194, | |
| "grad_norm": 0.13167575001716614, | |
| "learning_rate": 3.617854530866671e-07, | |
| "loss": 0.3627, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.673289183222958, | |
| "grad_norm": 0.13379094004631042, | |
| "learning_rate": 3.5700400909118306e-07, | |
| "loss": 0.3585, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6754966887417218, | |
| "grad_norm": 0.1270161271095276, | |
| "learning_rate": 3.522532020014946e-07, | |
| "loss": 0.3571, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.677704194260486, | |
| "grad_norm": 0.1238294392824173, | |
| "learning_rate": 3.475330631659002e-07, | |
| "loss": 0.3653, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.6799116997792494, | |
| "grad_norm": 0.12466707080602646, | |
| "learning_rate": 3.4284362373033566e-07, | |
| "loss": 0.3535, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.6821192052980134, | |
| "grad_norm": 0.12770843505859375, | |
| "learning_rate": 3.3818491463816385e-07, | |
| "loss": 0.3573, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.684326710816777, | |
| "grad_norm": 0.1300242394208908, | |
| "learning_rate": 3.335569666299748e-07, | |
| "loss": 0.3566, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.6865342163355406, | |
| "grad_norm": 0.1325581669807434, | |
| "learning_rate": 3.289598102433794e-07, | |
| "loss": 0.3575, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6887417218543046, | |
| "grad_norm": 0.1356075257062912, | |
| "learning_rate": 3.2439347581280865e-07, | |
| "loss": 0.3614, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.6909492273730686, | |
| "grad_norm": 0.12728764116764069, | |
| "learning_rate": 3.19857993469318e-07, | |
| "loss": 0.3586, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.693156732891832, | |
| "grad_norm": 0.1246216669678688, | |
| "learning_rate": 3.1535339314038015e-07, | |
| "loss": 0.3581, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.695364238410596, | |
| "grad_norm": 0.1401343196630478, | |
| "learning_rate": 3.108797045496942e-07, | |
| "loss": 0.3611, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.69757174392936, | |
| "grad_norm": 0.1308797299861908, | |
| "learning_rate": 3.0643695721698783e-07, | |
| "loss": 0.3664, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.699779249448124, | |
| "grad_norm": 0.12849809229373932, | |
| "learning_rate": 3.0202518045782337e-07, | |
| "loss": 0.3605, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.7019867549668874, | |
| "grad_norm": 0.128408744931221, | |
| "learning_rate": 2.9764440338340083e-07, | |
| "loss": 0.3583, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.704194260485651, | |
| "grad_norm": 0.1296570599079132, | |
| "learning_rate": 2.9329465490036844e-07, | |
| "loss": 0.3528, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.706401766004415, | |
| "grad_norm": 0.14551834762096405, | |
| "learning_rate": 2.8897596371063153e-07, | |
| "loss": 0.3497, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7086092715231787, | |
| "grad_norm": 0.13115178048610687, | |
| "learning_rate": 2.846883583111648e-07, | |
| "loss": 0.3594, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.7108167770419427, | |
| "grad_norm": 0.12846940755844116, | |
| "learning_rate": 2.804318669938233e-07, | |
| "loss": 0.3583, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7130242825607063, | |
| "grad_norm": 0.1380581110715866, | |
| "learning_rate": 2.762065178451517e-07, | |
| "loss": 0.3585, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.7152317880794703, | |
| "grad_norm": 0.12574802339076996, | |
| "learning_rate": 2.7201233874620534e-07, | |
| "loss": 0.3598, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.717439293598234, | |
| "grad_norm": 0.13319121301174164, | |
| "learning_rate": 2.678493573723612e-07, | |
| "loss": 0.3669, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.719646799116998, | |
| "grad_norm": 0.12967029213905334, | |
| "learning_rate": 2.6371760119314026e-07, | |
| "loss": 0.3637, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.7218543046357615, | |
| "grad_norm": 0.13314829766750336, | |
| "learning_rate": 2.596170974720202e-07, | |
| "loss": 0.3505, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.7240618101545255, | |
| "grad_norm": 0.13859589397907257, | |
| "learning_rate": 2.5554787326626194e-07, | |
| "loss": 0.3508, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.726269315673289, | |
| "grad_norm": 0.13054342567920685, | |
| "learning_rate": 2.515099554267247e-07, | |
| "loss": 0.3459, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.7284768211920527, | |
| "grad_norm": 0.13782578706741333, | |
| "learning_rate": 2.4750337059769425e-07, | |
| "loss": 0.3653, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.7306843267108167, | |
| "grad_norm": 0.1294957846403122, | |
| "learning_rate": 2.4352814521670375e-07, | |
| "loss": 0.3614, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.7328918322295808, | |
| "grad_norm": 0.1325952708721161, | |
| "learning_rate": 2.3958430551436095e-07, | |
| "loss": 0.3638, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7350993377483444, | |
| "grad_norm": 0.12079501897096634, | |
| "learning_rate": 2.3567187751417475e-07, | |
| "loss": 0.3605, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.737306843267108, | |
| "grad_norm": 0.13967706263065338, | |
| "learning_rate": 2.3179088703238096e-07, | |
| "loss": 0.3721, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.739514348785872, | |
| "grad_norm": 0.133636474609375, | |
| "learning_rate": 2.2794135967777908e-07, | |
| "loss": 0.3655, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.741721854304636, | |
| "grad_norm": 0.13323010504245758, | |
| "learning_rate": 2.2412332085155364e-07, | |
| "loss": 0.3531, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7439293598233996, | |
| "grad_norm": 0.13812537491321564, | |
| "learning_rate": 2.2033679574711365e-07, | |
| "loss": 0.3453, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.746136865342163, | |
| "grad_norm": 0.12806718051433563, | |
| "learning_rate": 2.1658180934992333e-07, | |
| "loss": 0.3632, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.748344370860927, | |
| "grad_norm": 0.13933822512626648, | |
| "learning_rate": 2.1285838643733958e-07, | |
| "loss": 0.3595, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.750551876379691, | |
| "grad_norm": 0.12600216269493103, | |
| "learning_rate": 2.0916655157844634e-07, | |
| "loss": 0.352, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.752759381898455, | |
| "grad_norm": 0.12791283428668976, | |
| "learning_rate": 2.0550632913389213e-07, | |
| "loss": 0.3556, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.7549668874172184, | |
| "grad_norm": 0.13090188801288605, | |
| "learning_rate": 2.0187774325573174e-07, | |
| "loss": 0.3632, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7571743929359824, | |
| "grad_norm": 0.1253674477338791, | |
| "learning_rate": 1.9828081788726307e-07, | |
| "loss": 0.3499, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.759381898454746, | |
| "grad_norm": 0.13384896516799927, | |
| "learning_rate": 1.9471557676287501e-07, | |
| "loss": 0.3556, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.76158940397351, | |
| "grad_norm": 0.13008341193199158, | |
| "learning_rate": 1.9118204340788426e-07, | |
| "loss": 0.3521, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.7637969094922736, | |
| "grad_norm": 0.14128050208091736, | |
| "learning_rate": 1.8768024113838546e-07, | |
| "loss": 0.3629, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7660044150110377, | |
| "grad_norm": 0.13070915639400482, | |
| "learning_rate": 1.8421019306109288e-07, | |
| "loss": 0.3576, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.7682119205298013, | |
| "grad_norm": 0.13155661523342133, | |
| "learning_rate": 1.8077192207319072e-07, | |
| "loss": 0.3614, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7704194260485653, | |
| "grad_norm": 0.14038477838039398, | |
| "learning_rate": 1.773654508621825e-07, | |
| "loss": 0.354, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.772626931567329, | |
| "grad_norm": 0.13292455673217773, | |
| "learning_rate": 1.7399080190573903e-07, | |
| "loss": 0.3635, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.774834437086093, | |
| "grad_norm": 0.1393938958644867, | |
| "learning_rate": 1.7064799747155248e-07, | |
| "loss": 0.3502, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.7770419426048565, | |
| "grad_norm": 0.13566245138645172, | |
| "learning_rate": 1.6733705961718694e-07, | |
| "loss": 0.353, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.77924944812362, | |
| "grad_norm": 0.1313687562942505, | |
| "learning_rate": 1.640580101899353e-07, | |
| "loss": 0.3519, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.781456953642384, | |
| "grad_norm": 0.13328541815280914, | |
| "learning_rate": 1.6081087082667314e-07, | |
| "loss": 0.3513, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.783664459161148, | |
| "grad_norm": 0.13640496134757996, | |
| "learning_rate": 1.575956629537173e-07, | |
| "loss": 0.3559, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.7858719646799117, | |
| "grad_norm": 0.13169561326503754, | |
| "learning_rate": 1.5441240778668321e-07, | |
| "loss": 0.3589, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.7880794701986753, | |
| "grad_norm": 0.1369987428188324, | |
| "learning_rate": 1.5126112633034761e-07, | |
| "loss": 0.368, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.7902869757174393, | |
| "grad_norm": 0.12467605620622635, | |
| "learning_rate": 1.4814183937850668e-07, | |
| "loss": 0.3563, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.7924944812362034, | |
| "grad_norm": 0.1307917833328247, | |
| "learning_rate": 1.4505456751383985e-07, | |
| "loss": 0.3516, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.794701986754967, | |
| "grad_norm": 0.14058281481266022, | |
| "learning_rate": 1.4199933110777553e-07, | |
| "loss": 0.3546, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.7969094922737305, | |
| "grad_norm": 0.12925602495670319, | |
| "learning_rate": 1.389761503203535e-07, | |
| "loss": 0.3615, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.7991169977924946, | |
| "grad_norm": 0.1409684419631958, | |
| "learning_rate": 1.3598504510009602e-07, | |
| "loss": 0.3611, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.801324503311258, | |
| "grad_norm": 0.13529111444950104, | |
| "learning_rate": 1.3302603518387358e-07, | |
| "loss": 0.3556, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.803532008830022, | |
| "grad_norm": 0.12611360847949982, | |
| "learning_rate": 1.3009914009677493e-07, | |
| "loss": 0.3695, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.8057395143487858, | |
| "grad_norm": 0.1310908943414688, | |
| "learning_rate": 1.272043791519778e-07, | |
| "loss": 0.3571, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.80794701986755, | |
| "grad_norm": 0.13993841409683228, | |
| "learning_rate": 1.2434177145062177e-07, | |
| "loss": 0.3572, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8101545253863134, | |
| "grad_norm": 0.1199953481554985, | |
| "learning_rate": 1.215113358816844e-07, | |
| "loss": 0.3523, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.8123620309050774, | |
| "grad_norm": 0.12030813843011856, | |
| "learning_rate": 1.187130911218537e-07, | |
| "loss": 0.3533, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.814569536423841, | |
| "grad_norm": 0.1288137286901474, | |
| "learning_rate": 1.1594705563540642e-07, | |
| "loss": 0.3587, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.816777041942605, | |
| "grad_norm": 0.1391681730747223, | |
| "learning_rate": 1.1321324767408382e-07, | |
| "loss": 0.3592, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8189845474613686, | |
| "grad_norm": 0.12392991036176682, | |
| "learning_rate": 1.1051168527697665e-07, | |
| "loss": 0.3605, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.821192052980132, | |
| "grad_norm": 0.12354668974876404, | |
| "learning_rate": 1.0784238627039977e-07, | |
| "loss": 0.3593, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8233995584988962, | |
| "grad_norm": 0.13385632634162903, | |
| "learning_rate": 1.0520536826777783e-07, | |
| "loss": 0.3573, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.8256070640176603, | |
| "grad_norm": 0.12551288306713104, | |
| "learning_rate": 1.0260064866952968e-07, | |
| "loss": 0.3563, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.827814569536424, | |
| "grad_norm": 0.119255430996418, | |
| "learning_rate": 1.0002824466295191e-07, | |
| "loss": 0.3553, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.8300220750551874, | |
| "grad_norm": 0.12346068769693375, | |
| "learning_rate": 9.748817322210558e-08, | |
| "loss": 0.3547, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.8322295805739515, | |
| "grad_norm": 0.12130031734704971, | |
| "learning_rate": 9.498045110770571e-08, | |
| "loss": 0.3476, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.8344370860927155, | |
| "grad_norm": 0.13486534357070923, | |
| "learning_rate": 9.250509486700809e-08, | |
| "loss": 0.3652, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.836644591611479, | |
| "grad_norm": 0.12220917642116547, | |
| "learning_rate": 9.006212083370213e-08, | |
| "loss": 0.3548, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.8388520971302427, | |
| "grad_norm": 0.12486063688993454, | |
| "learning_rate": 8.765154512780428e-08, | |
| "loss": 0.3502, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8410596026490067, | |
| "grad_norm": 0.11943277716636658, | |
| "learning_rate": 8.527338365554749e-08, | |
| "loss": 0.3462, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.8432671081677703, | |
| "grad_norm": 0.13910432159900665, | |
| "learning_rate": 8.292765210928089e-08, | |
| "loss": 0.3585, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.8454746136865343, | |
| "grad_norm": 0.12948773801326752, | |
| "learning_rate": 8.061436596736139e-08, | |
| "loss": 0.3589, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.847682119205298, | |
| "grad_norm": 0.13374410569667816, | |
| "learning_rate": 7.833354049405717e-08, | |
| "loss": 0.3505, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.849889624724062, | |
| "grad_norm": 0.1347481608390808, | |
| "learning_rate": 7.608519073944165e-08, | |
| "loss": 0.3687, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.8520971302428255, | |
| "grad_norm": 0.13521930575370789, | |
| "learning_rate": 7.386933153929798e-08, | |
| "loss": 0.366, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.8543046357615895, | |
| "grad_norm": 0.13502496480941772, | |
| "learning_rate": 7.168597751501972e-08, | |
| "loss": 0.366, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.856512141280353, | |
| "grad_norm": 0.1404918134212494, | |
| "learning_rate": 6.953514307351306e-08, | |
| "loss": 0.357, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.858719646799117, | |
| "grad_norm": 0.12767677009105682, | |
| "learning_rate": 6.741684240710477e-08, | |
| "loss": 0.3517, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.8609271523178808, | |
| "grad_norm": 0.13126912713050842, | |
| "learning_rate": 6.533108949344558e-08, | |
| "loss": 0.3564, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8631346578366443, | |
| "grad_norm": 0.13056641817092896, | |
| "learning_rate": 6.32778980954185e-08, | |
| "loss": 0.3587, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.8653421633554084, | |
| "grad_norm": 0.13267594575881958, | |
| "learning_rate": 6.125728176105129e-08, | |
| "loss": 0.3686, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.8675496688741724, | |
| "grad_norm": 0.12520645558834076, | |
| "learning_rate": 5.9269253823421855e-08, | |
| "loss": 0.3703, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.869757174392936, | |
| "grad_norm": 0.1308923214673996, | |
| "learning_rate": 5.731382740057523e-08, | |
| "loss": 0.3549, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8719646799116996, | |
| "grad_norm": 0.13203322887420654, | |
| "learning_rate": 5.5391015395432346e-08, | |
| "loss": 0.353, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.8741721854304636, | |
| "grad_norm": 0.13061153888702393, | |
| "learning_rate": 5.350083049570853e-08, | |
| "loss": 0.3556, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8763796909492276, | |
| "grad_norm": 0.13314051926136017, | |
| "learning_rate": 5.164328517382744e-08, | |
| "loss": 0.3578, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.878587196467991, | |
| "grad_norm": 0.13481754064559937, | |
| "learning_rate": 4.9818391686842214e-08, | |
| "loss": 0.362, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.880794701986755, | |
| "grad_norm": 0.12934917211532593, | |
| "learning_rate": 4.802616207634947e-08, | |
| "loss": 0.3489, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.883002207505519, | |
| "grad_norm": 0.1276032030582428, | |
| "learning_rate": 4.626660816841433e-08, | |
| "loss": 0.3554, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8852097130242824, | |
| "grad_norm": 0.12852588295936584, | |
| "learning_rate": 4.4539741573489395e-08, | |
| "loss": 0.3519, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.8874172185430464, | |
| "grad_norm": 0.1251831203699112, | |
| "learning_rate": 4.2845573686339235e-08, | |
| "loss": 0.3586, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.88962472406181, | |
| "grad_norm": 0.12635353207588196, | |
| "learning_rate": 4.118411568596714e-08, | |
| "loss": 0.3521, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.891832229580574, | |
| "grad_norm": 0.13549165427684784, | |
| "learning_rate": 3.9555378535537925e-08, | |
| "loss": 0.3573, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.8940397350993377, | |
| "grad_norm": 0.12129613012075424, | |
| "learning_rate": 3.795937298230801e-08, | |
| "loss": 0.3644, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.8962472406181017, | |
| "grad_norm": 0.12583400309085846, | |
| "learning_rate": 3.639610955755213e-08, | |
| "loss": 0.3628, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.8984547461368653, | |
| "grad_norm": 0.11590792238712311, | |
| "learning_rate": 3.486559857649785e-08, | |
| "loss": 0.3541, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.9006622516556293, | |
| "grad_norm": 0.1254376322031021, | |
| "learning_rate": 3.336785013825339e-08, | |
| "loss": 0.3591, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.902869757174393, | |
| "grad_norm": 0.12448057532310486, | |
| "learning_rate": 3.190287412574267e-08, | |
| "loss": 0.3685, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.9050772626931565, | |
| "grad_norm": 0.12795038521289825, | |
| "learning_rate": 3.047068020564037e-08, | |
| "loss": 0.3661, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.9072847682119205, | |
| "grad_norm": 0.1275874376296997, | |
| "learning_rate": 2.9071277828308654e-08, | |
| "loss": 0.3547, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.9094922737306845, | |
| "grad_norm": 0.13140250742435455, | |
| "learning_rate": 2.7704676227732764e-08, | |
| "loss": 0.3476, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.911699779249448, | |
| "grad_norm": 0.13054263591766357, | |
| "learning_rate": 2.637088442146163e-08, | |
| "loss": 0.3555, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.9139072847682117, | |
| "grad_norm": 0.14265048503875732, | |
| "learning_rate": 2.506991121054847e-08, | |
| "loss": 0.3552, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9161147902869757, | |
| "grad_norm": 0.12008768320083618, | |
| "learning_rate": 2.380176517949251e-08, | |
| "loss": 0.3555, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.9183222958057398, | |
| "grad_norm": 0.13723327219486237, | |
| "learning_rate": 2.256645469618124e-08, | |
| "loss": 0.3578, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9205298013245033, | |
| "grad_norm": 0.13760052621364594, | |
| "learning_rate": 2.136398791183658e-08, | |
| "loss": 0.3543, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.922737306843267, | |
| "grad_norm": 0.129390150308609, | |
| "learning_rate": 2.0194372760961034e-08, | |
| "loss": 0.362, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.924944812362031, | |
| "grad_norm": 0.15059252083301544, | |
| "learning_rate": 1.905761696128494e-08, | |
| "loss": 0.3611, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.9271523178807946, | |
| "grad_norm": 0.13671807944774628, | |
| "learning_rate": 1.795372801371431e-08, | |
| "loss": 0.3601, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9293598233995586, | |
| "grad_norm": 0.138703852891922, | |
| "learning_rate": 1.6882713202283076e-08, | |
| "loss": 0.3525, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.931567328918322, | |
| "grad_norm": 0.12975488603115082, | |
| "learning_rate": 1.5844579594105904e-08, | |
| "loss": 0.3636, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.933774834437086, | |
| "grad_norm": 0.15756423771381378, | |
| "learning_rate": 1.48393340393288e-08, | |
| "loss": 0.3586, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.93598233995585, | |
| "grad_norm": 0.13145272433757782, | |
| "learning_rate": 1.3866983171084703e-08, | |
| "loss": 0.3563, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.938189845474614, | |
| "grad_norm": 0.15646247565746307, | |
| "learning_rate": 1.292753340545183e-08, | |
| "loss": 0.3567, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.9403973509933774, | |
| "grad_norm": 0.1439492553472519, | |
| "learning_rate": 1.2020990941408739e-08, | |
| "loss": 0.3678, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9426048565121414, | |
| "grad_norm": 0.1377602368593216, | |
| "learning_rate": 1.1147361760794895e-08, | |
| "loss": 0.3547, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.944812362030905, | |
| "grad_norm": 0.14668287336826324, | |
| "learning_rate": 1.0306651628270715e-08, | |
| "loss": 0.3672, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9470198675496686, | |
| "grad_norm": 0.12802091240882874, | |
| "learning_rate": 9.498866091278702e-09, | |
| "loss": 0.3596, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9492273730684326, | |
| "grad_norm": 0.15803688764572144, | |
| "learning_rate": 8.724010480010147e-09, | |
| "loss": 0.3586, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.9514348785871967, | |
| "grad_norm": 0.13727080821990967, | |
| "learning_rate": 7.982089907364598e-09, | |
| "loss": 0.3515, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.9536423841059603, | |
| "grad_norm": 0.14836713671684265, | |
| "learning_rate": 7.273109268920997e-09, | |
| "loss": 0.3572, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.955849889624724, | |
| "grad_norm": 0.15154053270816803, | |
| "learning_rate": 6.597073242902707e-09, | |
| "loss": 0.3632, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.958057395143488, | |
| "grad_norm": 0.14268819987773895, | |
| "learning_rate": 5.95398629014754e-09, | |
| "loss": 0.3737, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.960264900662252, | |
| "grad_norm": 0.13592885434627533, | |
| "learning_rate": 5.3438526540777745e-09, | |
| "loss": 0.3647, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.9624724061810155, | |
| "grad_norm": 0.15282437205314636, | |
| "learning_rate": 4.766676360674072e-09, | |
| "loss": 0.3666, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.964679911699779, | |
| "grad_norm": 0.14543361961841583, | |
| "learning_rate": 4.22246121844494e-09, | |
| "loss": 0.3576, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.966887417218543, | |
| "grad_norm": 0.1356423795223236, | |
| "learning_rate": 3.7112108184061966e-09, | |
| "loss": 0.3646, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.9690949227373067, | |
| "grad_norm": 0.13516850769519806, | |
| "learning_rate": 3.2329285340537696e-09, | |
| "loss": 0.3577, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.9713024282560707, | |
| "grad_norm": 0.12816190719604492, | |
| "learning_rate": 2.7876175213431557e-09, | |
| "loss": 0.3568, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.9735099337748343, | |
| "grad_norm": 0.13490551710128784, | |
| "learning_rate": 2.375280718668882e-09, | |
| "loss": 0.3548, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.9757174392935983, | |
| "grad_norm": 0.14126001298427582, | |
| "learning_rate": 1.995920846843968e-09, | |
| "loss": 0.3666, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.977924944812362, | |
| "grad_norm": 0.12279074639081955, | |
| "learning_rate": 1.649540409081607e-09, | |
| "loss": 0.3562, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.980132450331126, | |
| "grad_norm": 0.12752176821231842, | |
| "learning_rate": 1.3361416909812852e-09, | |
| "loss": 0.365, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9823399558498895, | |
| "grad_norm": 0.13065557181835175, | |
| "learning_rate": 1.055726760510467e-09, | |
| "loss": 0.3615, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.9845474613686536, | |
| "grad_norm": 0.11860226094722748, | |
| "learning_rate": 8.082974679929357e-10, | |
| "loss": 0.3564, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.986754966887417, | |
| "grad_norm": 0.1291607767343521, | |
| "learning_rate": 5.938554460965807e-10, | |
| "loss": 0.359, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.988962472406181, | |
| "grad_norm": 0.146859273314476, | |
| "learning_rate": 4.12402109820631e-10, | |
| "loss": 0.3614, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.9911699779249448, | |
| "grad_norm": 0.12649129331111908, | |
| "learning_rate": 2.639386564889934e-10, | |
| "loss": 0.3566, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.993377483443709, | |
| "grad_norm": 0.14126034080982208, | |
| "learning_rate": 1.4846606574137058e-10, | |
| "loss": 0.3608, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.9955849889624724, | |
| "grad_norm": 0.13480675220489502, | |
| "learning_rate": 6.59850995254896e-11, | |
| "loss": 0.3555, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.997792494481236, | |
| "grad_norm": 0.14647357165813446, | |
| "learning_rate": 1.649630209432651e-11, | |
| "loss": 0.3643, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.13931505382061005, | |
| "learning_rate": 0.0, | |
| "loss": 0.3486, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1359, | |
| "total_flos": 2.7571726585954304e+16, | |
| "train_loss": 0.13500037013037053, | |
| "train_runtime": 42654.1727, | |
| "train_samples_per_second": 12.208, | |
| "train_steps_per_second": 0.032 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1359, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.7571726585954304e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
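
The block above is the raw `trainer_state.json` written by the Trainer at the end of the run (1359 steps, 3 epochs). For reference, here is a minimal sketch of how one might load this state and plot the per-step loss and learning-rate schedule it records. It assumes the standard Hugging Face Transformers `trainer_state.json` layout shown above; the input path, the output file name, and the use of matplotlib are illustrative choices, not part of the original log.

```python
# Minimal sketch: parse a trainer_state.json like the one above and plot its curves.
# Assumptions: the file follows the standard HF Trainer layout; "trainer_state.json"
# and "training_curves.png" are placeholder paths.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step entries that carry a "loss"; the final summary entry
# (train_runtime, total_flos, ...) does not.
history = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

print(f"steps logged: {len(history)}, final loss: {losses[-1]:.4f}")

# Plot training loss and the learning-rate schedule side by side.
fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set(xlabel="step", ylabel="training loss")
ax_lr.plot(steps, lrs)
ax_lr.set(xlabel="step", ylabel="learning rate")
fig.tight_layout()
fig.savefig("training_curves.png")
```

For this log the script would report 1359 logged steps with a final loss of 0.3486, and the learning-rate panel would show the schedule decaying to 0.0 at the last step.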