{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9927710843373494,
  "eval_steps": 500,
  "global_step": 621,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004819277108433735,
      "grad_norm": 6.955545902252197,
      "learning_rate": 1.5873015873015874e-07,
      "loss": 1.0435,
      "step": 1
    },
    {
      "epoch": 0.00963855421686747,
      "grad_norm": 6.793174743652344,
      "learning_rate": 3.174603174603175e-07,
      "loss": 1.0378,
      "step": 2
    },
    {
      "epoch": 0.014457831325301205,
      "grad_norm": 6.922445774078369,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 1.0495,
      "step": 3
    },
    {
      "epoch": 0.01927710843373494,
      "grad_norm": 6.903090000152588,
      "learning_rate": 6.34920634920635e-07,
      "loss": 1.0422,
      "step": 4
    },
    {
      "epoch": 0.024096385542168676,
      "grad_norm": 6.824069499969482,
      "learning_rate": 7.936507936507937e-07,
      "loss": 1.0456,
      "step": 5
    },
    {
      "epoch": 0.02891566265060241,
      "grad_norm": 6.71932315826416,
      "learning_rate": 9.523809523809525e-07,
      "loss": 1.0416,
      "step": 6
    },
    {
      "epoch": 0.033734939759036145,
      "grad_norm": 6.264998912811279,
      "learning_rate": 1.111111111111111e-06,
      "loss": 1.0182,
      "step": 7
    },
    {
      "epoch": 0.03855421686746988,
      "grad_norm": 6.157326698303223,
      "learning_rate": 1.26984126984127e-06,
      "loss": 1.0122,
      "step": 8
    },
    {
      "epoch": 0.043373493975903614,
      "grad_norm": 5.046329498291016,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 1.002,
      "step": 9
    },
    {
      "epoch": 0.04819277108433735,
      "grad_norm": 4.785087585449219,
      "learning_rate": 1.5873015873015873e-06,
      "loss": 0.9743,
      "step": 10
    },
    {
      "epoch": 0.05301204819277108,
      "grad_norm": 4.505481719970703,
      "learning_rate": 1.746031746031746e-06,
      "loss": 0.9795,
      "step": 11
    },
    {
      "epoch": 0.05783132530120482,
      "grad_norm": 3.0111618041992188,
      "learning_rate": 1.904761904761905e-06,
      "loss": 0.9396,
      "step": 12
    },
    {
      "epoch": 0.06265060240963856,
      "grad_norm": 2.865196466445923,
      "learning_rate": 2.0634920634920634e-06,
      "loss": 0.9502,
      "step": 13
    },
    {
      "epoch": 0.06746987951807229,
      "grad_norm": 2.732715368270874,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.9336,
      "step": 14
    },
    {
      "epoch": 0.07228915662650602,
      "grad_norm": 2.576692581176758,
      "learning_rate": 2.380952380952381e-06,
      "loss": 0.933,
      "step": 15
    },
    {
      "epoch": 0.07710843373493977,
      "grad_norm": 3.3130340576171875,
      "learning_rate": 2.53968253968254e-06,
      "loss": 0.9039,
      "step": 16
    },
    {
      "epoch": 0.0819277108433735,
      "grad_norm": 3.8548290729522705,
      "learning_rate": 2.6984126984126986e-06,
      "loss": 0.8975,
      "step": 17
    },
    {
      "epoch": 0.08674698795180723,
      "grad_norm": 3.933690071105957,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.9065,
      "step": 18
    },
    {
      "epoch": 0.09156626506024096,
      "grad_norm": 3.6290793418884277,
      "learning_rate": 3.015873015873016e-06,
      "loss": 0.8772,
      "step": 19
    },
    {
      "epoch": 0.0963855421686747,
      "grad_norm": 3.045029878616333,
      "learning_rate": 3.1746031746031746e-06,
      "loss": 0.8738,
      "step": 20
    },
    {
      "epoch": 0.10120481927710843,
      "grad_norm": 2.285907506942749,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.8443,
      "step": 21
    },
    {
      "epoch": 0.10602409638554217,
      "grad_norm": 1.6617300510406494,
      "learning_rate": 3.492063492063492e-06,
      "loss": 0.8385,
      "step": 22
    },
    {
      "epoch": 0.1108433734939759,
      "grad_norm": 1.595490574836731,
      "learning_rate": 3.6507936507936507e-06,
      "loss": 0.8201,
      "step": 23
    },
    {
      "epoch": 0.11566265060240964,
      "grad_norm": 1.3998520374298096,
      "learning_rate": 3.80952380952381e-06,
      "loss": 0.8115,
      "step": 24
    },
    {
      "epoch": 0.12048192771084337,
      "grad_norm": 1.24544358253479,
      "learning_rate": 3.968253968253968e-06,
      "loss": 0.8131,
      "step": 25
    },
    {
      "epoch": 0.12530120481927712,
      "grad_norm": 1.094412088394165,
      "learning_rate": 4.126984126984127e-06,
      "loss": 0.7769,
      "step": 26
    },
    {
      "epoch": 0.13012048192771083,
      "grad_norm": 1.0416014194488525,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.7739,
      "step": 27
    },
    {
      "epoch": 0.13493975903614458,
      "grad_norm": 1.002666711807251,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.7797,
      "step": 28
    },
    {
      "epoch": 0.13975903614457832,
      "grad_norm": 0.9590147137641907,
      "learning_rate": 4.603174603174604e-06,
      "loss": 0.7592,
      "step": 29
    },
    {
      "epoch": 0.14457831325301204,
      "grad_norm": 0.8477379679679871,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.755,
      "step": 30
    },
    {
      "epoch": 0.1493975903614458,
      "grad_norm": 0.8571174740791321,
      "learning_rate": 4.920634920634921e-06,
      "loss": 0.7514,
      "step": 31
    },
    {
      "epoch": 0.15421686746987953,
      "grad_norm": 0.8876820802688599,
      "learning_rate": 5.07936507936508e-06,
      "loss": 0.7371,
      "step": 32
    },
    {
      "epoch": 0.15903614457831325,
      "grad_norm": 0.8745198249816895,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 0.7375,
      "step": 33
    },
    {
      "epoch": 0.163855421686747,
      "grad_norm": 0.94056236743927,
      "learning_rate": 5.396825396825397e-06,
      "loss": 0.7282,
      "step": 34
    },
    {
      "epoch": 0.1686746987951807,
      "grad_norm": 0.7261127829551697,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.7217,
      "step": 35
    },
    {
      "epoch": 0.17349397590361446,
      "grad_norm": 0.7808772325515747,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.7069,
      "step": 36
    },
    {
      "epoch": 0.1783132530120482,
      "grad_norm": 0.8928313255310059,
      "learning_rate": 5.873015873015874e-06,
      "loss": 0.7165,
      "step": 37
    },
    {
      "epoch": 0.18313253012048192,
      "grad_norm": 0.7088349461555481,
      "learning_rate": 6.031746031746032e-06,
      "loss": 0.7122,
      "step": 38
    },
    {
      "epoch": 0.18795180722891566,
      "grad_norm": 0.8545775413513184,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 0.7151,
      "step": 39
    },
    {
      "epoch": 0.1927710843373494,
      "grad_norm": 0.8626333475112915,
      "learning_rate": 6.349206349206349e-06,
      "loss": 0.7127,
      "step": 40
    },
    {
      "epoch": 0.19759036144578312,
      "grad_norm": 0.7708625197410583,
      "learning_rate": 6.507936507936509e-06,
      "loss": 0.6916,
      "step": 41
    },
    {
      "epoch": 0.20240963855421687,
      "grad_norm": 0.6877345442771912,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.707,
      "step": 42
    },
    {
      "epoch": 0.20722891566265061,
      "grad_norm": 0.6759275197982788,
      "learning_rate": 6.825396825396826e-06,
      "loss": 0.6822,
      "step": 43
    },
    {
      "epoch": 0.21204819277108433,
      "grad_norm": 0.9545477032661438,
      "learning_rate": 6.984126984126984e-06,
      "loss": 0.6943,
      "step": 44
    },
    {
      "epoch": 0.21686746987951808,
      "grad_norm": 0.789354681968689,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.6799,
      "step": 45
    },
    {
      "epoch": 0.2216867469879518,
      "grad_norm": 0.6081812381744385,
      "learning_rate": 7.301587301587301e-06,
      "loss": 0.6824,
      "step": 46
    },
    {
      "epoch": 0.22650602409638554,
      "grad_norm": 0.8076633214950562,
      "learning_rate": 7.460317460317461e-06,
      "loss": 0.6701,
      "step": 47
    },
    {
      "epoch": 0.23132530120481928,
      "grad_norm": 0.6392861604690552,
      "learning_rate": 7.61904761904762e-06,
      "loss": 0.6774,
      "step": 48
    },
    {
      "epoch": 0.236144578313253,
      "grad_norm": 0.6257482767105103,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.6908,
      "step": 49
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 0.6038609743118286,
      "learning_rate": 7.936507936507936e-06,
      "loss": 0.6784,
      "step": 50
    },
    {
      "epoch": 0.2457831325301205,
      "grad_norm": 0.5643861293792725,
      "learning_rate": 8.095238095238097e-06,
      "loss": 0.6705,
      "step": 51
    },
    {
      "epoch": 0.25060240963855424,
      "grad_norm": 0.6746996641159058,
      "learning_rate": 8.253968253968254e-06,
      "loss": 0.6674,
      "step": 52
    },
    {
      "epoch": 0.25542168674698795,
      "grad_norm": 0.6142429113388062,
      "learning_rate": 8.412698412698414e-06,
      "loss": 0.6798,
      "step": 53
    },
    {
      "epoch": 0.26024096385542167,
      "grad_norm": 0.48977726697921753,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.6678,
      "step": 54
    },
    {
      "epoch": 0.26506024096385544,
      "grad_norm": 0.6775186061859131,
      "learning_rate": 8.730158730158731e-06,
      "loss": 0.6777,
      "step": 55
    },
    {
      "epoch": 0.26987951807228916,
      "grad_norm": 0.6069589257240295,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.6571,
      "step": 56
    },
    {
      "epoch": 0.2746987951807229,
      "grad_norm": 0.5993375778198242,
      "learning_rate": 9.047619047619049e-06,
      "loss": 0.6659,
      "step": 57
    },
    {
      "epoch": 0.27951807228915665,
      "grad_norm": 0.5999571681022644,
      "learning_rate": 9.206349206349207e-06,
      "loss": 0.6586,
      "step": 58
    },
    {
      "epoch": 0.28433734939759037,
      "grad_norm": 0.6867564916610718,
      "learning_rate": 9.365079365079366e-06,
      "loss": 0.6662,
      "step": 59
    },
    {
      "epoch": 0.2891566265060241,
      "grad_norm": 0.6454688906669617,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.655,
      "step": 60
    },
    {
      "epoch": 0.29397590361445786,
      "grad_norm": 0.6191383600234985,
      "learning_rate": 9.682539682539683e-06,
      "loss": 0.6641,
      "step": 61
    },
    {
      "epoch": 0.2987951807228916,
      "grad_norm": 0.6252975463867188,
      "learning_rate": 9.841269841269842e-06,
      "loss": 0.6436,
      "step": 62
    },
    {
      "epoch": 0.3036144578313253,
      "grad_norm": 0.5347315669059753,
      "learning_rate": 1e-05,
      "loss": 0.6542,
      "step": 63
    },
    {
      "epoch": 0.30843373493975906,
      "grad_norm": 0.6288977861404419,
      "learning_rate": 9.999920755303033e-06,
      "loss": 0.6472,
      "step": 64
    },
    {
      "epoch": 0.3132530120481928,
      "grad_norm": 0.5932735204696655,
      "learning_rate": 9.999683023724021e-06,
      "loss": 0.6385,
      "step": 65
    },
    {
      "epoch": 0.3180722891566265,
      "grad_norm": 0.573313295841217,
      "learning_rate": 9.99928681279855e-06,
      "loss": 0.6504,
      "step": 66
    },
    {
      "epoch": 0.3228915662650602,
      "grad_norm": 0.5847601294517517,
      "learning_rate": 9.998732135085665e-06,
      "loss": 0.6469,
      "step": 67
    },
    {
      "epoch": 0.327710843373494,
      "grad_norm": 0.5990540385246277,
      "learning_rate": 9.998019008167476e-06,
      "loss": 0.6498,
      "step": 68
    },
    {
      "epoch": 0.3325301204819277,
      "grad_norm": 0.5455608367919922,
      "learning_rate": 9.99714745464859e-06,
      "loss": 0.6425,
      "step": 69
    },
    {
      "epoch": 0.3373493975903614,
      "grad_norm": 0.6302762627601624,
      "learning_rate": 9.99611750215541e-06,
      "loss": 0.6386,
      "step": 70
    },
    {
      "epoch": 0.3421686746987952,
      "grad_norm": 0.6006896495819092,
      "learning_rate": 9.994929183335237e-06,
      "loss": 0.6402,
      "step": 71
    },
    {
      "epoch": 0.3469879518072289,
      "grad_norm": 0.6036058664321899,
      "learning_rate": 9.993582535855265e-06,
      "loss": 0.66,
      "step": 72
    },
    {
      "epoch": 0.35180722891566263,
      "grad_norm": 0.5187335014343262,
      "learning_rate": 9.992077602401358e-06,
      "loss": 0.6471,
      "step": 73
    },
    {
      "epoch": 0.3566265060240964,
      "grad_norm": 0.6480401754379272,
      "learning_rate": 9.990414430676716e-06,
      "loss": 0.6432,
      "step": 74
    },
    {
      "epoch": 0.3614457831325301,
      "grad_norm": 0.5152376294136047,
      "learning_rate": 9.988593073400354e-06,
      "loss": 0.6351,
      "step": 75
    },
    {
      "epoch": 0.36626506024096384,
      "grad_norm": 0.5486935377120972,
      "learning_rate": 9.986613588305435e-06,
      "loss": 0.6307,
      "step": 76
    },
    {
      "epoch": 0.3710843373493976,
      "grad_norm": 0.6851136684417725,
      "learning_rate": 9.984476038137437e-06,
      "loss": 0.6366,
      "step": 77
    },
    {
      "epoch": 0.3759036144578313,
      "grad_norm": 0.5467675924301147,
      "learning_rate": 9.982180490652165e-06,
      "loss": 0.6366,
      "step": 78
    },
    {
      "epoch": 0.38072289156626504,
      "grad_norm": 0.7552728652954102,
      "learning_rate": 9.979727018613607e-06,
      "loss": 0.6286,
      "step": 79
    },
    {
      "epoch": 0.3855421686746988,
      "grad_norm": 0.651406466960907,
      "learning_rate": 9.977115699791622e-06,
      "loss": 0.6359,
      "step": 80
    },
    {
      "epoch": 0.39036144578313253,
      "grad_norm": 0.7135255336761475,
      "learning_rate": 9.974346616959476e-06,
      "loss": 0.6327,
      "step": 81
    },
    {
      "epoch": 0.39518072289156625,
      "grad_norm": 0.6827096343040466,
      "learning_rate": 9.971419857891223e-06,
      "loss": 0.6181,
      "step": 82
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6823281645774841,
      "learning_rate": 9.968335515358916e-06,
      "loss": 0.6377,
      "step": 83
    },
    {
      "epoch": 0.40481927710843374,
      "grad_norm": 0.5493991374969482,
      "learning_rate": 9.965093687129669e-06,
      "loss": 0.6391,
      "step": 84
    },
    {
      "epoch": 0.40963855421686746,
      "grad_norm": 0.6720536351203918,
      "learning_rate": 9.961694475962562e-06,
      "loss": 0.6331,
      "step": 85
    },
    {
      "epoch": 0.41445783132530123,
      "grad_norm": 0.6092298030853271,
      "learning_rate": 9.95813798960538e-06,
      "loss": 0.6341,
      "step": 86
    },
    {
      "epoch": 0.41927710843373495,
      "grad_norm": 0.5759454369544983,
      "learning_rate": 9.954424340791195e-06,
      "loss": 0.633,
      "step": 87
    },
    {
      "epoch": 0.42409638554216866,
      "grad_norm": 0.6757510900497437,
      "learning_rate": 9.950553647234798e-06,
      "loss": 0.6243,
      "step": 88
    },
    {
      "epoch": 0.42891566265060244,
      "grad_norm": 0.5293172597885132,
      "learning_rate": 9.94652603162896e-06,
      "loss": 0.6424,
      "step": 89
    },
    {
      "epoch": 0.43373493975903615,
      "grad_norm": 0.663861870765686,
      "learning_rate": 9.942341621640558e-06,
      "loss": 0.6409,
      "step": 90
    },
    {
      "epoch": 0.43855421686746987,
      "grad_norm": 0.5438668727874756,
      "learning_rate": 9.938000549906509e-06,
      "loss": 0.6391,
      "step": 91
    },
    {
      "epoch": 0.4433734939759036,
      "grad_norm": 0.6359225511550903,
      "learning_rate": 9.93350295402958e-06,
      "loss": 0.6288,
      "step": 92
    },
    {
      "epoch": 0.44819277108433736,
      "grad_norm": 0.5770338773727417,
      "learning_rate": 9.92884897657402e-06,
      "loss": 0.6114,
      "step": 93
    },
    {
      "epoch": 0.4530120481927711,
      "grad_norm": 0.5579841136932373,
      "learning_rate": 9.924038765061042e-06,
      "loss": 0.6137,
      "step": 94
    },
    {
      "epoch": 0.4578313253012048,
      "grad_norm": 0.597624659538269,
      "learning_rate": 9.919072471964146e-06,
      "loss": 0.6455,
      "step": 95
    },
    {
      "epoch": 0.46265060240963857,
      "grad_norm": 0.5436515212059021,
      "learning_rate": 9.913950254704291e-06,
      "loss": 0.6274,
      "step": 96
    },
    {
      "epoch": 0.4674698795180723,
      "grad_norm": 0.5492904782295227,
      "learning_rate": 9.908672275644898e-06,
      "loss": 0.6291,
      "step": 97
    },
    {
      "epoch": 0.472289156626506,
      "grad_norm": 0.5357149839401245,
      "learning_rate": 9.903238702086707e-06,
      "loss": 0.6422,
      "step": 98
    },
    {
      "epoch": 0.4771084337349398,
      "grad_norm": 0.5629865527153015,
      "learning_rate": 9.897649706262474e-06,
      "loss": 0.636,
      "step": 99
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 0.5739107728004456,
      "learning_rate": 9.89190546533151e-06,
      "loss": 0.6261,
      "step": 100
    },
    {
      "epoch": 0.4867469879518072,
      "grad_norm": 0.600075900554657,
      "learning_rate": 9.88600616137407e-06,
      "loss": 0.6206,
      "step": 101
    },
    {
      "epoch": 0.491566265060241,
      "grad_norm": 0.562242329120636,
      "learning_rate": 9.879951981385577e-06,
      "loss": 0.6263,
      "step": 102
    },
    {
      "epoch": 0.4963855421686747,
      "grad_norm": 0.5759828686714172,
      "learning_rate": 9.873743117270691e-06,
      "loss": 0.6287,
      "step": 103
    },
    {
      "epoch": 0.5012048192771085,
      "grad_norm": 0.7623122334480286,
      "learning_rate": 9.867379765837237e-06,
      "loss": 0.6112,
      "step": 104
    },
    {
      "epoch": 0.5060240963855421,
      "grad_norm": 0.5862762928009033,
      "learning_rate": 9.860862128789954e-06,
      "loss": 0.63,
      "step": 105
    },
    {
      "epoch": 0.5108433734939759,
      "grad_norm": 0.848891019821167,
      "learning_rate": 9.854190412724114e-06,
      "loss": 0.6322,
      "step": 106
    },
    {
      "epoch": 0.5156626506024097,
      "grad_norm": 0.5686174035072327,
      "learning_rate": 9.847364829118963e-06,
      "loss": 0.6148,
      "step": 107
    },
    {
      "epoch": 0.5204819277108433,
      "grad_norm": 0.772788941860199,
      "learning_rate": 9.840385594331022e-06,
      "loss": 0.62,
      "step": 108
    },
    {
      "epoch": 0.5253012048192771,
      "grad_norm": 0.7146121263504028,
      "learning_rate": 9.833252929587231e-06,
      "loss": 0.6251,
      "step": 109
    },
    {
      "epoch": 0.5301204819277109,
      "grad_norm": 0.6730661988258362,
      "learning_rate": 9.825967060977933e-06,
      "loss": 0.6231,
      "step": 110
    },
    {
      "epoch": 0.5349397590361445,
      "grad_norm": 0.6394177675247192,
      "learning_rate": 9.818528219449705e-06,
      "loss": 0.6188,
      "step": 111
    },
    {
      "epoch": 0.5397590361445783,
      "grad_norm": 0.6056435108184814,
      "learning_rate": 9.810936640798046e-06,
      "loss": 0.6376,
      "step": 112
    },
    {
      "epoch": 0.5445783132530121,
      "grad_norm": 0.5977922677993774,
      "learning_rate": 9.803192565659898e-06,
      "loss": 0.6179,
      "step": 113
    },
    {
      "epoch": 0.5493975903614458,
      "grad_norm": 0.6605879664421082,
      "learning_rate": 9.795296239506011e-06,
      "loss": 0.6337,
      "step": 114
    },
    {
      "epoch": 0.5542168674698795,
      "grad_norm": 0.5492590069770813,
      "learning_rate": 9.78724791263318e-06,
      "loss": 0.6205,
      "step": 115
    },
    {
      "epoch": 0.5590361445783133,
      "grad_norm": 0.6682798862457275,
      "learning_rate": 9.779047840156288e-06,
      "loss": 0.6102,
      "step": 116
    },
    {
      "epoch": 0.563855421686747,
      "grad_norm": 0.6884996294975281,
      "learning_rate": 9.770696282000245e-06,
      "loss": 0.6063,
      "step": 117
    },
    {
      "epoch": 0.5686746987951807,
      "grad_norm": 0.6539535522460938,
      "learning_rate": 9.762193502891726e-06,
      "loss": 0.6193,
      "step": 118
    },
    {
      "epoch": 0.5734939759036145,
      "grad_norm": 0.5900925397872925,
      "learning_rate": 9.753539772350792e-06,
      "loss": 0.6113,
      "step": 119
    },
    {
      "epoch": 0.5783132530120482,
      "grad_norm": 0.7695890069007874,
      "learning_rate": 9.744735364682347e-06,
      "loss": 0.6219,
      "step": 120
    },
    {
      "epoch": 0.5831325301204819,
      "grad_norm": 0.6271756887435913,
      "learning_rate": 9.735780558967434e-06,
      "loss": 0.603,
      "step": 121
    },
    {
      "epoch": 0.5879518072289157,
      "grad_norm": 0.667735755443573,
      "learning_rate": 9.726675639054403e-06,
      "loss": 0.616,
      "step": 122
    },
    {
      "epoch": 0.5927710843373494,
      "grad_norm": 0.5626581311225891,
      "learning_rate": 9.717420893549902e-06,
      "loss": 0.6251,
      "step": 123
    },
    {
      "epoch": 0.5975903614457831,
      "grad_norm": 0.5545538067817688,
      "learning_rate": 9.70801661580973e-06,
      "loss": 0.6165,
      "step": 124
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 0.5270641446113586,
      "learning_rate": 9.698463103929542e-06,
      "loss": 0.6012,
      "step": 125
    },
    {
      "epoch": 0.6072289156626506,
      "grad_norm": 0.6241425275802612,
      "learning_rate": 9.688760660735403e-06,
      "loss": 0.6072,
      "step": 126
    },
    {
      "epoch": 0.6120481927710844,
      "grad_norm": 0.5785877108573914,
      "learning_rate": 9.67890959377418e-06,
      "loss": 0.6165,
      "step": 127
    },
    {
      "epoch": 0.6168674698795181,
      "grad_norm": 0.6360320448875427,
      "learning_rate": 9.668910215303797e-06,
      "loss": 0.6112,
      "step": 128
    },
    {
      "epoch": 0.6216867469879518,
      "grad_norm": 0.5739641189575195,
      "learning_rate": 9.658762842283343e-06,
      "loss": 0.6101,
      "step": 129
    },
    {
      "epoch": 0.6265060240963856,
      "grad_norm": 0.698076605796814,
      "learning_rate": 9.648467796363019e-06,
      "loss": 0.6136,
      "step": 130
    },
    {
      "epoch": 0.6313253012048192,
      "grad_norm": 0.6175894141197205,
      "learning_rate": 9.638025403873939e-06,
      "loss": 0.6188,
      "step": 131
    },
    {
      "epoch": 0.636144578313253,
      "grad_norm": 0.5730419754981995,
      "learning_rate": 9.627435995817799e-06,
      "loss": 0.6235,
      "step": 132
    },
    {
      "epoch": 0.6409638554216868,
      "grad_norm": 0.6372531056404114,
      "learning_rate": 9.616699907856368e-06,
      "loss": 0.6054,
      "step": 133
    },
    {
      "epoch": 0.6457831325301204,
      "grad_norm": 0.5383384823799133,
      "learning_rate": 9.605817480300863e-06,
      "loss": 0.6066,
      "step": 134
    },
    {
      "epoch": 0.6506024096385542,
      "grad_norm": 0.6308354735374451,
      "learning_rate": 9.594789058101154e-06,
      "loss": 0.6084,
      "step": 135
    },
    {
      "epoch": 0.655421686746988,
      "grad_norm": 0.5583477020263672,
      "learning_rate": 9.58361499083483e-06,
      "loss": 0.6187,
      "step": 136
    },
    {
      "epoch": 0.6602409638554216,
      "grad_norm": 0.5494464039802551,
      "learning_rate": 9.57229563269612e-06,
      "loss": 0.6177,
      "step": 137
    },
    {
      "epoch": 0.6650602409638554,
      "grad_norm": 0.5109444856643677,
      "learning_rate": 9.560831342484668e-06,
      "loss": 0.6171,
      "step": 138
    },
    {
      "epoch": 0.6698795180722892,
      "grad_norm": 0.6080160737037659,
      "learning_rate": 9.549222483594154e-06,
      "loss": 0.6126,
      "step": 139
    },
    {
      "epoch": 0.6746987951807228,
      "grad_norm": 0.5663288235664368,
      "learning_rate": 9.53746942400078e-06,
      "loss": 0.616,
      "step": 140
    },
    {
      "epoch": 0.6795180722891566,
      "grad_norm": 0.5445743203163147,
      "learning_rate": 9.525572536251608e-06,
      "loss": 0.6093,
      "step": 141
    },
    {
      "epoch": 0.6843373493975904,
      "grad_norm": 0.6263227462768555,
      "learning_rate": 9.513532197452737e-06,
      "loss": 0.6118,
      "step": 142
    },
    {
      "epoch": 0.689156626506024,
      "grad_norm": 0.6303161978721619,
      "learning_rate": 9.501348789257373e-06,
      "loss": 0.6136,
      "step": 143
    },
    {
      "epoch": 0.6939759036144578,
      "grad_norm": 0.5429930686950684,
      "learning_rate": 9.48902269785371e-06,
      "loss": 0.6039,
      "step": 144
    },
    {
      "epoch": 0.6987951807228916,
      "grad_norm": 0.6205868124961853,
      "learning_rate": 9.476554313952697e-06,
      "loss": 0.6216,
      "step": 145
    },
    {
      "epoch": 0.7036144578313253,
      "grad_norm": 0.5463981032371521,
      "learning_rate": 9.46394403277566e-06,
      "loss": 0.613,
      "step": 146
    },
    {
      "epoch": 0.708433734939759,
      "grad_norm": 0.5573562979698181,
      "learning_rate": 9.451192254041759e-06,
      "loss": 0.6074,
      "step": 147
    },
    {
      "epoch": 0.7132530120481928,
      "grad_norm": 0.6575038433074951,
      "learning_rate": 9.438299381955333e-06,
      "loss": 0.6174,
      "step": 148
    },
    {
      "epoch": 0.7180722891566265,
      "grad_norm": 0.5069466233253479,
      "learning_rate": 9.425265825193077e-06,
      "loss": 0.5937,
      "step": 149
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 0.5574111938476562,
      "learning_rate": 9.412091996891097e-06,
      "loss": 0.6242,
      "step": 150
    },
    {
      "epoch": 0.727710843373494,
      "grad_norm": 0.5976607799530029,
      "learning_rate": 9.398778314631801e-06,
      "loss": 0.6035,
      "step": 151
    },
    {
      "epoch": 0.7325301204819277,
      "grad_norm": 0.5757405161857605,
      "learning_rate": 9.385325200430679e-06,
      "loss": 0.6024,
      "step": 152
    },
    {
      "epoch": 0.7373493975903614,
      "grad_norm": 0.5733328461647034,
      "learning_rate": 9.371733080722911e-06,
      "loss": 0.6242,
      "step": 153
    },
    {
      "epoch": 0.7421686746987952,
      "grad_norm": 0.600737452507019,
      "learning_rate": 9.358002386349862e-06,
      "loss": 0.6105,
      "step": 154
    },
    {
      "epoch": 0.7469879518072289,
      "grad_norm": 0.6255360841751099,
      "learning_rate": 9.34413355254542e-06,
      "loss": 0.6112,
      "step": 155
    },
    {
      "epoch": 0.7518072289156627,
      "grad_norm": 0.5567818880081177,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.5927,
      "step": 156
    },
    {
      "epoch": 0.7566265060240964,
      "grad_norm": 0.7106531262397766,
      "learning_rate": 9.31598322945759e-06,
      "loss": 0.6112,
      "step": 157
    },
    {
      "epoch": 0.7614457831325301,
      "grad_norm": 0.5977474451065063,
      "learning_rate": 9.301702632479734e-06,
      "loss": 0.6084,
      "step": 158
    },
    {
      "epoch": 0.7662650602409639,
      "grad_norm": 0.664481520652771,
      "learning_rate": 9.287285680653254e-06,
      "loss": 0.5982,
      "step": 159
    },
    {
      "epoch": 0.7710843373493976,
      "grad_norm": 0.6926606297492981,
      "learning_rate": 9.272732830964948e-06,
      "loss": 0.6083,
      "step": 160
    },
    {
      "epoch": 0.7759036144578313,
      "grad_norm": 0.49633654952049255,
      "learning_rate": 9.258044544709276e-06,
      "loss": 0.6157,
      "step": 161
    },
    {
      "epoch": 0.7807228915662651,
      "grad_norm": 0.6019198894500732,
      "learning_rate": 9.243221287473755e-06,
      "loss": 0.6017,
      "step": 162
    },
    {
      "epoch": 0.7855421686746988,
      "grad_norm": 0.5977044105529785,
      "learning_rate": 9.228263529124199e-06,
      "loss": 0.6164,
      "step": 163
    },
    {
      "epoch": 0.7903614457831325,
      "grad_norm": 0.6039947867393494,
      "learning_rate": 9.21317174378982e-06,
      "loss": 0.5977,
      "step": 164
    },
    {
      "epoch": 0.7951807228915663,
      "grad_norm": 0.5924364328384399,
      "learning_rate": 9.197946409848196e-06,
      "loss": 0.591,
      "step": 165
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.5773972868919373,
      "learning_rate": 9.182588009910119e-06,
      "loss": 0.6025,
      "step": 166
    },
    {
      "epoch": 0.8048192771084337,
      "grad_norm": 0.5809570550918579,
      "learning_rate": 9.167097030804289e-06,
      "loss": 0.6091,
      "step": 167
    },
    {
      "epoch": 0.8096385542168675,
      "grad_norm": 0.6400211453437805,
      "learning_rate": 9.151473963561884e-06,
      "loss": 0.5995,
      "step": 168
    },
    {
      "epoch": 0.8144578313253013,
      "grad_norm": 0.5365077257156372,
      "learning_rate": 9.135719303400995e-06,
      "loss": 0.6015,
      "step": 169
    },
    {
      "epoch": 0.8192771084337349,
      "grad_norm": 0.5059504508972168,
      "learning_rate": 9.119833549710927e-06,
      "loss": 0.5981,
      "step": 170
    },
    {
      "epoch": 0.8240963855421687,
      "grad_norm": 0.6700984835624695,
      "learning_rate": 9.103817206036383e-06,
      "loss": 0.623,
      "step": 171
    },
    {
      "epoch": 0.8289156626506025,
      "grad_norm": 0.48179715871810913,
      "learning_rate": 9.087670780061477e-06,
      "loss": 0.6016,
      "step": 172
    },
    {
      "epoch": 0.8337349397590361,
      "grad_norm": 0.571553647518158,
      "learning_rate": 9.071394783593664e-06,
      "loss": 0.5971,
      "step": 173
    },
    {
      "epoch": 0.8385542168674699,
      "grad_norm": 0.5676701664924622,
      "learning_rate": 9.054989732547507e-06,
      "loss": 0.5975,
      "step": 174
    },
    {
      "epoch": 0.8433734939759037,
      "grad_norm": 0.5889990925788879,
      "learning_rate": 9.038456146928325e-06,
      "loss": 0.6008,
      "step": 175
    },
    {
      "epoch": 0.8481927710843373,
      "grad_norm": 0.5731596946716309,
      "learning_rate": 9.021794550815713e-06,
      "loss": 0.6061,
      "step": 176
    },
    {
      "epoch": 0.8530120481927711,
      "grad_norm": 0.5482961535453796,
      "learning_rate": 9.005005472346923e-06,
      "loss": 0.621,
      "step": 177
    },
    {
      "epoch": 0.8578313253012049,
      "grad_norm": 0.5580418109893799,
      "learning_rate": 8.988089443700131e-06,
      "loss": 0.5861,
      "step": 178
    },
    {
      "epoch": 0.8626506024096385,
      "grad_norm": 0.6586818099021912,
      "learning_rate": 8.971047001077561e-06,
      "loss": 0.5879,
      "step": 179
    },
    {
      "epoch": 0.8674698795180723,
      "grad_norm": 0.5529095530509949,
      "learning_rate": 8.953878684688492e-06,
      "loss": 0.5908,
      "step": 180
    },
    {
      "epoch": 0.8722891566265061,
      "grad_norm": 0.7042589783668518,
      "learning_rate": 8.936585038732143e-06,
      "loss": 0.6026,
      "step": 181
    },
    {
      "epoch": 0.8771084337349397,
      "grad_norm": 0.5728626251220703,
      "learning_rate": 8.919166611380397e-06,
      "loss": 0.5951,
      "step": 182
    },
    {
      "epoch": 0.8819277108433735,
      "grad_norm": 0.6059430837631226,
      "learning_rate": 8.90162395476046e-06,
      "loss": 0.6004,
      "step": 183
    },
    {
      "epoch": 0.8867469879518072,
      "grad_norm": 0.5582807660102844,
      "learning_rate": 8.883957624937333e-06,
      "loss": 0.5945,
      "step": 184
    },
    {
      "epoch": 0.891566265060241,
      "grad_norm": 0.5483564138412476,
      "learning_rate": 8.866168181896198e-06,
      "loss": 0.5853,
      "step": 185
    },
    {
      "epoch": 0.8963855421686747,
      "grad_norm": 0.5057897567749023,
      "learning_rate": 8.848256189524661e-06,
      "loss": 0.5998,
      "step": 186
    },
    {
      "epoch": 0.9012048192771084,
      "grad_norm": 0.5957621932029724,
      "learning_rate": 8.83022221559489e-06,
      "loss": 0.6038,
      "step": 187
    },
    {
      "epoch": 0.9060240963855422,
      "grad_norm": 0.6007516980171204,
      "learning_rate": 8.812066831745602e-06,
      "loss": 0.5897,
      "step": 188
    },
    {
      "epoch": 0.9108433734939759,
      "grad_norm": 0.603797435760498,
      "learning_rate": 8.793790613463956e-06,
      "loss": 0.6006,
      "step": 189
    },
    {
      "epoch": 0.9156626506024096,
      "grad_norm": 0.6564244627952576,
      "learning_rate": 8.775394140067299e-06,
      "loss": 0.597,
      "step": 190
    },
    {
      "epoch": 0.9204819277108434,
      "grad_norm": 0.7330297231674194,
      "learning_rate": 8.756877994684818e-06,
      "loss": 0.5946,
      "step": 191
    },
    {
      "epoch": 0.9253012048192771,
      "grad_norm": 0.509863555431366,
      "learning_rate": 8.738242764239046e-06,
      "loss": 0.6,
      "step": 192
    },
    {
      "epoch": 0.9301204819277108,
      "grad_norm": 0.6488967537879944,
      "learning_rate": 8.719489039427256e-06,
      "loss": 0.6097,
      "step": 193
    },
    {
      "epoch": 0.9349397590361446,
      "grad_norm": 0.5334682464599609,
      "learning_rate": 8.700617414702746e-06,
      "loss": 0.6196,
      "step": 194
    },
    {
      "epoch": 0.9397590361445783,
      "grad_norm": 0.4760792851448059,
      "learning_rate": 8.681628488255986e-06,
      "loss": 0.6079,
      "step": 195
    },
    {
      "epoch": 0.944578313253012,
      "grad_norm": 0.5883437395095825,
      "learning_rate": 8.66252286199567e-06,
      "loss": 0.6005,
      "step": 196
    },
    {
      "epoch": 0.9493975903614458,
      "grad_norm": 0.5796838998794556,
      "learning_rate": 8.643301141529619e-06,
      "loss": 0.5867,
      "step": 197
    },
    {
      "epoch": 0.9542168674698795,
      "grad_norm": 0.6529198884963989,
      "learning_rate": 8.6239639361456e-06,
      "loss": 0.5948,
      "step": 198
    },
    {
      "epoch": 0.9590361445783132,
      "grad_norm": 0.5081263184547424,
      "learning_rate": 8.604511858792006e-06,
      "loss": 0.6159,
      "step": 199
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 0.5447860360145569,
      "learning_rate": 8.584945526058426e-06,
      "loss": 0.6027,
      "step": 200
    },
    {
      "epoch": 0.9686746987951808,
      "grad_norm": 0.5446210503578186,
      "learning_rate": 8.565265558156101e-06,
      "loss": 0.586,
      "step": 201
    },
    {
      "epoch": 0.9734939759036144,
      "grad_norm": 0.5749803781509399,
      "learning_rate": 8.545472578898276e-06,
      "loss": 0.6034,
      "step": 202
    },
    {
      "epoch": 0.9783132530120482,
      "grad_norm": 0.539688229560852,
      "learning_rate": 8.525567215680397e-06,
      "loss": 0.6044,
      "step": 203
    },
    {
      "epoch": 0.983132530120482,
      "grad_norm": 0.7089233994483948,
      "learning_rate": 8.505550099460264e-06,
      "loss": 0.6023,
      "step": 204
    },
    {
      "epoch": 0.9879518072289156,
      "grad_norm": 0.5110678672790527,
      "learning_rate": 8.485421864737997e-06,
      "loss": 0.5975,
      "step": 205
    },
    {
      "epoch": 0.9927710843373494,
      "grad_norm": 0.6963241696357727,
      "learning_rate": 8.465183149535939e-06,
      "loss": 0.5997,
      "step": 206
    },
    {
      "epoch": 0.9975903614457832,
      "grad_norm": 0.5199764370918274,
      "learning_rate": 8.444834595378434e-06,
      "loss": 0.5887,
      "step": 207
    },
    {
      "epoch": 1.002409638554217,
      "grad_norm": 1.0180178880691528,
      "learning_rate": 8.424376847271483e-06,
      "loss": 0.8314,
      "step": 208
    },
    {
      "epoch": 1.0072289156626506,
      "grad_norm": 0.6434845924377441,
      "learning_rate": 8.403810553682307e-06,
      "loss": 0.6377,
      "step": 209
    },
    {
      "epoch": 1.0120481927710843,
      "grad_norm": 0.5937601327896118,
      "learning_rate": 8.383136366518788e-06,
      "loss": 0.5636,
      "step": 210
    },
    {
      "epoch": 1.0168674698795181,
      "grad_norm": 0.5739064812660217,
      "learning_rate": 8.362354941108803e-06,
      "loss": 0.5732,
      "step": 211
    },
    {
      "epoch": 1.0216867469879518,
      "grad_norm": 0.6371908783912659,
      "learning_rate": 8.341466936179457e-06,
      "loss": 0.5808,
      "step": 212
    },
    {
      "epoch": 1.0265060240963855,
      "grad_norm": 0.5970191359519958,
      "learning_rate": 8.320473013836197e-06,
      "loss": 0.5793,
      "step": 213
    },
    {
      "epoch": 1.0313253012048194,
      "grad_norm": 0.6064131259918213,
      "learning_rate": 8.299373839541829e-06,
      "loss": 0.5324,
      "step": 214
    },
    {
      "epoch": 1.036144578313253,
      "grad_norm": 0.7945104837417603,
      "learning_rate": 8.278170082095422e-06,
      "loss": 0.5765,
      "step": 215
    },
    {
      "epoch": 1.0409638554216867,
      "grad_norm": 0.5729210376739502,
      "learning_rate": 8.256862413611113e-06,
      "loss": 0.578,
      "step": 216
    },
    {
      "epoch": 1.0457831325301206,
      "grad_norm": 0.6838613152503967,
      "learning_rate": 8.23545150949679e-06,
      "loss": 0.5703,
      "step": 217
    },
    {
      "epoch": 1.0506024096385542,
      "grad_norm": 0.5426240563392639,
      "learning_rate": 8.213938048432697e-06,
      "loss": 0.5667,
      "step": 218
    },
    {
      "epoch": 1.0554216867469879,
      "grad_norm": 0.6620539426803589,
      "learning_rate": 8.192322712349917e-06,
      "loss": 0.5592,
      "step": 219
    },
    {
      "epoch": 1.0602409638554218,
      "grad_norm": 0.6412476897239685,
      "learning_rate": 8.170606186408756e-06,
      "loss": 0.5855,
      "step": 220
    },
    {
      "epoch": 1.0650602409638554,
      "grad_norm": 0.6816362142562866,
      "learning_rate": 8.148789158977012e-06,
      "loss": 0.5883,
      "step": 221
    },
    {
      "epoch": 1.069879518072289,
      "grad_norm": 0.5668280124664307,
      "learning_rate": 8.126872321608185e-06,
      "loss": 0.5248,
      "step": 222
    },
    {
      "epoch": 1.074698795180723,
      "grad_norm": 0.5772892832756042,
      "learning_rate": 8.104856369019525e-06,
      "loss": 0.6062,
      "step": 223
    },
    {
      "epoch": 1.0795180722891566,
      "grad_norm": 0.7769073247909546,
      "learning_rate": 8.08274199907003e-06,
      "loss": 0.5695,
      "step": 224
    },
    {
      "epoch": 1.0843373493975903,
      "grad_norm": 0.5870208740234375,
      "learning_rate": 8.060529912738316e-06,
      "loss": 0.543,
      "step": 225
    },
    {
      "epoch": 1.0891566265060242,
      "grad_norm": 0.8082980513572693,
      "learning_rate": 8.038220814100403e-06,
      "loss": 0.5556,
      "step": 226
    },
    {
      "epoch": 1.0939759036144578,
      "grad_norm": 0.5669785141944885,
      "learning_rate": 8.0158154103074e-06,
      "loss": 0.5608,
      "step": 227
    },
    {
      "epoch": 1.0987951807228915,
      "grad_norm": 0.7154496908187866,
      "learning_rate": 7.993314411563075e-06,
      "loss": 0.5701,
      "step": 228
    },
    {
      "epoch": 1.1036144578313254,
      "grad_norm": 0.5299010276794434,
      "learning_rate": 7.970718531101365e-06,
      "loss": 0.5321,
      "step": 229
    },
    {
      "epoch": 1.108433734939759,
      "grad_norm": 0.5549928545951843,
      "learning_rate": 7.948028485163744e-06,
      "loss": 0.6037,
      "step": 230
    },
    {
      "epoch": 1.1132530120481927,
      "grad_norm": 0.5550109148025513,
      "learning_rate": 7.925244992976538e-06,
      "loss": 0.5405,
      "step": 231
    },
    {
      "epoch": 1.1180722891566266,
      "grad_norm": 0.5465673208236694,
      "learning_rate": 7.902368776728125e-06,
      "loss": 0.5652,
      "step": 232
    },
    {
      "epoch": 1.1228915662650603,
      "grad_norm": 0.5998708009719849,
      "learning_rate": 7.879400561546033e-06,
      "loss": 0.5237,
      "step": 233
    },
    {
      "epoch": 1.127710843373494,
      "grad_norm": 0.5261906981468201,
      "learning_rate": 7.856341075473963e-06,
      "loss": 0.5947,
      "step": 234
    },
    {
      "epoch": 1.1325301204819278,
      "grad_norm": 0.5877994894981384,
      "learning_rate": 7.833191049448706e-06,
      "loss": 0.5326,
      "step": 235
    },
    {
      "epoch": 1.1373493975903615,
      "grad_norm": 0.49554064869880676,
      "learning_rate": 7.809951217276986e-06,
      "loss": 0.5494,
      "step": 236
    },
    {
      "epoch": 1.1421686746987951,
      "grad_norm": 0.6176630258560181,
      "learning_rate": 7.786622315612182e-06,
      "loss": 0.6198,
      "step": 237
    },
    {
      "epoch": 1.146987951807229,
      "grad_norm": 0.5112401247024536,
      "learning_rate": 7.763205083930995e-06,
      "loss": 0.5629,
      "step": 238
    },
    {
      "epoch": 1.1518072289156627,
      "grad_norm": 0.5183048844337463,
      "learning_rate": 7.739700264509993e-06,
      "loss": 0.5139,
      "step": 239
    },
    {
      "epoch": 1.1566265060240963,
      "grad_norm": 0.5413950681686401,
      "learning_rate": 7.716108602402094e-06,
      "loss": 0.6041,
      "step": 240
    },
    {
      "epoch": 1.16144578313253,
      "grad_norm": 0.526046872138977,
      "learning_rate": 7.692430845412946e-06,
      "loss": 0.5592,
      "step": 241
    },
    {
      "epoch": 1.1662650602409639,
      "grad_norm": 0.5707272887229919,
      "learning_rate": 7.668667744077215e-06,
      "loss": 0.546,
      "step": 242
    },
    {
      "epoch": 1.1710843373493975,
      "grad_norm": 0.6153143048286438,
      "learning_rate": 7.644820051634813e-06,
      "loss": 0.561,
      "step": 243
    },
    {
      "epoch": 1.1759036144578312,
      "grad_norm": 0.5286562442779541,
      "learning_rate": 7.6208885240069995e-06,
      "loss": 0.5251,
      "step": 244
    },
    {
      "epoch": 1.180722891566265,
      "grad_norm": 0.5454757809638977,
      "learning_rate": 7.596873919772438e-06,
      "loss": 0.5976,
      "step": 245
    },
    {
      "epoch": 1.1855421686746987,
      "grad_norm": 0.6441790461540222,
      "learning_rate": 7.572777000143145e-06,
      "loss": 0.5747,
      "step": 246
    },
    {
      "epoch": 1.1903614457831324,
      "grad_norm": 0.5183297395706177,
      "learning_rate": 7.548598528940354e-06,
      "loss": 0.4837,
      "step": 247
    },
    {
      "epoch": 1.1951807228915663,
      "grad_norm": 0.5835433006286621,
      "learning_rate": 7.524339272570317e-06,
      "loss": 0.6416,
      "step": 248
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.5134496092796326,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.54,
      "step": 249
    },
    {
      "epoch": 1.2048192771084336,
      "grad_norm": 0.5053899884223938,
      "learning_rate": 7.475581482732717e-06,
      "loss": 0.5513,
      "step": 250
    },
    {
      "epoch": 1.2096385542168675,
      "grad_norm": 0.5715299844741821,
      "learning_rate": 7.451084494783668e-06,
      "loss": 0.5604,
      "step": 251
    },
    {
      "epoch": 1.2144578313253012,
      "grad_norm": 0.5025492310523987,
      "learning_rate": 7.4265098126554065e-06,
      "loss": 0.5426,
      "step": 252
    },
    {
      "epoch": 1.2192771084337348,
      "grad_norm": 0.5954846739768982,
      "learning_rate": 7.401858215313228e-06,
      "loss": 0.5872,
      "step": 253
    },
    {
      "epoch": 1.2240963855421687,
      "grad_norm": 0.4413267970085144,
      "learning_rate": 7.3771304841604764e-06,
      "loss": 0.5803,
      "step": 254
    },
    {
      "epoch": 1.2289156626506024,
      "grad_norm": 0.5452926754951477,
      "learning_rate": 7.352327403013779e-06,
      "loss": 0.5996,
      "step": 255
    },
    {
      "epoch": 1.233734939759036,
      "grad_norm": 0.4635935127735138,
      "learning_rate": 7.327449758078194e-06,
      "loss": 0.5109,
      "step": 256
    },
    {
      "epoch": 1.23855421686747,
      "grad_norm": 0.49783962965011597,
      "learning_rate": 7.302498337922293e-06,
      "loss": 0.5357,
      "step": 257
    },
    {
      "epoch": 1.2433734939759036,
      "grad_norm": 0.5647582411766052,
      "learning_rate": 7.27747393345317e-06,
      "loss": 0.5724,
      "step": 258
    },
    {
      "epoch": 1.2481927710843372,
      "grad_norm": 0.49849966168403625,
      "learning_rate": 7.2523773378913655e-06,
      "loss": 0.5722,
      "step": 259
    },
    {
      "epoch": 1.2530120481927711,
      "grad_norm": 0.5430841445922852,
      "learning_rate": 7.2272093467457226e-06,
      "loss": 0.5995,
      "step": 260
    },
    {
      "epoch": 1.2578313253012048,
      "grad_norm": 0.583263099193573,
      "learning_rate": 7.201970757788172e-06,
      "loss": 0.5915,
      "step": 261
    },
    {
      "epoch": 1.2626506024096384,
      "grad_norm": 0.49482282996177673,
      "learning_rate": 7.17666237102845e-06,
      "loss": 0.5607,
      "step": 262
    },
    {
      "epoch": 1.2674698795180723,
      "grad_norm": 0.5552793741226196,
      "learning_rate": 7.151284988688731e-06,
      "loss": 0.5993,
      "step": 263
    },
    {
      "epoch": 1.272289156626506,
      "grad_norm": 0.5349662899971008,
      "learning_rate": 7.125839415178204e-06,
      "loss": 0.5268,
      "step": 264
    },
    {
      "epoch": 1.2771084337349397,
      "grad_norm": 0.4651695191860199,
      "learning_rate": 7.100326457067576e-06,
      "loss": 0.5822,
      "step": 265
    },
    {
      "epoch": 1.2819277108433735,
      "grad_norm": 0.5936465859413147,
      "learning_rate": 7.074746923063497e-06,
      "loss": 0.5578,
      "step": 266
    },
    {
      "epoch": 1.2867469879518072,
      "grad_norm": 0.5026842355728149,
      "learning_rate": 7.049101623982938e-06,
      "loss": 0.5922,
      "step": 267
    },
    {
      "epoch": 1.2915662650602409,
      "grad_norm": 0.5798065662384033,
      "learning_rate": 7.02339137272748e-06,
      "loss": 0.5598,
      "step": 268
    },
    {
      "epoch": 1.2963855421686747,
      "grad_norm": 0.5257403254508972,
      "learning_rate": 6.9976169842575526e-06,
      "loss": 0.5361,
      "step": 269
    },
    {
      "epoch": 1.3012048192771084,
      "grad_norm": 0.4787715971469879,
      "learning_rate": 6.971779275566593e-06,
      "loss": 0.5669,
      "step": 270
    },
    {
      "epoch": 1.306024096385542,
      "grad_norm": 0.5384206175804138,
      "learning_rate": 6.945879065655164e-06,
      "loss": 0.5447,
      "step": 271
    },
    {
      "epoch": 1.310843373493976,
      "grad_norm": 0.45355817675590515,
      "learning_rate": 6.919917175504978e-06,
      "loss": 0.6003,
      "step": 272
    },
    {
      "epoch": 1.3156626506024096,
      "grad_norm": 0.5099201798439026,
      "learning_rate": 6.893894428052881e-06,
      "loss": 0.5545,
      "step": 273
    },
    {
      "epoch": 1.3204819277108433,
      "grad_norm": 0.4605954587459564,
      "learning_rate": 6.867811648164769e-06,
      "loss": 0.5668,
      "step": 274
    },
    {
      "epoch": 1.3253012048192772,
      "grad_norm": 0.5192990899085999,
      "learning_rate": 6.841669662609437e-06,
      "loss": 0.577,
      "step": 275
    },
    {
      "epoch": 1.3301204819277108,
      "grad_norm": 0.48977819085121155,
      "learning_rate": 6.815469300032374e-06,
      "loss": 0.5341,
      "step": 276
    },
    {
      "epoch": 1.3349397590361445,
      "grad_norm": 0.5304272174835205,
      "learning_rate": 6.789211390929497e-06,
      "loss": 0.565,
      "step": 277
    },
    {
      "epoch": 1.3397590361445784,
      "grad_norm": 0.6027169823646545,
      "learning_rate": 6.762896767620827e-06,
      "loss": 0.5453,
      "step": 278
    },
    {
      "epoch": 1.344578313253012,
      "grad_norm": 0.531121551990509,
      "learning_rate": 6.736526264224101e-06,
      "loss": 0.5959,
      "step": 279
    },
    {
      "epoch": 1.3493975903614457,
      "grad_norm": 0.5106316208839417,
      "learning_rate": 6.710100716628345e-06,
      "loss": 0.5563,
      "step": 280
    },
    {
      "epoch": 1.3542168674698796,
      "grad_norm": 0.5311499238014221,
      "learning_rate": 6.6836209624673575e-06,
      "loss": 0.5956,
      "step": 281
    },
    {
      "epoch": 1.3590361445783132,
      "grad_norm": 0.4638383686542511,
      "learning_rate": 6.657087841093179e-06,
      "loss": 0.5184,
      "step": 282
    },
    {
      "epoch": 1.363855421686747,
      "grad_norm": 0.5220621824264526,
      "learning_rate": 6.6305021935494755e-06,
      "loss": 0.6286,
      "step": 283
    },
    {
      "epoch": 1.3686746987951808,
      "grad_norm": 0.4436984956264496,
      "learning_rate": 6.603864862544879e-06,
      "loss": 0.5357,
      "step": 284
    },
    {
      "epoch": 1.3734939759036144,
      "grad_norm": 0.46485018730163574,
      "learning_rate": 6.5771766924262795e-06,
      "loss": 0.5653,
      "step": 285
    },
    {
      "epoch": 1.378313253012048,
      "grad_norm": 0.5112223625183105,
      "learning_rate": 6.5504385291520554e-06,
      "loss": 0.5715,
      "step": 286
    },
    {
      "epoch": 1.383132530120482,
      "grad_norm": 0.4580565094947815,
      "learning_rate": 6.523651220265269e-06,
      "loss": 0.5742,
      "step": 287
    },
    {
      "epoch": 1.3879518072289156,
      "grad_norm": 0.43429890275001526,
      "learning_rate": 6.496815614866792e-06,
      "loss": 0.5597,
      "step": 288
    },
    {
      "epoch": 1.3927710843373493,
      "grad_norm": 0.4501931667327881,
      "learning_rate": 6.469932563588386e-06,
      "loss": 0.566,
      "step": 289
    },
    {
      "epoch": 1.3975903614457832,
      "grad_norm": 0.4644792377948761,
      "learning_rate": 6.443002918565754e-06,
      "loss": 0.5657,
      "step": 290
    },
    {
      "epoch": 1.4024096385542169,
      "grad_norm": 0.4781135618686676,
      "learning_rate": 6.41602753341152e-06,
      "loss": 0.5773,
      "step": 291
    },
    {
      "epoch": 1.4072289156626505,
      "grad_norm": 0.5058557391166687,
      "learning_rate": 6.389007263188176e-06,
      "loss": 0.5463,
      "step": 292
    },
    {
      "epoch": 1.4120481927710844,
      "grad_norm": 0.4932103753089905,
      "learning_rate": 6.361942964380967e-06,
      "loss": 0.5801,
      "step": 293
    },
    {
      "epoch": 1.416867469879518,
      "grad_norm": 0.5443869829177856,
      "learning_rate": 6.334835494870759e-06,
      "loss": 0.5264,
      "step": 294
    },
    {
      "epoch": 1.4216867469879517,
      "grad_norm": 0.5028607845306396,
      "learning_rate": 6.307685713906835e-06,
      "loss": 0.6111,
      "step": 295
    },
    {
      "epoch": 1.4265060240963856,
      "grad_norm": 0.4613734781742096,
      "learning_rate": 6.2804944820796596e-06,
      "loss": 0.5421,
      "step": 296
    },
    {
      "epoch": 1.4313253012048193,
      "grad_norm": 0.5405146479606628,
      "learning_rate": 6.2532626612936035e-06,
      "loss": 0.6173,
      "step": 297
    },
    {
      "epoch": 1.436144578313253,
      "grad_norm": 0.4562685787677765,
      "learning_rate": 6.225991114739622e-06,
      "loss": 0.534,
      "step": 298
    },
    {
      "epoch": 1.4409638554216868,
      "grad_norm": 0.5416148900985718,
      "learning_rate": 6.1986807068678926e-06,
      "loss": 0.5861,
      "step": 299
    },
    {
      "epoch": 1.4457831325301205,
      "grad_norm": 0.5628028512001038,
      "learning_rate": 6.171332303360411e-06,
      "loss": 0.5484,
      "step": 300
    },
    {
      "epoch": 1.4506024096385541,
      "grad_norm": 0.4369142949581146,
      "learning_rate": 6.143946771103561e-06,
      "loss": 0.5304,
      "step": 301
    },
    {
      "epoch": 1.455421686746988,
      "grad_norm": 0.5397925972938538,
      "learning_rate": 6.11652497816062e-06,
      "loss": 0.5407,
      "step": 302
    },
    {
      "epoch": 1.4602409638554217,
      "grad_norm": 0.4883024990558624,
      "learning_rate": 6.089067793744258e-06,
      "loss": 0.588,
      "step": 303
    },
    {
      "epoch": 1.4650602409638553,
      "grad_norm": 0.4950958490371704,
      "learning_rate": 6.061576088188981e-06,
      "loss": 0.5884,
      "step": 304
    },
    {
      "epoch": 1.4698795180722892,
      "grad_norm": 0.4288600981235504,
      "learning_rate": 6.034050732923538e-06,
      "loss": 0.555,
      "step": 305
    },
    {
      "epoch": 1.4746987951807229,
      "grad_norm": 0.5213388800621033,
      "learning_rate": 6.006492600443301e-06,
      "loss": 0.5619,
      "step": 306
    },
    {
      "epoch": 1.4795180722891565,
      "grad_norm": 0.4272007346153259,
      "learning_rate": 5.978902564282616e-06,
      "loss": 0.5384,
      "step": 307
    },
    {
      "epoch": 1.4843373493975904,
      "grad_norm": 0.5602285861968994,
      "learning_rate": 5.951281498987106e-06,
      "loss": 0.5827,
      "step": 308
    },
    {
      "epoch": 1.489156626506024,
      "grad_norm": 0.47353655099868774,
      "learning_rate": 5.923630280085948e-06,
      "loss": 0.541,
      "step": 309
    },
    {
      "epoch": 1.4939759036144578,
      "grad_norm": 0.5271124839782715,
      "learning_rate": 5.895949784064126e-06,
      "loss": 0.5503,
      "step": 310
    },
    {
      "epoch": 1.4987951807228916,
      "grad_norm": 0.5316128134727478,
      "learning_rate": 5.8682408883346535e-06,
      "loss": 0.5409,
      "step": 311
    },
    {
      "epoch": 1.5036144578313253,
      "grad_norm": 0.553626298904419,
      "learning_rate": 5.840504471210742e-06,
      "loss": 0.5777,
      "step": 312
    },
    {
      "epoch": 1.508433734939759,
      "grad_norm": 0.48189061880111694,
      "learning_rate": 5.8127414118779825e-06,
      "loss": 0.5784,
      "step": 313
    },
    {
      "epoch": 1.5132530120481928,
      "grad_norm": 0.6508918404579163,
      "learning_rate": 5.7849525903664636e-06,
      "loss": 0.5652,
      "step": 314
    },
    {
      "epoch": 1.5180722891566265,
      "grad_norm": 0.4616662859916687,
      "learning_rate": 5.757138887522884e-06,
      "loss": 0.5433,
      "step": 315
    },
    {
      "epoch": 1.5228915662650602,
      "grad_norm": 0.5459868311882019,
      "learning_rate": 5.729301184982622e-06,
      "loss": 0.5303,
      "step": 316
    },
    {
      "epoch": 1.527710843373494,
      "grad_norm": 0.5954388976097107,
      "learning_rate": 5.701440365141799e-06,
      "loss": 0.584,
      "step": 317
    },
    {
      "epoch": 1.5325301204819277,
      "grad_norm": 0.4622243344783783,
      "learning_rate": 5.673557311129306e-06,
      "loss": 0.5334,
      "step": 318
    },
    {
      "epoch": 1.5373493975903614,
      "grad_norm": 0.6150903701782227,
      "learning_rate": 5.645652906778808e-06,
      "loss": 0.5803,
      "step": 319
    },
    {
      "epoch": 1.5421686746987953,
      "grad_norm": 0.6536097526550293,
      "learning_rate": 5.617728036600734e-06,
      "loss": 0.6041,
      "step": 320
    },
    {
      "epoch": 1.546987951807229,
      "grad_norm": 0.48668476939201355,
      "learning_rate": 5.5897835857542315e-06,
      "loss": 0.5577,
      "step": 321
    },
    {
      "epoch": 1.5518072289156626,
      "grad_norm": 0.5237818956375122,
      "learning_rate": 5.561820440019117e-06,
      "loss": 0.5162,
      "step": 322
    },
    {
      "epoch": 1.5566265060240965,
      "grad_norm": 0.5246903896331787,
      "learning_rate": 5.533839485767795e-06,
      "loss": 0.5891,
      "step": 323
    },
    {
      "epoch": 1.5614457831325301,
      "grad_norm": 0.5068934559822083,
      "learning_rate": 5.505841609937162e-06,
      "loss": 0.5608,
      "step": 324
    },
    {
      "epoch": 1.5662650602409638,
      "grad_norm": 0.5139860510826111,
      "learning_rate": 5.477827700000492e-06,
      "loss": 0.563,
      "step": 325
    },
    {
      "epoch": 1.5710843373493977,
      "grad_norm": 0.49885353446006775,
      "learning_rate": 5.449798643939305e-06,
      "loss": 0.5472,
      "step": 326
    },
    {
      "epoch": 1.5759036144578313,
      "grad_norm": 0.5572274327278137,
      "learning_rate": 5.421755330215223e-06,
      "loss": 0.5805,
      "step": 327
    },
    {
      "epoch": 1.580722891566265,
      "grad_norm": 0.5297819972038269,
      "learning_rate": 5.39369864774181e-06,
      "loss": 0.5477,
      "step": 328
    },
    {
      "epoch": 1.5855421686746989,
      "grad_norm": 0.5140382647514343,
      "learning_rate": 5.365629485856381e-06,
      "loss": 0.5239,
      "step": 329
    },
    {
      "epoch": 1.5903614457831325,
      "grad_norm": 0.5845142006874084,
      "learning_rate": 5.337548734291827e-06,
      "loss": 0.5787,
      "step": 330
    },
    {
      "epoch": 1.5951807228915662,
      "grad_norm": 0.4414353668689728,
      "learning_rate": 5.30945728314841e-06,
      "loss": 0.5797,
      "step": 331
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.49196693301200867,
      "learning_rate": 5.281356022865542e-06,
      "loss": 0.5767,
      "step": 332
    },
    {
      "epoch": 1.6048192771084338,
      "grad_norm": 0.5447960495948792,
      "learning_rate": 5.253245844193564e-06,
      "loss": 0.5502,
      "step": 333
    },
    {
      "epoch": 1.6096385542168674,
      "grad_norm": 0.4547137916088104,
      "learning_rate": 5.225127638165514e-06,
      "loss": 0.5579,
      "step": 334
    },
    {
      "epoch": 1.6144578313253013,
      "grad_norm": 0.5411679148674011,
      "learning_rate": 5.197002296068878e-06,
      "loss": 0.5754,
      "step": 335
    },
    {
      "epoch": 1.619277108433735,
      "grad_norm": 0.45455247163772583,
      "learning_rate": 5.168870709417342e-06,
      "loss": 0.5283,
      "step": 336
    },
    {
      "epoch": 1.6240963855421686,
      "grad_norm": 0.5107097029685974,
      "learning_rate": 5.140733769922525e-06,
      "loss": 0.5377,
      "step": 337
    },
    {
      "epoch": 1.6289156626506025,
      "grad_norm": 0.495237797498703,
      "learning_rate": 5.112592369465731e-06,
      "loss": 0.616,
      "step": 338
    },
    {
      "epoch": 1.6337349397590362,
      "grad_norm": 0.5090997219085693,
      "learning_rate": 5.084447400069656e-06,
      "loss": 0.5562,
      "step": 339
    },
    {
      "epoch": 1.6385542168674698,
      "grad_norm": 0.4626471996307373,
      "learning_rate": 5.0562997538701295e-06,
      "loss": 0.5302,
      "step": 340
    },
    {
      "epoch": 1.6433734939759037,
      "grad_norm": 0.5563095211982727,
      "learning_rate": 5.0281503230878304e-06,
      "loss": 0.5631,
      "step": 341
    },
    {
      "epoch": 1.6481927710843374,
| "grad_norm": 0.4692661166191101, | |
| "learning_rate": 5e-06, | |
| "loss": 0.609, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.653012048192771, | |
| "grad_norm": 0.4281330108642578, | |
| "learning_rate": 4.971849676912172e-06, | |
| "loss": 0.5204, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.657831325301205, | |
| "grad_norm": 0.5017001032829285, | |
| "learning_rate": 4.943700246129871e-06, | |
| "loss": 0.5618, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.6626506024096386, | |
| "grad_norm": 0.47061556577682495, | |
| "learning_rate": 4.915552599930345e-06, | |
| "loss": 0.5413, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.6674698795180722, | |
| "grad_norm": 0.5569798350334167, | |
| "learning_rate": 4.887407630534271e-06, | |
| "loss": 0.5217, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.6722891566265061, | |
| "grad_norm": 0.4373623728752136, | |
| "learning_rate": 4.859266230077474e-06, | |
| "loss": 0.577, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.6771084337349398, | |
| "grad_norm": 0.4877110719680786, | |
| "learning_rate": 4.83112929058266e-06, | |
| "loss": 0.586, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.6819277108433734, | |
| "grad_norm": 0.5045443773269653, | |
| "learning_rate": 4.802997703931124e-06, | |
| "loss": 0.5214, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.6867469879518073, | |
| "grad_norm": 0.4335879683494568, | |
| "learning_rate": 4.7748723618344865e-06, | |
| "loss": 0.5975, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.691566265060241, | |
| "grad_norm": 0.47140806913375854, | |
| "learning_rate": 4.746754155806437e-06, | |
| "loss": 0.5399, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.6963855421686747, | |
| "grad_norm": 0.44901373982429504, | |
| "learning_rate": 4.71864397713446e-06, | |
| "loss": 0.617, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7012048192771085, | |
| "grad_norm": 0.3893554210662842, | |
| "learning_rate": 4.6905427168515914e-06, | |
| "loss": 0.4837, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.7060240963855422, | |
| "grad_norm": 0.5149268507957458, | |
| "learning_rate": 4.662451265708174e-06, | |
| "loss": 0.5979, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.7108433734939759, | |
| "grad_norm": 0.3967888057231903, | |
| "learning_rate": 4.63437051414362e-06, | |
| "loss": 0.5654, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7156626506024097, | |
| "grad_norm": 0.4324769675731659, | |
| "learning_rate": 4.606301352258192e-06, | |
| "loss": 0.5548, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7204819277108434, | |
| "grad_norm": 0.44806748628616333, | |
| "learning_rate": 4.5782446697847775e-06, | |
| "loss": 0.5607, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.725301204819277, | |
| "grad_norm": 0.457933634519577, | |
| "learning_rate": 4.550201356060695e-06, | |
| "loss": 0.5773, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.730120481927711, | |
| "grad_norm": 0.41432875394821167, | |
| "learning_rate": 4.52217229999951e-06, | |
| "loss": 0.5119, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.7349397590361446, | |
| "grad_norm": 0.46618443727493286, | |
| "learning_rate": 4.49415839006284e-06, | |
| "loss": 0.5282, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.7397590361445783, | |
| "grad_norm": 0.4658416509628296, | |
| "learning_rate": 4.466160514232206e-06, | |
| "loss": 0.5911, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7445783132530122, | |
| "grad_norm": 0.45206916332244873, | |
| "learning_rate": 4.438179559980885e-06, | |
| "loss": 0.5368, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.7493975903614458, | |
| "grad_norm": 0.5187458395957947, | |
| "learning_rate": 4.410216414245771e-06, | |
| "loss": 0.5681, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.7542168674698795, | |
| "grad_norm": 0.44342824816703796, | |
| "learning_rate": 4.382271963399268e-06, | |
| "loss": 0.5654, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.7590361445783134, | |
| "grad_norm": 0.45453277230262756, | |
| "learning_rate": 4.354347093221194e-06, | |
| "loss": 0.5477, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.763855421686747, | |
| "grad_norm": 0.5282381176948547, | |
| "learning_rate": 4.326442688870697e-06, | |
| "loss": 0.5759, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.7686746987951807, | |
| "grad_norm": 0.4427390694618225, | |
| "learning_rate": 4.298559634858202e-06, | |
| "loss": 0.5478, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.7734939759036146, | |
| "grad_norm": 0.4339542090892792, | |
| "learning_rate": 4.270698815017379e-06, | |
| "loss": 0.5743, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.7783132530120482, | |
| "grad_norm": 0.4502948522567749, | |
| "learning_rate": 4.2428611124771184e-06, | |
| "loss": 0.5666, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.783132530120482, | |
| "grad_norm": 0.45227017998695374, | |
| "learning_rate": 4.2150474096335356e-06, | |
| "loss": 0.5561, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.7879518072289158, | |
| "grad_norm": 0.41950294375419617, | |
| "learning_rate": 4.187258588122019e-06, | |
| "loss": 0.5617, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.7927710843373494, | |
| "grad_norm": 0.4249022901058197, | |
| "learning_rate": 4.15949552878926e-06, | |
| "loss": 0.5506, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.797590361445783, | |
| "grad_norm": 0.4221281111240387, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 0.5161, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.802409638554217, | |
| "grad_norm": 0.48842665553092957, | |
| "learning_rate": 4.104050215935875e-06, | |
| "loss": 0.5988, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.8072289156626506, | |
| "grad_norm": 0.39976850152015686, | |
| "learning_rate": 4.076369719914055e-06, | |
| "loss": 0.5246, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.8120481927710843, | |
| "grad_norm": 0.4550154209136963, | |
| "learning_rate": 4.048718501012895e-06, | |
| "loss": 0.5871, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.8168674698795182, | |
| "grad_norm": 0.44595929980278015, | |
| "learning_rate": 4.021097435717386e-06, | |
| "loss": 0.5507, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.8216867469879519, | |
| "grad_norm": 0.407740980386734, | |
| "learning_rate": 3.993507399556699e-06, | |
| "loss": 0.5536, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8265060240963855, | |
| "grad_norm": 0.518549919128418, | |
| "learning_rate": 3.965949267076465e-06, | |
| "loss": 0.569, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.8313253012048194, | |
| "grad_norm": 0.39872604608535767, | |
| "learning_rate": 3.938423911811021e-06, | |
| "loss": 0.5161, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.836144578313253, | |
| "grad_norm": 0.4431675374507904, | |
| "learning_rate": 3.910932206255742e-06, | |
| "loss": 0.5988, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.8409638554216867, | |
| "grad_norm": 0.43980199098587036, | |
| "learning_rate": 3.883475021839382e-06, | |
| "loss": 0.5529, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.8457831325301206, | |
| "grad_norm": 0.43700751662254333, | |
| "learning_rate": 3.856053228896442e-06, | |
| "loss": 0.5552, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.8506024096385543, | |
| "grad_norm": 0.4302417039871216, | |
| "learning_rate": 3.8286676966395895e-06, | |
| "loss": 0.532, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.855421686746988, | |
| "grad_norm": 0.4162864089012146, | |
| "learning_rate": 3.8013192931321095e-06, | |
| "loss": 0.5583, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.8602409638554218, | |
| "grad_norm": 0.43791574239730835, | |
| "learning_rate": 3.77400888526038e-06, | |
| "loss": 0.5247, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.8650602409638555, | |
| "grad_norm": 0.4298801124095917, | |
| "learning_rate": 3.7467373387063973e-06, | |
| "loss": 0.5928, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.8698795180722891, | |
| "grad_norm": 0.40685486793518066, | |
| "learning_rate": 3.719505517920342e-06, | |
| "loss": 0.5563, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.874698795180723, | |
| "grad_norm": 0.3878326416015625, | |
| "learning_rate": 3.692314286093167e-06, | |
| "loss": 0.5353, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.8795180722891565, | |
| "grad_norm": 0.4625506103038788, | |
| "learning_rate": 3.6651645051292415e-06, | |
| "loss": 0.5886, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.8843373493975903, | |
| "grad_norm": 0.42777660489082336, | |
| "learning_rate": 3.6380570356190346e-06, | |
| "loss": 0.5705, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.8891566265060242, | |
| "grad_norm": 0.40551841259002686, | |
| "learning_rate": 3.610992736811827e-06, | |
| "loss": 0.5354, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.8939759036144577, | |
| "grad_norm": 0.42313259840011597, | |
| "learning_rate": 3.58397246658848e-06, | |
| "loss": 0.5534, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.8987951807228916, | |
| "grad_norm": 0.4355023503303528, | |
| "learning_rate": 3.556997081434248e-06, | |
| "loss": 0.5733, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.9036144578313254, | |
| "grad_norm": 0.42895275354385376, | |
| "learning_rate": 3.5300674364116173e-06, | |
| "loss": 0.5624, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.9084337349397589, | |
| "grad_norm": 0.43042075634002686, | |
| "learning_rate": 3.5031843851332105e-06, | |
| "loss": 0.6029, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.9132530120481928, | |
| "grad_norm": 0.37077224254608154, | |
| "learning_rate": 3.476348779734732e-06, | |
| "loss": 0.5392, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.9180722891566266, | |
| "grad_norm": 0.4056945741176605, | |
| "learning_rate": 3.449561470847947e-06, | |
| "loss": 0.5322, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.92289156626506, | |
| "grad_norm": 0.4261414706707001, | |
| "learning_rate": 3.4228233075737225e-06, | |
| "loss": 0.5752, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.927710843373494, | |
| "grad_norm": 0.38658595085144043, | |
| "learning_rate": 3.3961351374551234e-06, | |
| "loss": 0.5601, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9325301204819278, | |
| "grad_norm": 0.4442770183086395, | |
| "learning_rate": 3.3694978064505258e-06, | |
| "loss": 0.6121, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.9373493975903613, | |
| "grad_norm": 0.41190898418426514, | |
| "learning_rate": 3.3429121589068213e-06, | |
| "loss": 0.5003, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.9421686746987952, | |
| "grad_norm": 0.3673941195011139, | |
| "learning_rate": 3.316379037532644e-06, | |
| "loss": 0.535, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.946987951807229, | |
| "grad_norm": 0.4322197139263153, | |
| "learning_rate": 3.289899283371657e-06, | |
| "loss": 0.528, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.9518072289156625, | |
| "grad_norm": 0.45587092638015747, | |
| "learning_rate": 3.2634737357758994e-06, | |
| "loss": 0.5608, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.9566265060240964, | |
| "grad_norm": 0.4296489953994751, | |
| "learning_rate": 3.2371032323791757e-06, | |
| "loss": 0.6065, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.9614457831325303, | |
| "grad_norm": 0.4287756085395813, | |
| "learning_rate": 3.2107886090705035e-06, | |
| "loss": 0.5215, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.9662650602409637, | |
| "grad_norm": 0.4841082692146301, | |
| "learning_rate": 3.1845306999676274e-06, | |
| "loss": 0.5658, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.9710843373493976, | |
| "grad_norm": 0.3839523196220398, | |
| "learning_rate": 3.158330337390565e-06, | |
| "loss": 0.5713, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.9759036144578315, | |
| "grad_norm": 0.37823107838630676, | |
| "learning_rate": 3.132188351835232e-06, | |
| "loss": 0.5295, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.980722891566265, | |
| "grad_norm": 0.4143429696559906, | |
| "learning_rate": 3.10610557194712e-06, | |
| "loss": 0.5654, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.9855421686746988, | |
| "grad_norm": 0.3939684331417084, | |
| "learning_rate": 3.080082824495024e-06, | |
| "loss": 0.5429, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.9903614457831327, | |
| "grad_norm": 0.4271552264690399, | |
| "learning_rate": 3.0541209343448373e-06, | |
| "loss": 0.5801, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.9951807228915661, | |
| "grad_norm": 0.4314172565937042, | |
| "learning_rate": 3.0282207244334084e-06, | |
| "loss": 0.5558, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6961327195167542, | |
| "learning_rate": 3.0023830157424504e-06, | |
| "loss": 0.7905, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.004819277108434, | |
| "grad_norm": 0.3929261863231659, | |
| "learning_rate": 2.97660862727252e-06, | |
| "loss": 0.5401, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.0096385542168673, | |
| "grad_norm": 0.46623867750167847, | |
| "learning_rate": 2.950898376017064e-06, | |
| "loss": 0.5227, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.014457831325301, | |
| "grad_norm": 0.40226995944976807, | |
| "learning_rate": 2.9252530769365053e-06, | |
| "loss": 0.533, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.019277108433735, | |
| "grad_norm": 0.43160927295684814, | |
| "learning_rate": 2.8996735429324256e-06, | |
| "loss": 0.5335, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.0240963855421685, | |
| "grad_norm": 0.42182430624961853, | |
| "learning_rate": 2.874160584821798e-06, | |
| "loss": 0.5219, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.0289156626506024, | |
| "grad_norm": 0.4267122149467468, | |
| "learning_rate": 2.848715011311271e-06, | |
| "loss": 0.5259, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.0337349397590363, | |
| "grad_norm": 0.41059398651123047, | |
| "learning_rate": 2.823337628971551e-06, | |
| "loss": 0.5302, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.0385542168674697, | |
| "grad_norm": 0.3666737675666809, | |
| "learning_rate": 2.7980292422118282e-06, | |
| "loss": 0.525, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.0433734939759036, | |
| "grad_norm": 0.460478276014328, | |
| "learning_rate": 2.7727906532542783e-06, | |
| "loss": 0.5416, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.0481927710843375, | |
| "grad_norm": 0.45187464356422424, | |
| "learning_rate": 2.7476226621086354e-06, | |
| "loss": 0.5293, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.053012048192771, | |
| "grad_norm": 0.40986311435699463, | |
| "learning_rate": 2.72252606654683e-06, | |
| "loss": 0.5429, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.057831325301205, | |
| "grad_norm": 0.3846244812011719, | |
| "learning_rate": 2.697501662077707e-06, | |
| "loss": 0.5322, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.0626506024096387, | |
| "grad_norm": 0.44859009981155396, | |
| "learning_rate": 2.6725502419218084e-06, | |
| "loss": 0.5246, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.067469879518072, | |
| "grad_norm": 0.4478023648262024, | |
| "learning_rate": 2.6476725969862227e-06, | |
| "loss": 0.5453, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.072289156626506, | |
| "grad_norm": 0.4618147015571594, | |
| "learning_rate": 2.622869515839524e-06, | |
| "loss": 0.5373, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.07710843373494, | |
| "grad_norm": 0.3915225863456726, | |
| "learning_rate": 2.5981417846867753e-06, | |
| "loss": 0.5298, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.0819277108433734, | |
| "grad_norm": 0.41270238161087036, | |
| "learning_rate": 2.573490187344596e-06, | |
| "loss": 0.5294, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.0867469879518072, | |
| "grad_norm": 0.44997212290763855, | |
| "learning_rate": 2.548915505216333e-06, | |
| "loss": 0.5152, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.091566265060241, | |
| "grad_norm": 0.445286363363266, | |
| "learning_rate": 2.524418517267283e-06, | |
| "loss": 0.5349, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.0963855421686746, | |
| "grad_norm": 0.4325414001941681, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.5424, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.1012048192771084, | |
| "grad_norm": 0.4230119287967682, | |
| "learning_rate": 2.4756607274296844e-06, | |
| "loss": 0.5278, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.1060240963855423, | |
| "grad_norm": 0.37730202078819275, | |
| "learning_rate": 2.4514014710596467e-06, | |
| "loss": 0.5133, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.1108433734939758, | |
| "grad_norm": 0.41996341943740845, | |
| "learning_rate": 2.4272229998568576e-06, | |
| "loss": 0.5367, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.1156626506024097, | |
| "grad_norm": 0.3746831715106964, | |
| "learning_rate": 2.4031260802275623e-06, | |
| "loss": 0.5228, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.1204819277108435, | |
| "grad_norm": 0.39061239361763, | |
| "learning_rate": 2.3791114759930013e-06, | |
| "loss": 0.5354, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.125301204819277, | |
| "grad_norm": 0.406981498003006, | |
| "learning_rate": 2.3551799483651894e-06, | |
| "loss": 0.5279, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.130120481927711, | |
| "grad_norm": 0.4518032670021057, | |
| "learning_rate": 2.331332255922784e-06, | |
| "loss": 0.5394, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.1349397590361447, | |
| "grad_norm": 0.40522995591163635, | |
| "learning_rate": 2.307569154587056e-06, | |
| "loss": 0.539, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.139759036144578, | |
| "grad_norm": 0.3735210597515106, | |
| "learning_rate": 2.283891397597908e-06, | |
| "loss": 0.5265, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.144578313253012, | |
| "grad_norm": 0.4129243791103363, | |
| "learning_rate": 2.2602997354900075e-06, | |
| "loss": 0.5319, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.149397590361446, | |
| "grad_norm": 0.36963480710983276, | |
| "learning_rate": 2.236794916069007e-06, | |
| "loss": 0.5279, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.1542168674698794, | |
| "grad_norm": 0.3678368926048279, | |
| "learning_rate": 2.2133776843878185e-06, | |
| "loss": 0.5317, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.1590361445783133, | |
| "grad_norm": 0.37665683031082153, | |
| "learning_rate": 2.190048782723015e-06, | |
| "loss": 0.5339, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.163855421686747, | |
| "grad_norm": 0.37703850865364075, | |
| "learning_rate": 2.166808950551296e-06, | |
| "loss": 0.5406, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.1686746987951806, | |
| "grad_norm": 0.40551963448524475, | |
| "learning_rate": 2.1436589245260375e-06, | |
| "loss": 0.5373, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.1734939759036145, | |
| "grad_norm": 0.35308101773262024, | |
| "learning_rate": 2.120599438453968e-06, | |
| "loss": 0.5154, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.1783132530120484, | |
| "grad_norm": 0.35655319690704346, | |
| "learning_rate": 2.0976312232718763e-06, | |
| "loss": 0.5408, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.183132530120482, | |
| "grad_norm": 0.3718145489692688, | |
| "learning_rate": 2.074755007023461e-06, | |
| "loss": 0.5486, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.1879518072289157, | |
| "grad_norm": 0.40163010358810425, | |
| "learning_rate": 2.0519715148362585e-06, | |
| "loss": 0.5233, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.1927710843373496, | |
| "grad_norm": 0.41173961758613586, | |
| "learning_rate": 2.0292814688986375e-06, | |
| "loss": 0.5135, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.197590361445783, | |
| "grad_norm": 0.3652428686618805, | |
| "learning_rate": 2.0066855884369246e-06, | |
| "loss": 0.5151, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.202409638554217, | |
| "grad_norm": 0.36961421370506287, | |
| "learning_rate": 1.9841845896926022e-06, | |
| "loss": 0.5446, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.207228915662651, | |
| "grad_norm": 0.3552957773208618, | |
| "learning_rate": 1.961779185899597e-06, | |
| "loss": 0.5252, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.212048192771084, | |
| "grad_norm": 0.3752812147140503, | |
| "learning_rate": 1.9394700872616856e-06, | |
| "loss": 0.5395, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.216867469879518, | |
| "grad_norm": 0.38084596395492554, | |
| "learning_rate": 1.9172580009299735e-06, | |
| "loss": 0.5256, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.221686746987952, | |
| "grad_norm": 0.3959069848060608, | |
| "learning_rate": 1.8951436309804766e-06, | |
| "loss": 0.5474, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.2265060240963854, | |
| "grad_norm": 0.4074706435203552, | |
| "learning_rate": 1.8731276783918162e-06, | |
| "loss": 0.5334, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.2313253012048193, | |
| "grad_norm": 0.37793341279029846, | |
| "learning_rate": 1.8512108410229878e-06, | |
| "loss": 0.5284, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.236144578313253, | |
| "grad_norm": 0.369093656539917, | |
| "learning_rate": 1.8293938135912475e-06, | |
| "loss": 0.5312, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.2409638554216866, | |
| "grad_norm": 0.4202345609664917, | |
| "learning_rate": 1.8076772876500831e-06, | |
| "loss": 0.5063, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.2457831325301205, | |
| "grad_norm": 0.36291444301605225, | |
| "learning_rate": 1.7860619515673034e-06, | |
| "loss": 0.5294, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.2506024096385544, | |
| "grad_norm": 0.3744347095489502, | |
| "learning_rate": 1.7645484905032129e-06, | |
| "loss": 0.5451, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.255421686746988, | |
| "grad_norm": 0.41913270950317383, | |
| "learning_rate": 1.74313758638889e-06, | |
| "loss": 0.5137, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.2602409638554217, | |
| "grad_norm": 0.3765053153038025, | |
| "learning_rate": 1.7218299179045789e-06, | |
| "loss": 0.5371, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.2650602409638556, | |
| "grad_norm": 0.33764129877090454, | |
| "learning_rate": 1.7006261604581725e-06, | |
| "loss": 0.5358, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.269879518072289, | |
| "grad_norm": 0.36633729934692383, | |
| "learning_rate": 1.6795269861638041e-06, | |
| "loss": 0.5336, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.274698795180723, | |
| "grad_norm": 0.37890884280204773, | |
| "learning_rate": 1.6585330638205454e-06, | |
| "loss": 0.5236, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.279518072289157, | |
| "grad_norm": 0.3956688344478607, | |
| "learning_rate": 1.6376450588911985e-06, | |
| "loss": 0.5505, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.2843373493975903, | |
| "grad_norm": 0.3635486662387848, | |
| "learning_rate": 1.6168636334812126e-06, | |
| "loss": 0.5234, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.289156626506024, | |
| "grad_norm": 0.3534378707408905, | |
| "learning_rate": 1.5961894463176942e-06, | |
| "loss": 0.5181, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.293975903614458, | |
| "grad_norm": 0.34564465284347534, | |
| "learning_rate": 1.5756231527285181e-06, | |
| "loss": 0.5247, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.2987951807228915, | |
| "grad_norm": 0.39127737283706665, | |
| "learning_rate": 1.555165404621567e-06, | |
| "loss": 0.5445, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.3036144578313253, | |
| "grad_norm": 0.3836536407470703, | |
| "learning_rate": 1.5348168504640631e-06, | |
| "loss": 0.5235, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.3084337349397592, | |
| "grad_norm": 0.361017107963562, | |
| "learning_rate": 1.5145781352620054e-06, | |
| "loss": 0.5248, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.3132530120481927, | |
| "grad_norm": 0.3382739722728729, | |
| "learning_rate": 1.4944499005397372e-06, | |
| "loss": 0.5279, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.3180722891566266, | |
| "grad_norm": 0.3852854073047638, | |
| "learning_rate": 1.4744327843196043e-06, | |
| "loss": 0.5159, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.32289156626506, | |
| "grad_norm": 0.3639327883720398, | |
| "learning_rate": 1.4545274211017264e-06, | |
| "loss": 0.524, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.327710843373494, | |
| "grad_norm": 0.3571608066558838, | |
| "learning_rate": 1.434734441843899e-06, | |
| "loss": 0.5219, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.3325301204819278, | |
| "grad_norm": 0.3344271183013916, | |
| "learning_rate": 1.4150544739415755e-06, | |
| "loss": 0.5267, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.337349397590361, | |
| "grad_norm": 0.35768088698387146, | |
| "learning_rate": 1.3954881412079945e-06, | |
| "loss": 0.5165, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.342168674698795, | |
| "grad_norm": 0.36367958784103394, | |
| "learning_rate": 1.3760360638544012e-06, | |
| "loss": 0.5154, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.346987951807229, | |
| "grad_norm": 0.36191970109939575, | |
| "learning_rate": 1.3566988584703817e-06, | |
| "loss": 0.5112, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.3518072289156624, | |
| "grad_norm": 0.3416457772254944, | |
| "learning_rate": 1.3374771380043306e-06, | |
| "loss": 0.5161, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.3566265060240963, | |
| "grad_norm": 0.3561415374279022, | |
| "learning_rate": 1.3183715117440143e-06, | |
| "loss": 0.5121, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.36144578313253, | |
| "grad_norm": 0.35016006231307983, | |
| "learning_rate": 1.2993825852972559e-06, | |
| "loss": 0.5324, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.3662650602409636, | |
| "grad_norm": 0.3487759530544281, | |
| "learning_rate": 1.280510960572745e-06, | |
| "loss": 0.5243, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.3710843373493975, | |
| "grad_norm": 0.3596084713935852, | |
| "learning_rate": 1.2617572357609565e-06, | |
| "loss": 0.5138, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.3759036144578314, | |
| "grad_norm": 0.3738745152950287, | |
| "learning_rate": 1.2431220053151832e-06, | |
| "loss": 0.5235, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.380722891566265, | |
| "grad_norm": 0.3412809371948242, | |
| "learning_rate": 1.2246058599327021e-06, | |
| "loss": 0.5283, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.3855421686746987, | |
| "grad_norm": 0.3288966119289398, | |
| "learning_rate": 1.2062093865360458e-06, | |
| "loss": 0.5332, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.3903614457831326, | |
| "grad_norm": 0.3649263381958008, | |
| "learning_rate": 1.1879331682543972e-06, | |
| "loss": 0.5308, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.395180722891566, | |
| "grad_norm": 0.34862828254699707, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 0.5242, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.33387741446495056, | |
| "learning_rate": 1.1517438104753386e-06, | |
| "loss": 0.5346, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.404819277108434, | |
| "grad_norm": 0.3655238449573517, | |
| "learning_rate": 1.1338318181038037e-06, | |
| "loss": 0.5173, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.4096385542168672, | |
| "grad_norm": 0.3874536454677582, | |
| "learning_rate": 1.1160423750626693e-06, | |
| "loss": 0.5382, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.414457831325301, | |
| "grad_norm": 0.33767327666282654, | |
| "learning_rate": 1.0983760452395415e-06, | |
| "loss": 0.543, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.419277108433735, | |
| "grad_norm": 0.33204296231269836, | |
| "learning_rate": 1.0808333886196038e-06, | |
| "loss": 0.5508, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.4240963855421684, | |
| "grad_norm": 0.3084474802017212, | |
| "learning_rate": 1.063414961267859e-06, | |
| "loss": 0.5483, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.4289156626506023, | |
| "grad_norm": 0.36713990569114685, | |
| "learning_rate": 1.046121315311508e-06, | |
| "loss": 0.5242, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.433734939759036, | |
| "grad_norm": 0.3953552544116974, | |
| "learning_rate": 1.02895299892244e-06, | |
| "loss": 0.521, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.4385542168674696, | |
| "grad_norm": 0.32059621810913086, | |
| "learning_rate": 1.01191055629987e-06, | |
| "loss": 0.5199, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.4433734939759035, | |
| "grad_norm": 0.3375447392463684, | |
| "learning_rate": 9.949945276530782e-07, | |
| "loss": 0.5326, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.4481927710843374, | |
| "grad_norm": 0.34036242961883545, | |
| "learning_rate": 9.782054491842879e-07, | |
| "loss": 0.5394, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.453012048192771, | |
| "grad_norm": 0.35132986307144165, | |
| "learning_rate": 9.615438530716753e-07, | |
| "loss": 0.5458, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.4578313253012047, | |
| "grad_norm": 0.36198896169662476, | |
| "learning_rate": 9.450102674524952e-07, | |
| "loss": 0.5061, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.4626506024096386, | |
| "grad_norm": 0.3417421877384186, | |
| "learning_rate": 9.286052164063369e-07, | |
| "loss": 0.5279, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.467469879518072, | |
| "grad_norm": 0.3652937412261963, | |
| "learning_rate": 9.123292199385247e-07, | |
| "loss": 0.5379, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.472289156626506, | |
| "grad_norm": 0.3249654471874237, | |
| "learning_rate": 8.961827939636198e-07, | |
| "loss": 0.5483, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.47710843373494, | |
| "grad_norm": 0.3499116003513336, | |
| "learning_rate": 8.801664502890722e-07, | |
| "loss": 0.5173, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.4819277108433733, | |
| "grad_norm": 0.3960937559604645, | |
| "learning_rate": 8.64280696599008e-07, | |
| "loss": 0.5293, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.486746987951807, | |
| "grad_norm": 0.3167719841003418, | |
| "learning_rate": 8.485260364381187e-07, | |
| "loss": 0.5362, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.491566265060241, | |
| "grad_norm": 0.36978796124458313, | |
| "learning_rate": 8.329029691957124e-07, | |
| "loss": 0.5255, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.4963855421686745, | |
| "grad_norm": 0.34822145104408264, | |
| "learning_rate": 8.17411990089883e-07, | |
| "loss": 0.5274, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.5012048192771084, | |
| "grad_norm": 0.33780673146247864, | |
| "learning_rate": 8.02053590151805e-07, | |
| "loss": 0.5249, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.5060240963855422, | |
| "grad_norm": 0.3500611484050751, | |
| "learning_rate": 7.868282562101819e-07, | |
| "loss": 0.5419, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.5108433734939757, | |
| "grad_norm": 0.33515140414237976, | |
| "learning_rate": 7.717364708758024e-07, | |
| "loss": 0.5446, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.5156626506024096, | |
| "grad_norm": 0.35610055923461914, | |
| "learning_rate": 7.567787125262449e-07, | |
| "loss": 0.5345, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.5204819277108435, | |
| "grad_norm": 0.3281942903995514, | |
| "learning_rate": 7.41955455290726e-07, | |
| "loss": 0.5257, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.525301204819277, | |
| "grad_norm": 0.3701113164424896, | |
| "learning_rate": 7.27267169035053e-07, | |
| "loss": 0.519, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.5301204819277108, | |
| "grad_norm": 0.32227593660354614, | |
| "learning_rate": 7.127143193467445e-07, | |
| "loss": 0.53, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.5349397590361447, | |
| "grad_norm": 0.34199750423431396, | |
| "learning_rate": 6.982973675202676e-07, | |
| "loss": 0.5174, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.539759036144578, | |
| "grad_norm": 0.33478933572769165, | |
| "learning_rate": 6.840167705424106e-07, | |
| "loss": 0.5374, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.544578313253012, | |
| "grad_norm": 0.37539225816726685, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.5273, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.549397590361446, | |
| "grad_norm": 0.3374510705471039, | |
| "learning_rate": 6.558664474545817e-07, | |
| "loss": 0.5294, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.5542168674698793, | |
| "grad_norm": 0.37236514687538147, | |
| "learning_rate": 6.419976136501377e-07, | |
| "loss": 0.5398, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.559036144578313, | |
| "grad_norm": 0.35223281383514404, | |
| "learning_rate": 6.282669192770896e-07, | |
| "loss": 0.5034, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.563855421686747, | |
| "grad_norm": 0.3377327024936676, | |
| "learning_rate": 6.146747995693225e-07, | |
| "loss": 0.5197, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.5686746987951805, | |
| "grad_norm": 0.34087198972702026, | |
| "learning_rate": 6.012216853682001e-07, | |
| "loss": 0.5047, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.5734939759036144, | |
| "grad_norm": 0.3489024043083191, | |
| "learning_rate": 5.879080031089047e-07, | |
| "loss": 0.5131, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.5783132530120483, | |
| "grad_norm": 0.31479188799858093, | |
| "learning_rate": 5.747341748069229e-07, | |
| "loss": 0.5346, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.5831325301204817, | |
| "grad_norm": 0.352466344833374, | |
| "learning_rate": 5.617006180446688e-07, | |
| "loss": 0.5279, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.5879518072289156, | |
| "grad_norm": 0.34586963057518005, | |
| "learning_rate": 5.488077459582425e-07, | |
| "loss": 0.5321, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.5927710843373495, | |
| "grad_norm": 0.35135793685913086, | |
| "learning_rate": 5.360559672243421e-07, | |
| "loss": 0.5376, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.597590361445783, | |
| "grad_norm": 0.35118043422698975, | |
| "learning_rate": 5.234456860473042e-07, | |
| "loss": 0.5253, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.602409638554217, | |
| "grad_norm": 0.4087206721305847, | |
| "learning_rate": 5.109773021462921e-07, | |
| "loss": 0.5263, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.6072289156626507, | |
| "grad_norm": 0.3330649137496948, | |
| "learning_rate": 4.986512107426283e-07, | |
| "loss": 0.5325, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.612048192771084, | |
| "grad_norm": 0.3389038145542145, | |
| "learning_rate": 4.864678025472635e-07, | |
| "loss": 0.5334, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.616867469879518, | |
| "grad_norm": 0.35696545243263245, | |
| "learning_rate": 4.7442746374839363e-07, | |
| "loss": 0.5447, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.621686746987952, | |
| "grad_norm": 0.35791394114494324, | |
| "learning_rate": 4.625305759992205e-07, | |
| "loss": 0.5125, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.6265060240963853, | |
| "grad_norm": 0.3383076786994934, | |
| "learning_rate": 4.50777516405847e-07, | |
| "loss": 0.5353, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.6313253012048192, | |
| "grad_norm": 0.32981517910957336, | |
| "learning_rate": 4.3916865751533313e-07, | |
| "loss": 0.532, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.636144578313253, | |
| "grad_norm": 0.33359435200691223, | |
| "learning_rate": 4.2770436730388166e-07, | |
| "loss": 0.5177, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.6409638554216865, | |
| "grad_norm": 0.3236476480960846, | |
| "learning_rate": 4.163850091651717e-07, | |
| "loss": 0.5314, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.6457831325301204, | |
| "grad_norm": 0.31992560625076294, | |
| "learning_rate": 4.05210941898847e-07, | |
| "loss": 0.5489, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.6506024096385543, | |
| "grad_norm": 0.3217684030532837, | |
| "learning_rate": 3.941825196991378e-07, | |
| "loss": 0.5393, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.6554216867469878, | |
| "grad_norm": 0.3366377353668213, | |
| "learning_rate": 3.8330009214363197e-07, | |
| "loss": 0.5393, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.6602409638554216, | |
| "grad_norm": 0.3715660572052002, | |
| "learning_rate": 3.725640041822026e-07, | |
| "loss": 0.5259, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.6650602409638555, | |
| "grad_norm": 0.3347671329975128, | |
| "learning_rate": 3.619745961260623e-07, | |
| "loss": 0.5225, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.669879518072289, | |
| "grad_norm": 0.3271038234233856, | |
| "learning_rate": 3.5153220363698225e-07, | |
| "loss": 0.5336, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.674698795180723, | |
| "grad_norm": 0.36232107877731323, | |
| "learning_rate": 3.4123715771665786e-07, | |
| "loss": 0.5342, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.6795180722891567, | |
| "grad_norm": 0.3338398337364197, | |
| "learning_rate": 3.310897846962041e-07, | |
| "loss": 0.5349, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.68433734939759, | |
| "grad_norm": 0.33740854263305664, | |
| "learning_rate": 3.2109040622582186e-07, | |
| "loss": 0.5357, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.689156626506024, | |
| "grad_norm": 0.33538562059402466, | |
| "learning_rate": 3.112393392645985e-07, | |
| "loss": 0.542, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.693975903614458, | |
| "grad_norm": 0.3457934856414795, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.524, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.6987951807228914, | |
| "grad_norm": 0.32423001527786255, | |
| "learning_rate": 2.919833841902714e-07, | |
| "loss": 0.5345, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.7036144578313253, | |
| "grad_norm": 0.3165851831436157, | |
| "learning_rate": 2.8257910645009935e-07, | |
| "loss": 0.5491, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.708433734939759, | |
| "grad_norm": 0.3321724236011505, | |
| "learning_rate": 2.733243609455971e-07, | |
| "loss": 0.5283, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.7132530120481926, | |
| "grad_norm": 0.34521132707595825, | |
| "learning_rate": 2.6421944103256657e-07, | |
| "loss": 0.5348, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.7180722891566265, | |
| "grad_norm": 0.34511709213256836, | |
| "learning_rate": 2.5526463531765467e-07, | |
| "loss": 0.5283, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.7228915662650603, | |
| "grad_norm": 0.3328828513622284, | |
| "learning_rate": 2.4646022764920843e-07, | |
| "loss": 0.5213, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.727710843373494, | |
| "grad_norm": 0.31696540117263794, | |
| "learning_rate": 2.3780649710827552e-07, | |
| "loss": 0.5261, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.7325301204819277, | |
| "grad_norm": 0.31569039821624756, | |
| "learning_rate": 2.2930371799975593e-07, | |
| "loss": 0.5176, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.7373493975903616, | |
| "grad_norm": 0.3491840660572052, | |
| "learning_rate": 2.20952159843712e-07, | |
| "loss": 0.5261, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.742168674698795, | |
| "grad_norm": 0.3705739974975586, | |
| "learning_rate": 2.1275208736682262e-07, | |
| "loss": 0.5104, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.746987951807229, | |
| "grad_norm": 0.34111452102661133, | |
| "learning_rate": 2.0470376049398944e-07, | |
| "loss": 0.5179, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.7518072289156628, | |
| "grad_norm": 0.3375121057033539, | |
| "learning_rate": 1.9680743434010385e-07, | |
| "loss": 0.532, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.756626506024096, | |
| "grad_norm": 0.34646302461624146, | |
| "learning_rate": 1.8906335920195418e-07, | |
| "loss": 0.5358, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.76144578313253, | |
| "grad_norm": 0.3552838861942291, | |
| "learning_rate": 1.814717805502958e-07, | |
| "loss": 0.529, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.766265060240964, | |
| "grad_norm": 0.344050794839859, | |
| "learning_rate": 1.7403293902206851e-07, | |
| "loss": 0.5296, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.7710843373493974, | |
| "grad_norm": 0.3431393802165985, | |
| "learning_rate": 1.667470704127694e-07, | |
| "loss": 0.5234, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.7759036144578313, | |
| "grad_norm": 0.33988475799560547, | |
| "learning_rate": 1.5961440566897913e-07, | |
| "loss": 0.5205, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.780722891566265, | |
| "grad_norm": 0.3232595920562744, | |
| "learning_rate": 1.5263517088103862e-07, | |
| "loss": 0.521, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.7855421686746986, | |
| "grad_norm": 0.32091447710990906, | |
| "learning_rate": 1.4580958727588746e-07, | |
| "loss": 0.5295, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.7903614457831325, | |
| "grad_norm": 0.3785247206687927, | |
| "learning_rate": 1.3913787121004717e-07, | |
| "loss": 0.5105, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.7951807228915664, | |
| "grad_norm": 0.3253907561302185, | |
| "learning_rate": 1.3262023416276414e-07, | |
| "loss": 0.5347, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.3325127363204956, | |
| "learning_rate": 1.2625688272930925e-07, | |
| "loss": 0.5201, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.8048192771084337, | |
| "grad_norm": 0.32310402393341064, | |
| "learning_rate": 1.2004801861442373e-07, | |
| "loss": 0.5328, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.8096385542168676, | |
| "grad_norm": 0.3170939087867737, | |
| "learning_rate": 1.1399383862592928e-07, | |
| "loss": 0.536, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.814457831325301, | |
| "grad_norm": 0.33208224177360535, | |
| "learning_rate": 1.0809453466849029e-07, | |
| "loss": 0.5444, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.819277108433735, | |
| "grad_norm": 0.30856242775917053, | |
| "learning_rate": 1.0235029373752758e-07, | |
| "loss": 0.5357, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.824096385542169, | |
| "grad_norm": 0.3055464029312134, | |
| "learning_rate": 9.676129791329481e-08, | |
| "loss": 0.5371, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.8289156626506022, | |
| "grad_norm": 0.3335382342338562, | |
| "learning_rate": 9.132772435510362e-08, | |
| "loss": 0.5267, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.833734939759036, | |
| "grad_norm": 0.33981144428253174, | |
| "learning_rate": 8.604974529571042e-08, | |
| "loss": 0.5368, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.83855421686747, | |
| "grad_norm": 0.3252619802951813, | |
| "learning_rate": 8.092752803585513e-08, | |
| "loss": 0.5216, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.8433734939759034, | |
| "grad_norm": 0.33591899275779724, | |
| "learning_rate": 7.59612349389599e-08, | |
| "loss": 0.5165, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.8481927710843373, | |
| "grad_norm": 0.3571572005748749, | |
| "learning_rate": 7.115102342598101e-08, | |
| "loss": 0.5313, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.853012048192771, | |
| "grad_norm": 0.3264915943145752, | |
| "learning_rate": 6.649704597042061e-08, | |
| "loss": 0.5343, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.8578313253012047, | |
| "grad_norm": 0.3618753254413605, | |
| "learning_rate": 6.199945009349173e-08, | |
| "loss": 0.4944, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.8626506024096385, | |
| "grad_norm": 0.33330121636390686, | |
| "learning_rate": 5.7658378359443104e-08, | |
| "loss": 0.547, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.8674698795180724, | |
| "grad_norm": 0.3385322391986847, | |
| "learning_rate": 5.3473968371040575e-08, | |
| "loss": 0.5333, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.872289156626506, | |
| "grad_norm": 0.33145639300346375, | |
| "learning_rate": 4.944635276520393e-08, | |
| "loss": 0.5362, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.8771084337349397, | |
| "grad_norm": 0.33541208505630493, | |
| "learning_rate": 4.55756592088058e-08, | |
| "loss": 0.5307, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.8819277108433736, | |
| "grad_norm": 0.32140061259269714, | |
| "learning_rate": 4.186201039462046e-08, | |
| "loss": 0.5274, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.886746987951807, | |
| "grad_norm": 0.3215773403644562, | |
| "learning_rate": 3.8305524037438035e-08, | |
| "loss": 0.5363, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.891566265060241, | |
| "grad_norm": 0.3285579979419708, | |
| "learning_rate": 3.4906312870331973e-08, | |
| "loss": 0.5268, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.896385542168675, | |
| "grad_norm": 0.3298654854297638, | |
| "learning_rate": 3.166448464108629e-08, | |
| "loss": 0.5326, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.9012048192771083, | |
| "grad_norm": 0.32575464248657227, | |
| "learning_rate": 2.8580142108778354e-08, | |
| "loss": 0.5243, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.906024096385542, | |
| "grad_norm": 0.34302592277526855, | |
| "learning_rate": 2.5653383040524228e-08, | |
| "loss": 0.5192, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.910843373493976, | |
| "grad_norm": 0.314879834651947, | |
| "learning_rate": 2.2884300208378395e-08, | |
| "loss": 0.5039, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.9156626506024095, | |
| "grad_norm": 0.33504053950309753, | |
| "learning_rate": 2.0272981386393332e-08, | |
| "loss": 0.5245, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.9204819277108434, | |
| "grad_norm": 0.3199070394039154, | |
| "learning_rate": 1.781950934783505e-08, | |
| "loss": 0.5315, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.9253012048192772, | |
| "grad_norm": 0.3318592607975006, | |
| "learning_rate": 1.552396186256411e-08, | |
| "loss": 0.5318, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.9301204819277107, | |
| "grad_norm": 0.30516648292541504, | |
| "learning_rate": 1.3386411694565894e-08, | |
| "loss": 0.5263, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.9349397590361446, | |
| "grad_norm": 0.31260839104652405, | |
| "learning_rate": 1.1406926599646373e-08, | |
| "loss": 0.5299, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.9397590361445785, | |
| "grad_norm": 0.334585040807724, | |
| "learning_rate": 9.585569323284915e-09, | |
| "loss": 0.5403, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.944578313253012, | |
| "grad_norm": 0.343319296836853, | |
| "learning_rate": 7.922397598642551e-09, | |
| "loss": 0.5216, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.9493975903614458, | |
| "grad_norm": 0.3292441964149475, | |
| "learning_rate": 6.417464144736208e-09, | |
| "loss": 0.5248, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.9542168674698797, | |
| "grad_norm": 0.35108089447021484, | |
| "learning_rate": 5.0708166647628345e-09, | |
| "loss": 0.5206, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.959036144578313, | |
| "grad_norm": 0.33806362748146057, | |
| "learning_rate": 3.88249784459227e-09, | |
| "loss": 0.5416, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.963855421686747, | |
| "grad_norm": 0.304426908493042, | |
| "learning_rate": 2.8525453514099966e-09, | |
| "loss": 0.5359, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.968674698795181, | |
| "grad_norm": 0.34446102380752563, | |
| "learning_rate": 1.980991832524759e-09, | |
| "loss": 0.5275, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.9734939759036143, | |
| "grad_norm": 0.3511168956756592, | |
| "learning_rate": 1.2678649143349485e-09, | |
| "loss": 0.5181, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.978313253012048, | |
| "grad_norm": 0.37222975492477417, | |
| "learning_rate": 7.131872014509711e-10, | |
| "loss": 0.5235, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.983132530120482, | |
| "grad_norm": 0.3414348363876343, | |
| "learning_rate": 3.1697627597970794e-10, | |
| "loss": 0.5343, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.9879518072289155, | |
| "grad_norm": 0.3310386836528778, | |
| "learning_rate": 7.924469696718451e-11, | |
| "loss": 0.5362, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.9927710843373494, | |
| "grad_norm": 0.31892916560173035, | |
| "learning_rate": 0.0, | |
| "loss": 0.5334, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.9927710843373494, | |
| "step": 621, | |
| "total_flos": 8.029368648088945e+17, | |
| "train_loss": 0.5880239765233272, | |
| "train_runtime": 15009.674, | |
| "train_samples_per_second": 3.978, | |
| "train_steps_per_second": 0.041 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 621, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.029368648088945e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
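
The log above is a standard Hugging Face `Trainer` state file (`trainer_state.json`): `log_history` holds one entry per optimizer step (`loss`, `grad_norm`, `learning_rate`), and the closing entry carries the run summary (`train_loss`, `train_runtime`, `total_flos`). The learning-rate column is consistent with a linear warmup followed by a cosine decay to zero over the 621 steps. Below is a minimal sketch for inspecting such a file; the path `trainer_state.json` is an assumption, and the snippet presumes the log has been saved as plain JSON without the table formatting shown above.

```python
# Minimal sketch (not part of the original log): load a trainer_state.json
# and summarize the loss and learning-rate curves.
import json

# Assumption: the JSON above was saved to this path without the table pipes.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step entries that carry a "loss" value; the final summary
# entry logs "train_loss"/"train_runtime" instead and is skipped here.
history = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

print(f"logged steps: {len(steps)} (first {steps[0]}, last {steps[-1]})")
print(f"final loss:   {losses[-1]:.4f}")
print(f"mean loss:    {sum(losses) / len(losses):.4f}")
print(f"final lr:     {lrs[-1]:.3e}")
```

The `"loss"` filter is what separates the per-step records from the run-summary entry at the end of `log_history`, so the same pattern works for any field logged per step, such as `grad_norm`.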