| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 492, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006097560975609756, |
| "grad_norm": 13.761144051368902, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 4.6438, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.012195121951219513, |
| "grad_norm": 13.183326369804801, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 4.8577, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.018292682926829267, |
| "grad_norm": 11.29732095469755, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 4.7954, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.024390243902439025, |
| "grad_norm": 11.769209187889713, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 4.7998, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.03048780487804878, |
| "grad_norm": 12.522749869416966, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 4.7958, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.036585365853658534, |
| "grad_norm": 13.40825064276547, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 4.5537, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.042682926829268296, |
| "grad_norm": 13.224745779198466, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 4.5891, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04878048780487805, |
| "grad_norm": 11.459531949346005, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 4.8212, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.054878048780487805, |
| "grad_norm": 12.942263223893391, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 4.6557, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.06097560975609756, |
| "grad_norm": 11.67916919497089, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 4.6643, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06707317073170732, |
| "grad_norm": 11.256361402107963, |
| "learning_rate": 4.4e-06, |
| "loss": 4.6234, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.07317073170731707, |
| "grad_norm": 10.918667814241992, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 4.5428, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07926829268292683, |
| "grad_norm": 11.18941706027207, |
| "learning_rate": 5.2e-06, |
| "loss": 4.1207, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08536585365853659, |
| "grad_norm": 10.834833962041147, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 4.2727, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.09146341463414634, |
| "grad_norm": 11.193544605148698, |
| "learning_rate": 6e-06, |
| "loss": 4.3305, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0975609756097561, |
| "grad_norm": 11.156213868367029, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 3.855, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.10365853658536585, |
| "grad_norm": 11.03392463912042, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 3.8251, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.10975609756097561, |
| "grad_norm": 10.92516220698292, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 3.6552, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11585365853658537, |
| "grad_norm": 9.360934765475477, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 3.4844, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.12195121951219512, |
| "grad_norm": 8.698890724234088, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 3.4775, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12804878048780488, |
| "grad_norm": 8.307940622795766, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 3.1046, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.13414634146341464, |
| "grad_norm": 7.873509354464809, |
| "learning_rate": 8.8e-06, |
| "loss": 2.8967, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1402439024390244, |
| "grad_norm": 6.74058652993515, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 2.7398, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.14634146341463414, |
| "grad_norm": 5.6682482038936985, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 2.554, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1524390243902439, |
| "grad_norm": 5.196466384583255, |
| "learning_rate": 1e-05, |
| "loss": 2.4104, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15853658536585366, |
| "grad_norm": 4.379173110371979, |
| "learning_rate": 1.04e-05, |
| "loss": 2.0813, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.16463414634146342, |
| "grad_norm": 4.041770670660932, |
| "learning_rate": 1.0800000000000002e-05, |
| "loss": 2.1974, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.17073170731707318, |
| "grad_norm": 3.7890756018085083, |
| "learning_rate": 1.1200000000000001e-05, |
| "loss": 2.1635, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.17682926829268292, |
| "grad_norm": 2.760454794268313, |
| "learning_rate": 1.16e-05, |
| "loss": 1.7247, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.18292682926829268, |
| "grad_norm": 2.084883247086018, |
| "learning_rate": 1.2e-05, |
| "loss": 1.7248, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18902439024390244, |
| "grad_norm": 1.8946267393458731, |
| "learning_rate": 1.2400000000000002e-05, |
| "loss": 1.6519, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 1.4683657417452, |
| "learning_rate": 1.2800000000000001e-05, |
| "loss": 1.5153, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.20121951219512196, |
| "grad_norm": 1.1510156850186328, |
| "learning_rate": 1.3200000000000002e-05, |
| "loss": 1.4752, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.2073170731707317, |
| "grad_norm": 0.9975006671404031, |
| "learning_rate": 1.3600000000000002e-05, |
| "loss": 1.3437, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.21341463414634146, |
| "grad_norm": 1.175487116789021, |
| "learning_rate": 1.4e-05, |
| "loss": 1.5412, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.21951219512195122, |
| "grad_norm": 1.0595320461986457, |
| "learning_rate": 1.4400000000000001e-05, |
| "loss": 1.3513, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.22560975609756098, |
| "grad_norm": 1.1536993931361366, |
| "learning_rate": 1.48e-05, |
| "loss": 1.387, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.23170731707317074, |
| "grad_norm": 1.166131895120981, |
| "learning_rate": 1.5200000000000002e-05, |
| "loss": 1.3192, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.23780487804878048, |
| "grad_norm": 1.1083262024444887, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 1.3045, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.24390243902439024, |
| "grad_norm": 1.0376815768281262, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.3058, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.87640355368596, |
| "learning_rate": 1.64e-05, |
| "loss": 1.1079, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.25609756097560976, |
| "grad_norm": 0.8012590361351394, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 1.0897, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2621951219512195, |
| "grad_norm": 0.7274182856521663, |
| "learning_rate": 1.72e-05, |
| "loss": 1.1044, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2682926829268293, |
| "grad_norm": 0.6432052930355101, |
| "learning_rate": 1.76e-05, |
| "loss": 1.071, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.27439024390243905, |
| "grad_norm": 0.6162901066533818, |
| "learning_rate": 1.8e-05, |
| "loss": 1.0739, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2804878048780488, |
| "grad_norm": 0.4936365689365201, |
| "learning_rate": 1.8400000000000003e-05, |
| "loss": 0.9854, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2865853658536585, |
| "grad_norm": 0.4413825753074836, |
| "learning_rate": 1.88e-05, |
| "loss": 0.9589, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 0.3835552689369408, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.9157, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.29878048780487804, |
| "grad_norm": 0.36169118496626246, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 0.9501, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.3048780487804878, |
| "grad_norm": 0.3201102977202935, |
| "learning_rate": 2e-05, |
| "loss": 0.8649, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.31097560975609756, |
| "grad_norm": 0.3301312685545821, |
| "learning_rate": 1.9999747405795057e-05, |
| "loss": 0.9233, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3170731707317073, |
| "grad_norm": 0.31971021695823615, |
| "learning_rate": 1.9998989635940996e-05, |
| "loss": 0.8435, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.3231707317073171, |
| "grad_norm": 0.35515389044587536, |
| "learning_rate": 1.9997726728719468e-05, |
| "loss": 0.8589, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.32926829268292684, |
| "grad_norm": 0.36743148858881, |
| "learning_rate": 1.9995958747931083e-05, |
| "loss": 0.8576, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3353658536585366, |
| "grad_norm": 0.42811375031790766, |
| "learning_rate": 1.9993685782892184e-05, |
| "loss": 0.9279, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.34146341463414637, |
| "grad_norm": 0.41721558479353726, |
| "learning_rate": 1.9990907948430327e-05, |
| "loss": 0.8907, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.3475609756097561, |
| "grad_norm": 0.35352961285727363, |
| "learning_rate": 1.9987625384878493e-05, |
| "loss": 0.8291, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.35365853658536583, |
| "grad_norm": 0.2804864451654209, |
| "learning_rate": 1.998383825806799e-05, |
| "loss": 0.7566, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3597560975609756, |
| "grad_norm": 0.2882591085430372, |
| "learning_rate": 1.997954675932006e-05, |
| "loss": 0.8485, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.36585365853658536, |
| "grad_norm": 0.2607140485168168, |
| "learning_rate": 1.9974751105436266e-05, |
| "loss": 0.8366, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3719512195121951, |
| "grad_norm": 0.23074985418491212, |
| "learning_rate": 1.9969451538687474e-05, |
| "loss": 0.8274, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3780487804878049, |
| "grad_norm": 0.23223595784320752, |
| "learning_rate": 1.9963648326801653e-05, |
| "loss": 0.9039, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.38414634146341464, |
| "grad_norm": 0.17404598061817236, |
| "learning_rate": 1.9957341762950346e-05, |
| "loss": 0.6557, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3902439024390244, |
| "grad_norm": 0.20998407134341585, |
| "learning_rate": 1.9950532165733847e-05, |
| "loss": 0.7985, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.39634146341463417, |
| "grad_norm": 0.194171035037221, |
| "learning_rate": 1.9943219879165113e-05, |
| "loss": 0.7393, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.4024390243902439, |
| "grad_norm": 0.19325266400118835, |
| "learning_rate": 1.993540527265239e-05, |
| "loss": 0.7448, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.40853658536585363, |
| "grad_norm": 0.22349087274155047, |
| "learning_rate": 1.992708874098054e-05, |
| "loss": 0.9037, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.4146341463414634, |
| "grad_norm": 0.1952833479528782, |
| "learning_rate": 1.9918270704291104e-05, |
| "loss": 0.7685, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.42073170731707316, |
| "grad_norm": 0.18405443762754753, |
| "learning_rate": 1.9908951608061078e-05, |
| "loss": 0.6956, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.4268292682926829, |
| "grad_norm": 0.18503738755792795, |
| "learning_rate": 1.98991319230804e-05, |
| "loss": 0.7063, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4329268292682927, |
| "grad_norm": 0.19690421628538282, |
| "learning_rate": 1.9888812145428172e-05, |
| "loss": 0.7793, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.43902439024390244, |
| "grad_norm": 0.16284982763895423, |
| "learning_rate": 1.9877992796447604e-05, |
| "loss": 0.6833, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.4451219512195122, |
| "grad_norm": 0.14309181240903507, |
| "learning_rate": 1.9866674422719666e-05, |
| "loss": 0.6706, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.45121951219512196, |
| "grad_norm": 0.15477185844290706, |
| "learning_rate": 1.9854857596035476e-05, |
| "loss": 0.7312, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.4573170731707317, |
| "grad_norm": 0.1293717417561759, |
| "learning_rate": 1.984254291336743e-05, |
| "loss": 0.6589, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4634146341463415, |
| "grad_norm": 0.12123882539222287, |
| "learning_rate": 1.982973099683902e-05, |
| "loss": 0.62, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4695121951219512, |
| "grad_norm": 0.13924219962219428, |
| "learning_rate": 1.9816422493693417e-05, |
| "loss": 0.7501, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.47560975609756095, |
| "grad_norm": 0.11917935845470132, |
| "learning_rate": 1.9802618076260784e-05, |
| "loss": 0.6819, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4817073170731707, |
| "grad_norm": 0.11800076531829735, |
| "learning_rate": 1.9788318441924276e-05, |
| "loss": 0.615, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 0.11971198977185014, |
| "learning_rate": 1.9773524313084857e-05, |
| "loss": 0.6414, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.49390243902439024, |
| "grad_norm": 0.13027819783391864, |
| "learning_rate": 1.9758236437124768e-05, |
| "loss": 0.6463, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.12460980978522168, |
| "learning_rate": 1.9742455586369786e-05, |
| "loss": 0.6529, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.5060975609756098, |
| "grad_norm": 0.13756050372643888, |
| "learning_rate": 1.972618255805019e-05, |
| "loss": 0.7114, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.5121951219512195, |
| "grad_norm": 0.12522657328282585, |
| "learning_rate": 1.9709418174260523e-05, |
| "loss": 0.6289, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.5182926829268293, |
| "grad_norm": 0.12334790238705769, |
| "learning_rate": 1.9692163281918016e-05, |
| "loss": 0.6985, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.524390243902439, |
| "grad_norm": 0.10972400668603455, |
| "learning_rate": 1.9674418752719835e-05, |
| "loss": 0.6453, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5304878048780488, |
| "grad_norm": 0.10309779127439034, |
| "learning_rate": 1.9656185483099027e-05, |
| "loss": 0.6347, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5365853658536586, |
| "grad_norm": 0.10717651442117264, |
| "learning_rate": 1.963746439417924e-05, |
| "loss": 0.6389, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5426829268292683, |
| "grad_norm": 0.10108518878859295, |
| "learning_rate": 1.961825643172819e-05, |
| "loss": 0.6449, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5487804878048781, |
| "grad_norm": 0.10582770991792313, |
| "learning_rate": 1.959856256610988e-05, |
| "loss": 0.6407, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5548780487804879, |
| "grad_norm": 0.09805540518718314, |
| "learning_rate": 1.9578383792235573e-05, |
| "loss": 0.6146, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5609756097560976, |
| "grad_norm": 0.09178135236393883, |
| "learning_rate": 1.9557721129513538e-05, |
| "loss": 0.5477, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5670731707317073, |
| "grad_norm": 0.09658702034736838, |
| "learning_rate": 1.9536575621797546e-05, |
| "loss": 0.5892, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.573170731707317, |
| "grad_norm": 0.09736462601933246, |
| "learning_rate": 1.9514948337334144e-05, |
| "loss": 0.6138, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5792682926829268, |
| "grad_norm": 0.08905830955745823, |
| "learning_rate": 1.9492840368708668e-05, |
| "loss": 0.5399, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 0.09660552709108973, |
| "learning_rate": 1.947025283279008e-05, |
| "loss": 0.6364, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5914634146341463, |
| "grad_norm": 0.09133915004182258, |
| "learning_rate": 1.9447186870674505e-05, |
| "loss": 0.5921, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5975609756097561, |
| "grad_norm": 0.0933997331456134, |
| "learning_rate": 1.9423643647627625e-05, |
| "loss": 0.6915, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.6036585365853658, |
| "grad_norm": 0.08353569640772877, |
| "learning_rate": 1.9399624353025774e-05, |
| "loss": 0.6408, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.6097560975609756, |
| "grad_norm": 0.08634151989354441, |
| "learning_rate": 1.937513020029588e-05, |
| "loss": 0.5963, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6158536585365854, |
| "grad_norm": 0.08898929542438962, |
| "learning_rate": 1.9350162426854152e-05, |
| "loss": 0.595, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.6219512195121951, |
| "grad_norm": 0.08305460595097429, |
| "learning_rate": 1.932472229404356e-05, |
| "loss": 0.5669, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6280487804878049, |
| "grad_norm": 0.08888872021259317, |
| "learning_rate": 1.9298811087070134e-05, |
| "loss": 0.6165, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6341463414634146, |
| "grad_norm": 0.08058144202586265, |
| "learning_rate": 1.9272430114938018e-05, |
| "loss": 0.5728, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.6402439024390244, |
| "grad_norm": 0.08217790638268045, |
| "learning_rate": 1.9245580710383344e-05, |
| "loss": 0.577, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6463414634146342, |
| "grad_norm": 0.07659807407519503, |
| "learning_rate": 1.9218264229806917e-05, |
| "loss": 0.5881, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6524390243902439, |
| "grad_norm": 0.07540223196226505, |
| "learning_rate": 1.9190482053205673e-05, |
| "loss": 0.62, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6585365853658537, |
| "grad_norm": 0.08107411301661235, |
| "learning_rate": 1.9162235584102973e-05, |
| "loss": 0.6488, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6646341463414634, |
| "grad_norm": 0.07719107791626204, |
| "learning_rate": 1.91335262494777e-05, |
| "loss": 0.5771, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6707317073170732, |
| "grad_norm": 0.08173053132540807, |
| "learning_rate": 1.9104355499692166e-05, |
| "loss": 0.5666, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.676829268292683, |
| "grad_norm": 0.07965621160015979, |
| "learning_rate": 1.9074724808418837e-05, |
| "loss": 0.6113, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6829268292682927, |
| "grad_norm": 0.08980818058271649, |
| "learning_rate": 1.9044635672565898e-05, |
| "loss": 0.6089, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6890243902439024, |
| "grad_norm": 0.07194337673119468, |
| "learning_rate": 1.9014089612201612e-05, |
| "loss": 0.5728, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6951219512195121, |
| "grad_norm": 0.08706992381814065, |
| "learning_rate": 1.8983088170477556e-05, |
| "loss": 0.7144, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.7012195121951219, |
| "grad_norm": 0.06652366663030163, |
| "learning_rate": 1.8951632913550625e-05, |
| "loss": 0.5026, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.7073170731707317, |
| "grad_norm": 0.07192252823061802, |
| "learning_rate": 1.8919725430503946e-05, |
| "loss": 0.5533, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.7134146341463414, |
| "grad_norm": 0.08014714412171235, |
| "learning_rate": 1.888736733326658e-05, |
| "loss": 0.6077, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.7195121951219512, |
| "grad_norm": 0.0751151009465322, |
| "learning_rate": 1.8854560256532098e-05, |
| "loss": 0.5554, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.725609756097561, |
| "grad_norm": 0.08384104993084439, |
| "learning_rate": 1.8821305857675997e-05, |
| "loss": 0.6079, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.7317073170731707, |
| "grad_norm": 0.07596092975802397, |
| "learning_rate": 1.8787605816671956e-05, |
| "loss": 0.6262, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7378048780487805, |
| "grad_norm": 0.06984378368031652, |
| "learning_rate": 1.875346183600699e-05, |
| "loss": 0.5579, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.7439024390243902, |
| "grad_norm": 0.06972708877396938, |
| "learning_rate": 1.8718875640595432e-05, |
| "loss": 0.5568, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.0708625905901818, |
| "learning_rate": 1.8683848977691784e-05, |
| "loss": 0.582, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7560975609756098, |
| "grad_norm": 0.07712676436551813, |
| "learning_rate": 1.864838361680247e-05, |
| "loss": 0.5935, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7621951219512195, |
| "grad_norm": 0.06823828416067228, |
| "learning_rate": 1.8612481349596406e-05, |
| "loss": 0.5503, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7682926829268293, |
| "grad_norm": 0.07681189237975657, |
| "learning_rate": 1.8576143989814524e-05, |
| "loss": 0.6412, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.774390243902439, |
| "grad_norm": 0.06850293773437466, |
| "learning_rate": 1.8539373373178126e-05, |
| "loss": 0.5771, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7804878048780488, |
| "grad_norm": 0.06791710528546226, |
| "learning_rate": 1.8502171357296144e-05, |
| "loss": 0.6076, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7865853658536586, |
| "grad_norm": 0.06599767271998445, |
| "learning_rate": 1.8464539821571302e-05, |
| "loss": 0.5583, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7926829268292683, |
| "grad_norm": 0.07021764659304032, |
| "learning_rate": 1.8426480667105178e-05, |
| "loss": 0.5439, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7987804878048781, |
| "grad_norm": 0.06809097796108884, |
| "learning_rate": 1.8387995816602137e-05, |
| "loss": 0.5584, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.8048780487804879, |
| "grad_norm": 0.07552768082187959, |
| "learning_rate": 1.8349087214272222e-05, |
| "loss": 0.6235, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.8109756097560976, |
| "grad_norm": 0.07388542257010466, |
| "learning_rate": 1.830975682573293e-05, |
| "loss": 0.5605, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.8170731707317073, |
| "grad_norm": 0.0734139769106561, |
| "learning_rate": 1.8270006637909907e-05, |
| "loss": 0.4911, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.823170731707317, |
| "grad_norm": 0.06661902834297227, |
| "learning_rate": 1.8229838658936566e-05, |
| "loss": 0.5263, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.8292682926829268, |
| "grad_norm": 0.08000530324170357, |
| "learning_rate": 1.818925491805265e-05, |
| "loss": 0.6063, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.8353658536585366, |
| "grad_norm": 0.06955587209390625, |
| "learning_rate": 1.8148257465501718e-05, |
| "loss": 0.5664, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.8414634146341463, |
| "grad_norm": 0.06999764411415345, |
| "learning_rate": 1.810684837242755e-05, |
| "loss": 0.5731, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8475609756097561, |
| "grad_norm": 0.07392487537186451, |
| "learning_rate": 1.8065029730769534e-05, |
| "loss": 0.5771, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.8536585365853658, |
| "grad_norm": 0.07023462293275694, |
| "learning_rate": 1.8022803653156983e-05, |
| "loss": 0.5586, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8597560975609756, |
| "grad_norm": 0.0754370714846295, |
| "learning_rate": 1.7980172272802398e-05, |
| "loss": 0.5386, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8658536585365854, |
| "grad_norm": 0.06014064520411485, |
| "learning_rate": 1.7937137743393695e-05, |
| "loss": 0.5019, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8719512195121951, |
| "grad_norm": 0.0684039280130895, |
| "learning_rate": 1.7893702238985433e-05, |
| "loss": 0.5593, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8780487804878049, |
| "grad_norm": 0.07523983909087964, |
| "learning_rate": 1.784986795388895e-05, |
| "loss": 0.608, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8841463414634146, |
| "grad_norm": 0.06631906454003386, |
| "learning_rate": 1.7805637102561516e-05, |
| "loss": 0.5496, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8902439024390244, |
| "grad_norm": 0.06861615150079985, |
| "learning_rate": 1.776101191949449e-05, |
| "loss": 0.543, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8963414634146342, |
| "grad_norm": 0.06396979787588344, |
| "learning_rate": 1.771599465910039e-05, |
| "loss": 0.565, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.9024390243902439, |
| "grad_norm": 0.06363428283758014, |
| "learning_rate": 1.7670587595599034e-05, |
| "loss": 0.5657, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.9085365853658537, |
| "grad_norm": 0.06397911394577013, |
| "learning_rate": 1.7624793022902648e-05, |
| "loss": 0.5343, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.9146341463414634, |
| "grad_norm": 0.06974823526096927, |
| "learning_rate": 1.757861325449997e-05, |
| "loss": 0.5022, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9207317073170732, |
| "grad_norm": 0.06210202598875651, |
| "learning_rate": 1.753205062333937e-05, |
| "loss": 0.486, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.926829268292683, |
| "grad_norm": 0.07077403367537935, |
| "learning_rate": 1.7485107481711014e-05, |
| "loss": 0.4869, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.9329268292682927, |
| "grad_norm": 0.06531115051828462, |
| "learning_rate": 1.7437786201128003e-05, |
| "loss": 0.5544, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.9390243902439024, |
| "grad_norm": 0.0705614281634583, |
| "learning_rate": 1.7390089172206594e-05, |
| "loss": 0.5951, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9451219512195121, |
| "grad_norm": 0.08044467420017791, |
| "learning_rate": 1.73420188045454e-05, |
| "loss": 0.5882, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9512195121951219, |
| "grad_norm": 0.06778031094228984, |
| "learning_rate": 1.7293577526603684e-05, |
| "loss": 0.5307, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9573170731707317, |
| "grad_norm": 0.07735829065491621, |
| "learning_rate": 1.724476778557866e-05, |
| "loss": 0.5803, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9634146341463414, |
| "grad_norm": 0.06740940804822154, |
| "learning_rate": 1.719559204728188e-05, |
| "loss": 0.517, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9695121951219512, |
| "grad_norm": 0.07639960491412072, |
| "learning_rate": 1.7146052796014646e-05, |
| "loss": 0.5753, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 0.061081285281898906, |
| "learning_rate": 1.7096152534442515e-05, |
| "loss": 0.4686, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9817073170731707, |
| "grad_norm": 0.06793168030335978, |
| "learning_rate": 1.704589378346886e-05, |
| "loss": 0.5447, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9878048780487805, |
| "grad_norm": 0.07395695811692952, |
| "learning_rate": 1.6995279082107537e-05, |
| "loss": 0.5657, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9939024390243902, |
| "grad_norm": 0.065555783375453, |
| "learning_rate": 1.6944310987354597e-05, |
| "loss": 0.5449, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.07050013910319028, |
| "learning_rate": 1.689299207405911e-05, |
| "loss": 0.5184, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.0060975609756098, |
| "grad_norm": 0.0733901169113276, |
| "learning_rate": 1.6841324934793096e-05, |
| "loss": 0.5226, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.0121951219512195, |
| "grad_norm": 0.06589566948346295, |
| "learning_rate": 1.678931217972055e-05, |
| "loss": 0.4873, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.0182926829268293, |
| "grad_norm": 0.07475734276172114, |
| "learning_rate": 1.6736956436465573e-05, |
| "loss": 0.4827, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.024390243902439, |
| "grad_norm": 0.058903086819527835, |
| "learning_rate": 1.6684260349979637e-05, |
| "loss": 0.5053, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.0304878048780488, |
| "grad_norm": 0.06353582735567607, |
| "learning_rate": 1.6631226582407954e-05, |
| "loss": 0.5482, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.0365853658536586, |
| "grad_norm": 0.06380787517800868, |
| "learning_rate": 1.6577857812954994e-05, |
| "loss": 0.5248, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.0426829268292683, |
| "grad_norm": 0.06730058327208745, |
| "learning_rate": 1.6524156737749132e-05, |
| "loss": 0.4964, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.048780487804878, |
| "grad_norm": 0.06293892448460658, |
| "learning_rate": 1.6470126069706456e-05, |
| "loss": 0.5168, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.0548780487804879, |
| "grad_norm": 0.0694624094267741, |
| "learning_rate": 1.641576853839369e-05, |
| "loss": 0.5526, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.0609756097560976, |
| "grad_norm": 0.06478295672497261, |
| "learning_rate": 1.6361086889890307e-05, |
| "loss": 0.4853, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.0670731707317074, |
| "grad_norm": 0.06608027299921394, |
| "learning_rate": 1.6306083886649823e-05, |
| "loss": 0.5226, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0731707317073171, |
| "grad_norm": 0.06681662898135733, |
| "learning_rate": 1.6250762307360206e-05, |
| "loss": 0.537, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.079268292682927, |
| "grad_norm": 0.06053711484659685, |
| "learning_rate": 1.6195124946803527e-05, |
| "loss": 0.4683, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.0853658536585367, |
| "grad_norm": 0.07013371267663553, |
| "learning_rate": 1.6139174615714753e-05, |
| "loss": 0.5767, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.0914634146341464, |
| "grad_norm": 0.06617676868427722, |
| "learning_rate": 1.6082914140639768e-05, |
| "loss": 0.5357, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.0975609756097562, |
| "grad_norm": 0.06805254845313483, |
| "learning_rate": 1.6026346363792565e-05, |
| "loss": 0.5179, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.103658536585366, |
| "grad_norm": 0.06882718691143014, |
| "learning_rate": 1.596947414291167e-05, |
| "loss": 0.5665, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.1097560975609757, |
| "grad_norm": 0.06329003072823183, |
| "learning_rate": 1.591230035111576e-05, |
| "loss": 0.512, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.1158536585365855, |
| "grad_norm": 0.06713658217392786, |
| "learning_rate": 1.5854827876758535e-05, |
| "loss": 0.4958, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.1219512195121952, |
| "grad_norm": 0.0677868436901709, |
| "learning_rate": 1.5797059623282787e-05, |
| "loss": 0.4715, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.1280487804878048, |
| "grad_norm": 0.06613758460632664, |
| "learning_rate": 1.573899850907373e-05, |
| "loss": 0.4829, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.1341463414634148, |
| "grad_norm": 0.06887716201911288, |
| "learning_rate": 1.568064746731156e-05, |
| "loss": 0.5418, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.1402439024390243, |
| "grad_norm": 0.07682982503987941, |
| "learning_rate": 1.5622009445823274e-05, |
| "loss": 0.5929, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.146341463414634, |
| "grad_norm": 0.06571420924574008, |
| "learning_rate": 1.5563087406933762e-05, |
| "loss": 0.511, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.1524390243902438, |
| "grad_norm": 0.0664511649725902, |
| "learning_rate": 1.550388432731613e-05, |
| "loss": 0.4558, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.1585365853658536, |
| "grad_norm": 0.07492574855512298, |
| "learning_rate": 1.5444403197841345e-05, |
| "loss": 0.5396, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.1646341463414633, |
| "grad_norm": 0.07122982585751268, |
| "learning_rate": 1.5384647023427136e-05, |
| "loss": 0.5301, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.170731707317073, |
| "grad_norm": 0.0658921691477124, |
| "learning_rate": 1.5324618822886167e-05, |
| "loss": 0.4947, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.1768292682926829, |
| "grad_norm": 0.07813967262256015, |
| "learning_rate": 1.526432162877356e-05, |
| "loss": 0.5522, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.1829268292682926, |
| "grad_norm": 0.06731988936901052, |
| "learning_rate": 1.5203758487233677e-05, |
| "loss": 0.476, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.1890243902439024, |
| "grad_norm": 0.07228505085779448, |
| "learning_rate": 1.514293245784623e-05, |
| "loss": 0.5278, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1951219512195121, |
| "grad_norm": 0.07452980948980172, |
| "learning_rate": 1.5081846613471736e-05, |
| "loss": 0.5773, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.201219512195122, |
| "grad_norm": 0.06955858620563475, |
| "learning_rate": 1.5020504040096241e-05, |
| "loss": 0.5147, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.2073170731707317, |
| "grad_norm": 0.07065384450910228, |
| "learning_rate": 1.4958907836675467e-05, |
| "loss": 0.5275, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.2134146341463414, |
| "grad_norm": 0.07110195242202547, |
| "learning_rate": 1.489706111497821e-05, |
| "loss": 0.4819, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.2195121951219512, |
| "grad_norm": 0.06820779355050262, |
| "learning_rate": 1.4834966999429179e-05, |
| "loss": 0.521, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.225609756097561, |
| "grad_norm": 0.06964782085920465, |
| "learning_rate": 1.4772628626951114e-05, |
| "loss": 0.5234, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.2317073170731707, |
| "grad_norm": 0.06930582742745629, |
| "learning_rate": 1.4710049146806348e-05, |
| "loss": 0.4911, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.2378048780487805, |
| "grad_norm": 0.06741877286597113, |
| "learning_rate": 1.4647231720437687e-05, |
| "loss": 0.5215, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.2439024390243902, |
| "grad_norm": 0.06792423855223992, |
| "learning_rate": 1.4584179521308703e-05, |
| "loss": 0.5117, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.07167343063555995, |
| "learning_rate": 1.4520895734743419e-05, |
| "loss": 0.538, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.2560975609756098, |
| "grad_norm": 0.07256282666966574, |
| "learning_rate": 1.4457383557765385e-05, |
| "loss": 0.5529, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.2621951219512195, |
| "grad_norm": 0.06757284467612035, |
| "learning_rate": 1.4393646198936169e-05, |
| "loss": 0.4892, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.2682926829268293, |
| "grad_norm": 0.07188557520197532, |
| "learning_rate": 1.4329686878193271e-05, |
| "loss": 0.5602, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.274390243902439, |
| "grad_norm": 0.07747770661719462, |
| "learning_rate": 1.4265508826687442e-05, |
| "loss": 0.5658, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.2804878048780488, |
| "grad_norm": 0.06809716748651344, |
| "learning_rate": 1.4201115286619464e-05, |
| "loss": 0.4713, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.2865853658536586, |
| "grad_norm": 0.07448043410586613, |
| "learning_rate": 1.4136509511076347e-05, |
| "loss": 0.5311, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.2926829268292683, |
| "grad_norm": 0.08085706529770824, |
| "learning_rate": 1.4071694763866988e-05, |
| "loss": 0.5617, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.298780487804878, |
| "grad_norm": 0.0728263083382011, |
| "learning_rate": 1.4006674319357298e-05, |
| "loss": 0.4792, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.3048780487804879, |
| "grad_norm": 0.0670268791274602, |
| "learning_rate": 1.3941451462304778e-05, |
| "loss": 0.4675, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.3109756097560976, |
| "grad_norm": 0.08039512209656642, |
| "learning_rate": 1.387602948769257e-05, |
| "loss": 0.5056, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.3170731707317074, |
| "grad_norm": 0.06817800650730356, |
| "learning_rate": 1.3810411700563005e-05, |
| "loss": 0.4739, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.3231707317073171, |
| "grad_norm": 0.07057112884463199, |
| "learning_rate": 1.3744601415850637e-05, |
| "loss": 0.5573, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.329268292682927, |
| "grad_norm": 0.06981321032667759, |
| "learning_rate": 1.3678601958214779e-05, |
| "loss": 0.5014, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.3353658536585367, |
| "grad_norm": 0.06993357941167098, |
| "learning_rate": 1.3612416661871532e-05, |
| "loss": 0.524, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.3414634146341464, |
| "grad_norm": 0.06483182405879527, |
| "learning_rate": 1.3546048870425356e-05, |
| "loss": 0.4806, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.3475609756097562, |
| "grad_norm": 0.07027433088183081, |
| "learning_rate": 1.3479501936700161e-05, |
| "loss": 0.4944, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.3536585365853657, |
| "grad_norm": 0.08129330060634665, |
| "learning_rate": 1.3412779222569907e-05, |
| "loss": 0.5541, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.3597560975609757, |
| "grad_norm": 0.06825577692381518, |
| "learning_rate": 1.3345884098788775e-05, |
| "loss": 0.473, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.3658536585365852, |
| "grad_norm": 0.06613216751504289, |
| "learning_rate": 1.3278819944820893e-05, |
| "loss": 0.4318, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.3719512195121952, |
| "grad_norm": 0.07283827698992242, |
| "learning_rate": 1.3211590148669586e-05, |
| "loss": 0.5125, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.3780487804878048, |
| "grad_norm": 0.06892969965848932, |
| "learning_rate": 1.314419810670624e-05, |
| "loss": 0.4533, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.3841463414634148, |
| "grad_norm": 0.07855732769188979, |
| "learning_rate": 1.3076647223498703e-05, |
| "loss": 0.5461, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.3902439024390243, |
| "grad_norm": 0.07382325404677605, |
| "learning_rate": 1.3008940911639302e-05, |
| "loss": 0.4379, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.3963414634146343, |
| "grad_norm": 0.07190670195425049, |
| "learning_rate": 1.2941082591572443e-05, |
| "loss": 0.533, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.4024390243902438, |
| "grad_norm": 0.06888657163333817, |
| "learning_rate": 1.2873075691421808e-05, |
| "loss": 0.5146, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.4085365853658536, |
| "grad_norm": 0.06879068933807653, |
| "learning_rate": 1.2804923646817169e-05, |
| "loss": 0.542, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.4146341463414633, |
| "grad_norm": 0.06316663754312257, |
| "learning_rate": 1.2736629900720832e-05, |
| "loss": 0.4763, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.420731707317073, |
| "grad_norm": 0.0745177281343944, |
| "learning_rate": 1.2668197903253694e-05, |
| "loss": 0.5063, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.4268292682926829, |
| "grad_norm": 0.07549236104105322, |
| "learning_rate": 1.2599631111520956e-05, |
| "loss": 0.4871, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.4329268292682926, |
| "grad_norm": 0.07608890942436555, |
| "learning_rate": 1.2530932989437463e-05, |
| "loss": 0.5216, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.4390243902439024, |
| "grad_norm": 0.08852428564425496, |
| "learning_rate": 1.2462107007552726e-05, |
| "loss": 0.5814, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.4451219512195121, |
| "grad_norm": 0.07169927489263321, |
| "learning_rate": 1.2393156642875579e-05, |
| "loss": 0.5097, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.451219512195122, |
| "grad_norm": 0.0714844858843735, |
| "learning_rate": 1.2324085378698529e-05, |
| "loss": 0.4943, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.4573170731707317, |
| "grad_norm": 0.07303490526979263, |
| "learning_rate": 1.2254896704421789e-05, |
| "loss": 0.5254, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.4634146341463414, |
| "grad_norm": 0.07649861873490388, |
| "learning_rate": 1.2185594115376991e-05, |
| "loss": 0.4628, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.4695121951219512, |
| "grad_norm": 0.07968432654727967, |
| "learning_rate": 1.211618111265061e-05, |
| "loss": 0.5311, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.475609756097561, |
| "grad_norm": 0.08164340764032027, |
| "learning_rate": 1.2046661202907101e-05, |
| "loss": 0.5082, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.4817073170731707, |
| "grad_norm": 0.07536455518754334, |
| "learning_rate": 1.1977037898211723e-05, |
| "loss": 0.4963, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.4878048780487805, |
| "grad_norm": 0.07838988741024766, |
| "learning_rate": 1.1907314715853138e-05, |
| "loss": 0.4964, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.4939024390243902, |
| "grad_norm": 0.07681652298019655, |
| "learning_rate": 1.1837495178165706e-05, |
| "loss": 0.531, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.07585351296872528, |
| "learning_rate": 1.176758281235155e-05, |
| "loss": 0.4971, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.5060975609756098, |
| "grad_norm": 0.07391818296648663, |
| "learning_rate": 1.1697581150302362e-05, |
| "loss": 0.5189, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.5121951219512195, |
| "grad_norm": 0.07240466805493384, |
| "learning_rate": 1.1627493728420978e-05, |
| "loss": 0.4696, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.5182926829268293, |
| "grad_norm": 0.07336052530563156, |
| "learning_rate": 1.1557324087442719e-05, |
| "loss": 0.5158, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.524390243902439, |
| "grad_norm": 0.0734922713913184, |
| "learning_rate": 1.1487075772256517e-05, |
| "loss": 0.5013, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.5304878048780488, |
| "grad_norm": 0.07041146771920755, |
| "learning_rate": 1.1416752331725842e-05, |
| "loss": 0.4925, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.5365853658536586, |
| "grad_norm": 0.07749242303496245, |
| "learning_rate": 1.1346357318509395e-05, |
| "loss": 0.5115, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.5426829268292683, |
| "grad_norm": 0.06688029575585173, |
| "learning_rate": 1.1275894288881664e-05, |
| "loss": 0.434, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.548780487804878, |
| "grad_norm": 0.07880736062509318, |
| "learning_rate": 1.1205366802553231e-05, |
| "loss": 0.513, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.5548780487804879, |
| "grad_norm": 0.07925969227352526, |
| "learning_rate": 1.1134778422490971e-05, |
| "loss": 0.5467, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.5609756097560976, |
| "grad_norm": 0.07331952124747425, |
| "learning_rate": 1.1064132714738024e-05, |
| "loss": 0.5394, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.5670731707317072, |
| "grad_norm": 0.06881324378789994, |
| "learning_rate": 1.0993433248233672e-05, |
| "loss": 0.481, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.5731707317073171, |
| "grad_norm": 0.0760805406679711, |
| "learning_rate": 1.092268359463302e-05, |
| "loss": 0.4998, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.5792682926829267, |
| "grad_norm": 0.0723622186584405, |
| "learning_rate": 1.0851887328126569e-05, |
| "loss": 0.4989, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.5853658536585367, |
| "grad_norm": 0.0691805814488331, |
| "learning_rate": 1.0781048025259648e-05, |
| "loss": 0.4491, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.5914634146341462, |
| "grad_norm": 0.07332736517126122, |
| "learning_rate": 1.0710169264751733e-05, |
| "loss": 0.4767, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.5975609756097562, |
| "grad_norm": 0.07503167590781622, |
| "learning_rate": 1.0639254627315658e-05, |
| "loss": 0.5108, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.6036585365853657, |
| "grad_norm": 0.07676152916231047, |
| "learning_rate": 1.0568307695476712e-05, |
| "loss": 0.5324, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.6097560975609757, |
| "grad_norm": 0.0834575992691424, |
| "learning_rate": 1.049733205339167e-05, |
| "loss": 0.5628, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.6158536585365852, |
| "grad_norm": 0.07979828063308889, |
| "learning_rate": 1.0426331286667701e-05, |
| "loss": 0.5017, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.6219512195121952, |
| "grad_norm": 0.07227807983837574, |
| "learning_rate": 1.0355308982181254e-05, |
| "loss": 0.4286, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.6280487804878048, |
| "grad_norm": 0.08033151020781615, |
| "learning_rate": 1.0284268727896833e-05, |
| "loss": 0.4991, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.6341463414634148, |
| "grad_norm": 0.07726754814987509, |
| "learning_rate": 1.0213214112685747e-05, |
| "loss": 0.5663, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.6402439024390243, |
| "grad_norm": 0.06975489529697236, |
| "learning_rate": 1.0142148726144807e-05, |
| "loss": 0.4509, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.6463414634146343, |
| "grad_norm": 0.08607941878366727, |
| "learning_rate": 1.0071076158414977e-05, |
| "loss": 0.5012, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.6524390243902438, |
| "grad_norm": 0.07924808288315173, |
| "learning_rate": 1e-05, |
| "loss": 0.4968, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.6585365853658538, |
| "grad_norm": 0.07521641587131223, |
| "learning_rate": 9.928923841585025e-06, |
| "loss": 0.5333, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.6646341463414633, |
| "grad_norm": 0.0810914472998851, |
| "learning_rate": 9.857851273855195e-06, |
| "loss": 0.5256, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.6707317073170733, |
| "grad_norm": 0.07570611716859411, |
| "learning_rate": 9.786785887314255e-06, |
| "loss": 0.4844, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.6768292682926829, |
| "grad_norm": 0.08692043191937647, |
| "learning_rate": 9.715731272103172e-06, |
| "loss": 0.55, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.6829268292682928, |
| "grad_norm": 0.07549433485077096, |
| "learning_rate": 9.644691017818752e-06, |
| "loss": 0.4599, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.6890243902439024, |
| "grad_norm": 0.07013095295478262, |
| "learning_rate": 9.573668713332305e-06, |
| "loss": 0.4641, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.6951219512195121, |
| "grad_norm": 0.085732485667367, |
| "learning_rate": 9.502667946608332e-06, |
| "loss": 0.5409, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.701219512195122, |
| "grad_norm": 0.0786732581180552, |
| "learning_rate": 9.43169230452329e-06, |
| "loss": 0.5047, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.7073170731707317, |
| "grad_norm": 0.06973904067642213, |
| "learning_rate": 9.360745372684346e-06, |
| "loss": 0.4611, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.7134146341463414, |
| "grad_norm": 0.07149507221102347, |
| "learning_rate": 9.289830735248269e-06, |
| "loss": 0.5249, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.7195121951219512, |
| "grad_norm": 0.07598878917991338, |
| "learning_rate": 9.218951974740354e-06, |
| "loss": 0.53, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.725609756097561, |
| "grad_norm": 0.07880251948989025, |
| "learning_rate": 9.148112671873433e-06, |
| "loss": 0.5195, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.7317073170731707, |
| "grad_norm": 0.08006942318123801, |
| "learning_rate": 9.07731640536698e-06, |
| "loss": 0.4935, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.7378048780487805, |
| "grad_norm": 0.07419436980709716, |
| "learning_rate": 9.00656675176633e-06, |
| "loss": 0.5, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.7439024390243902, |
| "grad_norm": 0.07335821062357814, |
| "learning_rate": 8.935867285261977e-06, |
| "loss": 0.4689, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.08300432948525868, |
| "learning_rate": 8.865221577509034e-06, |
| "loss": 0.5499, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.7560975609756098, |
| "grad_norm": 0.07414556420580612, |
| "learning_rate": 8.79463319744677e-06, |
| "loss": 0.5016, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.7621951219512195, |
| "grad_norm": 0.0777082606308814, |
| "learning_rate": 8.724105711118342e-06, |
| "loss": 0.5094, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.7682926829268293, |
| "grad_norm": 0.07879787724881869, |
| "learning_rate": 8.653642681490608e-06, |
| "loss": 0.504, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.774390243902439, |
| "grad_norm": 0.07782466491724375, |
| "learning_rate": 8.583247668274163e-06, |
| "loss": 0.4871, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.7804878048780488, |
| "grad_norm": 0.07959753822335783, |
| "learning_rate": 8.512924227743482e-06, |
| "loss": 0.4637, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.7865853658536586, |
| "grad_norm": 0.08403574176326123, |
| "learning_rate": 8.442675912557281e-06, |
| "loss": 0.4978, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.7926829268292683, |
| "grad_norm": 0.07506838029170206, |
| "learning_rate": 8.372506271579022e-06, |
| "loss": 0.4801, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.798780487804878, |
| "grad_norm": 0.08007620776198685, |
| "learning_rate": 8.30241884969764e-06, |
| "loss": 0.5467, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.8048780487804879, |
| "grad_norm": 0.07302138656473144, |
| "learning_rate": 8.232417187648454e-06, |
| "loss": 0.4591, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.8109756097560976, |
| "grad_norm": 0.07968477173539414, |
| "learning_rate": 8.162504821834296e-06, |
| "loss": 0.4869, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.8170731707317072, |
| "grad_norm": 0.07295556591701204, |
| "learning_rate": 8.092685284146865e-06, |
| "loss": 0.4857, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.8231707317073171, |
| "grad_norm": 0.0687852684162483, |
| "learning_rate": 8.02296210178828e-06, |
| "loss": 0.4376, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.8292682926829267, |
| "grad_norm": 0.07614301377824628, |
| "learning_rate": 7.953338797092902e-06, |
| "loss": 0.4687, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.8353658536585367, |
| "grad_norm": 0.08065344887657697, |
| "learning_rate": 7.883818887349391e-06, |
| "loss": 0.558, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.8414634146341462, |
| "grad_norm": 0.07537828658738212, |
| "learning_rate": 7.814405884623012e-06, |
| "loss": 0.4641, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.8475609756097562, |
| "grad_norm": 0.07520932682727934, |
| "learning_rate": 7.745103295578216e-06, |
| "loss": 0.4807, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.8536585365853657, |
| "grad_norm": 0.08194352024084861, |
| "learning_rate": 7.675914621301476e-06, |
| "loss": 0.5249, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.8597560975609757, |
| "grad_norm": 0.06548230309414133, |
| "learning_rate": 7.606843357124426e-06, |
| "loss": 0.4296, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.8658536585365852, |
| "grad_norm": 0.07539507791922381, |
| "learning_rate": 7.5378929924472735e-06, |
| "loss": 0.4906, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.8719512195121952, |
| "grad_norm": 0.07757046744416946, |
| "learning_rate": 7.469067010562538e-06, |
| "loss": 0.4764, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.8780487804878048, |
| "grad_norm": 0.08347451677435065, |
| "learning_rate": 7.400368888479048e-06, |
| "loss": 0.5079, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.8841463414634148, |
| "grad_norm": 0.09459522265909277, |
| "learning_rate": 7.331802096746309e-06, |
| "loss": 0.5622, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.8902439024390243, |
| "grad_norm": 0.07271276680988117, |
| "learning_rate": 7.263370099279173e-06, |
| "loss": 0.4646, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.8963414634146343, |
| "grad_norm": 0.07270944162582102, |
| "learning_rate": 7.195076353182834e-06, |
| "loss": 0.4824, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.9024390243902438, |
| "grad_norm": 0.07557182291040342, |
| "learning_rate": 7.126924308578196e-06, |
| "loss": 0.4434, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.9085365853658538, |
| "grad_norm": 0.07838104917336293, |
| "learning_rate": 7.058917408427559e-06, |
| "loss": 0.4969, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.9146341463414633, |
| "grad_norm": 0.0772118612308542, |
| "learning_rate": 6.9910590883607e-06, |
| "loss": 0.4897, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.9207317073170733, |
| "grad_norm": 0.06733566470669253, |
| "learning_rate": 6.923352776501302e-06, |
| "loss": 0.4541, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.9268292682926829, |
| "grad_norm": 0.07768052936426381, |
| "learning_rate": 6.855801893293765e-06, |
| "loss": 0.4746, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.9329268292682928, |
| "grad_norm": 0.07601549840390559, |
| "learning_rate": 6.788409851330419e-06, |
| "loss": 0.5037, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.9390243902439024, |
| "grad_norm": 0.08311022242797193, |
| "learning_rate": 6.721180055179113e-06, |
| "loss": 0.5478, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.9451219512195121, |
| "grad_norm": 0.07527262749855876, |
| "learning_rate": 6.654115901211229e-06, |
| "loss": 0.4801, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.951219512195122, |
| "grad_norm": 0.0819908930356081, |
| "learning_rate": 6.587220777430097e-06, |
| "loss": 0.5252, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.9573170731707317, |
| "grad_norm": 0.07276145316957822, |
| "learning_rate": 6.5204980632998394e-06, |
| "loss": 0.411, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.9634146341463414, |
| "grad_norm": 0.06904185307886326, |
| "learning_rate": 6.453951129574644e-06, |
| "loss": 0.4813, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.9695121951219512, |
| "grad_norm": 0.07458141714965788, |
| "learning_rate": 6.387583338128471e-06, |
| "loss": 0.5033, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.975609756097561, |
| "grad_norm": 0.07431900473667878, |
| "learning_rate": 6.321398041785225e-06, |
| "loss": 0.4907, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.9817073170731707, |
| "grad_norm": 0.07780066087542951, |
| "learning_rate": 6.255398584149366e-06, |
| "loss": 0.4902, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.9878048780487805, |
| "grad_norm": 0.07639692261752619, |
| "learning_rate": 6.189588299436997e-06, |
| "loss": 0.4978, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.9939024390243902, |
| "grad_norm": 0.07548995093210441, |
| "learning_rate": 6.123970512307433e-06, |
| "loss": 0.4664, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.0747178162965431, |
| "learning_rate": 6.058548537695225e-06, |
| "loss": 0.474, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.0060975609756095, |
| "grad_norm": 0.07256188615230309, |
| "learning_rate": 5.9933256806427056e-06, |
| "loss": 0.45, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.0121951219512195, |
| "grad_norm": 0.07890015744026495, |
| "learning_rate": 5.928305236133016e-06, |
| "loss": 0.5278, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.018292682926829, |
| "grad_norm": 0.0743608081300144, |
| "learning_rate": 5.86349048892366e-06, |
| "loss": 0.5151, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.024390243902439, |
| "grad_norm": 0.07744372435832222, |
| "learning_rate": 5.798884713380542e-06, |
| "loss": 0.4706, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.0304878048780486, |
| "grad_norm": 0.07446342377276646, |
| "learning_rate": 5.734491173312559e-06, |
| "loss": 0.3936, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.0365853658536586, |
| "grad_norm": 0.07700683633251168, |
| "learning_rate": 5.67031312180673e-06, |
| "loss": 0.4931, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.042682926829268, |
| "grad_norm": 0.07438951456585641, |
| "learning_rate": 5.60635380106383e-06, |
| "loss": 0.4958, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.048780487804878, |
| "grad_norm": 0.07659907187784923, |
| "learning_rate": 5.542616442234618e-06, |
| "loss": 0.4846, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.0548780487804876, |
| "grad_norm": 0.06905536948425527, |
| "learning_rate": 5.479104265256583e-06, |
| "loss": 0.4426, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.0609756097560976, |
| "grad_norm": 0.07334778911378619, |
| "learning_rate": 5.415820478691301e-06, |
| "loss": 0.5074, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.067073170731707, |
| "grad_norm": 0.08664025624260953, |
| "learning_rate": 5.352768279562315e-06, |
| "loss": 0.5383, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.073170731707317, |
| "grad_norm": 0.08278651278238408, |
| "learning_rate": 5.2899508531936526e-06, |
| "loss": 0.4713, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.0792682926829267, |
| "grad_norm": 0.0705392226402741, |
| "learning_rate": 5.2273713730488886e-06, |
| "loss": 0.403, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.0853658536585367, |
| "grad_norm": 0.0717929565778019, |
| "learning_rate": 5.165033000570825e-06, |
| "loss": 0.472, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.091463414634146, |
| "grad_norm": 0.07551856711067856, |
| "learning_rate": 5.1029388850217935e-06, |
| "loss": 0.4945, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.097560975609756, |
| "grad_norm": 0.07438201387306197, |
| "learning_rate": 5.041092163324537e-06, |
| "loss": 0.4939, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.1036585365853657, |
| "grad_norm": 0.07626903753672695, |
| "learning_rate": 4.979495959903759e-06, |
| "loss": 0.4662, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.1097560975609757, |
| "grad_norm": 0.07374673606028373, |
| "learning_rate": 4.918153386528271e-06, |
| "loss": 0.4792, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.1158536585365852, |
| "grad_norm": 0.08229855647674697, |
| "learning_rate": 4.8570675421537685e-06, |
| "loss": 0.5428, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.1219512195121952, |
| "grad_norm": 0.07472947580500576, |
| "learning_rate": 4.7962415127663265e-06, |
| "loss": 0.5573, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.1280487804878048, |
| "grad_norm": 0.07720860403921907, |
| "learning_rate": 4.7356783712264405e-06, |
| "loss": 0.5366, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.1341463414634148, |
| "grad_norm": 0.07671762679161022, |
| "learning_rate": 4.675381177113837e-06, |
| "loss": 0.4991, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.1402439024390243, |
| "grad_norm": 0.0698611175055525, |
| "learning_rate": 4.615352976572867e-06, |
| "loss": 0.463, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.1463414634146343, |
| "grad_norm": 0.08168992782435783, |
| "learning_rate": 4.555596802158653e-06, |
| "loss": 0.5243, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.152439024390244, |
| "grad_norm": 0.07634240659584558, |
| "learning_rate": 4.4961156726838725e-06, |
| "loss": 0.4832, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.158536585365854, |
| "grad_norm": 0.07065742577897564, |
| "learning_rate": 4.436912593066241e-06, |
| "loss": 0.5121, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.1646341463414633, |
| "grad_norm": 0.07396087251257588, |
| "learning_rate": 4.377990554176729e-06, |
| "loss": 0.4896, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.1707317073170733, |
| "grad_norm": 0.07474646282993128, |
| "learning_rate": 4.319352532688444e-06, |
| "loss": 0.4612, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.176829268292683, |
| "grad_norm": 0.08069543680226443, |
| "learning_rate": 4.261001490926272e-06, |
| "loss": 0.5218, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.182926829268293, |
| "grad_norm": 0.07346735875767992, |
| "learning_rate": 4.2029403767172175e-06, |
| "loss": 0.435, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.1890243902439024, |
| "grad_norm": 0.07522119438266486, |
| "learning_rate": 4.14517212324147e-06, |
| "loss": 0.4956, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.1951219512195124, |
| "grad_norm": 0.08032494668646596, |
| "learning_rate": 4.087699648884248e-06, |
| "loss": 0.4752, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.201219512195122, |
| "grad_norm": 0.08192204498128373, |
| "learning_rate": 4.0305258570883336e-06, |
| "loss": 0.5108, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.207317073170732, |
| "grad_norm": 0.08394146118190073, |
| "learning_rate": 3.973653636207437e-06, |
| "loss": 0.5567, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.2134146341463414, |
| "grad_norm": 0.07596087618305393, |
| "learning_rate": 3.917085859360234e-06, |
| "loss": 0.4685, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.2195121951219514, |
| "grad_norm": 0.07887155715773822, |
| "learning_rate": 3.860825384285247e-06, |
| "loss": 0.5206, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.225609756097561, |
| "grad_norm": 0.07296513823227467, |
| "learning_rate": 3.804875053196477e-06, |
| "loss": 0.4469, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.231707317073171, |
| "grad_norm": 0.08190684855847946, |
| "learning_rate": 3.7492376926397966e-06, |
| "loss": 0.5094, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.2378048780487805, |
| "grad_norm": 0.07617526219017642, |
| "learning_rate": 3.6939161133501823e-06, |
| "loss": 0.4479, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.2439024390243905, |
| "grad_norm": 0.08063324451878306, |
| "learning_rate": 3.6389131101096953e-06, |
| "loss": 0.5099, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.07739599643572288, |
| "learning_rate": 3.5842314616063134e-06, |
| "loss": 0.491, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.2560975609756095, |
| "grad_norm": 0.07768918830309231, |
| "learning_rate": 3.529873930293546e-06, |
| "loss": 0.5417, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.2621951219512195, |
| "grad_norm": 0.0822012033362632, |
| "learning_rate": 3.4758432622508677e-06, |
| "loss": 0.5186, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.2682926829268295, |
| "grad_norm": 0.0764020181839071, |
| "learning_rate": 3.422142187045011e-06, |
| "loss": 0.4754, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.274390243902439, |
| "grad_norm": 0.08335314099643498, |
| "learning_rate": 3.3687734175920505e-06, |
| "loss": 0.5537, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.2804878048780486, |
| "grad_norm": 0.0803475394628154, |
| "learning_rate": 3.3157396500203655e-06, |
| "loss": 0.4212, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.2865853658536586, |
| "grad_norm": 0.06853604150629149, |
| "learning_rate": 3.2630435635344283e-06, |
| "loss": 0.4197, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.292682926829268, |
| "grad_norm": 0.07027233118743227, |
| "learning_rate": 3.2106878202794513e-06, |
| "loss": 0.426, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.298780487804878, |
| "grad_norm": 0.08035482501355977, |
| "learning_rate": 3.1586750652069077e-06, |
| "loss": 0.4768, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.3048780487804876, |
| "grad_norm": 0.0767033268066497, |
| "learning_rate": 3.1070079259408934e-06, |
| "loss": 0.4298, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.3109756097560976, |
| "grad_norm": 0.07871530893320917, |
| "learning_rate": 3.0556890126454075e-06, |
| "loss": 0.5194, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.317073170731707, |
| "grad_norm": 0.0694907669966186, |
| "learning_rate": 3.004720917892464e-06, |
| "loss": 0.4458, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.323170731707317, |
| "grad_norm": 0.07550036175573449, |
| "learning_rate": 2.954106216531141e-06, |
| "loss": 0.4877, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.3292682926829267, |
| "grad_norm": 0.06828766275227673, |
| "learning_rate": 2.90384746555749e-06, |
| "loss": 0.4694, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.3353658536585367, |
| "grad_norm": 0.07957885134154746, |
| "learning_rate": 2.8539472039853557e-06, |
| "loss": 0.4549, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.341463414634146, |
| "grad_norm": 0.07312612756103479, |
| "learning_rate": 2.804407952718119e-06, |
| "loss": 0.4717, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.347560975609756, |
| "grad_norm": 0.07462194695242244, |
| "learning_rate": 2.7552322144213405e-06, |
| "loss": 0.4681, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.3536585365853657, |
| "grad_norm": 0.07382029470746747, |
| "learning_rate": 2.7064224733963197e-06, |
| "loss": 0.4455, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.3597560975609757, |
| "grad_norm": 0.07566404170504752, |
| "learning_rate": 2.6579811954546054e-06, |
| "loss": 0.4601, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.3658536585365852, |
| "grad_norm": 0.06650889658374204, |
| "learning_rate": 2.6099108277934105e-06, |
| "loss": 0.403, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.3719512195121952, |
| "grad_norm": 0.08128051733864035, |
| "learning_rate": 2.5622137988719985e-06, |
| "loss": 0.5062, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.3780487804878048, |
| "grad_norm": 0.07645763895183058, |
| "learning_rate": 2.514892518288988e-06, |
| "loss": 0.4992, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.3841463414634148, |
| "grad_norm": 0.08185922748732076, |
| "learning_rate": 2.46794937666063e-06, |
| "loss": 0.4998, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.3902439024390243, |
| "grad_norm": 0.07724446363577575, |
| "learning_rate": 2.421386745500034e-06, |
| "loss": 0.4832, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.3963414634146343, |
| "grad_norm": 0.0719202945692499, |
| "learning_rate": 2.375206977097353e-06, |
| "loss": 0.4625, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.402439024390244, |
| "grad_norm": 0.07160181702178699, |
| "learning_rate": 2.329412404400969e-06, |
| "loss": 0.4786, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.408536585365854, |
| "grad_norm": 0.07705465155073153, |
| "learning_rate": 2.2840053408996154e-06, |
| "loss": 0.4873, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.4146341463414633, |
| "grad_norm": 0.06734740120536699, |
| "learning_rate": 2.238988080505513e-06, |
| "loss": 0.4268, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.4207317073170733, |
| "grad_norm": 0.07171806752940019, |
| "learning_rate": 2.1943628974384858e-06, |
| "loss": 0.4657, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.426829268292683, |
| "grad_norm": 0.06712821968746505, |
| "learning_rate": 2.150132046111054e-06, |
| "loss": 0.4201, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.432926829268293, |
| "grad_norm": 0.08316643198749764, |
| "learning_rate": 2.1062977610145697e-06, |
| "loss": 0.513, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.4390243902439024, |
| "grad_norm": 0.0730957075364869, |
| "learning_rate": 2.0628622566063063e-06, |
| "loss": 0.4895, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.4451219512195124, |
| "grad_norm": 0.07287061567762979, |
| "learning_rate": 2.019827727197605e-06, |
| "loss": 0.4306, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.451219512195122, |
| "grad_norm": 0.06700730358392487, |
| "learning_rate": 1.977196346843019e-06, |
| "loss": 0.4141, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.457317073170732, |
| "grad_norm": 0.07927651728219412, |
| "learning_rate": 1.934970269230464e-06, |
| "loss": 0.4702, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.4634146341463414, |
| "grad_norm": 0.07966939559181735, |
| "learning_rate": 1.8931516275724527e-06, |
| "loss": 0.4209, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.4695121951219514, |
| "grad_norm": 0.07505835152415707, |
| "learning_rate": 1.8517425344982831e-06, |
| "loss": 0.5004, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.475609756097561, |
| "grad_norm": 0.0792696269268693, |
| "learning_rate": 1.8107450819473505e-06, |
| "loss": 0.4954, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.4817073170731705, |
| "grad_norm": 0.07162945978931057, |
| "learning_rate": 1.7701613410634367e-06, |
| "loss": 0.496, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.4878048780487805, |
| "grad_norm": 0.07893944712223014, |
| "learning_rate": 1.7299933620900945e-06, |
| "loss": 0.4774, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.4939024390243905, |
| "grad_norm": 0.06827623677585598, |
| "learning_rate": 1.690243174267071e-06, |
| "loss": 0.4177, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.0754447597879692, |
| "learning_rate": 1.6509127857277784e-06, |
| "loss": 0.4889, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.5060975609756095, |
| "grad_norm": 0.08857036691886101, |
| "learning_rate": 1.6120041833978662e-06, |
| "loss": 0.5317, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.5121951219512195, |
| "grad_norm": 0.07177001116277269, |
| "learning_rate": 1.573519332894824e-06, |
| "loss": 0.414, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.5182926829268295, |
| "grad_norm": 0.07831205185485109, |
| "learning_rate": 1.535460178428697e-06, |
| "loss": 0.5028, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.524390243902439, |
| "grad_norm": 0.07285605695660612, |
| "learning_rate": 1.4978286427038602e-06, |
| "loss": 0.5031, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.5304878048780486, |
| "grad_norm": 0.08720951847793187, |
| "learning_rate": 1.4606266268218783e-06, |
| "loss": 0.5084, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.5365853658536586, |
| "grad_norm": 0.06975140711559835, |
| "learning_rate": 1.4238560101854815e-06, |
| "loss": 0.4253, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.5426829268292686, |
| "grad_norm": 0.07714853710418437, |
| "learning_rate": 1.3875186504035965e-06, |
| "loss": 0.4744, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.548780487804878, |
| "grad_norm": 0.07565492373996721, |
| "learning_rate": 1.3516163831975337e-06, |
| "loss": 0.5152, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.5548780487804876, |
| "grad_norm": 0.07030057664082874, |
| "learning_rate": 1.3161510223082152e-06, |
| "loss": 0.4461, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.5609756097560976, |
| "grad_norm": 0.0800726969605912, |
| "learning_rate": 1.2811243594045697e-06, |
| "loss": 0.5135, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.567073170731707, |
| "grad_norm": 0.07816897719762364, |
| "learning_rate": 1.246538163993013e-06, |
| "loss": 0.4999, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.573170731707317, |
| "grad_norm": 0.07745184122312047, |
| "learning_rate": 1.2123941833280472e-06, |
| "loss": 0.4847, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.5792682926829267, |
| "grad_norm": 0.07419017119462436, |
| "learning_rate": 1.1786941423240072e-06, |
| "loss": 0.4843, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.5853658536585367, |
| "grad_norm": 0.07931455390788596, |
| "learning_rate": 1.1454397434679022e-06, |
| "loss": 0.4946, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.591463414634146, |
| "grad_norm": 0.07615796865199526, |
| "learning_rate": 1.1126326667334196e-06, |
| "loss": 0.4524, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.597560975609756, |
| "grad_norm": 0.0772418363352449, |
| "learning_rate": 1.080274569496057e-06, |
| "loss": 0.5152, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.6036585365853657, |
| "grad_norm": 0.07025077296325957, |
| "learning_rate": 1.0483670864493777e-06, |
| "loss": 0.4332, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.6097560975609757, |
| "grad_norm": 0.07566288563595869, |
| "learning_rate": 1.0169118295224488e-06, |
| "loss": 0.5029, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.6158536585365852, |
| "grad_norm": 0.0758658441769188, |
| "learning_rate": 9.85910387798389e-07, |
| "loss": 0.4573, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.6219512195121952, |
| "grad_norm": 0.07964262946952728, |
| "learning_rate": 9.55364327434105e-07, |
| "loss": 0.4933, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.6280487804878048, |
| "grad_norm": 0.07874589016859943, |
| "learning_rate": 9.252751915811642e-07, |
| "loss": 0.473, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.6341463414634148, |
| "grad_norm": 0.0766185482028681, |
| "learning_rate": 8.956445003078351e-07, |
| "loss": 0.5018, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.6402439024390243, |
| "grad_norm": 0.08140880877769818, |
| "learning_rate": 8.664737505223009e-07, |
| "loss": 0.5203, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.6463414634146343, |
| "grad_norm": 0.08015141287053174, |
| "learning_rate": 8.377644158970277e-07, |
| "loss": 0.5215, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.652439024390244, |
| "grad_norm": 0.07563574533469265, |
| "learning_rate": 8.095179467943293e-07, |
| "loss": 0.4877, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.658536585365854, |
| "grad_norm": 0.08532123143914754, |
| "learning_rate": 7.81735770193085e-07, |
| "loss": 0.5027, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.6646341463414633, |
| "grad_norm": 0.06972127152615569, |
| "learning_rate": 7.544192896166569e-07, |
| "loss": 0.4691, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.6707317073170733, |
| "grad_norm": 0.0748196016294252, |
| "learning_rate": 7.275698850619861e-07, |
| "loss": 0.5059, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.676829268292683, |
| "grad_norm": 0.07757312698493772, |
| "learning_rate": 7.011889129298688e-07, |
| "loss": 0.5559, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.682926829268293, |
| "grad_norm": 0.07577704718768018, |
| "learning_rate": 6.752777059564431e-07, |
| "loss": 0.4718, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.6890243902439024, |
| "grad_norm": 0.07357519669905033, |
| "learning_rate": 6.498375731458529e-07, |
| "loss": 0.4876, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.6951219512195124, |
| "grad_norm": 0.07445682597283106, |
| "learning_rate": 6.248697997041219e-07, |
| "loss": 0.4833, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.701219512195122, |
| "grad_norm": 0.07241140052205494, |
| "learning_rate": 6.003756469742294e-07, |
| "loss": 0.4713, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.7073170731707314, |
| "grad_norm": 0.07656055745393084, |
| "learning_rate": 5.763563523723769e-07, |
| "loss": 0.4525, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.7134146341463414, |
| "grad_norm": 0.07364895292854746, |
| "learning_rate": 5.528131293254957e-07, |
| "loss": 0.477, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.7195121951219514, |
| "grad_norm": 0.07520667622845931, |
| "learning_rate": 5.29747167209923e-07, |
| "loss": 0.4747, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.725609756097561, |
| "grad_norm": 0.08218001537128848, |
| "learning_rate": 5.071596312913329e-07, |
| "loss": 0.54, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.7317073170731705, |
| "grad_norm": 0.0867606649208939, |
| "learning_rate": 4.850516626658585e-07, |
| "loss": 0.5081, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.7378048780487805, |
| "grad_norm": 0.07210738314053156, |
| "learning_rate": 4.634243782024539e-07, |
| "loss": 0.4431, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.7439024390243905, |
| "grad_norm": 0.06582252551133536, |
| "learning_rate": 4.4227887048646335e-07, |
| "loss": 0.4192, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.07567885015733454, |
| "learning_rate": 4.216162077644281e-07, |
| "loss": 0.4887, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.7560975609756095, |
| "grad_norm": 0.07689928271198733, |
| "learning_rate": 4.014374338901206e-07, |
| "loss": 0.4683, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.7621951219512195, |
| "grad_norm": 0.07188606850501078, |
| "learning_rate": 3.817435682718096e-07, |
| "loss": 0.467, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.7682926829268295, |
| "grad_norm": 0.07163831506927039, |
| "learning_rate": 3.6253560582076075e-07, |
| "loss": 0.4539, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.774390243902439, |
| "grad_norm": 0.07592375615552627, |
| "learning_rate": 3.4381451690097653e-07, |
| "loss": 0.4736, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.7804878048780486, |
| "grad_norm": 0.0814766286311381, |
| "learning_rate": 3.255812472801689e-07, |
| "loss": 0.532, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.7865853658536586, |
| "grad_norm": 0.0717352396757339, |
| "learning_rate": 3.078367180819863e-07, |
| "loss": 0.4316, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.7926829268292686, |
| "grad_norm": 0.0746931546839919, |
| "learning_rate": 2.905818257394799e-07, |
| "loss": 0.5206, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.798780487804878, |
| "grad_norm": 0.07272697545910387, |
| "learning_rate": 2.7381744194980963e-07, |
| "loss": 0.4832, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.8048780487804876, |
| "grad_norm": 0.07230252882563974, |
| "learning_rate": 2.5754441363021854e-07, |
| "loss": 0.4778, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.8109756097560976, |
| "grad_norm": 0.08033515297716644, |
| "learning_rate": 2.417635628752324e-07, |
| "loss": 0.5301, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.817073170731707, |
| "grad_norm": 0.08891143823100789, |
| "learning_rate": 2.264756869151441e-07, |
| "loss": 0.5255, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.823170731707317, |
| "grad_norm": 0.07069972457447841, |
| "learning_rate": 2.1168155807572476e-07, |
| "loss": 0.431, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.8292682926829267, |
| "grad_norm": 0.0735272098530535, |
| "learning_rate": 1.973819237392205e-07, |
| "loss": 0.4968, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.8353658536585367, |
| "grad_norm": 0.08455153800014176, |
| "learning_rate": 1.8357750630658367e-07, |
| "loss": 0.4924, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.841463414634146, |
| "grad_norm": 0.06931732980810784, |
| "learning_rate": 1.7026900316098217e-07, |
| "loss": 0.4309, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.847560975609756, |
| "grad_norm": 0.07200984122635784, |
| "learning_rate": 1.5745708663257199e-07, |
| "loss": 0.4667, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.8536585365853657, |
| "grad_norm": 0.07677994816913122, |
| "learning_rate": 1.4514240396452438e-07, |
| "loss": 0.4834, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.8597560975609757, |
| "grad_norm": 0.08178248437061655, |
| "learning_rate": 1.333255772803377e-07, |
| "loss": 0.5251, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.8658536585365852, |
| "grad_norm": 0.07923538700790901, |
| "learning_rate": 1.2200720355239893e-07, |
| "loss": 0.5171, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.8719512195121952, |
| "grad_norm": 0.06761152489499198, |
| "learning_rate": 1.1118785457183034e-07, |
| "loss": 0.4615, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.8780487804878048, |
| "grad_norm": 0.0767262597066072, |
| "learning_rate": 1.0086807691960243e-07, |
| "loss": 0.4976, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.8841463414634148, |
| "grad_norm": 0.08266454055489816, |
| "learning_rate": 9.104839193892379e-08, |
| "loss": 0.5389, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.8902439024390243, |
| "grad_norm": 0.08086361495523785, |
| "learning_rate": 8.172929570889553e-08, |
| "loss": 0.4929, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.8963414634146343, |
| "grad_norm": 0.0745089911333017, |
| "learning_rate": 7.291125901946027e-08, |
| "loss": 0.4939, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.902439024390244, |
| "grad_norm": 0.08025945659207359, |
| "learning_rate": 6.459472734760997e-08, |
| "loss": 0.4876, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.908536585365854, |
| "grad_norm": 0.07053947220102097, |
| "learning_rate": 5.6780120834887264e-08, |
| "loss": 0.4611, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.9146341463414633, |
| "grad_norm": 0.0803969476007233, |
| "learning_rate": 4.9467834266154756e-08, |
| "loss": 0.5419, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.9207317073170733, |
| "grad_norm": 0.07482955639907388, |
| "learning_rate": 4.2658237049655325e-08, |
| "loss": 0.4889, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.926829268292683, |
| "grad_norm": 0.07752759847951908, |
| "learning_rate": 3.635167319834709e-08, |
| "loss": 0.4749, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.932926829268293, |
| "grad_norm": 0.08359023148499886, |
| "learning_rate": 3.054846131252731e-08, |
| "loss": 0.5334, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.9390243902439024, |
| "grad_norm": 0.08139586618522474, |
| "learning_rate": 2.524889456373525e-08, |
| "loss": 0.5523, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.9451219512195124, |
| "grad_norm": 0.07935921771678194, |
| "learning_rate": 2.045324067993959e-08, |
| "loss": 0.4853, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.951219512195122, |
| "grad_norm": 0.07808298733873976, |
| "learning_rate": 1.6161741932017026e-08, |
| "loss": 0.5005, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.9573170731707314, |
| "grad_norm": 0.0649213930995393, |
| "learning_rate": 1.2374615121508726e-08, |
| "loss": 0.4098, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.9634146341463414, |
| "grad_norm": 0.08655284099627421, |
| "learning_rate": 9.092051569674632e-09, |
| "loss": 0.4856, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.9695121951219514, |
| "grad_norm": 0.07707319839718253, |
| "learning_rate": 6.314217107817877e-09, |
| "loss": 0.5193, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.975609756097561, |
| "grad_norm": 0.07715334723099698, |
| "learning_rate": 4.041252068918145e-09, |
| "loss": 0.5263, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.9817073170731705, |
| "grad_norm": 0.07908531746017289, |
| "learning_rate": 2.273271280534006e-09, |
| "loss": 0.4823, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.9878048780487805, |
| "grad_norm": 0.07306053297024867, |
| "learning_rate": 1.0103640590064524e-09, |
| "loss": 0.4543, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.9939024390243905, |
| "grad_norm": 0.08516212104693965, |
| "learning_rate": 2.525942049436125e-10, |
| "loss": 0.5, |
| "step": 491 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.07775010775380628, |
| "learning_rate": 0.0, |
| "loss": 0.4651, |
| "step": 492 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 492, |
| "total_flos": 7801758436818944.0, |
| "train_loss": 0.7506088816780385, |
| "train_runtime": 5228.6286, |
| "train_samples_per_second": 6.005, |
| "train_steps_per_second": 0.094 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 492, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7801758436818944.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|