| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.1822489520685256, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 9.112447603426281e-05, | |
| "grad_norm": 4.810983180999756, | |
| "learning_rate": 4.999999897557577e-05, | |
| "loss": 3.2064, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00018224895206852561, | |
| "grad_norm": 1.9742110967636108, | |
| "learning_rate": 4.999999590230316e-05, | |
| "loss": 2.9028, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0002733734281027884, | |
| "grad_norm": 3.905468463897705, | |
| "learning_rate": 4.99999907801824e-05, | |
| "loss": 3.1475, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00036449790413705123, | |
| "grad_norm": 3.2750675678253174, | |
| "learning_rate": 4.9999983609213935e-05, | |
| "loss": 3.1676, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.000455622380171314, | |
| "grad_norm": 2.567809581756592, | |
| "learning_rate": 4.999997438939835e-05, | |
| "loss": 2.3521, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0005467468562055768, | |
| "grad_norm": 2.246769905090332, | |
| "learning_rate": 4.9999963120736396e-05, | |
| "loss": 3.0339, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0006378713322398396, | |
| "grad_norm": 3.4692740440368652, | |
| "learning_rate": 4.999994980322899e-05, | |
| "loss": 2.579, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0007289958082741025, | |
| "grad_norm": 3.0380406379699707, | |
| "learning_rate": 4.999993443687723e-05, | |
| "loss": 3.035, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0008201202843083652, | |
| "grad_norm": 4.001114368438721, | |
| "learning_rate": 4.999991702168238e-05, | |
| "loss": 3.0391, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.000911244760342628, | |
| "grad_norm": 2.6958892345428467, | |
| "learning_rate": 4.9999897557645856e-05, | |
| "loss": 3.2347, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0010023692363768909, | |
| "grad_norm": 3.562798500061035, | |
| "learning_rate": 4.9999876044769266e-05, | |
| "loss": 3.0267, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0010934937124111536, | |
| "grad_norm": 3.4214489459991455, | |
| "learning_rate": 4.999985248305436e-05, | |
| "loss": 3.0557, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0011846181884454165, | |
| "grad_norm": 5.271027565002441, | |
| "learning_rate": 4.9999826872503085e-05, | |
| "loss": 3.191, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0012757426644796792, | |
| "grad_norm": 4.908616542816162, | |
| "learning_rate": 4.999979921311753e-05, | |
| "loss": 3.1134, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.001366867140513942, | |
| "grad_norm": 3.8159241676330566, | |
| "learning_rate": 4.999976950489995e-05, | |
| "loss": 3.3288, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.001457991616548205, | |
| "grad_norm": 4.082758903503418, | |
| "learning_rate": 4.99997377478528e-05, | |
| "loss": 3.1537, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0015491160925824676, | |
| "grad_norm": 3.051913022994995, | |
| "learning_rate": 4.999970394197867e-05, | |
| "loss": 3.2477, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0016402405686167304, | |
| "grad_norm": 4.618947505950928, | |
| "learning_rate": 4.9999668087280336e-05, | |
| "loss": 3.1326, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0017313650446509933, | |
| "grad_norm": 3.8966140747070312, | |
| "learning_rate": 4.9999630183760745e-05, | |
| "loss": 3.0284, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.001822489520685256, | |
| "grad_norm": 4.619442462921143, | |
| "learning_rate": 4.999959023142298e-05, | |
| "loss": 3.3404, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0019136139967195188, | |
| "grad_norm": 4.453152656555176, | |
| "learning_rate": 4.999954823027034e-05, | |
| "loss": 3.1907, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0020047384727537817, | |
| "grad_norm": 2.996080160140991, | |
| "learning_rate": 4.999950418030625e-05, | |
| "loss": 3.32, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0020958629487880446, | |
| "grad_norm": 3.222342014312744, | |
| "learning_rate": 4.999945808153433e-05, | |
| "loss": 3.3387, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.002186987424822307, | |
| "grad_norm": 2.4748733043670654, | |
| "learning_rate": 4.9999409933958354e-05, | |
| "loss": 3.2016, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.00227811190085657, | |
| "grad_norm": 2.91965913772583, | |
| "learning_rate": 4.9999359737582266e-05, | |
| "loss": 3.0563, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.002369236376890833, | |
| "grad_norm": 3.890470027923584, | |
| "learning_rate": 4.9999307492410176e-05, | |
| "loss": 2.9124, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0024603608529250955, | |
| "grad_norm": 2.584951877593994, | |
| "learning_rate": 4.999925319844638e-05, | |
| "loss": 3.1967, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0025514853289593585, | |
| "grad_norm": 3.9780001640319824, | |
| "learning_rate": 4.999919685569532e-05, | |
| "loss": 3.279, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0026426098049936214, | |
| "grad_norm": 2.990048885345459, | |
| "learning_rate": 4.999913846416161e-05, | |
| "loss": 3.1481, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.002733734281027884, | |
| "grad_norm": 3.9399380683898926, | |
| "learning_rate": 4.999907802385003e-05, | |
| "loss": 2.9727, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.002824858757062147, | |
| "grad_norm": 3.745975971221924, | |
| "learning_rate": 4.999901553476555e-05, | |
| "loss": 2.8796, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.00291598323309641, | |
| "grad_norm": 3.4274938106536865, | |
| "learning_rate": 4.999895099691328e-05, | |
| "loss": 3.0988, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0030071077091306723, | |
| "grad_norm": 2.9808709621429443, | |
| "learning_rate": 4.999888441029852e-05, | |
| "loss": 3.0458, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0030982321851649353, | |
| "grad_norm": 2.1409685611724854, | |
| "learning_rate": 4.9998815774926714e-05, | |
| "loss": 3.0208, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0031893566611991982, | |
| "grad_norm": 2.318962335586548, | |
| "learning_rate": 4.9998745090803486e-05, | |
| "loss": 3.0808, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0032804811372334607, | |
| "grad_norm": 3.4923746585845947, | |
| "learning_rate": 4.999867235793464e-05, | |
| "loss": 3.2533, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0033716056132677237, | |
| "grad_norm": 2.893838882446289, | |
| "learning_rate": 4.9998597576326135e-05, | |
| "loss": 3.1789, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.0034627300893019866, | |
| "grad_norm": 2.9843909740448, | |
| "learning_rate": 4.999852074598409e-05, | |
| "loss": 3.088, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.003553854565336249, | |
| "grad_norm": 4.7274041175842285, | |
| "learning_rate": 4.99984418669148e-05, | |
| "loss": 3.0058, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.003644979041370512, | |
| "grad_norm": 4.613773822784424, | |
| "learning_rate": 4.999836093912475e-05, | |
| "loss": 3.0773, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.003736103517404775, | |
| "grad_norm": 5.928118705749512, | |
| "learning_rate": 4.9998277962620556e-05, | |
| "loss": 3.8138, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0038272279934390375, | |
| "grad_norm": 3.0459301471710205, | |
| "learning_rate": 4.9998192937409015e-05, | |
| "loss": 3.1967, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0039183524694733005, | |
| "grad_norm": 3.857424736022949, | |
| "learning_rate": 4.999810586349711e-05, | |
| "loss": 3.2375, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.004009476945507563, | |
| "grad_norm": 2.038663387298584, | |
| "learning_rate": 4.999801674089197e-05, | |
| "loss": 3.0122, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.004100601421541826, | |
| "grad_norm": 2.0778284072875977, | |
| "learning_rate": 4.999792556960089e-05, | |
| "loss": 3.0134, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.004191725897576089, | |
| "grad_norm": 3.187390089035034, | |
| "learning_rate": 4.999783234963136e-05, | |
| "loss": 2.9639, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.004282850373610351, | |
| "grad_norm": 4.144068717956543, | |
| "learning_rate": 4.9997737080991005e-05, | |
| "loss": 3.0554, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.004373974849644614, | |
| "grad_norm": 3.5001306533813477, | |
| "learning_rate": 4.999763976368763e-05, | |
| "loss": 3.0371, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.004465099325678877, | |
| "grad_norm": 3.6111905574798584, | |
| "learning_rate": 4.9997540397729226e-05, | |
| "loss": 2.9927, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.00455622380171314, | |
| "grad_norm": 2.989147901535034, | |
| "learning_rate": 4.999743898312392e-05, | |
| "loss": 3.1585, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.004647348277747403, | |
| "grad_norm": 3.8651845455169678, | |
| "learning_rate": 4.9997335519880036e-05, | |
| "loss": 2.4887, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.004738472753781666, | |
| "grad_norm": 3.4701950550079346, | |
| "learning_rate": 4.9997230008006045e-05, | |
| "loss": 3.0551, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.004829597229815928, | |
| "grad_norm": 2.621978998184204, | |
| "learning_rate": 4.99971224475106e-05, | |
| "loss": 3.1943, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.004920721705850191, | |
| "grad_norm": 2.872795343399048, | |
| "learning_rate": 4.999701283840252e-05, | |
| "loss": 2.8501, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.005011846181884454, | |
| "grad_norm": 2.817953586578369, | |
| "learning_rate": 4.9996901180690774e-05, | |
| "loss": 2.9754, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.005102970657918717, | |
| "grad_norm": 1.9297401905059814, | |
| "learning_rate": 4.999678747438452e-05, | |
| "loss": 3.034, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.00519409513395298, | |
| "grad_norm": 3.435847520828247, | |
| "learning_rate": 4.9996671719493085e-05, | |
| "loss": 3.1619, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.005285219609987243, | |
| "grad_norm": 1.9299631118774414, | |
| "learning_rate": 4.999655391602594e-05, | |
| "loss": 3.0437, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.005376344086021506, | |
| "grad_norm": 3.256587266921997, | |
| "learning_rate": 4.999643406399275e-05, | |
| "loss": 3.0342, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.005467468562055768, | |
| "grad_norm": 2.301542282104492, | |
| "learning_rate": 4.999631216340333e-05, | |
| "loss": 3.0987, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005558593038090031, | |
| "grad_norm": 4.333310127258301, | |
| "learning_rate": 4.999618821426768e-05, | |
| "loss": 2.9449, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.005649717514124294, | |
| "grad_norm": 1.8521463871002197, | |
| "learning_rate": 4.999606221659595e-05, | |
| "loss": 2.991, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.005740841990158557, | |
| "grad_norm": 3.421117067337036, | |
| "learning_rate": 4.999593417039847e-05, | |
| "loss": 3.1391, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.00583196646619282, | |
| "grad_norm": 3.233793020248413, | |
| "learning_rate": 4.999580407568573e-05, | |
| "loss": 3.0962, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.005923090942227083, | |
| "grad_norm": 3.7364771366119385, | |
| "learning_rate": 4.99956719324684e-05, | |
| "loss": 3.1224, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.006014215418261345, | |
| "grad_norm": 3.039381980895996, | |
| "learning_rate": 4.9995537740757296e-05, | |
| "loss": 3.0738, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.006105339894295608, | |
| "grad_norm": 2.6636710166931152, | |
| "learning_rate": 4.999540150056343e-05, | |
| "loss": 2.7519, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.0061964643703298706, | |
| "grad_norm": 4.360434532165527, | |
| "learning_rate": 4.999526321189796e-05, | |
| "loss": 3.3508, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0062875888463641335, | |
| "grad_norm": 3.0914595127105713, | |
| "learning_rate": 4.999512287477222e-05, | |
| "loss": 3.1953, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.0063787133223983964, | |
| "grad_norm": 2.340858221054077, | |
| "learning_rate": 4.999498048919771e-05, | |
| "loss": 3.0567, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006469837798432659, | |
| "grad_norm": 2.7261271476745605, | |
| "learning_rate": 4.9994836055186104e-05, | |
| "loss": 2.9738, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0065609622744669215, | |
| "grad_norm": 4.343580722808838, | |
| "learning_rate": 4.999468957274923e-05, | |
| "loss": 2.1352, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.006652086750501184, | |
| "grad_norm": 4.178849220275879, | |
| "learning_rate": 4.9994541041899104e-05, | |
| "loss": 2.9654, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.006743211226535447, | |
| "grad_norm": 2.4153099060058594, | |
| "learning_rate": 4.999439046264789e-05, | |
| "loss": 3.0347, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.00683433570256971, | |
| "grad_norm": 3.150313377380371, | |
| "learning_rate": 4.999423783500793e-05, | |
| "loss": 3.0065, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.006925460178603973, | |
| "grad_norm": 3.502631902694702, | |
| "learning_rate": 4.999408315899173e-05, | |
| "loss": 3.0649, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.007016584654638236, | |
| "grad_norm": 3.1519196033477783, | |
| "learning_rate": 4.999392643461198e-05, | |
| "loss": 2.5478, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.007107709130672498, | |
| "grad_norm": 3.289674758911133, | |
| "learning_rate": 4.9993767661881505e-05, | |
| "loss": 2.9503, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.007198833606706761, | |
| "grad_norm": 2.8465538024902344, | |
| "learning_rate": 4.999360684081333e-05, | |
| "loss": 2.7604, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.007289958082741024, | |
| "grad_norm": 1.927045464515686, | |
| "learning_rate": 4.999344397142064e-05, | |
| "loss": 3.2912, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.007381082558775287, | |
| "grad_norm": 3.03855037689209, | |
| "learning_rate": 4.9993279053716767e-05, | |
| "loss": 2.8311, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.00747220703480955, | |
| "grad_norm": 3.128101348876953, | |
| "learning_rate": 4.9993112087715236e-05, | |
| "loss": 3.2222, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.007563331510843813, | |
| "grad_norm": 2.3521552085876465, | |
| "learning_rate": 4.999294307342972e-05, | |
| "loss": 3.0606, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.007654455986878075, | |
| "grad_norm": 4.49812650680542, | |
| "learning_rate": 4.999277201087409e-05, | |
| "loss": 2.5668, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.007745580462912338, | |
| "grad_norm": 1.8955475091934204, | |
| "learning_rate": 4.999259890006235e-05, | |
| "loss": 2.9524, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.007836704938946601, | |
| "grad_norm": 2.4218337535858154, | |
| "learning_rate": 4.999242374100869e-05, | |
| "loss": 2.7087, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.007927829414980863, | |
| "grad_norm": 2.254610538482666, | |
| "learning_rate": 4.999224653372747e-05, | |
| "loss": 3.0396, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.008018953891015127, | |
| "grad_norm": 3.1680707931518555, | |
| "learning_rate": 4.99920672782332e-05, | |
| "loss": 3.0425, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.008110078367049389, | |
| "grad_norm": 3.0137205123901367, | |
| "learning_rate": 4.999188597454059e-05, | |
| "loss": 2.9103, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.008201202843083653, | |
| "grad_norm": 3.345235824584961, | |
| "learning_rate": 4.999170262266448e-05, | |
| "loss": 3.1356, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008292327319117915, | |
| "grad_norm": 3.2153403759002686, | |
| "learning_rate": 4.999151722261991e-05, | |
| "loss": 3.3007, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.008383451795152179, | |
| "grad_norm": 2.3512165546417236, | |
| "learning_rate": 4.999132977442207e-05, | |
| "loss": 3.0219, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.00847457627118644, | |
| "grad_norm": 2.2550835609436035, | |
| "learning_rate": 4.9991140278086316e-05, | |
| "loss": 3.1043, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.008565700747220703, | |
| "grad_norm": 4.217181205749512, | |
| "learning_rate": 4.9990948733628186e-05, | |
| "loss": 3.1672, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.008656825223254967, | |
| "grad_norm": 3.7786593437194824, | |
| "learning_rate": 4.9990755141063376e-05, | |
| "loss": 3.051, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.008747949699289229, | |
| "grad_norm": 2.935464859008789, | |
| "learning_rate": 4.999055950040775e-05, | |
| "loss": 2.7657, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.008839074175323492, | |
| "grad_norm": 2.1169307231903076, | |
| "learning_rate": 4.999036181167733e-05, | |
| "loss": 2.951, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.008930198651357754, | |
| "grad_norm": 3.458928108215332, | |
| "learning_rate": 4.999016207488835e-05, | |
| "loss": 2.7861, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.009021323127392018, | |
| "grad_norm": 3.1938233375549316, | |
| "learning_rate": 4.998996029005715e-05, | |
| "loss": 2.782, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.00911244760342628, | |
| "grad_norm": 1.885495901107788, | |
| "learning_rate": 4.998975645720027e-05, | |
| "loss": 2.9136, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.009203572079460542, | |
| "grad_norm": 3.273291826248169, | |
| "learning_rate": 4.998955057633442e-05, | |
| "loss": 3.2322, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.009294696555494806, | |
| "grad_norm": 1.6428008079528809, | |
| "learning_rate": 4.998934264747648e-05, | |
| "loss": 2.7924, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.009385821031529068, | |
| "grad_norm": 3.5032806396484375, | |
| "learning_rate": 4.9989132670643486e-05, | |
| "loss": 2.8194, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.009476945507563332, | |
| "grad_norm": 2.517979145050049, | |
| "learning_rate": 4.998892064585264e-05, | |
| "loss": 2.5998, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.009568069983597594, | |
| "grad_norm": 2.546837329864502, | |
| "learning_rate": 4.9988706573121324e-05, | |
| "loss": 3.0157, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.009659194459631856, | |
| "grad_norm": 3.4117698669433594, | |
| "learning_rate": 4.9988490452467074e-05, | |
| "loss": 3.1974, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.00975031893566612, | |
| "grad_norm": 3.3978612422943115, | |
| "learning_rate": 4.9988272283907616e-05, | |
| "loss": 2.4654, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.009841443411700382, | |
| "grad_norm": 2.8470406532287598, | |
| "learning_rate": 4.998805206746082e-05, | |
| "loss": 2.7727, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.009932567887734646, | |
| "grad_norm": 2.559325695037842, | |
| "learning_rate": 4.998782980314474e-05, | |
| "loss": 2.9934, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.010023692363768908, | |
| "grad_norm": 2.0657799243927, | |
| "learning_rate": 4.998760549097758e-05, | |
| "loss": 3.084, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.010114816839803172, | |
| "grad_norm": 4.179584980010986, | |
| "learning_rate": 4.998737913097773e-05, | |
| "loss": 2.7185, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.010205941315837434, | |
| "grad_norm": 3.370600700378418, | |
| "learning_rate": 4.998715072316375e-05, | |
| "loss": 3.3553, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.010297065791871696, | |
| "grad_norm": 3.263859987258911, | |
| "learning_rate": 4.998692026755435e-05, | |
| "loss": 3.2411, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.01038819026790596, | |
| "grad_norm": 3.004282236099243, | |
| "learning_rate": 4.998668776416842e-05, | |
| "loss": 3.2068, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.010479314743940222, | |
| "grad_norm": 4.776451110839844, | |
| "learning_rate": 4.9986453213024996e-05, | |
| "loss": 2.931, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.010570439219974486, | |
| "grad_norm": 3.8841757774353027, | |
| "learning_rate": 4.9986216614143335e-05, | |
| "loss": 2.6147, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.010661563696008748, | |
| "grad_norm": 3.1611170768737793, | |
| "learning_rate": 4.9985977967542794e-05, | |
| "loss": 3.2392, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.010752688172043012, | |
| "grad_norm": 3.0968759059906006, | |
| "learning_rate": 4.998573727324295e-05, | |
| "loss": 3.0568, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.010843812648077274, | |
| "grad_norm": 2.2086093425750732, | |
| "learning_rate": 4.998549453126353e-05, | |
| "loss": 3.0838, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.010934937124111536, | |
| "grad_norm": 3.466169834136963, | |
| "learning_rate": 4.998524974162442e-05, | |
| "loss": 2.9381, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0110260616001458, | |
| "grad_norm": 3.442246437072754, | |
| "learning_rate": 4.998500290434568e-05, | |
| "loss": 2.8684, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.011117186076180062, | |
| "grad_norm": 3.4763424396514893, | |
| "learning_rate": 4.998475401944754e-05, | |
| "loss": 3.1428, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.011208310552214325, | |
| "grad_norm": 3.302222728729248, | |
| "learning_rate": 4.9984503086950416e-05, | |
| "loss": 2.9401, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.011299435028248588, | |
| "grad_norm": 2.623926877975464, | |
| "learning_rate": 4.998425010687484e-05, | |
| "loss": 3.0758, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.01139055950428285, | |
| "grad_norm": 3.527374744415283, | |
| "learning_rate": 4.998399507924157e-05, | |
| "loss": 2.7145, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.011481683980317113, | |
| "grad_norm": 2.3806726932525635, | |
| "learning_rate": 4.9983738004071495e-05, | |
| "loss": 2.7357, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.011572808456351375, | |
| "grad_norm": 3.3078513145446777, | |
| "learning_rate": 4.998347888138569e-05, | |
| "loss": 3.0489, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.01166393293238564, | |
| "grad_norm": 2.8850951194763184, | |
| "learning_rate": 4.9983217711205386e-05, | |
| "loss": 3.0792, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.011755057408419901, | |
| "grad_norm": 1.8585487604141235, | |
| "learning_rate": 4.998295449355199e-05, | |
| "loss": 2.9322, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.011846181884454165, | |
| "grad_norm": 4.037771701812744, | |
| "learning_rate": 4.9982689228447064e-05, | |
| "loss": 2.9084, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011937306360488427, | |
| "grad_norm": 2.485532283782959, | |
| "learning_rate": 4.998242191591237e-05, | |
| "loss": 2.8812, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.01202843083652269, | |
| "grad_norm": 2.1966612339019775, | |
| "learning_rate": 4.9982152555969786e-05, | |
| "loss": 3.1189, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.012119555312556953, | |
| "grad_norm": 1.8410605192184448, | |
| "learning_rate": 4.9981881148641405e-05, | |
| "loss": 3.0395, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.012210679788591215, | |
| "grad_norm": 2.812716007232666, | |
| "learning_rate": 4.998160769394947e-05, | |
| "loss": 3.0484, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.012301804264625479, | |
| "grad_norm": 3.6690826416015625, | |
| "learning_rate": 4.99813321919164e-05, | |
| "loss": 2.9871, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.012392928740659741, | |
| "grad_norm": 4.960578918457031, | |
| "learning_rate": 4.998105464256475e-05, | |
| "loss": 2.1866, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.012484053216694003, | |
| "grad_norm": 3.6404716968536377, | |
| "learning_rate": 4.998077504591728e-05, | |
| "loss": 3.0983, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.012575177692728267, | |
| "grad_norm": 3.0544800758361816, | |
| "learning_rate": 4.9980493401996905e-05, | |
| "loss": 3.0374, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.012666302168762529, | |
| "grad_norm": 2.765801191329956, | |
| "learning_rate": 4.99802097108267e-05, | |
| "loss": 2.8467, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.012757426644796793, | |
| "grad_norm": 2.434861898422241, | |
| "learning_rate": 4.997992397242992e-05, | |
| "loss": 2.9516, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.012848551120831055, | |
| "grad_norm": 3.1218740940093994, | |
| "learning_rate": 4.997963618682998e-05, | |
| "loss": 3.0314, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.012939675596865319, | |
| "grad_norm": 1.805385947227478, | |
| "learning_rate": 4.997934635405047e-05, | |
| "loss": 2.9169, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.01303080007289958, | |
| "grad_norm": 3.3232510089874268, | |
| "learning_rate": 4.9979054474115144e-05, | |
| "loss": 3.2397, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.013121924548933843, | |
| "grad_norm": 2.7170846462249756, | |
| "learning_rate": 4.9978760547047915e-05, | |
| "loss": 3.1618, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.013213049024968107, | |
| "grad_norm": 5.612462520599365, | |
| "learning_rate": 4.9978464572872876e-05, | |
| "loss": 3.7115, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.013304173501002369, | |
| "grad_norm": 3.0545284748077393, | |
| "learning_rate": 4.997816655161428e-05, | |
| "loss": 2.8899, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.013395297977036633, | |
| "grad_norm": 2.2899675369262695, | |
| "learning_rate": 4.9977866483296544e-05, | |
| "loss": 3.01, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.013486422453070895, | |
| "grad_norm": 1.921761393547058, | |
| "learning_rate": 4.997756436794428e-05, | |
| "loss": 2.9777, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.013577546929105158, | |
| "grad_norm": 3.515000104904175, | |
| "learning_rate": 4.997726020558223e-05, | |
| "loss": 2.9246, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.01366867140513942, | |
| "grad_norm": 1.8955378532409668, | |
| "learning_rate": 4.997695399623533e-05, | |
| "loss": 2.9843, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.013759795881173683, | |
| "grad_norm": 2.8406405448913574, | |
| "learning_rate": 4.9976645739928675e-05, | |
| "loss": 2.819, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.013850920357207946, | |
| "grad_norm": 2.192831039428711, | |
| "learning_rate": 4.9976335436687525e-05, | |
| "loss": 2.9843, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.013942044833242209, | |
| "grad_norm": 2.079318046569824, | |
| "learning_rate": 4.997602308653731e-05, | |
| "loss": 2.9742, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.014033169309276472, | |
| "grad_norm": 2.436514139175415, | |
| "learning_rate": 4.997570868950363e-05, | |
| "loss": 2.8987, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.014124293785310734, | |
| "grad_norm": 2.0050177574157715, | |
| "learning_rate": 4.9975392245612254e-05, | |
| "loss": 2.9146, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.014215418261344996, | |
| "grad_norm": 2.7386975288391113, | |
| "learning_rate": 4.99750737548891e-05, | |
| "loss": 3.106, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.01430654273737926, | |
| "grad_norm": 3.8090169429779053, | |
| "learning_rate": 4.9974753217360295e-05, | |
| "loss": 2.5344, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.014397667213413522, | |
| "grad_norm": 1.7630343437194824, | |
| "learning_rate": 4.9974430633052085e-05, | |
| "loss": 2.6978, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.014488791689447786, | |
| "grad_norm": 2.020094156265259, | |
| "learning_rate": 4.9974106001990923e-05, | |
| "loss": 3.0851, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.014579916165482048, | |
| "grad_norm": 2.589174270629883, | |
| "learning_rate": 4.997377932420341e-05, | |
| "loss": 2.8161, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.014671040641516312, | |
| "grad_norm": 3.2339718341827393, | |
| "learning_rate": 4.997345059971631e-05, | |
| "loss": 2.6908, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.014762165117550574, | |
| "grad_norm": 2.0948266983032227, | |
| "learning_rate": 4.997311982855657e-05, | |
| "loss": 2.9472, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.014853289593584836, | |
| "grad_norm": 2.4752578735351562, | |
| "learning_rate": 4.99727870107513e-05, | |
| "loss": 3.1334, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.0149444140696191, | |
| "grad_norm": 3.5128135681152344, | |
| "learning_rate": 4.997245214632778e-05, | |
| "loss": 2.8477, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.015035538545653362, | |
| "grad_norm": 2.700103282928467, | |
| "learning_rate": 4.997211523531344e-05, | |
| "loss": 3.0588, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.015126663021687626, | |
| "grad_norm": 4.285271644592285, | |
| "learning_rate": 4.9971776277735906e-05, | |
| "loss": 2.7581, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.015217787497721888, | |
| "grad_norm": 3.8157570362091064, | |
| "learning_rate": 4.997143527362294e-05, | |
| "loss": 3.0998, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.01530891197375615, | |
| "grad_norm": 3.245560646057129, | |
| "learning_rate": 4.99710922230025e-05, | |
| "loss": 3.0689, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.015400036449790414, | |
| "grad_norm": 3.2004127502441406, | |
| "learning_rate": 4.9970747125902694e-05, | |
| "loss": 2.8422, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.015491160925824676, | |
| "grad_norm": 3.4759035110473633, | |
| "learning_rate": 4.997039998235181e-05, | |
| "loss": 3.1256, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.01558228540185894, | |
| "grad_norm": 2.497690200805664, | |
| "learning_rate": 4.99700507923783e-05, | |
| "loss": 2.9096, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.015673409877893202, | |
| "grad_norm": 2.1334710121154785, | |
| "learning_rate": 4.996969955601078e-05, | |
| "loss": 2.9044, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.015764534353927464, | |
| "grad_norm": 2.300891160964966, | |
| "learning_rate": 4.9969346273278025e-05, | |
| "loss": 2.8908, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.015855658829961726, | |
| "grad_norm": 2.0513505935668945, | |
| "learning_rate": 4.996899094420901e-05, | |
| "loss": 2.8817, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.01594678330599599, | |
| "grad_norm": 2.8278379440307617, | |
| "learning_rate": 4.996863356883282e-05, | |
| "loss": 3.1366, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.016037907782030254, | |
| "grad_norm": 3.43684720993042, | |
| "learning_rate": 4.996827414717878e-05, | |
| "loss": 3.0606, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.016129032258064516, | |
| "grad_norm": 3.0732672214508057, | |
| "learning_rate": 4.9967912679276316e-05, | |
| "loss": 2.843, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.016220156734098778, | |
| "grad_norm": 3.4920480251312256, | |
| "learning_rate": 4.996754916515508e-05, | |
| "loss": 2.8969, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.016311281210133043, | |
| "grad_norm": 3.4001033306121826, | |
| "learning_rate": 4.996718360484485e-05, | |
| "loss": 2.8035, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.016402405686167305, | |
| "grad_norm": 1.7016775608062744, | |
| "learning_rate": 4.9966815998375575e-05, | |
| "loss": 2.7236, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.016493530162201567, | |
| "grad_norm": 3.0281150341033936, | |
| "learning_rate": 4.99664463457774e-05, | |
| "loss": 3.2244, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.01658465463823583, | |
| "grad_norm": 3.8529109954833984, | |
| "learning_rate": 4.9966074647080606e-05, | |
| "loss": 2.7765, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.01667577911427009, | |
| "grad_norm": 4.410470008850098, | |
| "learning_rate": 4.996570090231566e-05, | |
| "loss": 3.2551, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.016766903590304357, | |
| "grad_norm": 2.621544122695923, | |
| "learning_rate": 4.99653251115132e-05, | |
| "loss": 2.8792, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.01685802806633862, | |
| "grad_norm": 3.7127602100372314, | |
| "learning_rate": 4.9964947274704e-05, | |
| "loss": 2.4034, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.01694915254237288, | |
| "grad_norm": 3.494060754776001, | |
| "learning_rate": 4.996456739191905e-05, | |
| "loss": 2.697, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.017040277018407143, | |
| "grad_norm": 2.872035264968872, | |
| "learning_rate": 4.9964185463189475e-05, | |
| "loss": 3.0493, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.017131401494441405, | |
| "grad_norm": 3.452061414718628, | |
| "learning_rate": 4.996380148854657e-05, | |
| "loss": 3.1065, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.01722252597047567, | |
| "grad_norm": 2.105959415435791, | |
| "learning_rate": 4.996341546802181e-05, | |
| "loss": 2.9128, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.017313650446509933, | |
| "grad_norm": 3.6331939697265625, | |
| "learning_rate": 4.996302740164683e-05, | |
| "loss": 2.7058, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.017404774922544195, | |
| "grad_norm": 2.7457187175750732, | |
| "learning_rate": 4.996263728945343e-05, | |
| "loss": 2.7081, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.017495899398578457, | |
| "grad_norm": 2.6010935306549072, | |
| "learning_rate": 4.9962245131473585e-05, | |
| "loss": 2.6168, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.01758702387461272, | |
| "grad_norm": 3.7223236560821533, | |
| "learning_rate": 4.996185092773943e-05, | |
| "loss": 2.9499, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.017678148350646985, | |
| "grad_norm": 3.707608938217163, | |
| "learning_rate": 4.996145467828327e-05, | |
| "loss": 2.4925, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.017769272826681247, | |
| "grad_norm": 2.3394830226898193, | |
| "learning_rate": 4.996105638313759e-05, | |
| "loss": 2.965, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.01786039730271551, | |
| "grad_norm": 3.4291019439697266, | |
| "learning_rate": 4.9960656042335016e-05, | |
| "loss": 2.9025, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.01795152177874977, | |
| "grad_norm": 3.2778756618499756, | |
| "learning_rate": 4.9960253655908374e-05, | |
| "loss": 2.7247, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.018042646254784037, | |
| "grad_norm": 2.798750877380371, | |
| "learning_rate": 4.995984922389063e-05, | |
| "loss": 2.6033, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.0181337707308183, | |
| "grad_norm": 3.6283669471740723, | |
| "learning_rate": 4.995944274631493e-05, | |
| "loss": 2.6426, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.01822489520685256, | |
| "grad_norm": 3.634934902191162, | |
| "learning_rate": 4.9959034223214595e-05, | |
| "loss": 2.8299, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.018316019682886823, | |
| "grad_norm": 4.604448318481445, | |
| "learning_rate": 4.99586236546231e-05, | |
| "loss": 2.9753, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.018407144158921085, | |
| "grad_norm": 3.651458501815796, | |
| "learning_rate": 4.995821104057409e-05, | |
| "loss": 2.5728, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.01849826863495535, | |
| "grad_norm": 2.339756727218628, | |
| "learning_rate": 4.9957796381101374e-05, | |
| "loss": 3.1176, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.018589393110989613, | |
| "grad_norm": 3.3692636489868164, | |
| "learning_rate": 4.9957379676238945e-05, | |
| "loss": 3.2663, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.018680517587023875, | |
| "grad_norm": 4.322436809539795, | |
| "learning_rate": 4.995696092602096e-05, | |
| "loss": 3.0394, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.018771642063058137, | |
| "grad_norm": 3.796675205230713, | |
| "learning_rate": 4.995654013048172e-05, | |
| "loss": 2.8621, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.0188627665390924, | |
| "grad_norm": 2.7833352088928223, | |
| "learning_rate": 4.995611728965571e-05, | |
| "loss": 2.8098, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.018953891015126664, | |
| "grad_norm": 3.21239972114563, | |
| "learning_rate": 4.995569240357761e-05, | |
| "loss": 2.7568, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.019045015491160926, | |
| "grad_norm": 1.92779541015625, | |
| "learning_rate": 4.995526547228222e-05, | |
| "loss": 2.9617, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.01913613996719519, | |
| "grad_norm": 1.4563132524490356, | |
| "learning_rate": 4.9954836495804525e-05, | |
| "loss": 2.7798, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01922726444322945, | |
| "grad_norm": 2.3598124980926514, | |
| "learning_rate": 4.99544054741797e-05, | |
| "loss": 2.7242, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.019318388919263713, | |
| "grad_norm": 3.2183914184570312, | |
| "learning_rate": 4.995397240744305e-05, | |
| "loss": 2.725, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.019409513395297978, | |
| "grad_norm": 3.6406822204589844, | |
| "learning_rate": 4.9953537295630074e-05, | |
| "loss": 2.9159, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.01950063787133224, | |
| "grad_norm": 3.356208086013794, | |
| "learning_rate": 4.995310013877643e-05, | |
| "loss": 2.3788, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.019591762347366502, | |
| "grad_norm": 3.275792360305786, | |
| "learning_rate": 4.9952660936917953e-05, | |
| "loss": 3.2876, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.019682886823400764, | |
| "grad_norm": 3.7224249839782715, | |
| "learning_rate": 4.995221969009063e-05, | |
| "loss": 2.4127, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.01977401129943503, | |
| "grad_norm": 2.694688081741333, | |
| "learning_rate": 4.995177639833062e-05, | |
| "loss": 2.685, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.019865135775469292, | |
| "grad_norm": 2.4117233753204346, | |
| "learning_rate": 4.995133106167425e-05, | |
| "loss": 2.8893, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.019956260251503554, | |
| "grad_norm": 2.3335092067718506, | |
| "learning_rate": 4.995088368015804e-05, | |
| "loss": 2.9949, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.020047384727537816, | |
| "grad_norm": 2.5341763496398926, | |
| "learning_rate": 4.995043425381862e-05, | |
| "loss": 2.7349, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.020138509203572078, | |
| "grad_norm": 2.1053128242492676, | |
| "learning_rate": 4.994998278269286e-05, | |
| "loss": 2.872, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.020229633679606344, | |
| "grad_norm": 4.247159957885742, | |
| "learning_rate": 4.9949529266817716e-05, | |
| "loss": 2.8256, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.020320758155640606, | |
| "grad_norm": 2.9789533615112305, | |
| "learning_rate": 4.9949073706230395e-05, | |
| "loss": 2.9463, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.020411882631674868, | |
| "grad_norm": 4.879419326782227, | |
| "learning_rate": 4.994861610096821e-05, | |
| "loss": 3.0509, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.02050300710770913, | |
| "grad_norm": 2.939770221710205, | |
| "learning_rate": 4.994815645106867e-05, | |
| "loss": 2.6338, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.020594131583743392, | |
| "grad_norm": 4.3412861824035645, | |
| "learning_rate": 4.994769475656945e-05, | |
| "loss": 3.0108, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.020685256059777658, | |
| "grad_norm": 2.51426100730896, | |
| "learning_rate": 4.994723101750838e-05, | |
| "loss": 3.0526, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.02077638053581192, | |
| "grad_norm": 3.505676031112671, | |
| "learning_rate": 4.994676523392347e-05, | |
| "loss": 2.7056, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.020867505011846182, | |
| "grad_norm": 4.260951519012451, | |
| "learning_rate": 4.994629740585289e-05, | |
| "loss": 3.1183, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.020958629487880444, | |
| "grad_norm": 2.5711376667022705, | |
| "learning_rate": 4.994582753333498e-05, | |
| "loss": 3.0857, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.021049753963914706, | |
| "grad_norm": 2.166073799133301, | |
| "learning_rate": 4.994535561640824e-05, | |
| "loss": 2.8542, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.02114087843994897, | |
| "grad_norm": 1.8539170026779175, | |
| "learning_rate": 4.9944881655111366e-05, | |
| "loss": 3.0293, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.021232002915983234, | |
| "grad_norm": 1.9353551864624023, | |
| "learning_rate": 4.994440564948318e-05, | |
| "loss": 2.8262, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.021323127392017496, | |
| "grad_norm": 3.0932233333587646, | |
| "learning_rate": 4.994392759956271e-05, | |
| "loss": 2.8178, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.021414251868051758, | |
| "grad_norm": 4.661931991577148, | |
| "learning_rate": 4.994344750538913e-05, | |
| "loss": 3.0574, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.021505376344086023, | |
| "grad_norm": 3.5828258991241455, | |
| "learning_rate": 4.994296536700177e-05, | |
| "loss": 2.9585, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.021596500820120285, | |
| "grad_norm": 3.237452983856201, | |
| "learning_rate": 4.994248118444016e-05, | |
| "loss": 3.2907, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.021687625296154547, | |
| "grad_norm": 1.7712563276290894, | |
| "learning_rate": 4.9941994957743976e-05, | |
| "loss": 2.81, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.02177874977218881, | |
| "grad_norm": 4.197923183441162, | |
| "learning_rate": 4.9941506686953064e-05, | |
| "loss": 2.8537, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.02186987424822307, | |
| "grad_norm": 4.0791239738464355, | |
| "learning_rate": 4.994101637210744e-05, | |
| "loss": 3.1615, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.021960998724257337, | |
| "grad_norm": 1.8465322256088257, | |
| "learning_rate": 4.994052401324729e-05, | |
| "loss": 2.8619, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.0220521232002916, | |
| "grad_norm": 3.064363956451416, | |
| "learning_rate": 4.9940029610412964e-05, | |
| "loss": 2.9496, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.02214324767632586, | |
| "grad_norm": 5.682558536529541, | |
| "learning_rate": 4.993953316364498e-05, | |
| "loss": 2.9704, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.022234372152360123, | |
| "grad_norm": 3.240743637084961, | |
| "learning_rate": 4.993903467298402e-05, | |
| "loss": 2.9054, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.022325496628394385, | |
| "grad_norm": 3.0782744884490967, | |
| "learning_rate": 4.993853413847094e-05, | |
| "loss": 2.328, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.02241662110442865, | |
| "grad_norm": 3.2922260761260986, | |
| "learning_rate": 4.993803156014677e-05, | |
| "loss": 2.9641, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.022507745580462913, | |
| "grad_norm": 3.023947238922119, | |
| "learning_rate": 4.993752693805268e-05, | |
| "loss": 2.8576, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.022598870056497175, | |
| "grad_norm": 3.0554006099700928, | |
| "learning_rate": 4.993702027223004e-05, | |
| "loss": 2.7858, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.022689994532531437, | |
| "grad_norm": 3.693634271621704, | |
| "learning_rate": 4.9936511562720364e-05, | |
| "loss": 2.7147, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.0227811190085657, | |
| "grad_norm": 3.201390027999878, | |
| "learning_rate": 4.993600080956535e-05, | |
| "loss": 1.9882, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.022872243484599965, | |
| "grad_norm": 1.9936951398849487, | |
| "learning_rate": 4.993548801280686e-05, | |
| "loss": 3.1181, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.022963367960634227, | |
| "grad_norm": 3.260298490524292, | |
| "learning_rate": 4.993497317248691e-05, | |
| "loss": 3.1898, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.02305449243666849, | |
| "grad_norm": 4.4110517501831055, | |
| "learning_rate": 4.9934456288647694e-05, | |
| "loss": 2.8244, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.02314561691270275, | |
| "grad_norm": 2.4315178394317627, | |
| "learning_rate": 4.993393736133157e-05, | |
| "loss": 2.4183, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.023236741388737013, | |
| "grad_norm": 3.644021511077881, | |
| "learning_rate": 4.993341639058108e-05, | |
| "loss": 2.654, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.02332786586477128, | |
| "grad_norm": 5.052486896514893, | |
| "learning_rate": 4.99328933764389e-05, | |
| "loss": 3.4613, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.02341899034080554, | |
| "grad_norm": 2.375401020050049, | |
| "learning_rate": 4.993236831894792e-05, | |
| "loss": 2.9367, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.023510114816839803, | |
| "grad_norm": 2.3803887367248535, | |
| "learning_rate": 4.993184121815114e-05, | |
| "loss": 2.9791, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.023601239292874065, | |
| "grad_norm": 3.324200391769409, | |
| "learning_rate": 4.993131207409179e-05, | |
| "loss": 2.8913, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.02369236376890833, | |
| "grad_norm": 2.2855703830718994, | |
| "learning_rate": 4.99307808868132e-05, | |
| "loss": 3.1401, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.023783488244942592, | |
| "grad_norm": 3.8498096466064453, | |
| "learning_rate": 4.9930247656358926e-05, | |
| "loss": 2.7348, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.023874612720976855, | |
| "grad_norm": 2.989870071411133, | |
| "learning_rate": 4.992971238277266e-05, | |
| "loss": 2.7794, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.023965737197011117, | |
| "grad_norm": 1.9267282485961914, | |
| "learning_rate": 4.9929175066098285e-05, | |
| "loss": 2.7822, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.02405686167304538, | |
| "grad_norm": 3.4673049449920654, | |
| "learning_rate": 4.992863570637981e-05, | |
| "loss": 2.7716, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.024147986149079644, | |
| "grad_norm": 4.030117034912109, | |
| "learning_rate": 4.9928094303661465e-05, | |
| "loss": 2.3569, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.024239110625113906, | |
| "grad_norm": 2.8260750770568848, | |
| "learning_rate": 4.99275508579876e-05, | |
| "loss": 2.9893, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.02433023510114817, | |
| "grad_norm": 3.653055429458618, | |
| "learning_rate": 4.9927005369402756e-05, | |
| "loss": 3.1263, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.02442135957718243, | |
| "grad_norm": 2.9457454681396484, | |
| "learning_rate": 4.992645783795165e-05, | |
| "loss": 3.0073, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.024512484053216693, | |
| "grad_norm": 1.6144747734069824, | |
| "learning_rate": 4.992590826367913e-05, | |
| "loss": 2.7446, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.024603608529250958, | |
| "grad_norm": 3.5470383167266846, | |
| "learning_rate": 4.992535664663027e-05, | |
| "loss": 2.7096, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.02469473300528522, | |
| "grad_norm": 2.77581787109375, | |
| "learning_rate": 4.992480298685025e-05, | |
| "loss": 2.7322, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.024785857481319482, | |
| "grad_norm": 3.7434020042419434, | |
| "learning_rate": 4.992424728438445e-05, | |
| "loss": 2.9467, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.024876981957353744, | |
| "grad_norm": 2.4266579151153564, | |
| "learning_rate": 4.992368953927842e-05, | |
| "loss": 2.5198, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.024968106433388006, | |
| "grad_norm": 3.2962489128112793, | |
| "learning_rate": 4.9923129751577866e-05, | |
| "loss": 3.1596, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.025059230909422272, | |
| "grad_norm": 7.043239116668701, | |
| "learning_rate": 4.9922567921328665e-05, | |
| "loss": 3.488, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.025150355385456534, | |
| "grad_norm": 2.4639480113983154, | |
| "learning_rate": 4.992200404857686e-05, | |
| "loss": 2.4812, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.025241479861490796, | |
| "grad_norm": 1.857424020767212, | |
| "learning_rate": 4.9921438133368655e-05, | |
| "loss": 2.8176, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.025332604337525058, | |
| "grad_norm": 4.494309425354004, | |
| "learning_rate": 4.992087017575044e-05, | |
| "loss": 3.1054, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.025423728813559324, | |
| "grad_norm": 4.276947975158691, | |
| "learning_rate": 4.992030017576875e-05, | |
| "loss": 2.1909, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.025514853289593586, | |
| "grad_norm": 4.083865642547607, | |
| "learning_rate": 4.991972813347032e-05, | |
| "loss": 2.9479, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.025605977765627848, | |
| "grad_norm": 2.375821828842163, | |
| "learning_rate": 4.991915404890201e-05, | |
| "loss": 2.7618, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.02569710224166211, | |
| "grad_norm": 3.873976707458496, | |
| "learning_rate": 4.9918577922110875e-05, | |
| "loss": 3.0425, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.025788226717696372, | |
| "grad_norm": 2.8449337482452393, | |
| "learning_rate": 4.991799975314414e-05, | |
| "loss": 2.8696, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.025879351193730638, | |
| "grad_norm": 4.654402732849121, | |
| "learning_rate": 4.991741954204917e-05, | |
| "loss": 2.7261, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.0259704756697649, | |
| "grad_norm": 2.0715343952178955, | |
| "learning_rate": 4.991683728887353e-05, | |
| "loss": 2.9878, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.02606160014579916, | |
| "grad_norm": 1.749382734298706, | |
| "learning_rate": 4.991625299366494e-05, | |
| "loss": 2.8315, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.026152724621833424, | |
| "grad_norm": 2.439875841140747, | |
| "learning_rate": 4.991566665647127e-05, | |
| "loss": 2.7062, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.026243849097867686, | |
| "grad_norm": 3.324129581451416, | |
| "learning_rate": 4.991507827734058e-05, | |
| "loss": 3.0118, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.02633497357390195, | |
| "grad_norm": 3.6669745445251465, | |
| "learning_rate": 4.99144878563211e-05, | |
| "loss": 2.8931, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.026426098049936213, | |
| "grad_norm": 3.859177350997925, | |
| "learning_rate": 4.99138953934612e-05, | |
| "loss": 1.8732, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.026517222525970476, | |
| "grad_norm": 3.1230263710021973, | |
| "learning_rate": 4.991330088880945e-05, | |
| "loss": 3.0979, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.026608347002004738, | |
| "grad_norm": 6.740344524383545, | |
| "learning_rate": 4.9912704342414565e-05, | |
| "loss": 3.7346, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.026699471478039, | |
| "grad_norm": 2.1976635456085205, | |
| "learning_rate": 4.9912105754325435e-05, | |
| "loss": 2.9069, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.026790595954073265, | |
| "grad_norm": 3.5679171085357666, | |
| "learning_rate": 4.991150512459111e-05, | |
| "loss": 3.3382, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.026881720430107527, | |
| "grad_norm": 2.9355552196502686, | |
| "learning_rate": 4.9910902453260824e-05, | |
| "loss": 2.7731, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.02697284490614179, | |
| "grad_norm": 3.473723888397217, | |
| "learning_rate": 4.991029774038397e-05, | |
| "loss": 2.995, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.02706396938217605, | |
| "grad_norm": 3.0785019397735596, | |
| "learning_rate": 4.9909690986010095e-05, | |
| "loss": 2.7569, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.027155093858210317, | |
| "grad_norm": 2.493504762649536, | |
| "learning_rate": 4.990908219018894e-05, | |
| "loss": 2.9982, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.02724621833424458, | |
| "grad_norm": 2.2580089569091797, | |
| "learning_rate": 4.990847135297038e-05, | |
| "loss": 2.5264, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.02733734281027884, | |
| "grad_norm": 2.80513858795166, | |
| "learning_rate": 4.990785847440449e-05, | |
| "loss": 2.6634, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.027428467286313103, | |
| "grad_norm": 2.0804553031921387, | |
| "learning_rate": 4.9907243554541486e-05, | |
| "loss": 2.9459, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.027519591762347365, | |
| "grad_norm": 3.045847177505493, | |
| "learning_rate": 4.9906626593431776e-05, | |
| "loss": 2.611, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.02761071623838163, | |
| "grad_norm": 2.9146780967712402, | |
| "learning_rate": 4.9906007591125915e-05, | |
| "loss": 2.7193, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.027701840714415893, | |
| "grad_norm": 2.089606761932373, | |
| "learning_rate": 4.990538654767464e-05, | |
| "loss": 2.9351, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.027792965190450155, | |
| "grad_norm": 4.3602471351623535, | |
| "learning_rate": 4.9904763463128826e-05, | |
| "loss": 2.4889, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.027884089666484417, | |
| "grad_norm": 2.540752410888672, | |
| "learning_rate": 4.9904138337539566e-05, | |
| "loss": 3.1232, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.02797521414251868, | |
| "grad_norm": 3.7896921634674072, | |
| "learning_rate": 4.9903511170958074e-05, | |
| "loss": 3.2954, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.028066338618552945, | |
| "grad_norm": 2.222221612930298, | |
| "learning_rate": 4.990288196343575e-05, | |
| "loss": 2.8993, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.028157463094587207, | |
| "grad_norm": 3.169107437133789, | |
| "learning_rate": 4.990225071502418e-05, | |
| "loss": 2.6121, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.02824858757062147, | |
| "grad_norm": 2.2904651165008545, | |
| "learning_rate": 4.9901617425775067e-05, | |
| "loss": 3.0291, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.02833971204665573, | |
| "grad_norm": 2.019195556640625, | |
| "learning_rate": 4.990098209574033e-05, | |
| "loss": 2.8676, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.028430836522689993, | |
| "grad_norm": 3.177980661392212, | |
| "learning_rate": 4.9900344724972024e-05, | |
| "loss": 2.7502, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.02852196099872426, | |
| "grad_norm": 4.665421485900879, | |
| "learning_rate": 4.989970531352241e-05, | |
| "loss": 2.7847, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.02861308547475852, | |
| "grad_norm": 3.270128011703491, | |
| "learning_rate": 4.9899063861443854e-05, | |
| "loss": 2.2844, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.028704209950792783, | |
| "grad_norm": 4.186242580413818, | |
| "learning_rate": 4.989842036878895e-05, | |
| "loss": 3.1222, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.028795334426827045, | |
| "grad_norm": 3.346442937850952, | |
| "learning_rate": 4.989777483561043e-05, | |
| "loss": 3.1899, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.028886458902861307, | |
| "grad_norm": 4.0838541984558105, | |
| "learning_rate": 4.9897127261961196e-05, | |
| "loss": 2.7816, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.028977583378895572, | |
| "grad_norm": 2.0978620052337646, | |
| "learning_rate": 4.989647764789432e-05, | |
| "loss": 2.9921, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.029068707854929834, | |
| "grad_norm": 2.830665349960327, | |
| "learning_rate": 4.989582599346304e-05, | |
| "loss": 1.9849, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.029159832330964097, | |
| "grad_norm": 2.3662290573120117, | |
| "learning_rate": 4.989517229872076e-05, | |
| "loss": 3.0048, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.02925095680699836, | |
| "grad_norm": 3.844538927078247, | |
| "learning_rate": 4.989451656372106e-05, | |
| "loss": 2.8494, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.029342081283032624, | |
| "grad_norm": 6.312561511993408, | |
| "learning_rate": 4.989385878851767e-05, | |
| "loss": 2.9627, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.029433205759066886, | |
| "grad_norm": 3.0908846855163574, | |
| "learning_rate": 4.98931989731645e-05, | |
| "loss": 2.974, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.02952433023510115, | |
| "grad_norm": 2.5474183559417725, | |
| "learning_rate": 4.989253711771563e-05, | |
| "loss": 2.8485, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.02961545471113541, | |
| "grad_norm": 2.3893277645111084, | |
| "learning_rate": 4.98918732222253e-05, | |
| "loss": 2.8714, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.029706579187169672, | |
| "grad_norm": 2.8498189449310303, | |
| "learning_rate": 4.989120728674792e-05, | |
| "loss": 2.567, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.029797703663203938, | |
| "grad_norm": 3.217600107192993, | |
| "learning_rate": 4.989053931133806e-05, | |
| "loss": 2.7916, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.0298888281392382, | |
| "grad_norm": 3.3806464672088623, | |
| "learning_rate": 4.988986929605047e-05, | |
| "loss": 2.6764, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.029979952615272462, | |
| "grad_norm": 3.6862692832946777, | |
| "learning_rate": 4.988919724094005e-05, | |
| "loss": 2.8749, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.030071077091306724, | |
| "grad_norm": 2.995811939239502, | |
| "learning_rate": 4.9888523146061885e-05, | |
| "loss": 3.0403, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.030162201567340986, | |
| "grad_norm": 2.922734498977661, | |
| "learning_rate": 4.988784701147122e-05, | |
| "loss": 2.792, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.030253326043375252, | |
| "grad_norm": 2.79219651222229, | |
| "learning_rate": 4.988716883722348e-05, | |
| "loss": 2.9411, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.030344450519409514, | |
| "grad_norm": 4.6752495765686035, | |
| "learning_rate": 4.9886488623374214e-05, | |
| "loss": 3.1196, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.030435574995443776, | |
| "grad_norm": 2.8725225925445557, | |
| "learning_rate": 4.988580636997918e-05, | |
| "loss": 2.7962, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.030526699471478038, | |
| "grad_norm": 3.8501832485198975, | |
| "learning_rate": 4.9885122077094304e-05, | |
| "loss": 2.6243, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.0306178239475123, | |
| "grad_norm": 2.141561985015869, | |
| "learning_rate": 4.988443574477566e-05, | |
| "loss": 3.0925, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.030708948423546566, | |
| "grad_norm": 4.128694534301758, | |
| "learning_rate": 4.988374737307949e-05, | |
| "loss": 2.985, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.030800072899580828, | |
| "grad_norm": 2.3043148517608643, | |
| "learning_rate": 4.9883056962062213e-05, | |
| "loss": 2.9108, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.03089119737561509, | |
| "grad_norm": 2.1815946102142334, | |
| "learning_rate": 4.9882364511780414e-05, | |
| "loss": 2.9105, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.030982321851649352, | |
| "grad_norm": 3.693887710571289, | |
| "learning_rate": 4.9881670022290836e-05, | |
| "loss": 2.8508, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.031073446327683617, | |
| "grad_norm": 3.411998748779297, | |
| "learning_rate": 4.9880973493650394e-05, | |
| "loss": 3.1107, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.03116457080371788, | |
| "grad_norm": 3.362623453140259, | |
| "learning_rate": 4.9880274925916183e-05, | |
| "loss": 2.5887, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.03125569527975214, | |
| "grad_norm": 2.2149298191070557, | |
| "learning_rate": 4.987957431914544e-05, | |
| "loss": 2.9536, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.031346819755786404, | |
| "grad_norm": 4.359997749328613, | |
| "learning_rate": 4.9878871673395586e-05, | |
| "loss": 3.2973, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.03143794423182067, | |
| "grad_norm": 4.717598915100098, | |
| "learning_rate": 4.987816698872421e-05, | |
| "loss": 1.9499, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.03152906870785493, | |
| "grad_norm": 3.7561376094818115, | |
| "learning_rate": 4.9877460265189064e-05, | |
| "loss": 2.6827, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.03162019318388919, | |
| "grad_norm": 3.771298408508301, | |
| "learning_rate": 4.9876751502848064e-05, | |
| "loss": 2.5356, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.03171131765992345, | |
| "grad_norm": 3.1962294578552246, | |
| "learning_rate": 4.9876040701759286e-05, | |
| "loss": 2.7013, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.03180244213595772, | |
| "grad_norm": 4.098245143890381, | |
| "learning_rate": 4.9875327861981006e-05, | |
| "loss": 2.8326, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.03189356661199198, | |
| "grad_norm": 2.9347190856933594, | |
| "learning_rate": 4.9874612983571614e-05, | |
| "loss": 2.6662, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.03198469108802624, | |
| "grad_norm": 3.6982884407043457, | |
| "learning_rate": 4.9873896066589725e-05, | |
| "loss": 3.0266, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.03207581556406051, | |
| "grad_norm": 3.387371778488159, | |
| "learning_rate": 4.987317711109408e-05, | |
| "loss": 2.7588, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.03216694004009477, | |
| "grad_norm": 4.407079219818115, | |
| "learning_rate": 4.9872456117143607e-05, | |
| "loss": 3.1829, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.03225806451612903, | |
| "grad_norm": 3.283046007156372, | |
| "learning_rate": 4.987173308479738e-05, | |
| "loss": 1.7902, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.0323491889921633, | |
| "grad_norm": 3.132047653198242, | |
| "learning_rate": 4.987100801411467e-05, | |
| "loss": 2.5832, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.032440313468197556, | |
| "grad_norm": 3.831251859664917, | |
| "learning_rate": 4.9870280905154886e-05, | |
| "loss": 2.804, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.03253143794423182, | |
| "grad_norm": 4.598052024841309, | |
| "learning_rate": 4.986955175797763e-05, | |
| "loss": 2.3582, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.03262256242026609, | |
| "grad_norm": 3.8844218254089355, | |
| "learning_rate": 4.986882057264266e-05, | |
| "loss": 2.5354, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.032713686896300345, | |
| "grad_norm": 3.4492027759552, | |
| "learning_rate": 4.986808734920988e-05, | |
| "loss": 2.952, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.03280481137233461, | |
| "grad_norm": 3.0955710411071777, | |
| "learning_rate": 4.9867352087739405e-05, | |
| "loss": 2.7746, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.03289593584836887, | |
| "grad_norm": 2.497973680496216, | |
| "learning_rate": 4.986661478829147e-05, | |
| "loss": 2.9674, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.032987060324403135, | |
| "grad_norm": 4.156097412109375, | |
| "learning_rate": 4.986587545092651e-05, | |
| "loss": 3.0256, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.0330781848004374, | |
| "grad_norm": 2.2689907550811768, | |
| "learning_rate": 4.986513407570513e-05, | |
| "loss": 2.888, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.03316930927647166, | |
| "grad_norm": 5.262107849121094, | |
| "learning_rate": 4.986439066268807e-05, | |
| "loss": 3.0598, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.033260433752505925, | |
| "grad_norm": 2.9902825355529785, | |
| "learning_rate": 4.9863645211936254e-05, | |
| "loss": 3.0898, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.03335155822854018, | |
| "grad_norm": 1.6874420642852783, | |
| "learning_rate": 4.986289772351079e-05, | |
| "loss": 2.7454, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.03344268270457445, | |
| "grad_norm": 3.7136764526367188, | |
| "learning_rate": 4.986214819747293e-05, | |
| "loss": 2.9232, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.033533807180608714, | |
| "grad_norm": 2.174556255340576, | |
| "learning_rate": 4.986139663388409e-05, | |
| "loss": 2.9913, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.03362493165664297, | |
| "grad_norm": 3.348562240600586, | |
| "learning_rate": 4.986064303280588e-05, | |
| "loss": 3.0657, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.03371605613267724, | |
| "grad_norm": 2.8100669384002686, | |
| "learning_rate": 4.9859887394300055e-05, | |
| "loss": 2.8318, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.0338071806087115, | |
| "grad_norm": 3.4799964427948, | |
| "learning_rate": 4.9859129718428536e-05, | |
| "loss": 2.8546, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.03389830508474576, | |
| "grad_norm": 3.3681960105895996, | |
| "learning_rate": 4.985837000525343e-05, | |
| "loss": 2.4564, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.03398942956078003, | |
| "grad_norm": 3.261798620223999, | |
| "learning_rate": 4.985760825483699e-05, | |
| "loss": 2.6388, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.03408055403681429, | |
| "grad_norm": 2.5790557861328125, | |
| "learning_rate": 4.985684446724165e-05, | |
| "loss": 2.9411, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.03417167851284855, | |
| "grad_norm": 3.773350715637207, | |
| "learning_rate": 4.985607864252999e-05, | |
| "loss": 2.8002, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.03426280298888281, | |
| "grad_norm": 3.27166485786438, | |
| "learning_rate": 4.9855310780764794e-05, | |
| "loss": 2.7819, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.034353927464917076, | |
| "grad_norm": 3.0358798503875732, | |
| "learning_rate": 4.985454088200898e-05, | |
| "loss": 2.918, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.03444505194095134, | |
| "grad_norm": 2.6746315956115723, | |
| "learning_rate": 4.985376894632564e-05, | |
| "loss": 2.6342, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.0345361764169856, | |
| "grad_norm": 3.8095743656158447, | |
| "learning_rate": 4.985299497377805e-05, | |
| "loss": 2.7959, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.034627300893019866, | |
| "grad_norm": 4.985168933868408, | |
| "learning_rate": 4.985221896442963e-05, | |
| "loss": 2.5482, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.034718425369054125, | |
| "grad_norm": 2.8703434467315674, | |
| "learning_rate": 4.9851440918343985e-05, | |
| "loss": 3.3517, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.03480954984508839, | |
| "grad_norm": 3.232175827026367, | |
| "learning_rate": 4.985066083558486e-05, | |
| "loss": 2.5102, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.034900674321122656, | |
| "grad_norm": 2.980302095413208, | |
| "learning_rate": 4.9849878716216215e-05, | |
| "loss": 2.7518, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.034991798797156914, | |
| "grad_norm": 2.309779167175293, | |
| "learning_rate": 4.9849094560302124e-05, | |
| "loss": 2.9199, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.03508292327319118, | |
| "grad_norm": 3.3376975059509277, | |
| "learning_rate": 4.984830836790686e-05, | |
| "loss": 2.9014, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.03517404774922544, | |
| "grad_norm": 3.1377711296081543, | |
| "learning_rate": 4.984752013909485e-05, | |
| "loss": 3.1008, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.035265172225259704, | |
| "grad_norm": 3.804525136947632, | |
| "learning_rate": 4.9846729873930706e-05, | |
| "loss": 2.6573, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.03535629670129397, | |
| "grad_norm": 2.8694443702697754, | |
| "learning_rate": 4.984593757247918e-05, | |
| "loss": 2.8805, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.03544742117732823, | |
| "grad_norm": 2.8578102588653564, | |
| "learning_rate": 4.9845143234805216e-05, | |
| "loss": 2.6417, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.035538545653362494, | |
| "grad_norm": 1.6129778623580933, | |
| "learning_rate": 4.9844346860973896e-05, | |
| "loss": 2.5198, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03562967012939676, | |
| "grad_norm": 3.4736506938934326, | |
| "learning_rate": 4.98435484510505e-05, | |
| "loss": 2.0989, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.03572079460543102, | |
| "grad_norm": 2.6138317584991455, | |
| "learning_rate": 4.984274800510046e-05, | |
| "loss": 2.9925, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.035811919081465284, | |
| "grad_norm": 4.978623867034912, | |
| "learning_rate": 4.984194552318936e-05, | |
| "loss": 2.3243, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.03590304355749954, | |
| "grad_norm": 1.921769142150879, | |
| "learning_rate": 4.984114100538299e-05, | |
| "loss": 2.95, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.03599416803353381, | |
| "grad_norm": 3.2645325660705566, | |
| "learning_rate": 4.984033445174727e-05, | |
| "loss": 2.4694, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.03608529250956807, | |
| "grad_norm": 2.63525390625, | |
| "learning_rate": 4.9839525862348304e-05, | |
| "loss": 2.4872, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.03617641698560233, | |
| "grad_norm": 3.7979824542999268, | |
| "learning_rate": 4.983871523725235e-05, | |
| "loss": 2.399, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.0362675414616366, | |
| "grad_norm": 3.695284128189087, | |
| "learning_rate": 4.983790257652585e-05, | |
| "loss": 3.2067, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.036358665937670856, | |
| "grad_norm": 4.359871864318848, | |
| "learning_rate": 4.9837087880235414e-05, | |
| "loss": 2.5611, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.03644979041370512, | |
| "grad_norm": 3.501504898071289, | |
| "learning_rate": 4.983627114844779e-05, | |
| "loss": 2.2389, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03654091488973939, | |
| "grad_norm": 3.637976884841919, | |
| "learning_rate": 4.983545238122993e-05, | |
| "loss": 2.4473, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.036632039365773646, | |
| "grad_norm": 4.534337043762207, | |
| "learning_rate": 4.983463157864893e-05, | |
| "loss": 3.0207, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.03672316384180791, | |
| "grad_norm": 4.795605659484863, | |
| "learning_rate": 4.983380874077204e-05, | |
| "loss": 2.7606, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.03681428831784217, | |
| "grad_norm": 2.7595343589782715, | |
| "learning_rate": 4.983298386766672e-05, | |
| "loss": 3.0888, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.036905412793876435, | |
| "grad_norm": 4.718511581420898, | |
| "learning_rate": 4.983215695940057e-05, | |
| "loss": 2.9122, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.0369965372699107, | |
| "grad_norm": 3.5803143978118896, | |
| "learning_rate": 4.9831328016041335e-05, | |
| "loss": 2.9831, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.03708766174594496, | |
| "grad_norm": 3.076673984527588, | |
| "learning_rate": 4.983049703765697e-05, | |
| "loss": 2.1278, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.037178786221979225, | |
| "grad_norm": 2.236630439758301, | |
| "learning_rate": 4.9829664024315575e-05, | |
| "loss": 3.0333, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.037269910698013484, | |
| "grad_norm": 3.2108254432678223, | |
| "learning_rate": 4.982882897608542e-05, | |
| "loss": 2.612, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.03736103517404775, | |
| "grad_norm": 4.408178806304932, | |
| "learning_rate": 4.982799189303493e-05, | |
| "loss": 2.7848, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.037452159650082015, | |
| "grad_norm": 3.5266246795654297, | |
| "learning_rate": 4.9827152775232714e-05, | |
| "loss": 2.8434, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.03754328412611627, | |
| "grad_norm": 5.857843399047852, | |
| "learning_rate": 4.982631162274753e-05, | |
| "loss": 2.9037, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.03763440860215054, | |
| "grad_norm": 2.262103796005249, | |
| "learning_rate": 4.982546843564834e-05, | |
| "loss": 2.9968, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.0377255330781848, | |
| "grad_norm": 4.400634288787842, | |
| "learning_rate": 4.982462321400423e-05, | |
| "loss": 2.9611, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.03781665755421906, | |
| "grad_norm": 1.9334667921066284, | |
| "learning_rate": 4.982377595788447e-05, | |
| "loss": 2.8443, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.03790778203025333, | |
| "grad_norm": 3.3641562461853027, | |
| "learning_rate": 4.9822926667358494e-05, | |
| "loss": 2.8691, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.03799890650628759, | |
| "grad_norm": 3.93518328666687, | |
| "learning_rate": 4.98220753424959e-05, | |
| "loss": 2.7432, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.03809003098232185, | |
| "grad_norm": 3.3891704082489014, | |
| "learning_rate": 4.982122198336647e-05, | |
| "loss": 3.1939, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.03818115545835611, | |
| "grad_norm": 2.4883248805999756, | |
| "learning_rate": 4.982036659004014e-05, | |
| "loss": 2.8615, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.03827227993439038, | |
| "grad_norm": 2.802154064178467, | |
| "learning_rate": 4.9819509162587e-05, | |
| "loss": 3.0744, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03836340441042464, | |
| "grad_norm": 4.077276706695557, | |
| "learning_rate": 4.981864970107733e-05, | |
| "loss": 2.6231, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.0384545288864589, | |
| "grad_norm": 3.337916135787964, | |
| "learning_rate": 4.981778820558156e-05, | |
| "loss": 2.8649, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.03854565336249317, | |
| "grad_norm": 3.7240352630615234, | |
| "learning_rate": 4.98169246761703e-05, | |
| "loss": 3.0618, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.038636777838527425, | |
| "grad_norm": 1.917153239250183, | |
| "learning_rate": 4.981605911291432e-05, | |
| "loss": 3.0332, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.03872790231456169, | |
| "grad_norm": 3.458873987197876, | |
| "learning_rate": 4.981519151588454e-05, | |
| "loss": 2.77, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.038819026790595956, | |
| "grad_norm": 1.8529995679855347, | |
| "learning_rate": 4.9814321885152085e-05, | |
| "loss": 2.8618, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.038910151266630215, | |
| "grad_norm": 2.460031509399414, | |
| "learning_rate": 4.981345022078821e-05, | |
| "loss": 2.8602, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.03900127574266448, | |
| "grad_norm": 3.4180796146392822, | |
| "learning_rate": 4.981257652286436e-05, | |
| "loss": 2.6805, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.03909240021869874, | |
| "grad_norm": 3.609208345413208, | |
| "learning_rate": 4.981170079145213e-05, | |
| "loss": 2.8997, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.039183524694733005, | |
| "grad_norm": 4.643214225769043, | |
| "learning_rate": 4.981082302662329e-05, | |
| "loss": 3.1809, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03927464917076727, | |
| "grad_norm": 3.2911436557769775, | |
| "learning_rate": 4.980994322844979e-05, | |
| "loss": 2.982, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.03936577364680153, | |
| "grad_norm": 3.3252878189086914, | |
| "learning_rate": 4.980906139700372e-05, | |
| "loss": 3.1547, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.039456898122835794, | |
| "grad_norm": 2.907970666885376, | |
| "learning_rate": 4.980817753235735e-05, | |
| "loss": 3.1306, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.03954802259887006, | |
| "grad_norm": 2.1587812900543213, | |
| "learning_rate": 4.980729163458312e-05, | |
| "loss": 3.1019, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.03963914707490432, | |
| "grad_norm": 2.8087103366851807, | |
| "learning_rate": 4.9806403703753624e-05, | |
| "loss": 3.092, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.039730271550938584, | |
| "grad_norm": 3.1665287017822266, | |
| "learning_rate": 4.980551373994164e-05, | |
| "loss": 2.8803, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.03982139602697284, | |
| "grad_norm": 3.015409231185913, | |
| "learning_rate": 4.980462174322011e-05, | |
| "loss": 3.0123, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.03991252050300711, | |
| "grad_norm": 6.592101573944092, | |
| "learning_rate": 4.980372771366213e-05, | |
| "loss": 3.0624, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.040003644979041374, | |
| "grad_norm": 2.965012788772583, | |
| "learning_rate": 4.980283165134097e-05, | |
| "loss": 3.0143, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.04009476945507563, | |
| "grad_norm": 1.9055732488632202, | |
| "learning_rate": 4.980193355633006e-05, | |
| "loss": 2.9194, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0401858939311099, | |
| "grad_norm": 2.0998048782348633, | |
| "learning_rate": 4.980103342870301e-05, | |
| "loss": 2.6373, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.040277018407144156, | |
| "grad_norm": 1.6069968938827515, | |
| "learning_rate": 4.980013126853358e-05, | |
| "loss": 2.4255, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.04036814288317842, | |
| "grad_norm": 2.8056509494781494, | |
| "learning_rate": 4.9799227075895714e-05, | |
| "loss": 2.4602, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.04045926735921269, | |
| "grad_norm": 2.874981641769409, | |
| "learning_rate": 4.979832085086352e-05, | |
| "loss": 2.6648, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.040550391835246946, | |
| "grad_norm": 4.484694004058838, | |
| "learning_rate": 4.979741259351125e-05, | |
| "loss": 3.4229, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.04064151631128121, | |
| "grad_norm": 3.391519546508789, | |
| "learning_rate": 4.979650230391335e-05, | |
| "loss": 2.7623, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.04073264078731547, | |
| "grad_norm": 3.0873026847839355, | |
| "learning_rate": 4.979558998214442e-05, | |
| "loss": 2.5945, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.040823765263349736, | |
| "grad_norm": 2.205383777618408, | |
| "learning_rate": 4.979467562827923e-05, | |
| "loss": 2.9883, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.040914889739384, | |
| "grad_norm": 3.873610019683838, | |
| "learning_rate": 4.979375924239271e-05, | |
| "loss": 3.1105, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.04100601421541826, | |
| "grad_norm": 2.3900392055511475, | |
| "learning_rate": 4.979284082455996e-05, | |
| "loss": 3.0243, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.041097138691452526, | |
| "grad_norm": 3.442873477935791, | |
| "learning_rate": 4.979192037485626e-05, | |
| "loss": 2.9851, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.041188263167486784, | |
| "grad_norm": 2.941448450088501, | |
| "learning_rate": 4.979099789335703e-05, | |
| "loss": 2.648, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.04127938764352105, | |
| "grad_norm": 2.6990129947662354, | |
| "learning_rate": 4.979007338013788e-05, | |
| "loss": 2.765, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.041370512119555315, | |
| "grad_norm": 2.8547112941741943, | |
| "learning_rate": 4.978914683527458e-05, | |
| "loss": 2.6851, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.041461636595589574, | |
| "grad_norm": 1.7790765762329102, | |
| "learning_rate": 4.978821825884306e-05, | |
| "loss": 2.897, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.04155276107162384, | |
| "grad_norm": 3.662311553955078, | |
| "learning_rate": 4.978728765091941e-05, | |
| "loss": 2.3728, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.0416438855476581, | |
| "grad_norm": 3.8467187881469727, | |
| "learning_rate": 4.978635501157991e-05, | |
| "loss": 2.6218, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.041735010023692364, | |
| "grad_norm": 3.8221049308776855, | |
| "learning_rate": 4.978542034090099e-05, | |
| "loss": 2.816, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.04182613449972663, | |
| "grad_norm": 4.882442951202393, | |
| "learning_rate": 4.9784483638959254e-05, | |
| "loss": 2.9258, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.04191725897576089, | |
| "grad_norm": 2.9506404399871826, | |
| "learning_rate": 4.978354490583146e-05, | |
| "loss": 2.9122, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.04200838345179515, | |
| "grad_norm": 3.596014976501465, | |
| "learning_rate": 4.978260414159455e-05, | |
| "loss": 2.9199, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.04209950792782941, | |
| "grad_norm": 2.6444709300994873, | |
| "learning_rate": 4.978166134632562e-05, | |
| "loss": 2.9953, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.04219063240386368, | |
| "grad_norm": 3.1696035861968994, | |
| "learning_rate": 4.978071652010193e-05, | |
| "loss": 3.0076, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.04228175687989794, | |
| "grad_norm": 1.9968247413635254, | |
| "learning_rate": 4.977976966300092e-05, | |
| "loss": 2.8129, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.0423728813559322, | |
| "grad_norm": 3.2720398902893066, | |
| "learning_rate": 4.9778820775100185e-05, | |
| "loss": 3.01, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.04246400583196647, | |
| "grad_norm": 3.994105577468872, | |
| "learning_rate": 4.9777869856477485e-05, | |
| "loss": 2.5902, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.042555130308000726, | |
| "grad_norm": 3.4857635498046875, | |
| "learning_rate": 4.977691690721076e-05, | |
| "loss": 2.8994, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.04264625478403499, | |
| "grad_norm": 3.149409294128418, | |
| "learning_rate": 4.977596192737811e-05, | |
| "loss": 2.7171, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.04273737926006926, | |
| "grad_norm": 3.7346558570861816, | |
| "learning_rate": 4.9775004917057786e-05, | |
| "loss": 3.0065, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.042828503736103515, | |
| "grad_norm": 4.905463695526123, | |
| "learning_rate": 4.977404587632824e-05, | |
| "loss": 2.5023, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.04291962821213778, | |
| "grad_norm": 3.6714894771575928, | |
| "learning_rate": 4.9773084805268045e-05, | |
| "loss": 2.1294, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.043010752688172046, | |
| "grad_norm": 2.8621413707733154, | |
| "learning_rate": 4.977212170395598e-05, | |
| "loss": 2.8082, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.043101877164206305, | |
| "grad_norm": 2.972755193710327, | |
| "learning_rate": 4.9771156572470966e-05, | |
| "loss": 2.8794, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.04319300164024057, | |
| "grad_norm": 1.98660409450531, | |
| "learning_rate": 4.97701894108921e-05, | |
| "loss": 2.9151, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.04328412611627483, | |
| "grad_norm": 3.258434534072876, | |
| "learning_rate": 4.9769220219298666e-05, | |
| "loss": 2.8181, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.043375250592309095, | |
| "grad_norm": 2.2653932571411133, | |
| "learning_rate": 4.9768248997770063e-05, | |
| "loss": 3.051, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.04346637506834336, | |
| "grad_norm": 4.378432273864746, | |
| "learning_rate": 4.97672757463859e-05, | |
| "loss": 2.7252, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.04355749954437762, | |
| "grad_norm": 5.332482814788818, | |
| "learning_rate": 4.976630046522594e-05, | |
| "loss": 2.9468, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.043648624020411884, | |
| "grad_norm": 2.8985297679901123, | |
| "learning_rate": 4.9765323154370114e-05, | |
| "loss": 2.782, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.04373974849644614, | |
| "grad_norm": 4.3086137771606445, | |
| "learning_rate": 4.976434381389851e-05, | |
| "loss": 3.0355, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.04383087297248041, | |
| "grad_norm": 4.562249660491943, | |
| "learning_rate": 4.976336244389138e-05, | |
| "loss": 2.6953, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.043921997448514674, | |
| "grad_norm": 3.0404744148254395, | |
| "learning_rate": 4.9762379044429174e-05, | |
| "loss": 2.9207, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.04401312192454893, | |
| "grad_norm": 2.2012205123901367, | |
| "learning_rate": 4.9761393615592465e-05, | |
| "loss": 2.8434, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.0441042464005832, | |
| "grad_norm": 2.0561892986297607, | |
| "learning_rate": 4.9760406157462024e-05, | |
| "loss": 2.7924, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.04419537087661746, | |
| "grad_norm": 7.559401988983154, | |
| "learning_rate": 4.975941667011877e-05, | |
| "loss": 2.9499, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.04428649535265172, | |
| "grad_norm": 3.387563943862915, | |
| "learning_rate": 4.9758425153643804e-05, | |
| "loss": 2.935, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.04437761982868599, | |
| "grad_norm": 3.8939521312713623, | |
| "learning_rate": 4.975743160811839e-05, | |
| "loss": 3.1682, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.04446874430472025, | |
| "grad_norm": 4.640250205993652, | |
| "learning_rate": 4.975643603362393e-05, | |
| "loss": 3.1556, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.04455986878075451, | |
| "grad_norm": 2.0823843479156494, | |
| "learning_rate": 4.975543843024203e-05, | |
| "loss": 2.9787, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.04465099325678877, | |
| "grad_norm": 3.4816336631774902, | |
| "learning_rate": 4.975443879805445e-05, | |
| "loss": 3.2459, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.044742117732823036, | |
| "grad_norm": 4.104045867919922, | |
| "learning_rate": 4.97534371371431e-05, | |
| "loss": 2.8059, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.0448332422088573, | |
| "grad_norm": 5.515511989593506, | |
| "learning_rate": 4.9752433447590084e-05, | |
| "loss": 3.98, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.04492436668489156, | |
| "grad_norm": 3.279454231262207, | |
| "learning_rate": 4.975142772947766e-05, | |
| "loss": 3.1761, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.045015491160925826, | |
| "grad_norm": 4.770516395568848, | |
| "learning_rate": 4.975041998288824e-05, | |
| "loss": 3.169, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.045106615636960085, | |
| "grad_norm": 4.414744853973389, | |
| "learning_rate": 4.9749410207904416e-05, | |
| "loss": 3.3102, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.04519774011299435, | |
| "grad_norm": 5.29826021194458, | |
| "learning_rate": 4.974839840460895e-05, | |
| "loss": 3.1497, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.045288864589028616, | |
| "grad_norm": 3.6789612770080566, | |
| "learning_rate": 4.974738457308475e-05, | |
| "loss": 3.1284, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.045379989065062874, | |
| "grad_norm": 2.8582963943481445, | |
| "learning_rate": 4.974636871341492e-05, | |
| "loss": 3.0947, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.04547111354109714, | |
| "grad_norm": 3.199500799179077, | |
| "learning_rate": 4.97453508256827e-05, | |
| "loss": 3.2182, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.0455622380171314, | |
| "grad_norm": 3.2001290321350098, | |
| "learning_rate": 4.9744330909971506e-05, | |
| "loss": 3.39, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.045653362493165664, | |
| "grad_norm": 2.053631544113159, | |
| "learning_rate": 4.9743308966364945e-05, | |
| "loss": 3.2044, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.04574448696919993, | |
| "grad_norm": 3.347944498062134, | |
| "learning_rate": 4.9742284994946756e-05, | |
| "loss": 3.3669, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.04583561144523419, | |
| "grad_norm": 4.525710105895996, | |
| "learning_rate": 4.974125899580086e-05, | |
| "loss": 3.661, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.045926735921268454, | |
| "grad_norm": 3.3910508155822754, | |
| "learning_rate": 4.974023096901133e-05, | |
| "loss": 3.3766, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.04601786039730271, | |
| "grad_norm": 4.090876579284668, | |
| "learning_rate": 4.973920091466243e-05, | |
| "loss": 3.4127, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.04610898487333698, | |
| "grad_norm": 3.523660898208618, | |
| "learning_rate": 4.973816883283858e-05, | |
| "loss": 3.4372, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.04620010934937124, | |
| "grad_norm": 3.0479791164398193, | |
| "learning_rate": 4.9737134723624355e-05, | |
| "loss": 3.7846, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.0462912338254055, | |
| "grad_norm": 1.953797459602356, | |
| "learning_rate": 4.973609858710451e-05, | |
| "loss": 3.1759, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.04638235830143977, | |
| "grad_norm": 3.4201056957244873, | |
| "learning_rate": 4.973506042336395e-05, | |
| "loss": 3.7478, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.046473482777474026, | |
| "grad_norm": 4.29667854309082, | |
| "learning_rate": 4.9734020232487766e-05, | |
| "loss": 2.8707, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.04656460725350829, | |
| "grad_norm": 3.7853410243988037, | |
| "learning_rate": 4.973297801456121e-05, | |
| "loss": 3.1466, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.04665573172954256, | |
| "grad_norm": 3.550114870071411, | |
| "learning_rate": 4.973193376966968e-05, | |
| "loss": 3.5852, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.046746856205576816, | |
| "grad_norm": 2.124420642852783, | |
| "learning_rate": 4.9730887497898766e-05, | |
| "loss": 3.3123, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.04683798068161108, | |
| "grad_norm": 5.119099140167236, | |
| "learning_rate": 4.9729839199334215e-05, | |
| "loss": 3.0232, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.04692910515764535, | |
| "grad_norm": 1.975109338760376, | |
| "learning_rate": 4.9728788874061936e-05, | |
| "loss": 3.2217, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.047020229633679606, | |
| "grad_norm": 2.8429102897644043, | |
| "learning_rate": 4.9727736522168016e-05, | |
| "loss": 3.5701, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.04711135410971387, | |
| "grad_norm": 2.3540258407592773, | |
| "learning_rate": 4.972668214373869e-05, | |
| "loss": 3.1378, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.04720247858574813, | |
| "grad_norm": 1.4242401123046875, | |
| "learning_rate": 4.972562573886037e-05, | |
| "loss": 3.0776, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.047293603061782395, | |
| "grad_norm": 2.1467764377593994, | |
| "learning_rate": 4.972456730761963e-05, | |
| "loss": 3.1369, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.04738472753781666, | |
| "grad_norm": 1.5915725231170654, | |
| "learning_rate": 4.972350685010322e-05, | |
| "loss": 3.0371, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.04747585201385092, | |
| "grad_norm": 3.358949899673462, | |
| "learning_rate": 4.972244436639804e-05, | |
| "loss": 3.225, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.047566976489885185, | |
| "grad_norm": 2.1057491302490234, | |
| "learning_rate": 4.972137985659117e-05, | |
| "loss": 3.2478, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.047658100965919444, | |
| "grad_norm": 1.7229844331741333, | |
| "learning_rate": 4.9720313320769854e-05, | |
| "loss": 3.1378, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.04774922544195371, | |
| "grad_norm": 4.693446636199951, | |
| "learning_rate": 4.971924475902149e-05, | |
| "loss": 3.6186, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.047840349917987975, | |
| "grad_norm": 2.06803822517395, | |
| "learning_rate": 4.971817417143366e-05, | |
| "loss": 3.3783, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.04793147439402223, | |
| "grad_norm": 1.9626054763793945, | |
| "learning_rate": 4.971710155809409e-05, | |
| "loss": 3.1188, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.0480225988700565, | |
| "grad_norm": 5.128235340118408, | |
| "learning_rate": 4.9716026919090705e-05, | |
| "loss": 2.7121, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.04811372334609076, | |
| "grad_norm": 1.7978111505508423, | |
| "learning_rate": 4.971495025451156e-05, | |
| "loss": 3.2038, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.04820484782212502, | |
| "grad_norm": 2.185279130935669, | |
| "learning_rate": 4.971387156444489e-05, | |
| "loss": 3.2331, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.04829597229815929, | |
| "grad_norm": 2.912400007247925, | |
| "learning_rate": 4.971279084897912e-05, | |
| "loss": 3.3246, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04838709677419355, | |
| "grad_norm": 2.451106548309326, | |
| "learning_rate": 4.971170810820279e-05, | |
| "loss": 3.2453, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.04847822125022781, | |
| "grad_norm": 3.062638282775879, | |
| "learning_rate": 4.9710623342204646e-05, | |
| "loss": 3.1878, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.04856934572626207, | |
| "grad_norm": 4.197722434997559, | |
| "learning_rate": 4.97095365510736e-05, | |
| "loss": 3.5296, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.04866047020229634, | |
| "grad_norm": 5.098058700561523, | |
| "learning_rate": 4.97084477348987e-05, | |
| "loss": 4.8548, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.0487515946783306, | |
| "grad_norm": 2.903400182723999, | |
| "learning_rate": 4.9707356893769194e-05, | |
| "loss": 3.2411, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.04884271915436486, | |
| "grad_norm": 3.2839369773864746, | |
| "learning_rate": 4.970626402777447e-05, | |
| "loss": 3.5929, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.048933843630399126, | |
| "grad_norm": 2.5680384635925293, | |
| "learning_rate": 4.970516913700411e-05, | |
| "loss": 3.4145, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.049024968106433385, | |
| "grad_norm": 2.191035509109497, | |
| "learning_rate": 4.9704072221547824e-05, | |
| "loss": 3.3342, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.04911609258246765, | |
| "grad_norm": 4.748012065887451, | |
| "learning_rate": 4.970297328149551e-05, | |
| "loss": 3.2415, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.049207217058501916, | |
| "grad_norm": 3.8018529415130615, | |
| "learning_rate": 4.970187231693725e-05, | |
| "loss": 3.5361, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.049298341534536175, | |
| "grad_norm": 3.128706216812134, | |
| "learning_rate": 4.970076932796326e-05, | |
| "loss": 3.2684, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.04938946601057044, | |
| "grad_norm": 2.324488401412964, | |
| "learning_rate": 4.969966431466393e-05, | |
| "loss": 3.143, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.0494805904866047, | |
| "grad_norm": 4.210093975067139, | |
| "learning_rate": 4.969855727712982e-05, | |
| "loss": 3.2956, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.049571714962638964, | |
| "grad_norm": 2.552192449569702, | |
| "learning_rate": 4.969744821545166e-05, | |
| "loss": 3.52, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.04966283943867323, | |
| "grad_norm": 3.2044615745544434, | |
| "learning_rate": 4.9696337129720346e-05, | |
| "loss": 3.5377, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.04975396391470749, | |
| "grad_norm": 3.112868547439575, | |
| "learning_rate": 4.969522402002693e-05, | |
| "loss": 3.6119, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.049845088390741754, | |
| "grad_norm": 2.6442015171051025, | |
| "learning_rate": 4.969410888646264e-05, | |
| "loss": 3.3822, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.04993621286677601, | |
| "grad_norm": 4.498085975646973, | |
| "learning_rate": 4.969299172911887e-05, | |
| "loss": 3.6879, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.05002733734281028, | |
| "grad_norm": 4.011466979980469, | |
| "learning_rate": 4.969187254808715e-05, | |
| "loss": 3.55, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.050118461818844544, | |
| "grad_norm": 4.503020286560059, | |
| "learning_rate": 4.969075134345924e-05, | |
| "loss": 3.6521, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0502095862948788, | |
| "grad_norm": 2.6148521900177, | |
| "learning_rate": 4.9689628115326994e-05, | |
| "loss": 3.0139, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.05030071077091307, | |
| "grad_norm": 2.978384017944336, | |
| "learning_rate": 4.9688502863782484e-05, | |
| "loss": 3.0377, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.050391835246947334, | |
| "grad_norm": 2.7750418186187744, | |
| "learning_rate": 4.9687375588917925e-05, | |
| "loss": 3.5164, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.05048295972298159, | |
| "grad_norm": 3.843651056289673, | |
| "learning_rate": 4.96862462908257e-05, | |
| "loss": 3.3884, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.05057408419901586, | |
| "grad_norm": 2.7308478355407715, | |
| "learning_rate": 4.968511496959835e-05, | |
| "loss": 3.5797, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.050665208675050116, | |
| "grad_norm": 3.842102289199829, | |
| "learning_rate": 4.968398162532861e-05, | |
| "loss": 3.537, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.05075633315108438, | |
| "grad_norm": 4.337724685668945, | |
| "learning_rate": 4.968284625810935e-05, | |
| "loss": 3.3409, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.05084745762711865, | |
| "grad_norm": 4.96396017074585, | |
| "learning_rate": 4.9681708868033616e-05, | |
| "loss": 3.6793, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.050938582103152906, | |
| "grad_norm": 2.3788044452667236, | |
| "learning_rate": 4.9680569455194634e-05, | |
| "loss": 3.3635, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.05102970657918717, | |
| "grad_norm": 3.7733585834503174, | |
| "learning_rate": 4.967942801968577e-05, | |
| "loss": 3.3418, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.05112083105522143, | |
| "grad_norm": 2.370511293411255, | |
| "learning_rate": 4.9678284561600575e-05, | |
| "loss": 3.3194, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.051211955531255696, | |
| "grad_norm": 2.5621142387390137, | |
| "learning_rate": 4.9677139081032754e-05, | |
| "loss": 3.2493, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.05130308000728996, | |
| "grad_norm": 2.7476348876953125, | |
| "learning_rate": 4.96759915780762e-05, | |
| "loss": 3.5329, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.05139420448332422, | |
| "grad_norm": 2.813443422317505, | |
| "learning_rate": 4.9674842052824934e-05, | |
| "loss": 3.4796, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.051485328959358485, | |
| "grad_norm": 2.8847901821136475, | |
| "learning_rate": 4.967369050537317e-05, | |
| "loss": 3.1291, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.051576453435392744, | |
| "grad_norm": 2.9698753356933594, | |
| "learning_rate": 4.96725369358153e-05, | |
| "loss": 3.4165, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.05166757791142701, | |
| "grad_norm": 4.517305374145508, | |
| "learning_rate": 4.9671381344245846e-05, | |
| "loss": 3.1617, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.051758702387461275, | |
| "grad_norm": 2.740018129348755, | |
| "learning_rate": 4.9670223730759515e-05, | |
| "loss": 3.2746, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.051849826863495534, | |
| "grad_norm": 3.409982681274414, | |
| "learning_rate": 4.966906409545118e-05, | |
| "loss": 3.1811, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.0519409513395298, | |
| "grad_norm": 5.607142925262451, | |
| "learning_rate": 4.9667902438415876e-05, | |
| "loss": 3.5238, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.05203207581556406, | |
| "grad_norm": 3.8322174549102783, | |
| "learning_rate": 4.966673875974881e-05, | |
| "loss": 3.4367, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.05212320029159832, | |
| "grad_norm": 4.471311569213867, | |
| "learning_rate": 4.9665573059545346e-05, | |
| "loss": 3.3449, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.05221432476763259, | |
| "grad_norm": 2.2863168716430664, | |
| "learning_rate": 4.966440533790102e-05, | |
| "loss": 3.4894, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.05230544924366685, | |
| "grad_norm": 3.153233289718628, | |
| "learning_rate": 4.966323559491153e-05, | |
| "loss": 3.0977, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.05239657371970111, | |
| "grad_norm": 3.909994602203369, | |
| "learning_rate": 4.9662063830672735e-05, | |
| "loss": 3.3355, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.05248769819573537, | |
| "grad_norm": 3.271372079849243, | |
| "learning_rate": 4.966089004528068e-05, | |
| "loss": 2.9002, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.05257882267176964, | |
| "grad_norm": 3.8702852725982666, | |
| "learning_rate": 4.965971423883155e-05, | |
| "loss": 3.2961, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.0526699471478039, | |
| "grad_norm": 2.3711459636688232, | |
| "learning_rate": 4.965853641142171e-05, | |
| "loss": 3.3116, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.05276107162383816, | |
| "grad_norm": 1.8924840688705444, | |
| "learning_rate": 4.965735656314769e-05, | |
| "loss": 3.2245, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.05285219609987243, | |
| "grad_norm": 2.0791070461273193, | |
| "learning_rate": 4.9656174694106186e-05, | |
| "loss": 3.2511, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.052943320575906685, | |
| "grad_norm": 3.904510259628296, | |
| "learning_rate": 4.9654990804394045e-05, | |
| "loss": 2.3235, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.05303444505194095, | |
| "grad_norm": 2.213655710220337, | |
| "learning_rate": 4.9653804894108294e-05, | |
| "loss": 3.1096, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.05312556952797522, | |
| "grad_norm": 2.9452335834503174, | |
| "learning_rate": 4.965261696334613e-05, | |
| "loss": 3.2722, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.053216694004009475, | |
| "grad_norm": 4.414241790771484, | |
| "learning_rate": 4.965142701220491e-05, | |
| "loss": 3.0292, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.05330781848004374, | |
| "grad_norm": 3.2087106704711914, | |
| "learning_rate": 4.965023504078215e-05, | |
| "loss": 2.8201, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.053398942956078, | |
| "grad_norm": 2.4379191398620605, | |
| "learning_rate": 4.964904104917554e-05, | |
| "loss": 2.8888, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.053490067432112265, | |
| "grad_norm": 3.739722728729248, | |
| "learning_rate": 4.964784503748293e-05, | |
| "loss": 3.3616, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.05358119190814653, | |
| "grad_norm": 3.223637104034424, | |
| "learning_rate": 4.9646647005802333e-05, | |
| "loss": 3.1671, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.05367231638418079, | |
| "grad_norm": 2.3534278869628906, | |
| "learning_rate": 4.9645446954231936e-05, | |
| "loss": 3.1879, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.053763440860215055, | |
| "grad_norm": 2.515484094619751, | |
| "learning_rate": 4.964424488287009e-05, | |
| "loss": 3.3132, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.05385456533624931, | |
| "grad_norm": 3.4469587802886963, | |
| "learning_rate": 4.964304079181532e-05, | |
| "loss": 3.0847, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.05394568981228358, | |
| "grad_norm": 3.8803462982177734, | |
| "learning_rate": 4.964183468116629e-05, | |
| "loss": 3.5651, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.054036814288317844, | |
| "grad_norm": 3.385795831680298, | |
| "learning_rate": 4.9640626551021846e-05, | |
| "loss": 3.4961, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.0541279387643521, | |
| "grad_norm": 2.582401990890503, | |
| "learning_rate": 4.9639416401481e-05, | |
| "loss": 3.5228, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.05421906324038637, | |
| "grad_norm": 4.507946014404297, | |
| "learning_rate": 4.9638204232642945e-05, | |
| "loss": 4.4401, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.054310187716420634, | |
| "grad_norm": 1.9351048469543457, | |
| "learning_rate": 4.9636990044607e-05, | |
| "loss": 3.2011, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.05440131219245489, | |
| "grad_norm": 3.384533166885376, | |
| "learning_rate": 4.9635773837472686e-05, | |
| "loss": 3.3884, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.05449243666848916, | |
| "grad_norm": 2.9044880867004395, | |
| "learning_rate": 4.963455561133967e-05, | |
| "loss": 3.0926, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.05458356114452342, | |
| "grad_norm": 3.2804243564605713, | |
| "learning_rate": 4.96333353663078e-05, | |
| "loss": 3.2308, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.05467468562055768, | |
| "grad_norm": 2.781702756881714, | |
| "learning_rate": 4.9632113102477066e-05, | |
| "loss": 3.2847, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.05476581009659195, | |
| "grad_norm": 1.6848156452178955, | |
| "learning_rate": 4.963088881994764e-05, | |
| "loss": 3.0225, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.054856934572626206, | |
| "grad_norm": 2.8916399478912354, | |
| "learning_rate": 4.962966251881987e-05, | |
| "loss": 3.2519, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.05494805904866047, | |
| "grad_norm": 5.240880489349365, | |
| "learning_rate": 4.962843419919424e-05, | |
| "loss": 4.7183, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.05503918352469473, | |
| "grad_norm": 4.498986721038818, | |
| "learning_rate": 4.962720386117143e-05, | |
| "loss": 4.3066, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.055130308000728996, | |
| "grad_norm": 3.2168045043945312, | |
| "learning_rate": 4.962597150485226e-05, | |
| "loss": 3.2982, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.05522143247676326, | |
| "grad_norm": 4.392197132110596, | |
| "learning_rate": 4.962473713033773e-05, | |
| "loss": 3.4625, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.05531255695279752, | |
| "grad_norm": 2.5749943256378174, | |
| "learning_rate": 4.9623500737729e-05, | |
| "loss": 3.2704, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.055403681428831786, | |
| "grad_norm": 2.41140079498291, | |
| "learning_rate": 4.96222623271274e-05, | |
| "loss": 2.9841, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.055494805904866044, | |
| "grad_norm": 2.583230972290039, | |
| "learning_rate": 4.962102189863442e-05, | |
| "loss": 3.3146, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.05558593038090031, | |
| "grad_norm": 2.78155779838562, | |
| "learning_rate": 4.9619779452351736e-05, | |
| "loss": 3.2742, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.055677054856934576, | |
| "grad_norm": 2.217174768447876, | |
| "learning_rate": 4.9618534988381136e-05, | |
| "loss": 3.2342, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.055768179332968834, | |
| "grad_norm": 3.625903367996216, | |
| "learning_rate": 4.9617288506824635e-05, | |
| "loss": 3.3274, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.0558593038090031, | |
| "grad_norm": 3.1227903366088867, | |
| "learning_rate": 4.961604000778438e-05, | |
| "loss": 3.4848, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.05595042828503736, | |
| "grad_norm": 3.104952812194824, | |
| "learning_rate": 4.961478949136269e-05, | |
| "loss": 3.0356, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.056041552761071624, | |
| "grad_norm": 1.903206467628479, | |
| "learning_rate": 4.961353695766206e-05, | |
| "loss": 3.18, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.05613267723710589, | |
| "grad_norm": 3.809479236602783, | |
| "learning_rate": 4.961228240678512e-05, | |
| "loss": 2.2945, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.05622380171314015, | |
| "grad_norm": 1.6982084512710571, | |
| "learning_rate": 4.961102583883469e-05, | |
| "loss": 3.097, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.056314926189174414, | |
| "grad_norm": 2.1733193397521973, | |
| "learning_rate": 4.960976725391376e-05, | |
| "loss": 3.1999, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.05640605066520867, | |
| "grad_norm": 1.8412457704544067, | |
| "learning_rate": 4.960850665212548e-05, | |
| "loss": 3.1114, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.05649717514124294, | |
| "grad_norm": 3.182962417602539, | |
| "learning_rate": 4.9607244033573156e-05, | |
| "loss": 3.5988, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.0565882996172772, | |
| "grad_norm": 1.3487292528152466, | |
| "learning_rate": 4.960597939836025e-05, | |
| "loss": 2.8788, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.05667942409331146, | |
| "grad_norm": 1.620439052581787, | |
| "learning_rate": 4.960471274659042e-05, | |
| "loss": 3.1345, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.05677054856934573, | |
| "grad_norm": 3.5589683055877686, | |
| "learning_rate": 4.9603444078367475e-05, | |
| "loss": 4.3533, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.056861673045379986, | |
| "grad_norm": 3.436901330947876, | |
| "learning_rate": 4.960217339379537e-05, | |
| "loss": 3.3396, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.05695279752141425, | |
| "grad_norm": 2.2627270221710205, | |
| "learning_rate": 4.960090069297827e-05, | |
| "loss": 3.1737, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.05704392199744852, | |
| "grad_norm": 1.921242594718933, | |
| "learning_rate": 4.9599625976020446e-05, | |
| "loss": 3.1868, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.057135046473482776, | |
| "grad_norm": 4.410575866699219, | |
| "learning_rate": 4.9598349243026394e-05, | |
| "loss": 3.0296, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.05722617094951704, | |
| "grad_norm": 1.6845407485961914, | |
| "learning_rate": 4.959707049410073e-05, | |
| "loss": 3.1197, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.0573172954255513, | |
| "grad_norm": 2.5050525665283203, | |
| "learning_rate": 4.9595789729348263e-05, | |
| "loss": 3.5628, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.057408419901585565, | |
| "grad_norm": 1.3204115629196167, | |
| "learning_rate": 4.9594506948873945e-05, | |
| "loss": 3.0306, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.05749954437761983, | |
| "grad_norm": 2.239015579223633, | |
| "learning_rate": 4.9593222152782916e-05, | |
| "loss": 3.204, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.05759066885365409, | |
| "grad_norm": 3.869081497192383, | |
| "learning_rate": 4.9591935341180464e-05, | |
| "loss": 3.3636, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.057681793329688355, | |
| "grad_norm": 2.7307801246643066, | |
| "learning_rate": 4.959064651417204e-05, | |
| "loss": 3.1173, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.057772917805722614, | |
| "grad_norm": 1.6466689109802246, | |
| "learning_rate": 4.9589355671863295e-05, | |
| "loss": 3.0867, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.05786404228175688, | |
| "grad_norm": 3.217461109161377, | |
| "learning_rate": 4.9588062814359996e-05, | |
| "loss": 3.4342, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.057955166757791145, | |
| "grad_norm": 3.0771045684814453, | |
| "learning_rate": 4.958676794176811e-05, | |
| "loss": 3.5423, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.0580462912338254, | |
| "grad_norm": 4.25571346282959, | |
| "learning_rate": 4.958547105419374e-05, | |
| "loss": 3.4624, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.05813741570985967, | |
| "grad_norm": 2.3620123863220215, | |
| "learning_rate": 4.958417215174318e-05, | |
| "loss": 3.1722, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.058228540185893934, | |
| "grad_norm": 2.8196592330932617, | |
| "learning_rate": 4.958287123452289e-05, | |
| "loss": 3.4062, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.05831966466192819, | |
| "grad_norm": 2.7436940670013428, | |
| "learning_rate": 4.958156830263948e-05, | |
| "loss": 3.3989, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.05841078913796246, | |
| "grad_norm": 3.0599286556243896, | |
| "learning_rate": 4.958026335619972e-05, | |
| "loss": 2.8931, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.05850191361399672, | |
| "grad_norm": 1.745510458946228, | |
| "learning_rate": 4.957895639531056e-05, | |
| "loss": 3.0919, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.05859303809003098, | |
| "grad_norm": 3.2269985675811768, | |
| "learning_rate": 4.957764742007912e-05, | |
| "loss": 3.264, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.05868416256606525, | |
| "grad_norm": 2.882855176925659, | |
| "learning_rate": 4.957633643061267e-05, | |
| "loss": 3.3968, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.05877528704209951, | |
| "grad_norm": 3.923797130584717, | |
| "learning_rate": 4.9575023427018645e-05, | |
| "loss": 3.1769, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.05886641151813377, | |
| "grad_norm": 1.8065714836120605, | |
| "learning_rate": 4.9573708409404665e-05, | |
| "loss": 3.1882, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.05895753599416803, | |
| "grad_norm": 2.9968409538269043, | |
| "learning_rate": 4.957239137787848e-05, | |
| "loss": 3.5354, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.0590486604702023, | |
| "grad_norm": 2.20745587348938, | |
| "learning_rate": 4.957107233254805e-05, | |
| "loss": 3.2484, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.05913978494623656, | |
| "grad_norm": 3.963139057159424, | |
| "learning_rate": 4.9569751273521454e-05, | |
| "loss": 4.0875, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.05923090942227082, | |
| "grad_norm": 3.877814531326294, | |
| "learning_rate": 4.956842820090697e-05, | |
| "loss": 4.6056, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.059322033898305086, | |
| "grad_norm": 3.671600818634033, | |
| "learning_rate": 4.956710311481303e-05, | |
| "loss": 3.2384, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.059413158374339345, | |
| "grad_norm": 3.7011606693267822, | |
| "learning_rate": 4.956577601534822e-05, | |
| "loss": 3.5084, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.05950428285037361, | |
| "grad_norm": 1.532228708267212, | |
| "learning_rate": 4.956444690262131e-05, | |
| "loss": 3.1647, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.059595407326407876, | |
| "grad_norm": 2.7572762966156006, | |
| "learning_rate": 4.956311577674123e-05, | |
| "loss": 3.1905, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.059686531802442135, | |
| "grad_norm": 2.9054203033447266, | |
| "learning_rate": 4.956178263781706e-05, | |
| "loss": 3.45, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.0597776562784764, | |
| "grad_norm": 2.3471450805664062, | |
| "learning_rate": 4.9560447485958065e-05, | |
| "loss": 3.4931, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.05986878075451066, | |
| "grad_norm": 3.769836664199829, | |
| "learning_rate": 4.955911032127365e-05, | |
| "loss": 3.5662, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.059959905230544924, | |
| "grad_norm": 2.2852306365966797, | |
| "learning_rate": 4.955777114387342e-05, | |
| "loss": 3.3788, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.06005102970657919, | |
| "grad_norm": 2.383812665939331, | |
| "learning_rate": 4.9556429953867124e-05, | |
| "loss": 3.0372, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.06014215418261345, | |
| "grad_norm": 4.165440559387207, | |
| "learning_rate": 4.9555086751364666e-05, | |
| "loss": 4.6025, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.060233278658647714, | |
| "grad_norm": 2.7133188247680664, | |
| "learning_rate": 4.955374153647613e-05, | |
| "loss": 3.4581, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.06032440313468197, | |
| "grad_norm": 2.894537925720215, | |
| "learning_rate": 4.955239430931177e-05, | |
| "loss": 3.2701, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.06041552761071624, | |
| "grad_norm": 2.548617362976074, | |
| "learning_rate": 4.955104506998199e-05, | |
| "loss": 3.5049, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.060506652086750504, | |
| "grad_norm": 1.5685203075408936, | |
| "learning_rate": 4.9549693818597365e-05, | |
| "loss": 3.0841, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.06059777656278476, | |
| "grad_norm": 3.0904016494750977, | |
| "learning_rate": 4.954834055526864e-05, | |
| "loss": 3.1755, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.06068890103881903, | |
| "grad_norm": 2.390272855758667, | |
| "learning_rate": 4.954698528010671e-05, | |
| "loss": 3.0737, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.060780025514853286, | |
| "grad_norm": 2.9072399139404297, | |
| "learning_rate": 4.954562799322266e-05, | |
| "loss": 3.426, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.06087114999088755, | |
| "grad_norm": 4.058100700378418, | |
| "learning_rate": 4.9544268694727714e-05, | |
| "loss": 3.303, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.06096227446692182, | |
| "grad_norm": 2.9293103218078613, | |
| "learning_rate": 4.9542907384733277e-05, | |
| "loss": 3.5832, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.061053398942956076, | |
| "grad_norm": 3.660994529724121, | |
| "learning_rate": 4.9541544063350916e-05, | |
| "loss": 3.4459, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.06114452341899034, | |
| "grad_norm": 1.5523614883422852, | |
| "learning_rate": 4.954017873069235e-05, | |
| "loss": 3.2077, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.0612356478950246, | |
| "grad_norm": 3.498552083969116, | |
| "learning_rate": 4.953881138686948e-05, | |
| "loss": 3.3605, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.061326772371058866, | |
| "grad_norm": 1.5058245658874512, | |
| "learning_rate": 4.953744203199437e-05, | |
| "loss": 2.9763, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.06141789684709313, | |
| "grad_norm": 3.1755263805389404, | |
| "learning_rate": 4.9536070666179236e-05, | |
| "loss": 3.2367, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.06150902132312739, | |
| "grad_norm": 4.108938694000244, | |
| "learning_rate": 4.953469728953647e-05, | |
| "loss": 3.3441, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.061600145799161656, | |
| "grad_norm": 2.8655855655670166, | |
| "learning_rate": 4.9533321902178634e-05, | |
| "loss": 3.4349, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.06169127027519592, | |
| "grad_norm": 1.3662402629852295, | |
| "learning_rate": 4.953194450421843e-05, | |
| "loss": 3.052, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.06178239475123018, | |
| "grad_norm": 3.1429646015167236, | |
| "learning_rate": 4.9530565095768744e-05, | |
| "loss": 3.3275, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.061873519227264445, | |
| "grad_norm": 2.9324707984924316, | |
| "learning_rate": 4.952918367694264e-05, | |
| "loss": 2.9269, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.061964643703298704, | |
| "grad_norm": 1.5480892658233643, | |
| "learning_rate": 4.952780024785331e-05, | |
| "loss": 3.1536, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.06205576817933297, | |
| "grad_norm": 3.543039321899414, | |
| "learning_rate": 4.9526414808614154e-05, | |
| "loss": 4.6384, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.062146892655367235, | |
| "grad_norm": 3.411710739135742, | |
| "learning_rate": 4.9525027359338696e-05, | |
| "loss": 3.9351, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.062238017131401493, | |
| "grad_norm": 4.721251487731934, | |
| "learning_rate": 4.952363790014064e-05, | |
| "loss": 2.8042, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.06232914160743576, | |
| "grad_norm": 2.0499167442321777, | |
| "learning_rate": 4.952224643113388e-05, | |
| "loss": 3.0585, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.06242026608347002, | |
| "grad_norm": 2.8441872596740723, | |
| "learning_rate": 4.9520852952432426e-05, | |
| "loss": 3.6444, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.06251139055950428, | |
| "grad_norm": 3.619750499725342, | |
| "learning_rate": 4.9519457464150496e-05, | |
| "loss": 3.1606, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.06260251503553854, | |
| "grad_norm": 3.968768358230591, | |
| "learning_rate": 4.951805996640245e-05, | |
| "loss": 3.4814, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.06269363951157281, | |
| "grad_norm": 2.2831945419311523, | |
| "learning_rate": 4.9516660459302827e-05, | |
| "loss": 3.1286, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.06278476398760707, | |
| "grad_norm": 4.824334144592285, | |
| "learning_rate": 4.9515258942966315e-05, | |
| "loss": 3.2481, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.06287588846364134, | |
| "grad_norm": 3.6594996452331543, | |
| "learning_rate": 4.951385541750777e-05, | |
| "loss": 2.9803, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.06296701293967559, | |
| "grad_norm": 4.994578838348389, | |
| "learning_rate": 4.951244988304221e-05, | |
| "loss": 2.7129, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.06305813741570986, | |
| "grad_norm": 3.323155403137207, | |
| "learning_rate": 4.9511042339684846e-05, | |
| "loss": 3.1815, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.06314926189174412, | |
| "grad_norm": 3.1093785762786865, | |
| "learning_rate": 4.950963278755102e-05, | |
| "loss": 3.32, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.06324038636777839, | |
| "grad_norm": 2.944016456604004, | |
| "learning_rate": 4.950822122675625e-05, | |
| "loss": 3.1048, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.06333151084381265, | |
| "grad_norm": 1.7950677871704102, | |
| "learning_rate": 4.950680765741622e-05, | |
| "loss": 3.1967, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.0634226353198469, | |
| "grad_norm": 2.822021961212158, | |
| "learning_rate": 4.950539207964677e-05, | |
| "loss": 2.8707, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.06351375979588117, | |
| "grad_norm": 3.3690731525421143, | |
| "learning_rate": 4.950397449356392e-05, | |
| "loss": 3.277, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.06360488427191544, | |
| "grad_norm": 3.850304126739502, | |
| "learning_rate": 4.9502554899283845e-05, | |
| "loss": 3.3475, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.0636960087479497, | |
| "grad_norm": 3.161121129989624, | |
| "learning_rate": 4.9501133296922897e-05, | |
| "loss": 2.9737, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.06378713322398397, | |
| "grad_norm": 1.786126971244812, | |
| "learning_rate": 4.949970968659757e-05, | |
| "loss": 3.1964, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.06387825770001823, | |
| "grad_norm": 2.7346184253692627, | |
| "learning_rate": 4.949828406842453e-05, | |
| "loss": 3.3159, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.06396938217605248, | |
| "grad_norm": 3.1189444065093994, | |
| "learning_rate": 4.9496856442520623e-05, | |
| "loss": 3.3436, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.06406050665208675, | |
| "grad_norm": 1.5578367710113525, | |
| "learning_rate": 4.949542680900284e-05, | |
| "loss": 3.093, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.06415163112812101, | |
| "grad_norm": 2.1540582180023193, | |
| "learning_rate": 4.9493995167988355e-05, | |
| "loss": 3.2466, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.06424275560415528, | |
| "grad_norm": 3.0360822677612305, | |
| "learning_rate": 4.949256151959449e-05, | |
| "loss": 2.8197, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.06433388008018955, | |
| "grad_norm": 5.320927143096924, | |
| "learning_rate": 4.9491125863938735e-05, | |
| "loss": 3.4521, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.0644250045562238, | |
| "grad_norm": 3.2697649002075195, | |
| "learning_rate": 4.948968820113875e-05, | |
| "loss": 3.3638, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.06451612903225806, | |
| "grad_norm": 2.503573417663574, | |
| "learning_rate": 4.948824853131236e-05, | |
| "loss": 3.3617, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.06460725350829233, | |
| "grad_norm": 1.5332664251327515, | |
| "learning_rate": 4.948680685457756e-05, | |
| "loss": 3.0453, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.0646983779843266, | |
| "grad_norm": 2.2039079666137695, | |
| "learning_rate": 4.948536317105248e-05, | |
| "loss": 3.3395, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.06478950246036086, | |
| "grad_norm": 2.7374370098114014, | |
| "learning_rate": 4.948391748085545e-05, | |
| "loss": 3.3615, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.06488062693639511, | |
| "grad_norm": 2.1900599002838135, | |
| "learning_rate": 4.948246978410495e-05, | |
| "loss": 3.2993, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.06497175141242938, | |
| "grad_norm": 1.8759992122650146, | |
| "learning_rate": 4.948102008091962e-05, | |
| "loss": 3.1306, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.06506287588846364, | |
| "grad_norm": 2.68538498878479, | |
| "learning_rate": 4.9479568371418274e-05, | |
| "loss": 3.3092, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.06515400036449791, | |
| "grad_norm": 2.6900649070739746, | |
| "learning_rate": 4.947811465571988e-05, | |
| "loss": 3.2655, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.06524512484053217, | |
| "grad_norm": 2.2141432762145996, | |
| "learning_rate": 4.947665893394357e-05, | |
| "loss": 3.165, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.06533624931656642, | |
| "grad_norm": 2.6907012462615967, | |
| "learning_rate": 4.947520120620865e-05, | |
| "loss": 3.1901, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.06542737379260069, | |
| "grad_norm": 2.0056562423706055, | |
| "learning_rate": 4.9473741472634606e-05, | |
| "loss": 3.2852, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.06551849826863496, | |
| "grad_norm": 1.5069571733474731, | |
| "learning_rate": 4.947227973334104e-05, | |
| "loss": 3.0089, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.06560962274466922, | |
| "grad_norm": 3.0702645778656006, | |
| "learning_rate": 4.947081598844777e-05, | |
| "loss": 3.191, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.06570074722070349, | |
| "grad_norm": 3.3552052974700928, | |
| "learning_rate": 4.946935023807474e-05, | |
| "loss": 4.4409, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.06579187169673774, | |
| "grad_norm": 5.324817180633545, | |
| "learning_rate": 4.946788248234209e-05, | |
| "loss": 3.2887, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.065882996172772, | |
| "grad_norm": 1.67562735080719, | |
| "learning_rate": 4.9466412721370084e-05, | |
| "loss": 3.1257, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.06597412064880627, | |
| "grad_norm": 3.6829192638397217, | |
| "learning_rate": 4.9464940955279195e-05, | |
| "loss": 3.0727, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.06606524512484054, | |
| "grad_norm": 2.184438705444336, | |
| "learning_rate": 4.946346718419004e-05, | |
| "loss": 3.2543, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.0661563696008748, | |
| "grad_norm": 1.7749693393707275, | |
| "learning_rate": 4.9461991408223386e-05, | |
| "loss": 3.107, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.06624749407690905, | |
| "grad_norm": 3.877955675125122, | |
| "learning_rate": 4.946051362750018e-05, | |
| "loss": 3.0837, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.06633861855294332, | |
| "grad_norm": 2.6731202602386475, | |
| "learning_rate": 4.9459033842141554e-05, | |
| "loss": 2.9075, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.06642974302897758, | |
| "grad_norm": 2.0825181007385254, | |
| "learning_rate": 4.9457552052268764e-05, | |
| "loss": 3.2235, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.06652086750501185, | |
| "grad_norm": 3.4630510807037354, | |
| "learning_rate": 4.945606825800325e-05, | |
| "loss": 4.399, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06661199198104611, | |
| "grad_norm": 1.572504997253418, | |
| "learning_rate": 4.9454582459466615e-05, | |
| "loss": 2.988, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.06670311645708037, | |
| "grad_norm": 3.3033382892608643, | |
| "learning_rate": 4.945309465678063e-05, | |
| "loss": 3.272, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.06679424093311463, | |
| "grad_norm": 1.9305294752120972, | |
| "learning_rate": 4.945160485006722e-05, | |
| "loss": 3.1268, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.0668853654091489, | |
| "grad_norm": 2.0527968406677246, | |
| "learning_rate": 4.9450113039448484e-05, | |
| "loss": 3.1133, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.06697648988518316, | |
| "grad_norm": 3.5695364475250244, | |
| "learning_rate": 4.944861922504669e-05, | |
| "loss": 3.2998, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.06706761436121743, | |
| "grad_norm": 2.7028732299804688, | |
| "learning_rate": 4.944712340698424e-05, | |
| "loss": 3.1459, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.06715873883725168, | |
| "grad_norm": 1.6403956413269043, | |
| "learning_rate": 4.9445625585383746e-05, | |
| "loss": 3.1004, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.06724986331328595, | |
| "grad_norm": 2.2989110946655273, | |
| "learning_rate": 4.9444125760367956e-05, | |
| "loss": 3.3623, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.06734098778932021, | |
| "grad_norm": 3.925218343734741, | |
| "learning_rate": 4.944262393205977e-05, | |
| "loss": 3.4177, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.06743211226535448, | |
| "grad_norm": 2.743499517440796, | |
| "learning_rate": 4.944112010058229e-05, | |
| "loss": 3.4295, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06752323674138874, | |
| "grad_norm": 2.836487293243408, | |
| "learning_rate": 4.943961426605874e-05, | |
| "loss": 3.1732, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.067614361217423, | |
| "grad_norm": 3.4316787719726562, | |
| "learning_rate": 4.943810642861255e-05, | |
| "loss": 3.2019, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.06770548569345726, | |
| "grad_norm": 1.637211799621582, | |
| "learning_rate": 4.943659658836728e-05, | |
| "loss": 3.0372, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.06779661016949153, | |
| "grad_norm": 2.633004665374756, | |
| "learning_rate": 4.9435084745446666e-05, | |
| "loss": 3.3982, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.06788773464552579, | |
| "grad_norm": 3.1574134826660156, | |
| "learning_rate": 4.9433570899974626e-05, | |
| "loss": 4.3972, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.06797885912156006, | |
| "grad_norm": 3.46399188041687, | |
| "learning_rate": 4.94320550520752e-05, | |
| "loss": 3.5156, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.06806998359759431, | |
| "grad_norm": 1.8419183492660522, | |
| "learning_rate": 4.943053720187264e-05, | |
| "loss": 3.1658, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.06816110807362857, | |
| "grad_norm": 4.034026622772217, | |
| "learning_rate": 4.942901734949133e-05, | |
| "loss": 3.2022, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.06825223254966284, | |
| "grad_norm": 2.7348647117614746, | |
| "learning_rate": 4.942749549505582e-05, | |
| "loss": 3.2519, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.0683433570256971, | |
| "grad_norm": 5.112464904785156, | |
| "learning_rate": 4.9425971638690847e-05, | |
| "loss": 3.2507, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06843448150173137, | |
| "grad_norm": 3.64758563041687, | |
| "learning_rate": 4.942444578052129e-05, | |
| "loss": 2.8225, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.06852560597776562, | |
| "grad_norm": 2.541335344314575, | |
| "learning_rate": 4.942291792067221e-05, | |
| "loss": 3.1085, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.06861673045379989, | |
| "grad_norm": 3.1781222820281982, | |
| "learning_rate": 4.9421388059268794e-05, | |
| "loss": 3.4272, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.06870785492983415, | |
| "grad_norm": 2.2702085971832275, | |
| "learning_rate": 4.941985619643645e-05, | |
| "loss": 3.2569, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.06879897940586842, | |
| "grad_norm": 5.204946517944336, | |
| "learning_rate": 4.94183223323007e-05, | |
| "loss": 3.3751, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.06889010388190268, | |
| "grad_norm": 2.0559349060058594, | |
| "learning_rate": 4.941678646698726e-05, | |
| "loss": 3.0242, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.06898122835793694, | |
| "grad_norm": 3.680403470993042, | |
| "learning_rate": 4.941524860062201e-05, | |
| "loss": 3.3072, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.0690723528339712, | |
| "grad_norm": 2.779707908630371, | |
| "learning_rate": 4.941370873333096e-05, | |
| "loss": 2.8916, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.06916347731000547, | |
| "grad_norm": 2.8263614177703857, | |
| "learning_rate": 4.941216686524032e-05, | |
| "loss": 3.3456, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.06925460178603973, | |
| "grad_norm": 2.906216621398926, | |
| "learning_rate": 4.941062299647645e-05, | |
| "loss": 3.3625, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.069345726262074, | |
| "grad_norm": 3.632577419281006, | |
| "learning_rate": 4.9409077127165895e-05, | |
| "loss": 3.2432, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.06943685073810825, | |
| "grad_norm": 3.2788524627685547, | |
| "learning_rate": 4.940752925743531e-05, | |
| "loss": 3.2008, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.06952797521414252, | |
| "grad_norm": 2.848799467086792, | |
| "learning_rate": 4.9405979387411576e-05, | |
| "loss": 3.6153, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.06961909969017678, | |
| "grad_norm": 1.9642467498779297, | |
| "learning_rate": 4.940442751722171e-05, | |
| "loss": 3.1354, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.06971022416621105, | |
| "grad_norm": 2.173759698867798, | |
| "learning_rate": 4.9402873646992876e-05, | |
| "loss": 3.0818, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.06980134864224531, | |
| "grad_norm": 3.0131309032440186, | |
| "learning_rate": 4.940131777685243e-05, | |
| "loss": 3.4091, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.06989247311827956, | |
| "grad_norm": 2.9783716201782227, | |
| "learning_rate": 4.939975990692789e-05, | |
| "loss": 3.2632, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.06998359759431383, | |
| "grad_norm": 3.359174966812134, | |
| "learning_rate": 4.9398200037346907e-05, | |
| "loss": 3.057, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.0700747220703481, | |
| "grad_norm": 3.2321484088897705, | |
| "learning_rate": 4.939663816823735e-05, | |
| "loss": 2.8868, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.07016584654638236, | |
| "grad_norm": 2.782243013381958, | |
| "learning_rate": 4.9395074299727196e-05, | |
| "loss": 3.6189, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.07025697102241663, | |
| "grad_norm": 3.496765375137329, | |
| "learning_rate": 4.939350843194462e-05, | |
| "loss": 3.4184, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.07034809549845088, | |
| "grad_norm": 3.07650089263916, | |
| "learning_rate": 4.939194056501795e-05, | |
| "loss": 3.1974, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.07043921997448514, | |
| "grad_norm": 2.057051181793213, | |
| "learning_rate": 4.939037069907567e-05, | |
| "loss": 3.3996, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.07053034445051941, | |
| "grad_norm": 1.907810926437378, | |
| "learning_rate": 4.938879883424645e-05, | |
| "loss": 3.0664, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.07062146892655367, | |
| "grad_norm": 3.811920166015625, | |
| "learning_rate": 4.93872249706591e-05, | |
| "loss": 3.3717, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.07071259340258794, | |
| "grad_norm": 2.526494026184082, | |
| "learning_rate": 4.938564910844261e-05, | |
| "loss": 3.2352, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.07080371787862219, | |
| "grad_norm": 3.059999465942383, | |
| "learning_rate": 4.938407124772613e-05, | |
| "loss": 3.541, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.07089484235465646, | |
| "grad_norm": 3.959871530532837, | |
| "learning_rate": 4.9382491388638976e-05, | |
| "loss": 3.5196, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.07098596683069072, | |
| "grad_norm": 2.6813833713531494, | |
| "learning_rate": 4.93809095313106e-05, | |
| "loss": 3.1128, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.07107709130672499, | |
| "grad_norm": 3.876431465148926, | |
| "learning_rate": 4.937932567587067e-05, | |
| "loss": 3.2911, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.07116821578275925, | |
| "grad_norm": 1.5844011306762695, | |
| "learning_rate": 4.9377739822448975e-05, | |
| "loss": 3.0431, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.07125934025879352, | |
| "grad_norm": 4.27528190612793, | |
| "learning_rate": 4.937615197117549e-05, | |
| "loss": 3.429, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.07135046473482777, | |
| "grad_norm": 3.49869441986084, | |
| "learning_rate": 4.937456212218034e-05, | |
| "loss": 3.5458, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.07144158921086204, | |
| "grad_norm": 2.712157964706421, | |
| "learning_rate": 4.9372970275593805e-05, | |
| "loss": 3.2802, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.0715327136868963, | |
| "grad_norm": 3.354679584503174, | |
| "learning_rate": 4.937137643154637e-05, | |
| "loss": 3.3316, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.07162383816293057, | |
| "grad_norm": 4.650734901428223, | |
| "learning_rate": 4.9369780590168635e-05, | |
| "loss": 3.1748, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.07171496263896483, | |
| "grad_norm": 4.040694713592529, | |
| "learning_rate": 4.93681827515914e-05, | |
| "loss": 3.3054, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.07180608711499908, | |
| "grad_norm": 3.034775733947754, | |
| "learning_rate": 4.936658291594562e-05, | |
| "loss": 3.4519, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.07189721159103335, | |
| "grad_norm": 3.9057462215423584, | |
| "learning_rate": 4.9364981083362374e-05, | |
| "loss": 3.2165, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.07198833606706762, | |
| "grad_norm": 3.4691364765167236, | |
| "learning_rate": 4.9363377253972976e-05, | |
| "loss": 3.4187, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.07207946054310188, | |
| "grad_norm": 5.106943130493164, | |
| "learning_rate": 4.936177142790885e-05, | |
| "loss": 3.1486, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.07217058501913615, | |
| "grad_norm": 2.5772221088409424, | |
| "learning_rate": 4.9360163605301604e-05, | |
| "loss": 3.2757, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.0722617094951704, | |
| "grad_norm": 3.403024196624756, | |
| "learning_rate": 4.935855378628299e-05, | |
| "loss": 3.2914, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.07235283397120466, | |
| "grad_norm": 1.7153654098510742, | |
| "learning_rate": 4.935694197098496e-05, | |
| "loss": 3.1355, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.07244395844723893, | |
| "grad_norm": 3.2709758281707764, | |
| "learning_rate": 4.9355328159539606e-05, | |
| "loss": 3.3144, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.0725350829232732, | |
| "grad_norm": 2.949646472930908, | |
| "learning_rate": 4.935371235207917e-05, | |
| "loss": 3.457, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.07262620739930746, | |
| "grad_norm": 3.8524835109710693, | |
| "learning_rate": 4.935209454873609e-05, | |
| "loss": 2.6742, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.07271733187534171, | |
| "grad_norm": 2.2433860301971436, | |
| "learning_rate": 4.9350474749642946e-05, | |
| "loss": 3.0764, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.07280845635137598, | |
| "grad_norm": 2.8999814987182617, | |
| "learning_rate": 4.9348852954932476e-05, | |
| "loss": 3.3237, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.07289958082741024, | |
| "grad_norm": 1.8484467267990112, | |
| "learning_rate": 4.9347229164737615e-05, | |
| "loss": 3.1616, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.07299070530344451, | |
| "grad_norm": 1.6664539575576782, | |
| "learning_rate": 4.934560337919143e-05, | |
| "loss": 2.9996, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.07308182977947877, | |
| "grad_norm": 4.941806316375732, | |
| "learning_rate": 4.934397559842715e-05, | |
| "loss": 3.5425, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.07317295425551303, | |
| "grad_norm": 1.4220082759857178, | |
| "learning_rate": 4.9342345822578184e-05, | |
| "loss": 3.1634, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.07326407873154729, | |
| "grad_norm": 2.4295237064361572, | |
| "learning_rate": 4.9340714051778106e-05, | |
| "loss": 3.4212, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.07335520320758156, | |
| "grad_norm": 1.844810962677002, | |
| "learning_rate": 4.933908028616063e-05, | |
| "loss": 3.1608, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.07344632768361582, | |
| "grad_norm": 3.3480727672576904, | |
| "learning_rate": 4.933744452585966e-05, | |
| "loss": 3.4193, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.07353745215965009, | |
| "grad_norm": 3.4452803134918213, | |
| "learning_rate": 4.9335806771009266e-05, | |
| "loss": 4.3444, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.07362857663568434, | |
| "grad_norm": 3.2160511016845703, | |
| "learning_rate": 4.933416702174365e-05, | |
| "loss": 3.2287, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.0737197011117186, | |
| "grad_norm": 1.905893325805664, | |
| "learning_rate": 4.9332525278197195e-05, | |
| "loss": 3.0757, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.07381082558775287, | |
| "grad_norm": 4.715121269226074, | |
| "learning_rate": 4.9330881540504457e-05, | |
| "loss": 3.4811, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.07390195006378714, | |
| "grad_norm": 3.127492904663086, | |
| "learning_rate": 4.932923580880015e-05, | |
| "loss": 3.5574, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.0739930745398214, | |
| "grad_norm": 3.37953782081604, | |
| "learning_rate": 4.9327588083219136e-05, | |
| "loss": 3.4364, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.07408419901585565, | |
| "grad_norm": 3.43113374710083, | |
| "learning_rate": 4.932593836389646e-05, | |
| "loss": 2.5653, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.07417532349188992, | |
| "grad_norm": 2.3801136016845703, | |
| "learning_rate": 4.9324286650967324e-05, | |
| "loss": 3.1677, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.07426644796792418, | |
| "grad_norm": 3.0977799892425537, | |
| "learning_rate": 4.932263294456708e-05, | |
| "loss": 3.0717, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.07435757244395845, | |
| "grad_norm": 2.2414751052856445, | |
| "learning_rate": 4.9320977244831277e-05, | |
| "loss": 2.9498, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.07444869691999272, | |
| "grad_norm": 3.319639205932617, | |
| "learning_rate": 4.931931955189559e-05, | |
| "loss": 3.3386, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.07453982139602697, | |
| "grad_norm": 2.776702642440796, | |
| "learning_rate": 4.931765986589588e-05, | |
| "loss": 3.1402, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.07463094587206123, | |
| "grad_norm": 3.072389841079712, | |
| "learning_rate": 4.931599818696817e-05, | |
| "loss": 3.1573, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.0747220703480955, | |
| "grad_norm": 3.179121255874634, | |
| "learning_rate": 4.931433451524863e-05, | |
| "loss": 3.2369, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.07481319482412976, | |
| "grad_norm": 3.051584005355835, | |
| "learning_rate": 4.9312668850873603e-05, | |
| "loss": 3.381, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.07490431930016403, | |
| "grad_norm": 3.383882761001587, | |
| "learning_rate": 4.931100119397961e-05, | |
| "loss": 3.42, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.07499544377619828, | |
| "grad_norm": 3.531190872192383, | |
| "learning_rate": 4.930933154470331e-05, | |
| "loss": 3.0216, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.07508656825223255, | |
| "grad_norm": 5.148257255554199, | |
| "learning_rate": 4.9307659903181545e-05, | |
| "loss": 3.4292, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.07517769272826681, | |
| "grad_norm": 1.8193916082382202, | |
| "learning_rate": 4.9305986269551315e-05, | |
| "loss": 3.1074, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.07526881720430108, | |
| "grad_norm": 3.702211380004883, | |
| "learning_rate": 4.930431064394977e-05, | |
| "loss": 3.2786, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.07535994168033534, | |
| "grad_norm": 1.5868266820907593, | |
| "learning_rate": 4.930263302651424e-05, | |
| "loss": 2.9994, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.0754510661563696, | |
| "grad_norm": 3.0381083488464355, | |
| "learning_rate": 4.930095341738221e-05, | |
| "loss": 3.5689, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.07554219063240386, | |
| "grad_norm": 4.9261884689331055, | |
| "learning_rate": 4.929927181669133e-05, | |
| "loss": 4.6381, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.07563331510843813, | |
| "grad_norm": 2.415921688079834, | |
| "learning_rate": 4.929758822457943e-05, | |
| "loss": 3.4642, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.07572443958447239, | |
| "grad_norm": 2.334571123123169, | |
| "learning_rate": 4.929590264118446e-05, | |
| "loss": 3.1396, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.07581556406050666, | |
| "grad_norm": 3.4270524978637695, | |
| "learning_rate": 4.929421506664458e-05, | |
| "loss": 3.0609, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.07590668853654091, | |
| "grad_norm": 1.8095070123672485, | |
| "learning_rate": 4.929252550109808e-05, | |
| "loss": 3.0537, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.07599781301257517, | |
| "grad_norm": 2.4400718212127686, | |
| "learning_rate": 4.929083394468344e-05, | |
| "loss": 2.9386, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.07608893748860944, | |
| "grad_norm": 3.1036880016326904, | |
| "learning_rate": 4.928914039753928e-05, | |
| "loss": 3.5941, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.0761800619646437, | |
| "grad_norm": 2.4113924503326416, | |
| "learning_rate": 4.92874448598044e-05, | |
| "loss": 3.4679, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.07627118644067797, | |
| "grad_norm": 1.8080517053604126, | |
| "learning_rate": 4.9285747331617746e-05, | |
| "loss": 3.0843, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.07636231091671222, | |
| "grad_norm": 2.755985975265503, | |
| "learning_rate": 4.928404781311845e-05, | |
| "loss": 3.5471, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.07645343539274649, | |
| "grad_norm": 2.892883777618408, | |
| "learning_rate": 4.928234630444579e-05, | |
| "loss": 3.2349, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.07654455986878075, | |
| "grad_norm": 2.8694229125976562, | |
| "learning_rate": 4.92806428057392e-05, | |
| "loss": 3.4227, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.07663568434481502, | |
| "grad_norm": 4.09429407119751, | |
| "learning_rate": 4.9278937317138305e-05, | |
| "loss": 3.4834, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.07672680882084928, | |
| "grad_norm": 2.272854804992676, | |
| "learning_rate": 4.927722983878286e-05, | |
| "loss": 3.2056, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.07681793329688354, | |
| "grad_norm": 5.0233330726623535, | |
| "learning_rate": 4.927552037081282e-05, | |
| "loss": 2.9908, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.0769090577729178, | |
| "grad_norm": 2.9697277545928955, | |
| "learning_rate": 4.9273808913368256e-05, | |
| "loss": 3.0797, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.07700018224895207, | |
| "grad_norm": 2.6238036155700684, | |
| "learning_rate": 4.927209546658946e-05, | |
| "loss": 3.1607, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.07709130672498633, | |
| "grad_norm": 2.3229193687438965, | |
| "learning_rate": 4.9270380030616826e-05, | |
| "loss": 3.0519, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.0771824312010206, | |
| "grad_norm": 3.1790342330932617, | |
| "learning_rate": 4.9268662605590963e-05, | |
| "loss": 3.1259, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.07727355567705485, | |
| "grad_norm": 2.6441993713378906, | |
| "learning_rate": 4.926694319165261e-05, | |
| "loss": 3.2281, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.07736468015308912, | |
| "grad_norm": 3.473982572555542, | |
| "learning_rate": 4.926522178894268e-05, | |
| "loss": 3.0969, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.07745580462912338, | |
| "grad_norm": 3.585967540740967, | |
| "learning_rate": 4.926349839760225e-05, | |
| "loss": 3.2388, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07754692910515765, | |
| "grad_norm": 2.786681890487671, | |
| "learning_rate": 4.926177301777256e-05, | |
| "loss": 2.8739, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.07763805358119191, | |
| "grad_norm": 2.578705072402954, | |
| "learning_rate": 4.926004564959501e-05, | |
| "loss": 3.1861, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.07772917805722616, | |
| "grad_norm": 3.162743091583252, | |
| "learning_rate": 4.925831629321117e-05, | |
| "loss": 3.4526, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.07782030253326043, | |
| "grad_norm": 2.0641379356384277, | |
| "learning_rate": 4.925658494876275e-05, | |
| "loss": 3.1193, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.0779114270092947, | |
| "grad_norm": 4.980138778686523, | |
| "learning_rate": 4.9254851616391664e-05, | |
| "loss": 3.4487, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.07800255148532896, | |
| "grad_norm": 1.8417590856552124, | |
| "learning_rate": 4.9253116296239956e-05, | |
| "loss": 3.1246, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.07809367596136323, | |
| "grad_norm": 2.736356496810913, | |
| "learning_rate": 4.9251378988449835e-05, | |
| "loss": 3.3114, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.07818480043739748, | |
| "grad_norm": 2.5761330127716064, | |
| "learning_rate": 4.924963969316369e-05, | |
| "loss": 3.3988, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.07827592491343174, | |
| "grad_norm": 3.1260087490081787, | |
| "learning_rate": 4.924789841052406e-05, | |
| "loss": 3.2409, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.07836704938946601, | |
| "grad_norm": 1.8674402236938477, | |
| "learning_rate": 4.9246155140673646e-05, | |
| "loss": 3.0823, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.07845817386550027, | |
| "grad_norm": 2.6160728931427, | |
| "learning_rate": 4.924440988375532e-05, | |
| "loss": 3.4579, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.07854929834153454, | |
| "grad_norm": 1.7055904865264893, | |
| "learning_rate": 4.924266263991212e-05, | |
| "loss": 3.119, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.0786404228175688, | |
| "grad_norm": 1.8979192972183228, | |
| "learning_rate": 4.924091340928722e-05, | |
| "loss": 3.1205, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.07873154729360306, | |
| "grad_norm": 1.8284133672714233, | |
| "learning_rate": 4.923916219202399e-05, | |
| "loss": 2.9849, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.07882267176963732, | |
| "grad_norm": 1.7913658618927002, | |
| "learning_rate": 4.923740898826595e-05, | |
| "loss": 3.0129, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.07891379624567159, | |
| "grad_norm": 2.9675111770629883, | |
| "learning_rate": 4.9235653798156786e-05, | |
| "loss": 3.2939, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.07900492072170585, | |
| "grad_norm": 2.1613569259643555, | |
| "learning_rate": 4.9233896621840326e-05, | |
| "loss": 3.1203, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.07909604519774012, | |
| "grad_norm": 2.8138372898101807, | |
| "learning_rate": 4.923213745946059e-05, | |
| "loss": 3.3916, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.07918716967377437, | |
| "grad_norm": 5.18245792388916, | |
| "learning_rate": 4.9230376311161744e-05, | |
| "loss": 3.1091, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.07927829414980864, | |
| "grad_norm": 3.7926981449127197, | |
| "learning_rate": 4.922861317708812e-05, | |
| "loss": 3.0363, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.0793694186258429, | |
| "grad_norm": 2.6583340167999268, | |
| "learning_rate": 4.9226848057384225e-05, | |
| "loss": 3.1699, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.07946054310187717, | |
| "grad_norm": 3.3531649112701416, | |
| "learning_rate": 4.92250809521947e-05, | |
| "loss": 3.0846, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.07955166757791143, | |
| "grad_norm": 2.9770283699035645, | |
| "learning_rate": 4.922331186166438e-05, | |
| "loss": 3.0176, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.07964279205394569, | |
| "grad_norm": 2.4211061000823975, | |
| "learning_rate": 4.922154078593824e-05, | |
| "loss": 3.5094, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.07973391652997995, | |
| "grad_norm": 2.4895503520965576, | |
| "learning_rate": 4.9219767725161436e-05, | |
| "loss": 3.4907, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.07982504100601422, | |
| "grad_norm": 2.4370858669281006, | |
| "learning_rate": 4.9217992679479266e-05, | |
| "loss": 3.4705, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.07991616548204848, | |
| "grad_norm": 2.213453531265259, | |
| "learning_rate": 4.921621564903721e-05, | |
| "loss": 3.1494, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.08000728995808275, | |
| "grad_norm": 2.5228660106658936, | |
| "learning_rate": 4.9214436633980904e-05, | |
| "loss": 2.3643, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.080098414434117, | |
| "grad_norm": 1.479423999786377, | |
| "learning_rate": 4.921265563445614e-05, | |
| "loss": 2.9752, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.08018953891015126, | |
| "grad_norm": 3.9881060123443604, | |
| "learning_rate": 4.921087265060888e-05, | |
| "loss": 3.172, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.08028066338618553, | |
| "grad_norm": 3.8781585693359375, | |
| "learning_rate": 4.920908768258524e-05, | |
| "loss": 3.2123, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.0803717878622198, | |
| "grad_norm": 1.9390805959701538, | |
| "learning_rate": 4.920730073053152e-05, | |
| "loss": 3.156, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.08046291233825406, | |
| "grad_norm": 3.341097116470337, | |
| "learning_rate": 4.920551179459415e-05, | |
| "loss": 3.2462, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.08055403681428831, | |
| "grad_norm": 3.1172938346862793, | |
| "learning_rate": 4.9203720874919765e-05, | |
| "loss": 3.2327, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.08064516129032258, | |
| "grad_norm": 2.6865100860595703, | |
| "learning_rate": 4.920192797165511e-05, | |
| "loss": 3.0381, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.08073628576635684, | |
| "grad_norm": 1.9933525323867798, | |
| "learning_rate": 4.920013308494714e-05, | |
| "loss": 3.1288, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.08082741024239111, | |
| "grad_norm": 2.4274346828460693, | |
| "learning_rate": 4.919833621494294e-05, | |
| "loss": 3.1731, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.08091853471842538, | |
| "grad_norm": 3.6805949211120605, | |
| "learning_rate": 4.919653736178977e-05, | |
| "loss": 3.4796, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.08100965919445963, | |
| "grad_norm": 2.9740312099456787, | |
| "learning_rate": 4.9194736525635074e-05, | |
| "loss": 3.2645, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.08110078367049389, | |
| "grad_norm": 1.9813849925994873, | |
| "learning_rate": 4.919293370662642e-05, | |
| "loss": 3.1699, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.08119190814652816, | |
| "grad_norm": 2.8427340984344482, | |
| "learning_rate": 4.9191128904911556e-05, | |
| "loss": 3.3489, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.08128303262256242, | |
| "grad_norm": 3.997051954269409, | |
| "learning_rate": 4.91893221206384e-05, | |
| "loss": 3.4374, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.08137415709859669, | |
| "grad_norm": 1.672037959098816, | |
| "learning_rate": 4.9187513353955016e-05, | |
| "loss": 3.1125, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.08146528157463094, | |
| "grad_norm": 2.2593343257904053, | |
| "learning_rate": 4.9185702605009645e-05, | |
| "loss": 3.1311, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.0815564060506652, | |
| "grad_norm": 3.1953940391540527, | |
| "learning_rate": 4.9183889873950684e-05, | |
| "loss": 3.366, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.08164753052669947, | |
| "grad_norm": 3.4176578521728516, | |
| "learning_rate": 4.91820751609267e-05, | |
| "loss": 3.3673, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.08173865500273374, | |
| "grad_norm": 2.87166166305542, | |
| "learning_rate": 4.9180258466086404e-05, | |
| "loss": 3.5602, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.081829779478768, | |
| "grad_norm": 2.719068765640259, | |
| "learning_rate": 4.917843978957869e-05, | |
| "loss": 3.2119, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.08192090395480225, | |
| "grad_norm": 2.7754950523376465, | |
| "learning_rate": 4.9176619131552604e-05, | |
| "loss": 3.0594, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.08201202843083652, | |
| "grad_norm": 3.5347611904144287, | |
| "learning_rate": 4.917479649215735e-05, | |
| "loss": 3.3751, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.08210315290687079, | |
| "grad_norm": 4.182806015014648, | |
| "learning_rate": 4.917297187154232e-05, | |
| "loss": 3.1338, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.08219427738290505, | |
| "grad_norm": 1.5867587327957153, | |
| "learning_rate": 4.9171145269857024e-05, | |
| "loss": 3.0826, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.08228540185893932, | |
| "grad_norm": 3.97678279876709, | |
| "learning_rate": 4.916931668725117e-05, | |
| "loss": 3.3052, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.08237652633497357, | |
| "grad_norm": 3.3548977375030518, | |
| "learning_rate": 4.916748612387461e-05, | |
| "loss": 3.3696, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.08246765081100783, | |
| "grad_norm": 4.031994342803955, | |
| "learning_rate": 4.916565357987738e-05, | |
| "loss": 3.0432, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.0825587752870421, | |
| "grad_norm": 3.2942988872528076, | |
| "learning_rate": 4.916381905540966e-05, | |
| "loss": 3.0257, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.08264989976307636, | |
| "grad_norm": 2.751410484313965, | |
| "learning_rate": 4.916198255062179e-05, | |
| "loss": 3.1613, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.08274102423911063, | |
| "grad_norm": 2.4237067699432373, | |
| "learning_rate": 4.916014406566428e-05, | |
| "loss": 3.2109, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.08283214871514488, | |
| "grad_norm": 1.9257638454437256, | |
| "learning_rate": 4.915830360068781e-05, | |
| "loss": 3.0888, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.08292327319117915, | |
| "grad_norm": 2.7850747108459473, | |
| "learning_rate": 4.91564611558432e-05, | |
| "loss": 3.0367, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.08301439766721341, | |
| "grad_norm": 2.9995596408843994, | |
| "learning_rate": 4.915461673128146e-05, | |
| "loss": 3.1854, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.08310552214324768, | |
| "grad_norm": 2.685365915298462, | |
| "learning_rate": 4.915277032715374e-05, | |
| "loss": 3.491, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.08319664661928194, | |
| "grad_norm": 2.090184211730957, | |
| "learning_rate": 4.915092194361136e-05, | |
| "loss": 3.3902, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.0832877710953162, | |
| "grad_norm": 2.95298171043396, | |
| "learning_rate": 4.91490715808058e-05, | |
| "loss": 3.349, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.08337889557135046, | |
| "grad_norm": 2.3491621017456055, | |
| "learning_rate": 4.914721923888871e-05, | |
| "loss": 3.0253, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.08347002004738473, | |
| "grad_norm": 2.7936818599700928, | |
| "learning_rate": 4.914536491801189e-05, | |
| "loss": 3.1338, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.08356114452341899, | |
| "grad_norm": 2.7228002548217773, | |
| "learning_rate": 4.914350861832732e-05, | |
| "loss": 3.0906, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.08365226899945326, | |
| "grad_norm": 3.0175414085388184, | |
| "learning_rate": 4.914165033998711e-05, | |
| "loss": 3.4414, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.08374339347548751, | |
| "grad_norm": 1.8119590282440186, | |
| "learning_rate": 4.9139790083143574e-05, | |
| "loss": 3.1763, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.08383451795152178, | |
| "grad_norm": 2.5555202960968018, | |
| "learning_rate": 4.913792784794917e-05, | |
| "loss": 3.356, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.08392564242755604, | |
| "grad_norm": 2.8031249046325684, | |
| "learning_rate": 4.913606363455649e-05, | |
| "loss": 3.3269, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.0840167669035903, | |
| "grad_norm": 2.178687810897827, | |
| "learning_rate": 4.913419744311835e-05, | |
| "loss": 3.1791, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.08410789137962457, | |
| "grad_norm": 2.583512544631958, | |
| "learning_rate": 4.9132329273787655e-05, | |
| "loss": 3.5688, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.08419901585565882, | |
| "grad_norm": 2.3542723655700684, | |
| "learning_rate": 4.913045912671753e-05, | |
| "loss": 3.061, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.08429014033169309, | |
| "grad_norm": 1.9939539432525635, | |
| "learning_rate": 4.9128587002061245e-05, | |
| "loss": 3.074, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.08438126480772735, | |
| "grad_norm": 3.179673194885254, | |
| "learning_rate": 4.912671289997221e-05, | |
| "loss": 2.9628, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.08447238928376162, | |
| "grad_norm": 4.300661563873291, | |
| "learning_rate": 4.912483682060403e-05, | |
| "loss": 3.2144, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.08456351375979589, | |
| "grad_norm": 3.354478597640991, | |
| "learning_rate": 4.912295876411044e-05, | |
| "loss": 2.8357, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.08465463823583014, | |
| "grad_norm": 2.492208957672119, | |
| "learning_rate": 4.9121078730645375e-05, | |
| "loss": 3.2905, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.0847457627118644, | |
| "grad_norm": 3.1754820346832275, | |
| "learning_rate": 4.91191967203629e-05, | |
| "loss": 3.205, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.08483688718789867, | |
| "grad_norm": 2.5363569259643555, | |
| "learning_rate": 4.911731273341725e-05, | |
| "loss": 3.395, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.08492801166393293, | |
| "grad_norm": 4.300615310668945, | |
| "learning_rate": 4.911542676996284e-05, | |
| "loss": 3.0683, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.0850191361399672, | |
| "grad_norm": 2.777848958969116, | |
| "learning_rate": 4.911353883015422e-05, | |
| "loss": 3.1315, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.08511026061600145, | |
| "grad_norm": 3.849351406097412, | |
| "learning_rate": 4.9111648914146116e-05, | |
| "loss": 3.1234, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.08520138509203572, | |
| "grad_norm": 2.4552981853485107, | |
| "learning_rate": 4.910975702209341e-05, | |
| "loss": 3.3631, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.08529250956806998, | |
| "grad_norm": 3.1233198642730713, | |
| "learning_rate": 4.910786315415115e-05, | |
| "loss": 3.4195, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.08538363404410425, | |
| "grad_norm": 4.6678900718688965, | |
| "learning_rate": 4.910596731047456e-05, | |
| "loss": 3.4252, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.08547475852013851, | |
| "grad_norm": 2.9370291233062744, | |
| "learning_rate": 4.9104069491218995e-05, | |
| "loss": 4.3231, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.08556588299617277, | |
| "grad_norm": 1.6954116821289062, | |
| "learning_rate": 4.910216969654e-05, | |
| "loss": 3.0919, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.08565700747220703, | |
| "grad_norm": 1.941863775253296, | |
| "learning_rate": 4.9100267926593266e-05, | |
| "loss": 3.13, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.0857481319482413, | |
| "grad_norm": 2.876239776611328, | |
| "learning_rate": 4.909836418153465e-05, | |
| "loss": 3.3744, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.08583925642427556, | |
| "grad_norm": 1.5756586790084839, | |
| "learning_rate": 4.909645846152018e-05, | |
| "loss": 3.0713, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.08593038090030983, | |
| "grad_norm": 3.047095775604248, | |
| "learning_rate": 4.909455076670601e-05, | |
| "loss": 3.3628, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.08602150537634409, | |
| "grad_norm": 4.294236660003662, | |
| "learning_rate": 4.909264109724853e-05, | |
| "loss": 3.4089, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.08611262985237834, | |
| "grad_norm": 3.7276289463043213, | |
| "learning_rate": 4.9090729453304197e-05, | |
| "loss": 3.5265, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.08620375432841261, | |
| "grad_norm": 2.4728293418884277, | |
| "learning_rate": 4.908881583502971e-05, | |
| "loss": 4.0518, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.08629487880444688, | |
| "grad_norm": 2.5228142738342285, | |
| "learning_rate": 4.908690024258188e-05, | |
| "loss": 3.3074, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.08638600328048114, | |
| "grad_norm": 2.56369686126709, | |
| "learning_rate": 4.90849826761177e-05, | |
| "loss": 3.472, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.0864771277565154, | |
| "grad_norm": 3.3140337467193604, | |
| "learning_rate": 4.908306313579433e-05, | |
| "loss": 3.027, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.08656825223254966, | |
| "grad_norm": 1.8993895053863525, | |
| "learning_rate": 4.908114162176908e-05, | |
| "loss": 3.0605, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.08665937670858392, | |
| "grad_norm": 4.037572383880615, | |
| "learning_rate": 4.907921813419942e-05, | |
| "loss": 3.2735, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.08675050118461819, | |
| "grad_norm": 2.20011043548584, | |
| "learning_rate": 4.9077292673243e-05, | |
| "loss": 3.4346, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.08684162566065246, | |
| "grad_norm": 1.5664113759994507, | |
| "learning_rate": 4.907536523905761e-05, | |
| "loss": 2.9917, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.08693275013668672, | |
| "grad_norm": 1.562983512878418, | |
| "learning_rate": 4.907343583180122e-05, | |
| "loss": 2.831, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.08702387461272097, | |
| "grad_norm": 1.4592325687408447, | |
| "learning_rate": 4.9071504451631934e-05, | |
| "loss": 3.1492, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.08711499908875524, | |
| "grad_norm": 3.860102653503418, | |
| "learning_rate": 4.9069571098708045e-05, | |
| "loss": 3.3501, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.0872061235647895, | |
| "grad_norm": 3.7838504314422607, | |
| "learning_rate": 4.9067635773188005e-05, | |
| "loss": 3.2827, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.08729724804082377, | |
| "grad_norm": 1.798142671585083, | |
| "learning_rate": 4.906569847523042e-05, | |
| "loss": 3.2199, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.08738837251685803, | |
| "grad_norm": 2.6328585147857666, | |
| "learning_rate": 4.906375920499405e-05, | |
| "loss": 3.1803, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.08747949699289229, | |
| "grad_norm": 2.4833974838256836, | |
| "learning_rate": 4.906181796263784e-05, | |
| "loss": 3.7958, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.08757062146892655, | |
| "grad_norm": 2.0607047080993652, | |
| "learning_rate": 4.9059874748320876e-05, | |
| "loss": 3.1254, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.08766174594496082, | |
| "grad_norm": 2.7997632026672363, | |
| "learning_rate": 4.90579295622024e-05, | |
| "loss": 2.953, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.08775287042099508, | |
| "grad_norm": 2.249958038330078, | |
| "learning_rate": 4.905598240444185e-05, | |
| "loss": 3.4737, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.08784399489702935, | |
| "grad_norm": 3.1202094554901123, | |
| "learning_rate": 4.9054033275198794e-05, | |
| "loss": 3.1832, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.0879351193730636, | |
| "grad_norm": 1.4089468717575073, | |
| "learning_rate": 4.905208217463296e-05, | |
| "loss": 3.0008, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.08802624384909787, | |
| "grad_norm": 2.194896697998047, | |
| "learning_rate": 4.905012910290426e-05, | |
| "loss": 3.1747, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.08811736832513213, | |
| "grad_norm": 1.8730498552322388, | |
| "learning_rate": 4.904817406017275e-05, | |
| "loss": 3.0702, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.0882084928011664, | |
| "grad_norm": 1.5486280918121338, | |
| "learning_rate": 4.904621704659866e-05, | |
| "loss": 3.0648, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.08829961727720066, | |
| "grad_norm": 2.200500965118408, | |
| "learning_rate": 4.9044258062342376e-05, | |
| "loss": 3.095, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.08839074175323491, | |
| "grad_norm": 4.8551788330078125, | |
| "learning_rate": 4.904229710756444e-05, | |
| "loss": 4.232, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.08848186622926918, | |
| "grad_norm": 1.4722237586975098, | |
| "learning_rate": 4.904033418242555e-05, | |
| "loss": 3.021, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.08857299070530344, | |
| "grad_norm": 2.9047417640686035, | |
| "learning_rate": 4.9038369287086594e-05, | |
| "loss": 2.9605, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.08866411518133771, | |
| "grad_norm": 2.3891854286193848, | |
| "learning_rate": 4.9036402421708596e-05, | |
| "loss": 3.4125, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.08875523965737198, | |
| "grad_norm": 3.597698211669922, | |
| "learning_rate": 4.903443358645274e-05, | |
| "loss": 3.4755, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.08884636413340623, | |
| "grad_norm": 2.3222601413726807, | |
| "learning_rate": 4.903246278148039e-05, | |
| "loss": 3.1024, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.0889374886094405, | |
| "grad_norm": 1.7623449563980103, | |
| "learning_rate": 4.903049000695305e-05, | |
| "loss": 3.0337, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.08902861308547476, | |
| "grad_norm": 4.266841411590576, | |
| "learning_rate": 4.9028515263032415e-05, | |
| "loss": 3.3937, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.08911973756150902, | |
| "grad_norm": 1.746504783630371, | |
| "learning_rate": 4.902653854988031e-05, | |
| "loss": 3.0249, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.08921086203754329, | |
| "grad_norm": 3.4824695587158203, | |
| "learning_rate": 4.9024559867658734e-05, | |
| "loss": 3.1459, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.08930198651357754, | |
| "grad_norm": 3.1984243392944336, | |
| "learning_rate": 4.9022579216529854e-05, | |
| "loss": 3.1946, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.08939311098961181, | |
| "grad_norm": 2.4650838375091553, | |
| "learning_rate": 4.902059659665599e-05, | |
| "loss": 3.3177, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.08948423546564607, | |
| "grad_norm": 3.3806710243225098, | |
| "learning_rate": 4.9018612008199616e-05, | |
| "loss": 3.4283, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.08957535994168034, | |
| "grad_norm": 2.031496286392212, | |
| "learning_rate": 4.9016625451323396e-05, | |
| "loss": 3.2034, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.0896664844177146, | |
| "grad_norm": 1.8821396827697754, | |
| "learning_rate": 4.9014636926190116e-05, | |
| "loss": 3.0086, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.08975760889374886, | |
| "grad_norm": 3.310356378555298, | |
| "learning_rate": 4.901264643296276e-05, | |
| "loss": 3.2155, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.08984873336978312, | |
| "grad_norm": 2.422724723815918, | |
| "learning_rate": 4.9010653971804444e-05, | |
| "loss": 3.3122, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.08993985784581739, | |
| "grad_norm": 4.234830856323242, | |
| "learning_rate": 4.9008659542878464e-05, | |
| "loss": 3.5449, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.09003098232185165, | |
| "grad_norm": 1.48231840133667, | |
| "learning_rate": 4.900666314634828e-05, | |
| "loss": 3.0412, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.09012210679788592, | |
| "grad_norm": 2.9697065353393555, | |
| "learning_rate": 4.900466478237748e-05, | |
| "loss": 3.2992, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.09021323127392017, | |
| "grad_norm": 2.7923099994659424, | |
| "learning_rate": 4.900266445112986e-05, | |
| "loss": 3.4856, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.09030435574995443, | |
| "grad_norm": 2.1255149841308594, | |
| "learning_rate": 4.900066215276936e-05, | |
| "loss": 2.9898, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.0903954802259887, | |
| "grad_norm": 2.3270339965820312, | |
| "learning_rate": 4.899865788746005e-05, | |
| "loss": 3.127, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.09048660470202297, | |
| "grad_norm": 2.2811200618743896, | |
| "learning_rate": 4.899665165536621e-05, | |
| "loss": 3.3315, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.09057772917805723, | |
| "grad_norm": 2.0090272426605225, | |
| "learning_rate": 4.8994643456652244e-05, | |
| "loss": 3.1308, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.09066885365409148, | |
| "grad_norm": 5.991725921630859, | |
| "learning_rate": 4.8992633291482746e-05, | |
| "loss": 2.8415, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.09075997813012575, | |
| "grad_norm": 1.7398011684417725, | |
| "learning_rate": 4.899062116002244e-05, | |
| "loss": 3.247, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.09085110260616001, | |
| "grad_norm": 2.6200222969055176, | |
| "learning_rate": 4.898860706243625e-05, | |
| "loss": 3.0824, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.09094222708219428, | |
| "grad_norm": 2.7721424102783203, | |
| "learning_rate": 4.898659099888921e-05, | |
| "loss": 4.5453, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.09103335155822855, | |
| "grad_norm": 4.053179740905762, | |
| "learning_rate": 4.8984572969546575e-05, | |
| "loss": 3.4542, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.0911244760342628, | |
| "grad_norm": 3.0186972618103027, | |
| "learning_rate": 4.8982552974573717e-05, | |
| "loss": 3.1511, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.09121560051029706, | |
| "grad_norm": 2.732668876647949, | |
| "learning_rate": 4.8980531014136175e-05, | |
| "loss": 3.0888, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.09130672498633133, | |
| "grad_norm": 2.304547071456909, | |
| "learning_rate": 4.897850708839966e-05, | |
| "loss": 3.3741, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.0913978494623656, | |
| "grad_norm": 2.8242197036743164, | |
| "learning_rate": 4.897648119753006e-05, | |
| "loss": 3.3513, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.09148897393839986, | |
| "grad_norm": 3.2186803817749023, | |
| "learning_rate": 4.897445334169337e-05, | |
| "loss": 4.7131, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.09158009841443411, | |
| "grad_norm": 2.906078577041626, | |
| "learning_rate": 4.897242352105581e-05, | |
| "loss": 3.0597, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.09167122289046838, | |
| "grad_norm": 3.2613537311553955, | |
| "learning_rate": 4.8970391735783725e-05, | |
| "loss": 3.0269, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.09176234736650264, | |
| "grad_norm": 4.029659271240234, | |
| "learning_rate": 4.896835798604362e-05, | |
| "loss": 3.5813, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.09185347184253691, | |
| "grad_norm": 8.040237426757812, | |
| "learning_rate": 4.8966322272002174e-05, | |
| "loss": 3.176, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.09194459631857117, | |
| "grad_norm": 3.0369622707366943, | |
| "learning_rate": 4.8964284593826215e-05, | |
| "loss": 3.1676, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.09203572079460542, | |
| "grad_norm": 3.2260282039642334, | |
| "learning_rate": 4.8962244951682754e-05, | |
| "loss": 3.3491, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.09212684527063969, | |
| "grad_norm": 1.53379225730896, | |
| "learning_rate": 4.8960203345738934e-05, | |
| "loss": 3.0221, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.09221796974667396, | |
| "grad_norm": 4.076636791229248, | |
| "learning_rate": 4.895815977616208e-05, | |
| "loss": 3.5203, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.09230909422270822, | |
| "grad_norm": 2.1849517822265625, | |
| "learning_rate": 4.895611424311967e-05, | |
| "loss": 3.4105, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.09240021869874249, | |
| "grad_norm": 8.389893531799316, | |
| "learning_rate": 4.8954066746779334e-05, | |
| "loss": 3.2236, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.09249134317477674, | |
| "grad_norm": 2.0767617225646973, | |
| "learning_rate": 4.895201728730888e-05, | |
| "loss": 3.1362, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.092582467650811, | |
| "grad_norm": 2.6527016162872314, | |
| "learning_rate": 4.894996586487627e-05, | |
| "loss": 3.0877, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.09267359212684527, | |
| "grad_norm": 3.5784764289855957, | |
| "learning_rate": 4.8947912479649624e-05, | |
| "loss": 3.279, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.09276471660287954, | |
| "grad_norm": 1.6435048580169678, | |
| "learning_rate": 4.894585713179723e-05, | |
| "loss": 2.9978, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.0928558410789138, | |
| "grad_norm": 1.3273972272872925, | |
| "learning_rate": 4.894379982148753e-05, | |
| "loss": 2.998, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.09294696555494805, | |
| "grad_norm": 1.7817779779434204, | |
| "learning_rate": 4.894174054888912e-05, | |
| "loss": 2.9691, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.09303809003098232, | |
| "grad_norm": 2.4242284297943115, | |
| "learning_rate": 4.893967931417078e-05, | |
| "loss": 3.228, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.09312921450701658, | |
| "grad_norm": 2.9169692993164062, | |
| "learning_rate": 4.8937616117501414e-05, | |
| "loss": 3.5446, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.09322033898305085, | |
| "grad_norm": 3.6334569454193115, | |
| "learning_rate": 4.893555095905014e-05, | |
| "loss": 3.0, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.09331146345908511, | |
| "grad_norm": 2.8542470932006836, | |
| "learning_rate": 4.8933483838986184e-05, | |
| "loss": 3.1218, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.09340258793511938, | |
| "grad_norm": 2.078474283218384, | |
| "learning_rate": 4.8931414757478954e-05, | |
| "loss": 3.2325, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.09349371241115363, | |
| "grad_norm": 3.3290367126464844, | |
| "learning_rate": 4.8929343714698026e-05, | |
| "loss": 3.1449, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.0935848368871879, | |
| "grad_norm": 2.655738592147827, | |
| "learning_rate": 4.892727071081314e-05, | |
| "loss": 4.3078, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.09367596136322216, | |
| "grad_norm": 2.936398983001709, | |
| "learning_rate": 4.8925195745994165e-05, | |
| "loss": 2.9901, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.09376708583925643, | |
| "grad_norm": 1.61790931224823, | |
| "learning_rate": 4.892311882041117e-05, | |
| "loss": 2.9575, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.0938582103152907, | |
| "grad_norm": 4.781036853790283, | |
| "learning_rate": 4.892103993423436e-05, | |
| "loss": 3.3626, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.09394933479132495, | |
| "grad_norm": 4.162670612335205, | |
| "learning_rate": 4.891895908763411e-05, | |
| "loss": 3.5073, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.09404045926735921, | |
| "grad_norm": 2.5747599601745605, | |
| "learning_rate": 4.8916876280780946e-05, | |
| "loss": 2.6645, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.09413158374339348, | |
| "grad_norm": 2.0610013008117676, | |
| "learning_rate": 4.8914791513845575e-05, | |
| "loss": 2.8624, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.09422270821942774, | |
| "grad_norm": 1.6727491617202759, | |
| "learning_rate": 4.8912704786998844e-05, | |
| "loss": 3.0615, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.09431383269546201, | |
| "grad_norm": 2.5787103176116943, | |
| "learning_rate": 4.8910616100411774e-05, | |
| "loss": 3.1513, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.09440495717149626, | |
| "grad_norm": 2.7966387271881104, | |
| "learning_rate": 4.890852545425553e-05, | |
| "loss": 3.2184, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.09449608164753052, | |
| "grad_norm": 3.2339022159576416, | |
| "learning_rate": 4.8906432848701464e-05, | |
| "loss": 3.219, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.09458720612356479, | |
| "grad_norm": 3.6414124965667725, | |
| "learning_rate": 4.8904338283921056e-05, | |
| "loss": 3.0272, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.09467833059959906, | |
| "grad_norm": 1.915804147720337, | |
| "learning_rate": 4.890224176008598e-05, | |
| "loss": 3.0635, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.09476945507563332, | |
| "grad_norm": 1.533538579940796, | |
| "learning_rate": 4.890014327736804e-05, | |
| "loss": 3.1068, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.09486057955166757, | |
| "grad_norm": 4.12912130355835, | |
| "learning_rate": 4.889804283593923e-05, | |
| "loss": 3.1286, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.09495170402770184, | |
| "grad_norm": 3.413926362991333, | |
| "learning_rate": 4.889594043597168e-05, | |
| "loss": 4.3663, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.0950428285037361, | |
| "grad_norm": 3.632355213165283, | |
| "learning_rate": 4.8893836077637686e-05, | |
| "loss": 3.6121, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.09513395297977037, | |
| "grad_norm": 3.041640043258667, | |
| "learning_rate": 4.8891729761109726e-05, | |
| "loss": 3.0379, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.09522507745580464, | |
| "grad_norm": 2.1716883182525635, | |
| "learning_rate": 4.88896214865604e-05, | |
| "loss": 3.0752, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.09531620193183889, | |
| "grad_norm": 1.7440366744995117, | |
| "learning_rate": 4.88875112541625e-05, | |
| "loss": 2.9915, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.09540732640787315, | |
| "grad_norm": 2.8066303730010986, | |
| "learning_rate": 4.888539906408897e-05, | |
| "loss": 3.2162, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.09549845088390742, | |
| "grad_norm": 2.6546630859375, | |
| "learning_rate": 4.888328491651291e-05, | |
| "loss": 3.1054, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.09558957535994168, | |
| "grad_norm": 2.986856460571289, | |
| "learning_rate": 4.888116881160757e-05, | |
| "loss": 3.1334, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.09568069983597595, | |
| "grad_norm": 4.76503324508667, | |
| "learning_rate": 4.8879050749546395e-05, | |
| "loss": 3.461, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0957718243120102, | |
| "grad_norm": 1.329960823059082, | |
| "learning_rate": 4.8876930730502954e-05, | |
| "loss": 3.0356, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.09586294878804447, | |
| "grad_norm": 2.466423511505127, | |
| "learning_rate": 4.887480875465099e-05, | |
| "loss": 3.3193, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.09595407326407873, | |
| "grad_norm": 3.3241379261016846, | |
| "learning_rate": 4.887268482216442e-05, | |
| "loss": 3.4059, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.096045197740113, | |
| "grad_norm": 2.425245523452759, | |
| "learning_rate": 4.88705589332173e-05, | |
| "loss": 3.0792, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.09613632221614726, | |
| "grad_norm": 2.820553779602051, | |
| "learning_rate": 4.886843108798386e-05, | |
| "loss": 3.1892, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.09622744669218151, | |
| "grad_norm": 2.662749767303467, | |
| "learning_rate": 4.886630128663847e-05, | |
| "loss": 3.4359, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.09631857116821578, | |
| "grad_norm": 3.7689478397369385, | |
| "learning_rate": 4.8864169529355694e-05, | |
| "loss": 3.3501, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.09640969564425005, | |
| "grad_norm": 2.9950053691864014, | |
| "learning_rate": 4.8862035816310225e-05, | |
| "loss": 3.0473, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.09650082012028431, | |
| "grad_norm": 3.828263521194458, | |
| "learning_rate": 4.885990014767694e-05, | |
| "loss": 3.7086, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.09659194459631858, | |
| "grad_norm": 2.4364869594573975, | |
| "learning_rate": 4.885776252363086e-05, | |
| "loss": 4.1158, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.09668306907235283, | |
| "grad_norm": 3.0693185329437256, | |
| "learning_rate": 4.8855622944347174e-05, | |
| "loss": 4.4396, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.0967741935483871, | |
| "grad_norm": 2.158339738845825, | |
| "learning_rate": 4.885348141000122e-05, | |
| "loss": 3.1364, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.09686531802442136, | |
| "grad_norm": 3.3291866779327393, | |
| "learning_rate": 4.885133792076852e-05, | |
| "loss": 3.4187, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.09695644250045563, | |
| "grad_norm": 3.016261100769043, | |
| "learning_rate": 4.884919247682473e-05, | |
| "loss": 3.4883, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.09704756697648989, | |
| "grad_norm": 1.6200766563415527, | |
| "learning_rate": 4.8847045078345674e-05, | |
| "loss": 3.0487, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.09713869145252414, | |
| "grad_norm": 2.354325771331787, | |
| "learning_rate": 4.884489572550736e-05, | |
| "loss": 3.2557, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.09722981592855841, | |
| "grad_norm": 4.061933994293213, | |
| "learning_rate": 4.884274441848592e-05, | |
| "loss": 3.4442, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.09732094040459267, | |
| "grad_norm": 4.645877838134766, | |
| "learning_rate": 4.884059115745766e-05, | |
| "loss": 3.0568, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.09741206488062694, | |
| "grad_norm": 1.73179292678833, | |
| "learning_rate": 4.883843594259905e-05, | |
| "loss": 3.125, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.0975031893566612, | |
| "grad_norm": 1.4215937852859497, | |
| "learning_rate": 4.883627877408673e-05, | |
| "loss": 2.8963, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.09759431383269546, | |
| "grad_norm": 3.7663443088531494, | |
| "learning_rate": 4.8834119652097475e-05, | |
| "loss": 4.3777, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.09768543830872972, | |
| "grad_norm": 1.6245098114013672, | |
| "learning_rate": 4.883195857680824e-05, | |
| "loss": 3.0648, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.09777656278476399, | |
| "grad_norm": 3.0550179481506348, | |
| "learning_rate": 4.882979554839613e-05, | |
| "loss": 3.2421, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.09786768726079825, | |
| "grad_norm": 3.2408952713012695, | |
| "learning_rate": 4.8827630567038416e-05, | |
| "loss": 2.8971, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.09795881173683252, | |
| "grad_norm": 7.056894302368164, | |
| "learning_rate": 4.882546363291253e-05, | |
| "loss": 3.2447, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.09804993621286677, | |
| "grad_norm": 4.1067914962768555, | |
| "learning_rate": 4.882329474619606e-05, | |
| "loss": 3.3844, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.09814106068890104, | |
| "grad_norm": 2.7459664344787598, | |
| "learning_rate": 4.882112390706675e-05, | |
| "loss": 2.8364, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.0982321851649353, | |
| "grad_norm": 1.7303998470306396, | |
| "learning_rate": 4.8818951115702506e-05, | |
| "loss": 3.1362, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.09832330964096957, | |
| "grad_norm": 2.4760732650756836, | |
| "learning_rate": 4.88167763722814e-05, | |
| "loss": 3.5898, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.09841443411700383, | |
| "grad_norm": 1.439005970954895, | |
| "learning_rate": 4.8814599676981667e-05, | |
| "loss": 3.0291, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.09850555859303808, | |
| "grad_norm": 3.3156778812408447, | |
| "learning_rate": 4.881242102998169e-05, | |
| "loss": 2.9964, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.09859668306907235, | |
| "grad_norm": 2.405925750732422, | |
| "learning_rate": 4.881024043146002e-05, | |
| "loss": 3.549, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.09868780754510662, | |
| "grad_norm": 1.8694865703582764, | |
| "learning_rate": 4.880805788159537e-05, | |
| "loss": 3.3145, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.09877893202114088, | |
| "grad_norm": 3.397982358932495, | |
| "learning_rate": 4.880587338056659e-05, | |
| "loss": 3.1483, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.09887005649717515, | |
| "grad_norm": 3.9848830699920654, | |
| "learning_rate": 4.8803686928552736e-05, | |
| "loss": 3.3601, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.0989611809732094, | |
| "grad_norm": 2.076350688934326, | |
| "learning_rate": 4.880149852573297e-05, | |
| "loss": 2.9707, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.09905230544924366, | |
| "grad_norm": 4.586529731750488, | |
| "learning_rate": 4.8799308172286665e-05, | |
| "loss": 3.0142, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.09914342992527793, | |
| "grad_norm": 2.224879503250122, | |
| "learning_rate": 4.8797115868393304e-05, | |
| "loss": 3.2586, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.0992345544013122, | |
| "grad_norm": 2.13420033454895, | |
| "learning_rate": 4.879492161423257e-05, | |
| "loss": 3.3615, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.09932567887734646, | |
| "grad_norm": 2.3102781772613525, | |
| "learning_rate": 4.8792725409984295e-05, | |
| "loss": 3.2469, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.09941680335338071, | |
| "grad_norm": 2.7327070236206055, | |
| "learning_rate": 4.8790527255828453e-05, | |
| "loss": 3.0008, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.09950792782941498, | |
| "grad_norm": 4.016688823699951, | |
| "learning_rate": 4.8788327151945204e-05, | |
| "loss": 3.3525, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.09959905230544924, | |
| "grad_norm": 2.56805682182312, | |
| "learning_rate": 4.878612509851484e-05, | |
| "loss": 2.7244, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.09969017678148351, | |
| "grad_norm": 2.7914106845855713, | |
| "learning_rate": 4.878392109571784e-05, | |
| "loss": 2.9815, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.09978130125751777, | |
| "grad_norm": 1.5488040447235107, | |
| "learning_rate": 4.878171514373483e-05, | |
| "loss": 2.9438, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.09987242573355203, | |
| "grad_norm": 2.1465392112731934, | |
| "learning_rate": 4.87795072427466e-05, | |
| "loss": 3.2417, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.09996355020958629, | |
| "grad_norm": 2.0856845378875732, | |
| "learning_rate": 4.877729739293409e-05, | |
| "loss": 2.9681, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.10005467468562056, | |
| "grad_norm": 2.8549904823303223, | |
| "learning_rate": 4.87750855944784e-05, | |
| "loss": 4.3383, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.10014579916165482, | |
| "grad_norm": 3.344149351119995, | |
| "learning_rate": 4.87728718475608e-05, | |
| "loss": 2.9673, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.10023692363768909, | |
| "grad_norm": 3.0725624561309814, | |
| "learning_rate": 4.877065615236272e-05, | |
| "loss": 3.4142, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.10032804811372334, | |
| "grad_norm": 3.4187722206115723, | |
| "learning_rate": 4.876843850906574e-05, | |
| "loss": 3.392, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.1004191725897576, | |
| "grad_norm": 1.4068093299865723, | |
| "learning_rate": 4.8766218917851614e-05, | |
| "loss": 2.9884, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.10051029706579187, | |
| "grad_norm": 1.7465176582336426, | |
| "learning_rate": 4.876399737890223e-05, | |
| "loss": 3.1005, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.10060142154182614, | |
| "grad_norm": 2.4753262996673584, | |
| "learning_rate": 4.876177389239967e-05, | |
| "loss": 3.3825, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.1006925460178604, | |
| "grad_norm": 2.6038968563079834, | |
| "learning_rate": 4.8759548458526145e-05, | |
| "loss": 3.4349, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.10078367049389467, | |
| "grad_norm": 3.703859806060791, | |
| "learning_rate": 4.8757321077464035e-05, | |
| "loss": 3.128, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.10087479496992892, | |
| "grad_norm": 2.0796546936035156, | |
| "learning_rate": 4.87550917493959e-05, | |
| "loss": 3.1497, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.10096591944596318, | |
| "grad_norm": 1.381535291671753, | |
| "learning_rate": 4.8752860474504424e-05, | |
| "loss": 2.9456, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.10105704392199745, | |
| "grad_norm": 1.740310549736023, | |
| "learning_rate": 4.875062725297248e-05, | |
| "loss": 3.454, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.10114816839803172, | |
| "grad_norm": 2.3114092350006104, | |
| "learning_rate": 4.874839208498309e-05, | |
| "loss": 3.149, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.10123929287406598, | |
| "grad_norm": 2.582498788833618, | |
| "learning_rate": 4.8746154970719414e-05, | |
| "loss": 3.0988, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.10133041735010023, | |
| "grad_norm": 1.5047119855880737, | |
| "learning_rate": 4.874391591036482e-05, | |
| "loss": 3.0241, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.1014215418261345, | |
| "grad_norm": 2.3863258361816406, | |
| "learning_rate": 4.87416749041028e-05, | |
| "loss": 3.3233, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.10151266630216876, | |
| "grad_norm": 2.8434104919433594, | |
| "learning_rate": 4.8739431952117e-05, | |
| "loss": 3.3524, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.10160379077820303, | |
| "grad_norm": 2.5264041423797607, | |
| "learning_rate": 4.8737187054591256e-05, | |
| "loss": 3.2051, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.1016949152542373, | |
| "grad_norm": 1.7804640531539917, | |
| "learning_rate": 4.873494021170953e-05, | |
| "loss": 2.987, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.10178603973027155, | |
| "grad_norm": 1.650615930557251, | |
| "learning_rate": 4.873269142365598e-05, | |
| "loss": 2.9908, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.10187716420630581, | |
| "grad_norm": 1.6238685846328735, | |
| "learning_rate": 4.873044069061489e-05, | |
| "loss": 3.0443, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.10196828868234008, | |
| "grad_norm": 2.299797773361206, | |
| "learning_rate": 4.87281880127707e-05, | |
| "loss": 3.355, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.10205941315837434, | |
| "grad_norm": 1.3027498722076416, | |
| "learning_rate": 4.872593339030806e-05, | |
| "loss": 2.98, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.10215053763440861, | |
| "grad_norm": 1.9283638000488281, | |
| "learning_rate": 4.872367682341173e-05, | |
| "loss": 3.3087, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.10224166211044286, | |
| "grad_norm": 3.2081453800201416, | |
| "learning_rate": 4.872141831226664e-05, | |
| "loss": 3.0544, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.10233278658647713, | |
| "grad_norm": 3.2388205528259277, | |
| "learning_rate": 4.871915785705788e-05, | |
| "loss": 3.2673, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.10242391106251139, | |
| "grad_norm": 2.2480404376983643, | |
| "learning_rate": 4.871689545797072e-05, | |
| "loss": 3.2273, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.10251503553854566, | |
| "grad_norm": 3.065657615661621, | |
| "learning_rate": 4.871463111519056e-05, | |
| "loss": 3.3631, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.10260616001457992, | |
| "grad_norm": 2.148409843444824, | |
| "learning_rate": 4.8712364828902965e-05, | |
| "loss": 2.8683, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.10269728449061417, | |
| "grad_norm": 2.8003501892089844, | |
| "learning_rate": 4.8710096599293695e-05, | |
| "loss": 3.2381, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.10278840896664844, | |
| "grad_norm": 2.558243989944458, | |
| "learning_rate": 4.870782642654861e-05, | |
| "loss": 3.3222, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.1028795334426827, | |
| "grad_norm": 4.242537975311279, | |
| "learning_rate": 4.870555431085377e-05, | |
| "loss": 3.296, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.10297065791871697, | |
| "grad_norm": 2.546668291091919, | |
| "learning_rate": 4.8703280252395385e-05, | |
| "loss": 3.1536, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.10306178239475124, | |
| "grad_norm": 1.5829936265945435, | |
| "learning_rate": 4.870100425135982e-05, | |
| "loss": 2.9867, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.10315290687078549, | |
| "grad_norm": 2.930858850479126, | |
| "learning_rate": 4.869872630793361e-05, | |
| "loss": 3.2111, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.10324403134681975, | |
| "grad_norm": 2.89884877204895, | |
| "learning_rate": 4.869644642230343e-05, | |
| "loss": 3.1203, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.10333515582285402, | |
| "grad_norm": 4.641218662261963, | |
| "learning_rate": 4.869416459465615e-05, | |
| "loss": 3.4105, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.10342628029888828, | |
| "grad_norm": 3.085787534713745, | |
| "learning_rate": 4.869188082517874e-05, | |
| "loss": 3.2745, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.10351740477492255, | |
| "grad_norm": 1.8495999574661255, | |
| "learning_rate": 4.8689595114058375e-05, | |
| "loss": 3.032, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.1036085292509568, | |
| "grad_norm": 1.8329887390136719, | |
| "learning_rate": 4.86873074614824e-05, | |
| "loss": 3.0344, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.10369965372699107, | |
| "grad_norm": 1.369670033454895, | |
| "learning_rate": 4.868501786763827e-05, | |
| "loss": 3.0423, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.10379077820302533, | |
| "grad_norm": 3.01965594291687, | |
| "learning_rate": 4.868272633271363e-05, | |
| "loss": 3.0057, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.1038819026790596, | |
| "grad_norm": 3.3625049591064453, | |
| "learning_rate": 4.868043285689631e-05, | |
| "loss": 3.0685, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.10397302715509386, | |
| "grad_norm": 2.6331353187561035, | |
| "learning_rate": 4.867813744037423e-05, | |
| "loss": 3.3065, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.10406415163112812, | |
| "grad_norm": 2.193513870239258, | |
| "learning_rate": 4.867584008333553e-05, | |
| "loss": 3.1842, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.10415527610716238, | |
| "grad_norm": 3.555842638015747, | |
| "learning_rate": 4.867354078596848e-05, | |
| "loss": 3.3865, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.10424640058319665, | |
| "grad_norm": 1.3614274263381958, | |
| "learning_rate": 4.867123954846152e-05, | |
| "loss": 3.1375, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.10433752505923091, | |
| "grad_norm": 2.8560686111450195, | |
| "learning_rate": 4.8668936371003246e-05, | |
| "loss": 3.347, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.10442864953526518, | |
| "grad_norm": 3.2532799243927, | |
| "learning_rate": 4.8666631253782405e-05, | |
| "loss": 3.5448, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.10451977401129943, | |
| "grad_norm": 2.5030109882354736, | |
| "learning_rate": 4.866432419698792e-05, | |
| "loss": 3.1628, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.1046108984873337, | |
| "grad_norm": 2.1041152477264404, | |
| "learning_rate": 4.866201520080886e-05, | |
| "loss": 3.211, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.10470202296336796, | |
| "grad_norm": 3.0748095512390137, | |
| "learning_rate": 4.8659704265434466e-05, | |
| "loss": 3.3296, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.10479314743940223, | |
| "grad_norm": 1.9824806451797485, | |
| "learning_rate": 4.865739139105411e-05, | |
| "loss": 3.087, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.10488427191543649, | |
| "grad_norm": 2.9198248386383057, | |
| "learning_rate": 4.8655076577857344e-05, | |
| "loss": 2.9909, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.10497539639147074, | |
| "grad_norm": 3.1498401165008545, | |
| "learning_rate": 4.8652759826033886e-05, | |
| "loss": 4.2349, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.10506652086750501, | |
| "grad_norm": 2.7998111248016357, | |
| "learning_rate": 4.86504411357736e-05, | |
| "loss": 3.1282, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.10515764534353927, | |
| "grad_norm": 1.2687691450119019, | |
| "learning_rate": 4.864812050726651e-05, | |
| "loss": 2.9539, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.10524876981957354, | |
| "grad_norm": 2.9933853149414062, | |
| "learning_rate": 4.86457979407028e-05, | |
| "loss": 3.2156, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.1053398942956078, | |
| "grad_norm": 3.073864698410034, | |
| "learning_rate": 4.864347343627281e-05, | |
| "loss": 3.0966, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.10543101877164206, | |
| "grad_norm": 3.451331615447998, | |
| "learning_rate": 4.864114699416706e-05, | |
| "loss": 3.4785, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.10552214324767632, | |
| "grad_norm": 1.6930314302444458, | |
| "learning_rate": 4.863881861457619e-05, | |
| "loss": 3.0383, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.10561326772371059, | |
| "grad_norm": 3.148261785507202, | |
| "learning_rate": 4.8636488297691025e-05, | |
| "loss": 2.4165, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.10570439219974485, | |
| "grad_norm": 3.410334587097168, | |
| "learning_rate": 4.863415604370255e-05, | |
| "loss": 3.277, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.10579551667577912, | |
| "grad_norm": 2.989598274230957, | |
| "learning_rate": 4.8631821852801894e-05, | |
| "loss": 3.1124, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.10588664115181337, | |
| "grad_norm": 2.705728530883789, | |
| "learning_rate": 4.8629485725180364e-05, | |
| "loss": 3.4259, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.10597776562784764, | |
| "grad_norm": 2.8298377990722656, | |
| "learning_rate": 4.862714766102941e-05, | |
| "loss": 2.6366, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.1060688901038819, | |
| "grad_norm": 2.781217098236084, | |
| "learning_rate": 4.862480766054064e-05, | |
| "loss": 3.2281, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.10616001457991617, | |
| "grad_norm": 1.5384852886199951, | |
| "learning_rate": 4.862246572390583e-05, | |
| "loss": 2.9956, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.10625113905595043, | |
| "grad_norm": 3.5114247798919678, | |
| "learning_rate": 4.862012185131691e-05, | |
| "loss": 2.7165, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.10634226353198468, | |
| "grad_norm": 2.069568634033203, | |
| "learning_rate": 4.861777604296597e-05, | |
| "loss": 3.2318, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.10643338800801895, | |
| "grad_norm": 3.111557722091675, | |
| "learning_rate": 4.8615428299045265e-05, | |
| "loss": 3.1355, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.10652451248405322, | |
| "grad_norm": 1.6879878044128418, | |
| "learning_rate": 4.86130786197472e-05, | |
| "loss": 3.1002, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.10661563696008748, | |
| "grad_norm": 3.0851385593414307, | |
| "learning_rate": 4.861072700526433e-05, | |
| "loss": 4.5198, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.10670676143612175, | |
| "grad_norm": 1.7297760248184204, | |
| "learning_rate": 4.860837345578938e-05, | |
| "loss": 3.0423, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.106797885912156, | |
| "grad_norm": 2.900595188140869, | |
| "learning_rate": 4.860601797151525e-05, | |
| "loss": 3.3863, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.10688901038819026, | |
| "grad_norm": 2.360994815826416, | |
| "learning_rate": 4.8603660552634965e-05, | |
| "loss": 3.3508, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.10698013486422453, | |
| "grad_norm": 3.4403953552246094, | |
| "learning_rate": 4.860130119934173e-05, | |
| "loss": 3.4562, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.1070712593402588, | |
| "grad_norm": 2.7521438598632812, | |
| "learning_rate": 4.85989399118289e-05, | |
| "loss": 3.066, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.10716238381629306, | |
| "grad_norm": 2.927729606628418, | |
| "learning_rate": 4.859657669029e-05, | |
| "loss": 3.3748, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.10725350829232731, | |
| "grad_norm": 2.8207826614379883, | |
| "learning_rate": 4.859421153491869e-05, | |
| "loss": 3.097, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.10734463276836158, | |
| "grad_norm": 2.950704336166382, | |
| "learning_rate": 4.859184444590882e-05, | |
| "loss": 2.8803, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.10743575724439584, | |
| "grad_norm": 2.3205230236053467, | |
| "learning_rate": 4.858947542345438e-05, | |
| "loss": 3.405, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.10752688172043011, | |
| "grad_norm": 2.2538068294525146, | |
| "learning_rate": 4.858710446774951e-05, | |
| "loss": 3.2668, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.10761800619646437, | |
| "grad_norm": 1.6739517450332642, | |
| "learning_rate": 4.858473157898853e-05, | |
| "loss": 3.096, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.10770913067249863, | |
| "grad_norm": 2.09653377532959, | |
| "learning_rate": 4.85823567573659e-05, | |
| "loss": 3.1997, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.10780025514853289, | |
| "grad_norm": 2.1584832668304443, | |
| "learning_rate": 4.8579980003076245e-05, | |
| "loss": 3.1801, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.10789137962456716, | |
| "grad_norm": 3.139174461364746, | |
| "learning_rate": 4.857760131631436e-05, | |
| "loss": 2.9841, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.10798250410060142, | |
| "grad_norm": 3.363103151321411, | |
| "learning_rate": 4.857522069727518e-05, | |
| "loss": 3.3184, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.10807362857663569, | |
| "grad_norm": 2.7438161373138428, | |
| "learning_rate": 4.857283814615381e-05, | |
| "loss": 2.904, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.10816475305266994, | |
| "grad_norm": 3.721757411956787, | |
| "learning_rate": 4.8570453663145506e-05, | |
| "loss": 2.8647, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.1082558775287042, | |
| "grad_norm": 2.8211238384246826, | |
| "learning_rate": 4.856806724844568e-05, | |
| "loss": 3.1336, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.10834700200473847, | |
| "grad_norm": 2.988065004348755, | |
| "learning_rate": 4.856567890224992e-05, | |
| "loss": 3.3704, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.10843812648077274, | |
| "grad_norm": 2.3461434841156006, | |
| "learning_rate": 4.856328862475396e-05, | |
| "loss": 3.2745, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.108529250956807, | |
| "grad_norm": 2.9137821197509766, | |
| "learning_rate": 4.8560896416153684e-05, | |
| "loss": 2.8177, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.10862037543284127, | |
| "grad_norm": 2.177649974822998, | |
| "learning_rate": 4.8558502276645146e-05, | |
| "loss": 3.0648, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.10871149990887552, | |
| "grad_norm": 4.471045970916748, | |
| "learning_rate": 4.8556106206424556e-05, | |
| "loss": 3.2198, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.10880262438490979, | |
| "grad_norm": 1.7140840291976929, | |
| "learning_rate": 4.855370820568829e-05, | |
| "loss": 3.0084, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.10889374886094405, | |
| "grad_norm": 1.9925346374511719, | |
| "learning_rate": 4.855130827463285e-05, | |
| "loss": 3.076, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.10898487333697832, | |
| "grad_norm": 4.0725555419921875, | |
| "learning_rate": 4.8548906413454944e-05, | |
| "loss": 3.1775, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.10907599781301258, | |
| "grad_norm": 4.49186897277832, | |
| "learning_rate": 4.85465026223514e-05, | |
| "loss": 3.0196, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.10916712228904683, | |
| "grad_norm": 2.624659776687622, | |
| "learning_rate": 4.8544096901519227e-05, | |
| "loss": 3.3422, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.1092582467650811, | |
| "grad_norm": 2.492645740509033, | |
| "learning_rate": 4.8541689251155575e-05, | |
| "loss": 3.1926, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.10934937124111536, | |
| "grad_norm": 1.60177743434906, | |
| "learning_rate": 4.853927967145777e-05, | |
| "loss": 2.946, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.10944049571714963, | |
| "grad_norm": 2.5063915252685547, | |
| "learning_rate": 4.853686816262327e-05, | |
| "loss": 3.1408, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.1095316201931839, | |
| "grad_norm": 8.176122665405273, | |
| "learning_rate": 4.8534454724849734e-05, | |
| "loss": 3.2513, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.10962274466921815, | |
| "grad_norm": 2.628269910812378, | |
| "learning_rate": 4.853203935833493e-05, | |
| "loss": 3.1195, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.10971386914525241, | |
| "grad_norm": 2.0619876384735107, | |
| "learning_rate": 4.8529622063276814e-05, | |
| "loss": 3.3001, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.10980499362128668, | |
| "grad_norm": 3.177309989929199, | |
| "learning_rate": 4.85272028398735e-05, | |
| "loss": 3.09, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.10989611809732094, | |
| "grad_norm": 2.6613833904266357, | |
| "learning_rate": 4.852478168832323e-05, | |
| "loss": 3.297, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.10998724257335521, | |
| "grad_norm": 2.257078170776367, | |
| "learning_rate": 4.852235860882446e-05, | |
| "loss": 2.2332, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.11007836704938946, | |
| "grad_norm": 3.8638627529144287, | |
| "learning_rate": 4.851993360157575e-05, | |
| "loss": 2.7631, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.11016949152542373, | |
| "grad_norm": 2.2826056480407715, | |
| "learning_rate": 4.851750666677584e-05, | |
| "loss": 3.3047, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.11026061600145799, | |
| "grad_norm": 4.1844706535339355, | |
| "learning_rate": 4.851507780462362e-05, | |
| "loss": 3.2165, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.11035174047749226, | |
| "grad_norm": 2.7843410968780518, | |
| "learning_rate": 4.8512647015318166e-05, | |
| "loss": 2.7579, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.11044286495352652, | |
| "grad_norm": 1.784948706626892, | |
| "learning_rate": 4.851021429905868e-05, | |
| "loss": 3.0934, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.11053398942956077, | |
| "grad_norm": 2.8713064193725586, | |
| "learning_rate": 4.850777965604453e-05, | |
| "loss": 2.8885, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.11062511390559504, | |
| "grad_norm": 2.985374927520752, | |
| "learning_rate": 4.850534308647524e-05, | |
| "loss": 3.232, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.1107162383816293, | |
| "grad_norm": 1.9292786121368408, | |
| "learning_rate": 4.8502904590550514e-05, | |
| "loss": 3.1795, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.11080736285766357, | |
| "grad_norm": 3.1823911666870117, | |
| "learning_rate": 4.850046416847018e-05, | |
| "loss": 3.2059, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.11089848733369784, | |
| "grad_norm": 3.464905023574829, | |
| "learning_rate": 4.849802182043425e-05, | |
| "loss": 2.51, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.11098961180973209, | |
| "grad_norm": 1.3138779401779175, | |
| "learning_rate": 4.8495577546642864e-05, | |
| "loss": 2.9226, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.11108073628576635, | |
| "grad_norm": 2.682544469833374, | |
| "learning_rate": 4.849313134729637e-05, | |
| "loss": 3.2597, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.11117186076180062, | |
| "grad_norm": 1.8260408639907837, | |
| "learning_rate": 4.8490683222595224e-05, | |
| "loss": 2.9877, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.11126298523783489, | |
| "grad_norm": 2.5833330154418945, | |
| "learning_rate": 4.848823317274007e-05, | |
| "loss": 3.2531, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.11135410971386915, | |
| "grad_norm": 3.7257158756256104, | |
| "learning_rate": 4.848578119793169e-05, | |
| "loss": 3.3707, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.1114452341899034, | |
| "grad_norm": 2.2442314624786377, | |
| "learning_rate": 4.848332729837103e-05, | |
| "loss": 3.151, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.11153635866593767, | |
| "grad_norm": 2.0687997341156006, | |
| "learning_rate": 4.8480871474259215e-05, | |
| "loss": 3.2912, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.11162748314197193, | |
| "grad_norm": 2.783472776412964, | |
| "learning_rate": 4.847841372579749e-05, | |
| "loss": 2.5138, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.1117186076180062, | |
| "grad_norm": 2.2932865619659424, | |
| "learning_rate": 4.847595405318729e-05, | |
| "loss": 2.9144, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.11180973209404046, | |
| "grad_norm": 2.7115135192871094, | |
| "learning_rate": 4.847349245663019e-05, | |
| "loss": 2.9571, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.11190085657007472, | |
| "grad_norm": 2.5347988605499268, | |
| "learning_rate": 4.847102893632792e-05, | |
| "loss": 3.4235, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.11199198104610898, | |
| "grad_norm": 2.7291646003723145, | |
| "learning_rate": 4.8468563492482395e-05, | |
| "loss": 3.1117, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.11208310552214325, | |
| "grad_norm": 2.545180320739746, | |
| "learning_rate": 4.8466096125295644e-05, | |
| "loss": 3.1981, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.11217422999817751, | |
| "grad_norm": 1.7681077718734741, | |
| "learning_rate": 4.84636268349699e-05, | |
| "loss": 3.0884, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.11226535447421178, | |
| "grad_norm": 2.8129305839538574, | |
| "learning_rate": 4.846115562170751e-05, | |
| "loss": 3.1274, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.11235647895024603, | |
| "grad_norm": 3.1257529258728027, | |
| "learning_rate": 4.8458682485711014e-05, | |
| "loss": 3.1307, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.1124476034262803, | |
| "grad_norm": 3.14485502243042, | |
| "learning_rate": 4.8456207427183094e-05, | |
| "loss": 3.0517, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.11253872790231456, | |
| "grad_norm": 2.2514820098876953, | |
| "learning_rate": 4.8453730446326585e-05, | |
| "loss": 3.3268, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.11262985237834883, | |
| "grad_norm": 1.3902634382247925, | |
| "learning_rate": 4.845125154334449e-05, | |
| "loss": 3.0041, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.11272097685438309, | |
| "grad_norm": 1.5997536182403564, | |
| "learning_rate": 4.844877071843996e-05, | |
| "loss": 3.08, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.11281210133041734, | |
| "grad_norm": 3.2128071784973145, | |
| "learning_rate": 4.8446287971816305e-05, | |
| "loss": 3.4485, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.11290322580645161, | |
| "grad_norm": 2.0368711948394775, | |
| "learning_rate": 4.844380330367701e-05, | |
| "loss": 2.9709, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.11299435028248588, | |
| "grad_norm": 3.5499250888824463, | |
| "learning_rate": 4.84413167142257e-05, | |
| "loss": 3.1674, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.11308547475852014, | |
| "grad_norm": 2.089275598526001, | |
| "learning_rate": 4.8438828203666156e-05, | |
| "loss": 3.0767, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.1131765992345544, | |
| "grad_norm": 2.063582420349121, | |
| "learning_rate": 4.843633777220231e-05, | |
| "loss": 3.0162, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.11326772371058866, | |
| "grad_norm": 1.9222890138626099, | |
| "learning_rate": 4.843384542003828e-05, | |
| "loss": 3.0367, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.11335884818662292, | |
| "grad_norm": 1.74138343334198, | |
| "learning_rate": 4.843135114737832e-05, | |
| "loss": 3.1665, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.11344997266265719, | |
| "grad_norm": 1.58941650390625, | |
| "learning_rate": 4.8428854954426846e-05, | |
| "loss": 3.1407, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.11354109713869145, | |
| "grad_norm": 1.7341008186340332, | |
| "learning_rate": 4.842635684138843e-05, | |
| "loss": 3.0836, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.11363222161472572, | |
| "grad_norm": 2.3776652812957764, | |
| "learning_rate": 4.84238568084678e-05, | |
| "loss": 3.0876, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.11372334609075997, | |
| "grad_norm": 3.617215156555176, | |
| "learning_rate": 4.842135485586983e-05, | |
| "loss": 2.6156, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.11381447056679424, | |
| "grad_norm": 3.285801410675049, | |
| "learning_rate": 4.841885098379959e-05, | |
| "loss": 3.1519, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.1139055950428285, | |
| "grad_norm": 2.993448257446289, | |
| "learning_rate": 4.841634519246227e-05, | |
| "loss": 2.734, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.11399671951886277, | |
| "grad_norm": 4.181899070739746, | |
| "learning_rate": 4.841383748206324e-05, | |
| "loss": 3.2974, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.11408784399489703, | |
| "grad_norm": 2.419050455093384, | |
| "learning_rate": 4.8411327852808e-05, | |
| "loss": 3.2745, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.11417896847093129, | |
| "grad_norm": 1.4583121538162231, | |
| "learning_rate": 4.8408816304902235e-05, | |
| "loss": 2.9262, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.11427009294696555, | |
| "grad_norm": 2.53656268119812, | |
| "learning_rate": 4.8406302838551765e-05, | |
| "loss": 3.3562, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.11436121742299982, | |
| "grad_norm": 2.8883700370788574, | |
| "learning_rate": 4.840378745396259e-05, | |
| "loss": 3.121, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.11445234189903408, | |
| "grad_norm": 3.119507074356079, | |
| "learning_rate": 4.840127015134086e-05, | |
| "loss": 3.0548, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.11454346637506835, | |
| "grad_norm": 3.061594247817993, | |
| "learning_rate": 4.839875093089286e-05, | |
| "loss": 3.1823, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.1146345908511026, | |
| "grad_norm": 2.9661896228790283, | |
| "learning_rate": 4.839622979282506e-05, | |
| "loss": 3.3524, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.11472571532713687, | |
| "grad_norm": 2.240601062774658, | |
| "learning_rate": 4.8393706737344085e-05, | |
| "loss": 3.0736, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.11481683980317113, | |
| "grad_norm": 2.651048183441162, | |
| "learning_rate": 4.8391181764656696e-05, | |
| "loss": 3.296, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.1149079642792054, | |
| "grad_norm": 1.6715929508209229, | |
| "learning_rate": 4.838865487496983e-05, | |
| "loss": 2.9067, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.11499908875523966, | |
| "grad_norm": 2.577331304550171, | |
| "learning_rate": 4.838612606849058e-05, | |
| "loss": 3.1685, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.11509021323127391, | |
| "grad_norm": 3.5737011432647705, | |
| "learning_rate": 4.8383595345426184e-05, | |
| "loss": 2.9315, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.11518133770730818, | |
| "grad_norm": 4.45041036605835, | |
| "learning_rate": 4.838106270598405e-05, | |
| "loss": 3.2293, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.11527246218334244, | |
| "grad_norm": 2.778306007385254, | |
| "learning_rate": 4.837852815037173e-05, | |
| "loss": 3.1879, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.11536358665937671, | |
| "grad_norm": 2.894092321395874, | |
| "learning_rate": 4.837599167879695e-05, | |
| "loss": 3.4217, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.11545471113541098, | |
| "grad_norm": 2.804297685623169, | |
| "learning_rate": 4.837345329146758e-05, | |
| "loss": 2.3764, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.11554583561144523, | |
| "grad_norm": 1.5919743776321411, | |
| "learning_rate": 4.837091298859165e-05, | |
| "loss": 2.9899, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.11563696008747949, | |
| "grad_norm": 2.645395278930664, | |
| "learning_rate": 4.836837077037735e-05, | |
| "loss": 3.2655, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.11572808456351376, | |
| "grad_norm": 2.5275466442108154, | |
| "learning_rate": 4.8365826637033024e-05, | |
| "loss": 3.2374, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.11581920903954802, | |
| "grad_norm": 1.634635329246521, | |
| "learning_rate": 4.836328058876717e-05, | |
| "loss": 3.035, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.11591033351558229, | |
| "grad_norm": 3.141817808151245, | |
| "learning_rate": 4.836073262578846e-05, | |
| "loss": 3.0451, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.11600145799161656, | |
| "grad_norm": 1.7054616212844849, | |
| "learning_rate": 4.835818274830569e-05, | |
| "loss": 3.0899, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.1160925824676508, | |
| "grad_norm": 3.47708797454834, | |
| "learning_rate": 4.835563095652785e-05, | |
| "loss": 2.872, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.11618370694368507, | |
| "grad_norm": 3.0025618076324463, | |
| "learning_rate": 4.835307725066406e-05, | |
| "loss": 3.2133, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.11627483141971934, | |
| "grad_norm": 1.4146130084991455, | |
| "learning_rate": 4.83505216309236e-05, | |
| "loss": 2.9197, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.1163659558957536, | |
| "grad_norm": 2.802640914916992, | |
| "learning_rate": 4.834796409751593e-05, | |
| "loss": 3.0783, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.11645708037178787, | |
| "grad_norm": 3.4829201698303223, | |
| "learning_rate": 4.834540465065063e-05, | |
| "loss": 3.2377, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.11654820484782212, | |
| "grad_norm": 1.5123839378356934, | |
| "learning_rate": 4.8342843290537476e-05, | |
| "loss": 2.9014, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.11663932932385639, | |
| "grad_norm": 3.511087656021118, | |
| "learning_rate": 4.8340280017386375e-05, | |
| "loss": 3.5285, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.11673045379989065, | |
| "grad_norm": 3.651897430419922, | |
| "learning_rate": 4.833771483140739e-05, | |
| "loss": 3.474, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.11682157827592492, | |
| "grad_norm": 1.5524790287017822, | |
| "learning_rate": 4.833514773281076e-05, | |
| "loss": 2.9491, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.11691270275195918, | |
| "grad_norm": 3.1398253440856934, | |
| "learning_rate": 4.8332578721806856e-05, | |
| "loss": 4.4306, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.11700382722799343, | |
| "grad_norm": 3.3685638904571533, | |
| "learning_rate": 4.8330007798606236e-05, | |
| "loss": 3.0482, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.1170949517040277, | |
| "grad_norm": 2.788203001022339, | |
| "learning_rate": 4.832743496341958e-05, | |
| "loss": 2.9866, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.11718607618006197, | |
| "grad_norm": 1.323476791381836, | |
| "learning_rate": 4.8324860216457744e-05, | |
| "loss": 2.9145, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.11727720065609623, | |
| "grad_norm": 3.488725185394287, | |
| "learning_rate": 4.832228355793175e-05, | |
| "loss": 3.4236, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.1173683251321305, | |
| "grad_norm": 2.773366689682007, | |
| "learning_rate": 4.831970498805275e-05, | |
| "loss": 3.2463, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.11745944960816475, | |
| "grad_norm": 2.8726413249969482, | |
| "learning_rate": 4.8317124507032083e-05, | |
| "loss": 3.1034, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.11755057408419901, | |
| "grad_norm": 2.574613094329834, | |
| "learning_rate": 4.831454211508122e-05, | |
| "loss": 3.2809, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.11764169856023328, | |
| "grad_norm": 4.687624454498291, | |
| "learning_rate": 4.83119578124118e-05, | |
| "loss": 2.8164, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.11773282303626754, | |
| "grad_norm": 1.4595892429351807, | |
| "learning_rate": 4.830937159923562e-05, | |
| "loss": 3.0615, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.11782394751230181, | |
| "grad_norm": 2.886767625808716, | |
| "learning_rate": 4.830678347576463e-05, | |
| "loss": 2.8481, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.11791507198833606, | |
| "grad_norm": 3.3065526485443115, | |
| "learning_rate": 4.830419344221093e-05, | |
| "loss": 3.4463, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.11800619646437033, | |
| "grad_norm": 3.140231132507324, | |
| "learning_rate": 4.83016014987868e-05, | |
| "loss": 2.7775, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.1180973209404046, | |
| "grad_norm": 5.782069206237793, | |
| "learning_rate": 4.829900764570464e-05, | |
| "loss": 3.4923, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.11818844541643886, | |
| "grad_norm": 3.9417202472686768, | |
| "learning_rate": 4.8296411883177026e-05, | |
| "loss": 3.0629, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.11827956989247312, | |
| "grad_norm": 2.3533289432525635, | |
| "learning_rate": 4.829381421141671e-05, | |
| "loss": 3.1436, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.11837069436850738, | |
| "grad_norm": 3.0032739639282227, | |
| "learning_rate": 4.829121463063657e-05, | |
| "loss": 3.3542, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.11846181884454164, | |
| "grad_norm": 3.742929220199585, | |
| "learning_rate": 4.828861314104966e-05, | |
| "loss": 3.1316, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.11855294332057591, | |
| "grad_norm": 2.169480800628662, | |
| "learning_rate": 4.828600974286917e-05, | |
| "loss": 3.1513, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.11864406779661017, | |
| "grad_norm": 2.3254833221435547, | |
| "learning_rate": 4.8283404436308464e-05, | |
| "loss": 3.0674, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.11873519227264444, | |
| "grad_norm": 3.3510477542877197, | |
| "learning_rate": 4.828079722158105e-05, | |
| "loss": 2.7181, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.11882631674867869, | |
| "grad_norm": 2.6919658184051514, | |
| "learning_rate": 4.8278188098900626e-05, | |
| "loss": 3.0987, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.11891744122471296, | |
| "grad_norm": 3.264378786087036, | |
| "learning_rate": 4.827557706848099e-05, | |
| "loss": 3.0503, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.11900856570074722, | |
| "grad_norm": 2.856445550918579, | |
| "learning_rate": 4.827296413053614e-05, | |
| "loss": 4.3108, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.11909969017678149, | |
| "grad_norm": 3.0132710933685303, | |
| "learning_rate": 4.82703492852802e-05, | |
| "loss": 3.0884, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.11919081465281575, | |
| "grad_norm": 1.8259915113449097, | |
| "learning_rate": 4.826773253292749e-05, | |
| "loss": 3.1467, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.11928193912885, | |
| "grad_norm": 2.925410509109497, | |
| "learning_rate": 4.826511387369246e-05, | |
| "loss": 3.1775, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.11937306360488427, | |
| "grad_norm": 2.620035409927368, | |
| "learning_rate": 4.826249330778971e-05, | |
| "loss": 3.2156, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.11946418808091853, | |
| "grad_norm": 3.094045400619507, | |
| "learning_rate": 4.825987083543401e-05, | |
| "loss": 3.187, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.1195553125569528, | |
| "grad_norm": 5.414752960205078, | |
| "learning_rate": 4.825724645684027e-05, | |
| "loss": 3.3959, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.11964643703298707, | |
| "grad_norm": 3.0916340351104736, | |
| "learning_rate": 4.825462017222359e-05, | |
| "loss": 2.8089, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.11973756150902132, | |
| "grad_norm": 2.8586535453796387, | |
| "learning_rate": 4.825199198179919e-05, | |
| "loss": 3.2082, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.11982868598505558, | |
| "grad_norm": 3.6413466930389404, | |
| "learning_rate": 4.824936188578246e-05, | |
| "loss": 3.396, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.11991981046108985, | |
| "grad_norm": 1.7131729125976562, | |
| "learning_rate": 4.824672988438895e-05, | |
| "loss": 3.1427, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.12001093493712411, | |
| "grad_norm": 2.2340872287750244, | |
| "learning_rate": 4.824409597783438e-05, | |
| "loss": 3.2302, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.12010205941315838, | |
| "grad_norm": 2.6722652912139893, | |
| "learning_rate": 4.8241460166334577e-05, | |
| "loss": 2.9591, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.12019318388919263, | |
| "grad_norm": 2.3413898944854736, | |
| "learning_rate": 4.823882245010557e-05, | |
| "loss": 3.0534, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.1202843083652269, | |
| "grad_norm": 3.7826550006866455, | |
| "learning_rate": 4.823618282936354e-05, | |
| "loss": 4.7466, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.12037543284126116, | |
| "grad_norm": 1.7024617195129395, | |
| "learning_rate": 4.82335413043248e-05, | |
| "loss": 3.086, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.12046655731729543, | |
| "grad_norm": 1.9387747049331665, | |
| "learning_rate": 4.8230897875205844e-05, | |
| "loss": 3.2878, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.1205576817933297, | |
| "grad_norm": 1.4321894645690918, | |
| "learning_rate": 4.8228252542223305e-05, | |
| "loss": 3.1001, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.12064880626936395, | |
| "grad_norm": 3.9441845417022705, | |
| "learning_rate": 4.822560530559398e-05, | |
| "loss": 3.1041, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.12073993074539821, | |
| "grad_norm": 2.0349435806274414, | |
| "learning_rate": 4.8222956165534824e-05, | |
| "loss": 3.0336, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.12083105522143248, | |
| "grad_norm": 2.7970826625823975, | |
| "learning_rate": 4.822030512226294e-05, | |
| "loss": 3.1574, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.12092217969746674, | |
| "grad_norm": 2.463871717453003, | |
| "learning_rate": 4.821765217599559e-05, | |
| "loss": 3.2166, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.12101330417350101, | |
| "grad_norm": 2.5402960777282715, | |
| "learning_rate": 4.82149973269502e-05, | |
| "loss": 3.0926, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.12110442864953526, | |
| "grad_norm": 3.7119953632354736, | |
| "learning_rate": 4.821234057534434e-05, | |
| "loss": 3.3525, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.12119555312556952, | |
| "grad_norm": 1.9446157217025757, | |
| "learning_rate": 4.820968192139575e-05, | |
| "loss": 3.0707, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.12128667760160379, | |
| "grad_norm": 3.0048012733459473, | |
| "learning_rate": 4.82070213653223e-05, | |
| "loss": 3.2723, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.12137780207763806, | |
| "grad_norm": 3.404109239578247, | |
| "learning_rate": 4.820435890734204e-05, | |
| "loss": 4.3627, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.12146892655367232, | |
| "grad_norm": 2.606018304824829, | |
| "learning_rate": 4.820169454767318e-05, | |
| "loss": 3.1892, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.12156005102970657, | |
| "grad_norm": 3.522080183029175, | |
| "learning_rate": 4.819902828653406e-05, | |
| "loss": 3.3526, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.12165117550574084, | |
| "grad_norm": 4.494570732116699, | |
| "learning_rate": 4.8196360124143204e-05, | |
| "loss": 3.1921, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.1217422999817751, | |
| "grad_norm": 2.7442262172698975, | |
| "learning_rate": 4.819369006071927e-05, | |
| "loss": 3.2029, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.12183342445780937, | |
| "grad_norm": 2.947127342224121, | |
| "learning_rate": 4.819101809648108e-05, | |
| "loss": 3.2069, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.12192454893384364, | |
| "grad_norm": 3.326021432876587, | |
| "learning_rate": 4.818834423164762e-05, | |
| "loss": 3.2037, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.12201567340987789, | |
| "grad_norm": 2.297687292098999, | |
| "learning_rate": 4.818566846643801e-05, | |
| "loss": 3.1991, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.12210679788591215, | |
| "grad_norm": 3.1498403549194336, | |
| "learning_rate": 4.8182990801071546e-05, | |
| "loss": 3.605, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.12219792236194642, | |
| "grad_norm": 2.481204032897949, | |
| "learning_rate": 4.8180311235767684e-05, | |
| "loss": 3.0523, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.12228904683798068, | |
| "grad_norm": 1.7611827850341797, | |
| "learning_rate": 4.817762977074601e-05, | |
| "loss": 2.9838, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.12238017131401495, | |
| "grad_norm": 2.524806261062622, | |
| "learning_rate": 4.8174946406226286e-05, | |
| "loss": 3.1246, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.1224712957900492, | |
| "grad_norm": 1.8765568733215332, | |
| "learning_rate": 4.817226114242843e-05, | |
| "loss": 3.097, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.12256242026608347, | |
| "grad_norm": 2.483398675918579, | |
| "learning_rate": 4.816957397957249e-05, | |
| "loss": 3.2328, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.12265354474211773, | |
| "grad_norm": 2.1173160076141357, | |
| "learning_rate": 4.816688491787872e-05, | |
| "loss": 3.182, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.122744669218152, | |
| "grad_norm": 3.149275779724121, | |
| "learning_rate": 4.816419395756747e-05, | |
| "loss": 3.2735, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.12283579369418626, | |
| "grad_norm": 1.985774278640747, | |
| "learning_rate": 4.8161501098859295e-05, | |
| "loss": 2.91, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.12292691817022051, | |
| "grad_norm": 3.5181996822357178, | |
| "learning_rate": 4.8158806341974875e-05, | |
| "loss": 3.3365, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.12301804264625478, | |
| "grad_norm": 3.0273499488830566, | |
| "learning_rate": 4.8156109687135064e-05, | |
| "loss": 3.1726, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.12310916712228905, | |
| "grad_norm": 2.0603082180023193, | |
| "learning_rate": 4.8153411134560856e-05, | |
| "loss": 3.2063, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.12320029159832331, | |
| "grad_norm": 2.9190120697021484, | |
| "learning_rate": 4.8150710684473407e-05, | |
| "loss": 3.339, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.12329141607435758, | |
| "grad_norm": 1.935994029045105, | |
| "learning_rate": 4.814800833709403e-05, | |
| "loss": 3.0274, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.12338254055039184, | |
| "grad_norm": 2.194535493850708, | |
| "learning_rate": 4.814530409264421e-05, | |
| "loss": 2.8919, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.1234736650264261, | |
| "grad_norm": 2.7491567134857178, | |
| "learning_rate": 4.814259795134555e-05, | |
| "loss": 3.4111, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.12356478950246036, | |
| "grad_norm": 2.234680414199829, | |
| "learning_rate": 4.8139889913419825e-05, | |
| "loss": 3.2018, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.12365591397849462, | |
| "grad_norm": 2.1068971157073975, | |
| "learning_rate": 4.8137179979088995e-05, | |
| "loss": 2.811, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.12374703845452889, | |
| "grad_norm": 2.9462037086486816, | |
| "learning_rate": 4.8134468148575126e-05, | |
| "loss": 3.0475, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.12383816293056316, | |
| "grad_norm": 2.1699352264404297, | |
| "learning_rate": 4.813175442210047e-05, | |
| "loss": 2.9973, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.12392928740659741, | |
| "grad_norm": 2.727155923843384, | |
| "learning_rate": 4.8129038799887436e-05, | |
| "loss": 3.1522, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.12402041188263167, | |
| "grad_norm": 3.1219122409820557, | |
| "learning_rate": 4.812632128215857e-05, | |
| "loss": 3.1755, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.12411153635866594, | |
| "grad_norm": 1.593032956123352, | |
| "learning_rate": 4.8123601869136594e-05, | |
| "loss": 3.0699, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.1242026608347002, | |
| "grad_norm": 3.5900216102600098, | |
| "learning_rate": 4.8120880561044355e-05, | |
| "loss": 2.9064, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.12429378531073447, | |
| "grad_norm": 3.540292739868164, | |
| "learning_rate": 4.81181573581049e-05, | |
| "loss": 3.166, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.12438490978676872, | |
| "grad_norm": 2.823420763015747, | |
| "learning_rate": 4.811543226054138e-05, | |
| "loss": 2.9087, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.12447603426280299, | |
| "grad_norm": 2.7216222286224365, | |
| "learning_rate": 4.811270526857715e-05, | |
| "loss": 3.2342, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.12456715873883725, | |
| "grad_norm": 1.7820650339126587, | |
| "learning_rate": 4.810997638243569e-05, | |
| "loss": 2.9718, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.12465828321487152, | |
| "grad_norm": 3.528695821762085, | |
| "learning_rate": 4.8107245602340635e-05, | |
| "loss": 3.2545, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.12474940769090578, | |
| "grad_norm": 2.6391193866729736, | |
| "learning_rate": 4.8104512928515795e-05, | |
| "loss": 3.3742, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.12484053216694004, | |
| "grad_norm": 2.5269813537597656, | |
| "learning_rate": 4.8101778361185115e-05, | |
| "loss": 2.9321, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.1249316566429743, | |
| "grad_norm": 1.620867371559143, | |
| "learning_rate": 4.809904190057271e-05, | |
| "loss": 2.9661, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.12502278111900855, | |
| "grad_norm": 3.2925851345062256, | |
| "learning_rate": 4.809630354690284e-05, | |
| "loss": 3.0233, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.12511390559504282, | |
| "grad_norm": 1.9607048034667969, | |
| "learning_rate": 4.809356330039992e-05, | |
| "loss": 3.0341, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.12520503007107708, | |
| "grad_norm": 2.5559041500091553, | |
| "learning_rate": 4.809082116128853e-05, | |
| "loss": 3.0071, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.12529615454711135, | |
| "grad_norm": 2.898434638977051, | |
| "learning_rate": 4.8088077129793395e-05, | |
| "loss": 3.0887, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.12538727902314561, | |
| "grad_norm": 4.129066467285156, | |
| "learning_rate": 4.80853312061394e-05, | |
| "loss": 2.9477, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.12547840349917988, | |
| "grad_norm": 3.0416951179504395, | |
| "learning_rate": 4.80825833905516e-05, | |
| "loss": 3.064, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.12556952797521415, | |
| "grad_norm": 1.4269506931304932, | |
| "learning_rate": 4.8079833683255166e-05, | |
| "loss": 2.9104, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.1256606524512484, | |
| "grad_norm": 1.795159935951233, | |
| "learning_rate": 4.8077082084475455e-05, | |
| "loss": 3.0528, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.12575177692728268, | |
| "grad_norm": 1.8280433416366577, | |
| "learning_rate": 4.8074328594437976e-05, | |
| "loss": 3.0135, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.12584290140331694, | |
| "grad_norm": 2.7084426879882812, | |
| "learning_rate": 4.807157321336838e-05, | |
| "loss": 3.3247, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.12593402587935118, | |
| "grad_norm": 1.4713351726531982, | |
| "learning_rate": 4.8068815941492493e-05, | |
| "loss": 2.9893, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.12602515035538545, | |
| "grad_norm": 1.647141456604004, | |
| "learning_rate": 4.806605677903627e-05, | |
| "loss": 3.0282, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.1261162748314197, | |
| "grad_norm": 2.6358165740966797, | |
| "learning_rate": 4.806329572622585e-05, | |
| "loss": 3.0908, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.12620739930745398, | |
| "grad_norm": 2.0468578338623047, | |
| "learning_rate": 4.80605327832875e-05, | |
| "loss": 3.0642, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.12629852378348824, | |
| "grad_norm": 2.854619026184082, | |
| "learning_rate": 4.8057767950447676e-05, | |
| "loss": 3.3726, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.1263896482595225, | |
| "grad_norm": 2.3269712924957275, | |
| "learning_rate": 4.805500122793293e-05, | |
| "loss": 3.0171, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.12648077273555677, | |
| "grad_norm": 1.5846275091171265, | |
| "learning_rate": 4.805223261597004e-05, | |
| "loss": 3.0804, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.12657189721159104, | |
| "grad_norm": 3.1986141204833984, | |
| "learning_rate": 4.8049462114785884e-05, | |
| "loss": 3.3668, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.1266630216876253, | |
| "grad_norm": 3.514010429382324, | |
| "learning_rate": 4.804668972460752e-05, | |
| "loss": 2.3334, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.12675414616365957, | |
| "grad_norm": 3.47501540184021, | |
| "learning_rate": 4.804391544566216e-05, | |
| "loss": 2.9559, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.1268452706396938, | |
| "grad_norm": 2.4494612216949463, | |
| "learning_rate": 4.804113927817716e-05, | |
| "loss": 3.2474, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.12693639511572807, | |
| "grad_norm": 2.538818836212158, | |
| "learning_rate": 4.8038361222380054e-05, | |
| "loss": 3.3579, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.12702751959176234, | |
| "grad_norm": 3.1854991912841797, | |
| "learning_rate": 4.8035581278498496e-05, | |
| "loss": 3.2522, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.1271186440677966, | |
| "grad_norm": 3.3268232345581055, | |
| "learning_rate": 4.803279944676032e-05, | |
| "loss": 3.4415, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.12720976854383087, | |
| "grad_norm": 3.4658143520355225, | |
| "learning_rate": 4.803001572739352e-05, | |
| "loss": 3.2007, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.12730089301986514, | |
| "grad_norm": 2.278604507446289, | |
| "learning_rate": 4.802723012062622e-05, | |
| "loss": 3.1008, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.1273920174958994, | |
| "grad_norm": 3.4236605167388916, | |
| "learning_rate": 4.8024442626686706e-05, | |
| "loss": 3.1024, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.12748314197193367, | |
| "grad_norm": 1.7195907831192017, | |
| "learning_rate": 4.802165324580344e-05, | |
| "loss": 3.0942, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.12757426644796793, | |
| "grad_norm": 5.269683837890625, | |
| "learning_rate": 4.801886197820501e-05, | |
| "loss": 3.244, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.1276653909240022, | |
| "grad_norm": 3.6192774772644043, | |
| "learning_rate": 4.801606882412017e-05, | |
| "loss": 3.4044, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.12775651540003646, | |
| "grad_norm": 2.8494386672973633, | |
| "learning_rate": 4.8013273783777844e-05, | |
| "loss": 3.2665, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.1278476398760707, | |
| "grad_norm": 2.5437350273132324, | |
| "learning_rate": 4.801047685740709e-05, | |
| "loss": 3.0269, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.12793876435210497, | |
| "grad_norm": 3.878979444503784, | |
| "learning_rate": 4.800767804523713e-05, | |
| "loss": 2.5677, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.12802988882813923, | |
| "grad_norm": 3.141019821166992, | |
| "learning_rate": 4.800487734749732e-05, | |
| "loss": 3.2695, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.1281210133041735, | |
| "grad_norm": 2.654679536819458, | |
| "learning_rate": 4.8002074764417204e-05, | |
| "loss": 3.1931, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.12821213778020776, | |
| "grad_norm": 4.664015293121338, | |
| "learning_rate": 4.799927029622647e-05, | |
| "loss": 3.3309, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.12830326225624203, | |
| "grad_norm": 2.804391384124756, | |
| "learning_rate": 4.799646394315494e-05, | |
| "loss": 3.2404, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.1283943867322763, | |
| "grad_norm": 2.2764317989349365, | |
| "learning_rate": 4.799365570543262e-05, | |
| "loss": 3.1464, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.12848551120831056, | |
| "grad_norm": 2.904567003250122, | |
| "learning_rate": 4.799084558328965e-05, | |
| "loss": 3.3084, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.12857663568434483, | |
| "grad_norm": 1.6689000129699707, | |
| "learning_rate": 4.7988033576956315e-05, | |
| "loss": 2.981, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.1286677601603791, | |
| "grad_norm": 1.643546462059021, | |
| "learning_rate": 4.7985219686663096e-05, | |
| "loss": 3.1424, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.12875888463641333, | |
| "grad_norm": 2.7469589710235596, | |
| "learning_rate": 4.7982403912640594e-05, | |
| "loss": 3.0977, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.1288500091124476, | |
| "grad_norm": 2.6127498149871826, | |
| "learning_rate": 4.797958625511956e-05, | |
| "loss": 3.3504, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.12894113358848186, | |
| "grad_norm": 1.3221989870071411, | |
| "learning_rate": 4.7976766714330936e-05, | |
| "loss": 2.9263, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.12903225806451613, | |
| "grad_norm": 2.6175034046173096, | |
| "learning_rate": 4.7973945290505766e-05, | |
| "loss": 3.2393, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.1291233825405504, | |
| "grad_norm": 1.7804768085479736, | |
| "learning_rate": 4.79711219838753e-05, | |
| "loss": 3.0257, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.12921450701658466, | |
| "grad_norm": 3.141484498977661, | |
| "learning_rate": 4.796829679467091e-05, | |
| "loss": 3.3176, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.12930563149261892, | |
| "grad_norm": 2.9118833541870117, | |
| "learning_rate": 4.796546972312413e-05, | |
| "loss": 3.1751, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.1293967559686532, | |
| "grad_norm": 2.3245956897735596, | |
| "learning_rate": 4.796264076946665e-05, | |
| "loss": 3.2194, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.12948788044468745, | |
| "grad_norm": 1.7661198377609253, | |
| "learning_rate": 4.795980993393032e-05, | |
| "loss": 2.9074, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.12957900492072172, | |
| "grad_norm": 1.461868166923523, | |
| "learning_rate": 4.795697721674713e-05, | |
| "loss": 3.0488, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.12967012939675596, | |
| "grad_norm": 3.5125927925109863, | |
| "learning_rate": 4.795414261814923e-05, | |
| "loss": 3.3445, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.12976125387279022, | |
| "grad_norm": 5.004077434539795, | |
| "learning_rate": 4.795130613836894e-05, | |
| "loss": 2.4742, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.1298523783488245, | |
| "grad_norm": 2.956143856048584, | |
| "learning_rate": 4.7948467777638716e-05, | |
| "loss": 3.1681, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.12994350282485875, | |
| "grad_norm": 1.55666983127594, | |
| "learning_rate": 4.7945627536191166e-05, | |
| "loss": 3.0337, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.13003462730089302, | |
| "grad_norm": 3.5222396850585938, | |
| "learning_rate": 4.7942785414259064e-05, | |
| "loss": 4.1952, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.13012575177692728, | |
| "grad_norm": 1.7636314630508423, | |
| "learning_rate": 4.7939941412075326e-05, | |
| "loss": 3.1187, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.13021687625296155, | |
| "grad_norm": 1.582393765449524, | |
| "learning_rate": 4.7937095529873046e-05, | |
| "loss": 3.0345, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.13030800072899582, | |
| "grad_norm": 2.269049882888794, | |
| "learning_rate": 4.793424776788544e-05, | |
| "loss": 2.9892, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.13039912520503008, | |
| "grad_norm": 2.121647596359253, | |
| "learning_rate": 4.7931398126345895e-05, | |
| "loss": 3.2096, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.13049024968106435, | |
| "grad_norm": 2.4415335655212402, | |
| "learning_rate": 4.7928546605487956e-05, | |
| "loss": 3.3201, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.13058137415709858, | |
| "grad_norm": 1.8279386758804321, | |
| "learning_rate": 4.7925693205545306e-05, | |
| "loss": 3.0392, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.13067249863313285, | |
| "grad_norm": 2.7299156188964844, | |
| "learning_rate": 4.792283792675181e-05, | |
| "loss": 3.3905, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.13076362310916712, | |
| "grad_norm": 2.9097607135772705, | |
| "learning_rate": 4.791998076934145e-05, | |
| "loss": 3.1524, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.13085474758520138, | |
| "grad_norm": 1.3054111003875732, | |
| "learning_rate": 4.79171217335484e-05, | |
| "loss": 3.0252, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.13094587206123565, | |
| "grad_norm": 2.88234543800354, | |
| "learning_rate": 4.7914260819606956e-05, | |
| "loss": 3.0998, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.1310369965372699, | |
| "grad_norm": 1.7457668781280518, | |
| "learning_rate": 4.791139802775158e-05, | |
| "loss": 3.1138, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.13112812101330418, | |
| "grad_norm": 2.7463388442993164, | |
| "learning_rate": 4.79085333582169e-05, | |
| "loss": 3.1257, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.13121924548933844, | |
| "grad_norm": 2.5777535438537598, | |
| "learning_rate": 4.790566681123768e-05, | |
| "loss": 3.1941, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.1313103699653727, | |
| "grad_norm": 4.522150993347168, | |
| "learning_rate": 4.7902798387048845e-05, | |
| "loss": 2.92, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.13140149444140697, | |
| "grad_norm": 1.9641166925430298, | |
| "learning_rate": 4.789992808588547e-05, | |
| "loss": 3.0691, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.1314926189174412, | |
| "grad_norm": 1.9266825914382935, | |
| "learning_rate": 4.78970559079828e-05, | |
| "loss": 3.0564, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.13158374339347548, | |
| "grad_norm": 3.1203954219818115, | |
| "learning_rate": 4.78941818535762e-05, | |
| "loss": 2.7019, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.13167486786950974, | |
| "grad_norm": 4.034260272979736, | |
| "learning_rate": 4.7891305922901235e-05, | |
| "loss": 3.2111, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.131765992345544, | |
| "grad_norm": 1.3861982822418213, | |
| "learning_rate": 4.7888428116193585e-05, | |
| "loss": 3.1494, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.13185711682157827, | |
| "grad_norm": 2.3220410346984863, | |
| "learning_rate": 4.78855484336891e-05, | |
| "loss": 3.4374, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.13194824129761254, | |
| "grad_norm": 2.328646183013916, | |
| "learning_rate": 4.788266687562378e-05, | |
| "loss": 2.9784, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.1320393657736468, | |
| "grad_norm": 2.595676898956299, | |
| "learning_rate": 4.7879783442233776e-05, | |
| "loss": 3.2522, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.13213049024968107, | |
| "grad_norm": 3.259542942047119, | |
| "learning_rate": 4.787689813375541e-05, | |
| "loss": 3.4706, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.13222161472571534, | |
| "grad_norm": 3.3692314624786377, | |
| "learning_rate": 4.787401095042513e-05, | |
| "loss": 3.3304, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.1323127392017496, | |
| "grad_norm": 1.2401968240737915, | |
| "learning_rate": 4.787112189247956e-05, | |
| "loss": 2.7759, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.13240386367778384, | |
| "grad_norm": 2.5161514282226562, | |
| "learning_rate": 4.786823096015547e-05, | |
| "loss": 3.0425, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.1324949881538181, | |
| "grad_norm": 3.511383533477783, | |
| "learning_rate": 4.786533815368978e-05, | |
| "loss": 4.4405, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.13258611262985237, | |
| "grad_norm": 2.5629379749298096, | |
| "learning_rate": 4.786244347331956e-05, | |
| "loss": 3.0869, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.13267723710588664, | |
| "grad_norm": 1.6853193044662476, | |
| "learning_rate": 4.785954691928206e-05, | |
| "loss": 3.2892, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.1327683615819209, | |
| "grad_norm": 1.5359575748443604, | |
| "learning_rate": 4.785664849181465e-05, | |
| "loss": 2.9666, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.13285948605795517, | |
| "grad_norm": 2.309920310974121, | |
| "learning_rate": 4.785374819115487e-05, | |
| "loss": 3.3415, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.13295061053398943, | |
| "grad_norm": 1.5705186128616333, | |
| "learning_rate": 4.7850846017540404e-05, | |
| "loss": 2.9575, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.1330417350100237, | |
| "grad_norm": 2.871138572692871, | |
| "learning_rate": 4.784794197120911e-05, | |
| "loss": 2.8445, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.13313285948605796, | |
| "grad_norm": 2.2359492778778076, | |
| "learning_rate": 4.784503605239898e-05, | |
| "loss": 3.2162, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.13322398396209223, | |
| "grad_norm": 1.5131843090057373, | |
| "learning_rate": 4.7842128261348164e-05, | |
| "loss": 3.1909, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.13331510843812647, | |
| "grad_norm": 2.033951759338379, | |
| "learning_rate": 4.783921859829496e-05, | |
| "loss": 3.2403, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.13340623291416073, | |
| "grad_norm": 3.2505292892456055, | |
| "learning_rate": 4.783630706347785e-05, | |
| "loss": 3.3799, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.133497357390195, | |
| "grad_norm": 2.0687122344970703, | |
| "learning_rate": 4.783339365713542e-05, | |
| "loss": 3.1018, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.13358848186622926, | |
| "grad_norm": 3.194941282272339, | |
| "learning_rate": 4.7830478379506446e-05, | |
| "loss": 3.0541, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.13367960634226353, | |
| "grad_norm": 1.9203050136566162, | |
| "learning_rate": 4.782756123082986e-05, | |
| "loss": 3.1837, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.1337707308182978, | |
| "grad_norm": 1.5487380027770996, | |
| "learning_rate": 4.78246422113447e-05, | |
| "loss": 3.048, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.13386185529433206, | |
| "grad_norm": 3.313387870788574, | |
| "learning_rate": 4.7821721321290216e-05, | |
| "loss": 2.9177, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.13395297977036633, | |
| "grad_norm": 1.603618860244751, | |
| "learning_rate": 4.7818798560905785e-05, | |
| "loss": 3.0036, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.1340441042464006, | |
| "grad_norm": 2.8620636463165283, | |
| "learning_rate": 4.7815873930430934e-05, | |
| "loss": 3.0144, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.13413522872243486, | |
| "grad_norm": 3.5870542526245117, | |
| "learning_rate": 4.7812947430105346e-05, | |
| "loss": 3.2079, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.1342263531984691, | |
| "grad_norm": 1.7587014436721802, | |
| "learning_rate": 4.781001906016887e-05, | |
| "loss": 2.9849, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.13431747767450336, | |
| "grad_norm": 1.599656581878662, | |
| "learning_rate": 4.780708882086148e-05, | |
| "loss": 3.0486, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.13440860215053763, | |
| "grad_norm": 2.6890735626220703, | |
| "learning_rate": 4.780415671242334e-05, | |
| "loss": 3.2422, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.1344997266265719, | |
| "grad_norm": 2.605520248413086, | |
| "learning_rate": 4.780122273509473e-05, | |
| "loss": 3.0581, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.13459085110260616, | |
| "grad_norm": 2.7986021041870117, | |
| "learning_rate": 4.7798286889116113e-05, | |
| "loss": 3.2861, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 0.13468197557864042, | |
| "grad_norm": 2.143354892730713, | |
| "learning_rate": 4.779534917472809e-05, | |
| "loss": 3.1575, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.1347731000546747, | |
| "grad_norm": 3.173980712890625, | |
| "learning_rate": 4.779240959217141e-05, | |
| "loss": 2.7888, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 0.13486422453070895, | |
| "grad_norm": 2.8368523120880127, | |
| "learning_rate": 4.7789468141687e-05, | |
| "loss": 3.12, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.13495534900674322, | |
| "grad_norm": 2.084016799926758, | |
| "learning_rate": 4.778652482351591e-05, | |
| "loss": 3.0239, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 0.13504647348277748, | |
| "grad_norm": 2.599902629852295, | |
| "learning_rate": 4.778357963789936e-05, | |
| "loss": 3.1395, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.13513759795881175, | |
| "grad_norm": 3.741800546646118, | |
| "learning_rate": 4.778063258507872e-05, | |
| "loss": 3.1891, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 0.135228722434846, | |
| "grad_norm": 2.012301445007324, | |
| "learning_rate": 4.777768366529551e-05, | |
| "loss": 3.3807, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.13531984691088025, | |
| "grad_norm": 1.6227391958236694, | |
| "learning_rate": 4.777473287879142e-05, | |
| "loss": 3.0725, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.13541097138691452, | |
| "grad_norm": 2.414212703704834, | |
| "learning_rate": 4.777178022580826e-05, | |
| "loss": 3.3113, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.13550209586294878, | |
| "grad_norm": 3.2059073448181152, | |
| "learning_rate": 4.776882570658802e-05, | |
| "loss": 3.3099, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 4.668062210083008, | |
| "learning_rate": 4.7765869321372836e-05, | |
| "loss": 3.2421, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.13568434481501732, | |
| "grad_norm": 1.8324368000030518, | |
| "learning_rate": 4.776291107040498e-05, | |
| "loss": 2.9967, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 0.13577546929105158, | |
| "grad_norm": 2.761749744415283, | |
| "learning_rate": 4.775995095392692e-05, | |
| "loss": 3.2551, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.13586659376708585, | |
| "grad_norm": 2.2648134231567383, | |
| "learning_rate": 4.775698897218123e-05, | |
| "loss": 3.1676, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.1359577182431201, | |
| "grad_norm": 3.5984230041503906, | |
| "learning_rate": 4.7754025125410654e-05, | |
| "loss": 2.5762, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.13604884271915438, | |
| "grad_norm": 2.518404245376587, | |
| "learning_rate": 4.77510594138581e-05, | |
| "loss": 3.1694, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 0.13613996719518862, | |
| "grad_norm": 2.6424813270568848, | |
| "learning_rate": 4.7748091837766623e-05, | |
| "loss": 3.5051, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.13623109167122288, | |
| "grad_norm": 1.2104846239089966, | |
| "learning_rate": 4.7745122397379413e-05, | |
| "loss": 2.9033, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.13632221614725715, | |
| "grad_norm": 2.830470085144043, | |
| "learning_rate": 4.774215109293984e-05, | |
| "loss": 2.8473, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.1364133406232914, | |
| "grad_norm": 2.8165035247802734, | |
| "learning_rate": 4.773917792469142e-05, | |
| "loss": 3.152, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 0.13650446509932568, | |
| "grad_norm": 2.5895888805389404, | |
| "learning_rate": 4.773620289287778e-05, | |
| "loss": 3.1265, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.13659558957535994, | |
| "grad_norm": 1.6958574056625366, | |
| "learning_rate": 4.773322599774278e-05, | |
| "loss": 3.1278, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 0.1366867140513942, | |
| "grad_norm": 1.5739076137542725, | |
| "learning_rate": 4.773024723953037e-05, | |
| "loss": 2.9989, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.13677783852742847, | |
| "grad_norm": 1.790186882019043, | |
| "learning_rate": 4.772726661848467e-05, | |
| "loss": 2.9863, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 0.13686896300346274, | |
| "grad_norm": 1.7760246992111206, | |
| "learning_rate": 4.7724284134849945e-05, | |
| "loss": 3.0368, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.136960087479497, | |
| "grad_norm": 1.7839252948760986, | |
| "learning_rate": 4.7721299788870634e-05, | |
| "loss": 3.0473, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 0.13705121195553124, | |
| "grad_norm": 3.4607291221618652, | |
| "learning_rate": 4.771831358079132e-05, | |
| "loss": 3.3073, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.1371423364315655, | |
| "grad_norm": 4.04274845123291, | |
| "learning_rate": 4.771532551085672e-05, | |
| "loss": 4.4627, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.13723346090759977, | |
| "grad_norm": 1.4731826782226562, | |
| "learning_rate": 4.771233557931172e-05, | |
| "loss": 2.8455, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.13732458538363404, | |
| "grad_norm": 6.251603603363037, | |
| "learning_rate": 4.770934378640137e-05, | |
| "loss": 3.3063, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 0.1374157098596683, | |
| "grad_norm": 2.3858461380004883, | |
| "learning_rate": 4.7706350132370844e-05, | |
| "loss": 3.1662, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.13750683433570257, | |
| "grad_norm": 2.9865639209747314, | |
| "learning_rate": 4.77033546174655e-05, | |
| "loss": 3.1456, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 0.13759795881173684, | |
| "grad_norm": 2.4804508686065674, | |
| "learning_rate": 4.7700357241930815e-05, | |
| "loss": 3.1369, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.1376890832877711, | |
| "grad_norm": 3.5824382305145264, | |
| "learning_rate": 4.769735800601245e-05, | |
| "loss": 3.1561, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 0.13778020776380537, | |
| "grad_norm": 2.742952585220337, | |
| "learning_rate": 4.7694356909956194e-05, | |
| "loss": 4.2205, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.13787133223983963, | |
| "grad_norm": 1.8473784923553467, | |
| "learning_rate": 4.7691353954008e-05, | |
| "loss": 2.8503, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 0.13796245671587387, | |
| "grad_norm": 2.6001064777374268, | |
| "learning_rate": 4.768834913841398e-05, | |
| "loss": 3.1715, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.13805358119190814, | |
| "grad_norm": 2.0480306148529053, | |
| "learning_rate": 4.768534246342038e-05, | |
| "loss": 3.1111, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.1381447056679424, | |
| "grad_norm": 1.9670919179916382, | |
| "learning_rate": 4.768233392927361e-05, | |
| "loss": 2.9742, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.13823583014397667, | |
| "grad_norm": 2.6016757488250732, | |
| "learning_rate": 4.767932353622025e-05, | |
| "loss": 3.2246, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 0.13832695462001093, | |
| "grad_norm": 1.480478048324585, | |
| "learning_rate": 4.767631128450699e-05, | |
| "loss": 2.9532, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.1384180790960452, | |
| "grad_norm": 2.2611401081085205, | |
| "learning_rate": 4.767329717438071e-05, | |
| "loss": 3.1999, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.13850920357207946, | |
| "grad_norm": 1.8727288246154785, | |
| "learning_rate": 4.7670281206088406e-05, | |
| "loss": 2.9416, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.13860032804811373, | |
| "grad_norm": 3.45841121673584, | |
| "learning_rate": 4.766726337987728e-05, | |
| "loss": 3.2848, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 0.138691452524148, | |
| "grad_norm": 2.117701530456543, | |
| "learning_rate": 4.7664243695994634e-05, | |
| "loss": 2.9624, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.13878257700018226, | |
| "grad_norm": 1.6901532411575317, | |
| "learning_rate": 4.766122215468795e-05, | |
| "loss": 2.9421, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 0.1388737014762165, | |
| "grad_norm": 3.0419657230377197, | |
| "learning_rate": 4.765819875620485e-05, | |
| "loss": 2.9388, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.13896482595225076, | |
| "grad_norm": 2.559110641479492, | |
| "learning_rate": 4.765517350079313e-05, | |
| "loss": 3.0909, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.13905595042828503, | |
| "grad_norm": 1.8691017627716064, | |
| "learning_rate": 4.7652146388700705e-05, | |
| "loss": 3.017, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.1391470749043193, | |
| "grad_norm": 2.1920082569122314, | |
| "learning_rate": 4.764911742017565e-05, | |
| "loss": 2.8878, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 0.13923819938035356, | |
| "grad_norm": 5.991618633270264, | |
| "learning_rate": 4.764608659546623e-05, | |
| "loss": 3.2354, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.13932932385638783, | |
| "grad_norm": 1.6653143167495728, | |
| "learning_rate": 4.764305391482081e-05, | |
| "loss": 2.9833, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 0.1394204483324221, | |
| "grad_norm": 2.5954642295837402, | |
| "learning_rate": 4.7640019378487934e-05, | |
| "loss": 3.1639, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.13951157280845636, | |
| "grad_norm": 3.557215929031372, | |
| "learning_rate": 4.763698298671629e-05, | |
| "loss": 2.8795, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 0.13960269728449062, | |
| "grad_norm": 1.8863126039505005, | |
| "learning_rate": 4.7633944739754746e-05, | |
| "loss": 2.833, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.1396938217605249, | |
| "grad_norm": 5.656214714050293, | |
| "learning_rate": 4.7630904637852275e-05, | |
| "loss": 3.1804, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.13978494623655913, | |
| "grad_norm": 2.910088300704956, | |
| "learning_rate": 4.7627862681258037e-05, | |
| "loss": 3.056, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.1398760707125934, | |
| "grad_norm": 1.1903241872787476, | |
| "learning_rate": 4.762481887022132e-05, | |
| "loss": 2.9107, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.13996719518862766, | |
| "grad_norm": 1.3917659521102905, | |
| "learning_rate": 4.762177320499158e-05, | |
| "loss": 2.9145, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.14005831966466192, | |
| "grad_norm": 2.0048739910125732, | |
| "learning_rate": 4.7618725685818434e-05, | |
| "loss": 2.9777, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 0.1401494441406962, | |
| "grad_norm": 3.215196371078491, | |
| "learning_rate": 4.761567631295163e-05, | |
| "loss": 2.9824, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.14024056861673045, | |
| "grad_norm": 2.187133550643921, | |
| "learning_rate": 4.761262508664107e-05, | |
| "loss": 3.1835, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 0.14033169309276472, | |
| "grad_norm": 2.6757946014404297, | |
| "learning_rate": 4.760957200713682e-05, | |
| "loss": 3.1799, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.14042281756879899, | |
| "grad_norm": 2.7127139568328857, | |
| "learning_rate": 4.760651707468908e-05, | |
| "loss": 3.1264, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 0.14051394204483325, | |
| "grad_norm": 2.6738502979278564, | |
| "learning_rate": 4.760346028954824e-05, | |
| "loss": 3.2401, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.14060506652086752, | |
| "grad_norm": 2.608595371246338, | |
| "learning_rate": 4.76004016519648e-05, | |
| "loss": 3.0873, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 0.14069619099690175, | |
| "grad_norm": 2.3539223670959473, | |
| "learning_rate": 4.7597341162189426e-05, | |
| "loss": 3.2628, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.14078731547293602, | |
| "grad_norm": 1.7645334005355835, | |
| "learning_rate": 4.7594278820472934e-05, | |
| "loss": 3.0802, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.14087843994897029, | |
| "grad_norm": 2.0945842266082764, | |
| "learning_rate": 4.759121462706631e-05, | |
| "loss": 3.3989, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.14096956442500455, | |
| "grad_norm": 3.260627031326294, | |
| "learning_rate": 4.758814858222066e-05, | |
| "loss": 3.2328, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 0.14106068890103882, | |
| "grad_norm": 3.0855679512023926, | |
| "learning_rate": 4.7585080686187264e-05, | |
| "loss": 4.337, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.14115181337707308, | |
| "grad_norm": 2.290228843688965, | |
| "learning_rate": 4.758201093921755e-05, | |
| "loss": 3.2579, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 0.14124293785310735, | |
| "grad_norm": 2.877847194671631, | |
| "learning_rate": 4.7578939341563095e-05, | |
| "loss": 3.0991, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.1413340623291416, | |
| "grad_norm": 2.6873252391815186, | |
| "learning_rate": 4.7575865893475625e-05, | |
| "loss": 3.2119, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 0.14142518680517588, | |
| "grad_norm": 2.232259750366211, | |
| "learning_rate": 4.757279059520703e-05, | |
| "loss": 2.8747, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.14151631128121014, | |
| "grad_norm": 3.1098685264587402, | |
| "learning_rate": 4.756971344700934e-05, | |
| "loss": 4.3473, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 0.14160743575724438, | |
| "grad_norm": 1.4829670190811157, | |
| "learning_rate": 4.7566634449134734e-05, | |
| "loss": 3.079, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.14169856023327865, | |
| "grad_norm": 2.051966667175293, | |
| "learning_rate": 4.7563553601835555e-05, | |
| "loss": 3.0552, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.1417896847093129, | |
| "grad_norm": 2.224562883377075, | |
| "learning_rate": 4.756047090536428e-05, | |
| "loss": 3.1404, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.14188080918534718, | |
| "grad_norm": 2.442316770553589, | |
| "learning_rate": 4.7557386359973554e-05, | |
| "loss": 2.9279, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 0.14197193366138144, | |
| "grad_norm": 2.465324640274048, | |
| "learning_rate": 4.755429996591618e-05, | |
| "loss": 2.7246, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.1420630581374157, | |
| "grad_norm": 2.204219102859497, | |
| "learning_rate": 4.755121172344508e-05, | |
| "loss": 3.0415, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 0.14215418261344998, | |
| "grad_norm": 1.9507112503051758, | |
| "learning_rate": 4.754812163281335e-05, | |
| "loss": 3.2588, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.14224530708948424, | |
| "grad_norm": 2.620863914489746, | |
| "learning_rate": 4.7545029694274254e-05, | |
| "loss": 2.7928, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 0.1423364315655185, | |
| "grad_norm": 2.5902299880981445, | |
| "learning_rate": 4.754193590808117e-05, | |
| "loss": 3.3398, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.14242755604155277, | |
| "grad_norm": 1.4641609191894531, | |
| "learning_rate": 4.753884027448765e-05, | |
| "loss": 2.9775, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 0.14251868051758704, | |
| "grad_norm": 1.9195103645324707, | |
| "learning_rate": 4.753574279374739e-05, | |
| "loss": 3.0164, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.14260980499362128, | |
| "grad_norm": 2.0863749980926514, | |
| "learning_rate": 4.7532643466114266e-05, | |
| "loss": 3.1996, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.14270092946965554, | |
| "grad_norm": 2.3902792930603027, | |
| "learning_rate": 4.752954229184224e-05, | |
| "loss": 3.1756, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.1427920539456898, | |
| "grad_norm": 2.883986711502075, | |
| "learning_rate": 4.75264392711855e-05, | |
| "loss": 3.3782, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 0.14288317842172407, | |
| "grad_norm": 1.5548768043518066, | |
| "learning_rate": 4.752333440439832e-05, | |
| "loss": 3.0867, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.14297430289775834, | |
| "grad_norm": 2.3053369522094727, | |
| "learning_rate": 4.752022769173519e-05, | |
| "loss": 3.2028, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 0.1430654273737926, | |
| "grad_norm": 2.11873722076416, | |
| "learning_rate": 4.751711913345069e-05, | |
| "loss": 3.4555, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.14315655184982687, | |
| "grad_norm": 1.6578961610794067, | |
| "learning_rate": 4.7514008729799584e-05, | |
| "loss": 2.8687, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 0.14324767632586113, | |
| "grad_norm": 2.5347859859466553, | |
| "learning_rate": 4.7510896481036796e-05, | |
| "loss": 3.3134, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.1433388008018954, | |
| "grad_norm": 1.5316507816314697, | |
| "learning_rate": 4.750778238741737e-05, | |
| "loss": 3.0429, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 0.14342992527792967, | |
| "grad_norm": 2.9990713596343994, | |
| "learning_rate": 4.7504666449196534e-05, | |
| "loss": 3.3818, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.1435210497539639, | |
| "grad_norm": 2.222365617752075, | |
| "learning_rate": 4.750154866662964e-05, | |
| "loss": 3.1164, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.14361217422999817, | |
| "grad_norm": 1.5940253734588623, | |
| "learning_rate": 4.7498429039972195e-05, | |
| "loss": 2.9707, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.14370329870603243, | |
| "grad_norm": 1.7318164110183716, | |
| "learning_rate": 4.7495307569479886e-05, | |
| "loss": 2.9525, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 0.1437944231820667, | |
| "grad_norm": 2.8493340015411377, | |
| "learning_rate": 4.749218425540851e-05, | |
| "loss": 2.9688, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.14388554765810097, | |
| "grad_norm": 3.3721001148223877, | |
| "learning_rate": 4.748905909801405e-05, | |
| "loss": 3.0444, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 0.14397667213413523, | |
| "grad_norm": 3.1313891410827637, | |
| "learning_rate": 4.748593209755262e-05, | |
| "loss": 3.0512, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.1440677966101695, | |
| "grad_norm": 1.885908842086792, | |
| "learning_rate": 4.7482803254280484e-05, | |
| "loss": 2.9611, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 0.14415892108620376, | |
| "grad_norm": 2.9428458213806152, | |
| "learning_rate": 4.747967256845407e-05, | |
| "loss": 2.9153, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.14425004556223803, | |
| "grad_norm": 4.332671165466309, | |
| "learning_rate": 4.747654004032995e-05, | |
| "loss": 3.2974, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 0.1443411700382723, | |
| "grad_norm": 1.3580037355422974, | |
| "learning_rate": 4.747340567016484e-05, | |
| "loss": 3.0826, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.14443229451430653, | |
| "grad_norm": 9.840303421020508, | |
| "learning_rate": 4.747026945821562e-05, | |
| "loss": 2.5629, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.1445234189903408, | |
| "grad_norm": 2.672960042953491, | |
| "learning_rate": 4.7467131404739315e-05, | |
| "loss": 3.1531, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.14461454346637506, | |
| "grad_norm": 2.945892572402954, | |
| "learning_rate": 4.7463991509993096e-05, | |
| "loss": 3.3506, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 0.14470566794240933, | |
| "grad_norm": 2.753072738647461, | |
| "learning_rate": 4.74608497742343e-05, | |
| "loss": 3.0978, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.1447967924184436, | |
| "grad_norm": 2.628932237625122, | |
| "learning_rate": 4.7457706197720395e-05, | |
| "loss": 3.2419, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 0.14488791689447786, | |
| "grad_norm": 3.0868217945098877, | |
| "learning_rate": 4.745456078070901e-05, | |
| "loss": 3.1019, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.14497904137051212, | |
| "grad_norm": 1.7500460147857666, | |
| "learning_rate": 4.745141352345793e-05, | |
| "loss": 3.0763, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 0.1450701658465464, | |
| "grad_norm": 2.3832669258117676, | |
| "learning_rate": 4.744826442622508e-05, | |
| "loss": 3.0807, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.14516129032258066, | |
| "grad_norm": 3.4366202354431152, | |
| "learning_rate": 4.7445113489268544e-05, | |
| "loss": 3.0111, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 0.14525241479861492, | |
| "grad_norm": 1.3814283609390259, | |
| "learning_rate": 4.744196071284655e-05, | |
| "loss": 2.9914, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.14534353927464916, | |
| "grad_norm": 3.4350616931915283, | |
| "learning_rate": 4.743880609721749e-05, | |
| "loss": 3.4018, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.14543466375068342, | |
| "grad_norm": 3.0712473392486572, | |
| "learning_rate": 4.7435649642639876e-05, | |
| "loss": 3.1312, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.1455257882267177, | |
| "grad_norm": 2.87288761138916, | |
| "learning_rate": 4.743249134937242e-05, | |
| "loss": 3.259, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 0.14561691270275195, | |
| "grad_norm": 3.8843579292297363, | |
| "learning_rate": 4.742933121767394e-05, | |
| "loss": 3.4705, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.14570803717878622, | |
| "grad_norm": 2.3016998767852783, | |
| "learning_rate": 4.742616924780342e-05, | |
| "loss": 3.1254, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 0.14579916165482049, | |
| "grad_norm": 2.080766439437866, | |
| "learning_rate": 4.742300544002e-05, | |
| "loss": 3.0068, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.14589028613085475, | |
| "grad_norm": 1.403579831123352, | |
| "learning_rate": 4.741983979458296e-05, | |
| "loss": 3.0318, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 0.14598141060688902, | |
| "grad_norm": 2.5273921489715576, | |
| "learning_rate": 4.741667231175175e-05, | |
| "loss": 3.2275, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.14607253508292328, | |
| "grad_norm": 2.89467453956604, | |
| "learning_rate": 4.741350299178595e-05, | |
| "loss": 2.9018, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 0.14616365955895755, | |
| "grad_norm": 1.527073621749878, | |
| "learning_rate": 4.74103318349453e-05, | |
| "loss": 2.9688, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.14625478403499179, | |
| "grad_norm": 1.2628742456436157, | |
| "learning_rate": 4.7407158841489693e-05, | |
| "loss": 2.9605, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.14634590851102605, | |
| "grad_norm": 1.3628411293029785, | |
| "learning_rate": 4.740398401167916e-05, | |
| "loss": 2.8406, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.14643703298706032, | |
| "grad_norm": 2.6044845581054688, | |
| "learning_rate": 4.740080734577389e-05, | |
| "loss": 2.8398, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 0.14652815746309458, | |
| "grad_norm": 3.561075210571289, | |
| "learning_rate": 4.7397628844034225e-05, | |
| "loss": 4.4007, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.14661928193912885, | |
| "grad_norm": 1.4533250331878662, | |
| "learning_rate": 4.739444850672067e-05, | |
| "loss": 2.9798, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 0.1467104064151631, | |
| "grad_norm": 3.046421527862549, | |
| "learning_rate": 4.7391266334093845e-05, | |
| "loss": 3.0307, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.14680153089119738, | |
| "grad_norm": 2.7299585342407227, | |
| "learning_rate": 4.738808232641455e-05, | |
| "loss": 3.1078, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 0.14689265536723164, | |
| "grad_norm": 1.458348035812378, | |
| "learning_rate": 4.738489648394373e-05, | |
| "loss": 2.9425, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.1469837798432659, | |
| "grad_norm": 1.5302770137786865, | |
| "learning_rate": 4.7381708806942474e-05, | |
| "loss": 3.0449, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 0.14707490431930018, | |
| "grad_norm": 4.053586006164551, | |
| "learning_rate": 4.737851929567203e-05, | |
| "loss": 3.5561, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.1471660287953344, | |
| "grad_norm": 2.3581931591033936, | |
| "learning_rate": 4.737532795039378e-05, | |
| "loss": 2.928, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.14725715327136868, | |
| "grad_norm": 1.6155071258544922, | |
| "learning_rate": 4.737213477136928e-05, | |
| "loss": 3.1576, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.14734827774740294, | |
| "grad_norm": 2.6783015727996826, | |
| "learning_rate": 4.736893975886022e-05, | |
| "loss": 3.3114, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 0.1474394022234372, | |
| "grad_norm": 2.132664203643799, | |
| "learning_rate": 4.7365742913128434e-05, | |
| "loss": 2.8533, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.14753052669947148, | |
| "grad_norm": 2.1703169345855713, | |
| "learning_rate": 4.736254423443593e-05, | |
| "loss": 2.973, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 0.14762165117550574, | |
| "grad_norm": 1.7046056985855103, | |
| "learning_rate": 4.7359343723044844e-05, | |
| "loss": 3.0936, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.14771277565154, | |
| "grad_norm": 2.7974915504455566, | |
| "learning_rate": 4.7356141379217475e-05, | |
| "loss": 3.1275, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 0.14780390012757427, | |
| "grad_norm": 2.6626646518707275, | |
| "learning_rate": 4.735293720321626e-05, | |
| "loss": 3.1263, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.14789502460360854, | |
| "grad_norm": 3.38659405708313, | |
| "learning_rate": 4.7349731195303805e-05, | |
| "loss": 2.5051, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 0.1479861490796428, | |
| "grad_norm": 2.316164493560791, | |
| "learning_rate": 4.734652335574285e-05, | |
| "loss": 3.0061, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.14807727355567704, | |
| "grad_norm": 2.6719372272491455, | |
| "learning_rate": 4.7343313684796275e-05, | |
| "loss": 2.7537, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.1481683980317113, | |
| "grad_norm": 2.598212480545044, | |
| "learning_rate": 4.7340102182727155e-05, | |
| "loss": 2.9145, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.14825952250774557, | |
| "grad_norm": 1.4254382848739624, | |
| "learning_rate": 4.7336888849798664e-05, | |
| "loss": 2.917, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 0.14835064698377984, | |
| "grad_norm": 2.534688949584961, | |
| "learning_rate": 4.733367368627415e-05, | |
| "loss": 2.8936, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.1484417714598141, | |
| "grad_norm": 4.316157817840576, | |
| "learning_rate": 4.7330456692417115e-05, | |
| "loss": 3.0555, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 0.14853289593584837, | |
| "grad_norm": 2.75527024269104, | |
| "learning_rate": 4.73272378684912e-05, | |
| "loss": 4.4197, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.14862402041188263, | |
| "grad_norm": 2.4005396366119385, | |
| "learning_rate": 4.73240172147602e-05, | |
| "loss": 3.2265, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 0.1487151448879169, | |
| "grad_norm": 2.969036102294922, | |
| "learning_rate": 4.732079473148806e-05, | |
| "loss": 3.2398, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.14880626936395117, | |
| "grad_norm": 1.4357764720916748, | |
| "learning_rate": 4.7317570418938884e-05, | |
| "loss": 2.9779, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 0.14889739383998543, | |
| "grad_norm": 3.58138108253479, | |
| "learning_rate": 4.73143442773769e-05, | |
| "loss": 3.0653, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.14898851831601967, | |
| "grad_norm": 2.6361777782440186, | |
| "learning_rate": 4.731111630706652e-05, | |
| "loss": 2.9738, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.14907964279205393, | |
| "grad_norm": 2.812761068344116, | |
| "learning_rate": 4.730788650827227e-05, | |
| "loss": 2.9724, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.1491707672680882, | |
| "grad_norm": 2.8054981231689453, | |
| "learning_rate": 4.730465488125887e-05, | |
| "loss": 3.2216, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 0.14926189174412247, | |
| "grad_norm": 1.5526211261749268, | |
| "learning_rate": 4.7301421426291135e-05, | |
| "loss": 2.9845, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.14935301622015673, | |
| "grad_norm": 2.076298236846924, | |
| "learning_rate": 4.729818614363409e-05, | |
| "loss": 2.8241, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 0.149444140696191, | |
| "grad_norm": 1.7731536626815796, | |
| "learning_rate": 4.729494903355285e-05, | |
| "loss": 2.9929, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.14953526517222526, | |
| "grad_norm": 1.967139482498169, | |
| "learning_rate": 4.7291710096312736e-05, | |
| "loss": 2.9744, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 0.14962638964825953, | |
| "grad_norm": 2.6699776649475098, | |
| "learning_rate": 4.728846933217918e-05, | |
| "loss": 3.0459, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.1497175141242938, | |
| "grad_norm": 1.3542306423187256, | |
| "learning_rate": 4.728522674141776e-05, | |
| "loss": 2.9644, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 0.14980863860032806, | |
| "grad_norm": 2.028887987136841, | |
| "learning_rate": 4.728198232429424e-05, | |
| "loss": 2.9015, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.14989976307636232, | |
| "grad_norm": 1.5386238098144531, | |
| "learning_rate": 4.72787360810745e-05, | |
| "loss": 2.9069, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.14999088755239656, | |
| "grad_norm": 2.95621395111084, | |
| "learning_rate": 4.727548801202461e-05, | |
| "loss": 3.0468, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.15008201202843083, | |
| "grad_norm": 4.1476545333862305, | |
| "learning_rate": 4.7272238117410715e-05, | |
| "loss": 2.6694, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 0.1501731365044651, | |
| "grad_norm": 2.2101778984069824, | |
| "learning_rate": 4.726898639749919e-05, | |
| "loss": 3.112, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.15026426098049936, | |
| "grad_norm": 3.93254017829895, | |
| "learning_rate": 4.726573285255652e-05, | |
| "loss": 3.3209, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 0.15035538545653362, | |
| "grad_norm": 1.543944001197815, | |
| "learning_rate": 4.726247748284935e-05, | |
| "loss": 2.9816, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.1504465099325679, | |
| "grad_norm": 2.223081588745117, | |
| "learning_rate": 4.725922028864446e-05, | |
| "loss": 3.0477, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 0.15053763440860216, | |
| "grad_norm": 2.9665400981903076, | |
| "learning_rate": 4.725596127020879e-05, | |
| "loss": 3.0741, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.15062875888463642, | |
| "grad_norm": 2.24804949760437, | |
| "learning_rate": 4.7252700427809436e-05, | |
| "loss": 3.0598, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 0.1507198833606707, | |
| "grad_norm": 2.718592643737793, | |
| "learning_rate": 4.724943776171364e-05, | |
| "loss": 3.0261, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.15081100783670495, | |
| "grad_norm": 1.653093695640564, | |
| "learning_rate": 4.7246173272188774e-05, | |
| "loss": 2.9727, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.1509021323127392, | |
| "grad_norm": 2.4713759422302246, | |
| "learning_rate": 4.72429069595024e-05, | |
| "loss": 2.9652, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.15099325678877346, | |
| "grad_norm": 2.3406810760498047, | |
| "learning_rate": 4.723963882392218e-05, | |
| "loss": 2.9158, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 0.15108438126480772, | |
| "grad_norm": 1.7240417003631592, | |
| "learning_rate": 4.723636886571597e-05, | |
| "loss": 3.0695, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.151175505740842, | |
| "grad_norm": 3.368987798690796, | |
| "learning_rate": 4.723309708515175e-05, | |
| "loss": 3.1482, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 0.15126663021687625, | |
| "grad_norm": 1.6549758911132812, | |
| "learning_rate": 4.722982348249765e-05, | |
| "loss": 3.017, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.15135775469291052, | |
| "grad_norm": 3.695152759552002, | |
| "learning_rate": 4.722654805802196e-05, | |
| "loss": 3.2281, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 0.15144887916894478, | |
| "grad_norm": 2.500075578689575, | |
| "learning_rate": 4.7223270811993116e-05, | |
| "loss": 2.9514, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.15154000364497905, | |
| "grad_norm": 4.324959754943848, | |
| "learning_rate": 4.721999174467969e-05, | |
| "loss": 3.2794, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 0.15163112812101331, | |
| "grad_norm": 2.2761075496673584, | |
| "learning_rate": 4.7216710856350424e-05, | |
| "loss": 2.7181, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.15172225259704758, | |
| "grad_norm": 2.5118227005004883, | |
| "learning_rate": 4.7213428147274195e-05, | |
| "loss": 3.2282, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.15181337707308182, | |
| "grad_norm": 2.3931479454040527, | |
| "learning_rate": 4.721014361772005e-05, | |
| "loss": 2.7632, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.15190450154911608, | |
| "grad_norm": 1.8002700805664062, | |
| "learning_rate": 4.720685726795714e-05, | |
| "loss": 2.9921, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 0.15199562602515035, | |
| "grad_norm": 1.4867767095565796, | |
| "learning_rate": 4.720356909825482e-05, | |
| "loss": 2.991, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.15208675050118461, | |
| "grad_norm": 2.3881375789642334, | |
| "learning_rate": 4.7200279108882554e-05, | |
| "loss": 3.2776, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 0.15217787497721888, | |
| "grad_norm": 2.2196474075317383, | |
| "learning_rate": 4.7196987300109974e-05, | |
| "loss": 3.1953, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.15226899945325315, | |
| "grad_norm": 1.6471738815307617, | |
| "learning_rate": 4.719369367220686e-05, | |
| "loss": 3.0896, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 0.1523601239292874, | |
| "grad_norm": 2.35978364944458, | |
| "learning_rate": 4.7190398225443134e-05, | |
| "loss": 2.9034, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.15245124840532168, | |
| "grad_norm": 3.6606178283691406, | |
| "learning_rate": 4.718710096008887e-05, | |
| "loss": 3.1773, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 0.15254237288135594, | |
| "grad_norm": 2.787719249725342, | |
| "learning_rate": 4.7183801876414294e-05, | |
| "loss": 3.3528, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.1526334973573902, | |
| "grad_norm": 4.270319938659668, | |
| "learning_rate": 4.718050097468978e-05, | |
| "loss": 2.5025, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.15272462183342445, | |
| "grad_norm": 1.192939281463623, | |
| "learning_rate": 4.717719825518585e-05, | |
| "loss": 2.9122, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.1528157463094587, | |
| "grad_norm": 2.2663092613220215, | |
| "learning_rate": 4.717389371817316e-05, | |
| "loss": 3.0213, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 0.15290687078549298, | |
| "grad_norm": 2.0562195777893066, | |
| "learning_rate": 4.717058736392256e-05, | |
| "loss": 3.0291, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.15299799526152724, | |
| "grad_norm": 1.2530059814453125, | |
| "learning_rate": 4.716727919270499e-05, | |
| "loss": 2.8827, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 0.1530891197375615, | |
| "grad_norm": 1.6773467063903809, | |
| "learning_rate": 4.716396920479158e-05, | |
| "loss": 2.9988, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.15318024421359577, | |
| "grad_norm": 2.1727144718170166, | |
| "learning_rate": 4.71606574004536e-05, | |
| "loss": 2.9866, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 0.15327136868963004, | |
| "grad_norm": 2.6730339527130127, | |
| "learning_rate": 4.715734377996246e-05, | |
| "loss": 3.1705, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.1533624931656643, | |
| "grad_norm": 2.3407809734344482, | |
| "learning_rate": 4.7154028343589726e-05, | |
| "loss": 2.997, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 0.15345361764169857, | |
| "grad_norm": 2.6133763790130615, | |
| "learning_rate": 4.7150711091607114e-05, | |
| "loss": 3.0567, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.15354474211773284, | |
| "grad_norm": 1.2702889442443848, | |
| "learning_rate": 4.714739202428648e-05, | |
| "loss": 2.9257, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.15363586659376707, | |
| "grad_norm": 2.4354734420776367, | |
| "learning_rate": 4.714407114189984e-05, | |
| "loss": 2.9063, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.15372699106980134, | |
| "grad_norm": 2.6022043228149414, | |
| "learning_rate": 4.714074844471934e-05, | |
| "loss": 3.0259, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 0.1538181155458356, | |
| "grad_norm": 3.257946252822876, | |
| "learning_rate": 4.713742393301731e-05, | |
| "loss": 3.0208, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.15390924002186987, | |
| "grad_norm": 2.0955233573913574, | |
| "learning_rate": 4.7134097607066194e-05, | |
| "loss": 2.9569, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 0.15400036449790414, | |
| "grad_norm": 2.2379086017608643, | |
| "learning_rate": 4.713076946713859e-05, | |
| "loss": 3.005, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.1540914889739384, | |
| "grad_norm": 2.551835060119629, | |
| "learning_rate": 4.712743951350727e-05, | |
| "loss": 2.8125, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 0.15418261344997267, | |
| "grad_norm": 1.4898242950439453, | |
| "learning_rate": 4.7124107746445126e-05, | |
| "loss": 2.9545, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.15427373792600693, | |
| "grad_norm": 3.3373043537139893, | |
| "learning_rate": 4.7120774166225215e-05, | |
| "loss": 2.5542, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 0.1543648624020412, | |
| "grad_norm": 2.2917416095733643, | |
| "learning_rate": 4.7117438773120725e-05, | |
| "loss": 3.1463, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.15445598687807546, | |
| "grad_norm": 2.7962570190429688, | |
| "learning_rate": 4.7114101567405016e-05, | |
| "loss": 2.9269, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.1545471113541097, | |
| "grad_norm": 2.6020569801330566, | |
| "learning_rate": 4.7110762549351586e-05, | |
| "loss": 3.0507, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.15463823583014397, | |
| "grad_norm": 3.0041913986206055, | |
| "learning_rate": 4.7107421719234066e-05, | |
| "loss": 3.1105, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 0.15472936030617823, | |
| "grad_norm": 3.319488525390625, | |
| "learning_rate": 4.710407907732627e-05, | |
| "loss": 3.1954, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.1548204847822125, | |
| "grad_norm": 1.2503721714019775, | |
| "learning_rate": 4.7100734623902135e-05, | |
| "loss": 2.8883, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 0.15491160925824676, | |
| "grad_norm": 2.5423226356506348, | |
| "learning_rate": 4.709738835923575e-05, | |
| "loss": 3.2076, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.15500273373428103, | |
| "grad_norm": 2.350539445877075, | |
| "learning_rate": 4.7094040283601345e-05, | |
| "loss": 3.1558, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 0.1550938582103153, | |
| "grad_norm": 2.6543209552764893, | |
| "learning_rate": 4.709069039727332e-05, | |
| "loss": 3.2013, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.15518498268634956, | |
| "grad_norm": 2.5844523906707764, | |
| "learning_rate": 4.708733870052621e-05, | |
| "loss": 4.2824, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 0.15527610716238383, | |
| "grad_norm": 4.499124526977539, | |
| "learning_rate": 4.708398519363469e-05, | |
| "loss": 3.0362, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.1553672316384181, | |
| "grad_norm": 2.7563819885253906, | |
| "learning_rate": 4.70806298768736e-05, | |
| "loss": 3.3136, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.15545835611445233, | |
| "grad_norm": 2.212899923324585, | |
| "learning_rate": 4.707727275051793e-05, | |
| "loss": 2.9758, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.1555494805904866, | |
| "grad_norm": 2.452393054962158, | |
| "learning_rate": 4.70739138148428e-05, | |
| "loss": 3.3339, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 0.15564060506652086, | |
| "grad_norm": 3.724048614501953, | |
| "learning_rate": 4.7070553070123494e-05, | |
| "loss": 3.231, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.15573172954255513, | |
| "grad_norm": 2.489640235900879, | |
| "learning_rate": 4.706719051663543e-05, | |
| "loss": 3.2228, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 0.1558228540185894, | |
| "grad_norm": 1.4192156791687012, | |
| "learning_rate": 4.7063826154654175e-05, | |
| "loss": 3.0394, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.15591397849462366, | |
| "grad_norm": 1.8565047979354858, | |
| "learning_rate": 4.706045998445548e-05, | |
| "loss": 2.9931, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 0.15600510297065792, | |
| "grad_norm": 3.727804183959961, | |
| "learning_rate": 4.70570920063152e-05, | |
| "loss": 3.3733, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.1560962274466922, | |
| "grad_norm": 1.7734785079956055, | |
| "learning_rate": 4.705372222050934e-05, | |
| "loss": 3.134, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 0.15618735192272645, | |
| "grad_norm": 1.5476933717727661, | |
| "learning_rate": 4.705035062731409e-05, | |
| "loss": 3.0023, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.15627847639876072, | |
| "grad_norm": 1.2864508628845215, | |
| "learning_rate": 4.7046977227005754e-05, | |
| "loss": 2.832, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.15636960087479496, | |
| "grad_norm": 1.4140430688858032, | |
| "learning_rate": 4.704360201986079e-05, | |
| "loss": 2.9195, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.15646072535082922, | |
| "grad_norm": 1.4808127880096436, | |
| "learning_rate": 4.704022500615583e-05, | |
| "loss": 3.0438, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 0.1565518498268635, | |
| "grad_norm": 2.3502492904663086, | |
| "learning_rate": 4.7036846186167605e-05, | |
| "loss": 3.0985, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.15664297430289775, | |
| "grad_norm": 2.2176742553710938, | |
| "learning_rate": 4.703346556017305e-05, | |
| "loss": 3.1193, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 0.15673409877893202, | |
| "grad_norm": 2.9510931968688965, | |
| "learning_rate": 4.70300831284492e-05, | |
| "loss": 3.2911, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.15682522325496628, | |
| "grad_norm": 2.358860969543457, | |
| "learning_rate": 4.702669889127328e-05, | |
| "loss": 3.121, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 0.15691634773100055, | |
| "grad_norm": 3.136566638946533, | |
| "learning_rate": 4.702331284892262e-05, | |
| "loss": 3.0761, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.15700747220703481, | |
| "grad_norm": 1.5300287008285522, | |
| "learning_rate": 4.701992500167473e-05, | |
| "loss": 3.0094, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 0.15709859668306908, | |
| "grad_norm": 3.3138129711151123, | |
| "learning_rate": 4.701653534980724e-05, | |
| "loss": 3.2065, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.15718972115910335, | |
| "grad_norm": 2.763362407684326, | |
| "learning_rate": 4.7013143893597984e-05, | |
| "loss": 3.1027, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.1572808456351376, | |
| "grad_norm": 1.5856213569641113, | |
| "learning_rate": 4.700975063332487e-05, | |
| "loss": 3.029, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.15737197011117185, | |
| "grad_norm": 3.636809825897217, | |
| "learning_rate": 4.700635556926601e-05, | |
| "loss": 3.1847, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 0.15746309458720611, | |
| "grad_norm": 3.5086801052093506, | |
| "learning_rate": 4.7002958701699626e-05, | |
| "loss": 2.9192, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.15755421906324038, | |
| "grad_norm": 2.65805983543396, | |
| "learning_rate": 4.699956003090412e-05, | |
| "loss": 3.2344, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 0.15764534353927465, | |
| "grad_norm": 1.6364223957061768, | |
| "learning_rate": 4.6996159557158015e-05, | |
| "loss": 2.9734, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.1577364680153089, | |
| "grad_norm": 1.9043081998825073, | |
| "learning_rate": 4.6992757280739994e-05, | |
| "loss": 3.0671, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 0.15782759249134318, | |
| "grad_norm": 2.699431896209717, | |
| "learning_rate": 4.698935320192889e-05, | |
| "loss": 4.0291, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.15791871696737744, | |
| "grad_norm": 3.2991626262664795, | |
| "learning_rate": 4.698594732100369e-05, | |
| "loss": 2.9972, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 0.1580098414434117, | |
| "grad_norm": 2.3580875396728516, | |
| "learning_rate": 4.6982539638243506e-05, | |
| "loss": 2.992, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.15810096591944597, | |
| "grad_norm": 1.856777310371399, | |
| "learning_rate": 4.6979130153927605e-05, | |
| "loss": 3.1221, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.15819209039548024, | |
| "grad_norm": 2.132002592086792, | |
| "learning_rate": 4.697571886833544e-05, | |
| "loss": 3.1007, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.15828321487151448, | |
| "grad_norm": 1.3877090215682983, | |
| "learning_rate": 4.697230578174654e-05, | |
| "loss": 3.0206, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 0.15837433934754874, | |
| "grad_norm": 2.1733388900756836, | |
| "learning_rate": 4.6968890894440646e-05, | |
| "loss": 3.1006, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.158465463823583, | |
| "grad_norm": 2.9271535873413086, | |
| "learning_rate": 4.696547420669761e-05, | |
| "loss": 4.3027, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 0.15855658829961727, | |
| "grad_norm": 2.7141754627227783, | |
| "learning_rate": 4.696205571879745e-05, | |
| "loss": 4.056, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.15864771277565154, | |
| "grad_norm": 1.7694344520568848, | |
| "learning_rate": 4.6958635431020315e-05, | |
| "loss": 2.9097, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 0.1587388372516858, | |
| "grad_norm": 2.743277072906494, | |
| "learning_rate": 4.695521334364653e-05, | |
| "loss": 3.0041, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.15882996172772007, | |
| "grad_norm": 2.976694107055664, | |
| "learning_rate": 4.6951789456956524e-05, | |
| "loss": 3.1339, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 0.15892108620375434, | |
| "grad_norm": 2.281081199645996, | |
| "learning_rate": 4.6948363771230917e-05, | |
| "loss": 3.0526, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.1590122106797886, | |
| "grad_norm": 2.0678000450134277, | |
| "learning_rate": 4.694493628675044e-05, | |
| "loss": 3.2022, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.15910333515582287, | |
| "grad_norm": 1.403245210647583, | |
| "learning_rate": 4.694150700379601e-05, | |
| "loss": 2.8774, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.1591944596318571, | |
| "grad_norm": 2.7492146492004395, | |
| "learning_rate": 4.693807592264866e-05, | |
| "loss": 3.3178, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 0.15928558410789137, | |
| "grad_norm": 2.7067978382110596, | |
| "learning_rate": 4.693464304358957e-05, | |
| "loss": 3.1067, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.15937670858392564, | |
| "grad_norm": 3.026930093765259, | |
| "learning_rate": 4.693120836690009e-05, | |
| "loss": 2.6783, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 0.1594678330599599, | |
| "grad_norm": 2.294149398803711, | |
| "learning_rate": 4.6927771892861715e-05, | |
| "loss": 3.0602, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.15955895753599417, | |
| "grad_norm": 2.4239230155944824, | |
| "learning_rate": 4.6924333621756055e-05, | |
| "loss": 3.366, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 0.15965008201202843, | |
| "grad_norm": 1.500225305557251, | |
| "learning_rate": 4.692089355386491e-05, | |
| "loss": 3.0087, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.1597412064880627, | |
| "grad_norm": 3.9721415042877197, | |
| "learning_rate": 4.691745168947019e-05, | |
| "loss": 1.5708, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 0.15983233096409696, | |
| "grad_norm": 2.3244717121124268, | |
| "learning_rate": 4.6914008028853974e-05, | |
| "loss": 3.062, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.15992345544013123, | |
| "grad_norm": 3.399137258529663, | |
| "learning_rate": 4.6910562572298496e-05, | |
| "loss": 3.275, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.1600145799161655, | |
| "grad_norm": 1.5590107440948486, | |
| "learning_rate": 4.690711532008611e-05, | |
| "loss": 3.0246, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.16010570439219973, | |
| "grad_norm": 1.5847901105880737, | |
| "learning_rate": 4.690366627249934e-05, | |
| "loss": 3.0687, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.160196828868234, | |
| "grad_norm": 3.033726453781128, | |
| "learning_rate": 4.690021542982084e-05, | |
| "loss": 3.1466, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.16028795334426826, | |
| "grad_norm": 2.4416680335998535, | |
| "learning_rate": 4.689676279233344e-05, | |
| "loss": 3.0355, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 0.16037907782030253, | |
| "grad_norm": 3.199636459350586, | |
| "learning_rate": 4.689330836032007e-05, | |
| "loss": 2.9404, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1604702022963368, | |
| "grad_norm": 2.1317837238311768, | |
| "learning_rate": 4.688985213406386e-05, | |
| "loss": 3.3088, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 0.16056132677237106, | |
| "grad_norm": 2.5305070877075195, | |
| "learning_rate": 4.6886394113848034e-05, | |
| "loss": 2.8364, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.16065245124840533, | |
| "grad_norm": 1.2650429010391235, | |
| "learning_rate": 4.6882934299956014e-05, | |
| "loss": 2.881, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 0.1607435757244396, | |
| "grad_norm": 3.0399343967437744, | |
| "learning_rate": 4.6879472692671344e-05, | |
| "loss": 2.9391, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.16083470020047386, | |
| "grad_norm": 1.4439702033996582, | |
| "learning_rate": 4.68760092922777e-05, | |
| "loss": 2.9775, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.16092582467650812, | |
| "grad_norm": 3.708564281463623, | |
| "learning_rate": 4.6872544099058934e-05, | |
| "loss": 2.7142, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.16101694915254236, | |
| "grad_norm": 2.397183656692505, | |
| "learning_rate": 4.686907711329903e-05, | |
| "loss": 3.2577, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 0.16110807362857663, | |
| "grad_norm": 5.58218240737915, | |
| "learning_rate": 4.686560833528213e-05, | |
| "loss": 2.4521, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.1611991981046109, | |
| "grad_norm": 1.9717990159988403, | |
| "learning_rate": 4.6862137765292493e-05, | |
| "loss": 3.0401, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 2.144928455352783, | |
| "learning_rate": 4.685866540361456e-05, | |
| "loss": 3.0899, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.16138144705667942, | |
| "grad_norm": 2.322648286819458, | |
| "learning_rate": 4.685519125053289e-05, | |
| "loss": 3.0279, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 0.1614725715327137, | |
| "grad_norm": 1.8815804719924927, | |
| "learning_rate": 4.6851715306332235e-05, | |
| "loss": 3.0445, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.16156369600874795, | |
| "grad_norm": 3.7805604934692383, | |
| "learning_rate": 4.684823757129743e-05, | |
| "loss": 2.8023, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 0.16165482048478222, | |
| "grad_norm": 3.898134231567383, | |
| "learning_rate": 4.684475804571351e-05, | |
| "loss": 3.1171, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.16174594496081648, | |
| "grad_norm": 2.4973833560943604, | |
| "learning_rate": 4.684127672986562e-05, | |
| "loss": 3.3566, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.16183706943685075, | |
| "grad_norm": 2.69533371925354, | |
| "learning_rate": 4.683779362403908e-05, | |
| "loss": 3.1604, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.161928193912885, | |
| "grad_norm": 1.983508825302124, | |
| "learning_rate": 4.683430872851934e-05, | |
| "loss": 2.9452, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 0.16201931838891925, | |
| "grad_norm": 2.2573885917663574, | |
| "learning_rate": 4.6830822043591994e-05, | |
| "loss": 3.1903, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.16211044286495352, | |
| "grad_norm": 1.7813071012496948, | |
| "learning_rate": 4.68273335695428e-05, | |
| "loss": 3.0245, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 0.16220156734098778, | |
| "grad_norm": 2.4912261962890625, | |
| "learning_rate": 4.682384330665765e-05, | |
| "loss": 3.1379, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.16229269181702205, | |
| "grad_norm": 2.328861713409424, | |
| "learning_rate": 4.682035125522258e-05, | |
| "loss": 3.1379, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 0.16238381629305632, | |
| "grad_norm": 1.43288254737854, | |
| "learning_rate": 4.681685741552379e-05, | |
| "loss": 2.959, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.16247494076909058, | |
| "grad_norm": 3.625856876373291, | |
| "learning_rate": 4.6813361787847585e-05, | |
| "loss": 3.088, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 0.16256606524512485, | |
| "grad_norm": 2.959545612335205, | |
| "learning_rate": 4.680986437248048e-05, | |
| "loss": 3.1953, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.1626571897211591, | |
| "grad_norm": 4.424033164978027, | |
| "learning_rate": 4.680636516970908e-05, | |
| "loss": 3.2453, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.16274831419719338, | |
| "grad_norm": 2.5210375785827637, | |
| "learning_rate": 4.680286417982017e-05, | |
| "loss": 3.127, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.16283943867322762, | |
| "grad_norm": 2.69915509223938, | |
| "learning_rate": 4.679936140310066e-05, | |
| "loss": 2.9651, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 0.16293056314926188, | |
| "grad_norm": 2.2764296531677246, | |
| "learning_rate": 4.679585683983763e-05, | |
| "loss": 2.4394, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.16302168762529615, | |
| "grad_norm": 2.4969351291656494, | |
| "learning_rate": 4.679235049031827e-05, | |
| "loss": 3.1952, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 0.1631128121013304, | |
| "grad_norm": 2.099569797515869, | |
| "learning_rate": 4.6788842354829965e-05, | |
| "loss": 2.9982, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.16320393657736468, | |
| "grad_norm": 2.138705015182495, | |
| "learning_rate": 4.67853324336602e-05, | |
| "loss": 2.8346, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 0.16329506105339894, | |
| "grad_norm": 3.4511311054229736, | |
| "learning_rate": 4.6781820727096634e-05, | |
| "loss": 2.9689, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.1633861855294332, | |
| "grad_norm": 1.415783166885376, | |
| "learning_rate": 4.677830723542708e-05, | |
| "loss": 2.9163, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 0.16347731000546747, | |
| "grad_norm": 1.5046038627624512, | |
| "learning_rate": 4.677479195893946e-05, | |
| "loss": 3.024, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.16356843448150174, | |
| "grad_norm": 1.4293829202651978, | |
| "learning_rate": 4.677127489792188e-05, | |
| "loss": 3.0218, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.163659558957536, | |
| "grad_norm": 2.071714401245117, | |
| "learning_rate": 4.676775605266256e-05, | |
| "loss": 3.092, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.16375068343357024, | |
| "grad_norm": 2.291851043701172, | |
| "learning_rate": 4.676423542344991e-05, | |
| "loss": 2.9331, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 0.1638418079096045, | |
| "grad_norm": 2.606294870376587, | |
| "learning_rate": 4.676071301057243e-05, | |
| "loss": 3.2264, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.16393293238563877, | |
| "grad_norm": 2.889845848083496, | |
| "learning_rate": 4.675718881431882e-05, | |
| "loss": 3.1705, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 0.16402405686167304, | |
| "grad_norm": 3.2519989013671875, | |
| "learning_rate": 4.675366283497788e-05, | |
| "loss": 4.5846, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1641151813377073, | |
| "grad_norm": 2.266395092010498, | |
| "learning_rate": 4.67501350728386e-05, | |
| "loss": 3.4063, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 0.16420630581374157, | |
| "grad_norm": 2.0074515342712402, | |
| "learning_rate": 4.674660552819007e-05, | |
| "loss": 3.1425, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.16429743028977584, | |
| "grad_norm": 5.168177604675293, | |
| "learning_rate": 4.6743074201321577e-05, | |
| "loss": 3.0502, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 0.1643885547658101, | |
| "grad_norm": 1.329056978225708, | |
| "learning_rate": 4.673954109252251e-05, | |
| "loss": 2.9967, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.16447967924184437, | |
| "grad_norm": 3.3117220401763916, | |
| "learning_rate": 4.6736006202082414e-05, | |
| "loss": 3.1037, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.16457080371787863, | |
| "grad_norm": 2.464008331298828, | |
| "learning_rate": 4.6732469530291e-05, | |
| "loss": 2.9688, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.1646619281939129, | |
| "grad_norm": 2.8803305625915527, | |
| "learning_rate": 4.672893107743812e-05, | |
| "loss": 3.1368, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 0.16475305266994714, | |
| "grad_norm": 2.8774044513702393, | |
| "learning_rate": 4.672539084381375e-05, | |
| "loss": 3.0782, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.1648441771459814, | |
| "grad_norm": 2.2963814735412598, | |
| "learning_rate": 4.672184882970803e-05, | |
| "loss": 2.9884, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 0.16493530162201567, | |
| "grad_norm": 2.4225456714630127, | |
| "learning_rate": 4.671830503541124e-05, | |
| "loss": 3.3793, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.16502642609804993, | |
| "grad_norm": 1.4230417013168335, | |
| "learning_rate": 4.671475946121381e-05, | |
| "loss": 3.1628, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 0.1651175505740842, | |
| "grad_norm": 2.5980818271636963, | |
| "learning_rate": 4.671121210740631e-05, | |
| "loss": 3.2377, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.16520867505011846, | |
| "grad_norm": 1.8780187368392944, | |
| "learning_rate": 4.6707662974279464e-05, | |
| "loss": 2.9965, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 0.16529979952615273, | |
| "grad_norm": 1.3507145643234253, | |
| "learning_rate": 4.6704112062124146e-05, | |
| "loss": 2.8605, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.165390924002187, | |
| "grad_norm": 2.861724376678467, | |
| "learning_rate": 4.6700559371231345e-05, | |
| "loss": 2.9891, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.16548204847822126, | |
| "grad_norm": 1.8451899290084839, | |
| "learning_rate": 4.6697004901892244e-05, | |
| "loss": 3.0085, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.16557317295425553, | |
| "grad_norm": 2.6136531829833984, | |
| "learning_rate": 4.6693448654398126e-05, | |
| "loss": 3.164, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 0.16566429743028976, | |
| "grad_norm": 2.2334072589874268, | |
| "learning_rate": 4.668989062904045e-05, | |
| "loss": 3.0821, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.16575542190632403, | |
| "grad_norm": 2.377607583999634, | |
| "learning_rate": 4.66863308261108e-05, | |
| "loss": 2.9773, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 0.1658465463823583, | |
| "grad_norm": 2.5205888748168945, | |
| "learning_rate": 4.6682769245900924e-05, | |
| "loss": 3.0648, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.16593767085839256, | |
| "grad_norm": 1.433559536933899, | |
| "learning_rate": 4.667920588870271e-05, | |
| "loss": 2.9843, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 0.16602879533442683, | |
| "grad_norm": 2.934572458267212, | |
| "learning_rate": 4.667564075480818e-05, | |
| "loss": 2.8275, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.1661199198104611, | |
| "grad_norm": 4.768115997314453, | |
| "learning_rate": 4.6672073844509524e-05, | |
| "loss": 2.8542, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 0.16621104428649536, | |
| "grad_norm": 2.2473628520965576, | |
| "learning_rate": 4.666850515809905e-05, | |
| "loss": 2.9707, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.16630216876252962, | |
| "grad_norm": 2.0815329551696777, | |
| "learning_rate": 4.6664934695869226e-05, | |
| "loss": 3.1296, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.1663932932385639, | |
| "grad_norm": 2.7739152908325195, | |
| "learning_rate": 4.666136245811268e-05, | |
| "loss": 1.463, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.16648441771459815, | |
| "grad_norm": 1.61172616481781, | |
| "learning_rate": 4.6657788445122156e-05, | |
| "loss": 3.1061, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 0.1665755421906324, | |
| "grad_norm": 4.994141101837158, | |
| "learning_rate": 4.6654212657190574e-05, | |
| "loss": 3.2571, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 1.818001627922058, | |
| "learning_rate": 4.665063509461097e-05, | |
| "loss": 3.0249, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 0.16675779114270092, | |
| "grad_norm": 2.999178647994995, | |
| "learning_rate": 4.664705575767654e-05, | |
| "loss": 4.4251, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.1668489156187352, | |
| "grad_norm": 2.0437114238739014, | |
| "learning_rate": 4.6643474646680636e-05, | |
| "loss": 3.1958, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 0.16694004009476945, | |
| "grad_norm": 2.0116827487945557, | |
| "learning_rate": 4.663989176191673e-05, | |
| "loss": 2.7428, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.16703116457080372, | |
| "grad_norm": 1.6298675537109375, | |
| "learning_rate": 4.6636307103678464e-05, | |
| "loss": 2.9703, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 0.16712228904683799, | |
| "grad_norm": 2.848588466644287, | |
| "learning_rate": 4.663272067225961e-05, | |
| "loss": 3.0825, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.16721341352287225, | |
| "grad_norm": 1.2318345308303833, | |
| "learning_rate": 4.66291324679541e-05, | |
| "loss": 2.8645, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.16730453799890652, | |
| "grad_norm": 3.15191388130188, | |
| "learning_rate": 4.6625542491055985e-05, | |
| "loss": 3.2674, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.16739566247494078, | |
| "grad_norm": 1.4416121244430542, | |
| "learning_rate": 4.662195074185949e-05, | |
| "loss": 2.9721, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 0.16748678695097502, | |
| "grad_norm": 1.4033936262130737, | |
| "learning_rate": 4.661835722065896e-05, | |
| "loss": 2.8413, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.16757791142700929, | |
| "grad_norm": 3.393137216567993, | |
| "learning_rate": 4.661476192774892e-05, | |
| "loss": 3.255, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 0.16766903590304355, | |
| "grad_norm": 1.4797853231430054, | |
| "learning_rate": 4.6611164863424e-05, | |
| "loss": 2.9213, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.16776016037907782, | |
| "grad_norm": 1.99228835105896, | |
| "learning_rate": 4.660756602797899e-05, | |
| "loss": 3.0292, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 0.16785128485511208, | |
| "grad_norm": 3.2348062992095947, | |
| "learning_rate": 4.6603965421708845e-05, | |
| "loss": 3.0822, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.16794240933114635, | |
| "grad_norm": 1.782169222831726, | |
| "learning_rate": 4.660036304490864e-05, | |
| "loss": 2.9511, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 0.1680335338071806, | |
| "grad_norm": 4.648063659667969, | |
| "learning_rate": 4.6596758897873605e-05, | |
| "loss": 3.0001, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.16812465828321488, | |
| "grad_norm": 2.3051891326904297, | |
| "learning_rate": 4.659315298089912e-05, | |
| "loss": 2.6815, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.16821578275924914, | |
| "grad_norm": 3.1102521419525146, | |
| "learning_rate": 4.6589545294280694e-05, | |
| "loss": 3.291, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.1683069072352834, | |
| "grad_norm": 2.6495754718780518, | |
| "learning_rate": 4.6585935838313996e-05, | |
| "loss": 3.3111, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 0.16839803171131765, | |
| "grad_norm": 1.6148706674575806, | |
| "learning_rate": 4.658232461329484e-05, | |
| "loss": 3.0352, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.1684891561873519, | |
| "grad_norm": 2.024562120437622, | |
| "learning_rate": 4.657871161951917e-05, | |
| "loss": 3.2065, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 0.16858028066338618, | |
| "grad_norm": 3.0572400093078613, | |
| "learning_rate": 4.657509685728309e-05, | |
| "loss": 2.8895, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.16867140513942044, | |
| "grad_norm": 3.482790946960449, | |
| "learning_rate": 4.657148032688285e-05, | |
| "loss": 3.1904, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 0.1687625296154547, | |
| "grad_norm": 2.436032772064209, | |
| "learning_rate": 4.656786202861483e-05, | |
| "loss": 2.8036, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.16885365409148897, | |
| "grad_norm": 3.0358383655548096, | |
| "learning_rate": 4.6564241962775564e-05, | |
| "loss": 3.1313, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 0.16894477856752324, | |
| "grad_norm": 2.4795777797698975, | |
| "learning_rate": 4.656062012966173e-05, | |
| "loss": 3.2146, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.1690359030435575, | |
| "grad_norm": 1.5819923877716064, | |
| "learning_rate": 4.655699652957016e-05, | |
| "loss": 3.0148, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.16912702751959177, | |
| "grad_norm": 1.9436419010162354, | |
| "learning_rate": 4.655337116279782e-05, | |
| "loss": 2.9958, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.16921815199562604, | |
| "grad_norm": 2.404379367828369, | |
| "learning_rate": 4.6549744029641816e-05, | |
| "loss": 2.6195, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 0.16930927647166027, | |
| "grad_norm": 3.0540213584899902, | |
| "learning_rate": 4.6546115130399414e-05, | |
| "loss": 2.9237, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.16940040094769454, | |
| "grad_norm": 1.2016074657440186, | |
| "learning_rate": 4.6542484465368006e-05, | |
| "loss": 3.0172, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 2.7861342430114746, | |
| "learning_rate": 4.653885203484515e-05, | |
| "loss": 4.522, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.16958264989976307, | |
| "grad_norm": 2.5136163234710693, | |
| "learning_rate": 4.6535217839128545e-05, | |
| "loss": 3.2934, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 0.16967377437579734, | |
| "grad_norm": 1.917813777923584, | |
| "learning_rate": 4.6531581878516005e-05, | |
| "loss": 2.982, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.1697648988518316, | |
| "grad_norm": 2.3996527194976807, | |
| "learning_rate": 4.652794415330552e-05, | |
| "loss": 3.2438, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 0.16985602332786587, | |
| "grad_norm": 2.1027286052703857, | |
| "learning_rate": 4.652430466379523e-05, | |
| "loss": 2.9588, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.16994714780390013, | |
| "grad_norm": 1.443307876586914, | |
| "learning_rate": 4.652066341028338e-05, | |
| "loss": 2.993, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.1700382722799344, | |
| "grad_norm": 2.603544235229492, | |
| "learning_rate": 4.6517020393068414e-05, | |
| "loss": 3.099, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.17012939675596866, | |
| "grad_norm": 2.022552728652954, | |
| "learning_rate": 4.651337561244887e-05, | |
| "loss": 3.4147, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 0.1702205212320029, | |
| "grad_norm": 3.2061586380004883, | |
| "learning_rate": 4.650972906872346e-05, | |
| "loss": 3.2699, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.17031164570803717, | |
| "grad_norm": 1.5013995170593262, | |
| "learning_rate": 4.650608076219103e-05, | |
| "loss": 2.9541, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 0.17040277018407143, | |
| "grad_norm": 1.4068304300308228, | |
| "learning_rate": 4.650243069315058e-05, | |
| "loss": 2.9621, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.1704938946601057, | |
| "grad_norm": 1.5417219400405884, | |
| "learning_rate": 4.649877886190124e-05, | |
| "loss": 2.9637, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 0.17058501913613996, | |
| "grad_norm": 1.9698114395141602, | |
| "learning_rate": 4.649512526874229e-05, | |
| "loss": 2.9846, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.17067614361217423, | |
| "grad_norm": 2.276402473449707, | |
| "learning_rate": 4.649146991397317e-05, | |
| "loss": 3.2274, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 0.1707672680882085, | |
| "grad_norm": 1.367923617362976, | |
| "learning_rate": 4.648781279789344e-05, | |
| "loss": 2.9497, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.17085839256424276, | |
| "grad_norm": 2.30353045463562, | |
| "learning_rate": 4.648415392080281e-05, | |
| "loss": 2.8962, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.17094951704027703, | |
| "grad_norm": 2.0635387897491455, | |
| "learning_rate": 4.6480493283001145e-05, | |
| "loss": 3.2399, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.1710406415163113, | |
| "grad_norm": 1.7822022438049316, | |
| "learning_rate": 4.6476830884788456e-05, | |
| "loss": 2.6609, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 0.17113176599234553, | |
| "grad_norm": 1.4074290990829468, | |
| "learning_rate": 4.647316672646488e-05, | |
| "loss": 3.0442, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.1712228904683798, | |
| "grad_norm": 1.7174943685531616, | |
| "learning_rate": 4.6469500808330724e-05, | |
| "loss": 3.3071, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 0.17131401494441406, | |
| "grad_norm": 2.6318490505218506, | |
| "learning_rate": 4.6465833130686405e-05, | |
| "loss": 3.231, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.17140513942044833, | |
| "grad_norm": 2.40649676322937, | |
| "learning_rate": 4.646216369383252e-05, | |
| "loss": 3.1019, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 0.1714962638964826, | |
| "grad_norm": 2.1710281372070312, | |
| "learning_rate": 4.645849249806977e-05, | |
| "loss": 2.9026, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.17158738837251686, | |
| "grad_norm": 3.329578161239624, | |
| "learning_rate": 4.645481954369906e-05, | |
| "loss": 4.2214, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 0.17167851284855112, | |
| "grad_norm": 2.663468360900879, | |
| "learning_rate": 4.6451144831021375e-05, | |
| "loss": 3.0867, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.1717696373245854, | |
| "grad_norm": 3.370527982711792, | |
| "learning_rate": 4.6447468360337876e-05, | |
| "loss": 3.0162, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.17186076180061965, | |
| "grad_norm": 1.3907276391983032, | |
| "learning_rate": 4.6443790131949874e-05, | |
| "loss": 2.9264, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.17195188627665392, | |
| "grad_norm": 3.615497350692749, | |
| "learning_rate": 4.644011014615881e-05, | |
| "loss": 3.0735, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 0.17204301075268819, | |
| "grad_norm": 1.8801993131637573, | |
| "learning_rate": 4.643642840326627e-05, | |
| "loss": 3.1026, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.17213413522872242, | |
| "grad_norm": 1.2994283437728882, | |
| "learning_rate": 4.6432744903573996e-05, | |
| "loss": 2.9899, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 0.1722252597047567, | |
| "grad_norm": 3.113603115081787, | |
| "learning_rate": 4.6429059647383867e-05, | |
| "loss": 2.4245, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.17231638418079095, | |
| "grad_norm": 1.4192283153533936, | |
| "learning_rate": 4.642537263499788e-05, | |
| "loss": 3.0991, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 0.17240750865682522, | |
| "grad_norm": 1.6561801433563232, | |
| "learning_rate": 4.642168386671823e-05, | |
| "loss": 3.0053, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.17249863313285949, | |
| "grad_norm": 2.518643617630005, | |
| "learning_rate": 4.64179933428472e-05, | |
| "loss": 3.0779, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 0.17258975760889375, | |
| "grad_norm": 1.9463084936141968, | |
| "learning_rate": 4.641430106368726e-05, | |
| "loss": 2.9763, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.17268088208492802, | |
| "grad_norm": 2.9101171493530273, | |
| "learning_rate": 4.641060702954101e-05, | |
| "loss": 3.1603, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.17277200656096228, | |
| "grad_norm": 2.2001545429229736, | |
| "learning_rate": 4.640691124071118e-05, | |
| "loss": 2.7663, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.17286313103699655, | |
| "grad_norm": 2.230363130569458, | |
| "learning_rate": 4.6403213697500656e-05, | |
| "loss": 2.4912, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 0.1729542555130308, | |
| "grad_norm": 2.127821445465088, | |
| "learning_rate": 4.639951440021247e-05, | |
| "loss": 3.0039, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.17304537998906505, | |
| "grad_norm": 2.014512538909912, | |
| "learning_rate": 4.639581334914979e-05, | |
| "loss": 3.266, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 0.17313650446509932, | |
| "grad_norm": 2.769667625427246, | |
| "learning_rate": 4.639211054461593e-05, | |
| "loss": 3.225, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.17322762894113358, | |
| "grad_norm": 1.3063126802444458, | |
| "learning_rate": 4.6388405986914365e-05, | |
| "loss": 2.9487, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 0.17331875341716785, | |
| "grad_norm": 2.091466188430786, | |
| "learning_rate": 4.6384699676348674e-05, | |
| "loss": 3.1492, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.1734098778932021, | |
| "grad_norm": 2.709743022918701, | |
| "learning_rate": 4.6380991613222625e-05, | |
| "loss": 3.2338, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 0.17350100236923638, | |
| "grad_norm": 2.078986406326294, | |
| "learning_rate": 4.637728179784009e-05, | |
| "loss": 3.4682, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.17359212684527064, | |
| "grad_norm": 2.516191005706787, | |
| "learning_rate": 4.637357023050512e-05, | |
| "loss": 3.0394, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.1736832513213049, | |
| "grad_norm": 3.148676872253418, | |
| "learning_rate": 4.636985691152188e-05, | |
| "loss": 3.1488, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.17377437579733918, | |
| "grad_norm": 3.0405311584472656, | |
| "learning_rate": 4.63661418411947e-05, | |
| "loss": 3.0929, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 0.17386550027337344, | |
| "grad_norm": 1.5074187517166138, | |
| "learning_rate": 4.6362425019828035e-05, | |
| "loss": 2.9689, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.17395662474940768, | |
| "grad_norm": 1.9838029146194458, | |
| "learning_rate": 4.635870644772651e-05, | |
| "loss": 2.8708, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 0.17404774922544194, | |
| "grad_norm": 1.4794012308120728, | |
| "learning_rate": 4.635498612519486e-05, | |
| "loss": 3.1818, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.1741388737014762, | |
| "grad_norm": 1.3626906871795654, | |
| "learning_rate": 4.6351264052537984e-05, | |
| "loss": 2.8804, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 0.17422999817751048, | |
| "grad_norm": 1.694874882698059, | |
| "learning_rate": 4.6347540230060924e-05, | |
| "loss": 3.1085, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.17432112265354474, | |
| "grad_norm": 3.162107467651367, | |
| "learning_rate": 4.634381465806886e-05, | |
| "loss": 3.2248, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 0.174412247129579, | |
| "grad_norm": 1.7732110023498535, | |
| "learning_rate": 4.6340087336867115e-05, | |
| "loss": 3.015, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.17450337160561327, | |
| "grad_norm": 2.3251988887786865, | |
| "learning_rate": 4.633635826676116e-05, | |
| "loss": 2.833, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.17459449608164754, | |
| "grad_norm": 2.435624599456787, | |
| "learning_rate": 4.633262744805661e-05, | |
| "loss": 3.0755, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.1746856205576818, | |
| "grad_norm": 3.7790379524230957, | |
| "learning_rate": 4.6328894881059216e-05, | |
| "loss": 3.1371, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 0.17477674503371607, | |
| "grad_norm": 5.323071002960205, | |
| "learning_rate": 4.6325160566074875e-05, | |
| "loss": 2.9746, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.1748678695097503, | |
| "grad_norm": 3.0934462547302246, | |
| "learning_rate": 4.632142450340964e-05, | |
| "loss": 3.2001, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 0.17495899398578457, | |
| "grad_norm": 1.9771567583084106, | |
| "learning_rate": 4.631768669336968e-05, | |
| "loss": 3.1897, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.17505011846181884, | |
| "grad_norm": 4.835346698760986, | |
| "learning_rate": 4.631394713626133e-05, | |
| "loss": 3.0367, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 0.1751412429378531, | |
| "grad_norm": 2.225339889526367, | |
| "learning_rate": 4.631020583239107e-05, | |
| "loss": 2.8157, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.17523236741388737, | |
| "grad_norm": 3.113792657852173, | |
| "learning_rate": 4.63064627820655e-05, | |
| "loss": 2.4321, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 0.17532349188992163, | |
| "grad_norm": 1.4210890531539917, | |
| "learning_rate": 4.630271798559138e-05, | |
| "loss": 3.0678, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.1754146163659559, | |
| "grad_norm": 1.9921435117721558, | |
| "learning_rate": 4.629897144327563e-05, | |
| "loss": 3.067, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.17550574084199017, | |
| "grad_norm": 2.643051862716675, | |
| "learning_rate": 4.6295223155425274e-05, | |
| "loss": 2.9527, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.17559686531802443, | |
| "grad_norm": 1.9968456029891968, | |
| "learning_rate": 4.6291473122347494e-05, | |
| "loss": 2.8901, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 0.1756879897940587, | |
| "grad_norm": 2.101381301879883, | |
| "learning_rate": 4.628772134434964e-05, | |
| "loss": 2.988, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.17577911427009293, | |
| "grad_norm": 3.2429404258728027, | |
| "learning_rate": 4.628396782173918e-05, | |
| "loss": 3.1101, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 0.1758702387461272, | |
| "grad_norm": 2.9418232440948486, | |
| "learning_rate": 4.6280212554823715e-05, | |
| "loss": 3.2891, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.17596136322216147, | |
| "grad_norm": 1.3050503730773926, | |
| "learning_rate": 4.6276455543911026e-05, | |
| "loss": 2.9632, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 0.17605248769819573, | |
| "grad_norm": 1.7188957929611206, | |
| "learning_rate": 4.627269678930899e-05, | |
| "loss": 3.0123, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.17614361217423, | |
| "grad_norm": 3.276196002960205, | |
| "learning_rate": 4.626893629132567e-05, | |
| "loss": 3.3355, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 0.17623473665026426, | |
| "grad_norm": 2.130753517150879, | |
| "learning_rate": 4.6265174050269245e-05, | |
| "loss": 3.1184, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.17632586112629853, | |
| "grad_norm": 3.7487149238586426, | |
| "learning_rate": 4.626141006644805e-05, | |
| "loss": 3.2589, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.1764169856023328, | |
| "grad_norm": 1.4219084978103638, | |
| "learning_rate": 4.625764434017056e-05, | |
| "loss": 2.9983, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.17650811007836706, | |
| "grad_norm": 2.6864495277404785, | |
| "learning_rate": 4.625387687174539e-05, | |
| "loss": 3.1346, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 0.17659923455440132, | |
| "grad_norm": 1.8427038192749023, | |
| "learning_rate": 4.62501076614813e-05, | |
| "loss": 2.7725, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.17669035903043556, | |
| "grad_norm": 3.0085763931274414, | |
| "learning_rate": 4.624633670968718e-05, | |
| "loss": 4.6207, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 0.17678148350646983, | |
| "grad_norm": 2.097071647644043, | |
| "learning_rate": 4.6242564016672094e-05, | |
| "loss": 3.2445, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.1768726079825041, | |
| "grad_norm": 2.8107852935791016, | |
| "learning_rate": 4.6238789582745215e-05, | |
| "loss": 3.3332, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 0.17696373245853836, | |
| "grad_norm": 1.6054763793945312, | |
| "learning_rate": 4.623501340821586e-05, | |
| "loss": 3.0692, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.17705485693457262, | |
| "grad_norm": 2.5180320739746094, | |
| "learning_rate": 4.6231235493393535e-05, | |
| "loss": 3.1702, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 0.1771459814106069, | |
| "grad_norm": 2.668308734893799, | |
| "learning_rate": 4.6227455838587827e-05, | |
| "loss": 4.3692, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.17723710588664116, | |
| "grad_norm": 3.261211633682251, | |
| "learning_rate": 4.6223674444108514e-05, | |
| "loss": 3.1087, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.17732823036267542, | |
| "grad_norm": 2.2094995975494385, | |
| "learning_rate": 4.621989131026548e-05, | |
| "loss": 3.2398, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.1774193548387097, | |
| "grad_norm": 2.806913375854492, | |
| "learning_rate": 4.621610643736878e-05, | |
| "loss": 3.2686, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 0.17751047931474395, | |
| "grad_norm": 3.777822494506836, | |
| "learning_rate": 4.621231982572858e-05, | |
| "loss": 2.7723, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.1776016037907782, | |
| "grad_norm": 2.3020384311676025, | |
| "learning_rate": 4.6208531475655236e-05, | |
| "loss": 2.8918, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 0.17769272826681246, | |
| "grad_norm": 3.0134787559509277, | |
| "learning_rate": 4.6204741387459196e-05, | |
| "loss": 3.2594, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.17778385274284672, | |
| "grad_norm": 3.5491254329681396, | |
| "learning_rate": 4.620094956145108e-05, | |
| "loss": 3.1717, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 0.177874977218881, | |
| "grad_norm": 1.3344693183898926, | |
| "learning_rate": 4.619715599794164e-05, | |
| "loss": 3.0054, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.17796610169491525, | |
| "grad_norm": 1.2647558450698853, | |
| "learning_rate": 4.619336069724178e-05, | |
| "loss": 2.9496, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 0.17805722617094952, | |
| "grad_norm": 1.373024821281433, | |
| "learning_rate": 4.6189563659662525e-05, | |
| "loss": 2.9396, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.17814835064698378, | |
| "grad_norm": 1.3420569896697998, | |
| "learning_rate": 4.618576488551508e-05, | |
| "loss": 2.9063, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.17823947512301805, | |
| "grad_norm": 1.575454592704773, | |
| "learning_rate": 4.618196437511075e-05, | |
| "loss": 3.0349, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.17833059959905231, | |
| "grad_norm": 2.5236902236938477, | |
| "learning_rate": 4.617816212876102e-05, | |
| "loss": 2.694, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 0.17842172407508658, | |
| "grad_norm": 2.2877037525177, | |
| "learning_rate": 4.617435814677748e-05, | |
| "loss": 3.2323, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.17851284855112082, | |
| "grad_norm": 2.5378546714782715, | |
| "learning_rate": 4.6170552429471905e-05, | |
| "loss": 2.8907, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 0.17860397302715508, | |
| "grad_norm": 2.347435712814331, | |
| "learning_rate": 4.6166744977156154e-05, | |
| "loss": 3.1412, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.17869509750318935, | |
| "grad_norm": 1.9487541913986206, | |
| "learning_rate": 4.616293579014229e-05, | |
| "loss": 2.858, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 0.17878622197922361, | |
| "grad_norm": 1.4565964937210083, | |
| "learning_rate": 4.6159124868742485e-05, | |
| "loss": 2.9924, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.17887734645525788, | |
| "grad_norm": 1.4799039363861084, | |
| "learning_rate": 4.6155312213269053e-05, | |
| "loss": 2.7628, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 0.17896847093129215, | |
| "grad_norm": 3.3805723190307617, | |
| "learning_rate": 4.615149782403446e-05, | |
| "loss": 3.2429, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.1790595954073264, | |
| "grad_norm": 2.6682169437408447, | |
| "learning_rate": 4.614768170135132e-05, | |
| "loss": 2.9839, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.17915071988336068, | |
| "grad_norm": 2.525508403778076, | |
| "learning_rate": 4.614386384553235e-05, | |
| "loss": 3.1916, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.17924184435939494, | |
| "grad_norm": 2.9044320583343506, | |
| "learning_rate": 4.614004425689048e-05, | |
| "loss": 3.3356, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 0.1793329688354292, | |
| "grad_norm": 3.02644419670105, | |
| "learning_rate": 4.6136222935738704e-05, | |
| "loss": 2.914, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.17942409331146347, | |
| "grad_norm": 2.337035655975342, | |
| "learning_rate": 4.6132399882390206e-05, | |
| "loss": 2.9974, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 0.1795152177874977, | |
| "grad_norm": 2.178936243057251, | |
| "learning_rate": 4.6128575097158314e-05, | |
| "loss": 3.1322, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.17960634226353198, | |
| "grad_norm": 1.6006604433059692, | |
| "learning_rate": 4.612474858035647e-05, | |
| "loss": 3.0153, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 0.17969746673956624, | |
| "grad_norm": 2.1648287773132324, | |
| "learning_rate": 4.612092033229828e-05, | |
| "loss": 3.2334, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.1797885912156005, | |
| "grad_norm": 2.6615753173828125, | |
| "learning_rate": 4.611709035329747e-05, | |
| "loss": 2.9051, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 0.17987971569163477, | |
| "grad_norm": 1.9354596138000488, | |
| "learning_rate": 4.6113258643667936e-05, | |
| "loss": 3.108, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.17997084016766904, | |
| "grad_norm": 1.7395933866500854, | |
| "learning_rate": 4.610942520372369e-05, | |
| "loss": 3.0382, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.1800619646437033, | |
| "grad_norm": 2.480259895324707, | |
| "learning_rate": 4.610559003377891e-05, | |
| "loss": 2.9684, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.18015308911973757, | |
| "grad_norm": 1.3295300006866455, | |
| "learning_rate": 4.61017531341479e-05, | |
| "loss": 3.0404, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 0.18024421359577183, | |
| "grad_norm": 1.599542498588562, | |
| "learning_rate": 4.60979145051451e-05, | |
| "loss": 2.9742, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.1803353380718061, | |
| "grad_norm": 1.8933284282684326, | |
| "learning_rate": 4.609407414708512e-05, | |
| "loss": 3.0518, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 0.18042646254784034, | |
| "grad_norm": 1.4877815246582031, | |
| "learning_rate": 4.6090232060282666e-05, | |
| "loss": 2.9679, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.1805175870238746, | |
| "grad_norm": 1.6608158349990845, | |
| "learning_rate": 4.6086388245052636e-05, | |
| "loss": 3.0059, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 0.18060871149990887, | |
| "grad_norm": 3.126404047012329, | |
| "learning_rate": 4.608254270171003e-05, | |
| "loss": 3.1677, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.18069983597594313, | |
| "grad_norm": 2.0683717727661133, | |
| "learning_rate": 4.6078695430570004e-05, | |
| "loss": 3.2245, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 0.1807909604519774, | |
| "grad_norm": 2.5405118465423584, | |
| "learning_rate": 4.607484643194788e-05, | |
| "loss": 2.286, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.18088208492801167, | |
| "grad_norm": 2.7362663745880127, | |
| "learning_rate": 4.6070995706159075e-05, | |
| "loss": 3.0712, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.18097320940404593, | |
| "grad_norm": 2.33858585357666, | |
| "learning_rate": 4.606714325351918e-05, | |
| "loss": 3.1092, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.1810643338800802, | |
| "grad_norm": 2.5455336570739746, | |
| "learning_rate": 4.606328907434392e-05, | |
| "loss": 3.2204, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 0.18115545835611446, | |
| "grad_norm": 2.442511558532715, | |
| "learning_rate": 4.605943316894915e-05, | |
| "loss": 3.2713, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.18124658283214873, | |
| "grad_norm": 1.7061405181884766, | |
| "learning_rate": 4.605557553765089e-05, | |
| "loss": 2.9598, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 0.18133770730818297, | |
| "grad_norm": 2.4000542163848877, | |
| "learning_rate": 4.605171618076528e-05, | |
| "loss": 3.0296, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.18142883178421723, | |
| "grad_norm": 2.3328189849853516, | |
| "learning_rate": 4.6047855098608615e-05, | |
| "loss": 2.9324, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 0.1815199562602515, | |
| "grad_norm": 2.2985846996307373, | |
| "learning_rate": 4.604399229149733e-05, | |
| "loss": 3.152, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.18161108073628576, | |
| "grad_norm": 1.8579432964324951, | |
| "learning_rate": 4.604012775974798e-05, | |
| "loss": 3.0462, | |
| "step": 1993 | |
| }, | |
| { | |
| "epoch": 0.18170220521232003, | |
| "grad_norm": 1.2653917074203491, | |
| "learning_rate": 4.6036261503677285e-05, | |
| "loss": 2.9474, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.1817933296883543, | |
| "grad_norm": 1.8129152059555054, | |
| "learning_rate": 4.6032393523602114e-05, | |
| "loss": 3.0247, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.18188445416438856, | |
| "grad_norm": 2.5666768550872803, | |
| "learning_rate": 4.602852381983945e-05, | |
| "loss": 3.043, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.18197557864042282, | |
| "grad_norm": 2.3878471851348877, | |
| "learning_rate": 4.602465239270643e-05, | |
| "loss": 3.5325, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 0.1820667031164571, | |
| "grad_norm": 2.6700685024261475, | |
| "learning_rate": 4.602077924252034e-05, | |
| "loss": 2.6536, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.18215782759249136, | |
| "grad_norm": 2.682384967803955, | |
| "learning_rate": 4.601690436959859e-05, | |
| "loss": 3.1602, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 0.1822489520685256, | |
| "grad_norm": 2.54840087890625, | |
| "learning_rate": 4.601302777425875e-05, | |
| "loss": 3.2196, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 10974, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.76919004465111e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |