| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3520179920307038, | |
| "eval_steps": 500, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00039113110225633753, | |
| "grad_norm": 73.33983612060547, | |
| "learning_rate": 0.0, | |
| "loss": 9022.7637, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0007822622045126751, | |
| "grad_norm": 341.7502746582031, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 13173.7656, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0011733933067690125, | |
| "grad_norm": 87.00965881347656, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 9796.1172, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0015645244090253501, | |
| "grad_norm": 17.399965286254883, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 11499.0176, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0019556555112816877, | |
| "grad_norm": 13.876103401184082, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 8862.1523, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002346786613538025, | |
| "grad_norm": 79.78038787841797, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 9754.3584, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.002737917715794363, | |
| "grad_norm": 32.10177230834961, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 6731.0986, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0031290488180507003, | |
| "grad_norm": 35.9862174987793, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 11245.0312, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.003520179920307038, | |
| "grad_norm": 28.863813400268555, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 6245.1196, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.003911311022563375, | |
| "grad_norm": 42.70909881591797, | |
| "learning_rate": 5e-06, | |
| "loss": 6062.3369, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004302442124819713, | |
| "grad_norm": 8.477494239807129, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 9045.1113, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.00469357322707605, | |
| "grad_norm": 17.350603103637695, | |
| "learning_rate": 6.111111111111112e-06, | |
| "loss": 4973.7451, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.005084704329332388, | |
| "grad_norm": 10729.576171875, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2556.2378, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.005475835431588726, | |
| "grad_norm": 12796.4111328125, | |
| "learning_rate": 7.222222222222223e-06, | |
| "loss": 2469.0479, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.005866966533845063, | |
| "grad_norm": 11690.9521484375, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 1900.1685, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0062580976361014005, | |
| "grad_norm": 10036.77734375, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 2600.8911, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.006649228738357738, | |
| "grad_norm": 10.607966423034668, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 871.7581, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.007040359840614076, | |
| "grad_norm": 19968.548828125, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 472.9429, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0074314909428704135, | |
| "grad_norm": 15.735692024230957, | |
| "learning_rate": 1e-05, | |
| "loss": 2482.8877, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.00782262204512675, | |
| "grad_norm": 6.307197570800781, | |
| "learning_rate": 9.999968282268043e-06, | |
| "loss": 538.6899, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008213753147383089, | |
| "grad_norm": 7844.2470703125, | |
| "learning_rate": 9.999873129474573e-06, | |
| "loss": 1317.9186, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.008604884249639426, | |
| "grad_norm": 1.590285062789917, | |
| "learning_rate": 9.999714542826806e-06, | |
| "loss": 641.1613, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.008996015351895764, | |
| "grad_norm": 7149.05322265625, | |
| "learning_rate": 9.999492524336743e-06, | |
| "loss": 821.1104, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0093871464541521, | |
| "grad_norm": 3.6406304836273193, | |
| "learning_rate": 9.999207076821155e-06, | |
| "loss": 1000.1948, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.009778277556408439, | |
| "grad_norm": 3.2047431468963623, | |
| "learning_rate": 9.99885820390154e-06, | |
| "loss": 418.2515, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010169408658664777, | |
| "grad_norm": 2.1247236728668213, | |
| "learning_rate": 9.998445910004082e-06, | |
| "loss": 206.9073, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.010560539760921113, | |
| "grad_norm": 6176.64404296875, | |
| "learning_rate": 9.997970200359592e-06, | |
| "loss": 277.8287, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.010951670863177452, | |
| "grad_norm": 0.7870343327522278, | |
| "learning_rate": 9.99743108100344e-06, | |
| "loss": 28.0133, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.011342801965433788, | |
| "grad_norm": 2.914349317550659, | |
| "learning_rate": 9.996828558775486e-06, | |
| "loss": 528.6738, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.011733933067690126, | |
| "grad_norm": 5678.888671875, | |
| "learning_rate": 9.996162641319985e-06, | |
| "loss": 173.2077, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012125064169946465, | |
| "grad_norm": 1.8666399717330933, | |
| "learning_rate": 9.995433337085492e-06, | |
| "loss": 227.239, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.012516195272202801, | |
| "grad_norm": 2.4892313480377197, | |
| "learning_rate": 9.994640655324758e-06, | |
| "loss": 32.1116, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.01290732637445914, | |
| "grad_norm": 1591.641357421875, | |
| "learning_rate": 9.993784606094612e-06, | |
| "loss": 211.6248, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.013298457476715476, | |
| "grad_norm": 10072.65234375, | |
| "learning_rate": 9.992865200255829e-06, | |
| "loss": 374.1296, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.013689588578971814, | |
| "grad_norm": 4825.43408203125, | |
| "learning_rate": 9.991882449472994e-06, | |
| "loss": 444.3439, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.014080719681228152, | |
| "grad_norm": 30619.453125, | |
| "learning_rate": 9.99083636621436e-06, | |
| "loss": 1014.899, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.014471850783484489, | |
| "grad_norm": 0.7788718342781067, | |
| "learning_rate": 9.989726963751683e-06, | |
| "loss": 637.2323, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.014862981885740827, | |
| "grad_norm": 0.37763819098472595, | |
| "learning_rate": 9.988554256160052e-06, | |
| "loss": 338.0694, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.015254112987997163, | |
| "grad_norm": 5639.20166015625, | |
| "learning_rate": 9.987318258317718e-06, | |
| "loss": 280.443, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0156452440902535, | |
| "grad_norm": 0.34363701939582825, | |
| "learning_rate": 9.986018985905901e-06, | |
| "loss": 127.4894, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01603637519250984, | |
| "grad_norm": 1.3454755544662476, | |
| "learning_rate": 9.984656455408591e-06, | |
| "loss": 227.2196, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.016427506294766178, | |
| "grad_norm": 1.1212941408157349, | |
| "learning_rate": 9.983230684112338e-06, | |
| "loss": 388.5173, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.016818637397022513, | |
| "grad_norm": 4135.07958984375, | |
| "learning_rate": 9.981741690106035e-06, | |
| "loss": 277.9473, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.01720976849927885, | |
| "grad_norm": 0.3415931761264801, | |
| "learning_rate": 9.980189492280688e-06, | |
| "loss": 141.4491, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.01760089960153519, | |
| "grad_norm": 0.3497358560562134, | |
| "learning_rate": 9.978574110329174e-06, | |
| "loss": 125.8424, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.017992030703791528, | |
| "grad_norm": 5.21222448348999, | |
| "learning_rate": 9.976895564745993e-06, | |
| "loss": 693.0908, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.018383161806047866, | |
| "grad_norm": 6.805148124694824, | |
| "learning_rate": 9.975153876827008e-06, | |
| "loss": 237.5693, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.0187742929083042, | |
| "grad_norm": 0.32790857553482056, | |
| "learning_rate": 9.973349068669178e-06, | |
| "loss": 231.3147, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.01916542401056054, | |
| "grad_norm": 1.088236927986145, | |
| "learning_rate": 9.97148116317027e-06, | |
| "loss": 59.4532, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.019556555112816877, | |
| "grad_norm": 3.897584915161133, | |
| "learning_rate": 9.969550184028572e-06, | |
| "loss": 181.5394, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.019947686215073215, | |
| "grad_norm": 0.36891791224479675, | |
| "learning_rate": 9.9675561557426e-06, | |
| "loss": 274.0787, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.020338817317329554, | |
| "grad_norm": 4463.36474609375, | |
| "learning_rate": 9.965499103610775e-06, | |
| "loss": 334.0236, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.02072994841958589, | |
| "grad_norm": 1.5177428722381592, | |
| "learning_rate": 9.963379053731104e-06, | |
| "loss": 116.3086, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.021121079521842227, | |
| "grad_norm": 0.948329746723175, | |
| "learning_rate": 9.961196033000862e-06, | |
| "loss": 167.3062, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.021512210624098565, | |
| "grad_norm": 6488.12744140625, | |
| "learning_rate": 9.95895006911623e-06, | |
| "loss": 280.1733, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.021903341726354903, | |
| "grad_norm": 1.1784950494766235, | |
| "learning_rate": 9.956641190571967e-06, | |
| "loss": 211.0869, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.02229447282861124, | |
| "grad_norm": 1.0954192876815796, | |
| "learning_rate": 9.954269426661023e-06, | |
| "loss": 108.0197, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.022685603930867576, | |
| "grad_norm": 2431.99755859375, | |
| "learning_rate": 9.951834807474191e-06, | |
| "loss": 182.8621, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.023076735033123914, | |
| "grad_norm": 0.5318559408187866, | |
| "learning_rate": 9.949337363899709e-06, | |
| "loss": 203.5584, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.023467866135380253, | |
| "grad_norm": 611.1079711914062, | |
| "learning_rate": 9.946777127622874e-06, | |
| "loss": 304.5495, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02385899723763659, | |
| "grad_norm": 1.1814780235290527, | |
| "learning_rate": 9.944154131125643e-06, | |
| "loss": 305.7924, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.02425012833989293, | |
| "grad_norm": 0.3324390947818756, | |
| "learning_rate": 9.941468407686216e-06, | |
| "loss": 133.2165, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.024641259442149264, | |
| "grad_norm": 0.3267706036567688, | |
| "learning_rate": 9.938719991378614e-06, | |
| "loss": 234.4715, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.025032390544405602, | |
| "grad_norm": 1.3336584568023682, | |
| "learning_rate": 9.935908917072253e-06, | |
| "loss": 44.1555, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02542352164666194, | |
| "grad_norm": 0.33313995599746704, | |
| "learning_rate": 9.933035220431489e-06, | |
| "loss": 199.4248, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02581465274891828, | |
| "grad_norm": 3202.915771484375, | |
| "learning_rate": 9.930098937915177e-06, | |
| "loss": 142.3462, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.026205783851174617, | |
| "grad_norm": 1.4841117858886719, | |
| "learning_rate": 9.927100106776213e-06, | |
| "loss": 334.0991, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02659691495343095, | |
| "grad_norm": 3751.06689453125, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 254.8563, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02698804605568729, | |
| "grad_norm": 0.25357913970947266, | |
| "learning_rate": 9.920914951609189e-06, | |
| "loss": 173.2204, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.027379177157943628, | |
| "grad_norm": 619.5060424804688, | |
| "learning_rate": 9.917728706052765e-06, | |
| "loss": 158.5274, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.027770308260199966, | |
| "grad_norm": 0.3924911320209503, | |
| "learning_rate": 9.914480068815964e-06, | |
| "loss": 101.5475, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.028161439362456304, | |
| "grad_norm": 477.5389404296875, | |
| "learning_rate": 9.91116908111455e-06, | |
| "loss": 150.1975, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.02855257046471264, | |
| "grad_norm": 0.28718748688697815, | |
| "learning_rate": 9.907795784955327e-06, | |
| "loss": 209.6969, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.028943701566968977, | |
| "grad_norm": 0.29111266136169434, | |
| "learning_rate": 9.90436022313562e-06, | |
| "loss": 153.0687, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.029334832669225316, | |
| "grad_norm": 323.9737243652344, | |
| "learning_rate": 9.900862439242719e-06, | |
| "loss": 74.2796, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.029725963771481654, | |
| "grad_norm": 2.6136021614074707, | |
| "learning_rate": 9.897302477653334e-06, | |
| "loss": 291.2503, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.030117094873737992, | |
| "grad_norm": 0.22756816446781158, | |
| "learning_rate": 9.893680383533027e-06, | |
| "loss": 96.8319, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.030508225975994327, | |
| "grad_norm": 0.9840016961097717, | |
| "learning_rate": 9.889996202835642e-06, | |
| "loss": 309.5101, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.030899357078250665, | |
| "grad_norm": 0.275417685508728, | |
| "learning_rate": 9.88624998230272e-06, | |
| "loss": 234.1608, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.031290488180507, | |
| "grad_norm": 0.7580969929695129, | |
| "learning_rate": 9.882441769462911e-06, | |
| "loss": 237.8195, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03168161928276334, | |
| "grad_norm": 0.76526939868927, | |
| "learning_rate": 9.878571612631364e-06, | |
| "loss": 84.615, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.03207275038501968, | |
| "grad_norm": 373.3411560058594, | |
| "learning_rate": 9.874639560909118e-06, | |
| "loss": 300.4078, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.03246388148727602, | |
| "grad_norm": 1.3133848905563354, | |
| "learning_rate": 9.870645664182478e-06, | |
| "loss": 131.5505, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.032855012589532356, | |
| "grad_norm": 311.3302917480469, | |
| "learning_rate": 9.86658997312238e-06, | |
| "loss": 196.0266, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.033246143691788695, | |
| "grad_norm": 0.6496802568435669, | |
| "learning_rate": 9.862472539183757e-06, | |
| "loss": 73.9141, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.033637274794045026, | |
| "grad_norm": 459.7312927246094, | |
| "learning_rate": 9.858293414604871e-06, | |
| "loss": 152.4336, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.034028405896301364, | |
| "grad_norm": 335.05426025390625, | |
| "learning_rate": 9.854052652406666e-06, | |
| "loss": 239.4406, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.0344195369985577, | |
| "grad_norm": 1.7906296253204346, | |
| "learning_rate": 9.849750306392085e-06, | |
| "loss": 129.0228, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.03481066810081404, | |
| "grad_norm": 0.2897918224334717, | |
| "learning_rate": 9.84538643114539e-06, | |
| "loss": 150.6289, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.03520179920307038, | |
| "grad_norm": 486.94952392578125, | |
| "learning_rate": 9.840961082031473e-06, | |
| "loss": 98.7655, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03559293030532672, | |
| "grad_norm": 1.4039978981018066, | |
| "learning_rate": 9.836474315195148e-06, | |
| "loss": 153.2002, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.035984061407583055, | |
| "grad_norm": 2591.096435546875, | |
| "learning_rate": 9.831926187560441e-06, | |
| "loss": 345.5845, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.036375192509839394, | |
| "grad_norm": 2290.58740234375, | |
| "learning_rate": 9.827316756829871e-06, | |
| "loss": 95.79, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.03676632361209573, | |
| "grad_norm": 2.362971067428589, | |
| "learning_rate": 9.822646081483713e-06, | |
| "loss": 177.6802, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.03715745471435207, | |
| "grad_norm": 0.3684510886669159, | |
| "learning_rate": 9.817914220779258e-06, | |
| "loss": 364.415, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0375485858166084, | |
| "grad_norm": 3289.91162109375, | |
| "learning_rate": 9.81312123475006e-06, | |
| "loss": 353.144, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.03793971691886474, | |
| "grad_norm": 0.3244830369949341, | |
| "learning_rate": 9.808267184205182e-06, | |
| "loss": 189.2679, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.03833084802112108, | |
| "grad_norm": 1.0287890434265137, | |
| "learning_rate": 9.80335213072841e-06, | |
| "loss": 139.2987, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.038721979123377416, | |
| "grad_norm": 1508.3958740234375, | |
| "learning_rate": 9.798376136677486e-06, | |
| "loss": 187.994, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.039113110225633754, | |
| "grad_norm": 2.16009521484375, | |
| "learning_rate": 9.793339265183303e-06, | |
| "loss": 142.5293, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03950424132789009, | |
| "grad_norm": 0.4904083013534546, | |
| "learning_rate": 9.788241580149123e-06, | |
| "loss": 147.5625, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.03989537243014643, | |
| "grad_norm": 1.0540492534637451, | |
| "learning_rate": 9.783083146249749e-06, | |
| "loss": 195.4026, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.04028650353240277, | |
| "grad_norm": 602.712158203125, | |
| "learning_rate": 9.777864028930705e-06, | |
| "loss": 101.4185, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.04067763463465911, | |
| "grad_norm": 2.976310968399048, | |
| "learning_rate": 9.77258429440742e-06, | |
| "loss": 218.7537, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.041068765736915445, | |
| "grad_norm": 2.811608076095581, | |
| "learning_rate": 9.767244009664376e-06, | |
| "loss": 96.5371, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.04145989683917178, | |
| "grad_norm": 0.34032607078552246, | |
| "learning_rate": 9.761843242454261e-06, | |
| "loss": 191.7003, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.041851027941428115, | |
| "grad_norm": 5016.841796875, | |
| "learning_rate": 9.75638206129711e-06, | |
| "loss": 112.3467, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.04224215904368445, | |
| "grad_norm": 0.2334691882133484, | |
| "learning_rate": 9.750860535479434e-06, | |
| "loss": 153.987, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.04263329014594079, | |
| "grad_norm": 0.16604840755462646, | |
| "learning_rate": 9.745278735053345e-06, | |
| "loss": 164.6346, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.04302442124819713, | |
| "grad_norm": 2.011397361755371, | |
| "learning_rate": 9.73963673083566e-06, | |
| "loss": 96.3216, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04341555235045347, | |
| "grad_norm": 216.0008544921875, | |
| "learning_rate": 9.733934594407012e-06, | |
| "loss": 97.9669, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.043806683452709806, | |
| "grad_norm": 781.0093383789062, | |
| "learning_rate": 9.728172398110935e-06, | |
| "loss": 124.8754, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.044197814554966144, | |
| "grad_norm": 1206.0252685546875, | |
| "learning_rate": 9.722350215052946e-06, | |
| "loss": 126.4322, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.04458894565722248, | |
| "grad_norm": 1861.3472900390625, | |
| "learning_rate": 9.716468119099626e-06, | |
| "loss": 111.0165, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.04498007675947882, | |
| "grad_norm": 0.7697561979293823, | |
| "learning_rate": 9.710526184877667e-06, | |
| "loss": 1.3695, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.04537120786173515, | |
| "grad_norm": 0.5405284762382507, | |
| "learning_rate": 9.704524487772944e-06, | |
| "loss": 126.1109, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04576233896399149, | |
| "grad_norm": 0.6306818723678589, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 21.7656, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.04615347006624783, | |
| "grad_norm": 3005.80712890625, | |
| "learning_rate": 9.692342110248802e-06, | |
| "loss": 127.39, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04654460116850417, | |
| "grad_norm": 392.67950439453125, | |
| "learning_rate": 9.68616158438834e-06, | |
| "loss": 110.3292, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.046935732270760505, | |
| "grad_norm": 3.171126365661621, | |
| "learning_rate": 9.679921604761056e-06, | |
| "loss": 132.2241, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04732686337301684, | |
| "grad_norm": 0.4518907368183136, | |
| "learning_rate": 9.673622250534155e-06, | |
| "loss": 136.119, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.04771799447527318, | |
| "grad_norm": 2589.1015625, | |
| "learning_rate": 9.66726360162813e-06, | |
| "loss": 381.2232, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.04810912557752952, | |
| "grad_norm": 0.1722714751958847, | |
| "learning_rate": 9.660845738715743e-06, | |
| "loss": 97.7159, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.04850025667978586, | |
| "grad_norm": 0.5003955364227295, | |
| "learning_rate": 9.654368743221022e-06, | |
| "loss": 50.7664, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.048891387782042196, | |
| "grad_norm": 0.28121402859687805, | |
| "learning_rate": 9.647832697318207e-06, | |
| "loss": 93.6975, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.04928251888429853, | |
| "grad_norm": 2588.393310546875, | |
| "learning_rate": 9.641237683930722e-06, | |
| "loss": 54.1281, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.049673649986554866, | |
| "grad_norm": 522.7401123046875, | |
| "learning_rate": 9.63458378673011e-06, | |
| "loss": 21.376, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.050064781088811204, | |
| "grad_norm": 1337.2923583984375, | |
| "learning_rate": 9.627871090134984e-06, | |
| "loss": 97.5455, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.05045591219106754, | |
| "grad_norm": 0.17062804102897644, | |
| "learning_rate": 9.621099679309948e-06, | |
| "loss": 66.8805, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.05084704329332388, | |
| "grad_norm": 1.8228886127471924, | |
| "learning_rate": 9.61426964016452e-06, | |
| "loss": 90.8759, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05123817439558022, | |
| "grad_norm": 0.9226913452148438, | |
| "learning_rate": 9.60738105935204e-06, | |
| "loss": 57.6078, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.05162930549783656, | |
| "grad_norm": 0.695069432258606, | |
| "learning_rate": 9.60043402426857e-06, | |
| "loss": 125.0028, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.052020436600092895, | |
| "grad_norm": 1.5678439140319824, | |
| "learning_rate": 9.593428623051793e-06, | |
| "loss": 94.1563, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.052411567702349234, | |
| "grad_norm": 1.5755531787872314, | |
| "learning_rate": 9.58636494457988e-06, | |
| "loss": 83.3838, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.05280269880460557, | |
| "grad_norm": 0.2734808623790741, | |
| "learning_rate": 9.57924307847038e-06, | |
| "loss": 31.6734, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.0531938299068619, | |
| "grad_norm": 1.4930920600891113, | |
| "learning_rate": 9.572063115079063e-06, | |
| "loss": 126.7836, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.05358496100911824, | |
| "grad_norm": 0.25422215461730957, | |
| "learning_rate": 9.564825145498795e-06, | |
| "loss": 76.5774, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.05397609211137458, | |
| "grad_norm": 163.52911376953125, | |
| "learning_rate": 9.557529261558367e-06, | |
| "loss": 53.7238, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.05436722321363092, | |
| "grad_norm": 553.9047241210938, | |
| "learning_rate": 9.550175555821333e-06, | |
| "loss": 76.2747, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.054758354315887256, | |
| "grad_norm": 0.18982116878032684, | |
| "learning_rate": 9.542764121584845e-06, | |
| "loss": 289.7023, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.055149485418143594, | |
| "grad_norm": 1.4099926948547363, | |
| "learning_rate": 9.53529505287845e-06, | |
| "loss": 43.1031, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.05554061652039993, | |
| "grad_norm": 1.5430524349212646, | |
| "learning_rate": 9.527768444462922e-06, | |
| "loss": 4.821, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05593174762265627, | |
| "grad_norm": 0.40151554346084595, | |
| "learning_rate": 9.520184391829037e-06, | |
| "loss": 67.8287, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.05632287872491261, | |
| "grad_norm": 0.22733083367347717, | |
| "learning_rate": 9.512542991196377e-06, | |
| "loss": 162.4188, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05671400982716895, | |
| "grad_norm": 0.44649437069892883, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 80.723, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.05710514092942528, | |
| "grad_norm": 4683.439453125, | |
| "learning_rate": 9.497088534449707e-06, | |
| "loss": 362.2365, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.05749627203168162, | |
| "grad_norm": 1.031704306602478, | |
| "learning_rate": 9.489275674407826e-06, | |
| "loss": 164.8387, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.057887403133937955, | |
| "grad_norm": 0.1672774702310562, | |
| "learning_rate": 9.481405858508935e-06, | |
| "loss": 117.6841, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.05827853423619429, | |
| "grad_norm": 292.0991516113281, | |
| "learning_rate": 9.473479186598115e-06, | |
| "loss": 86.3267, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.05866966533845063, | |
| "grad_norm": 2.811321496963501, | |
| "learning_rate": 9.465495759241793e-06, | |
| "loss": 291.6378, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05906079644070697, | |
| "grad_norm": 3673.74951171875, | |
| "learning_rate": 9.457455677726447e-06, | |
| "loss": 118.6411, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.05945192754296331, | |
| "grad_norm": 3.3059866428375244, | |
| "learning_rate": 9.449359044057344e-06, | |
| "loss": 152.2512, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.059843058645219646, | |
| "grad_norm": 1.5615016222000122, | |
| "learning_rate": 9.441205960957221e-06, | |
| "loss": 75.4238, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.060234189747475984, | |
| "grad_norm": 129.04693603515625, | |
| "learning_rate": 9.432996531865001e-06, | |
| "loss": 98.264, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.06062532084973232, | |
| "grad_norm": 1.3238544464111328, | |
| "learning_rate": 9.424730860934474e-06, | |
| "loss": 138.5685, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.061016451951988654, | |
| "grad_norm": 0.6455661058425903, | |
| "learning_rate": 9.416409053032971e-06, | |
| "loss": 76.155, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.06140758305424499, | |
| "grad_norm": 3.3306515216827393, | |
| "learning_rate": 9.408031213740045e-06, | |
| "loss": 193.0163, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.06179871415650133, | |
| "grad_norm": 363.2554626464844, | |
| "learning_rate": 9.399597449346119e-06, | |
| "loss": 71.2268, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.06218984525875767, | |
| "grad_norm": 1775.3311767578125, | |
| "learning_rate": 9.391107866851143e-06, | |
| "loss": 102.3627, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.062580976361014, | |
| "grad_norm": 0.23126842081546783, | |
| "learning_rate": 9.382562573963238e-06, | |
| "loss": 33.1838, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06297210746327034, | |
| "grad_norm": 425.8428955078125, | |
| "learning_rate": 9.37396167909733e-06, | |
| "loss": 122.2196, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.06336323856552668, | |
| "grad_norm": 2.3558237552642822, | |
| "learning_rate": 9.365305291373769e-06, | |
| "loss": 91.5736, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.06375436966778301, | |
| "grad_norm": 0.19831174612045288, | |
| "learning_rate": 9.356593520616948e-06, | |
| "loss": 126.7139, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.06414550077003936, | |
| "grad_norm": 2583.596435546875, | |
| "learning_rate": 9.347826477353911e-06, | |
| "loss": 114.9668, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.06453663187229569, | |
| "grad_norm": 468.4013977050781, | |
| "learning_rate": 9.33900427281295e-06, | |
| "loss": 58.1353, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.06492776297455204, | |
| "grad_norm": 0.607861340045929, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 89.4309, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.06531889407680837, | |
| "grad_norm": 1.087489128112793, | |
| "learning_rate": 9.321194828308185e-06, | |
| "loss": 57.7468, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.06571002517906471, | |
| "grad_norm": 1.4299119710922241, | |
| "learning_rate": 9.312207814294454e-06, | |
| "loss": 131.9059, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.06610115628132104, | |
| "grad_norm": 0.32067033648490906, | |
| "learning_rate": 9.303166090900082e-06, | |
| "loss": 113.9032, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.06649228738357739, | |
| "grad_norm": 2900.03857421875, | |
| "learning_rate": 9.294069772838253e-06, | |
| "loss": 62.0634, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06688341848583372, | |
| "grad_norm": 2043.025634765625, | |
| "learning_rate": 9.284918975514798e-06, | |
| "loss": 128.1048, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.06727454958809005, | |
| "grad_norm": 763.0787963867188, | |
| "learning_rate": 9.275713815026732e-06, | |
| "loss": 100.8595, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.0676656806903464, | |
| "grad_norm": 316.2015380859375, | |
| "learning_rate": 9.266454408160779e-06, | |
| "loss": 83.9507, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.06805681179260273, | |
| "grad_norm": 1.8424168825149536, | |
| "learning_rate": 9.257140872391895e-06, | |
| "loss": 150.7857, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.06844794289485907, | |
| "grad_norm": 3.162083387374878, | |
| "learning_rate": 9.24777332588177e-06, | |
| "loss": 109.7346, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0688390739971154, | |
| "grad_norm": 0.48428380489349365, | |
| "learning_rate": 9.238351887477338e-06, | |
| "loss": 198.0632, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.06923020509937175, | |
| "grad_norm": 283.169921875, | |
| "learning_rate": 9.22887667670926e-06, | |
| "loss": 133.4527, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.06962133620162808, | |
| "grad_norm": 0.24236121773719788, | |
| "learning_rate": 9.219347813790416e-06, | |
| "loss": 134.3827, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.07001246730388443, | |
| "grad_norm": 1.1005642414093018, | |
| "learning_rate": 9.209765419614375e-06, | |
| "loss": 95.9749, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.07040359840614076, | |
| "grad_norm": 0.2186761498451233, | |
| "learning_rate": 9.200129615753858e-06, | |
| "loss": 99.3441, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07079472950839709, | |
| "grad_norm": 1.1894441843032837, | |
| "learning_rate": 9.190440524459203e-06, | |
| "loss": 64.4476, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.07118586061065343, | |
| "grad_norm": 1906.7816162109375, | |
| "learning_rate": 9.180698268656814e-06, | |
| "loss": 76.6484, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.07157699171290977, | |
| "grad_norm": 1806.0740966796875, | |
| "learning_rate": 9.170902971947589e-06, | |
| "loss": 124.0878, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.07196812281516611, | |
| "grad_norm": 0.9422973394393921, | |
| "learning_rate": 9.16105475860537e-06, | |
| "loss": 61.4217, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.07235925391742244, | |
| "grad_norm": 0.22520415484905243, | |
| "learning_rate": 9.151153753575351e-06, | |
| "loss": 76.0462, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.07275038501967879, | |
| "grad_norm": 0.5669053792953491, | |
| "learning_rate": 9.141200082472503e-06, | |
| "loss": 66.0641, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.07314151612193512, | |
| "grad_norm": 4875.95751953125, | |
| "learning_rate": 9.131193871579975e-06, | |
| "loss": 363.5605, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.07353264722419146, | |
| "grad_norm": 1.5607740879058838, | |
| "learning_rate": 9.121135247847492e-06, | |
| "loss": 163.8841, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.0739237783264478, | |
| "grad_norm": 0.15883424878120422, | |
| "learning_rate": 9.111024338889748e-06, | |
| "loss": 99.735, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.07431490942870414, | |
| "grad_norm": 1.4388748407363892, | |
| "learning_rate": 9.10086127298478e-06, | |
| "loss": 37.5176, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07470604053096047, | |
| "grad_norm": 5.2772040367126465, | |
| "learning_rate": 9.090646179072352e-06, | |
| "loss": 107.7335, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.0750971716332168, | |
| "grad_norm": 1.8319369554519653, | |
| "learning_rate": 9.080379186752304e-06, | |
| "loss": 71.8792, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07548830273547315, | |
| "grad_norm": 0.7392552495002747, | |
| "learning_rate": 9.070060426282924e-06, | |
| "loss": 113.0879, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.07587943383772948, | |
| "grad_norm": 1.0116955041885376, | |
| "learning_rate": 9.059690028579285e-06, | |
| "loss": 37.4901, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.07627056493998582, | |
| "grad_norm": 0.36633720993995667, | |
| "learning_rate": 9.049268125211577e-06, | |
| "loss": 33.619, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07666169604224216, | |
| "grad_norm": 0.25814440846443176, | |
| "learning_rate": 9.038794848403463e-06, | |
| "loss": 73.8944, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.0770528271444985, | |
| "grad_norm": 0.9375834465026855, | |
| "learning_rate": 9.028270331030373e-06, | |
| "loss": 130.3545, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.07744395824675483, | |
| "grad_norm": 0.18285518884658813, | |
| "learning_rate": 9.017694706617836e-06, | |
| "loss": 52.1208, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.07783508934901118, | |
| "grad_norm": 3008.265869140625, | |
| "learning_rate": 9.007068109339783e-06, | |
| "loss": 67.7978, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.07822622045126751, | |
| "grad_norm": 0.1591944545507431, | |
| "learning_rate": 8.996390674016839e-06, | |
| "loss": 56.9001, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07861735155352384, | |
| "grad_norm": 0.13576161861419678, | |
| "learning_rate": 8.985662536114614e-06, | |
| "loss": 136.3152, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.07900848265578019, | |
| "grad_norm": 411.30450439453125, | |
| "learning_rate": 8.97488383174199e-06, | |
| "loss": 125.2447, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.07939961375803652, | |
| "grad_norm": 0.5793523788452148, | |
| "learning_rate": 8.964054697649389e-06, | |
| "loss": 179.7917, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.07979074486029286, | |
| "grad_norm": 1.1756185293197632, | |
| "learning_rate": 8.953175271227042e-06, | |
| "loss": 208.6852, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.08018187596254919, | |
| "grad_norm": 0.4390352666378021, | |
| "learning_rate": 8.94224569050324e-06, | |
| "loss": 78.6966, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.08057300706480554, | |
| "grad_norm": 0.20057456195354462, | |
| "learning_rate": 8.931266094142588e-06, | |
| "loss": 79.7404, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.08096413816706187, | |
| "grad_norm": 2346.697509765625, | |
| "learning_rate": 8.920236621444243e-06, | |
| "loss": 162.8069, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.08135526926931821, | |
| "grad_norm": 0.19323071837425232, | |
| "learning_rate": 8.90915741234015e-06, | |
| "loss": 182.8168, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.08174640037157455, | |
| "grad_norm": 301.8100891113281, | |
| "learning_rate": 8.89802860739326e-06, | |
| "loss": 75.6532, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.08213753147383089, | |
| "grad_norm": 1862.041748046875, | |
| "learning_rate": 8.88685034779576e-06, | |
| "loss": 147.3607, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08252866257608722, | |
| "grad_norm": 0.15555168688297272, | |
| "learning_rate": 8.87562277536726e-06, | |
| "loss": 8.7277, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.08291979367834355, | |
| "grad_norm": 0.22444282472133636, | |
| "learning_rate": 8.864346032553016e-06, | |
| "loss": 168.385, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.0833109247805999, | |
| "grad_norm": 1.4300802946090698, | |
| "learning_rate": 8.853020262422111e-06, | |
| "loss": 170.4068, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.08370205588285623, | |
| "grad_norm": 0.5101105570793152, | |
| "learning_rate": 8.84164560866564e-06, | |
| "loss": 322.9848, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.08409318698511258, | |
| "grad_norm": 1.2417665719985962, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 265.6229, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0844843180873689, | |
| "grad_norm": 0.5343866944313049, | |
| "learning_rate": 8.818750228139513e-06, | |
| "loss": 146.003, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.08487544918962525, | |
| "grad_norm": 0.30504119396209717, | |
| "learning_rate": 8.807229791845673e-06, | |
| "loss": 37.3566, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.08526658029188158, | |
| "grad_norm": 0.8993078470230103, | |
| "learning_rate": 8.795661052874217e-06, | |
| "loss": 83.1912, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.08565771139413793, | |
| "grad_norm": 520.2239379882812, | |
| "learning_rate": 8.78404415799881e-06, | |
| "loss": 6.4627, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.08604884249639426, | |
| "grad_norm": 1.5063972473144531, | |
| "learning_rate": 8.772379254604074e-06, | |
| "loss": 59.2478, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0864399735986506, | |
| "grad_norm": 0.9105270504951477, | |
| "learning_rate": 8.76066649068372e-06, | |
| "loss": 30.8765, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.08683110470090694, | |
| "grad_norm": 1.8939876556396484, | |
| "learning_rate": 8.748906014838672e-06, | |
| "loss": 147.7755, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.08722223580316327, | |
| "grad_norm": 0.8894091844558716, | |
| "learning_rate": 8.737097976275177e-06, | |
| "loss": 229.9513, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.08761336690541961, | |
| "grad_norm": 0.28113725781440735, | |
| "learning_rate": 8.725242524802919e-06, | |
| "loss": 185.9432, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.08800449800767594, | |
| "grad_norm": 2170.448974609375, | |
| "learning_rate": 8.713339810833105e-06, | |
| "loss": 86.6734, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.08839562910993229, | |
| "grad_norm": 0.9475433826446533, | |
| "learning_rate": 8.701389985376578e-06, | |
| "loss": 117.112, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.08878676021218862, | |
| "grad_norm": 0.6161375641822815, | |
| "learning_rate": 8.689393200041878e-06, | |
| "loss": 75.888, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.08917789131444497, | |
| "grad_norm": 1.6829807758331299, | |
| "learning_rate": 8.677349607033336e-06, | |
| "loss": 23.0701, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.0895690224167013, | |
| "grad_norm": 0.21339260041713715, | |
| "learning_rate": 8.665259359149132e-06, | |
| "loss": 28.2332, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.08996015351895764, | |
| "grad_norm": 2635.137451171875, | |
| "learning_rate": 8.653122609779365e-06, | |
| "loss": 124.7725, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09035128462121397, | |
| "grad_norm": 0.34213894605636597, | |
| "learning_rate": 8.640939512904097e-06, | |
| "loss": 96.0887, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.0907424157234703, | |
| "grad_norm": 0.23164384067058563, | |
| "learning_rate": 8.62871022309141e-06, | |
| "loss": 69.6245, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.09113354682572665, | |
| "grad_norm": 1.0450925827026367, | |
| "learning_rate": 8.61643489549544e-06, | |
| "loss": 60.8622, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.09152467792798298, | |
| "grad_norm": 928.1207275390625, | |
| "learning_rate": 8.604113685854407e-06, | |
| "loss": 200.9607, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.09191580903023933, | |
| "grad_norm": 0.23424312472343445, | |
| "learning_rate": 8.591746750488639e-06, | |
| "loss": 37.2375, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.09230694013249566, | |
| "grad_norm": 0.17486761510372162, | |
| "learning_rate": 8.579334246298593e-06, | |
| "loss": 92.1142, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.092698071234752, | |
| "grad_norm": 0.2754494845867157, | |
| "learning_rate": 8.566876330762861e-06, | |
| "loss": 146.6022, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.09308920233700833, | |
| "grad_norm": 0.6534192562103271, | |
| "learning_rate": 8.554373161936176e-06, | |
| "loss": 152.2259, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.09348033343926468, | |
| "grad_norm": 0.19244331121444702, | |
| "learning_rate": 8.541824898447399e-06, | |
| "loss": 285.4724, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.09387146454152101, | |
| "grad_norm": 0.6852802038192749, | |
| "learning_rate": 8.529231699497512e-06, | |
| "loss": 170.299, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09426259564377736, | |
| "grad_norm": 3.1934654712677, | |
| "learning_rate": 8.516593724857598e-06, | |
| "loss": 167.2633, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.09465372674603369, | |
| "grad_norm": 0.19524431228637695, | |
| "learning_rate": 8.503911134866819e-06, | |
| "loss": 103.9347, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.09504485784829002, | |
| "grad_norm": 316.60107421875, | |
| "learning_rate": 8.491184090430365e-06, | |
| "loss": 82.532, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.09543598895054636, | |
| "grad_norm": 1987.078125, | |
| "learning_rate": 8.478412753017433e-06, | |
| "loss": 205.8121, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.0958271200528027, | |
| "grad_norm": 4.607306957244873, | |
| "learning_rate": 8.465597284659163e-06, | |
| "loss": 28.8357, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.09621825115505904, | |
| "grad_norm": 317.93145751953125, | |
| "learning_rate": 8.452737847946597e-06, | |
| "loss": 96.3762, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.09660938225731537, | |
| "grad_norm": 1839.33740234375, | |
| "learning_rate": 8.439834606028594e-06, | |
| "loss": 81.6538, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.09700051335957172, | |
| "grad_norm": 1.588068962097168, | |
| "learning_rate": 8.426887722609787e-06, | |
| "loss": 86.3677, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.09739164446182805, | |
| "grad_norm": 0.27696385979652405, | |
| "learning_rate": 8.413897361948484e-06, | |
| "loss": 37.9441, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.09778277556408439, | |
| "grad_norm": 0.5097156167030334, | |
| "learning_rate": 8.400863688854598e-06, | |
| "loss": 46.0221, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09817390666634072, | |
| "grad_norm": 0.16560612618923187, | |
| "learning_rate": 8.387786868687549e-06, | |
| "loss": 63.0729, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.09856503776859706, | |
| "grad_norm": 0.4086504280567169, | |
| "learning_rate": 8.374667067354164e-06, | |
| "loss": 123.6349, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.0989561688708534, | |
| "grad_norm": 2121.2421875, | |
| "learning_rate": 8.361504451306585e-06, | |
| "loss": 79.6353, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.09934729997310973, | |
| "grad_norm": 0.5328729152679443, | |
| "learning_rate": 8.34829918754014e-06, | |
| "loss": 57.2165, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.09973843107536608, | |
| "grad_norm": 0.5634379386901855, | |
| "learning_rate": 8.335051443591236e-06, | |
| "loss": 80.4085, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.10012956217762241, | |
| "grad_norm": 1125.6824951171875, | |
| "learning_rate": 8.321761387535231e-06, | |
| "loss": 95.3526, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.10052069327987875, | |
| "grad_norm": 3538.38525390625, | |
| "learning_rate": 8.308429187984298e-06, | |
| "loss": 142.8311, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.10091182438213508, | |
| "grad_norm": 0.32026898860931396, | |
| "learning_rate": 8.295055014085289e-06, | |
| "loss": 33.7843, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.10130295548439143, | |
| "grad_norm": 3.7499899864196777, | |
| "learning_rate": 8.281639035517591e-06, | |
| "loss": 64.4205, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.10169408658664776, | |
| "grad_norm": 0.6029073596000671, | |
| "learning_rate": 8.268181422490969e-06, | |
| "loss": 91.6323, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1020852176889041, | |
| "grad_norm": 481.76361083984375, | |
| "learning_rate": 8.254682345743406e-06, | |
| "loss": 92.7615, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.10247634879116044, | |
| "grad_norm": 0.34256768226623535, | |
| "learning_rate": 8.241141976538944e-06, | |
| "loss": 104.3669, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.10286747989341677, | |
| "grad_norm": 1.014054775238037, | |
| "learning_rate": 8.227560486665498e-06, | |
| "loss": 30.8866, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.10325861099567311, | |
| "grad_norm": 0.3670375943183899, | |
| "learning_rate": 8.213938048432697e-06, | |
| "loss": 89.6379, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.10364974209792945, | |
| "grad_norm": 0.2565741539001465, | |
| "learning_rate": 8.200274834669675e-06, | |
| "loss": 81.031, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.10404087320018579, | |
| "grad_norm": 0.15868939459323883, | |
| "learning_rate": 8.186571018722894e-06, | |
| "loss": 184.6763, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.10443200430244212, | |
| "grad_norm": 1.7641242742538452, | |
| "learning_rate": 8.172826774453937e-06, | |
| "loss": 156.6617, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.10482313540469847, | |
| "grad_norm": 2.6388673782348633, | |
| "learning_rate": 8.159042276237308e-06, | |
| "loss": 36.3769, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.1052142665069548, | |
| "grad_norm": 0.3692081868648529, | |
| "learning_rate": 8.145217698958213e-06, | |
| "loss": 94.0488, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.10560539760921114, | |
| "grad_norm": 1.1230888366699219, | |
| "learning_rate": 8.131353218010347e-06, | |
| "loss": 182.0155, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10599652871146747, | |
| "grad_norm": 2172.541259765625, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 144.515, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.1063876598137238, | |
| "grad_norm": 1854.9971923828125, | |
| "learning_rate": 8.10350524921216e-06, | |
| "loss": 135.7373, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.10677879091598015, | |
| "grad_norm": 0.37005868554115295, | |
| "learning_rate": 8.089522114671603e-06, | |
| "loss": 71.444, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.10716992201823648, | |
| "grad_norm": 1958.9207763671875, | |
| "learning_rate": 8.075499783077321e-06, | |
| "loss": 129.372, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.10756105312049283, | |
| "grad_norm": 0.5390088558197021, | |
| "learning_rate": 8.061438432331935e-06, | |
| "loss": 205.9629, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.10795218422274916, | |
| "grad_norm": 1.4290964603424072, | |
| "learning_rate": 8.047338240833108e-06, | |
| "loss": 107.3386, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.1083433153250055, | |
| "grad_norm": 0.30491700768470764, | |
| "learning_rate": 8.033199387471278e-06, | |
| "loss": 126.0688, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.10873444642726184, | |
| "grad_norm": 0.6881037950515747, | |
| "learning_rate": 8.019022051627387e-06, | |
| "loss": 137.38, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.10912557752951818, | |
| "grad_norm": 0.5975064039230347, | |
| "learning_rate": 8.004806413170613e-06, | |
| "loss": 49.5408, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.10951670863177451, | |
| "grad_norm": 2138.67041015625, | |
| "learning_rate": 7.99055265245608e-06, | |
| "loss": 87.7601, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10990783973403086, | |
| "grad_norm": 0.19253171980381012, | |
| "learning_rate": 7.976260950322572e-06, | |
| "loss": 76.5956, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.11029897083628719, | |
| "grad_norm": 1.348046898841858, | |
| "learning_rate": 7.96193148809024e-06, | |
| "loss": 47.8194, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.11069010193854352, | |
| "grad_norm": 0.4207615554332733, | |
| "learning_rate": 7.9475644475583e-06, | |
| "loss": 59.7233, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.11108123304079986, | |
| "grad_norm": 1892.4029541015625, | |
| "learning_rate": 7.933160011002729e-06, | |
| "loss": 147.8456, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.1114723641430562, | |
| "grad_norm": 1.315190076828003, | |
| "learning_rate": 7.918718361173951e-06, | |
| "loss": 93.2988, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.11186349524531254, | |
| "grad_norm": 1.3171827793121338, | |
| "learning_rate": 7.904239681294515e-06, | |
| "loss": 44.8592, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.11225462634756887, | |
| "grad_norm": 314.198486328125, | |
| "learning_rate": 7.889724155056776e-06, | |
| "loss": 87.7935, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.11264575744982522, | |
| "grad_norm": 0.28883472084999084, | |
| "learning_rate": 7.875171966620567e-06, | |
| "loss": 101.7658, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.11303688855208155, | |
| "grad_norm": 0.49919599294662476, | |
| "learning_rate": 7.860583300610849e-06, | |
| "loss": 132.276, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.1134280196543379, | |
| "grad_norm": 0.644845187664032, | |
| "learning_rate": 7.84595834211538e-06, | |
| "loss": 88.4937, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11381915075659423, | |
| "grad_norm": 0.21764078736305237, | |
| "learning_rate": 7.83129727668237e-06, | |
| "loss": 106.7949, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.11421028185885056, | |
| "grad_norm": 0.6382555961608887, | |
| "learning_rate": 7.81660029031811e-06, | |
| "loss": 86.3284, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.1146014129611069, | |
| "grad_norm": 3383.526123046875, | |
| "learning_rate": 7.801867569484635e-06, | |
| "loss": 163.3528, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.11499254406336323, | |
| "grad_norm": 1.0071310997009277, | |
| "learning_rate": 7.78709930109734e-06, | |
| "loss": 55.8198, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.11538367516561958, | |
| "grad_norm": 472.02276611328125, | |
| "learning_rate": 7.772295672522615e-06, | |
| "loss": 68.5582, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.11577480626787591, | |
| "grad_norm": 3507.451904296875, | |
| "learning_rate": 7.75745687157547e-06, | |
| "loss": 200.0802, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.11616593737013226, | |
| "grad_norm": 2918.291748046875, | |
| "learning_rate": 7.742583086517151e-06, | |
| "loss": 235.3087, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.11655706847238859, | |
| "grad_norm": 264.2690124511719, | |
| "learning_rate": 7.727674506052744e-06, | |
| "loss": 46.5808, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.11694819957464493, | |
| "grad_norm": 0.6071652173995972, | |
| "learning_rate": 7.712731319328798e-06, | |
| "loss": 66.332, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.11733933067690126, | |
| "grad_norm": 415.38214111328125, | |
| "learning_rate": 7.697753715930906e-06, | |
| "loss": 55.6848, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11773046177915761, | |
| "grad_norm": 0.16611334681510925, | |
| "learning_rate": 7.682741885881314e-06, | |
| "loss": 176.503, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.11812159288141394, | |
| "grad_norm": 0.9599153995513916, | |
| "learning_rate": 7.667696019636504e-06, | |
| "loss": 12.792, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.11851272398367027, | |
| "grad_norm": 1961.807373046875, | |
| "learning_rate": 7.652616308084774e-06, | |
| "loss": 144.7594, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.11890385508592662, | |
| "grad_norm": 1706.922119140625, | |
| "learning_rate": 7.637502942543825e-06, | |
| "loss": 77.7838, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.11929498618818295, | |
| "grad_norm": 3011.58349609375, | |
| "learning_rate": 7.622356114758328e-06, | |
| "loss": 66.3472, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.11968611729043929, | |
| "grad_norm": 0.18727894127368927, | |
| "learning_rate": 7.607176016897491e-06, | |
| "loss": 20.0101, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.12007724839269562, | |
| "grad_norm": 747.8145141601562, | |
| "learning_rate": 7.591962841552627e-06, | |
| "loss": 124.6628, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.12046837949495197, | |
| "grad_norm": 2174.573486328125, | |
| "learning_rate": 7.576716781734699e-06, | |
| "loss": 122.6966, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.1208595105972083, | |
| "grad_norm": 1657.6822509765625, | |
| "learning_rate": 7.561438030871886e-06, | |
| "loss": 90.9553, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.12125064169946465, | |
| "grad_norm": 409.1562194824219, | |
| "learning_rate": 7.546126782807117e-06, | |
| "loss": 39.3561, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12164177280172098, | |
| "grad_norm": 0.1211930438876152, | |
| "learning_rate": 7.530783231795615e-06, | |
| "loss": 1.299, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.12203290390397731, | |
| "grad_norm": 2307.365234375, | |
| "learning_rate": 7.515407572502438e-06, | |
| "loss": 200.4622, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.12242403500623365, | |
| "grad_norm": 0.3849581778049469, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 126.054, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.12281516610848998, | |
| "grad_norm": 2264.787109375, | |
| "learning_rate": 7.484560709765605e-06, | |
| "loss": 172.344, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.12320629721074633, | |
| "grad_norm": 1.2760062217712402, | |
| "learning_rate": 7.469089897678958e-06, | |
| "loss": 107.9826, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.12359742831300266, | |
| "grad_norm": 347.2667236328125, | |
| "learning_rate": 7.453587760019691e-06, | |
| "loss": 123.0772, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.123988559415259, | |
| "grad_norm": 0.7116885781288147, | |
| "learning_rate": 7.438054493464859e-06, | |
| "loss": 53.4364, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.12437969051751534, | |
| "grad_norm": 5167.86669921875, | |
| "learning_rate": 7.422490295086457e-06, | |
| "loss": 314.3017, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.12477082161977168, | |
| "grad_norm": 405.8331604003906, | |
| "learning_rate": 7.406895362348916e-06, | |
| "loss": 157.4734, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.125161952722028, | |
| "grad_norm": 289.1830749511719, | |
| "learning_rate": 7.391269893106592e-06, | |
| "loss": 86.5292, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12555308382428434, | |
| "grad_norm": 1.3253761529922485, | |
| "learning_rate": 7.375614085601265e-06, | |
| "loss": 62.8788, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.12594421492654068, | |
| "grad_norm": 3807.062744140625, | |
| "learning_rate": 7.359928138459615e-06, | |
| "loss": 193.2301, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.12633534602879704, | |
| "grad_norm": 0.38906916975975037, | |
| "learning_rate": 7.344212250690712e-06, | |
| "loss": 87.5832, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.12672647713105337, | |
| "grad_norm": 1681.7589111328125, | |
| "learning_rate": 7.328466621683481e-06, | |
| "loss": 167.7496, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1271176082333097, | |
| "grad_norm": 0.22865992784500122, | |
| "learning_rate": 7.312691451204178e-06, | |
| "loss": 42.7028, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.12750873933556603, | |
| "grad_norm": 0.14315825700759888, | |
| "learning_rate": 7.296886939393852e-06, | |
| "loss": 41.7926, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.1278998704378224, | |
| "grad_norm": 0.7125481963157654, | |
| "learning_rate": 7.281053286765816e-06, | |
| "loss": 150.8858, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.12829100154007872, | |
| "grad_norm": 1.3114720582962036, | |
| "learning_rate": 7.265190694203086e-06, | |
| "loss": 200.4679, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.12868213264233505, | |
| "grad_norm": 308.97198486328125, | |
| "learning_rate": 7.249299362955846e-06, | |
| "loss": 56.5048, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.12907326374459138, | |
| "grad_norm": 337.1923522949219, | |
| "learning_rate": 7.233379494638891e-06, | |
| "loss": 43.4137, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12946439484684774, | |
| "grad_norm": 0.3174319565296173, | |
| "learning_rate": 7.217431291229068e-06, | |
| "loss": 57.2986, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.12985552594910407, | |
| "grad_norm": 4877.64306640625, | |
| "learning_rate": 7.201454955062712e-06, | |
| "loss": 295.9178, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.1302466570513604, | |
| "grad_norm": 2004.761474609375, | |
| "learning_rate": 7.185450688833083e-06, | |
| "loss": 175.5556, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.13063778815361674, | |
| "grad_norm": 0.5494270920753479, | |
| "learning_rate": 7.169418695587791e-06, | |
| "loss": 95.1294, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.13102891925587307, | |
| "grad_norm": 0.4806520938873291, | |
| "learning_rate": 7.153359178726222e-06, | |
| "loss": 40.1013, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.13142005035812943, | |
| "grad_norm": 0.6679964065551758, | |
| "learning_rate": 7.137272341996958e-06, | |
| "loss": 73.998, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.13181118146038576, | |
| "grad_norm": 0.3463609516620636, | |
| "learning_rate": 7.121158389495187e-06, | |
| "loss": 55.503, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.1322023125626421, | |
| "grad_norm": 0.4972734749317169, | |
| "learning_rate": 7.10501752566012e-06, | |
| "loss": 93.7803, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.13259344366489842, | |
| "grad_norm": 0.16640667617321014, | |
| "learning_rate": 7.088849955272396e-06, | |
| "loss": 118.0719, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.13298457476715478, | |
| "grad_norm": 0.20604762434959412, | |
| "learning_rate": 7.072655883451478e-06, | |
| "loss": 135.9177, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1333757058694111, | |
| "grad_norm": 0.49932876229286194, | |
| "learning_rate": 7.056435515653059e-06, | |
| "loss": 161.2835, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.13376683697166744, | |
| "grad_norm": 0.6121784448623657, | |
| "learning_rate": 7.040189057666449e-06, | |
| "loss": 12.1418, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.13415796807392377, | |
| "grad_norm": 1.3118720054626465, | |
| "learning_rate": 7.023916715611969e-06, | |
| "loss": 122.6702, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1345490991761801, | |
| "grad_norm": 0.12975674867630005, | |
| "learning_rate": 7.007618695938334e-06, | |
| "loss": 165.243, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.13494023027843646, | |
| "grad_norm": 1.6560322046279907, | |
| "learning_rate": 6.991295205420028e-06, | |
| "loss": 38.0507, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1353313613806928, | |
| "grad_norm": 0.20794105529785156, | |
| "learning_rate": 6.974946451154694e-06, | |
| "loss": 22.9494, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.13572249248294913, | |
| "grad_norm": 0.2563984990119934, | |
| "learning_rate": 6.9585726405604915e-06, | |
| "loss": 109.0126, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.13611362358520546, | |
| "grad_norm": 2.3350882530212402, | |
| "learning_rate": 6.942173981373474e-06, | |
| "loss": 25.5488, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.13650475468746182, | |
| "grad_norm": 0.6753470301628113, | |
| "learning_rate": 6.925750681644954e-06, | |
| "loss": 81.2919, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.13689588578971815, | |
| "grad_norm": 1.4040716886520386, | |
| "learning_rate": 6.90930294973886e-06, | |
| "loss": 140.9022, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.13728701689197448, | |
| "grad_norm": 0.4277321696281433, | |
| "learning_rate": 6.892830994329089e-06, | |
| "loss": 86.2413, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.1376781479942308, | |
| "grad_norm": 1191.952880859375, | |
| "learning_rate": 6.876335024396872e-06, | |
| "loss": 152.5764, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.13806927909648714, | |
| "grad_norm": 0.21168895065784454, | |
| "learning_rate": 6.859815249228106e-06, | |
| "loss": 32.4788, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.1384604101987435, | |
| "grad_norm": 352.44512939453125, | |
| "learning_rate": 6.8432718784107145e-06, | |
| "loss": 42.3575, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.13885154130099983, | |
| "grad_norm": 0.7770540118217468, | |
| "learning_rate": 6.8267051218319766e-06, | |
| "loss": 177.1307, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.13924267240325616, | |
| "grad_norm": 1410.75146484375, | |
| "learning_rate": 6.81011518967587e-06, | |
| "loss": 55.0934, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1396338035055125, | |
| "grad_norm": 0.3775579631328583, | |
| "learning_rate": 6.793502292420402e-06, | |
| "loss": 34.0766, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.14002493460776885, | |
| "grad_norm": 3233.5927734375, | |
| "learning_rate": 6.7768666408349445e-06, | |
| "loss": 149.3327, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.14041606571002518, | |
| "grad_norm": 1.1655175685882568, | |
| "learning_rate": 6.760208445977551e-06, | |
| "loss": 78.598, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.14080719681228152, | |
| "grad_norm": 3.140843629837036, | |
| "learning_rate": 6.743527919192285e-06, | |
| "loss": 51.5391, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14119832791453785, | |
| "grad_norm": 0.5811576843261719, | |
| "learning_rate": 6.726825272106539e-06, | |
| "loss": 56.4923, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.14158945901679418, | |
| "grad_norm": 0.12908467650413513, | |
| "learning_rate": 6.710100716628345e-06, | |
| "loss": 57.4381, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.14198059011905054, | |
| "grad_norm": 573.8088989257812, | |
| "learning_rate": 6.693354464943689e-06, | |
| "loss": 98.4893, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.14237172122130687, | |
| "grad_norm": 117.73070526123047, | |
| "learning_rate": 6.676586729513823e-06, | |
| "loss": 96.0484, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.1427628523235632, | |
| "grad_norm": 1419.90625, | |
| "learning_rate": 6.659797723072558e-06, | |
| "loss": 95.183, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.14315398342581953, | |
| "grad_norm": 542.6209106445312, | |
| "learning_rate": 6.642987658623581e-06, | |
| "loss": 65.6222, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1435451145280759, | |
| "grad_norm": 1.1538621187210083, | |
| "learning_rate": 6.626156749437736e-06, | |
| "loss": 120.1217, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.14393624563033222, | |
| "grad_norm": 398.84796142578125, | |
| "learning_rate": 6.609305209050332e-06, | |
| "loss": 74.3819, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.14432737673258855, | |
| "grad_norm": 268.26251220703125, | |
| "learning_rate": 6.592433251258423e-06, | |
| "loss": 76.2617, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.14471850783484488, | |
| "grad_norm": 321.3377990722656, | |
| "learning_rate": 6.575541090118105e-06, | |
| "loss": 40.9459, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.14510963893710124, | |
| "grad_norm": 0.22645658254623413, | |
| "learning_rate": 6.558628939941792e-06, | |
| "loss": 40.5776, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.14550077003935757, | |
| "grad_norm": 0.5002371072769165, | |
| "learning_rate": 6.541697015295503e-06, | |
| "loss": 75.5995, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1458919011416139, | |
| "grad_norm": 0.16194933652877808, | |
| "learning_rate": 6.524745530996137e-06, | |
| "loss": 84.202, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.14628303224387024, | |
| "grad_norm": 0.37668415904045105, | |
| "learning_rate": 6.507774702108748e-06, | |
| "loss": 83.8723, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.14667416334612657, | |
| "grad_norm": 288.6870422363281, | |
| "learning_rate": 6.490784743943819e-06, | |
| "loss": 128.4052, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.14706529444838293, | |
| "grad_norm": 0.13677971065044403, | |
| "learning_rate": 6.473775872054522e-06, | |
| "loss": 57.1975, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.14745642555063926, | |
| "grad_norm": 0.3166632652282715, | |
| "learning_rate": 6.456748302233995e-06, | |
| "loss": 48.1106, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.1478475566528956, | |
| "grad_norm": 1.3575718402862549, | |
| "learning_rate": 6.439702250512596e-06, | |
| "loss": 129.3716, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.14823868775515192, | |
| "grad_norm": 0.21969862282276154, | |
| "learning_rate": 6.4226379331551625e-06, | |
| "loss": 113.5939, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.14862981885740828, | |
| "grad_norm": 0.13174912333488464, | |
| "learning_rate": 6.405555566658276e-06, | |
| "loss": 32.1963, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1490209499596646, | |
| "grad_norm": 0.863881528377533, | |
| "learning_rate": 6.388455367747503e-06, | |
| "loss": 29.8159, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.14941208106192094, | |
| "grad_norm": 0.6778990030288696, | |
| "learning_rate": 6.3713375533746525e-06, | |
| "loss": 128.0998, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.14980321216417727, | |
| "grad_norm": 1.4469739198684692, | |
| "learning_rate": 6.354202340715027e-06, | |
| "loss": 52.4234, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.1501943432664336, | |
| "grad_norm": 0.18584023416042328, | |
| "learning_rate": 6.337049947164656e-06, | |
| "loss": 70.7733, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.15058547436868996, | |
| "grad_norm": 2002.4371337890625, | |
| "learning_rate": 6.319880590337549e-06, | |
| "loss": 84.5797, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.1509766054709463, | |
| "grad_norm": 0.19816723465919495, | |
| "learning_rate": 6.302694488062931e-06, | |
| "loss": 76.0305, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.15136773657320263, | |
| "grad_norm": 0.5410847067832947, | |
| "learning_rate": 6.2854918583824745e-06, | |
| "loss": 74.2372, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.15175886767545896, | |
| "grad_norm": 1.376298427581787, | |
| "learning_rate": 6.268272919547537e-06, | |
| "loss": 24.7493, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.15214999877771532, | |
| "grad_norm": 142.1651611328125, | |
| "learning_rate": 6.251037890016396e-06, | |
| "loss": 34.4196, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.15254112987997165, | |
| "grad_norm": 2054.256103515625, | |
| "learning_rate": 6.233786988451468e-06, | |
| "loss": 86.9907, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15293226098222798, | |
| "grad_norm": 3574.6181640625, | |
| "learning_rate": 6.216520433716544e-06, | |
| "loss": 162.4229, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.1533233920844843, | |
| "grad_norm": 1714.3194580078125, | |
| "learning_rate": 6.199238444874005e-06, | |
| "loss": 124.9201, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.15371452318674064, | |
| "grad_norm": 1639.366455078125, | |
| "learning_rate": 6.181941241182044e-06, | |
| "loss": 76.7506, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.154105654288997, | |
| "grad_norm": 1059.76123046875, | |
| "learning_rate": 6.164629042091894e-06, | |
| "loss": 34.25, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.15449678539125333, | |
| "grad_norm": 1.3340238332748413, | |
| "learning_rate": 6.1473020672450275e-06, | |
| "loss": 85.7469, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.15488791649350966, | |
| "grad_norm": 1639.532958984375, | |
| "learning_rate": 6.1299605364703826e-06, | |
| "loss": 93.6497, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.155279047595766, | |
| "grad_norm": 0.20548588037490845, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 83.9102, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.15567017869802235, | |
| "grad_norm": 1.0026606321334839, | |
| "learning_rate": 6.095234687374085e-06, | |
| "loss": 74.3395, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.15606130980027869, | |
| "grad_norm": 453.576171875, | |
| "learning_rate": 6.0778508096224985e-06, | |
| "loss": 57.6422, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.15645244090253502, | |
| "grad_norm": 0.14913234114646912, | |
| "learning_rate": 6.060453257077686e-06, | |
| "loss": 26.2558, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15684357200479135, | |
| "grad_norm": 0.1819409877061844, | |
| "learning_rate": 6.043042250464005e-06, | |
| "loss": 57.1572, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.15723470310704768, | |
| "grad_norm": 0.3527912497520447, | |
| "learning_rate": 6.025618010676516e-06, | |
| "loss": 78.8062, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.15762583420930404, | |
| "grad_norm": 1.7945302724838257, | |
| "learning_rate": 6.008180758778167e-06, | |
| "loss": 34.1183, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.15801696531156037, | |
| "grad_norm": 0.3905615508556366, | |
| "learning_rate": 5.990730715996989e-06, | |
| "loss": 38.1853, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.1584080964138167, | |
| "grad_norm": 1662.2044677734375, | |
| "learning_rate": 5.973268103723293e-06, | |
| "loss": 90.833, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.15879922751607303, | |
| "grad_norm": 894.9415283203125, | |
| "learning_rate": 5.955793143506863e-06, | |
| "loss": 54.7208, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.1591903586183294, | |
| "grad_norm": 0.2516638934612274, | |
| "learning_rate": 5.938306057054139e-06, | |
| "loss": 49.8002, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.15958148972058572, | |
| "grad_norm": 0.16948434710502625, | |
| "learning_rate": 5.920807066225409e-06, | |
| "loss": 119.8379, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.15997262082284205, | |
| "grad_norm": 0.480121374130249, | |
| "learning_rate": 5.903296393031996e-06, | |
| "loss": 57.558, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.16036375192509839, | |
| "grad_norm": 467.2088623046875, | |
| "learning_rate": 5.885774259633432e-06, | |
| "loss": 108.7793, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16075488302735474, | |
| "grad_norm": 0.16975107789039612, | |
| "learning_rate": 5.8682408883346535e-06, | |
| "loss": 23.7654, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.16114601412961108, | |
| "grad_norm": 1.1074670553207397, | |
| "learning_rate": 5.850696501583164e-06, | |
| "loss": 35.2543, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.1615371452318674, | |
| "grad_norm": 2842.92529296875, | |
| "learning_rate": 5.8331413219662295e-06, | |
| "loss": 78.9185, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.16192827633412374, | |
| "grad_norm": 0.22508656978607178, | |
| "learning_rate": 5.815575572208042e-06, | |
| "loss": 25.2656, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.16231940743638007, | |
| "grad_norm": 0.14979542791843414, | |
| "learning_rate": 5.797999475166897e-06, | |
| "loss": 43.0969, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.16271053853863643, | |
| "grad_norm": 1.3583056926727295, | |
| "learning_rate": 5.78041325383237e-06, | |
| "loss": 111.1471, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.16310166964089276, | |
| "grad_norm": 0.3384321630001068, | |
| "learning_rate": 5.762817131322482e-06, | |
| "loss": 168.3297, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.1634928007431491, | |
| "grad_norm": 0.4203161895275116, | |
| "learning_rate": 5.745211330880872e-06, | |
| "loss": 227.7544, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.16388393184540542, | |
| "grad_norm": 0.2434893250465393, | |
| "learning_rate": 5.7275960758739655e-06, | |
| "loss": 188.506, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.16427506294766178, | |
| "grad_norm": 374.4583740234375, | |
| "learning_rate": 5.709971589788136e-06, | |
| "loss": 103.9844, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1646661940499181, | |
| "grad_norm": 0.3567257523536682, | |
| "learning_rate": 5.69233809622687e-06, | |
| "loss": 45.0813, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.16505732515217444, | |
| "grad_norm": 0.3658342659473419, | |
| "learning_rate": 5.674695818907943e-06, | |
| "loss": 79.1331, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.16544845625443078, | |
| "grad_norm": 1.0847684144973755, | |
| "learning_rate": 5.65704498166056e-06, | |
| "loss": 89.7686, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.1658395873566871, | |
| "grad_norm": 1.6579034328460693, | |
| "learning_rate": 5.6393858084225305e-06, | |
| "loss": 36.3624, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.16623071845894347, | |
| "grad_norm": 136.94493103027344, | |
| "learning_rate": 5.621718523237427e-06, | |
| "loss": 86.6906, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1666218495611998, | |
| "grad_norm": 1437.6072998046875, | |
| "learning_rate": 5.604043350251733e-06, | |
| "loss": 47.5283, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.16701298066345613, | |
| "grad_norm": 1.1629807949066162, | |
| "learning_rate": 5.586360513712011e-06, | |
| "loss": 69.8016, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.16740411176571246, | |
| "grad_norm": 0.08627960830926895, | |
| "learning_rate": 5.568670237962045e-06, | |
| "loss": 41.2346, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.16779524286796882, | |
| "grad_norm": 307.7284851074219, | |
| "learning_rate": 5.550972747440007e-06, | |
| "loss": 64.6287, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.16818637397022515, | |
| "grad_norm": 275.1657409667969, | |
| "learning_rate": 5.533268266675601e-06, | |
| "loss": 37.8621, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16857750507248148, | |
| "grad_norm": 321.581298828125, | |
| "learning_rate": 5.515557020287219e-06, | |
| "loss": 21.3013, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.1689686361747378, | |
| "grad_norm": 107.34349822998047, | |
| "learning_rate": 5.497839232979084e-06, | |
| "loss": 22.1243, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.16935976727699414, | |
| "grad_norm": 1374.56396484375, | |
| "learning_rate": 5.480115129538409e-06, | |
| "loss": 36.583, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.1697508983792505, | |
| "grad_norm": 0.42376813292503357, | |
| "learning_rate": 5.4623849348325396e-06, | |
| "loss": 44.8171, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.17014202948150683, | |
| "grad_norm": 1.2415663003921509, | |
| "learning_rate": 5.444648873806101e-06, | |
| "loss": 61.8365, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.17053316058376317, | |
| "grad_norm": 0.8514006733894348, | |
| "learning_rate": 5.426907171478143e-06, | |
| "loss": 46.8816, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.1709242916860195, | |
| "grad_norm": 0.2142760455608368, | |
| "learning_rate": 5.409160052939292e-06, | |
| "loss": 35.3242, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.17131542278827586, | |
| "grad_norm": 0.15164269506931305, | |
| "learning_rate": 5.391407743348884e-06, | |
| "loss": 11.6772, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1717065538905322, | |
| "grad_norm": 1661.5631103515625, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 53.8261, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.17209768499278852, | |
| "grad_norm": 1743.482421875, | |
| "learning_rate": 5.355888451977204e-06, | |
| "loss": 35.92, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17248881609504485, | |
| "grad_norm": 0.20529299974441528, | |
| "learning_rate": 5.3381219208324755e-06, | |
| "loss": 33.3962, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.1728799471973012, | |
| "grad_norm": 2.2511260509490967, | |
| "learning_rate": 5.320351099903565e-06, | |
| "loss": 55.9249, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.17327107829955754, | |
| "grad_norm": 0.11457404494285583, | |
| "learning_rate": 5.302576214650527e-06, | |
| "loss": 73.4981, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.17366220940181387, | |
| "grad_norm": 138.77809143066406, | |
| "learning_rate": 5.284797490584979e-06, | |
| "loss": 41.5857, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.1740533405040702, | |
| "grad_norm": 259.2716064453125, | |
| "learning_rate": 5.267015153267246e-06, | |
| "loss": 63.9765, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.17444447160632653, | |
| "grad_norm": 1.0777804851531982, | |
| "learning_rate": 5.249229428303486e-06, | |
| "loss": 11.9817, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.1748356027085829, | |
| "grad_norm": 2536.879150390625, | |
| "learning_rate": 5.231440541342846e-06, | |
| "loss": 57.9686, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.17522673381083922, | |
| "grad_norm": 0.2679949700832367, | |
| "learning_rate": 5.213648718074584e-06, | |
| "loss": 31.2022, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.17561786491309556, | |
| "grad_norm": 0.14789415895938873, | |
| "learning_rate": 5.1958541842252145e-06, | |
| "loss": 46.2886, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.1760089960153519, | |
| "grad_norm": 3.2639567852020264, | |
| "learning_rate": 5.178057165555636e-06, | |
| "loss": 84.1918, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17640012711760825, | |
| "grad_norm": 372.91387939453125, | |
| "learning_rate": 5.160257887858278e-06, | |
| "loss": 20.9396, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.17679125821986458, | |
| "grad_norm": 0.30689093470573425, | |
| "learning_rate": 5.142456576954225e-06, | |
| "loss": 15.7738, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.1771823893221209, | |
| "grad_norm": 247.07366943359375, | |
| "learning_rate": 5.1246534586903655e-06, | |
| "loss": 19.9637, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.17757352042437724, | |
| "grad_norm": 0.7007215619087219, | |
| "learning_rate": 5.106848758936508e-06, | |
| "loss": 33.0597, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.17796465152663357, | |
| "grad_norm": 290.64202880859375, | |
| "learning_rate": 5.089042703582533e-06, | |
| "loss": 65.3849, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.17835578262888993, | |
| "grad_norm": 0.4069232940673828, | |
| "learning_rate": 5.071235518535516e-06, | |
| "loss": 25.4287, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.17874691373114626, | |
| "grad_norm": 0.6326934099197388, | |
| "learning_rate": 5.053427429716867e-06, | |
| "loss": 43.9984, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.1791380448334026, | |
| "grad_norm": 0.269971638917923, | |
| "learning_rate": 5.0356186630594585e-06, | |
| "loss": 46.612, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.17952917593565892, | |
| "grad_norm": 0.21015766263008118, | |
| "learning_rate": 5.017809444504768e-06, | |
| "loss": 48.1048, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.17992030703791528, | |
| "grad_norm": 357.07867431640625, | |
| "learning_rate": 5e-06, | |
| "loss": 25.4451, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18031143814017161, | |
| "grad_norm": 476.7673645019531, | |
| "learning_rate": 4.982190555495236e-06, | |
| "loss": 35.3956, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.18070256924242795, | |
| "grad_norm": 0.19846689701080322, | |
| "learning_rate": 4.964381336940542e-06, | |
| "loss": 76.3696, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.18109370034468428, | |
| "grad_norm": 0.3459748923778534, | |
| "learning_rate": 4.946572570283135e-06, | |
| "loss": 28.662, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.1814848314469406, | |
| "grad_norm": 2.347224712371826, | |
| "learning_rate": 4.928764481464485e-06, | |
| "loss": 43.5744, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.18187596254919697, | |
| "grad_norm": 1.0240310430526733, | |
| "learning_rate": 4.910957296417467e-06, | |
| "loss": 38.6815, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.1822670936514533, | |
| "grad_norm": 2363.06884765625, | |
| "learning_rate": 4.893151241063493e-06, | |
| "loss": 97.8371, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.18265822475370963, | |
| "grad_norm": 0.7521228790283203, | |
| "learning_rate": 4.875346541309637e-06, | |
| "loss": 87.3877, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.18304935585596596, | |
| "grad_norm": 186.02557373046875, | |
| "learning_rate": 4.857543423045775e-06, | |
| "loss": 61.2668, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.18344048695822232, | |
| "grad_norm": 0.3884137272834778, | |
| "learning_rate": 4.839742112141725e-06, | |
| "loss": 57.2652, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.18383161806047865, | |
| "grad_norm": 1904.343994140625, | |
| "learning_rate": 4.821942834444367e-06, | |
| "loss": 44.9092, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.18422274916273498, | |
| "grad_norm": 0.933085024356842, | |
| "learning_rate": 4.804145815774787e-06, | |
| "loss": 45.1724, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.18461388026499131, | |
| "grad_norm": 0.17478938400745392, | |
| "learning_rate": 4.786351281925417e-06, | |
| "loss": 63.2364, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.18500501136724765, | |
| "grad_norm": 0.3920159637928009, | |
| "learning_rate": 4.768559458657156e-06, | |
| "loss": 9.0603, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.185396142469504, | |
| "grad_norm": 0.20027987658977509, | |
| "learning_rate": 4.750770571696514e-06, | |
| "loss": 67.3941, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.18578727357176034, | |
| "grad_norm": 4896.18603515625, | |
| "learning_rate": 4.732984846732755e-06, | |
| "loss": 110.2459, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.18617840467401667, | |
| "grad_norm": 0.6982368230819702, | |
| "learning_rate": 4.7152025094150214e-06, | |
| "loss": 60.1346, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.186569535776273, | |
| "grad_norm": 0.21346724033355713, | |
| "learning_rate": 4.697423785349475e-06, | |
| "loss": 86.276, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.18696066687852936, | |
| "grad_norm": 0.5096214413642883, | |
| "learning_rate": 4.679648900096436e-06, | |
| "loss": 47.5857, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.1873517979807857, | |
| "grad_norm": 0.5563175678253174, | |
| "learning_rate": 4.661878079167527e-06, | |
| "loss": 49.3372, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.18774292908304202, | |
| "grad_norm": 1.7563621997833252, | |
| "learning_rate": 4.644111548022798e-06, | |
| "loss": 40.9886, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.18813406018529835, | |
| "grad_norm": 112.32270050048828, | |
| "learning_rate": 4.626349532067879e-06, | |
| "loss": 47.8111, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.1885251912875547, | |
| "grad_norm": 1.5125802755355835, | |
| "learning_rate": 4.608592256651117e-06, | |
| "loss": 32.0425, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.18891632238981104, | |
| "grad_norm": 0.3377935588359833, | |
| "learning_rate": 4.5908399470607106e-06, | |
| "loss": 41.984, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.18930745349206737, | |
| "grad_norm": 262.73553466796875, | |
| "learning_rate": 4.573092828521857e-06, | |
| "loss": 15.6826, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.1896985845943237, | |
| "grad_norm": 1222.323974609375, | |
| "learning_rate": 4.555351126193901e-06, | |
| "loss": 82.0725, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.19008971569658004, | |
| "grad_norm": 0.4867708384990692, | |
| "learning_rate": 4.537615065167461e-06, | |
| "loss": 68.1378, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.1904808467988364, | |
| "grad_norm": 0.24868200719356537, | |
| "learning_rate": 4.5198848704615915e-06, | |
| "loss": 34.2792, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.19087197790109273, | |
| "grad_norm": 1565.045654296875, | |
| "learning_rate": 4.502160767020918e-06, | |
| "loss": 61.3013, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.19126310900334906, | |
| "grad_norm": 0.12846969068050385, | |
| "learning_rate": 4.484442979712783e-06, | |
| "loss": 101.128, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.1916542401056054, | |
| "grad_norm": 0.2493712157011032, | |
| "learning_rate": 4.466731733324399e-06, | |
| "loss": 16.2675, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.19204537120786175, | |
| "grad_norm": 0.8139014840126038, | |
| "learning_rate": 4.449027252559994e-06, | |
| "loss": 56.7364, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.19243650231011808, | |
| "grad_norm": 0.44511955976486206, | |
| "learning_rate": 4.431329762037958e-06, | |
| "loss": 22.7369, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.1928276334123744, | |
| "grad_norm": 334.3827819824219, | |
| "learning_rate": 4.413639486287992e-06, | |
| "loss": 8.3813, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.19321876451463074, | |
| "grad_norm": 1712.8160400390625, | |
| "learning_rate": 4.395956649748269e-06, | |
| "loss": 57.2132, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.19360989561688707, | |
| "grad_norm": 1017.40771484375, | |
| "learning_rate": 4.3782814767625755e-06, | |
| "loss": 39.137, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.19400102671914343, | |
| "grad_norm": 2.061763286590576, | |
| "learning_rate": 4.3606141915774695e-06, | |
| "loss": 29.4071, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.19439215782139976, | |
| "grad_norm": 0.351924329996109, | |
| "learning_rate": 4.342955018339442e-06, | |
| "loss": 71.3075, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.1947832889236561, | |
| "grad_norm": 0.5345131754875183, | |
| "learning_rate": 4.3253041810920595e-06, | |
| "loss": 53.7062, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.19517442002591243, | |
| "grad_norm": 0.4997957944869995, | |
| "learning_rate": 4.307661903773129e-06, | |
| "loss": 52.0548, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.19556555112816879, | |
| "grad_norm": 0.646457850933075, | |
| "learning_rate": 4.290028410211866e-06, | |
| "loss": 92.2717, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19595668223042512, | |
| "grad_norm": 0.3356407880783081, | |
| "learning_rate": 4.272403924126035e-06, | |
| "loss": 89.8288, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.19634781333268145, | |
| "grad_norm": 1.2155108451843262, | |
| "learning_rate": 4.254788669119127e-06, | |
| "loss": 42.697, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.19673894443493778, | |
| "grad_norm": 0.8059905171394348, | |
| "learning_rate": 4.237182868677519e-06, | |
| "loss": 45.4195, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.1971300755371941, | |
| "grad_norm": 0.19161798059940338, | |
| "learning_rate": 4.219586746167632e-06, | |
| "loss": 5.6987, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.19752120663945047, | |
| "grad_norm": 1.4473623037338257, | |
| "learning_rate": 4.2020005248331056e-06, | |
| "loss": 83.2729, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.1979123377417068, | |
| "grad_norm": 3267.076904296875, | |
| "learning_rate": 4.18442442779196e-06, | |
| "loss": 64.1532, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.19830346884396313, | |
| "grad_norm": 0.3686378002166748, | |
| "learning_rate": 4.166858678033771e-06, | |
| "loss": 5.8373, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.19869459994621946, | |
| "grad_norm": 0.7481865882873535, | |
| "learning_rate": 4.149303498416838e-06, | |
| "loss": 30.1228, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.19908573104847582, | |
| "grad_norm": 1.0720148086547852, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 16.1707, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.19947686215073215, | |
| "grad_norm": 253.39622497558594, | |
| "learning_rate": 4.114225740366569e-06, | |
| "loss": 11.6924, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19986799325298849, | |
| "grad_norm": 1.3426392078399658, | |
| "learning_rate": 4.096703606968007e-06, | |
| "loss": 18.6361, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.20025912435524482, | |
| "grad_norm": 242.68768310546875, | |
| "learning_rate": 4.079192933774592e-06, | |
| "loss": 60.1703, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.20065025545750115, | |
| "grad_norm": 0.12895415723323822, | |
| "learning_rate": 4.061693942945863e-06, | |
| "loss": 39.873, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.2010413865597575, | |
| "grad_norm": 446.8625793457031, | |
| "learning_rate": 4.04420685649314e-06, | |
| "loss": 61.0251, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.20143251766201384, | |
| "grad_norm": 0.19806069135665894, | |
| "learning_rate": 4.026731896276708e-06, | |
| "loss": 45.4137, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.20182364876427017, | |
| "grad_norm": 0.357597678899765, | |
| "learning_rate": 4.009269284003014e-06, | |
| "loss": 110.9987, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.2022147798665265, | |
| "grad_norm": 0.15286563336849213, | |
| "learning_rate": 3.991819241221836e-06, | |
| "loss": 52.1221, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.20260591096878286, | |
| "grad_norm": 0.1778210997581482, | |
| "learning_rate": 3.974381989323484e-06, | |
| "loss": 57.3214, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.2029970420710392, | |
| "grad_norm": 1.1525286436080933, | |
| "learning_rate": 3.956957749535997e-06, | |
| "loss": 26.8482, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.20338817317329552, | |
| "grad_norm": 1.2070550918579102, | |
| "learning_rate": 3.939546742922318e-06, | |
| "loss": 68.4404, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.20377930427555185, | |
| "grad_norm": 1.1822954416275024, | |
| "learning_rate": 3.9221491903775014e-06, | |
| "loss": 20.8915, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.2041704353778082, | |
| "grad_norm": 0.27339041233062744, | |
| "learning_rate": 3.904765312625916e-06, | |
| "loss": 38.3124, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.20456156648006454, | |
| "grad_norm": 251.54371643066406, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 75.3845, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.20495269758232088, | |
| "grad_norm": 0.2657800316810608, | |
| "learning_rate": 3.8700394635296166e-06, | |
| "loss": 50.7794, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.2053438286845772, | |
| "grad_norm": 0.9252436757087708, | |
| "learning_rate": 3.852697932754974e-06, | |
| "loss": 80.2724, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.20573495978683354, | |
| "grad_norm": 0.18160250782966614, | |
| "learning_rate": 3.835370957908108e-06, | |
| "loss": 18.6159, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.2061260908890899, | |
| "grad_norm": 1.437393069267273, | |
| "learning_rate": 3.818058758817956e-06, | |
| "loss": 62.4316, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.20651722199134623, | |
| "grad_norm": 2809.828369140625, | |
| "learning_rate": 3.800761555125997e-06, | |
| "loss": 25.8665, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.20690835309360256, | |
| "grad_norm": 952.0728149414062, | |
| "learning_rate": 3.783479566283457e-06, | |
| "loss": 22.2021, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2072994841958589, | |
| "grad_norm": 2.9290764331817627, | |
| "learning_rate": 3.7662130115485317e-06, | |
| "loss": 62.186, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.20769061529811525, | |
| "grad_norm": 0.4081960618495941, | |
| "learning_rate": 3.748962109983605e-06, | |
| "loss": 36.3092, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.20808174640037158, | |
| "grad_norm": 0.33659136295318604, | |
| "learning_rate": 3.731727080452464e-06, | |
| "loss": 19.2433, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.2084728775026279, | |
| "grad_norm": 1739.4482421875, | |
| "learning_rate": 3.714508141617527e-06, | |
| "loss": 76.853, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.20886400860488424, | |
| "grad_norm": 3326.531494140625, | |
| "learning_rate": 3.69730551193707e-06, | |
| "loss": 99.9301, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.20925513970714057, | |
| "grad_norm": 220.89852905273438, | |
| "learning_rate": 3.6801194096624515e-06, | |
| "loss": 41.4909, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.20964627080939693, | |
| "grad_norm": 106.39971160888672, | |
| "learning_rate": 3.6629500528353464e-06, | |
| "loss": 28.8744, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.21003740191165327, | |
| "grad_norm": 1.0691938400268555, | |
| "learning_rate": 3.6457976592849753e-06, | |
| "loss": 20.5078, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.2104285330139096, | |
| "grad_norm": 0.6214406490325928, | |
| "learning_rate": 3.6286624466253496e-06, | |
| "loss": 45.2672, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.21081966411616593, | |
| "grad_norm": 1.1435602903366089, | |
| "learning_rate": 3.6115446322525007e-06, | |
| "loss": 62.7016, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.2112107952184223, | |
| "grad_norm": 0.2811889052391052, | |
| "learning_rate": 3.594444433341725e-06, | |
| "loss": 73.1093, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.21160192632067862, | |
| "grad_norm": 1629.179443359375, | |
| "learning_rate": 3.5773620668448384e-06, | |
| "loss": 70.3456, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.21199305742293495, | |
| "grad_norm": 0.6402444839477539, | |
| "learning_rate": 3.560297749487407e-06, | |
| "loss": 100.5996, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.21238418852519128, | |
| "grad_norm": 0.20082825422286987, | |
| "learning_rate": 3.543251697766006e-06, | |
| "loss": 45.9456, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.2127753196274476, | |
| "grad_norm": 0.2259850949048996, | |
| "learning_rate": 3.526224127945479e-06, | |
| "loss": 81.2139, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.21316645072970397, | |
| "grad_norm": 0.5259697437286377, | |
| "learning_rate": 3.5092152560561833e-06, | |
| "loss": 29.0951, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.2135575818319603, | |
| "grad_norm": 0.2201029360294342, | |
| "learning_rate": 3.4922252978912523e-06, | |
| "loss": 39.3512, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.21394871293421663, | |
| "grad_norm": 0.6921319365501404, | |
| "learning_rate": 3.475254469003865e-06, | |
| "loss": 74.4222, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.21433984403647297, | |
| "grad_norm": 0.3210639953613281, | |
| "learning_rate": 3.4583029847044996e-06, | |
| "loss": 78.7619, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.21473097513872932, | |
| "grad_norm": 0.49791640043258667, | |
| "learning_rate": 3.4413710600582096e-06, | |
| "loss": 41.5078, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.21512210624098566, | |
| "grad_norm": 1.5678467750549316, | |
| "learning_rate": 3.424458909881897e-06, | |
| "loss": 25.0795, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.215513237343242, | |
| "grad_norm": 0.4146921634674072, | |
| "learning_rate": 3.4075667487415785e-06, | |
| "loss": 53.2567, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.21590436844549832, | |
| "grad_norm": 0.3883844017982483, | |
| "learning_rate": 3.3906947909496696e-06, | |
| "loss": 40.2185, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.21629549954775465, | |
| "grad_norm": 1.1996833086013794, | |
| "learning_rate": 3.3738432505622653e-06, | |
| "loss": 43.9982, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.216686630650011, | |
| "grad_norm": 0.203975647687912, | |
| "learning_rate": 3.357012341376421e-06, | |
| "loss": 29.5247, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.21707776175226734, | |
| "grad_norm": 0.3972192108631134, | |
| "learning_rate": 3.3402022769274422e-06, | |
| "loss": 12.6732, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.21746889285452367, | |
| "grad_norm": 0.3920489251613617, | |
| "learning_rate": 3.3234132704861786e-06, | |
| "loss": 10.7088, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.21786002395678, | |
| "grad_norm": 1.8879612684249878, | |
| "learning_rate": 3.306645535056312e-06, | |
| "loss": 78.3652, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.21825115505903636, | |
| "grad_norm": 1339.1312255859375, | |
| "learning_rate": 3.289899283371657e-06, | |
| "loss": 115.3514, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.2186422861612927, | |
| "grad_norm": 0.34053364396095276, | |
| "learning_rate": 3.273174727893463e-06, | |
| "loss": 30.0332, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.21903341726354902, | |
| "grad_norm": 1.0308630466461182, | |
| "learning_rate": 3.2564720808077167e-06, | |
| "loss": 45.353, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21942454836580536, | |
| "grad_norm": 0.11198209971189499, | |
| "learning_rate": 3.2397915540224493e-06, | |
| "loss": 21.2855, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.21981567946806171, | |
| "grad_norm": 0.1384800672531128, | |
| "learning_rate": 3.2231333591650567e-06, | |
| "loss": 1.2242, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.22020681057031805, | |
| "grad_norm": 0.2989153563976288, | |
| "learning_rate": 3.2064977075795988e-06, | |
| "loss": 25.7958, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.22059794167257438, | |
| "grad_norm": 1617.5919189453125, | |
| "learning_rate": 3.189884810324133e-06, | |
| "loss": 41.6185, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.2209890727748307, | |
| "grad_norm": 0.4813622236251831, | |
| "learning_rate": 3.173294878168025e-06, | |
| "loss": 54.3557, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.22138020387708704, | |
| "grad_norm": 207.3912811279297, | |
| "learning_rate": 3.1567281215892868e-06, | |
| "loss": 23.3589, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.2217713349793434, | |
| "grad_norm": 0.6962729096412659, | |
| "learning_rate": 3.140184750771895e-06, | |
| "loss": 53.3044, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.22216246608159973, | |
| "grad_norm": 0.3524315059185028, | |
| "learning_rate": 3.12366497560313e-06, | |
| "loss": 44.0667, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.22255359718385606, | |
| "grad_norm": 0.2957899868488312, | |
| "learning_rate": 3.1071690056709125e-06, | |
| "loss": 61.0361, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.2229447282861124, | |
| "grad_norm": 0.13717715442180634, | |
| "learning_rate": 3.090697050261143e-06, | |
| "loss": 48.0218, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.22333585938836875, | |
| "grad_norm": 1.4488298892974854, | |
| "learning_rate": 3.074249318355046e-06, | |
| "loss": 42.8739, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.22372699049062508, | |
| "grad_norm": 0.19119593501091003, | |
| "learning_rate": 3.057826018626527e-06, | |
| "loss": 131.6781, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.22411812159288141, | |
| "grad_norm": 0.4780034124851227, | |
| "learning_rate": 3.0414273594395106e-06, | |
| "loss": 68.4404, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.22450925269513775, | |
| "grad_norm": 0.25417017936706543, | |
| "learning_rate": 3.0250535488453077e-06, | |
| "loss": 78.3688, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.22490038379739408, | |
| "grad_norm": 0.2417258769273758, | |
| "learning_rate": 3.008704794579973e-06, | |
| "loss": 116.528, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.22529151489965044, | |
| "grad_norm": 0.19122564792633057, | |
| "learning_rate": 2.9923813040616685e-06, | |
| "loss": 29.6101, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.22568264600190677, | |
| "grad_norm": 0.13005802035331726, | |
| "learning_rate": 2.976083284388031e-06, | |
| "loss": 35.3624, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.2260737771041631, | |
| "grad_norm": 381.5791320800781, | |
| "learning_rate": 2.959810942333552e-06, | |
| "loss": 90.369, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.22646490820641943, | |
| "grad_norm": 0.19559843838214874, | |
| "learning_rate": 2.9435644843469434e-06, | |
| "loss": 51.9091, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.2268560393086758, | |
| "grad_norm": 0.3959593176841736, | |
| "learning_rate": 2.9273441165485227e-06, | |
| "loss": 38.9128, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.22724717041093212, | |
| "grad_norm": 0.18184183537960052, | |
| "learning_rate": 2.9111500447276053e-06, | |
| "loss": 51.5855, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.22763830151318845, | |
| "grad_norm": 0.6553506851196289, | |
| "learning_rate": 2.8949824743398804e-06, | |
| "loss": 30.2534, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.22802943261544478, | |
| "grad_norm": 0.18349502980709076, | |
| "learning_rate": 2.8788416105048124e-06, | |
| "loss": 15.0662, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.22842056371770111, | |
| "grad_norm": 0.57335364818573, | |
| "learning_rate": 2.862727658003042e-06, | |
| "loss": 48.4215, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.22881169481995747, | |
| "grad_norm": 0.36635109782218933, | |
| "learning_rate": 2.8466408212737777e-06, | |
| "loss": 4.5718, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.2292028259222138, | |
| "grad_norm": 0.2556889057159424, | |
| "learning_rate": 2.83058130441221e-06, | |
| "loss": 43.9021, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.22959395702447014, | |
| "grad_norm": 1374.17138671875, | |
| "learning_rate": 2.8145493111669186e-06, | |
| "loss": 61.402, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.22998508812672647, | |
| "grad_norm": 0.1971081793308258, | |
| "learning_rate": 2.79854504493729e-06, | |
| "loss": 49.1368, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.23037621922898283, | |
| "grad_norm": 1701.60693359375, | |
| "learning_rate": 2.782568708770933e-06, | |
| "loss": 53.8848, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.23076735033123916, | |
| "grad_norm": 789.6815185546875, | |
| "learning_rate": 2.7666205053611097e-06, | |
| "loss": 17.3218, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2311584814334955, | |
| "grad_norm": 0.25564226508140564, | |
| "learning_rate": 2.7507006370441557e-06, | |
| "loss": 8.3916, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.23154961253575182, | |
| "grad_norm": 1354.3128662109375, | |
| "learning_rate": 2.734809305796915e-06, | |
| "loss": 48.9245, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.23194074363800815, | |
| "grad_norm": 161.26748657226562, | |
| "learning_rate": 2.718946713234185e-06, | |
| "loss": 56.0846, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.2323318747402645, | |
| "grad_norm": 0.32130080461502075, | |
| "learning_rate": 2.7031130606061486e-06, | |
| "loss": 38.3063, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.23272300584252084, | |
| "grad_norm": 1.8923166990280151, | |
| "learning_rate": 2.687308548795825e-06, | |
| "loss": 47.5562, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.23311413694477717, | |
| "grad_norm": 0.13152983784675598, | |
| "learning_rate": 2.67153337831652e-06, | |
| "loss": 26.3863, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.2335052680470335, | |
| "grad_norm": 0.1482762098312378, | |
| "learning_rate": 2.6557877493092885e-06, | |
| "loss": 79.9195, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.23389639914928986, | |
| "grad_norm": 0.4109286069869995, | |
| "learning_rate": 2.6400718615403852e-06, | |
| "loss": 50.5231, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.2342875302515462, | |
| "grad_norm": 0.8140275478363037, | |
| "learning_rate": 2.624385914398737e-06, | |
| "loss": 55.21, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.23467866135380253, | |
| "grad_norm": 1361.185302734375, | |
| "learning_rate": 2.608730106893411e-06, | |
| "loss": 51.1663, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.23506979245605886, | |
| "grad_norm": 0.4616069793701172, | |
| "learning_rate": 2.5931046376510875e-06, | |
| "loss": 14.65, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.23546092355831522, | |
| "grad_norm": 0.43858256936073303, | |
| "learning_rate": 2.5775097049135445e-06, | |
| "loss": 21.6647, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.23585205466057155, | |
| "grad_norm": 0.2774844765663147, | |
| "learning_rate": 2.561945506535144e-06, | |
| "loss": 37.901, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.23624318576282788, | |
| "grad_norm": 1338.126220703125, | |
| "learning_rate": 2.5464122399803126e-06, | |
| "loss": 66.4596, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.2366343168650842, | |
| "grad_norm": 0.4828522801399231, | |
| "learning_rate": 2.5309101023210426e-06, | |
| "loss": 11.7746, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.23702544796734054, | |
| "grad_norm": 1.3893892765045166, | |
| "learning_rate": 2.5154392902343966e-06, | |
| "loss": 38.6192, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.2374165790695969, | |
| "grad_norm": 2.981203556060791, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 15.9302, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.23780771017185323, | |
| "grad_norm": 597.11279296875, | |
| "learning_rate": 2.4845924274975625e-06, | |
| "loss": 45.9918, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.23819884127410956, | |
| "grad_norm": 1.3516067266464233, | |
| "learning_rate": 2.4692167682043855e-06, | |
| "loss": 46.3228, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.2385899723763659, | |
| "grad_norm": 0.1671498566865921, | |
| "learning_rate": 2.4538732171928847e-06, | |
| "loss": 43.2643, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.23898110347862225, | |
| "grad_norm": 0.3257511854171753, | |
| "learning_rate": 2.4385619691281144e-06, | |
| "loss": 44.6005, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.23937223458087858, | |
| "grad_norm": 0.4877566993236542, | |
| "learning_rate": 2.4232832182653014e-06, | |
| "loss": 72.7035, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.23976336568313492, | |
| "grad_norm": 1470.4984130859375, | |
| "learning_rate": 2.408037158447375e-06, | |
| "loss": 50.2886, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.24015449678539125, | |
| "grad_norm": 0.78384929895401, | |
| "learning_rate": 2.39282398310251e-06, | |
| "loss": 19.3894, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.24054562788764758, | |
| "grad_norm": 0.2225428968667984, | |
| "learning_rate": 2.3776438852416743e-06, | |
| "loss": 53.8646, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.24093675898990394, | |
| "grad_norm": 0.996082603931427, | |
| "learning_rate": 2.3624970574561773e-06, | |
| "loss": 2.6708, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.24132789009216027, | |
| "grad_norm": 0.38895338773727417, | |
| "learning_rate": 2.3473836919152267e-06, | |
| "loss": 50.9521, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.2417190211944166, | |
| "grad_norm": 1.0213991403579712, | |
| "learning_rate": 2.332303980363497e-06, | |
| "loss": 44.1723, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.24211015229667293, | |
| "grad_norm": 0.5459339022636414, | |
| "learning_rate": 2.317258114118686e-06, | |
| "loss": 1.9167, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.2425012833989293, | |
| "grad_norm": 428.5580139160156, | |
| "learning_rate": 2.3022462840690933e-06, | |
| "loss": 60.3034, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.24289241450118562, | |
| "grad_norm": 1972.2655029296875, | |
| "learning_rate": 2.2872686806712037e-06, | |
| "loss": 53.7348, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.24328354560344195, | |
| "grad_norm": 0.9528247714042664, | |
| "learning_rate": 2.272325493947257e-06, | |
| "loss": 34.783, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.24367467670569828, | |
| "grad_norm": 0.456136554479599, | |
| "learning_rate": 2.257416913482853e-06, | |
| "loss": 32.2854, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.24406580780795462, | |
| "grad_norm": 0.22959932684898376, | |
| "learning_rate": 2.2425431284245302e-06, | |
| "loss": 27.3752, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.24445693891021097, | |
| "grad_norm": 0.48872700333595276, | |
| "learning_rate": 2.2277043274773856e-06, | |
| "loss": 22.8773, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.2448480700124673, | |
| "grad_norm": 1.4068603515625, | |
| "learning_rate": 2.2129006989026612e-06, | |
| "loss": 26.3108, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.24523920111472364, | |
| "grad_norm": 2.354417085647583, | |
| "learning_rate": 2.1981324305153644e-06, | |
| "loss": 60.9464, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.24563033221697997, | |
| "grad_norm": 1.7362618446350098, | |
| "learning_rate": 2.1833997096818897e-06, | |
| "loss": 18.6702, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.24602146331923633, | |
| "grad_norm": 471.421875, | |
| "learning_rate": 2.168702723317632e-06, | |
| "loss": 13.0568, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.24641259442149266, | |
| "grad_norm": 0.3530268669128418, | |
| "learning_rate": 2.1540416578846207e-06, | |
| "loss": 14.6524, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.246803725523749, | |
| "grad_norm": 0.7190971970558167, | |
| "learning_rate": 2.139416699389153e-06, | |
| "loss": 12.7703, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.24719485662600532, | |
| "grad_norm": 2144.53125, | |
| "learning_rate": 2.1248280333794347e-06, | |
| "loss": 24.8344, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.24758598772826168, | |
| "grad_norm": 1532.8892822265625, | |
| "learning_rate": 2.1102758449432233e-06, | |
| "loss": 48.1845, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.247977118830518, | |
| "grad_norm": 0.18892136216163635, | |
| "learning_rate": 2.095760318705487e-06, | |
| "loss": 41.4411, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.24836824993277434, | |
| "grad_norm": 0.43188950419425964, | |
| "learning_rate": 2.081281638826052e-06, | |
| "loss": 15.6649, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.24875938103503067, | |
| "grad_norm": 0.247590571641922, | |
| "learning_rate": 2.0668399889972717e-06, | |
| "loss": 52.5673, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.249150512137287, | |
| "grad_norm": 0.7205409407615662, | |
| "learning_rate": 2.0524355524417017e-06, | |
| "loss": 16.6089, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.24954164323954336, | |
| "grad_norm": 0.30670174956321716, | |
| "learning_rate": 2.038068511909762e-06, | |
| "loss": 25.2432, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.2499327743417997, | |
| "grad_norm": 1323.96875, | |
| "learning_rate": 2.0237390496774284e-06, | |
| "loss": 47.6592, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.250323905444056, | |
| "grad_norm": 0.4727286696434021, | |
| "learning_rate": 2.00944734754392e-06, | |
| "loss": 26.6763, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2507150365463124, | |
| "grad_norm": 0.2739315927028656, | |
| "learning_rate": 1.995193586829387e-06, | |
| "loss": 60.4231, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.2511061676485687, | |
| "grad_norm": 0.31088632345199585, | |
| "learning_rate": 1.980977948372612e-06, | |
| "loss": 12.4899, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.25149729875082505, | |
| "grad_norm": 0.2787396013736725, | |
| "learning_rate": 1.966800612528723e-06, | |
| "loss": 52.5619, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.25188842985308135, | |
| "grad_norm": 0.15130284428596497, | |
| "learning_rate": 1.952661759166893e-06, | |
| "loss": 27.9581, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.2522795609553377, | |
| "grad_norm": 1.2856502532958984, | |
| "learning_rate": 1.9385615676680663e-06, | |
| "loss": 19.7204, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.25267069205759407, | |
| "grad_norm": 772.9840087890625, | |
| "learning_rate": 1.9245002169226814e-06, | |
| "loss": 47.7722, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.2530618231598504, | |
| "grad_norm": 342.9947509765625, | |
| "learning_rate": 1.910477885328399e-06, | |
| "loss": 20.2267, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.25345295426210673, | |
| "grad_norm": 0.65385502576828, | |
| "learning_rate": 1.8964947507878401e-06, | |
| "loss": 24.3055, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.2538440853643631, | |
| "grad_norm": 2241.752197265625, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 61.2555, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.2542352164666194, | |
| "grad_norm": 1303.792724609375, | |
| "learning_rate": 1.8686467819896542e-06, | |
| "loss": 28.9474, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.25462634756887575, | |
| "grad_norm": 1.3625749349594116, | |
| "learning_rate": 1.8547823010417876e-06, | |
| "loss": 39.3853, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.25501747867113206, | |
| "grad_norm": 0.34665849804878235, | |
| "learning_rate": 1.8409577237626935e-06, | |
| "loss": 35.6182, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.2554086097733884, | |
| "grad_norm": 4.089036464691162, | |
| "learning_rate": 1.8271732255460644e-06, | |
| "loss": 4.3633, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.2557997408756448, | |
| "grad_norm": 1152.780517578125, | |
| "learning_rate": 1.8134289812771077e-06, | |
| "loss": 49.1108, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.2561908719779011, | |
| "grad_norm": 0.6453447341918945, | |
| "learning_rate": 1.7997251653303249e-06, | |
| "loss": 40.7015, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.25658200308015744, | |
| "grad_norm": 93.7674789428711, | |
| "learning_rate": 1.7860619515673034e-06, | |
| "loss": 30.0199, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.25697313418241374, | |
| "grad_norm": 0.4583728313446045, | |
| "learning_rate": 1.7724395133345025e-06, | |
| "loss": 67.6765, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.2573642652846701, | |
| "grad_norm": 0.5361828207969666, | |
| "learning_rate": 1.7588580234610592e-06, | |
| "loss": 17.3098, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.25775539638692646, | |
| "grad_norm": 0.22744417190551758, | |
| "learning_rate": 1.7453176542565958e-06, | |
| "loss": 38.1001, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.25814652748918276, | |
| "grad_norm": 0.23170587420463562, | |
| "learning_rate": 1.7318185775090336e-06, | |
| "loss": 10.4951, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2585376585914391, | |
| "grad_norm": 153.93882751464844, | |
| "learning_rate": 1.7183609644824096e-06, | |
| "loss": 52.739, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.2589287896936955, | |
| "grad_norm": 0.1890573650598526, | |
| "learning_rate": 1.7049449859147121e-06, | |
| "loss": 29.1403, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2593199207959518, | |
| "grad_norm": 1.4989005327224731, | |
| "learning_rate": 1.6915708120157042e-06, | |
| "loss": 44.5519, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.25971105189820815, | |
| "grad_norm": 1.5247461795806885, | |
| "learning_rate": 1.67823861246477e-06, | |
| "loss": 40.8943, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.26010218300046445, | |
| "grad_norm": 1.0584818124771118, | |
| "learning_rate": 1.6649485564087646e-06, | |
| "loss": 29.1124, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.2604933141027208, | |
| "grad_norm": 1.388893485069275, | |
| "learning_rate": 1.6517008124598622e-06, | |
| "loss": 20.1575, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.26088444520497717, | |
| "grad_norm": 0.27823394536972046, | |
| "learning_rate": 1.6384955486934157e-06, | |
| "loss": 19.5398, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.26127557630723347, | |
| "grad_norm": 0.1498628854751587, | |
| "learning_rate": 1.6253329326458367e-06, | |
| "loss": 43.7116, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.26166670740948983, | |
| "grad_norm": 5067.41357421875, | |
| "learning_rate": 1.612213131312454e-06, | |
| "loss": 62.5826, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.26205783851174613, | |
| "grad_norm": 0.3824155926704407, | |
| "learning_rate": 1.5991363111454023e-06, | |
| "loss": 43.7866, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2624489696140025, | |
| "grad_norm": 0.30052921175956726, | |
| "learning_rate": 1.5861026380515165e-06, | |
| "loss": 80.1734, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.26284010071625885, | |
| "grad_norm": 0.14637216925621033, | |
| "learning_rate": 1.5731122773902147e-06, | |
| "loss": 17.5782, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.26323123181851515, | |
| "grad_norm": 0.14098778367042542, | |
| "learning_rate": 1.5601653939714073e-06, | |
| "loss": 52.0247, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.2636223629207715, | |
| "grad_norm": 331.9844055175781, | |
| "learning_rate": 1.547262152053406e-06, | |
| "loss": 32.7034, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.2640134940230278, | |
| "grad_norm": 0.5405360460281372, | |
| "learning_rate": 1.5344027153408375e-06, | |
| "loss": 49.8961, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.2644046251252842, | |
| "grad_norm": 0.21132820844650269, | |
| "learning_rate": 1.5215872469825682e-06, | |
| "loss": 4.232, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.26479575622754054, | |
| "grad_norm": 0.930949330329895, | |
| "learning_rate": 1.5088159095696365e-06, | |
| "loss": 43.6584, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.26518688732979684, | |
| "grad_norm": 0.6677309274673462, | |
| "learning_rate": 1.4960888651331833e-06, | |
| "loss": 40.9175, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.2655780184320532, | |
| "grad_norm": 0.35741525888442993, | |
| "learning_rate": 1.4834062751424018e-06, | |
| "loss": 22.68, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.26596914953430956, | |
| "grad_norm": 1111.9866943359375, | |
| "learning_rate": 1.4707683005024898e-06, | |
| "loss": 41.6105, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.26636028063656586, | |
| "grad_norm": 0.6054497957229614, | |
| "learning_rate": 1.4581751015526035e-06, | |
| "loss": 27.9255, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.2667514117388222, | |
| "grad_norm": 0.14572077989578247, | |
| "learning_rate": 1.4456268380638262e-06, | |
| "loss": 4.4698, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.2671425428410785, | |
| "grad_norm": 0.28749310970306396, | |
| "learning_rate": 1.4331236692371386e-06, | |
| "loss": 24.8405, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.2675336739433349, | |
| "grad_norm": 0.3515958786010742, | |
| "learning_rate": 1.4206657537014078e-06, | |
| "loss": 14.6068, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.26792480504559124, | |
| "grad_norm": 554.953857421875, | |
| "learning_rate": 1.4082532495113627e-06, | |
| "loss": 43.7761, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.26831593614784754, | |
| "grad_norm": 0.34047558903694153, | |
| "learning_rate": 1.3958863141455937e-06, | |
| "loss": 69.1389, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.2687070672501039, | |
| "grad_norm": 0.31358927488327026, | |
| "learning_rate": 1.38356510450456e-06, | |
| "loss": 48.2586, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.2690981983523602, | |
| "grad_norm": 0.18278853595256805, | |
| "learning_rate": 1.3712897769085903e-06, | |
| "loss": 72.0719, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.26948932945461657, | |
| "grad_norm": 1582.910888671875, | |
| "learning_rate": 1.3590604870959046e-06, | |
| "loss": 32.1289, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.2698804605568729, | |
| "grad_norm": 0.1720167100429535, | |
| "learning_rate": 1.3468773902206378e-06, | |
| "loss": 51.6635, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.27027159165912923, | |
| "grad_norm": 0.14479337632656097, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 34.0947, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.2706627227613856, | |
| "grad_norm": 0.16732390224933624, | |
| "learning_rate": 1.322650392966665e-06, | |
| "loss": 15.8947, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.27105385386364195, | |
| "grad_norm": 0.9922708868980408, | |
| "learning_rate": 1.3106067999581224e-06, | |
| "loss": 23.1571, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.27144498496589825, | |
| "grad_norm": 971.8731689453125, | |
| "learning_rate": 1.298610014623423e-06, | |
| "loss": 52.2057, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.2718361160681546, | |
| "grad_norm": 824.1338500976562, | |
| "learning_rate": 1.2866601891668945e-06, | |
| "loss": 24.8422, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.2722272471704109, | |
| "grad_norm": 0.136683851480484, | |
| "learning_rate": 1.2747574751970826e-06, | |
| "loss": 11.3418, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.27261837827266727, | |
| "grad_norm": 0.8831171989440918, | |
| "learning_rate": 1.2629020237248241e-06, | |
| "loss": 27.774, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.27300950937492363, | |
| "grad_norm": 0.16165444254875183, | |
| "learning_rate": 1.2510939851613285e-06, | |
| "loss": 43.9631, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.27340064047717993, | |
| "grad_norm": 1.4575397968292236, | |
| "learning_rate": 1.239333509316281e-06, | |
| "loss": 60.9957, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.2737917715794363, | |
| "grad_norm": 0.35259395837783813, | |
| "learning_rate": 1.2276207453959283e-06, | |
| "loss": 50.4399, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2741829026816926, | |
| "grad_norm": 0.22403627634048462, | |
| "learning_rate": 1.2159558420011907e-06, | |
| "loss": 19.9137, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.27457403378394896, | |
| "grad_norm": 1029.162353515625, | |
| "learning_rate": 1.2043389471257833e-06, | |
| "loss": 31.7524, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2749651648862053, | |
| "grad_norm": 900.6036987304688, | |
| "learning_rate": 1.1927702081543279e-06, | |
| "loss": 11.9903, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.2753562959884616, | |
| "grad_norm": 0.16628578305244446, | |
| "learning_rate": 1.1812497718604887e-06, | |
| "loss": 11.8731, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.275747427090718, | |
| "grad_norm": 0.20883895456790924, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 2.1834, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.2761385581929743, | |
| "grad_norm": 1.7050410509109497, | |
| "learning_rate": 1.158354391334362e-06, | |
| "loss": 18.1528, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.27652968929523064, | |
| "grad_norm": 1408.4571533203125, | |
| "learning_rate": 1.1469797375778902e-06, | |
| "loss": 37.1046, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.276920820397487, | |
| "grad_norm": 0.5376846790313721, | |
| "learning_rate": 1.1356539674469852e-06, | |
| "loss": 32.4544, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2773119514997433, | |
| "grad_norm": 0.30538272857666016, | |
| "learning_rate": 1.1243772246327416e-06, | |
| "loss": 26.5578, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.27770308260199966, | |
| "grad_norm": 0.1639033704996109, | |
| "learning_rate": 1.1131496522042424e-06, | |
| "loss": 24.755, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.278094213704256, | |
| "grad_norm": 296.2695007324219, | |
| "learning_rate": 1.1019713926067394e-06, | |
| "loss": 37.8495, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.2784853448065123, | |
| "grad_norm": 0.9918115735054016, | |
| "learning_rate": 1.0908425876598512e-06, | |
| "loss": 32.8862, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2788764759087687, | |
| "grad_norm": 803.0256958007812, | |
| "learning_rate": 1.0797633785557582e-06, | |
| "loss": 17.9585, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.279267607011025, | |
| "grad_norm": 0.4314124584197998, | |
| "learning_rate": 1.068733905857413e-06, | |
| "loss": 11.3658, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.27965873811328135, | |
| "grad_norm": 1402.580078125, | |
| "learning_rate": 1.0577543094967613e-06, | |
| "loss": 62.5657, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.2800498692155377, | |
| "grad_norm": 1.8656405210494995, | |
| "learning_rate": 1.0468247287729593e-06, | |
| "loss": 33.3876, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.280441000317794, | |
| "grad_norm": 0.4445607662200928, | |
| "learning_rate": 1.0359453023506123e-06, | |
| "loss": 9.0196, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.28083213142005037, | |
| "grad_norm": 0.5976535081863403, | |
| "learning_rate": 1.0251161682580125e-06, | |
| "loss": 21.402, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.28122326252230667, | |
| "grad_norm": 0.54698646068573, | |
| "learning_rate": 1.0143374638853892e-06, | |
| "loss": 26.4295, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.28161439362456303, | |
| "grad_norm": 0.8513966798782349, | |
| "learning_rate": 1.0036093259831624e-06, | |
| "loss": 32.0522, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2820055247268194, | |
| "grad_norm": 0.45537883043289185, | |
| "learning_rate": 9.929318906602176e-07, | |
| "loss": 39.6447, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.2823966558290757, | |
| "grad_norm": 1060.1263427734375, | |
| "learning_rate": 9.823052933821643e-07, | |
| "loss": 37.912, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.28278778693133205, | |
| "grad_norm": 0.5889149308204651, | |
| "learning_rate": 9.717296689696283e-07, | |
| "loss": 33.4835, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.28317891803358836, | |
| "grad_norm": 1.4640785455703735, | |
| "learning_rate": 9.612051515965388e-07, | |
| "loss": 24.0596, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.2835700491358447, | |
| "grad_norm": 0.16718535125255585, | |
| "learning_rate": 9.507318747884243e-07, | |
| "loss": 30.542, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.2839611802381011, | |
| "grad_norm": 0.5365858674049377, | |
| "learning_rate": 9.403099714207175e-07, | |
| "loss": 43.143, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.2843523113403574, | |
| "grad_norm": 1128.4521484375, | |
| "learning_rate": 9.299395737170758e-07, | |
| "loss": 41.8304, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.28474344244261374, | |
| "grad_norm": 917.1222534179688, | |
| "learning_rate": 9.196208132476963e-07, | |
| "loss": 27.3092, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.2851345735448701, | |
| "grad_norm": 0.3612130582332611, | |
| "learning_rate": 9.093538209276487e-07, | |
| "loss": 22.8086, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.2855257046471264, | |
| "grad_norm": 0.26056137681007385, | |
| "learning_rate": 8.991387270152202e-07, | |
| "loss": 22.6093, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.28591683574938276, | |
| "grad_norm": 0.23313690721988678, | |
| "learning_rate": 8.88975661110254e-07, | |
| "loss": 20.9189, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.28630796685163906, | |
| "grad_norm": 0.1749650090932846, | |
| "learning_rate": 8.78864752152509e-07, | |
| "loss": 17.4111, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.2866990979538954, | |
| "grad_norm": 1060.3902587890625, | |
| "learning_rate": 8.688061284200266e-07, | |
| "loss": 78.8863, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.2870902290561518, | |
| "grad_norm": 0.22554989159107208, | |
| "learning_rate": 8.587999175274986e-07, | |
| "loss": 44.0863, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2874813601584081, | |
| "grad_norm": 1059.94580078125, | |
| "learning_rate": 8.488462464246495e-07, | |
| "loss": 29.8665, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.28787249126066444, | |
| "grad_norm": 949.2442626953125, | |
| "learning_rate": 8.389452413946314e-07, | |
| "loss": 26.6799, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.28826362236292075, | |
| "grad_norm": 789.4210205078125, | |
| "learning_rate": 8.290970280524124e-07, | |
| "loss": 25.6707, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.2886547534651771, | |
| "grad_norm": 2.4044387340545654, | |
| "learning_rate": 8.193017313431872e-07, | |
| "loss": 10.1112, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.28904588456743346, | |
| "grad_norm": 324.33489990234375, | |
| "learning_rate": 8.095594755407971e-07, | |
| "loss": 11.7497, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.28943701566968977, | |
| "grad_norm": 1.1166508197784424, | |
| "learning_rate": 7.99870384246143e-07, | |
| "loss": 34.0988, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2898281467719461, | |
| "grad_norm": 0.18654634058475494, | |
| "learning_rate": 7.902345803856265e-07, | |
| "loss": 45.7333, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.2902192778742025, | |
| "grad_norm": 0.3777371346950531, | |
| "learning_rate": 7.806521862095834e-07, | |
| "loss": 35.6395, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2906104089764588, | |
| "grad_norm": 0.24813219904899597, | |
| "learning_rate": 7.711233232907401e-07, | |
| "loss": 29.0974, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.29100154007871515, | |
| "grad_norm": 0.2076377123594284, | |
| "learning_rate": 7.616481125226632e-07, | |
| "loss": 46.9587, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.29139267118097145, | |
| "grad_norm": 0.7038260102272034, | |
| "learning_rate": 7.522266741182305e-07, | |
| "loss": 21.8313, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.2917838022832278, | |
| "grad_norm": 217.7123565673828, | |
| "learning_rate": 7.42859127608106e-07, | |
| "loss": 6.3769, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.29217493338548417, | |
| "grad_norm": 0.12917295098304749, | |
| "learning_rate": 7.33545591839222e-07, | |
| "loss": 47.6111, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.2925660644877405, | |
| "grad_norm": 0.20550723373889923, | |
| "learning_rate": 7.242861849732696e-07, | |
| "loss": 64.3405, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.29295719558999683, | |
| "grad_norm": 1334.2445068359375, | |
| "learning_rate": 7.150810244852036e-07, | |
| "loss": 26.1011, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.29334832669225314, | |
| "grad_norm": 0.3111410439014435, | |
| "learning_rate": 7.059302271617485e-07, | |
| "loss": 35.9932, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2937394577945095, | |
| "grad_norm": 942.8177490234375, | |
| "learning_rate": 6.968339090999188e-07, | |
| "loss": 31.1454, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.29413058889676585, | |
| "grad_norm": 810.7886962890625, | |
| "learning_rate": 6.877921857055476e-07, | |
| "loss": 37.078, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.29452171999902216, | |
| "grad_norm": 306.9100036621094, | |
| "learning_rate": 6.78805171691817e-07, | |
| "loss": 34.2818, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.2949128511012785, | |
| "grad_norm": 0.9455443620681763, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 13.8228, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.2953039822035348, | |
| "grad_norm": 0.8926202654838562, | |
| "learning_rate": 6.609957271870505e-07, | |
| "loss": 48.3806, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.2956951133057912, | |
| "grad_norm": 217.76356506347656, | |
| "learning_rate": 6.521735226460901e-07, | |
| "loss": 61.1602, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.29608624440804754, | |
| "grad_norm": 0.5974970459938049, | |
| "learning_rate": 6.43406479383053e-07, | |
| "loss": 14.1005, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.29647737551030384, | |
| "grad_norm": 1.2981594800949097, | |
| "learning_rate": 6.346947086262323e-07, | |
| "loss": 24.2989, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.2968685066125602, | |
| "grad_norm": 0.1865173578262329, | |
| "learning_rate": 6.260383209026704e-07, | |
| "loss": 44.4224, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.29725963771481656, | |
| "grad_norm": 0.39853161573410034, | |
| "learning_rate": 6.174374260367611e-07, | |
| "loss": 37.0987, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.29765076881707286, | |
| "grad_norm": 0.5360068678855896, | |
| "learning_rate": 6.088921331488568e-07, | |
| "loss": 19.2739, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.2980418999193292, | |
| "grad_norm": 840.9296875, | |
| "learning_rate": 6.004025506538813e-07, | |
| "loss": 24.1344, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.2984330310215855, | |
| "grad_norm": 0.38888782262802124, | |
| "learning_rate": 5.919687862599549e-07, | |
| "loss": 37.5647, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.2988241621238419, | |
| "grad_norm": 1.1136953830718994, | |
| "learning_rate": 5.835909469670292e-07, | |
| "loss": 39.4938, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.29921529322609824, | |
| "grad_norm": 0.21624325215816498, | |
| "learning_rate": 5.752691390655279e-07, | |
| "loss": 58.6849, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.29960642432835455, | |
| "grad_norm": 0.725331723690033, | |
| "learning_rate": 5.670034681349995e-07, | |
| "loss": 15.0299, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.2999975554306109, | |
| "grad_norm": 0.4943119287490845, | |
| "learning_rate": 5.587940390427804e-07, | |
| "loss": 22.3662, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.3003886865328672, | |
| "grad_norm": 0.3493794798851013, | |
| "learning_rate": 5.506409559426573e-07, | |
| "loss": 5.3336, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.30077981763512357, | |
| "grad_norm": 0.16925600171089172, | |
| "learning_rate": 5.425443222735527e-07, | |
| "loss": 24.5783, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.30117094873737993, | |
| "grad_norm": 0.42793917655944824, | |
| "learning_rate": 5.345042407582079e-07, | |
| "loss": 22.6456, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.30156207983963623, | |
| "grad_norm": 1158.593994140625, | |
| "learning_rate": 5.265208134018851e-07, | |
| "loss": 28.3467, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.3019532109418926, | |
| "grad_norm": 1398.79248046875, | |
| "learning_rate": 5.185941414910673e-07, | |
| "loss": 13.612, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.30234434204414895, | |
| "grad_norm": 0.3084481954574585, | |
| "learning_rate": 5.107243255921746e-07, | |
| "loss": 40.542, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.30273547314640525, | |
| "grad_norm": 0.6440997123718262, | |
| "learning_rate": 5.029114655502937e-07, | |
| "loss": 23.8275, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.3031266042486616, | |
| "grad_norm": 582.021484375, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 52.9339, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.3035177353509179, | |
| "grad_norm": 297.0395202636719, | |
| "learning_rate": 4.874570088036252e-07, | |
| "loss": 14.6142, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.3039088664531743, | |
| "grad_norm": 0.25378304719924927, | |
| "learning_rate": 4.798156081709638e-07, | |
| "loss": 4.0975, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.30429999755543063, | |
| "grad_norm": 0.7152448892593384, | |
| "learning_rate": 4.722315555370793e-07, | |
| "loss": 14.747, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.30469112865768694, | |
| "grad_norm": 750.841064453125, | |
| "learning_rate": 4.647049471215498e-07, | |
| "loss": 10.1092, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.3050822597599433, | |
| "grad_norm": 0.16996781527996063, | |
| "learning_rate": 4.5723587841515707e-07, | |
| "loss": 8.5861, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3054733908621996, | |
| "grad_norm": 0.2573756277561188, | |
| "learning_rate": 4.4982444417866753e-07, | |
| "loss": 28.2554, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.30586452196445596, | |
| "grad_norm": 0.8471802473068237, | |
| "learning_rate": 4.4247073844163434e-07, | |
| "loss": 23.4402, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.3062556530667123, | |
| "grad_norm": 711.4560546875, | |
| "learning_rate": 4.351748545012058e-07, | |
| "loss": 35.54, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.3066467841689686, | |
| "grad_norm": 660.20458984375, | |
| "learning_rate": 4.279368849209381e-07, | |
| "loss": 32.8363, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.307037915271225, | |
| "grad_norm": 60.09021759033203, | |
| "learning_rate": 4.2075692152962145e-07, | |
| "loss": 25.1056, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.3074290463734813, | |
| "grad_norm": 0.995664119720459, | |
| "learning_rate": 4.136350554201196e-07, | |
| "loss": 18.2107, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.30782017747573764, | |
| "grad_norm": 1.327789545059204, | |
| "learning_rate": 4.0657137694820826e-07, | |
| "loss": 31.5689, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.308211308577994, | |
| "grad_norm": 0.5742402076721191, | |
| "learning_rate": 3.9956597573142966e-07, | |
| "loss": 9.6179, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.3086024396802503, | |
| "grad_norm": 0.27793997526168823, | |
| "learning_rate": 3.9261894064796136e-07, | |
| "loss": 32.6346, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.30899357078250667, | |
| "grad_norm": 1185.6458740234375, | |
| "learning_rate": 3.8573035983548167e-07, | |
| "loss": 38.3132, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.309384701884763, | |
| "grad_norm": 1311.600830078125, | |
| "learning_rate": 3.789003206900538e-07, | |
| "loss": 44.8882, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.30977583298701933, | |
| "grad_norm": 0.7463202476501465, | |
| "learning_rate": 3.7212890986501773e-07, | |
| "loss": 33.5164, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.3101669640892757, | |
| "grad_norm": 0.1619306206703186, | |
| "learning_rate": 3.6541621326989183e-07, | |
| "loss": 8.8827, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.310558095191532, | |
| "grad_norm": 0.290446937084198, | |
| "learning_rate": 3.5876231606927936e-07, | |
| "loss": 22.0263, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.31094922629378835, | |
| "grad_norm": 0.5374624729156494, | |
| "learning_rate": 3.5216730268179346e-07, | |
| "loss": 54.9825, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.3113403573960447, | |
| "grad_norm": 0.45844340324401855, | |
| "learning_rate": 3.4563125677897936e-07, | |
| "loss": 34.1724, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.311731488498301, | |
| "grad_norm": 0.21980485320091248, | |
| "learning_rate": 3.3915426128425744e-07, | |
| "loss": 23.5609, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.31212261960055737, | |
| "grad_norm": 0.1387357860803604, | |
| "learning_rate": 3.327363983718723e-07, | |
| "loss": 3.3226, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.3125137507028137, | |
| "grad_norm": 703.876708984375, | |
| "learning_rate": 3.263777494658449e-07, | |
| "loss": 14.627, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.31290488180507003, | |
| "grad_norm": 0.3398139178752899, | |
| "learning_rate": 3.200783952389447e-07, | |
| "loss": 23.2432, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3132960129073264, | |
| "grad_norm": 0.2651851177215576, | |
| "learning_rate": 3.138384156116614e-07, | |
| "loss": 22.8204, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.3136871440095827, | |
| "grad_norm": 0.7338706254959106, | |
| "learning_rate": 3.076578897511978e-07, | |
| "loss": 8.9847, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.31407827511183906, | |
| "grad_norm": 674.1719360351562, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 42.1909, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.31446940621409536, | |
| "grad_norm": 0.5109131932258606, | |
| "learning_rate": 2.954755122270564e-07, | |
| "loss": 16.502, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.3148605373163517, | |
| "grad_norm": 38.51890563964844, | |
| "learning_rate": 2.894738151223331e-07, | |
| "loss": 22.2199, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.3152516684186081, | |
| "grad_norm": 236.9777069091797, | |
| "learning_rate": 2.835318809003751e-07, | |
| "loss": 61.7173, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.3156427995208644, | |
| "grad_norm": 0.13343866169452667, | |
| "learning_rate": 2.776497849470544e-07, | |
| "loss": 14.8311, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.31603393062312074, | |
| "grad_norm": 0.2250102311372757, | |
| "learning_rate": 2.71827601889067e-07, | |
| "loss": 28.0406, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.3164250617253771, | |
| "grad_norm": 0.3997911810874939, | |
| "learning_rate": 2.6606540559298956e-07, | |
| "loss": 32.8142, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.3168161928276334, | |
| "grad_norm": 947.511474609375, | |
| "learning_rate": 2.6036326916434153e-07, | |
| "loss": 32.0284, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.31720732392988976, | |
| "grad_norm": 0.18809598684310913, | |
| "learning_rate": 2.547212649466568e-07, | |
| "loss": 46.4845, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.31759845503214607, | |
| "grad_norm": 105.57866668701172, | |
| "learning_rate": 2.491394645205669e-07, | |
| "loss": 36.195, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.3179895861344024, | |
| "grad_norm": 700.2158203125, | |
| "learning_rate": 2.436179387028903e-07, | |
| "loss": 38.9557, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.3183807172366588, | |
| "grad_norm": 778.9248657226562, | |
| "learning_rate": 2.3815675754573885e-07, | |
| "loss": 22.1128, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.3187718483389151, | |
| "grad_norm": 0.24114812910556793, | |
| "learning_rate": 2.3275599033562414e-07, | |
| "loss": 40.5305, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.31916297944117145, | |
| "grad_norm": 539.972900390625, | |
| "learning_rate": 2.274157055925802e-07, | |
| "loss": 38.5275, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.31955411054342775, | |
| "grad_norm": 0.5195923447608948, | |
| "learning_rate": 2.2213597106929608e-07, | |
| "loss": 31.244, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.3199452416456841, | |
| "grad_norm": 242.19908142089844, | |
| "learning_rate": 2.1691685375025362e-07, | |
| "loss": 39.8784, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.32033637274794047, | |
| "grad_norm": 0.1984694004058838, | |
| "learning_rate": 2.117584198508771e-07, | |
| "loss": 30.9005, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.32072750385019677, | |
| "grad_norm": 987.247802734375, | |
| "learning_rate": 2.0666073481669714e-07, | |
| "loss": 32.3119, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.32111863495245313, | |
| "grad_norm": 1.042695164680481, | |
| "learning_rate": 2.016238633225165e-07, | |
| "loss": 41.9533, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.3215097660547095, | |
| "grad_norm": 0.18694262206554413, | |
| "learning_rate": 1.9664786927159064e-07, | |
| "loss": 55.5923, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.3219008971569658, | |
| "grad_norm": 0.35913291573524475, | |
| "learning_rate": 1.9173281579481896e-07, | |
| "loss": 17.7606, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.32229202825922215, | |
| "grad_norm": 0.1270519196987152, | |
| "learning_rate": 1.8687876524993987e-07, | |
| "loss": 6.9452, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.32268315936147846, | |
| "grad_norm": 0.25847911834716797, | |
| "learning_rate": 1.820857792207431e-07, | |
| "loss": 11.1015, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.3230742904637348, | |
| "grad_norm": 507.70074462890625, | |
| "learning_rate": 1.7735391851628814e-07, | |
| "loss": 32.347, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.3234654215659912, | |
| "grad_norm": 90.63712310791016, | |
| "learning_rate": 1.7268324317012974e-07, | |
| "loss": 16.0917, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.3238565526682475, | |
| "grad_norm": 0.3610641658306122, | |
| "learning_rate": 1.680738124395598e-07, | |
| "loss": 10.4453, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.32424768377050384, | |
| "grad_norm": 1.101544976234436, | |
| "learning_rate": 1.6352568480485277e-07, | |
| "loss": 3.3177, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.32463881487276014, | |
| "grad_norm": 1.0258104801177979, | |
| "learning_rate": 1.5903891796852756e-07, | |
| "loss": 30.9836, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3250299459750165, | |
| "grad_norm": 0.22194243967533112, | |
| "learning_rate": 1.5461356885461077e-07, | |
| "loss": 11.0591, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.32542107707727286, | |
| "grad_norm": 527.8275146484375, | |
| "learning_rate": 1.5024969360791564e-07, | |
| "loss": 48.5131, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.32581220817952916, | |
| "grad_norm": 0.20000404119491577, | |
| "learning_rate": 1.4594734759333484e-07, | |
| "loss": 33.9883, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.3262033392817855, | |
| "grad_norm": 0.5714410543441772, | |
| "learning_rate": 1.4170658539512993e-07, | |
| "loss": 19.7609, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.3265944703840418, | |
| "grad_norm": 1.373691439628601, | |
| "learning_rate": 1.375274608162447e-07, | |
| "loss": 33.721, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.3269856014862982, | |
| "grad_norm": 0.5083162188529968, | |
| "learning_rate": 1.3341002687762062e-07, | |
| "loss": 21.9135, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.32737673258855454, | |
| "grad_norm": 1.2598671913146973, | |
| "learning_rate": 1.2935433581752365e-07, | |
| "loss": 24.6308, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.32776786369081085, | |
| "grad_norm": 615.4888305664062, | |
| "learning_rate": 1.253604390908819e-07, | |
| "loss": 33.5934, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.3281589947930672, | |
| "grad_norm": 0.8651353716850281, | |
| "learning_rate": 1.2142838736863562e-07, | |
| "loss": 79.5808, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.32855012589532356, | |
| "grad_norm": 0.21397706866264343, | |
| "learning_rate": 1.175582305370887e-07, | |
| "loss": 30.6943, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.32894125699757987, | |
| "grad_norm": 748.9775390625, | |
| "learning_rate": 1.1375001769728e-07, | |
| "loss": 30.7896, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.3293323880998362, | |
| "grad_norm": 0.20578625798225403, | |
| "learning_rate": 1.1000379716435916e-07, | |
| "loss": 28.9501, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.32972351920209253, | |
| "grad_norm": 0.34230169653892517, | |
| "learning_rate": 1.0631961646697387e-07, | |
| "loss": 6.3882, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.3301146503043489, | |
| "grad_norm": 136.2682342529297, | |
| "learning_rate": 1.0269752234666642e-07, | |
| "loss": 9.9568, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.33050578140660525, | |
| "grad_norm": 536.7844848632812, | |
| "learning_rate": 9.913756075728088e-08, | |
| "loss": 42.5157, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.33089691250886155, | |
| "grad_norm": 0.6589470505714417, | |
| "learning_rate": 9.563977686438019e-08, | |
| "loss": 15.0413, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.3312880436111179, | |
| "grad_norm": 0.44913989305496216, | |
| "learning_rate": 9.22042150446728e-08, | |
| "loss": 22.3091, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.3316791747133742, | |
| "grad_norm": 0.11731698364019394, | |
| "learning_rate": 8.883091888545136e-08, | |
| "loss": 26.5784, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3320703058156306, | |
| "grad_norm": 0.27501413226127625, | |
| "learning_rate": 8.551993118403656e-08, | |
| "loss": 18.7753, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.33246143691788693, | |
| "grad_norm": 325.61627197265625, | |
| "learning_rate": 8.227129394723643e-08, | |
| "loss": 22.7822, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.33285256802014324, | |
| "grad_norm": 0.2187097817659378, | |
| "learning_rate": 7.908504839081343e-08, | |
| "loss": 21.9867, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.3332436991223996, | |
| "grad_norm": 476.84649658203125, | |
| "learning_rate": 7.59612349389599e-08, | |
| "loss": 34.4312, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.33363483022465595, | |
| "grad_norm": 0.8119810223579407, | |
| "learning_rate": 7.289989322378732e-08, | |
| "loss": 9.815, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.33402596132691226, | |
| "grad_norm": 0.5497637987136841, | |
| "learning_rate": 6.990106208482227e-08, | |
| "loss": 25.6327, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.3344170924291686, | |
| "grad_norm": 0.14091427624225616, | |
| "learning_rate": 6.696477956851356e-08, | |
| "loss": 17.0462, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.3348082235314249, | |
| "grad_norm": 0.36118146777153015, | |
| "learning_rate": 6.409108292774912e-08, | |
| "loss": 29.5031, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.3351993546336813, | |
| "grad_norm": 0.4133693277835846, | |
| "learning_rate": 6.12800086213866e-08, | |
| "loss": 23.7123, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.33559048573593764, | |
| "grad_norm": 1.1946247816085815, | |
| "learning_rate": 5.853159231378469e-08, | |
| "loss": 46.5555, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.33598161683819394, | |
| "grad_norm": 0.21554416418075562, | |
| "learning_rate": 5.584586887435739e-08, | |
| "loss": 17.6013, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.3363727479404503, | |
| "grad_norm": 0.16092292964458466, | |
| "learning_rate": 5.322287237712664e-08, | |
| "loss": 10.5376, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3367638790427066, | |
| "grad_norm": 464.0669860839844, | |
| "learning_rate": 5.0662636100292094e-08, | |
| "loss": 36.5366, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.33715501014496296, | |
| "grad_norm": 0.7112561464309692, | |
| "learning_rate": 4.8165192525809754e-08, | |
| "loss": 32.8205, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.3375461412472193, | |
| "grad_norm": 0.18869797885417938, | |
| "learning_rate": 4.573057333897679e-08, | |
| "loss": 11.1364, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.3379372723494756, | |
| "grad_norm": 0.11357381194829941, | |
| "learning_rate": 4.335880942803405e-08, | |
| "loss": 15.2307, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.338328403451732, | |
| "grad_norm": 0.2633415162563324, | |
| "learning_rate": 4.104993088376974e-08, | |
| "loss": 11.5664, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.3387195345539883, | |
| "grad_norm": 1.018863320350647, | |
| "learning_rate": 3.8803966999139686e-08, | |
| "loss": 8.2019, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.33911066565624465, | |
| "grad_norm": 1.0014698505401611, | |
| "learning_rate": 3.662094626889656e-08, | |
| "loss": 14.888, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.339501796758501, | |
| "grad_norm": 0.7487742900848389, | |
| "learning_rate": 3.450089638922738e-08, | |
| "loss": 51.2638, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3398929278607573, | |
| "grad_norm": 532.7721557617188, | |
| "learning_rate": 3.2443844257400434e-08, | |
| "loss": 23.042, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.34028405896301367, | |
| "grad_norm": 0.3149930536746979, | |
| "learning_rate": 3.044981597142837e-08, | |
| "loss": 4.3578, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.34067519006527003, | |
| "grad_norm": 1.0493534803390503, | |
| "learning_rate": 2.8518836829732332e-08, | |
| "loss": 25.666, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.34106632116752633, | |
| "grad_norm": 582.3631591796875, | |
| "learning_rate": 2.6650931330823305e-08, | |
| "loss": 31.6531, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.3414574522697827, | |
| "grad_norm": 680.9072265625, | |
| "learning_rate": 2.4846123172992953e-08, | |
| "loss": 22.6484, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.341848583372039, | |
| "grad_norm": 0.22280123829841614, | |
| "learning_rate": 2.3104435254008852e-08, | |
| "loss": 14.0636, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.34223971447429535, | |
| "grad_norm": 0.26472118496894836, | |
| "learning_rate": 2.1425889670827483e-08, | |
| "loss": 19.032, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.3426308455765517, | |
| "grad_norm": 238.65916442871094, | |
| "learning_rate": 1.981050771931281e-08, | |
| "loss": 10.7904, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.343021976678808, | |
| "grad_norm": 0.1643354743719101, | |
| "learning_rate": 1.8258309893965375e-08, | |
| "loss": 14.425, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.3434131077810644, | |
| "grad_norm": 0.3684662878513336, | |
| "learning_rate": 1.6769315887662508e-08, | |
| "loss": 33.7941, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.3438042388833207, | |
| "grad_norm": 0.7127506136894226, | |
| "learning_rate": 1.5343544591409632e-08, | |
| "loss": 21.4788, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.34419536998557704, | |
| "grad_norm": 0.591823160648346, | |
| "learning_rate": 1.3981014094099354e-08, | |
| "loss": 1.5929, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3445865010878334, | |
| "grad_norm": 0.8882282972335815, | |
| "learning_rate": 1.2681741682282755e-08, | |
| "loss": 16.2899, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.3449776321900897, | |
| "grad_norm": 0.4288715720176697, | |
| "learning_rate": 1.1445743839949008e-08, | |
| "loss": 11.78, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.34536876329234606, | |
| "grad_norm": 479.3231506347656, | |
| "learning_rate": 1.0273036248318325e-08, | |
| "loss": 27.5783, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.3457598943946024, | |
| "grad_norm": 0.7523378133773804, | |
| "learning_rate": 9.163633785639892e-09, | |
| "loss": 31.4348, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.3461510254968587, | |
| "grad_norm": 0.4846894145011902, | |
| "learning_rate": 8.117550527005913e-09, | |
| "loss": 2.143, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.3465421565991151, | |
| "grad_norm": 229.96755981445312, | |
| "learning_rate": 7.13479974417175e-09, | |
| "loss": 30.5446, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.3469332877013714, | |
| "grad_norm": 0.1374731957912445, | |
| "learning_rate": 6.215393905388278e-09, | |
| "loss": 22.3179, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.34732441880362774, | |
| "grad_norm": 0.76992267370224, | |
| "learning_rate": 5.359344675242018e-09, | |
| "loss": 14.0893, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.3477155499058841, | |
| "grad_norm": 476.39459228515625, | |
| "learning_rate": 4.56666291450858e-09, | |
| "loss": 52.4586, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.3481066810081404, | |
| "grad_norm": 71.57829284667969, | |
| "learning_rate": 3.837358680016112e-09, | |
| "loss": 19.4455, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.34849781211039677, | |
| "grad_norm": 0.3923113942146301, | |
| "learning_rate": 3.1714412245148486e-09, | |
| "loss": 43.8042, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.34888894321265307, | |
| "grad_norm": 0.11082535982131958, | |
| "learning_rate": 2.568918996560532e-09, | |
| "loss": 21.0263, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.34928007431490943, | |
| "grad_norm": 0.5106098651885986, | |
| "learning_rate": 2.029799640409502e-09, | |
| "loss": 21.4105, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.3496712054171658, | |
| "grad_norm": 0.2212674468755722, | |
| "learning_rate": 1.5540899959187727e-09, | |
| "loss": 29.8404, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.3500623365194221, | |
| "grad_norm": 0.4330773949623108, | |
| "learning_rate": 1.1417960984605459e-09, | |
| "loss": 28.9654, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.35045346762167845, | |
| "grad_norm": 0.1814635843038559, | |
| "learning_rate": 7.92923178845606e-10, | |
| "loss": 25.6656, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.35084459872393475, | |
| "grad_norm": 538.5513916015625, | |
| "learning_rate": 5.07475663257262e-10, | |
| "loss": 32.209, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.3512357298261911, | |
| "grad_norm": 536.6629028320312, | |
| "learning_rate": 2.854571731947253e-10, | |
| "loss": 13.3711, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.35162686092844747, | |
| "grad_norm": 598.1375732421875, | |
| "learning_rate": 1.2687052542759148e-10, | |
| "loss": 27.3026, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.3520179920307038, | |
| "grad_norm": 0.18695542216300964, | |
| "learning_rate": 3.171773195809191e-11, | |
| "loss": 31.2123, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3520179920307038, | |
| "step": 900, | |
| "total_flos": 9.027346976391299e+18, | |
| "train_loss": 213.8077490248945, | |
| "train_runtime": 76099.1347, | |
| "train_samples_per_second": 3.028, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.027346976391299e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |