| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.994606256742179, | |
| "eval_steps": 500, | |
| "global_step": 1389, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002157497303128371, | |
| "grad_norm": 57.13587782353209, | |
| "learning_rate": 0.0, | |
| "loss": 11.1563, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004314994606256742, | |
| "grad_norm": 57.57684447116968, | |
| "learning_rate": 3.5971223021582736e-07, | |
| "loss": 11.0961, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006472491909385114, | |
| "grad_norm": 54.76562948065167, | |
| "learning_rate": 7.194244604316547e-07, | |
| "loss": 11.2027, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008629989212513484, | |
| "grad_norm": 56.76231147192983, | |
| "learning_rate": 1.0791366906474822e-06, | |
| "loss": 11.1248, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010787486515641856, | |
| "grad_norm": 59.88129471289495, | |
| "learning_rate": 1.4388489208633094e-06, | |
| "loss": 11.0418, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.012944983818770227, | |
| "grad_norm": 59.30119373658902, | |
| "learning_rate": 1.7985611510791366e-06, | |
| "loss": 11.0469, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015102481121898598, | |
| "grad_norm": 63.23987032545693, | |
| "learning_rate": 2.1582733812949645e-06, | |
| "loss": 10.7637, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017259978425026967, | |
| "grad_norm": 67.8264131296607, | |
| "learning_rate": 2.5179856115107916e-06, | |
| "loss": 10.5729, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.019417475728155338, | |
| "grad_norm": 96.87216003729526, | |
| "learning_rate": 2.877697841726619e-06, | |
| "loss": 9.33, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.021574973031283712, | |
| "grad_norm": 110.36643138785996, | |
| "learning_rate": 3.237410071942446e-06, | |
| "loss": 8.7194, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.023732470334412083, | |
| "grad_norm": 62.62903504880271, | |
| "learning_rate": 3.5971223021582732e-06, | |
| "loss": 3.5156, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.025889967637540454, | |
| "grad_norm": 55.87526116545333, | |
| "learning_rate": 3.956834532374101e-06, | |
| "loss": 3.2214, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.028047464940668825, | |
| "grad_norm": 38.69327465608191, | |
| "learning_rate": 4.316546762589929e-06, | |
| "loss": 2.5429, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.030204962243797196, | |
| "grad_norm": 29.949119954850115, | |
| "learning_rate": 4.676258992805756e-06, | |
| "loss": 2.2303, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.032362459546925564, | |
| "grad_norm": 6.23171849490708, | |
| "learning_rate": 5.035971223021583e-06, | |
| "loss": 1.3887, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.034519956850053934, | |
| "grad_norm": 4.727559417918715, | |
| "learning_rate": 5.3956834532374105e-06, | |
| "loss": 1.2918, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.036677454153182305, | |
| "grad_norm": 3.940521844725638, | |
| "learning_rate": 5.755395683453238e-06, | |
| "loss": 1.2401, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.038834951456310676, | |
| "grad_norm": 2.6425075308520816, | |
| "learning_rate": 6.115107913669065e-06, | |
| "loss": 1.1628, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.040992448759439054, | |
| "grad_norm": 2.1413189742525223, | |
| "learning_rate": 6.474820143884892e-06, | |
| "loss": 1.106, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.043149946062567425, | |
| "grad_norm": 1.4939789670719916, | |
| "learning_rate": 6.83453237410072e-06, | |
| "loss": 0.9834, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.045307443365695796, | |
| "grad_norm": 119.59331302017698, | |
| "learning_rate": 7.1942446043165465e-06, | |
| "loss": 1.0391, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.04746494066882417, | |
| "grad_norm": 74.35045753368888, | |
| "learning_rate": 7.5539568345323745e-06, | |
| "loss": 0.984, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04962243797195254, | |
| "grad_norm": 2.1166562532348547, | |
| "learning_rate": 7.913669064748202e-06, | |
| "loss": 0.9028, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05177993527508091, | |
| "grad_norm": 1.259997812455964, | |
| "learning_rate": 8.273381294964029e-06, | |
| "loss": 0.9009, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05393743257820928, | |
| "grad_norm": 1.0042382956541405, | |
| "learning_rate": 8.633093525179858e-06, | |
| "loss": 0.8458, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05609492988133765, | |
| "grad_norm": 0.8711320683532612, | |
| "learning_rate": 8.992805755395683e-06, | |
| "loss": 0.8222, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05825242718446602, | |
| "grad_norm": 0.8070054635681398, | |
| "learning_rate": 9.352517985611512e-06, | |
| "loss": 0.7821, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06040992448759439, | |
| "grad_norm": 0.907177791133324, | |
| "learning_rate": 9.71223021582734e-06, | |
| "loss": 0.7905, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06256742179072276, | |
| "grad_norm": 0.8987984527990586, | |
| "learning_rate": 1.0071942446043167e-05, | |
| "loss": 0.7743, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06472491909385113, | |
| "grad_norm": 0.766780715327364, | |
| "learning_rate": 1.0431654676258994e-05, | |
| "loss": 0.7393, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0668824163969795, | |
| "grad_norm": 0.6221327677977063, | |
| "learning_rate": 1.0791366906474821e-05, | |
| "loss": 0.7169, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06903991370010787, | |
| "grad_norm": 0.724452806351174, | |
| "learning_rate": 1.1151079136690648e-05, | |
| "loss": 0.6844, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07119741100323625, | |
| "grad_norm": 0.726983862192927, | |
| "learning_rate": 1.1510791366906475e-05, | |
| "loss": 0.6782, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.07335490830636461, | |
| "grad_norm": 0.5784038572104493, | |
| "learning_rate": 1.1870503597122303e-05, | |
| "loss": 0.6674, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07551240560949299, | |
| "grad_norm": 0.5549566422059363, | |
| "learning_rate": 1.223021582733813e-05, | |
| "loss": 0.6972, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07766990291262135, | |
| "grad_norm": 0.6399396561875369, | |
| "learning_rate": 1.2589928057553957e-05, | |
| "loss": 0.6792, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07982740021574973, | |
| "grad_norm": 0.5849368618649039, | |
| "learning_rate": 1.2949640287769784e-05, | |
| "loss": 0.6695, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08198489751887811, | |
| "grad_norm": 0.5900434638838187, | |
| "learning_rate": 1.3309352517985613e-05, | |
| "loss": 0.6773, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08414239482200647, | |
| "grad_norm": 0.4914556169563964, | |
| "learning_rate": 1.366906474820144e-05, | |
| "loss": 0.6521, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08629989212513485, | |
| "grad_norm": 0.3975245564581552, | |
| "learning_rate": 1.4028776978417266e-05, | |
| "loss": 0.6457, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08845738942826321, | |
| "grad_norm": 0.5020290377407021, | |
| "learning_rate": 1.4388489208633093e-05, | |
| "loss": 0.6393, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09061488673139159, | |
| "grad_norm": 0.46341748213501166, | |
| "learning_rate": 1.4748201438848922e-05, | |
| "loss": 0.5959, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09277238403451996, | |
| "grad_norm": 0.4138969749517581, | |
| "learning_rate": 1.5107913669064749e-05, | |
| "loss": 0.623, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09492988133764833, | |
| "grad_norm": 0.4470998608888789, | |
| "learning_rate": 1.5467625899280578e-05, | |
| "loss": 0.6173, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0970873786407767, | |
| "grad_norm": 0.4268271776986609, | |
| "learning_rate": 1.5827338129496403e-05, | |
| "loss": 0.6252, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09924487594390508, | |
| "grad_norm": 0.3992607277762862, | |
| "learning_rate": 1.618705035971223e-05, | |
| "loss": 0.6104, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10140237324703344, | |
| "grad_norm": 0.46730384996685315, | |
| "learning_rate": 1.6546762589928058e-05, | |
| "loss": 0.6067, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10355987055016182, | |
| "grad_norm": 0.340962945547672, | |
| "learning_rate": 1.6906474820143887e-05, | |
| "loss": 0.6184, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10571736785329018, | |
| "grad_norm": 0.3169350607679008, | |
| "learning_rate": 1.7266187050359716e-05, | |
| "loss": 0.5889, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.10787486515641856, | |
| "grad_norm": 0.42059973714399224, | |
| "learning_rate": 1.7625899280575538e-05, | |
| "loss": 0.6113, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11003236245954692, | |
| "grad_norm": 0.33716661312326635, | |
| "learning_rate": 1.7985611510791367e-05, | |
| "loss": 0.5815, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1121898597626753, | |
| "grad_norm": 0.2944086703558636, | |
| "learning_rate": 1.8345323741007196e-05, | |
| "loss": 0.6006, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11434735706580366, | |
| "grad_norm": 0.349551156334338, | |
| "learning_rate": 1.8705035971223024e-05, | |
| "loss": 0.573, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11650485436893204, | |
| "grad_norm": 0.3128341661906979, | |
| "learning_rate": 1.906474820143885e-05, | |
| "loss": 0.5897, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1186623516720604, | |
| "grad_norm": 0.30337839597627236, | |
| "learning_rate": 1.942446043165468e-05, | |
| "loss": 0.585, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12081984897518878, | |
| "grad_norm": 0.2977094639323748, | |
| "learning_rate": 1.9784172661870504e-05, | |
| "loss": 0.5783, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12297734627831715, | |
| "grad_norm": 0.3163276610774343, | |
| "learning_rate": 2.0143884892086333e-05, | |
| "loss": 0.5844, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.12513484358144553, | |
| "grad_norm": 0.2909825753497844, | |
| "learning_rate": 2.050359712230216e-05, | |
| "loss": 0.5773, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1272923408845739, | |
| "grad_norm": 0.24835717615595493, | |
| "learning_rate": 2.0863309352517988e-05, | |
| "loss": 0.5666, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.12944983818770225, | |
| "grad_norm": 0.30633255958737043, | |
| "learning_rate": 2.1223021582733816e-05, | |
| "loss": 0.5567, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13160733549083065, | |
| "grad_norm": 0.3123982037388874, | |
| "learning_rate": 2.1582733812949642e-05, | |
| "loss": 0.5845, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.133764832793959, | |
| "grad_norm": 0.3547886968310134, | |
| "learning_rate": 2.1942446043165467e-05, | |
| "loss": 0.5574, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13592233009708737, | |
| "grad_norm": 0.3014066909176335, | |
| "learning_rate": 2.2302158273381296e-05, | |
| "loss": 0.5621, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.13807982740021574, | |
| "grad_norm": 0.24373585748942053, | |
| "learning_rate": 2.2661870503597125e-05, | |
| "loss": 0.5787, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14023732470334413, | |
| "grad_norm": 0.2552538074958043, | |
| "learning_rate": 2.302158273381295e-05, | |
| "loss": 0.5376, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1423948220064725, | |
| "grad_norm": 0.2602279217663108, | |
| "learning_rate": 2.3381294964028776e-05, | |
| "loss": 0.565, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14455231930960086, | |
| "grad_norm": 0.2540749697215477, | |
| "learning_rate": 2.3741007194244605e-05, | |
| "loss": 0.5384, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.14670981661272922, | |
| "grad_norm": 0.265028926794241, | |
| "learning_rate": 2.4100719424460434e-05, | |
| "loss": 0.5404, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1488673139158576, | |
| "grad_norm": 0.24987952808261527, | |
| "learning_rate": 2.446043165467626e-05, | |
| "loss": 0.555, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.15102481121898598, | |
| "grad_norm": 0.24626279967807332, | |
| "learning_rate": 2.482014388489209e-05, | |
| "loss": 0.5465, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15318230852211434, | |
| "grad_norm": 0.24540860578710721, | |
| "learning_rate": 2.5179856115107914e-05, | |
| "loss": 0.5389, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1553398058252427, | |
| "grad_norm": 0.2819028166039196, | |
| "learning_rate": 2.5539568345323743e-05, | |
| "loss": 0.5341, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1574973031283711, | |
| "grad_norm": 0.2725924730541978, | |
| "learning_rate": 2.589928057553957e-05, | |
| "loss": 0.5593, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.15965480043149946, | |
| "grad_norm": 0.41273921517798356, | |
| "learning_rate": 2.6258992805755394e-05, | |
| "loss": 0.5627, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16181229773462782, | |
| "grad_norm": 0.2582400805700941, | |
| "learning_rate": 2.6618705035971226e-05, | |
| "loss": 0.5306, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.16396979503775622, | |
| "grad_norm": 0.26006309950535167, | |
| "learning_rate": 2.697841726618705e-05, | |
| "loss": 0.5215, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16612729234088458, | |
| "grad_norm": 0.24344187994219604, | |
| "learning_rate": 2.733812949640288e-05, | |
| "loss": 0.5243, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.16828478964401294, | |
| "grad_norm": 0.2520498275788824, | |
| "learning_rate": 2.7697841726618706e-05, | |
| "loss": 0.5263, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1704422869471413, | |
| "grad_norm": 0.25245524340715986, | |
| "learning_rate": 2.805755395683453e-05, | |
| "loss": 0.5287, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1725997842502697, | |
| "grad_norm": 0.2605418146662812, | |
| "learning_rate": 2.841726618705036e-05, | |
| "loss": 0.5301, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17475728155339806, | |
| "grad_norm": 0.24212992631081523, | |
| "learning_rate": 2.8776978417266186e-05, | |
| "loss": 0.5359, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.17691477885652643, | |
| "grad_norm": 0.24539478768207, | |
| "learning_rate": 2.9136690647482018e-05, | |
| "loss": 0.5208, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.1790722761596548, | |
| "grad_norm": 0.24763507587346034, | |
| "learning_rate": 2.9496402877697844e-05, | |
| "loss": 0.5203, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18122977346278318, | |
| "grad_norm": 0.27551677902256216, | |
| "learning_rate": 2.985611510791367e-05, | |
| "loss": 0.5228, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18338727076591155, | |
| "grad_norm": 0.24453606868458008, | |
| "learning_rate": 3.0215827338129498e-05, | |
| "loss": 0.5235, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1855447680690399, | |
| "grad_norm": 0.26688122063771413, | |
| "learning_rate": 3.0575539568345324e-05, | |
| "loss": 0.5343, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18770226537216828, | |
| "grad_norm": 0.29529860209112546, | |
| "learning_rate": 3.0935251798561156e-05, | |
| "loss": 0.5367, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.18985976267529667, | |
| "grad_norm": 0.24266710111447526, | |
| "learning_rate": 3.129496402877698e-05, | |
| "loss": 0.5253, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19201725997842503, | |
| "grad_norm": 0.23807746989279177, | |
| "learning_rate": 3.165467625899281e-05, | |
| "loss": 0.5185, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.1941747572815534, | |
| "grad_norm": 0.2626350221760916, | |
| "learning_rate": 3.201438848920863e-05, | |
| "loss": 0.5395, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19633225458468176, | |
| "grad_norm": 0.2414024909993752, | |
| "learning_rate": 3.237410071942446e-05, | |
| "loss": 0.532, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.19848975188781015, | |
| "grad_norm": 0.25017076106275854, | |
| "learning_rate": 3.273381294964029e-05, | |
| "loss": 0.5276, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20064724919093851, | |
| "grad_norm": 0.2918654609740857, | |
| "learning_rate": 3.3093525179856116e-05, | |
| "loss": 0.5204, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.20280474649406688, | |
| "grad_norm": 0.2624246785726397, | |
| "learning_rate": 3.345323741007194e-05, | |
| "loss": 0.5336, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20496224379719524, | |
| "grad_norm": 0.2452405947687516, | |
| "learning_rate": 3.3812949640287773e-05, | |
| "loss": 0.5105, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.20711974110032363, | |
| "grad_norm": 0.27455141570832603, | |
| "learning_rate": 3.41726618705036e-05, | |
| "loss": 0.517, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.209277238403452, | |
| "grad_norm": 0.234849775079589, | |
| "learning_rate": 3.453237410071943e-05, | |
| "loss": 0.5155, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.21143473570658036, | |
| "grad_norm": 0.25742508943184506, | |
| "learning_rate": 3.489208633093525e-05, | |
| "loss": 0.5348, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.21359223300970873, | |
| "grad_norm": 0.23602027129846576, | |
| "learning_rate": 3.5251798561151075e-05, | |
| "loss": 0.5136, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.21574973031283712, | |
| "grad_norm": 0.2830916969833504, | |
| "learning_rate": 3.561151079136691e-05, | |
| "loss": 0.5141, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21790722761596548, | |
| "grad_norm": 0.2661717768903485, | |
| "learning_rate": 3.597122302158273e-05, | |
| "loss": 0.514, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.22006472491909385, | |
| "grad_norm": 0.2486575067928459, | |
| "learning_rate": 3.6330935251798566e-05, | |
| "loss": 0.5146, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 0.2953832672456234, | |
| "learning_rate": 3.669064748201439e-05, | |
| "loss": 0.5302, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2243797195253506, | |
| "grad_norm": 0.28906909952910065, | |
| "learning_rate": 3.7050359712230217e-05, | |
| "loss": 0.4751, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22653721682847897, | |
| "grad_norm": 0.2686201225334034, | |
| "learning_rate": 3.741007194244605e-05, | |
| "loss": 0.511, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.22869471413160733, | |
| "grad_norm": 0.2741508909189952, | |
| "learning_rate": 3.776978417266187e-05, | |
| "loss": 0.5184, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2308522114347357, | |
| "grad_norm": 0.2802116060264731, | |
| "learning_rate": 3.81294964028777e-05, | |
| "loss": 0.4975, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.23300970873786409, | |
| "grad_norm": 0.2667693505204224, | |
| "learning_rate": 3.8489208633093525e-05, | |
| "loss": 0.4981, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.23516720604099245, | |
| "grad_norm": 0.2597113095109555, | |
| "learning_rate": 3.884892086330936e-05, | |
| "loss": 0.5118, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2373247033441208, | |
| "grad_norm": 0.2697903200086048, | |
| "learning_rate": 3.920863309352518e-05, | |
| "loss": 0.5051, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23948220064724918, | |
| "grad_norm": 0.2713556557014583, | |
| "learning_rate": 3.956834532374101e-05, | |
| "loss": 0.5212, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.24163969795037757, | |
| "grad_norm": 0.2728968900059994, | |
| "learning_rate": 3.992805755395684e-05, | |
| "loss": 0.5134, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24379719525350593, | |
| "grad_norm": 0.3057783884890205, | |
| "learning_rate": 4.0287769784172666e-05, | |
| "loss": 0.5174, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.2459546925566343, | |
| "grad_norm": 0.2526607253149815, | |
| "learning_rate": 4.064748201438849e-05, | |
| "loss": 0.4979, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2481121898597627, | |
| "grad_norm": 0.29935142309285345, | |
| "learning_rate": 4.100719424460432e-05, | |
| "loss": 0.4791, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.25026968716289105, | |
| "grad_norm": 0.274036916116646, | |
| "learning_rate": 4.136690647482014e-05, | |
| "loss": 0.503, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2524271844660194, | |
| "grad_norm": 0.29373833817106976, | |
| "learning_rate": 4.1726618705035975e-05, | |
| "loss": 0.4979, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.2545846817691478, | |
| "grad_norm": 0.30379306843345605, | |
| "learning_rate": 4.20863309352518e-05, | |
| "loss": 0.5143, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25674217907227614, | |
| "grad_norm": 0.3597081745599116, | |
| "learning_rate": 4.244604316546763e-05, | |
| "loss": 0.4963, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.2588996763754045, | |
| "grad_norm": 0.27784099569060233, | |
| "learning_rate": 4.280575539568346e-05, | |
| "loss": 0.4964, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.26105717367853293, | |
| "grad_norm": 0.2895994279044635, | |
| "learning_rate": 4.3165467625899284e-05, | |
| "loss": 0.4953, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.2632146709816613, | |
| "grad_norm": 0.34053962799665355, | |
| "learning_rate": 4.352517985611511e-05, | |
| "loss": 0.5335, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.26537216828478966, | |
| "grad_norm": 0.3141740622845576, | |
| "learning_rate": 4.3884892086330935e-05, | |
| "loss": 0.5071, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.267529665587918, | |
| "grad_norm": 0.3046488749690831, | |
| "learning_rate": 4.424460431654677e-05, | |
| "loss": 0.4859, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2696871628910464, | |
| "grad_norm": 0.30610931123203444, | |
| "learning_rate": 4.460431654676259e-05, | |
| "loss": 0.4708, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.27184466019417475, | |
| "grad_norm": 0.3193175150854491, | |
| "learning_rate": 4.496402877697842e-05, | |
| "loss": 0.4848, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2740021574973031, | |
| "grad_norm": 0.3255701503756811, | |
| "learning_rate": 4.532374100719425e-05, | |
| "loss": 0.4766, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.2761596548004315, | |
| "grad_norm": 0.3599922285030049, | |
| "learning_rate": 4.5683453237410076e-05, | |
| "loss": 0.5077, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2783171521035599, | |
| "grad_norm": 0.3449429958647818, | |
| "learning_rate": 4.60431654676259e-05, | |
| "loss": 0.4859, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.28047464940668826, | |
| "grad_norm": 0.30781106612363096, | |
| "learning_rate": 4.640287769784173e-05, | |
| "loss": 0.5086, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2826321467098166, | |
| "grad_norm": 0.31034736893309267, | |
| "learning_rate": 4.676258992805755e-05, | |
| "loss": 0.4706, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.284789644012945, | |
| "grad_norm": 0.3297773636655693, | |
| "learning_rate": 4.7122302158273385e-05, | |
| "loss": 0.4901, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28694714131607335, | |
| "grad_norm": 0.31788365317052747, | |
| "learning_rate": 4.748201438848921e-05, | |
| "loss": 0.5098, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.2891046386192017, | |
| "grad_norm": 0.38398998891452074, | |
| "learning_rate": 4.784172661870504e-05, | |
| "loss": 0.493, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2912621359223301, | |
| "grad_norm": 0.3273138176909412, | |
| "learning_rate": 4.820143884892087e-05, | |
| "loss": 0.4769, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.29341963322545844, | |
| "grad_norm": 0.33365763486992506, | |
| "learning_rate": 4.8561151079136694e-05, | |
| "loss": 0.49, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.29557713052858686, | |
| "grad_norm": 0.301373109121661, | |
| "learning_rate": 4.892086330935252e-05, | |
| "loss": 0.4869, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2977346278317152, | |
| "grad_norm": 0.36827893859352856, | |
| "learning_rate": 4.9280575539568345e-05, | |
| "loss": 0.494, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2998921251348436, | |
| "grad_norm": 0.4286223278223316, | |
| "learning_rate": 4.964028776978418e-05, | |
| "loss": 0.508, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.30204962243797195, | |
| "grad_norm": 0.3583525731124658, | |
| "learning_rate": 5e-05, | |
| "loss": 0.495, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3042071197411003, | |
| "grad_norm": 0.3221902127168568, | |
| "learning_rate": 4.996e-05, | |
| "loss": 0.5045, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3063646170442287, | |
| "grad_norm": 0.337708249785391, | |
| "learning_rate": 4.992e-05, | |
| "loss": 0.4929, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30852211434735705, | |
| "grad_norm": 0.3394347724632982, | |
| "learning_rate": 4.9880000000000004e-05, | |
| "loss": 0.5047, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3106796116504854, | |
| "grad_norm": 0.2813961175434312, | |
| "learning_rate": 4.9840000000000004e-05, | |
| "loss": 0.4949, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.31283710895361383, | |
| "grad_norm": 0.30421333717748744, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 0.486, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3149946062567422, | |
| "grad_norm": 0.36470188308478374, | |
| "learning_rate": 4.976e-05, | |
| "loss": 0.4983, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.31715210355987056, | |
| "grad_norm": 0.27856852801649823, | |
| "learning_rate": 4.972e-05, | |
| "loss": 0.4883, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3193096008629989, | |
| "grad_norm": 0.3044782327916934, | |
| "learning_rate": 4.9680000000000005e-05, | |
| "loss": 0.4922, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3214670981661273, | |
| "grad_norm": 0.2804188353274235, | |
| "learning_rate": 4.9640000000000006e-05, | |
| "loss": 0.4866, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.32362459546925565, | |
| "grad_norm": 0.30594014263779995, | |
| "learning_rate": 4.96e-05, | |
| "loss": 0.4745, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.325782092772384, | |
| "grad_norm": 0.3175912214255066, | |
| "learning_rate": 4.956e-05, | |
| "loss": 0.4986, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.32793959007551243, | |
| "grad_norm": 0.31983013814077355, | |
| "learning_rate": 4.952e-05, | |
| "loss": 0.48, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3300970873786408, | |
| "grad_norm": 0.3230897992697352, | |
| "learning_rate": 4.948000000000001e-05, | |
| "loss": 0.4825, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.33225458468176916, | |
| "grad_norm": 0.26971956302103783, | |
| "learning_rate": 4.944e-05, | |
| "loss": 0.4806, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3344120819848975, | |
| "grad_norm": 0.2878515152482518, | |
| "learning_rate": 4.94e-05, | |
| "loss": 0.4868, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3365695792880259, | |
| "grad_norm": 0.29671867961170256, | |
| "learning_rate": 4.936e-05, | |
| "loss": 0.4967, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.33872707659115425, | |
| "grad_norm": 0.28930159604607764, | |
| "learning_rate": 4.932e-05, | |
| "loss": 0.4877, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3408845738942826, | |
| "grad_norm": 0.3015050117518478, | |
| "learning_rate": 4.928e-05, | |
| "loss": 0.4856, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.343042071197411, | |
| "grad_norm": 0.3286860170088506, | |
| "learning_rate": 4.924e-05, | |
| "loss": 0.4798, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3451995685005394, | |
| "grad_norm": 0.30390765140489734, | |
| "learning_rate": 4.92e-05, | |
| "loss": 0.4841, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34735706580366776, | |
| "grad_norm": 0.3599533471110528, | |
| "learning_rate": 4.9160000000000004e-05, | |
| "loss": 0.4798, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.34951456310679613, | |
| "grad_norm": 0.27961730157871734, | |
| "learning_rate": 4.9120000000000004e-05, | |
| "loss": 0.4622, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3516720604099245, | |
| "grad_norm": 0.36326297857194084, | |
| "learning_rate": 4.9080000000000004e-05, | |
| "loss": 0.4941, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.35382955771305286, | |
| "grad_norm": 0.25877643675232387, | |
| "learning_rate": 4.9040000000000005e-05, | |
| "loss": 0.4916, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3559870550161812, | |
| "grad_norm": 0.3556594334331794, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.4642, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3581445523193096, | |
| "grad_norm": 0.32515033894088813, | |
| "learning_rate": 4.896e-05, | |
| "loss": 0.4864, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.36030204962243795, | |
| "grad_norm": 0.3576526726949483, | |
| "learning_rate": 4.8920000000000006e-05, | |
| "loss": 0.4737, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.36245954692556637, | |
| "grad_norm": 0.29454313757538503, | |
| "learning_rate": 4.8880000000000006e-05, | |
| "loss": 0.4579, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.36461704422869473, | |
| "grad_norm": 0.35162010767939217, | |
| "learning_rate": 4.884e-05, | |
| "loss": 0.4985, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.3667745415318231, | |
| "grad_norm": 0.28512732525892354, | |
| "learning_rate": 4.88e-05, | |
| "loss": 0.4902, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.36893203883495146, | |
| "grad_norm": 0.29759943236707304, | |
| "learning_rate": 4.876e-05, | |
| "loss": 0.4972, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.3710895361380798, | |
| "grad_norm": 0.3287877116113184, | |
| "learning_rate": 4.872000000000001e-05, | |
| "loss": 0.4908, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3732470334412082, | |
| "grad_norm": 0.3133996309144812, | |
| "learning_rate": 4.868e-05, | |
| "loss": 0.4937, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.37540453074433655, | |
| "grad_norm": 0.3064667877265841, | |
| "learning_rate": 4.864e-05, | |
| "loss": 0.4864, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3775620280474649, | |
| "grad_norm": 0.33485639441738874, | |
| "learning_rate": 4.86e-05, | |
| "loss": 0.4775, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.37971952535059333, | |
| "grad_norm": 0.305407925816577, | |
| "learning_rate": 4.856e-05, | |
| "loss": 0.4764, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3818770226537217, | |
| "grad_norm": 0.3657289190953192, | |
| "learning_rate": 4.852e-05, | |
| "loss": 0.4658, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.38403451995685006, | |
| "grad_norm": 0.32585818474400624, | |
| "learning_rate": 4.8480000000000003e-05, | |
| "loss": 0.4759, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3861920172599784, | |
| "grad_norm": 0.37557882227532163, | |
| "learning_rate": 4.8440000000000004e-05, | |
| "loss": 0.4747, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.3883495145631068, | |
| "grad_norm": 0.320749191525247, | |
| "learning_rate": 4.8400000000000004e-05, | |
| "loss": 0.4821, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39050701186623515, | |
| "grad_norm": 0.31740159569446824, | |
| "learning_rate": 4.836e-05, | |
| "loss": 0.4819, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.3926645091693635, | |
| "grad_norm": 0.34719515229206804, | |
| "learning_rate": 4.8320000000000005e-05, | |
| "loss": 0.4809, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3948220064724919, | |
| "grad_norm": 0.29600056574488137, | |
| "learning_rate": 4.8280000000000005e-05, | |
| "loss": 0.473, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.3969795037756203, | |
| "grad_norm": 0.31700707281900425, | |
| "learning_rate": 4.824e-05, | |
| "loss": 0.4764, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.39913700107874867, | |
| "grad_norm": 0.2843323546229233, | |
| "learning_rate": 4.82e-05, | |
| "loss": 0.4706, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.40129449838187703, | |
| "grad_norm": 0.33405105406323765, | |
| "learning_rate": 4.816e-05, | |
| "loss": 0.4659, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4034519956850054, | |
| "grad_norm": 0.3450445779367956, | |
| "learning_rate": 4.812000000000001e-05, | |
| "loss": 0.4848, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.40560949298813376, | |
| "grad_norm": 0.29904111020419, | |
| "learning_rate": 4.808e-05, | |
| "loss": 0.4692, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4077669902912621, | |
| "grad_norm": 0.30156350801095055, | |
| "learning_rate": 4.804e-05, | |
| "loss": 0.4928, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4099244875943905, | |
| "grad_norm": 0.29440492223089526, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.4714, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4120819848975189, | |
| "grad_norm": 0.2941734869319751, | |
| "learning_rate": 4.796e-05, | |
| "loss": 0.4865, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.41423948220064727, | |
| "grad_norm": 0.2831205819699076, | |
| "learning_rate": 4.792e-05, | |
| "loss": 0.4923, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.41639697950377563, | |
| "grad_norm": 0.30976889318863166, | |
| "learning_rate": 4.788e-05, | |
| "loss": 0.4752, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.418554476806904, | |
| "grad_norm": 0.2765632011545738, | |
| "learning_rate": 4.784e-05, | |
| "loss": 0.4501, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.42071197411003236, | |
| "grad_norm": 0.2955577997787949, | |
| "learning_rate": 4.78e-05, | |
| "loss": 0.4641, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4228694714131607, | |
| "grad_norm": 0.35418034783340757, | |
| "learning_rate": 4.7760000000000004e-05, | |
| "loss": 0.4748, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4250269687162891, | |
| "grad_norm": 0.290860398121857, | |
| "learning_rate": 4.7720000000000004e-05, | |
| "loss": 0.4666, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.42718446601941745, | |
| "grad_norm": 0.3202541928666888, | |
| "learning_rate": 4.7680000000000004e-05, | |
| "loss": 0.4812, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.42934196332254587, | |
| "grad_norm": 0.291794165366607, | |
| "learning_rate": 4.7640000000000005e-05, | |
| "loss": 0.4688, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.43149946062567424, | |
| "grad_norm": 0.32523032478470537, | |
| "learning_rate": 4.76e-05, | |
| "loss": 0.4604, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4336569579288026, | |
| "grad_norm": 0.2661859076083175, | |
| "learning_rate": 4.7560000000000005e-05, | |
| "loss": 0.4693, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.43581445523193096, | |
| "grad_norm": 0.3403835298187506, | |
| "learning_rate": 4.7520000000000006e-05, | |
| "loss": 0.4651, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43797195253505933, | |
| "grad_norm": 0.28598439991323976, | |
| "learning_rate": 4.748e-05, | |
| "loss": 0.4907, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.4401294498381877, | |
| "grad_norm": 0.383195732436593, | |
| "learning_rate": 4.744e-05, | |
| "loss": 0.4983, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.44228694714131606, | |
| "grad_norm": 0.27458653574722164, | |
| "learning_rate": 4.74e-05, | |
| "loss": 0.4698, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.4194293652615916, | |
| "learning_rate": 4.736000000000001e-05, | |
| "loss": 0.4787, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.44660194174757284, | |
| "grad_norm": 0.29331891339359434, | |
| "learning_rate": 4.732e-05, | |
| "loss": 0.4743, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.4487594390507012, | |
| "grad_norm": 0.34631805619114103, | |
| "learning_rate": 4.728e-05, | |
| "loss": 0.4723, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.45091693635382957, | |
| "grad_norm": 0.2773817416509101, | |
| "learning_rate": 4.724e-05, | |
| "loss": 0.4568, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.45307443365695793, | |
| "grad_norm": 0.31793720505095835, | |
| "learning_rate": 4.72e-05, | |
| "loss": 0.4563, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4552319309600863, | |
| "grad_norm": 0.3360952018884764, | |
| "learning_rate": 4.716e-05, | |
| "loss": 0.4758, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.45738942826321466, | |
| "grad_norm": 0.3238545895389713, | |
| "learning_rate": 4.712e-05, | |
| "loss": 0.4802, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.459546925566343, | |
| "grad_norm": 0.3299565529698189, | |
| "learning_rate": 4.708e-05, | |
| "loss": 0.4713, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.4617044228694714, | |
| "grad_norm": 0.3014892248399335, | |
| "learning_rate": 4.7040000000000004e-05, | |
| "loss": 0.4689, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4638619201725998, | |
| "grad_norm": 0.36865008887233414, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.4845, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.46601941747572817, | |
| "grad_norm": 0.3010039204398615, | |
| "learning_rate": 4.6960000000000004e-05, | |
| "loss": 0.4717, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.46817691477885653, | |
| "grad_norm": 0.32021272907517795, | |
| "learning_rate": 4.6920000000000005e-05, | |
| "loss": 0.4635, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4703344120819849, | |
| "grad_norm": 0.27683773975403986, | |
| "learning_rate": 4.688e-05, | |
| "loss": 0.4629, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.47249190938511326, | |
| "grad_norm": 0.2939694818374937, | |
| "learning_rate": 4.684e-05, | |
| "loss": 0.4616, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4746494066882416, | |
| "grad_norm": 0.3146725242194594, | |
| "learning_rate": 4.6800000000000006e-05, | |
| "loss": 0.4722, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.47680690399137, | |
| "grad_norm": 0.2764797117217664, | |
| "learning_rate": 4.6760000000000006e-05, | |
| "loss": 0.4673, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.47896440129449835, | |
| "grad_norm": 0.3043641853589388, | |
| "learning_rate": 4.672e-05, | |
| "loss": 0.4478, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4811218985976268, | |
| "grad_norm": 0.28775200628381, | |
| "learning_rate": 4.668e-05, | |
| "loss": 0.4662, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.48327939590075514, | |
| "grad_norm": 0.27023475049837015, | |
| "learning_rate": 4.664e-05, | |
| "loss": 0.4661, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 0.31669406491236995, | |
| "learning_rate": 4.660000000000001e-05, | |
| "loss": 0.4458, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.48759439050701187, | |
| "grad_norm": 0.2837164913050278, | |
| "learning_rate": 4.656e-05, | |
| "loss": 0.4684, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.48975188781014023, | |
| "grad_norm": 0.29868610479918123, | |
| "learning_rate": 4.652e-05, | |
| "loss": 0.4506, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4919093851132686, | |
| "grad_norm": 0.2949287420608189, | |
| "learning_rate": 4.648e-05, | |
| "loss": 0.4632, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.49406688241639696, | |
| "grad_norm": 0.3119200088855303, | |
| "learning_rate": 4.644e-05, | |
| "loss": 0.4551, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4962243797195254, | |
| "grad_norm": 0.290598992319338, | |
| "learning_rate": 4.64e-05, | |
| "loss": 0.458, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.49838187702265374, | |
| "grad_norm": 0.34483322575005054, | |
| "learning_rate": 4.636e-05, | |
| "loss": 0.4637, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5005393743257821, | |
| "grad_norm": 0.27712821917993735, | |
| "learning_rate": 4.6320000000000004e-05, | |
| "loss": 0.4778, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5026968716289104, | |
| "grad_norm": 0.28690313202566, | |
| "learning_rate": 4.6280000000000004e-05, | |
| "loss": 0.4718, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5048543689320388, | |
| "grad_norm": 0.29881025273811784, | |
| "learning_rate": 4.624e-05, | |
| "loss": 0.4548, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5070118662351673, | |
| "grad_norm": 0.3009843183384041, | |
| "learning_rate": 4.6200000000000005e-05, | |
| "loss": 0.4734, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5091693635382956, | |
| "grad_norm": 0.3216848912197773, | |
| "learning_rate": 4.6160000000000005e-05, | |
| "loss": 0.453, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.511326860841424, | |
| "grad_norm": 0.2622840122570054, | |
| "learning_rate": 4.612e-05, | |
| "loss": 0.4534, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5134843581445523, | |
| "grad_norm": 0.31268030831842797, | |
| "learning_rate": 4.608e-05, | |
| "loss": 0.4541, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5156418554476807, | |
| "grad_norm": 0.2655099701679723, | |
| "learning_rate": 4.604e-05, | |
| "loss": 0.4509, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.517799352750809, | |
| "grad_norm": 0.27681618093808624, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.4765, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5199568500539374, | |
| "grad_norm": 0.31086122245620157, | |
| "learning_rate": 4.596e-05, | |
| "loss": 0.4542, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5221143473570659, | |
| "grad_norm": 0.26187143929707984, | |
| "learning_rate": 4.592e-05, | |
| "loss": 0.4548, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5242718446601942, | |
| "grad_norm": 0.30612908409709966, | |
| "learning_rate": 4.588e-05, | |
| "loss": 0.4644, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5264293419633226, | |
| "grad_norm": 0.28353354277654075, | |
| "learning_rate": 4.584e-05, | |
| "loss": 0.4777, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5285868392664509, | |
| "grad_norm": 0.2707543786610627, | |
| "learning_rate": 4.58e-05, | |
| "loss": 0.4605, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5307443365695793, | |
| "grad_norm": 0.25826035874506936, | |
| "learning_rate": 4.576e-05, | |
| "loss": 0.4733, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5329018338727076, | |
| "grad_norm": 0.26731071019406205, | |
| "learning_rate": 4.572e-05, | |
| "loss": 0.4562, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.535059331175836, | |
| "grad_norm": 0.23862376017091427, | |
| "learning_rate": 4.568e-05, | |
| "loss": 0.4481, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5372168284789643, | |
| "grad_norm": 0.3832009951376676, | |
| "learning_rate": 4.564e-05, | |
| "loss": 0.4628, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5393743257820928, | |
| "grad_norm": 0.29500842123610954, | |
| "learning_rate": 4.5600000000000004e-05, | |
| "loss": 0.459, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5415318230852212, | |
| "grad_norm": 0.2965682041233286, | |
| "learning_rate": 4.5560000000000004e-05, | |
| "loss": 0.4621, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5436893203883495, | |
| "grad_norm": 0.28726731902710617, | |
| "learning_rate": 4.5520000000000005e-05, | |
| "loss": 0.463, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5458468176914779, | |
| "grad_norm": 0.2690311478101724, | |
| "learning_rate": 4.548e-05, | |
| "loss": 0.4707, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.5480043149946062, | |
| "grad_norm": 0.30474236349865746, | |
| "learning_rate": 4.5440000000000005e-05, | |
| "loss": 0.4691, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5501618122977346, | |
| "grad_norm": 0.24352372447443482, | |
| "learning_rate": 4.5400000000000006e-05, | |
| "loss": 0.4413, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.552319309600863, | |
| "grad_norm": 0.3042686272085747, | |
| "learning_rate": 4.536e-05, | |
| "loss": 0.4796, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5544768069039914, | |
| "grad_norm": 0.24174468708449245, | |
| "learning_rate": 4.532e-05, | |
| "loss": 0.4436, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5566343042071198, | |
| "grad_norm": 0.26229288392443767, | |
| "learning_rate": 4.528e-05, | |
| "loss": 0.4474, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5587918015102481, | |
| "grad_norm": 0.3390509699538324, | |
| "learning_rate": 4.524000000000001e-05, | |
| "loss": 0.4584, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5609492988133765, | |
| "grad_norm": 0.2525311110616745, | |
| "learning_rate": 4.52e-05, | |
| "loss": 0.4412, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5631067961165048, | |
| "grad_norm": 0.3434472457371927, | |
| "learning_rate": 4.516e-05, | |
| "loss": 0.4698, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5652642934196332, | |
| "grad_norm": 0.25391082344689797, | |
| "learning_rate": 4.512e-05, | |
| "loss": 0.4759, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5674217907227616, | |
| "grad_norm": 0.34249204432261243, | |
| "learning_rate": 4.508e-05, | |
| "loss": 0.4552, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.56957928802589, | |
| "grad_norm": 0.2472862544884362, | |
| "learning_rate": 4.504e-05, | |
| "loss": 0.4688, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5717367853290184, | |
| "grad_norm": 0.27264729971113316, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.4488, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5738942826321467, | |
| "grad_norm": 0.2977417387577173, | |
| "learning_rate": 4.496e-05, | |
| "loss": 0.4752, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5760517799352751, | |
| "grad_norm": 0.2763251041657974, | |
| "learning_rate": 4.4920000000000004e-05, | |
| "loss": 0.4633, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5782092772384034, | |
| "grad_norm": 0.29864566803635684, | |
| "learning_rate": 4.488e-05, | |
| "loss": 0.4877, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5803667745415318, | |
| "grad_norm": 0.2817997182658353, | |
| "learning_rate": 4.4840000000000004e-05, | |
| "loss": 0.4599, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5825242718446602, | |
| "grad_norm": 0.2691208231765093, | |
| "learning_rate": 4.4800000000000005e-05, | |
| "loss": 0.4446, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5846817691477886, | |
| "grad_norm": 0.26100659163381174, | |
| "learning_rate": 4.4760000000000005e-05, | |
| "loss": 0.4702, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5868392664509169, | |
| "grad_norm": 0.30494432016510414, | |
| "learning_rate": 4.472e-05, | |
| "loss": 0.4434, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5889967637540453, | |
| "grad_norm": 0.3217381889319848, | |
| "learning_rate": 4.468e-05, | |
| "loss": 0.4522, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5911542610571737, | |
| "grad_norm": 0.2747807777821844, | |
| "learning_rate": 4.4640000000000006e-05, | |
| "loss": 0.4642, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.593311758360302, | |
| "grad_norm": 0.2771512136815011, | |
| "learning_rate": 4.46e-05, | |
| "loss": 0.458, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5954692556634305, | |
| "grad_norm": 0.30667352637556383, | |
| "learning_rate": 4.456e-05, | |
| "loss": 0.4653, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5976267529665588, | |
| "grad_norm": 0.27054508489934065, | |
| "learning_rate": 4.452e-05, | |
| "loss": 0.4457, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5997842502696872, | |
| "grad_norm": 0.32885060108368724, | |
| "learning_rate": 4.448e-05, | |
| "loss": 0.4479, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6019417475728155, | |
| "grad_norm": 0.25861633576048176, | |
| "learning_rate": 4.444e-05, | |
| "loss": 0.443, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6040992448759439, | |
| "grad_norm": 0.3264488574979686, | |
| "learning_rate": 4.44e-05, | |
| "loss": 0.4546, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6062567421790723, | |
| "grad_norm": 0.2661396893245308, | |
| "learning_rate": 4.436e-05, | |
| "loss": 0.4483, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6084142394822006, | |
| "grad_norm": 0.2666725233168853, | |
| "learning_rate": 4.432e-05, | |
| "loss": 0.4455, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6105717367853291, | |
| "grad_norm": 0.3213878317441801, | |
| "learning_rate": 4.428e-05, | |
| "loss": 0.4561, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6127292340884574, | |
| "grad_norm": 0.27514627181060536, | |
| "learning_rate": 4.424e-05, | |
| "loss": 0.4533, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6148867313915858, | |
| "grad_norm": 0.28529847199820635, | |
| "learning_rate": 4.4200000000000004e-05, | |
| "loss": 0.4583, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6170442286947141, | |
| "grad_norm": 0.3084015687846991, | |
| "learning_rate": 4.4160000000000004e-05, | |
| "loss": 0.4499, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6192017259978425, | |
| "grad_norm": 0.2722920383928748, | |
| "learning_rate": 4.412e-05, | |
| "loss": 0.4699, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6213592233009708, | |
| "grad_norm": 0.2790301022639745, | |
| "learning_rate": 4.4080000000000005e-05, | |
| "loss": 0.4571, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6235167206040992, | |
| "grad_norm": 0.28022715939780435, | |
| "learning_rate": 4.4040000000000005e-05, | |
| "loss": 0.4486, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6256742179072277, | |
| "grad_norm": 0.25791286221623794, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.4383, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.627831715210356, | |
| "grad_norm": 0.30038570760100414, | |
| "learning_rate": 4.396e-05, | |
| "loss": 0.4619, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6299892125134844, | |
| "grad_norm": 0.29876170957810305, | |
| "learning_rate": 4.392e-05, | |
| "loss": 0.4707, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6321467098166127, | |
| "grad_norm": 0.2932038829420852, | |
| "learning_rate": 4.388000000000001e-05, | |
| "loss": 0.4557, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6343042071197411, | |
| "grad_norm": 0.2716815847789961, | |
| "learning_rate": 4.384e-05, | |
| "loss": 0.4361, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6364617044228694, | |
| "grad_norm": 0.3099793080621506, | |
| "learning_rate": 4.38e-05, | |
| "loss": 0.4577, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6386192017259978, | |
| "grad_norm": 0.2938986793353107, | |
| "learning_rate": 4.376e-05, | |
| "loss": 0.4476, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6407766990291263, | |
| "grad_norm": 0.29035829007503894, | |
| "learning_rate": 4.372e-05, | |
| "loss": 0.4423, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6429341963322546, | |
| "grad_norm": 0.2694452227101497, | |
| "learning_rate": 4.368e-05, | |
| "loss": 0.4545, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.645091693635383, | |
| "grad_norm": 0.28851646597054104, | |
| "learning_rate": 4.364e-05, | |
| "loss": 0.4431, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6472491909385113, | |
| "grad_norm": 0.2593151761184194, | |
| "learning_rate": 4.36e-05, | |
| "loss": 0.455, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6494066882416397, | |
| "grad_norm": 0.29309975049936027, | |
| "learning_rate": 4.356e-05, | |
| "loss": 0.4481, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.651564185544768, | |
| "grad_norm": 0.25398889340605957, | |
| "learning_rate": 4.352e-05, | |
| "loss": 0.4476, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6537216828478964, | |
| "grad_norm": 0.30940788747918896, | |
| "learning_rate": 4.3480000000000004e-05, | |
| "loss": 0.45, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6558791801510249, | |
| "grad_norm": 0.2820801140095731, | |
| "learning_rate": 4.3440000000000004e-05, | |
| "loss": 0.4544, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6580366774541532, | |
| "grad_norm": 0.3152437853102846, | |
| "learning_rate": 4.3400000000000005e-05, | |
| "loss": 0.4492, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6601941747572816, | |
| "grad_norm": 0.2749853135008326, | |
| "learning_rate": 4.336e-05, | |
| "loss": 0.4412, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6623516720604099, | |
| "grad_norm": 0.3079252422579841, | |
| "learning_rate": 4.332e-05, | |
| "loss": 0.4369, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6645091693635383, | |
| "grad_norm": 0.2946391275689202, | |
| "learning_rate": 4.3280000000000006e-05, | |
| "loss": 0.4573, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.2668274385038352, | |
| "learning_rate": 4.324e-05, | |
| "loss": 0.4359, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.668824163969795, | |
| "grad_norm": 0.28838034268652896, | |
| "learning_rate": 4.32e-05, | |
| "loss": 0.4508, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6709816612729234, | |
| "grad_norm": 0.2834904014905993, | |
| "learning_rate": 4.316e-05, | |
| "loss": 0.4522, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.6731391585760518, | |
| "grad_norm": 0.27040013725015194, | |
| "learning_rate": 4.312000000000001e-05, | |
| "loss": 0.4456, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6752966558791802, | |
| "grad_norm": 0.31722009198474627, | |
| "learning_rate": 4.308e-05, | |
| "loss": 0.4659, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6774541531823085, | |
| "grad_norm": 0.27088552438961755, | |
| "learning_rate": 4.304e-05, | |
| "loss": 0.431, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6796116504854369, | |
| "grad_norm": 0.26632771069672984, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.4501, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6817691477885652, | |
| "grad_norm": 0.2972755674501818, | |
| "learning_rate": 4.296e-05, | |
| "loss": 0.4663, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6839266450916937, | |
| "grad_norm": 0.25853931040596184, | |
| "learning_rate": 4.292e-05, | |
| "loss": 0.4572, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.686084142394822, | |
| "grad_norm": 0.2714876932330396, | |
| "learning_rate": 4.288e-05, | |
| "loss": 0.4622, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6882416396979504, | |
| "grad_norm": 0.2642767531993865, | |
| "learning_rate": 4.284e-05, | |
| "loss": 0.4714, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6903991370010788, | |
| "grad_norm": 0.25954856405102256, | |
| "learning_rate": 4.2800000000000004e-05, | |
| "loss": 0.4568, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6925566343042071, | |
| "grad_norm": 0.3044540062878058, | |
| "learning_rate": 4.276e-05, | |
| "loss": 0.4415, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6947141316073355, | |
| "grad_norm": 0.23066914239975445, | |
| "learning_rate": 4.2720000000000004e-05, | |
| "loss": 0.4638, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6968716289104638, | |
| "grad_norm": 0.35554840668777676, | |
| "learning_rate": 4.2680000000000005e-05, | |
| "loss": 0.4573, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6990291262135923, | |
| "grad_norm": 0.2913915204408648, | |
| "learning_rate": 4.2640000000000005e-05, | |
| "loss": 0.4473, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7011866235167206, | |
| "grad_norm": 0.2684942718217648, | |
| "learning_rate": 4.26e-05, | |
| "loss": 0.4339, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.703344120819849, | |
| "grad_norm": 0.3216708484980791, | |
| "learning_rate": 4.256e-05, | |
| "loss": 0.4468, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7055016181229773, | |
| "grad_norm": 0.2756335067641377, | |
| "learning_rate": 4.2520000000000006e-05, | |
| "loss": 0.4558, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7076591154261057, | |
| "grad_norm": 0.2764643077888995, | |
| "learning_rate": 4.248e-05, | |
| "loss": 0.4346, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7098166127292341, | |
| "grad_norm": 0.32603235256791835, | |
| "learning_rate": 4.244e-05, | |
| "loss": 0.4559, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7119741100323624, | |
| "grad_norm": 0.29424638904551764, | |
| "learning_rate": 4.24e-05, | |
| "loss": 0.4462, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7141316073354909, | |
| "grad_norm": 0.3001554414730066, | |
| "learning_rate": 4.236e-05, | |
| "loss": 0.4468, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7162891046386192, | |
| "grad_norm": 0.3129011037209938, | |
| "learning_rate": 4.232e-05, | |
| "loss": 0.4108, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7184466019417476, | |
| "grad_norm": 0.28762494926699644, | |
| "learning_rate": 4.228e-05, | |
| "loss": 0.4594, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7206040992448759, | |
| "grad_norm": 0.3287086931234326, | |
| "learning_rate": 4.224e-05, | |
| "loss": 0.4546, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7227615965480043, | |
| "grad_norm": 0.2440171509135856, | |
| "learning_rate": 4.22e-05, | |
| "loss": 0.4395, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7249190938511327, | |
| "grad_norm": 0.41734955962307646, | |
| "learning_rate": 4.2159999999999996e-05, | |
| "loss": 0.4562, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.727076591154261, | |
| "grad_norm": 0.23647128778868323, | |
| "learning_rate": 4.212e-05, | |
| "loss": 0.448, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7292340884573895, | |
| "grad_norm": 0.3478141358669819, | |
| "learning_rate": 4.2080000000000004e-05, | |
| "loss": 0.4259, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7313915857605178, | |
| "grad_norm": 0.22537690567688576, | |
| "learning_rate": 4.2040000000000004e-05, | |
| "loss": 0.4254, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7335490830636462, | |
| "grad_norm": 0.32770466753425076, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.4493, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7357065803667745, | |
| "grad_norm": 0.2348262809416127, | |
| "learning_rate": 4.196e-05, | |
| "loss": 0.4379, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7378640776699029, | |
| "grad_norm": 0.2784106337499888, | |
| "learning_rate": 4.1920000000000005e-05, | |
| "loss": 0.4512, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7400215749730313, | |
| "grad_norm": 0.24611169037260724, | |
| "learning_rate": 4.1880000000000006e-05, | |
| "loss": 0.4372, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7421790722761596, | |
| "grad_norm": 0.28368805770265704, | |
| "learning_rate": 4.184e-05, | |
| "loss": 0.4641, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7443365695792881, | |
| "grad_norm": 0.2538772376229055, | |
| "learning_rate": 4.18e-05, | |
| "loss": 0.446, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7464940668824164, | |
| "grad_norm": 0.25580425464723444, | |
| "learning_rate": 4.176000000000001e-05, | |
| "loss": 0.4417, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7486515641855448, | |
| "grad_norm": 0.27469783764584166, | |
| "learning_rate": 4.172e-05, | |
| "loss": 0.4611, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7508090614886731, | |
| "grad_norm": 0.29130083464472767, | |
| "learning_rate": 4.168e-05, | |
| "loss": 0.444, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7529665587918015, | |
| "grad_norm": 0.2651244371602352, | |
| "learning_rate": 4.164e-05, | |
| "loss": 0.4441, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7551240560949298, | |
| "grad_norm": 0.26234665200685936, | |
| "learning_rate": 4.16e-05, | |
| "loss": 0.4461, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7572815533980582, | |
| "grad_norm": 0.24125048647992992, | |
| "learning_rate": 4.156e-05, | |
| "loss": 0.4352, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7594390507011867, | |
| "grad_norm": 0.2508832475792393, | |
| "learning_rate": 4.152e-05, | |
| "loss": 0.4463, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.761596548004315, | |
| "grad_norm": 0.26155428589274304, | |
| "learning_rate": 4.148e-05, | |
| "loss": 0.4369, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7637540453074434, | |
| "grad_norm": 0.24472099152743595, | |
| "learning_rate": 4.144e-05, | |
| "loss": 0.4345, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7659115426105717, | |
| "grad_norm": 0.24715070483697896, | |
| "learning_rate": 4.14e-05, | |
| "loss": 0.4505, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7680690399137001, | |
| "grad_norm": 0.24298690156207697, | |
| "learning_rate": 4.1360000000000004e-05, | |
| "loss": 0.4422, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7702265372168284, | |
| "grad_norm": 0.2349365195764117, | |
| "learning_rate": 4.1320000000000004e-05, | |
| "loss": 0.4526, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7723840345199569, | |
| "grad_norm": 0.2331945559857268, | |
| "learning_rate": 4.1280000000000005e-05, | |
| "loss": 0.4383, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7745415318230853, | |
| "grad_norm": 0.2595543035077313, | |
| "learning_rate": 4.124e-05, | |
| "loss": 0.4308, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 0.21807377534792635, | |
| "learning_rate": 4.12e-05, | |
| "loss": 0.4349, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.778856526429342, | |
| "grad_norm": 0.28950608296716684, | |
| "learning_rate": 4.1160000000000006e-05, | |
| "loss": 0.4558, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7810140237324703, | |
| "grad_norm": 0.2611339581514693, | |
| "learning_rate": 4.1120000000000006e-05, | |
| "loss": 0.4599, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7831715210355987, | |
| "grad_norm": 0.25575793173451006, | |
| "learning_rate": 4.108e-05, | |
| "loss": 0.4406, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.785329018338727, | |
| "grad_norm": 0.2450742657345304, | |
| "learning_rate": 4.104e-05, | |
| "loss": 0.4453, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7874865156418555, | |
| "grad_norm": 0.28382377756829125, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.465, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7896440129449838, | |
| "grad_norm": 0.2392683438388647, | |
| "learning_rate": 4.096e-05, | |
| "loss": 0.4416, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7918015102481122, | |
| "grad_norm": 0.2539921883515288, | |
| "learning_rate": 4.092e-05, | |
| "loss": 0.4497, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7939590075512406, | |
| "grad_norm": 0.27579412883884935, | |
| "learning_rate": 4.088e-05, | |
| "loss": 0.4464, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7961165048543689, | |
| "grad_norm": 0.28727621982712537, | |
| "learning_rate": 4.084e-05, | |
| "loss": 0.4411, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7982740021574973, | |
| "grad_norm": 0.2519428850122542, | |
| "learning_rate": 4.08e-05, | |
| "loss": 0.4479, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8004314994606256, | |
| "grad_norm": 0.2674974847910012, | |
| "learning_rate": 4.076e-05, | |
| "loss": 0.44, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8025889967637541, | |
| "grad_norm": 0.25530557484483807, | |
| "learning_rate": 4.072e-05, | |
| "loss": 0.4394, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8047464940668824, | |
| "grad_norm": 0.26435664799974373, | |
| "learning_rate": 4.0680000000000004e-05, | |
| "loss": 0.4574, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8069039913700108, | |
| "grad_norm": 0.2530022819289299, | |
| "learning_rate": 4.064e-05, | |
| "loss": 0.4454, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8090614886731392, | |
| "grad_norm": 0.24054892754428694, | |
| "learning_rate": 4.0600000000000004e-05, | |
| "loss": 0.4398, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8112189859762675, | |
| "grad_norm": 0.2848289155610854, | |
| "learning_rate": 4.0560000000000005e-05, | |
| "loss": 0.4398, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8133764832793959, | |
| "grad_norm": 0.2575053811702797, | |
| "learning_rate": 4.0520000000000005e-05, | |
| "loss": 0.4578, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8155339805825242, | |
| "grad_norm": 0.27019672115837984, | |
| "learning_rate": 4.048e-05, | |
| "loss": 0.4323, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8176914778856527, | |
| "grad_norm": 0.2398139963913645, | |
| "learning_rate": 4.044e-05, | |
| "loss": 0.4448, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.819848975188781, | |
| "grad_norm": 0.2529963842405476, | |
| "learning_rate": 4.0400000000000006e-05, | |
| "loss": 0.4296, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8220064724919094, | |
| "grad_norm": 0.2511947818922217, | |
| "learning_rate": 4.0360000000000007e-05, | |
| "loss": 0.4396, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8241639697950378, | |
| "grad_norm": 0.2584407655583235, | |
| "learning_rate": 4.032e-05, | |
| "loss": 0.4406, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8263214670981661, | |
| "grad_norm": 0.2456123715798027, | |
| "learning_rate": 4.028e-05, | |
| "loss": 0.4677, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.8284789644012945, | |
| "grad_norm": 0.2701085902021937, | |
| "learning_rate": 4.024e-05, | |
| "loss": 0.466, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8306364617044228, | |
| "grad_norm": 0.23978562401955322, | |
| "learning_rate": 4.02e-05, | |
| "loss": 0.4386, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8327939590075513, | |
| "grad_norm": 0.2645169304521651, | |
| "learning_rate": 4.016e-05, | |
| "loss": 0.4502, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8349514563106796, | |
| "grad_norm": 0.27897793156631207, | |
| "learning_rate": 4.012e-05, | |
| "loss": 0.4684, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.837108953613808, | |
| "grad_norm": 0.2478443685184336, | |
| "learning_rate": 4.008e-05, | |
| "loss": 0.4376, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8392664509169363, | |
| "grad_norm": 0.256589453456646, | |
| "learning_rate": 4.004e-05, | |
| "loss": 0.431, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8414239482200647, | |
| "grad_norm": 0.28233657875363566, | |
| "learning_rate": 4e-05, | |
| "loss": 0.4315, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8435814455231931, | |
| "grad_norm": 0.27009324457097755, | |
| "learning_rate": 3.9960000000000004e-05, | |
| "loss": 0.4481, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8457389428263214, | |
| "grad_norm": 0.33499329840024655, | |
| "learning_rate": 3.9920000000000004e-05, | |
| "loss": 0.4471, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8478964401294499, | |
| "grad_norm": 0.2841086526535015, | |
| "learning_rate": 3.988e-05, | |
| "loss": 0.4293, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8500539374325782, | |
| "grad_norm": 0.2716488191030425, | |
| "learning_rate": 3.984e-05, | |
| "loss": 0.4397, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8522114347357066, | |
| "grad_norm": 0.26620067701224137, | |
| "learning_rate": 3.9800000000000005e-05, | |
| "loss": 0.4461, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8543689320388349, | |
| "grad_norm": 0.2576197026648712, | |
| "learning_rate": 3.9760000000000006e-05, | |
| "loss": 0.4298, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8565264293419633, | |
| "grad_norm": 0.2532441081675246, | |
| "learning_rate": 3.972e-05, | |
| "loss": 0.4383, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8586839266450917, | |
| "grad_norm": 0.2679762403275455, | |
| "learning_rate": 3.968e-05, | |
| "loss": 0.4395, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.86084142394822, | |
| "grad_norm": 0.24275874935058916, | |
| "learning_rate": 3.964e-05, | |
| "loss": 0.4357, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8629989212513485, | |
| "grad_norm": 0.24961166385653455, | |
| "learning_rate": 3.960000000000001e-05, | |
| "loss": 0.4383, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8651564185544768, | |
| "grad_norm": 0.24374670649085867, | |
| "learning_rate": 3.956e-05, | |
| "loss": 0.4287, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8673139158576052, | |
| "grad_norm": 0.23871690671281046, | |
| "learning_rate": 3.952e-05, | |
| "loss": 0.425, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8694714131607335, | |
| "grad_norm": 0.3032682890616598, | |
| "learning_rate": 3.948e-05, | |
| "loss": 0.4537, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8716289104638619, | |
| "grad_norm": 0.25117617642424706, | |
| "learning_rate": 3.944e-05, | |
| "loss": 0.4282, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8737864077669902, | |
| "grad_norm": 0.22541166475315713, | |
| "learning_rate": 3.94e-05, | |
| "loss": 0.436, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8759439050701187, | |
| "grad_norm": 0.2716408337060906, | |
| "learning_rate": 3.936e-05, | |
| "loss": 0.4295, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8781014023732471, | |
| "grad_norm": 0.2556248222954383, | |
| "learning_rate": 3.932e-05, | |
| "loss": 0.4549, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8802588996763754, | |
| "grad_norm": 0.23721706187791133, | |
| "learning_rate": 3.9280000000000003e-05, | |
| "loss": 0.4436, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8824163969795038, | |
| "grad_norm": 0.24025254833177956, | |
| "learning_rate": 3.9240000000000004e-05, | |
| "loss": 0.4313, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8845738942826321, | |
| "grad_norm": 0.2614266619623848, | |
| "learning_rate": 3.9200000000000004e-05, | |
| "loss": 0.4453, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8867313915857605, | |
| "grad_norm": 0.23773461747873242, | |
| "learning_rate": 3.9160000000000005e-05, | |
| "loss": 0.4422, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.2555605177311676, | |
| "learning_rate": 3.912e-05, | |
| "loss": 0.4532, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8910463861920173, | |
| "grad_norm": 0.21705114177765664, | |
| "learning_rate": 3.908e-05, | |
| "loss": 0.4325, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8932038834951457, | |
| "grad_norm": 0.23143716203371478, | |
| "learning_rate": 3.9040000000000006e-05, | |
| "loss": 0.419, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.895361380798274, | |
| "grad_norm": 0.5742209849902357, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 0.4305, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8975188781014024, | |
| "grad_norm": 0.2551103117883286, | |
| "learning_rate": 3.896e-05, | |
| "loss": 0.4189, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8996763754045307, | |
| "grad_norm": 0.22402399804210352, | |
| "learning_rate": 3.892e-05, | |
| "loss": 0.428, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9018338727076591, | |
| "grad_norm": 0.24116776940016732, | |
| "learning_rate": 3.888e-05, | |
| "loss": 0.4367, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9039913700107874, | |
| "grad_norm": 0.22403592441484066, | |
| "learning_rate": 3.884e-05, | |
| "loss": 0.4485, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9061488673139159, | |
| "grad_norm": 0.22482911614936518, | |
| "learning_rate": 3.88e-05, | |
| "loss": 0.457, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9083063646170443, | |
| "grad_norm": 0.23857996790359456, | |
| "learning_rate": 3.876e-05, | |
| "loss": 0.4319, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9104638619201726, | |
| "grad_norm": 0.22710273590374788, | |
| "learning_rate": 3.872e-05, | |
| "loss": 0.4375, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.912621359223301, | |
| "grad_norm": 0.2259633074307126, | |
| "learning_rate": 3.868e-05, | |
| "loss": 0.4248, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9147788565264293, | |
| "grad_norm": 0.22640228114318403, | |
| "learning_rate": 3.864e-05, | |
| "loss": 0.4161, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9169363538295577, | |
| "grad_norm": 0.2350442466813375, | |
| "learning_rate": 3.86e-05, | |
| "loss": 0.4435, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.919093851132686, | |
| "grad_norm": 0.44365613370637497, | |
| "learning_rate": 3.8560000000000004e-05, | |
| "loss": 0.4393, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9212513484358145, | |
| "grad_norm": 0.2512897972825721, | |
| "learning_rate": 3.8520000000000004e-05, | |
| "loss": 0.4409, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.9234088457389428, | |
| "grad_norm": 0.24464436693649008, | |
| "learning_rate": 3.848e-05, | |
| "loss": 0.4324, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9255663430420712, | |
| "grad_norm": 0.2672384976835338, | |
| "learning_rate": 3.8440000000000005e-05, | |
| "loss": 0.4216, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9277238403451996, | |
| "grad_norm": 0.2871267008652053, | |
| "learning_rate": 3.8400000000000005e-05, | |
| "loss": 0.4522, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9298813376483279, | |
| "grad_norm": 0.24811968751313535, | |
| "learning_rate": 3.836e-05, | |
| "loss": 0.4339, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.9320388349514563, | |
| "grad_norm": 0.27159198887797176, | |
| "learning_rate": 3.832e-05, | |
| "loss": 0.4315, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9341963322545846, | |
| "grad_norm": 0.26279585526150134, | |
| "learning_rate": 3.828e-05, | |
| "loss": 0.4335, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9363538295577131, | |
| "grad_norm": 0.2463043675583301, | |
| "learning_rate": 3.8240000000000007e-05, | |
| "loss": 0.4426, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9385113268608414, | |
| "grad_norm": 0.22582242130582073, | |
| "learning_rate": 3.82e-05, | |
| "loss": 0.4327, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9406688241639698, | |
| "grad_norm": 0.22544591401205022, | |
| "learning_rate": 3.816e-05, | |
| "loss": 0.416, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9428263214670982, | |
| "grad_norm": 0.25570195904870374, | |
| "learning_rate": 3.812e-05, | |
| "loss": 0.4413, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9449838187702265, | |
| "grad_norm": 0.2165456325752943, | |
| "learning_rate": 3.808e-05, | |
| "loss": 0.42, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9471413160733549, | |
| "grad_norm": 0.2660006977071334, | |
| "learning_rate": 3.804e-05, | |
| "loss": 0.4386, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9492988133764833, | |
| "grad_norm": 0.27016187227336946, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.4497, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9514563106796117, | |
| "grad_norm": 0.26490253003888736, | |
| "learning_rate": 3.796e-05, | |
| "loss": 0.4455, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.95361380798274, | |
| "grad_norm": 0.24784119691094716, | |
| "learning_rate": 3.792e-05, | |
| "loss": 0.4312, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9557713052858684, | |
| "grad_norm": 0.24577321550551764, | |
| "learning_rate": 3.788e-05, | |
| "loss": 0.4331, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9579288025889967, | |
| "grad_norm": 0.23254334294044085, | |
| "learning_rate": 3.7840000000000004e-05, | |
| "loss": 0.4446, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9600862998921251, | |
| "grad_norm": 0.24298642123366798, | |
| "learning_rate": 3.7800000000000004e-05, | |
| "loss": 0.4349, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9622437971952535, | |
| "grad_norm": 0.2267162236488151, | |
| "learning_rate": 3.776e-05, | |
| "loss": 0.4299, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9644012944983819, | |
| "grad_norm": 0.2550173441070278, | |
| "learning_rate": 3.772e-05, | |
| "loss": 0.4457, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.9665587918015103, | |
| "grad_norm": 0.23923284262163064, | |
| "learning_rate": 3.7680000000000005e-05, | |
| "loss": 0.4228, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9687162891046386, | |
| "grad_norm": 0.26139756709830914, | |
| "learning_rate": 3.7640000000000006e-05, | |
| "loss": 0.4264, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 0.2504569204883789, | |
| "learning_rate": 3.76e-05, | |
| "loss": 0.436, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9730312837108953, | |
| "grad_norm": 0.2638553178925634, | |
| "learning_rate": 3.756e-05, | |
| "loss": 0.4471, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9751887810140237, | |
| "grad_norm": 0.2627847800007779, | |
| "learning_rate": 3.752e-05, | |
| "loss": 0.4362, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9773462783171522, | |
| "grad_norm": 0.21271418115533056, | |
| "learning_rate": 3.748000000000001e-05, | |
| "loss": 0.4312, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9795037756202805, | |
| "grad_norm": 0.282247303317703, | |
| "learning_rate": 3.744e-05, | |
| "loss": 0.4266, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9816612729234089, | |
| "grad_norm": 0.22711280249989385, | |
| "learning_rate": 3.74e-05, | |
| "loss": 0.4443, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9838187702265372, | |
| "grad_norm": 0.2543979274114635, | |
| "learning_rate": 3.736e-05, | |
| "loss": 0.4311, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9859762675296656, | |
| "grad_norm": 0.2463425451861333, | |
| "learning_rate": 3.732e-05, | |
| "loss": 0.4315, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.9881337648327939, | |
| "grad_norm": 0.2633378786543858, | |
| "learning_rate": 3.728e-05, | |
| "loss": 0.4361, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9902912621359223, | |
| "grad_norm": 0.26123423746373853, | |
| "learning_rate": 3.724e-05, | |
| "loss": 0.446, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9924487594390508, | |
| "grad_norm": 0.2886530412464882, | |
| "learning_rate": 3.72e-05, | |
| "loss": 0.4471, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9946062567421791, | |
| "grad_norm": 0.2403470838331088, | |
| "learning_rate": 3.716e-05, | |
| "loss": 0.4348, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9967637540453075, | |
| "grad_norm": 0.3003434463680213, | |
| "learning_rate": 3.712e-05, | |
| "loss": 0.4434, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9989212513484358, | |
| "grad_norm": 0.23330280858351574, | |
| "learning_rate": 3.7080000000000004e-05, | |
| "loss": 0.4225, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.23330280858351574, | |
| "learning_rate": 3.7040000000000005e-05, | |
| "loss": 0.4811, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0021574973031284, | |
| "grad_norm": 0.4639893862561724, | |
| "learning_rate": 3.7e-05, | |
| "loss": 0.3817, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0043149946062568, | |
| "grad_norm": 0.3321444998235641, | |
| "learning_rate": 3.696e-05, | |
| "loss": 0.3759, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.006472491909385, | |
| "grad_norm": 0.3177620819368746, | |
| "learning_rate": 3.692e-05, | |
| "loss": 0.3568, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.0086299892125135, | |
| "grad_norm": 0.3175305154761647, | |
| "learning_rate": 3.6880000000000006e-05, | |
| "loss": 0.3716, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0107874865156419, | |
| "grad_norm": 0.3451229640661856, | |
| "learning_rate": 3.684e-05, | |
| "loss": 0.3509, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.0129449838187703, | |
| "grad_norm": 0.28804744838713436, | |
| "learning_rate": 3.68e-05, | |
| "loss": 0.3687, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0151024811218985, | |
| "grad_norm": 0.33595860575734326, | |
| "learning_rate": 3.676e-05, | |
| "loss": 0.3779, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.017259978425027, | |
| "grad_norm": 0.30104692221444024, | |
| "learning_rate": 3.672000000000001e-05, | |
| "loss": 0.3691, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0194174757281553, | |
| "grad_norm": 0.35106754973912657, | |
| "learning_rate": 3.668e-05, | |
| "loss": 0.3654, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0215749730312838, | |
| "grad_norm": 0.2533257256241151, | |
| "learning_rate": 3.664e-05, | |
| "loss": 0.3805, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0237324703344122, | |
| "grad_norm": 0.3054072286372611, | |
| "learning_rate": 3.66e-05, | |
| "loss": 0.359, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0258899676375404, | |
| "grad_norm": 0.2504478156879782, | |
| "learning_rate": 3.656e-05, | |
| "loss": 0.347, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0280474649406688, | |
| "grad_norm": 0.2712883776564294, | |
| "learning_rate": 3.652e-05, | |
| "loss": 0.3754, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.0302049622437972, | |
| "grad_norm": 0.2782380806953332, | |
| "learning_rate": 3.648e-05, | |
| "loss": 0.3515, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0323624595469256, | |
| "grad_norm": 0.26159335832572284, | |
| "learning_rate": 3.6440000000000003e-05, | |
| "loss": 0.3485, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.0345199568500538, | |
| "grad_norm": 0.2864328676648181, | |
| "learning_rate": 3.6400000000000004e-05, | |
| "loss": 0.3631, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0366774541531822, | |
| "grad_norm": 0.25027991189741466, | |
| "learning_rate": 3.636e-05, | |
| "loss": 0.3816, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0388349514563107, | |
| "grad_norm": 0.2788892924014196, | |
| "learning_rate": 3.6320000000000005e-05, | |
| "loss": 0.3567, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.040992448759439, | |
| "grad_norm": 0.2659535510350543, | |
| "learning_rate": 3.6280000000000005e-05, | |
| "loss": 0.3713, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0431499460625675, | |
| "grad_norm": 0.23515542903905737, | |
| "learning_rate": 3.624e-05, | |
| "loss": 0.3535, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0453074433656957, | |
| "grad_norm": 0.279193625770593, | |
| "learning_rate": 3.62e-05, | |
| "loss": 0.3747, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0474649406688241, | |
| "grad_norm": 0.22521695771346117, | |
| "learning_rate": 3.616e-05, | |
| "loss": 0.369, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0496224379719525, | |
| "grad_norm": 0.2535094313179469, | |
| "learning_rate": 3.6120000000000007e-05, | |
| "loss": 0.3537, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.051779935275081, | |
| "grad_norm": 0.27003969431487984, | |
| "learning_rate": 3.608e-05, | |
| "loss": 0.3827, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0539374325782094, | |
| "grad_norm": 0.2514392546102155, | |
| "learning_rate": 3.604e-05, | |
| "loss": 0.3744, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.0560949298813376, | |
| "grad_norm": 0.2371342216949108, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.3703, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.058252427184466, | |
| "grad_norm": 0.2416511923662919, | |
| "learning_rate": 3.596e-05, | |
| "loss": 0.3711, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.0604099244875944, | |
| "grad_norm": 0.22901813700686482, | |
| "learning_rate": 3.592e-05, | |
| "loss": 0.3434, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0625674217907228, | |
| "grad_norm": 0.2525253353540884, | |
| "learning_rate": 3.588e-05, | |
| "loss": 0.363, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.064724919093851, | |
| "grad_norm": 0.23895832490100163, | |
| "learning_rate": 3.584e-05, | |
| "loss": 0.3493, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0668824163969794, | |
| "grad_norm": 0.23238858140574412, | |
| "learning_rate": 3.58e-05, | |
| "loss": 0.3713, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.0690399137001079, | |
| "grad_norm": 0.2683342199484544, | |
| "learning_rate": 3.5759999999999996e-05, | |
| "loss": 0.3819, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0711974110032363, | |
| "grad_norm": 0.24170220244530477, | |
| "learning_rate": 3.5720000000000004e-05, | |
| "loss": 0.361, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.0733549083063647, | |
| "grad_norm": 0.22845523982957577, | |
| "learning_rate": 3.5680000000000004e-05, | |
| "loss": 0.363, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.075512405609493, | |
| "grad_norm": 0.25277862463525125, | |
| "learning_rate": 3.5640000000000004e-05, | |
| "loss": 0.355, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.0776699029126213, | |
| "grad_norm": 0.24700129560716771, | |
| "learning_rate": 3.56e-05, | |
| "loss": 0.3706, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0798274002157497, | |
| "grad_norm": 0.22065070960656424, | |
| "learning_rate": 3.5560000000000005e-05, | |
| "loss": 0.3531, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.0819848975188782, | |
| "grad_norm": 0.2561454034108686, | |
| "learning_rate": 3.5520000000000006e-05, | |
| "loss": 0.3833, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0841423948220066, | |
| "grad_norm": 0.22433131754774605, | |
| "learning_rate": 3.548e-05, | |
| "loss": 0.3691, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.0862998921251348, | |
| "grad_norm": 0.2522033412086021, | |
| "learning_rate": 3.544e-05, | |
| "loss": 0.3567, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0884573894282632, | |
| "grad_norm": 0.21905495575288447, | |
| "learning_rate": 3.54e-05, | |
| "loss": 0.3848, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.0906148867313916, | |
| "grad_norm": 0.22081247386141037, | |
| "learning_rate": 3.536000000000001e-05, | |
| "loss": 0.3662, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.09277238403452, | |
| "grad_norm": 0.2713654247657179, | |
| "learning_rate": 3.532e-05, | |
| "loss": 0.3699, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.0949298813376482, | |
| "grad_norm": 0.22498743492459014, | |
| "learning_rate": 3.528e-05, | |
| "loss": 0.383, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0970873786407767, | |
| "grad_norm": 0.2514196262074284, | |
| "learning_rate": 3.524e-05, | |
| "loss": 0.3499, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.099244875943905, | |
| "grad_norm": 0.23462639319558332, | |
| "learning_rate": 3.52e-05, | |
| "loss": 0.3607, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1014023732470335, | |
| "grad_norm": 0.23003249047263888, | |
| "learning_rate": 3.516e-05, | |
| "loss": 0.3738, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.103559870550162, | |
| "grad_norm": 0.2188060350115714, | |
| "learning_rate": 3.512e-05, | |
| "loss": 0.3515, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1057173678532901, | |
| "grad_norm": 0.22124097377197594, | |
| "learning_rate": 3.508e-05, | |
| "loss": 0.3686, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.1078748651564185, | |
| "grad_norm": 0.21022961752556638, | |
| "learning_rate": 3.504e-05, | |
| "loss": 0.3784, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.110032362459547, | |
| "grad_norm": 0.2210491921381139, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.3542, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.1121898597626754, | |
| "grad_norm": 0.23756038385005723, | |
| "learning_rate": 3.4960000000000004e-05, | |
| "loss": 0.3733, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1143473570658036, | |
| "grad_norm": 0.23088533641732836, | |
| "learning_rate": 3.4920000000000004e-05, | |
| "loss": 0.3577, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.116504854368932, | |
| "grad_norm": 0.23851750461757235, | |
| "learning_rate": 3.4880000000000005e-05, | |
| "loss": 0.3626, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1186623516720604, | |
| "grad_norm": 0.25889253618573643, | |
| "learning_rate": 3.484e-05, | |
| "loss": 0.3657, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.1208198489751888, | |
| "grad_norm": 0.2318799442388334, | |
| "learning_rate": 3.48e-05, | |
| "loss": 0.3513, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1229773462783172, | |
| "grad_norm": 0.2256074426059202, | |
| "learning_rate": 3.4760000000000006e-05, | |
| "loss": 0.3574, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.1251348435814454, | |
| "grad_norm": 0.22447577356156587, | |
| "learning_rate": 3.472e-05, | |
| "loss": 0.3619, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1272923408845739, | |
| "grad_norm": 0.23975845975915916, | |
| "learning_rate": 3.468e-05, | |
| "loss": 0.3598, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1294498381877023, | |
| "grad_norm": 0.19493992211953257, | |
| "learning_rate": 3.464e-05, | |
| "loss": 0.3648, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1316073354908307, | |
| "grad_norm": 0.2870170542570617, | |
| "learning_rate": 3.46e-05, | |
| "loss": 0.3695, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.133764832793959, | |
| "grad_norm": 0.20812408093979423, | |
| "learning_rate": 3.456e-05, | |
| "loss": 0.3537, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1359223300970873, | |
| "grad_norm": 0.279716636463304, | |
| "learning_rate": 3.452e-05, | |
| "loss": 0.3609, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1380798274002157, | |
| "grad_norm": 0.2223889872165593, | |
| "learning_rate": 3.448e-05, | |
| "loss": 0.3663, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1402373247033442, | |
| "grad_norm": 0.2433632262289244, | |
| "learning_rate": 3.444e-05, | |
| "loss": 0.3714, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1423948220064726, | |
| "grad_norm": 0.24277614478782433, | |
| "learning_rate": 3.4399999999999996e-05, | |
| "loss": 0.3869, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1445523193096008, | |
| "grad_norm": 0.2586540206286391, | |
| "learning_rate": 3.436e-05, | |
| "loss": 0.3579, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.1467098166127292, | |
| "grad_norm": 0.23870929551871684, | |
| "learning_rate": 3.4320000000000003e-05, | |
| "loss": 0.3672, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1488673139158576, | |
| "grad_norm": 0.23766584571259206, | |
| "learning_rate": 3.4280000000000004e-05, | |
| "loss": 0.3671, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.151024811218986, | |
| "grad_norm": 0.24568271253401686, | |
| "learning_rate": 3.424e-05, | |
| "loss": 0.3596, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1531823085221142, | |
| "grad_norm": 0.2635556243671408, | |
| "learning_rate": 3.4200000000000005e-05, | |
| "loss": 0.3834, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.1553398058252426, | |
| "grad_norm": 0.21921288320598883, | |
| "learning_rate": 3.4160000000000005e-05, | |
| "loss": 0.3767, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.157497303128371, | |
| "grad_norm": 0.2761715682909131, | |
| "learning_rate": 3.412e-05, | |
| "loss": 0.3931, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.1596548004314995, | |
| "grad_norm": 0.24124279593723555, | |
| "learning_rate": 3.408e-05, | |
| "loss": 0.3616, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.161812297734628, | |
| "grad_norm": 0.24495716516269506, | |
| "learning_rate": 3.404e-05, | |
| "loss": 0.3551, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.1639697950377563, | |
| "grad_norm": 0.26303797180952715, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.3808, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1661272923408845, | |
| "grad_norm": 0.2320923059681381, | |
| "learning_rate": 3.396e-05, | |
| "loss": 0.3489, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.168284789644013, | |
| "grad_norm": 0.22912881919910247, | |
| "learning_rate": 3.392e-05, | |
| "loss": 0.3782, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1704422869471414, | |
| "grad_norm": 0.25937778405892337, | |
| "learning_rate": 3.388e-05, | |
| "loss": 0.3657, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.1725997842502698, | |
| "grad_norm": 0.2480554773755587, | |
| "learning_rate": 3.384e-05, | |
| "loss": 0.3547, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.174757281553398, | |
| "grad_norm": 0.23033283728714243, | |
| "learning_rate": 3.38e-05, | |
| "loss": 0.3514, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.1769147788565264, | |
| "grad_norm": 0.23578708348817395, | |
| "learning_rate": 3.376e-05, | |
| "loss": 0.3449, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1790722761596548, | |
| "grad_norm": 0.28370201661918365, | |
| "learning_rate": 3.372e-05, | |
| "loss": 0.3938, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.1812297734627832, | |
| "grad_norm": 0.24579139057291602, | |
| "learning_rate": 3.368e-05, | |
| "loss": 0.3648, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1833872707659117, | |
| "grad_norm": 0.2390462478430337, | |
| "learning_rate": 3.3639999999999996e-05, | |
| "loss": 0.3478, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.1855447680690399, | |
| "grad_norm": 0.26131993520239843, | |
| "learning_rate": 3.3600000000000004e-05, | |
| "loss": 0.3653, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1877022653721683, | |
| "grad_norm": 0.2602420831758332, | |
| "learning_rate": 3.3560000000000004e-05, | |
| "loss": 0.3522, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.1898597626752967, | |
| "grad_norm": 0.2694184023955293, | |
| "learning_rate": 3.3520000000000004e-05, | |
| "loss": 0.3651, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1920172599784251, | |
| "grad_norm": 0.24345754285858087, | |
| "learning_rate": 3.348e-05, | |
| "loss": 0.3712, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.1941747572815533, | |
| "grad_norm": 0.23563745353239082, | |
| "learning_rate": 3.344e-05, | |
| "loss": 0.371, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1963322545846817, | |
| "grad_norm": 0.23701901133791073, | |
| "learning_rate": 3.3400000000000005e-05, | |
| "loss": 0.3643, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.1984897518878102, | |
| "grad_norm": 0.23925942262157326, | |
| "learning_rate": 3.336e-05, | |
| "loss": 0.3544, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2006472491909386, | |
| "grad_norm": 0.2338316781910534, | |
| "learning_rate": 3.332e-05, | |
| "loss": 0.3562, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.202804746494067, | |
| "grad_norm": 0.2541377842977513, | |
| "learning_rate": 3.328e-05, | |
| "loss": 0.3658, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2049622437971952, | |
| "grad_norm": 0.257947454208033, | |
| "learning_rate": 3.324e-05, | |
| "loss": 0.3862, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2071197411003236, | |
| "grad_norm": 0.5529685709493989, | |
| "learning_rate": 3.32e-05, | |
| "loss": 0.3733, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.209277238403452, | |
| "grad_norm": 0.25807202026476894, | |
| "learning_rate": 3.316e-05, | |
| "loss": 0.3476, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.2114347357065804, | |
| "grad_norm": 0.21766878594901307, | |
| "learning_rate": 3.312e-05, | |
| "loss": 0.3404, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2135922330097086, | |
| "grad_norm": 0.2136045851257333, | |
| "learning_rate": 3.308e-05, | |
| "loss": 0.3628, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.215749730312837, | |
| "grad_norm": 0.23905539491616293, | |
| "learning_rate": 3.304e-05, | |
| "loss": 0.3767, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2179072276159655, | |
| "grad_norm": 0.2442819986159734, | |
| "learning_rate": 3.3e-05, | |
| "loss": 0.3673, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.220064724919094, | |
| "grad_norm": 0.26256977239020945, | |
| "learning_rate": 3.296e-05, | |
| "loss": 0.3693, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2222222222222223, | |
| "grad_norm": 0.21905194245883036, | |
| "learning_rate": 3.292e-05, | |
| "loss": 0.3512, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2243797195253505, | |
| "grad_norm": 0.24907388947059758, | |
| "learning_rate": 3.288e-05, | |
| "loss": 0.3589, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.226537216828479, | |
| "grad_norm": 0.2621089604884317, | |
| "learning_rate": 3.2840000000000004e-05, | |
| "loss": 0.3614, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2286947141316074, | |
| "grad_norm": 0.21791101743115224, | |
| "learning_rate": 3.2800000000000004e-05, | |
| "loss": 0.378, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2308522114347358, | |
| "grad_norm": 0.2681150827216919, | |
| "learning_rate": 3.2760000000000005e-05, | |
| "loss": 0.356, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.233009708737864, | |
| "grad_norm": 0.22042202632353994, | |
| "learning_rate": 3.272e-05, | |
| "loss": 0.3619, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2351672060409924, | |
| "grad_norm": 0.22940316773804112, | |
| "learning_rate": 3.268e-05, | |
| "loss": 0.376, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2373247033441208, | |
| "grad_norm": 0.2300745808427193, | |
| "learning_rate": 3.2640000000000006e-05, | |
| "loss": 0.352, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2394822006472492, | |
| "grad_norm": 0.22246397846867583, | |
| "learning_rate": 3.26e-05, | |
| "loss": 0.3674, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2416396979503777, | |
| "grad_norm": 0.2535242296437029, | |
| "learning_rate": 3.256e-05, | |
| "loss": 0.3676, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2437971952535058, | |
| "grad_norm": 0.22889371782833975, | |
| "learning_rate": 3.252e-05, | |
| "loss": 0.3485, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2459546925566343, | |
| "grad_norm": 0.2071146637004398, | |
| "learning_rate": 3.248e-05, | |
| "loss": 0.3771, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2481121898597627, | |
| "grad_norm": 0.26197336938818744, | |
| "learning_rate": 3.244e-05, | |
| "loss": 0.3675, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.250269687162891, | |
| "grad_norm": 0.2241786710298959, | |
| "learning_rate": 3.24e-05, | |
| "loss": 0.3528, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2524271844660193, | |
| "grad_norm": 0.23312122700817697, | |
| "learning_rate": 3.236e-05, | |
| "loss": 0.3544, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.2545846817691477, | |
| "grad_norm": 0.2182270396469802, | |
| "learning_rate": 3.232e-05, | |
| "loss": 0.3568, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2567421790722761, | |
| "grad_norm": 0.26366238123425695, | |
| "learning_rate": 3.2279999999999996e-05, | |
| "loss": 0.3686, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.2588996763754046, | |
| "grad_norm": 0.23349496548207888, | |
| "learning_rate": 3.224e-05, | |
| "loss": 0.3702, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.261057173678533, | |
| "grad_norm": 0.2280868391944456, | |
| "learning_rate": 3.2200000000000003e-05, | |
| "loss": 0.3744, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.2632146709816614, | |
| "grad_norm": 0.2532316820814779, | |
| "learning_rate": 3.2160000000000004e-05, | |
| "loss": 0.3551, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.2653721682847896, | |
| "grad_norm": 0.21080695604733463, | |
| "learning_rate": 3.212e-05, | |
| "loss": 0.3584, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.267529665587918, | |
| "grad_norm": 0.2176852597553473, | |
| "learning_rate": 3.208e-05, | |
| "loss": 0.3664, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2696871628910464, | |
| "grad_norm": 0.2350058727944437, | |
| "learning_rate": 3.2040000000000005e-05, | |
| "loss": 0.3572, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.2718446601941746, | |
| "grad_norm": 0.2142412894660819, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.3622, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.274002157497303, | |
| "grad_norm": 0.2988582984016833, | |
| "learning_rate": 3.196e-05, | |
| "loss": 0.3641, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.2761596548004315, | |
| "grad_norm": 0.21391803913879617, | |
| "learning_rate": 3.192e-05, | |
| "loss": 0.3594, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.27831715210356, | |
| "grad_norm": 0.26842444375759716, | |
| "learning_rate": 3.188e-05, | |
| "loss": 0.3652, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.2804746494066883, | |
| "grad_norm": 0.24949721130385824, | |
| "learning_rate": 3.184e-05, | |
| "loss": 0.3734, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2826321467098167, | |
| "grad_norm": 0.23021856207798744, | |
| "learning_rate": 3.18e-05, | |
| "loss": 0.3569, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.284789644012945, | |
| "grad_norm": 0.256911436435004, | |
| "learning_rate": 3.176e-05, | |
| "loss": 0.3658, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.2869471413160734, | |
| "grad_norm": 0.2442690046250457, | |
| "learning_rate": 3.172e-05, | |
| "loss": 0.3733, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.2891046386192018, | |
| "grad_norm": 0.2339516527126993, | |
| "learning_rate": 3.168e-05, | |
| "loss": 0.3594, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.29126213592233, | |
| "grad_norm": 0.22497425413784988, | |
| "learning_rate": 3.164e-05, | |
| "loss": 0.3569, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.2934196332254584, | |
| "grad_norm": 0.20579662620446118, | |
| "learning_rate": 3.16e-05, | |
| "loss": 0.3683, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2955771305285868, | |
| "grad_norm": 0.24608918583535308, | |
| "learning_rate": 3.156e-05, | |
| "loss": 0.3733, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.2977346278317152, | |
| "grad_norm": 0.2281105722902805, | |
| "learning_rate": 3.1519999999999996e-05, | |
| "loss": 0.3694, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2998921251348436, | |
| "grad_norm": 0.2040966143041461, | |
| "learning_rate": 3.1480000000000004e-05, | |
| "loss": 0.3636, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.302049622437972, | |
| "grad_norm": 0.21460759759197517, | |
| "learning_rate": 3.1440000000000004e-05, | |
| "loss": 0.3477, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3042071197411003, | |
| "grad_norm": 0.221481405151603, | |
| "learning_rate": 3.1400000000000004e-05, | |
| "loss": 0.3699, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.3063646170442287, | |
| "grad_norm": 0.2501579732769482, | |
| "learning_rate": 3.136e-05, | |
| "loss": 0.4039, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.308522114347357, | |
| "grad_norm": 0.2077259213631551, | |
| "learning_rate": 3.132e-05, | |
| "loss": 0.358, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3106796116504853, | |
| "grad_norm": 0.22593570412239217, | |
| "learning_rate": 3.1280000000000005e-05, | |
| "loss": 0.3546, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3128371089536137, | |
| "grad_norm": 0.23482506691659347, | |
| "learning_rate": 3.1240000000000006e-05, | |
| "loss": 0.3932, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3149946062567421, | |
| "grad_norm": 0.20192962328812056, | |
| "learning_rate": 3.12e-05, | |
| "loss": 0.359, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3171521035598706, | |
| "grad_norm": 0.2309883568911319, | |
| "learning_rate": 3.116e-05, | |
| "loss": 0.3661, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.319309600862999, | |
| "grad_norm": 0.2258622386132955, | |
| "learning_rate": 3.112e-05, | |
| "loss": 0.3553, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3214670981661274, | |
| "grad_norm": 0.1955163803876729, | |
| "learning_rate": 3.108e-05, | |
| "loss": 0.3502, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3236245954692556, | |
| "grad_norm": 0.23352846688566287, | |
| "learning_rate": 3.104e-05, | |
| "loss": 0.3621, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.325782092772384, | |
| "grad_norm": 0.22786136344214358, | |
| "learning_rate": 3.1e-05, | |
| "loss": 0.3654, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3279395900755124, | |
| "grad_norm": 0.23706574178741774, | |
| "learning_rate": 3.096e-05, | |
| "loss": 0.3708, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3300970873786409, | |
| "grad_norm": 0.21678537837279216, | |
| "learning_rate": 3.092e-05, | |
| "loss": 0.3601, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.332254584681769, | |
| "grad_norm": 0.22426690469613242, | |
| "learning_rate": 3.088e-05, | |
| "loss": 0.3607, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3344120819848975, | |
| "grad_norm": 0.22447348229662797, | |
| "learning_rate": 3.084e-05, | |
| "loss": 0.3677, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3365695792880259, | |
| "grad_norm": 0.2414443482792685, | |
| "learning_rate": 3.08e-05, | |
| "loss": 0.3516, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3387270765911543, | |
| "grad_norm": 0.2209324250516759, | |
| "learning_rate": 3.076e-05, | |
| "loss": 0.3465, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3408845738942827, | |
| "grad_norm": 0.22931182094163088, | |
| "learning_rate": 3.072e-05, | |
| "loss": 0.3645, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.343042071197411, | |
| "grad_norm": 0.22884876391903108, | |
| "learning_rate": 3.0680000000000004e-05, | |
| "loss": 0.3732, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.3451995685005393, | |
| "grad_norm": 0.23344780330484063, | |
| "learning_rate": 3.0640000000000005e-05, | |
| "loss": 0.3603, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3473570658036678, | |
| "grad_norm": 0.2286620307384633, | |
| "learning_rate": 3.06e-05, | |
| "loss": 0.3694, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.3495145631067962, | |
| "grad_norm": 0.2161365308453795, | |
| "learning_rate": 3.056e-05, | |
| "loss": 0.3654, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3516720604099244, | |
| "grad_norm": 0.23046040136711576, | |
| "learning_rate": 3.0520000000000006e-05, | |
| "loss": 0.3592, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.3538295577130528, | |
| "grad_norm": 0.20568766413435982, | |
| "learning_rate": 3.0480000000000003e-05, | |
| "loss": 0.3878, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3559870550161812, | |
| "grad_norm": 0.23495098967359113, | |
| "learning_rate": 3.0440000000000003e-05, | |
| "loss": 0.3619, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.3581445523193096, | |
| "grad_norm": 0.19926196422663414, | |
| "learning_rate": 3.04e-05, | |
| "loss": 0.3639, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.360302049622438, | |
| "grad_norm": 0.253526676183994, | |
| "learning_rate": 3.036e-05, | |
| "loss": 0.3649, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.3624595469255665, | |
| "grad_norm": 0.2247831223309294, | |
| "learning_rate": 3.0320000000000004e-05, | |
| "loss": 0.3836, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3646170442286947, | |
| "grad_norm": 0.23980813430498824, | |
| "learning_rate": 3.028e-05, | |
| "loss": 0.3735, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.366774541531823, | |
| "grad_norm": 0.24839219342419572, | |
| "learning_rate": 3.0240000000000002e-05, | |
| "loss": 0.3555, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3689320388349515, | |
| "grad_norm": 0.23203667590220284, | |
| "learning_rate": 3.02e-05, | |
| "loss": 0.3433, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.3710895361380797, | |
| "grad_norm": 0.25304868631907496, | |
| "learning_rate": 3.016e-05, | |
| "loss": 0.3635, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3732470334412081, | |
| "grad_norm": 0.23269259251392885, | |
| "learning_rate": 3.0120000000000003e-05, | |
| "loss": 0.3961, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.3754045307443366, | |
| "grad_norm": 0.21293976180361354, | |
| "learning_rate": 3.0080000000000003e-05, | |
| "loss": 0.349, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.377562028047465, | |
| "grad_norm": 0.2848360522308427, | |
| "learning_rate": 3.004e-05, | |
| "loss": 0.3634, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.3797195253505934, | |
| "grad_norm": 0.23140180630070306, | |
| "learning_rate": 3e-05, | |
| "loss": 0.38, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3818770226537218, | |
| "grad_norm": 0.2745994307355107, | |
| "learning_rate": 2.9959999999999998e-05, | |
| "loss": 0.3753, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.38403451995685, | |
| "grad_norm": 0.26144668714615754, | |
| "learning_rate": 2.9920000000000005e-05, | |
| "loss": 0.3663, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.3861920172599784, | |
| "grad_norm": 0.2421993564006052, | |
| "learning_rate": 2.9880000000000002e-05, | |
| "loss": 0.3568, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.3883495145631068, | |
| "grad_norm": 0.2370121873272197, | |
| "learning_rate": 2.9840000000000002e-05, | |
| "loss": 0.3596, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.390507011866235, | |
| "grad_norm": 0.24085023222334487, | |
| "learning_rate": 2.98e-05, | |
| "loss": 0.3762, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.3926645091693635, | |
| "grad_norm": 0.234755628545118, | |
| "learning_rate": 2.976e-05, | |
| "loss": 0.3672, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3948220064724919, | |
| "grad_norm": 0.24182630900679758, | |
| "learning_rate": 2.9720000000000003e-05, | |
| "loss": 0.3687, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.3969795037756203, | |
| "grad_norm": 0.23986126807518696, | |
| "learning_rate": 2.9680000000000004e-05, | |
| "loss": 0.3591, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.3991370010787487, | |
| "grad_norm": 0.24262318684533668, | |
| "learning_rate": 2.964e-05, | |
| "loss": 0.3532, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4012944983818771, | |
| "grad_norm": 0.2403573614795157, | |
| "learning_rate": 2.96e-05, | |
| "loss": 0.3695, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4034519956850053, | |
| "grad_norm": 0.2231968626230008, | |
| "learning_rate": 2.9559999999999998e-05, | |
| "loss": 0.3687, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4056094929881338, | |
| "grad_norm": 0.24521191692071806, | |
| "learning_rate": 2.9520000000000002e-05, | |
| "loss": 0.3616, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4077669902912622, | |
| "grad_norm": 0.24730212938499072, | |
| "learning_rate": 2.9480000000000002e-05, | |
| "loss": 0.3465, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4099244875943904, | |
| "grad_norm": 0.2334236281522416, | |
| "learning_rate": 2.944e-05, | |
| "loss": 0.3685, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4120819848975188, | |
| "grad_norm": 0.23180879257563838, | |
| "learning_rate": 2.94e-05, | |
| "loss": 0.3516, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4142394822006472, | |
| "grad_norm": 0.22429371025157882, | |
| "learning_rate": 2.9360000000000003e-05, | |
| "loss": 0.3539, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4163969795037756, | |
| "grad_norm": 0.23286822854457767, | |
| "learning_rate": 2.9320000000000004e-05, | |
| "loss": 0.3673, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.418554476806904, | |
| "grad_norm": 0.21972421961678684, | |
| "learning_rate": 2.928e-05, | |
| "loss": 0.3564, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4207119741100325, | |
| "grad_norm": 0.2317444957314592, | |
| "learning_rate": 2.924e-05, | |
| "loss": 0.3647, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4228694714131607, | |
| "grad_norm": 0.7300728474445504, | |
| "learning_rate": 2.9199999999999998e-05, | |
| "loss": 0.3521, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.425026968716289, | |
| "grad_norm": 0.22003003920883527, | |
| "learning_rate": 2.9160000000000005e-05, | |
| "loss": 0.3661, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4271844660194175, | |
| "grad_norm": 0.2771166892544825, | |
| "learning_rate": 2.9120000000000002e-05, | |
| "loss": 0.3763, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.429341963322546, | |
| "grad_norm": 0.21961469846761253, | |
| "learning_rate": 2.9080000000000003e-05, | |
| "loss": 0.3637, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4314994606256741, | |
| "grad_norm": 0.23524236389027967, | |
| "learning_rate": 2.904e-05, | |
| "loss": 0.382, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4336569579288025, | |
| "grad_norm": 0.2527543079175038, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.3658, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.435814455231931, | |
| "grad_norm": 0.2580197589459597, | |
| "learning_rate": 2.8960000000000004e-05, | |
| "loss": 0.3618, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4379719525350594, | |
| "grad_norm": 0.2556251991492771, | |
| "learning_rate": 2.8920000000000004e-05, | |
| "loss": 0.3742, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.4401294498381878, | |
| "grad_norm": 0.25698446665498725, | |
| "learning_rate": 2.888e-05, | |
| "loss": 0.3631, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.442286947141316, | |
| "grad_norm": 0.21986799830862488, | |
| "learning_rate": 2.8840000000000002e-05, | |
| "loss": 0.3697, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 0.21959952885732004, | |
| "learning_rate": 2.88e-05, | |
| "loss": 0.3754, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4466019417475728, | |
| "grad_norm": 0.24918494834725308, | |
| "learning_rate": 2.8760000000000002e-05, | |
| "loss": 0.3777, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.4487594390507013, | |
| "grad_norm": 0.2260301950438927, | |
| "learning_rate": 2.8720000000000003e-05, | |
| "loss": 0.3519, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4509169363538295, | |
| "grad_norm": 0.2283050010652631, | |
| "learning_rate": 2.868e-05, | |
| "loss": 0.377, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.4530744336569579, | |
| "grad_norm": 0.22476927273463568, | |
| "learning_rate": 2.864e-05, | |
| "loss": 0.3476, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4552319309600863, | |
| "grad_norm": 0.20324342452103078, | |
| "learning_rate": 2.86e-05, | |
| "loss": 0.3475, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.4573894282632147, | |
| "grad_norm": 0.22629462680575138, | |
| "learning_rate": 2.8560000000000004e-05, | |
| "loss": 0.3569, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4595469255663431, | |
| "grad_norm": 0.19880472901319113, | |
| "learning_rate": 2.852e-05, | |
| "loss": 0.3564, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.4617044228694713, | |
| "grad_norm": 0.2143320586140639, | |
| "learning_rate": 2.8480000000000002e-05, | |
| "loss": 0.3655, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4638619201725998, | |
| "grad_norm": 0.2362702869766217, | |
| "learning_rate": 2.844e-05, | |
| "loss": 0.3574, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.4660194174757282, | |
| "grad_norm": 0.22656216374024327, | |
| "learning_rate": 2.84e-05, | |
| "loss": 0.3391, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4681769147788566, | |
| "grad_norm": 0.21809318515170673, | |
| "learning_rate": 2.8360000000000003e-05, | |
| "loss": 0.3639, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.4703344120819848, | |
| "grad_norm": 0.23993152342071317, | |
| "learning_rate": 2.8320000000000003e-05, | |
| "loss": 0.3492, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4724919093851132, | |
| "grad_norm": 0.23685847874927074, | |
| "learning_rate": 2.828e-05, | |
| "loss": 0.3724, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.4746494066882416, | |
| "grad_norm": 0.22891344167344313, | |
| "learning_rate": 2.824e-05, | |
| "loss": 0.3542, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.47680690399137, | |
| "grad_norm": 0.23521390801990014, | |
| "learning_rate": 2.8199999999999998e-05, | |
| "loss": 0.3609, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.4789644012944985, | |
| "grad_norm": 0.24288583571997324, | |
| "learning_rate": 2.816e-05, | |
| "loss": 0.3661, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4811218985976269, | |
| "grad_norm": 0.2740848150612509, | |
| "learning_rate": 2.8120000000000002e-05, | |
| "loss": 0.3598, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.483279395900755, | |
| "grad_norm": 0.23571675791182756, | |
| "learning_rate": 2.8080000000000002e-05, | |
| "loss": 0.3654, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4854368932038835, | |
| "grad_norm": 0.20129412441664046, | |
| "learning_rate": 2.804e-05, | |
| "loss": 0.3536, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.487594390507012, | |
| "grad_norm": 0.23166748621018965, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.367, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4897518878101401, | |
| "grad_norm": 0.20871358422460617, | |
| "learning_rate": 2.7960000000000003e-05, | |
| "loss": 0.3518, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.4919093851132685, | |
| "grad_norm": 0.20339117924240924, | |
| "learning_rate": 2.792e-05, | |
| "loss": 0.3555, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.494066882416397, | |
| "grad_norm": 0.21272736583634902, | |
| "learning_rate": 2.788e-05, | |
| "loss": 0.3712, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.4962243797195254, | |
| "grad_norm": 0.23915492805442082, | |
| "learning_rate": 2.7839999999999998e-05, | |
| "loss": 0.3755, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4983818770226538, | |
| "grad_norm": 0.20821942101166777, | |
| "learning_rate": 2.7800000000000005e-05, | |
| "loss": 0.3695, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5005393743257822, | |
| "grad_norm": 0.2232367633694246, | |
| "learning_rate": 2.7760000000000002e-05, | |
| "loss": 0.3688, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5026968716289104, | |
| "grad_norm": 0.24499976864308812, | |
| "learning_rate": 2.7720000000000002e-05, | |
| "loss": 0.3543, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5048543689320388, | |
| "grad_norm": 0.22940998225556647, | |
| "learning_rate": 2.768e-05, | |
| "loss": 0.3696, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5070118662351673, | |
| "grad_norm": 0.2267128233697368, | |
| "learning_rate": 2.764e-05, | |
| "loss": 0.3579, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5091693635382954, | |
| "grad_norm": 0.2307569329238415, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 0.3834, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5113268608414239, | |
| "grad_norm": 0.25112226025346424, | |
| "learning_rate": 2.7560000000000004e-05, | |
| "loss": 0.3679, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5134843581445523, | |
| "grad_norm": 0.25419142213454193, | |
| "learning_rate": 2.752e-05, | |
| "loss": 0.3542, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5156418554476807, | |
| "grad_norm": 0.22613974746673926, | |
| "learning_rate": 2.748e-05, | |
| "loss": 0.3832, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5177993527508091, | |
| "grad_norm": 0.22613424161758108, | |
| "learning_rate": 2.7439999999999998e-05, | |
| "loss": 0.3655, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5199568500539375, | |
| "grad_norm": 0.23893379478624607, | |
| "learning_rate": 2.7400000000000002e-05, | |
| "loss": 0.3519, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.522114347357066, | |
| "grad_norm": 0.2775995269354047, | |
| "learning_rate": 2.7360000000000002e-05, | |
| "loss": 0.3894, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5242718446601942, | |
| "grad_norm": 0.23904561581196412, | |
| "learning_rate": 2.7320000000000003e-05, | |
| "loss": 0.3655, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5264293419633226, | |
| "grad_norm": 0.23756759487868181, | |
| "learning_rate": 2.728e-05, | |
| "loss": 0.3519, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5285868392664508, | |
| "grad_norm": 0.20601009974320345, | |
| "learning_rate": 2.724e-05, | |
| "loss": 0.3558, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5307443365695792, | |
| "grad_norm": 0.2179130898471259, | |
| "learning_rate": 2.7200000000000004e-05, | |
| "loss": 0.3788, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5329018338727076, | |
| "grad_norm": 0.25261468141264204, | |
| "learning_rate": 2.716e-05, | |
| "loss": 0.369, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.535059331175836, | |
| "grad_norm": 0.20717129569980194, | |
| "learning_rate": 2.712e-05, | |
| "loss": 0.3579, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5372168284789645, | |
| "grad_norm": 0.21298019748089506, | |
| "learning_rate": 2.7079999999999998e-05, | |
| "loss": 0.3586, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.5393743257820929, | |
| "grad_norm": 0.25207101142696386, | |
| "learning_rate": 2.704e-05, | |
| "loss": 0.3538, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5415318230852213, | |
| "grad_norm": 0.20068229452734665, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 0.3573, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.5436893203883495, | |
| "grad_norm": 0.22821380199261126, | |
| "learning_rate": 2.6960000000000003e-05, | |
| "loss": 0.3634, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.545846817691478, | |
| "grad_norm": 0.21345032453764343, | |
| "learning_rate": 2.692e-05, | |
| "loss": 0.364, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.5480043149946061, | |
| "grad_norm": 0.20494875666911705, | |
| "learning_rate": 2.688e-05, | |
| "loss": 0.3697, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5501618122977345, | |
| "grad_norm": 0.2154910856310443, | |
| "learning_rate": 2.6840000000000004e-05, | |
| "loss": 0.3574, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.552319309600863, | |
| "grad_norm": 0.20193810414215643, | |
| "learning_rate": 2.6800000000000004e-05, | |
| "loss": 0.3531, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5544768069039914, | |
| "grad_norm": 0.2312442332707919, | |
| "learning_rate": 2.676e-05, | |
| "loss": 0.3687, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.5566343042071198, | |
| "grad_norm": 0.23089370538006676, | |
| "learning_rate": 2.672e-05, | |
| "loss": 0.3713, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5587918015102482, | |
| "grad_norm": 0.206819035037506, | |
| "learning_rate": 2.668e-05, | |
| "loss": 0.3636, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.5609492988133766, | |
| "grad_norm": 0.24663426452102197, | |
| "learning_rate": 2.6640000000000002e-05, | |
| "loss": 0.3757, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5631067961165048, | |
| "grad_norm": 0.24688884560578506, | |
| "learning_rate": 2.6600000000000003e-05, | |
| "loss": 0.3747, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.5652642934196332, | |
| "grad_norm": 0.2188001289885843, | |
| "learning_rate": 2.6560000000000003e-05, | |
| "loss": 0.3702, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5674217907227614, | |
| "grad_norm": 0.24246127603696868, | |
| "learning_rate": 2.652e-05, | |
| "loss": 0.3841, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.5695792880258899, | |
| "grad_norm": 0.2203909069469141, | |
| "learning_rate": 2.648e-05, | |
| "loss": 0.3546, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5717367853290183, | |
| "grad_norm": 0.21585230186912005, | |
| "learning_rate": 2.6440000000000004e-05, | |
| "loss": 0.3477, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.5738942826321467, | |
| "grad_norm": 0.22218825993341798, | |
| "learning_rate": 2.64e-05, | |
| "loss": 0.3632, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5760517799352751, | |
| "grad_norm": 0.24371912936443485, | |
| "learning_rate": 2.6360000000000002e-05, | |
| "loss": 0.38, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.5782092772384035, | |
| "grad_norm": 0.2227192482351511, | |
| "learning_rate": 2.632e-05, | |
| "loss": 0.3703, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.580366774541532, | |
| "grad_norm": 0.21456759816988139, | |
| "learning_rate": 2.628e-05, | |
| "loss": 0.3374, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.5825242718446602, | |
| "grad_norm": 0.23342928415132325, | |
| "learning_rate": 2.6240000000000003e-05, | |
| "loss": 0.3563, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5846817691477886, | |
| "grad_norm": 0.23418445062714002, | |
| "learning_rate": 2.6200000000000003e-05, | |
| "loss": 0.3748, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.5868392664509168, | |
| "grad_norm": 0.2336619338904283, | |
| "learning_rate": 2.616e-05, | |
| "loss": 0.3495, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5889967637540452, | |
| "grad_norm": 0.24381867912545538, | |
| "learning_rate": 2.612e-05, | |
| "loss": 0.3611, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.5911542610571736, | |
| "grad_norm": 0.3090239665378897, | |
| "learning_rate": 2.6079999999999998e-05, | |
| "loss": 0.3717, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.593311758360302, | |
| "grad_norm": 0.2646555898668475, | |
| "learning_rate": 2.6040000000000005e-05, | |
| "loss": 0.3589, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.5954692556634305, | |
| "grad_norm": 0.2296470939633854, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.3642, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5976267529665589, | |
| "grad_norm": 0.2419732243746287, | |
| "learning_rate": 2.5960000000000002e-05, | |
| "loss": 0.3917, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.5997842502696873, | |
| "grad_norm": 0.2322415069928087, | |
| "learning_rate": 2.592e-05, | |
| "loss": 0.3584, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6019417475728155, | |
| "grad_norm": 0.24405015607804642, | |
| "learning_rate": 2.588e-05, | |
| "loss": 0.3727, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.604099244875944, | |
| "grad_norm": 0.22590347323709656, | |
| "learning_rate": 2.5840000000000003e-05, | |
| "loss": 0.356, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6062567421790723, | |
| "grad_norm": 0.24229824285904206, | |
| "learning_rate": 2.58e-05, | |
| "loss": 0.3768, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6084142394822005, | |
| "grad_norm": 0.22910048467078928, | |
| "learning_rate": 2.576e-05, | |
| "loss": 0.3757, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.610571736785329, | |
| "grad_norm": 0.22282531058063634, | |
| "learning_rate": 2.572e-05, | |
| "loss": 0.3584, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6127292340884574, | |
| "grad_norm": 0.23674151401492222, | |
| "learning_rate": 2.5679999999999998e-05, | |
| "loss": 0.3551, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6148867313915858, | |
| "grad_norm": 0.23673152399904154, | |
| "learning_rate": 2.5640000000000002e-05, | |
| "loss": 0.3868, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6170442286947142, | |
| "grad_norm": 0.23017444547110794, | |
| "learning_rate": 2.5600000000000002e-05, | |
| "loss": 0.3694, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6192017259978426, | |
| "grad_norm": 0.2464613175786792, | |
| "learning_rate": 2.556e-05, | |
| "loss": 0.362, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6213592233009708, | |
| "grad_norm": 0.22557125082732474, | |
| "learning_rate": 2.552e-05, | |
| "loss": 0.3789, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6235167206040992, | |
| "grad_norm": 0.23750344656685524, | |
| "learning_rate": 2.5480000000000003e-05, | |
| "loss": 0.373, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6256742179072277, | |
| "grad_norm": 0.22516492800062368, | |
| "learning_rate": 2.5440000000000004e-05, | |
| "loss": 0.3499, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6278317152103559, | |
| "grad_norm": 0.22543762231722306, | |
| "learning_rate": 2.54e-05, | |
| "loss": 0.3691, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6299892125134843, | |
| "grad_norm": 0.21901698270170006, | |
| "learning_rate": 2.536e-05, | |
| "loss": 0.3541, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6321467098166127, | |
| "grad_norm": 0.20145275001402113, | |
| "learning_rate": 2.5319999999999998e-05, | |
| "loss": 0.3582, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.6343042071197411, | |
| "grad_norm": 0.2375280443344114, | |
| "learning_rate": 2.5280000000000005e-05, | |
| "loss": 0.3659, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6364617044228695, | |
| "grad_norm": 0.20472034873672557, | |
| "learning_rate": 2.5240000000000002e-05, | |
| "loss": 0.3571, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.638619201725998, | |
| "grad_norm": 0.2264353999031357, | |
| "learning_rate": 2.5200000000000003e-05, | |
| "loss": 0.3582, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6407766990291264, | |
| "grad_norm": 0.23609128223309025, | |
| "learning_rate": 2.516e-05, | |
| "loss": 0.3463, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.6429341963322546, | |
| "grad_norm": 0.20195916155379995, | |
| "learning_rate": 2.512e-05, | |
| "loss": 0.3736, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.645091693635383, | |
| "grad_norm": 0.22128929062222866, | |
| "learning_rate": 2.5080000000000004e-05, | |
| "loss": 0.3838, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.6472491909385112, | |
| "grad_norm": 0.22940217162415266, | |
| "learning_rate": 2.504e-05, | |
| "loss": 0.3434, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6494066882416396, | |
| "grad_norm": 0.20765834519446938, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.36, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.651564185544768, | |
| "grad_norm": 0.22652122912706935, | |
| "learning_rate": 2.496e-05, | |
| "loss": 0.3537, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6537216828478964, | |
| "grad_norm": 0.209486467241231, | |
| "learning_rate": 2.4920000000000002e-05, | |
| "loss": 0.38, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.6558791801510249, | |
| "grad_norm": 0.21329391063224873, | |
| "learning_rate": 2.488e-05, | |
| "loss": 0.3471, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6580366774541533, | |
| "grad_norm": 0.204874907654904, | |
| "learning_rate": 2.4840000000000003e-05, | |
| "loss": 0.3874, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.6601941747572817, | |
| "grad_norm": 0.21401626438471516, | |
| "learning_rate": 2.48e-05, | |
| "loss": 0.3747, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.66235167206041, | |
| "grad_norm": 0.2259053066704498, | |
| "learning_rate": 2.476e-05, | |
| "loss": 0.3595, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.6645091693635383, | |
| "grad_norm": 0.2096015711688781, | |
| "learning_rate": 2.472e-05, | |
| "loss": 0.3635, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.19181190032847367, | |
| "learning_rate": 2.468e-05, | |
| "loss": 0.3624, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.668824163969795, | |
| "grad_norm": 0.2244869637071479, | |
| "learning_rate": 2.464e-05, | |
| "loss": 0.3595, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6709816612729234, | |
| "grad_norm": 0.222169368065215, | |
| "learning_rate": 2.46e-05, | |
| "loss": 0.3729, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.6731391585760518, | |
| "grad_norm": 0.20169251773681174, | |
| "learning_rate": 2.4560000000000002e-05, | |
| "loss": 0.3651, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6752966558791802, | |
| "grad_norm": 0.25372193384583863, | |
| "learning_rate": 2.4520000000000002e-05, | |
| "loss": 0.3454, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.6774541531823086, | |
| "grad_norm": 0.2197114197850221, | |
| "learning_rate": 2.448e-05, | |
| "loss": 0.3537, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.679611650485437, | |
| "grad_norm": 0.3314925595487828, | |
| "learning_rate": 2.4440000000000003e-05, | |
| "loss": 0.3621, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.6817691477885652, | |
| "grad_norm": 0.20939091826537268, | |
| "learning_rate": 2.44e-05, | |
| "loss": 0.3502, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6839266450916937, | |
| "grad_norm": 0.21279544211352294, | |
| "learning_rate": 2.4360000000000004e-05, | |
| "loss": 0.35, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.6860841423948218, | |
| "grad_norm": 0.24701488636019142, | |
| "learning_rate": 2.432e-05, | |
| "loss": 0.3594, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6882416396979503, | |
| "grad_norm": 0.20477887450410062, | |
| "learning_rate": 2.428e-05, | |
| "loss": 0.3479, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.6903991370010787, | |
| "grad_norm": 0.23481629228705686, | |
| "learning_rate": 2.4240000000000002e-05, | |
| "loss": 0.3456, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.692556634304207, | |
| "grad_norm": 0.192287940231301, | |
| "learning_rate": 2.4200000000000002e-05, | |
| "loss": 0.3565, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.6947141316073355, | |
| "grad_norm": 0.2185166952318352, | |
| "learning_rate": 2.4160000000000002e-05, | |
| "loss": 0.3512, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.696871628910464, | |
| "grad_norm": 0.2096624106569505, | |
| "learning_rate": 2.412e-05, | |
| "loss": 0.3783, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.6990291262135924, | |
| "grad_norm": 0.21166468282471723, | |
| "learning_rate": 2.408e-05, | |
| "loss": 0.3748, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7011866235167206, | |
| "grad_norm": 0.19873344543315882, | |
| "learning_rate": 2.404e-05, | |
| "loss": 0.3571, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.703344120819849, | |
| "grad_norm": 0.22110925981619345, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.3629, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7055016181229772, | |
| "grad_norm": 0.2021215637434402, | |
| "learning_rate": 2.396e-05, | |
| "loss": 0.3716, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7076591154261056, | |
| "grad_norm": 0.2111765564318759, | |
| "learning_rate": 2.392e-05, | |
| "loss": 0.3588, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.709816612729234, | |
| "grad_norm": 0.2223622417383566, | |
| "learning_rate": 2.3880000000000002e-05, | |
| "loss": 0.3536, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7119741100323624, | |
| "grad_norm": 0.22406668807582533, | |
| "learning_rate": 2.3840000000000002e-05, | |
| "loss": 0.3596, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7141316073354909, | |
| "grad_norm": 0.2047427267119515, | |
| "learning_rate": 2.38e-05, | |
| "loss": 0.362, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7162891046386193, | |
| "grad_norm": 0.2487591943558242, | |
| "learning_rate": 2.3760000000000003e-05, | |
| "loss": 0.3929, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7184466019417477, | |
| "grad_norm": 0.23436633449529568, | |
| "learning_rate": 2.372e-05, | |
| "loss": 0.3683, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.720604099244876, | |
| "grad_norm": 0.20926682274018532, | |
| "learning_rate": 2.3680000000000004e-05, | |
| "loss": 0.3553, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7227615965480043, | |
| "grad_norm": 0.2084020707494825, | |
| "learning_rate": 2.364e-05, | |
| "loss": 0.3673, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7249190938511327, | |
| "grad_norm": 0.21090087654277367, | |
| "learning_rate": 2.36e-05, | |
| "loss": 0.3491, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.727076591154261, | |
| "grad_norm": 0.21079538661713704, | |
| "learning_rate": 2.356e-05, | |
| "loss": 0.3617, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.7292340884573894, | |
| "grad_norm": 0.19785208126669376, | |
| "learning_rate": 2.3520000000000002e-05, | |
| "loss": 0.3734, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7313915857605178, | |
| "grad_norm": 0.20051304335215855, | |
| "learning_rate": 2.3480000000000002e-05, | |
| "loss": 0.3561, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.7335490830636462, | |
| "grad_norm": 0.20991073713252967, | |
| "learning_rate": 2.344e-05, | |
| "loss": 0.3456, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7357065803667746, | |
| "grad_norm": 0.19847852943956665, | |
| "learning_rate": 2.3400000000000003e-05, | |
| "loss": 0.3636, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.737864077669903, | |
| "grad_norm": 0.19228778135303196, | |
| "learning_rate": 2.336e-05, | |
| "loss": 0.3546, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7400215749730314, | |
| "grad_norm": 0.19869096373294648, | |
| "learning_rate": 2.332e-05, | |
| "loss": 0.3535, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.7421790722761596, | |
| "grad_norm": 0.19598486859011863, | |
| "learning_rate": 2.328e-05, | |
| "loss": 0.3672, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.744336569579288, | |
| "grad_norm": 0.20021180617747214, | |
| "learning_rate": 2.324e-05, | |
| "loss": 0.3629, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.7464940668824163, | |
| "grad_norm": 0.19601550993847638, | |
| "learning_rate": 2.32e-05, | |
| "loss": 0.3661, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7486515641855447, | |
| "grad_norm": 0.19629741169441461, | |
| "learning_rate": 2.3160000000000002e-05, | |
| "loss": 0.3568, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.750809061488673, | |
| "grad_norm": 0.2071662842083144, | |
| "learning_rate": 2.312e-05, | |
| "loss": 0.3813, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7529665587918015, | |
| "grad_norm": 0.1966149798408275, | |
| "learning_rate": 2.3080000000000003e-05, | |
| "loss": 0.362, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.75512405609493, | |
| "grad_norm": 0.2279788745870227, | |
| "learning_rate": 2.304e-05, | |
| "loss": 0.364, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7572815533980584, | |
| "grad_norm": 0.23247306079514818, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 0.3641, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.7594390507011868, | |
| "grad_norm": 0.19906860551229927, | |
| "learning_rate": 2.296e-05, | |
| "loss": 0.3428, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.761596548004315, | |
| "grad_norm": 0.2355677766543707, | |
| "learning_rate": 2.292e-05, | |
| "loss": 0.352, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.7637540453074434, | |
| "grad_norm": 0.2654888809339144, | |
| "learning_rate": 2.288e-05, | |
| "loss": 0.3766, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7659115426105716, | |
| "grad_norm": 0.27302693742007983, | |
| "learning_rate": 2.284e-05, | |
| "loss": 0.3591, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.7680690399137, | |
| "grad_norm": 0.2179231966531101, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 0.3538, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7702265372168284, | |
| "grad_norm": 0.22506397382520602, | |
| "learning_rate": 2.2760000000000002e-05, | |
| "loss": 0.3564, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.7723840345199569, | |
| "grad_norm": 0.25408157657001734, | |
| "learning_rate": 2.2720000000000003e-05, | |
| "loss": 0.3515, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.7745415318230853, | |
| "grad_norm": 0.21062218688731194, | |
| "learning_rate": 2.268e-05, | |
| "loss": 0.3651, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.7766990291262137, | |
| "grad_norm": 0.22725053193586056, | |
| "learning_rate": 2.264e-05, | |
| "loss": 0.3508, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.778856526429342, | |
| "grad_norm": 0.24341685689233888, | |
| "learning_rate": 2.26e-05, | |
| "loss": 0.3911, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.7810140237324703, | |
| "grad_norm": 0.28389285536430425, | |
| "learning_rate": 2.256e-05, | |
| "loss": 0.3767, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7831715210355987, | |
| "grad_norm": 0.23070034042434137, | |
| "learning_rate": 2.252e-05, | |
| "loss": 0.3494, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.785329018338727, | |
| "grad_norm": 0.2706944129377681, | |
| "learning_rate": 2.248e-05, | |
| "loss": 0.354, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7874865156418553, | |
| "grad_norm": 0.2610014419098948, | |
| "learning_rate": 2.244e-05, | |
| "loss": 0.3468, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.7896440129449838, | |
| "grad_norm": 0.2514731314471683, | |
| "learning_rate": 2.2400000000000002e-05, | |
| "loss": 0.3629, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7918015102481122, | |
| "grad_norm": 0.2633751369955124, | |
| "learning_rate": 2.236e-05, | |
| "loss": 0.3753, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.7939590075512406, | |
| "grad_norm": 0.25424651459631, | |
| "learning_rate": 2.2320000000000003e-05, | |
| "loss": 0.3416, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.796116504854369, | |
| "grad_norm": 0.2250335462372855, | |
| "learning_rate": 2.228e-05, | |
| "loss": 0.3553, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.7982740021574974, | |
| "grad_norm": 0.2589022819689555, | |
| "learning_rate": 2.224e-05, | |
| "loss": 0.3748, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8004314994606256, | |
| "grad_norm": 0.23838039943520098, | |
| "learning_rate": 2.22e-05, | |
| "loss": 0.3714, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.802588996763754, | |
| "grad_norm": 0.21958281718490288, | |
| "learning_rate": 2.216e-05, | |
| "loss": 0.3602, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8047464940668823, | |
| "grad_norm": 0.22055042786226484, | |
| "learning_rate": 2.212e-05, | |
| "loss": 0.3542, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8069039913700107, | |
| "grad_norm": 0.2178260520716557, | |
| "learning_rate": 2.2080000000000002e-05, | |
| "loss": 0.3643, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.809061488673139, | |
| "grad_norm": 0.22651969517407028, | |
| "learning_rate": 2.2040000000000002e-05, | |
| "loss": 0.367, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8112189859762675, | |
| "grad_norm": 0.23148159521722647, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.3609, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.813376483279396, | |
| "grad_norm": 0.23027659960031496, | |
| "learning_rate": 2.196e-05, | |
| "loss": 0.3844, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8155339805825244, | |
| "grad_norm": 0.21521666010080176, | |
| "learning_rate": 2.192e-05, | |
| "loss": 0.363, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8176914778856528, | |
| "grad_norm": 0.23580779756655815, | |
| "learning_rate": 2.188e-05, | |
| "loss": 0.3719, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.819848975188781, | |
| "grad_norm": 0.24788937903653036, | |
| "learning_rate": 2.184e-05, | |
| "loss": 0.3586, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8220064724919094, | |
| "grad_norm": 0.22247443155209357, | |
| "learning_rate": 2.18e-05, | |
| "loss": 0.3581, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.8241639697950378, | |
| "grad_norm": 0.23770870455599036, | |
| "learning_rate": 2.176e-05, | |
| "loss": 0.3614, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.826321467098166, | |
| "grad_norm": 0.22632423401344884, | |
| "learning_rate": 2.1720000000000002e-05, | |
| "loss": 0.3624, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.8284789644012944, | |
| "grad_norm": 0.2524920921986345, | |
| "learning_rate": 2.168e-05, | |
| "loss": 0.3675, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8306364617044228, | |
| "grad_norm": 0.3215840056574261, | |
| "learning_rate": 2.1640000000000003e-05, | |
| "loss": 0.3538, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.8327939590075513, | |
| "grad_norm": 0.21486175758377893, | |
| "learning_rate": 2.16e-05, | |
| "loss": 0.3632, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8349514563106797, | |
| "grad_norm": 0.2330894514205108, | |
| "learning_rate": 2.1560000000000004e-05, | |
| "loss": 0.341, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.837108953613808, | |
| "grad_norm": 0.24218781444766682, | |
| "learning_rate": 2.152e-05, | |
| "loss": 0.3677, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8392664509169363, | |
| "grad_norm": 0.21963366595590614, | |
| "learning_rate": 2.148e-05, | |
| "loss": 0.3606, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.8414239482200647, | |
| "grad_norm": 0.21006868250745636, | |
| "learning_rate": 2.144e-05, | |
| "loss": 0.3489, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8435814455231931, | |
| "grad_norm": 0.23174473879298813, | |
| "learning_rate": 2.1400000000000002e-05, | |
| "loss": 0.3587, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.8457389428263213, | |
| "grad_norm": 0.2321069704400467, | |
| "learning_rate": 2.1360000000000002e-05, | |
| "loss": 0.3486, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.8478964401294498, | |
| "grad_norm": 0.20025509216353718, | |
| "learning_rate": 2.1320000000000003e-05, | |
| "loss": 0.3436, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.8500539374325782, | |
| "grad_norm": 0.22250953140085006, | |
| "learning_rate": 2.128e-05, | |
| "loss": 0.3603, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8522114347357066, | |
| "grad_norm": 0.2173021358838602, | |
| "learning_rate": 2.124e-05, | |
| "loss": 0.3467, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.854368932038835, | |
| "grad_norm": 0.1930203129733309, | |
| "learning_rate": 2.12e-05, | |
| "loss": 0.3615, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8565264293419634, | |
| "grad_norm": 0.19862427723542597, | |
| "learning_rate": 2.116e-05, | |
| "loss": 0.3579, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.8586839266450919, | |
| "grad_norm": 0.2096060299104676, | |
| "learning_rate": 2.112e-05, | |
| "loss": 0.3517, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.86084142394822, | |
| "grad_norm": 0.25983349292984365, | |
| "learning_rate": 2.1079999999999998e-05, | |
| "loss": 0.3469, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.8629989212513485, | |
| "grad_norm": 0.19568780893613127, | |
| "learning_rate": 2.1040000000000002e-05, | |
| "loss": 0.3586, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8651564185544767, | |
| "grad_norm": 0.20440754302808392, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.3454, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.867313915857605, | |
| "grad_norm": 0.2617960410505945, | |
| "learning_rate": 2.0960000000000003e-05, | |
| "loss": 0.3722, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8694714131607335, | |
| "grad_norm": 0.21484645618030715, | |
| "learning_rate": 2.092e-05, | |
| "loss": 0.363, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.871628910463862, | |
| "grad_norm": 0.2089417062699798, | |
| "learning_rate": 2.0880000000000003e-05, | |
| "loss": 0.3575, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8737864077669903, | |
| "grad_norm": 0.21870690017344033, | |
| "learning_rate": 2.084e-05, | |
| "loss": 0.3441, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.8759439050701188, | |
| "grad_norm": 0.2015916258019001, | |
| "learning_rate": 2.08e-05, | |
| "loss": 0.3649, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8781014023732472, | |
| "grad_norm": 0.20445394487249433, | |
| "learning_rate": 2.076e-05, | |
| "loss": 0.3633, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.8802588996763754, | |
| "grad_norm": 0.21933907619881754, | |
| "learning_rate": 2.072e-05, | |
| "loss": 0.3565, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8824163969795038, | |
| "grad_norm": 0.22898545335887432, | |
| "learning_rate": 2.0680000000000002e-05, | |
| "loss": 0.3489, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.884573894282632, | |
| "grad_norm": 0.19190043665285303, | |
| "learning_rate": 2.0640000000000002e-05, | |
| "loss": 0.3811, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8867313915857604, | |
| "grad_norm": 0.22405276330574816, | |
| "learning_rate": 2.06e-05, | |
| "loss": 0.3514, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 0.20664352560729662, | |
| "learning_rate": 2.0560000000000003e-05, | |
| "loss": 0.3564, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8910463861920173, | |
| "grad_norm": 0.19849201500058444, | |
| "learning_rate": 2.052e-05, | |
| "loss": 0.3791, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.8932038834951457, | |
| "grad_norm": 0.21514720161140888, | |
| "learning_rate": 2.048e-05, | |
| "loss": 0.343, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.895361380798274, | |
| "grad_norm": 0.19591290106236087, | |
| "learning_rate": 2.044e-05, | |
| "loss": 0.3617, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.8975188781014025, | |
| "grad_norm": 0.20056100252045797, | |
| "learning_rate": 2.04e-05, | |
| "loss": 0.3692, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8996763754045307, | |
| "grad_norm": 0.6263284386911345, | |
| "learning_rate": 2.036e-05, | |
| "loss": 0.3819, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9018338727076591, | |
| "grad_norm": 0.23922694733352964, | |
| "learning_rate": 2.032e-05, | |
| "loss": 0.3643, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9039913700107873, | |
| "grad_norm": 0.19781471120361346, | |
| "learning_rate": 2.0280000000000002e-05, | |
| "loss": 0.3611, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9061488673139158, | |
| "grad_norm": 0.21165098306207256, | |
| "learning_rate": 2.024e-05, | |
| "loss": 0.3529, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9083063646170442, | |
| "grad_norm": 0.21721904598543817, | |
| "learning_rate": 2.0200000000000003e-05, | |
| "loss": 0.3492, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9104638619201726, | |
| "grad_norm": 0.21422381596626322, | |
| "learning_rate": 2.016e-05, | |
| "loss": 0.3602, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.912621359223301, | |
| "grad_norm": 0.20452655904024958, | |
| "learning_rate": 2.012e-05, | |
| "loss": 0.3564, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9147788565264294, | |
| "grad_norm": 0.21580439136445786, | |
| "learning_rate": 2.008e-05, | |
| "loss": 0.3711, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9169363538295578, | |
| "grad_norm": 0.19423056851782128, | |
| "learning_rate": 2.004e-05, | |
| "loss": 0.3701, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.919093851132686, | |
| "grad_norm": 0.20048302260105516, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3672, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9212513484358145, | |
| "grad_norm": 0.2117075068441905, | |
| "learning_rate": 1.9960000000000002e-05, | |
| "loss": 0.3584, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.9234088457389427, | |
| "grad_norm": 0.20275015688207176, | |
| "learning_rate": 1.992e-05, | |
| "loss": 0.3722, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.925566343042071, | |
| "grad_norm": 0.18554538922099112, | |
| "learning_rate": 1.9880000000000003e-05, | |
| "loss": 0.3688, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.9277238403451995, | |
| "grad_norm": 0.2264891336459159, | |
| "learning_rate": 1.984e-05, | |
| "loss": 0.3516, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.929881337648328, | |
| "grad_norm": 0.20419289874494262, | |
| "learning_rate": 1.9800000000000004e-05, | |
| "loss": 0.368, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.9320388349514563, | |
| "grad_norm": 0.2253379202949314, | |
| "learning_rate": 1.976e-05, | |
| "loss": 0.3684, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9341963322545848, | |
| "grad_norm": 0.21080590277290626, | |
| "learning_rate": 1.972e-05, | |
| "loss": 0.3559, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.9363538295577132, | |
| "grad_norm": 0.18855517288473167, | |
| "learning_rate": 1.968e-05, | |
| "loss": 0.3683, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9385113268608414, | |
| "grad_norm": 0.22336015878998725, | |
| "learning_rate": 1.9640000000000002e-05, | |
| "loss": 0.3501, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.9406688241639698, | |
| "grad_norm": 0.20681307053495754, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 0.3339, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9428263214670982, | |
| "grad_norm": 0.21956872185132326, | |
| "learning_rate": 1.956e-05, | |
| "loss": 0.3643, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.9449838187702264, | |
| "grad_norm": 0.22995896123381177, | |
| "learning_rate": 1.9520000000000003e-05, | |
| "loss": 0.3539, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9471413160733548, | |
| "grad_norm": 0.24289096552867204, | |
| "learning_rate": 1.948e-05, | |
| "loss": 0.3529, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.9492988133764833, | |
| "grad_norm": 0.18977445221480282, | |
| "learning_rate": 1.944e-05, | |
| "loss": 0.3761, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9514563106796117, | |
| "grad_norm": 0.19537181851448127, | |
| "learning_rate": 1.94e-05, | |
| "loss": 0.3776, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.95361380798274, | |
| "grad_norm": 0.21168864149685598, | |
| "learning_rate": 1.936e-05, | |
| "loss": 0.3564, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9557713052858685, | |
| "grad_norm": 0.21082909120102009, | |
| "learning_rate": 1.932e-05, | |
| "loss": 0.3677, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.9579288025889967, | |
| "grad_norm": 0.2092667221652132, | |
| "learning_rate": 1.9280000000000002e-05, | |
| "loss": 0.3666, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9600862998921251, | |
| "grad_norm": 0.28440317520339464, | |
| "learning_rate": 1.924e-05, | |
| "loss": 0.3616, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.9622437971952535, | |
| "grad_norm": 0.21372393833816974, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 0.3724, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9644012944983817, | |
| "grad_norm": 0.21177604197905148, | |
| "learning_rate": 1.916e-05, | |
| "loss": 0.3547, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.9665587918015102, | |
| "grad_norm": 0.22087482780256842, | |
| "learning_rate": 1.9120000000000003e-05, | |
| "loss": 0.3552, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9687162891046386, | |
| "grad_norm": 0.2293776507660548, | |
| "learning_rate": 1.908e-05, | |
| "loss": 0.3539, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.970873786407767, | |
| "grad_norm": 0.20763279035660182, | |
| "learning_rate": 1.904e-05, | |
| "loss": 0.3609, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9730312837108954, | |
| "grad_norm": 0.2130132604223223, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.387, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.9751887810140238, | |
| "grad_norm": 0.22693796782628164, | |
| "learning_rate": 1.896e-05, | |
| "loss": 0.3632, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9773462783171523, | |
| "grad_norm": 0.2075508453226733, | |
| "learning_rate": 1.8920000000000002e-05, | |
| "loss": 0.3504, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.9795037756202805, | |
| "grad_norm": 0.21670939076491838, | |
| "learning_rate": 1.888e-05, | |
| "loss": 0.3573, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9816612729234089, | |
| "grad_norm": 0.20202624393739851, | |
| "learning_rate": 1.8840000000000003e-05, | |
| "loss": 0.3549, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.983818770226537, | |
| "grad_norm": 0.20167979698326996, | |
| "learning_rate": 1.88e-05, | |
| "loss": 0.3672, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9859762675296655, | |
| "grad_norm": 0.1948573998079774, | |
| "learning_rate": 1.876e-05, | |
| "loss": 0.355, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.988133764832794, | |
| "grad_norm": 0.22586589053280912, | |
| "learning_rate": 1.872e-05, | |
| "loss": 0.3892, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9902912621359223, | |
| "grad_norm": 0.21193504103673555, | |
| "learning_rate": 1.868e-05, | |
| "loss": 0.3573, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.9924487594390508, | |
| "grad_norm": 0.19869474028310133, | |
| "learning_rate": 1.864e-05, | |
| "loss": 0.3524, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9946062567421792, | |
| "grad_norm": 0.21694327316611497, | |
| "learning_rate": 1.86e-05, | |
| "loss": 0.3623, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.9967637540453076, | |
| "grad_norm": 0.19165371716250784, | |
| "learning_rate": 1.856e-05, | |
| "loss": 0.3674, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9989212513484358, | |
| "grad_norm": 0.25911714915290096, | |
| "learning_rate": 1.8520000000000002e-05, | |
| "loss": 0.3711, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.3166913102001167, | |
| "learning_rate": 1.848e-05, | |
| "loss": 0.3554, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0021574973031284, | |
| "grad_norm": 0.2952601206226685, | |
| "learning_rate": 1.8440000000000003e-05, | |
| "loss": 0.2966, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.004314994606257, | |
| "grad_norm": 0.2737583904942107, | |
| "learning_rate": 1.84e-05, | |
| "loss": 0.2773, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0064724919093853, | |
| "grad_norm": 0.2883228038451365, | |
| "learning_rate": 1.8360000000000004e-05, | |
| "loss": 0.2972, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.0086299892125137, | |
| "grad_norm": 0.3045158995400534, | |
| "learning_rate": 1.832e-05, | |
| "loss": 0.2912, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.0107874865156417, | |
| "grad_norm": 0.21425516278784199, | |
| "learning_rate": 1.828e-05, | |
| "loss": 0.2797, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.01294498381877, | |
| "grad_norm": 0.26815968591614264, | |
| "learning_rate": 1.824e-05, | |
| "loss": 0.2899, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0151024811218985, | |
| "grad_norm": 0.2525848880526318, | |
| "learning_rate": 1.8200000000000002e-05, | |
| "loss": 0.2754, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.017259978425027, | |
| "grad_norm": 0.22940718108680191, | |
| "learning_rate": 1.8160000000000002e-05, | |
| "loss": 0.2739, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.0194174757281553, | |
| "grad_norm": 0.2930727493312503, | |
| "learning_rate": 1.812e-05, | |
| "loss": 0.2928, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.0215749730312838, | |
| "grad_norm": 0.2730010795221549, | |
| "learning_rate": 1.808e-05, | |
| "loss": 0.2799, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.023732470334412, | |
| "grad_norm": 0.21843464575067598, | |
| "learning_rate": 1.804e-05, | |
| "loss": 0.2759, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.0258899676375406, | |
| "grad_norm": 0.2287920671205098, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.2686, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.028047464940669, | |
| "grad_norm": 0.24632463641906233, | |
| "learning_rate": 1.796e-05, | |
| "loss": 0.2901, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.030204962243797, | |
| "grad_norm": 0.21554911391559797, | |
| "learning_rate": 1.792e-05, | |
| "loss": 0.2646, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0323624595469254, | |
| "grad_norm": 0.23974674214671038, | |
| "learning_rate": 1.7879999999999998e-05, | |
| "loss": 0.2865, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.034519956850054, | |
| "grad_norm": 0.23389305666437907, | |
| "learning_rate": 1.7840000000000002e-05, | |
| "loss": 0.2659, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.0366774541531822, | |
| "grad_norm": 0.23454932706996612, | |
| "learning_rate": 1.78e-05, | |
| "loss": 0.2889, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.0388349514563107, | |
| "grad_norm": 0.2110811652752538, | |
| "learning_rate": 1.7760000000000003e-05, | |
| "loss": 0.2715, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.040992448759439, | |
| "grad_norm": 0.2086924969772193, | |
| "learning_rate": 1.772e-05, | |
| "loss": 0.2827, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.0431499460625675, | |
| "grad_norm": 0.2178166620120031, | |
| "learning_rate": 1.7680000000000004e-05, | |
| "loss": 0.2823, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.045307443365696, | |
| "grad_norm": 0.21332967807788294, | |
| "learning_rate": 1.764e-05, | |
| "loss": 0.2814, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.0474649406688243, | |
| "grad_norm": 0.21111966487487638, | |
| "learning_rate": 1.76e-05, | |
| "loss": 0.278, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0496224379719523, | |
| "grad_norm": 0.20634749644599865, | |
| "learning_rate": 1.756e-05, | |
| "loss": 0.2862, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.0517799352750807, | |
| "grad_norm": 0.20256588641450451, | |
| "learning_rate": 1.752e-05, | |
| "loss": 0.2754, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.053937432578209, | |
| "grad_norm": 0.21546847922922335, | |
| "learning_rate": 1.7480000000000002e-05, | |
| "loss": 0.2859, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.0560949298813376, | |
| "grad_norm": 0.2012891240538272, | |
| "learning_rate": 1.7440000000000002e-05, | |
| "loss": 0.2667, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.058252427184466, | |
| "grad_norm": 0.21828886379868126, | |
| "learning_rate": 1.74e-05, | |
| "loss": 0.2768, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.0604099244875944, | |
| "grad_norm": 0.23319314864900412, | |
| "learning_rate": 1.736e-05, | |
| "loss": 0.2712, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.062567421790723, | |
| "grad_norm": 0.21570652212000496, | |
| "learning_rate": 1.732e-05, | |
| "loss": 0.2704, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.0647249190938513, | |
| "grad_norm": 0.2050136511501782, | |
| "learning_rate": 1.728e-05, | |
| "loss": 0.2768, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0668824163969797, | |
| "grad_norm": 0.19908285569355766, | |
| "learning_rate": 1.724e-05, | |
| "loss": 0.2699, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.0690399137001076, | |
| "grad_norm": 0.22341990371652531, | |
| "learning_rate": 1.7199999999999998e-05, | |
| "loss": 0.2802, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.071197411003236, | |
| "grad_norm": 0.21245912373702183, | |
| "learning_rate": 1.7160000000000002e-05, | |
| "loss": 0.2732, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.0733549083063645, | |
| "grad_norm": 0.2315866528262126, | |
| "learning_rate": 1.712e-05, | |
| "loss": 0.2666, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.075512405609493, | |
| "grad_norm": 0.21250700843322592, | |
| "learning_rate": 1.7080000000000002e-05, | |
| "loss": 0.287, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.0776699029126213, | |
| "grad_norm": 0.20591733874775808, | |
| "learning_rate": 1.704e-05, | |
| "loss": 0.2805, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.0798274002157497, | |
| "grad_norm": 0.20270061570174747, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 0.2724, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.081984897518878, | |
| "grad_norm": 0.22011780420584517, | |
| "learning_rate": 1.696e-05, | |
| "loss": 0.27, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.0841423948220066, | |
| "grad_norm": 0.2086945807906332, | |
| "learning_rate": 1.692e-05, | |
| "loss": 0.2652, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.086299892125135, | |
| "grad_norm": 0.2017480638957166, | |
| "learning_rate": 1.688e-05, | |
| "loss": 0.2807, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0884573894282634, | |
| "grad_norm": 0.23221771597231639, | |
| "learning_rate": 1.684e-05, | |
| "loss": 0.2908, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.0906148867313914, | |
| "grad_norm": 0.19787472936299969, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.2602, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.09277238403452, | |
| "grad_norm": 0.1911068238953241, | |
| "learning_rate": 1.6760000000000002e-05, | |
| "loss": 0.262, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.0949298813376482, | |
| "grad_norm": 0.21404892409121634, | |
| "learning_rate": 1.672e-05, | |
| "loss": 0.2883, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0970873786407767, | |
| "grad_norm": 0.19651822000095875, | |
| "learning_rate": 1.668e-05, | |
| "loss": 0.2787, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.099244875943905, | |
| "grad_norm": 0.19629483862396524, | |
| "learning_rate": 1.664e-05, | |
| "loss": 0.2682, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.1014023732470335, | |
| "grad_norm": 0.195771507449239, | |
| "learning_rate": 1.66e-05, | |
| "loss": 0.2595, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.103559870550162, | |
| "grad_norm": 0.2012720434683137, | |
| "learning_rate": 1.656e-05, | |
| "loss": 0.2924, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1057173678532903, | |
| "grad_norm": 0.20022994629900823, | |
| "learning_rate": 1.652e-05, | |
| "loss": 0.2623, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.1078748651564188, | |
| "grad_norm": 0.1988415836362952, | |
| "learning_rate": 1.648e-05, | |
| "loss": 0.2665, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.1100323624595467, | |
| "grad_norm": 0.20067992175054306, | |
| "learning_rate": 1.644e-05, | |
| "loss": 0.2708, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.112189859762675, | |
| "grad_norm": 0.1965836467645065, | |
| "learning_rate": 1.6400000000000002e-05, | |
| "loss": 0.2781, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1143473570658036, | |
| "grad_norm": 0.2106067939768535, | |
| "learning_rate": 1.636e-05, | |
| "loss": 0.2928, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.116504854368932, | |
| "grad_norm": 0.1912874552435929, | |
| "learning_rate": 1.6320000000000003e-05, | |
| "loss": 0.2653, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1186623516720604, | |
| "grad_norm": 0.21480216660403667, | |
| "learning_rate": 1.628e-05, | |
| "loss": 0.2795, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.120819848975189, | |
| "grad_norm": 0.19894898831802282, | |
| "learning_rate": 1.624e-05, | |
| "loss": 0.2723, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.1229773462783172, | |
| "grad_norm": 0.18868913838930684, | |
| "learning_rate": 1.62e-05, | |
| "loss": 0.2701, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.1251348435814457, | |
| "grad_norm": 0.22014773725924427, | |
| "learning_rate": 1.616e-05, | |
| "loss": 0.2956, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.127292340884574, | |
| "grad_norm": 0.2161914198087177, | |
| "learning_rate": 1.612e-05, | |
| "loss": 0.2694, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.129449838187702, | |
| "grad_norm": 0.18693419466428068, | |
| "learning_rate": 1.6080000000000002e-05, | |
| "loss": 0.2722, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1316073354908305, | |
| "grad_norm": 0.19845446294819882, | |
| "learning_rate": 1.604e-05, | |
| "loss": 0.2699, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.133764832793959, | |
| "grad_norm": 0.2243502306601146, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.2898, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1359223300970873, | |
| "grad_norm": 0.189061438536264, | |
| "learning_rate": 1.596e-05, | |
| "loss": 0.2613, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.1380798274002157, | |
| "grad_norm": 0.196308108347633, | |
| "learning_rate": 1.592e-05, | |
| "loss": 0.2802, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.140237324703344, | |
| "grad_norm": 0.20505568263166546, | |
| "learning_rate": 1.588e-05, | |
| "loss": 0.2754, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.1423948220064726, | |
| "grad_norm": 0.19371375505033042, | |
| "learning_rate": 1.584e-05, | |
| "loss": 0.2797, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.144552319309601, | |
| "grad_norm": 0.2038159697548706, | |
| "learning_rate": 1.58e-05, | |
| "loss": 0.2748, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.1467098166127294, | |
| "grad_norm": 0.20104064716873768, | |
| "learning_rate": 1.5759999999999998e-05, | |
| "loss": 0.2517, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.148867313915858, | |
| "grad_norm": 0.1993147823205055, | |
| "learning_rate": 1.5720000000000002e-05, | |
| "loss": 0.2806, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.151024811218986, | |
| "grad_norm": 0.23201462938785924, | |
| "learning_rate": 1.568e-05, | |
| "loss": 0.2843, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.1531823085221142, | |
| "grad_norm": 0.194482970909287, | |
| "learning_rate": 1.5640000000000003e-05, | |
| "loss": 0.2704, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.1553398058252426, | |
| "grad_norm": 0.19741489729000775, | |
| "learning_rate": 1.56e-05, | |
| "loss": 0.2868, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.157497303128371, | |
| "grad_norm": 0.19338218576211613, | |
| "learning_rate": 1.556e-05, | |
| "loss": 0.2807, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.1596548004314995, | |
| "grad_norm": 0.18815117551278576, | |
| "learning_rate": 1.552e-05, | |
| "loss": 0.2865, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.161812297734628, | |
| "grad_norm": 0.19320001320317376, | |
| "learning_rate": 1.548e-05, | |
| "loss": 0.2893, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.1639697950377563, | |
| "grad_norm": 0.19912625340767592, | |
| "learning_rate": 1.544e-05, | |
| "loss": 0.2861, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1661272923408847, | |
| "grad_norm": 0.20055637118837155, | |
| "learning_rate": 1.54e-05, | |
| "loss": 0.2832, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.168284789644013, | |
| "grad_norm": 0.18881522044079282, | |
| "learning_rate": 1.536e-05, | |
| "loss": 0.2735, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.170442286947141, | |
| "grad_norm": 0.19424371584292754, | |
| "learning_rate": 1.5320000000000002e-05, | |
| "loss": 0.2741, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.1725997842502696, | |
| "grad_norm": 0.19749981503516376, | |
| "learning_rate": 1.528e-05, | |
| "loss": 0.2814, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.174757281553398, | |
| "grad_norm": 0.21172397165526702, | |
| "learning_rate": 1.5240000000000001e-05, | |
| "loss": 0.2821, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.1769147788565264, | |
| "grad_norm": 0.2008055699713837, | |
| "learning_rate": 1.52e-05, | |
| "loss": 0.2772, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.179072276159655, | |
| "grad_norm": 0.18785347695619867, | |
| "learning_rate": 1.5160000000000002e-05, | |
| "loss": 0.2766, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.1812297734627832, | |
| "grad_norm": 0.1963392396504042, | |
| "learning_rate": 1.5120000000000001e-05, | |
| "loss": 0.2724, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1833872707659117, | |
| "grad_norm": 0.20123664109137754, | |
| "learning_rate": 1.508e-05, | |
| "loss": 0.2756, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.18554476806904, | |
| "grad_norm": 0.20444367434824906, | |
| "learning_rate": 1.5040000000000002e-05, | |
| "loss": 0.2924, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1877022653721685, | |
| "grad_norm": 0.1853221427084223, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.2647, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.1898597626752965, | |
| "grad_norm": 0.19532713814867492, | |
| "learning_rate": 1.4960000000000002e-05, | |
| "loss": 0.2885, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.192017259978425, | |
| "grad_norm": 0.194567249405803, | |
| "learning_rate": 1.4920000000000001e-05, | |
| "loss": 0.2751, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.1941747572815533, | |
| "grad_norm": 0.19156850642372739, | |
| "learning_rate": 1.488e-05, | |
| "loss": 0.2902, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.1963322545846817, | |
| "grad_norm": 0.19068854832443224, | |
| "learning_rate": 1.4840000000000002e-05, | |
| "loss": 0.2785, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.19848975188781, | |
| "grad_norm": 0.1907888424930383, | |
| "learning_rate": 1.48e-05, | |
| "loss": 0.2833, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2006472491909386, | |
| "grad_norm": 0.2022398514747435, | |
| "learning_rate": 1.4760000000000001e-05, | |
| "loss": 0.2812, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.202804746494067, | |
| "grad_norm": 0.20650152647961673, | |
| "learning_rate": 1.472e-05, | |
| "loss": 0.2808, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.2049622437971954, | |
| "grad_norm": 0.19783560549543305, | |
| "learning_rate": 1.4680000000000002e-05, | |
| "loss": 0.2835, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.207119741100324, | |
| "grad_norm": 0.19791637326958472, | |
| "learning_rate": 1.464e-05, | |
| "loss": 0.2686, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.209277238403452, | |
| "grad_norm": 0.19785476348240078, | |
| "learning_rate": 1.4599999999999999e-05, | |
| "loss": 0.2763, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.2114347357065802, | |
| "grad_norm": 0.2200012984407047, | |
| "learning_rate": 1.4560000000000001e-05, | |
| "loss": 0.2788, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.2135922330097086, | |
| "grad_norm": 0.23124340363606444, | |
| "learning_rate": 1.452e-05, | |
| "loss": 0.278, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.215749730312837, | |
| "grad_norm": 0.19294677227886092, | |
| "learning_rate": 1.4480000000000002e-05, | |
| "loss": 0.2662, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2179072276159655, | |
| "grad_norm": 0.21467839292037788, | |
| "learning_rate": 1.444e-05, | |
| "loss": 0.293, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.220064724919094, | |
| "grad_norm": 0.20226064234278612, | |
| "learning_rate": 1.44e-05, | |
| "loss": 0.2808, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.19651529866983977, | |
| "learning_rate": 1.4360000000000001e-05, | |
| "loss": 0.2776, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.2243797195253507, | |
| "grad_norm": 0.19847228391981836, | |
| "learning_rate": 1.432e-05, | |
| "loss": 0.286, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.226537216828479, | |
| "grad_norm": 0.21369656352490973, | |
| "learning_rate": 1.4280000000000002e-05, | |
| "loss": 0.2782, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.228694714131607, | |
| "grad_norm": 0.1919725559755518, | |
| "learning_rate": 1.4240000000000001e-05, | |
| "loss": 0.2775, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2308522114347356, | |
| "grad_norm": 0.19131850346902102, | |
| "learning_rate": 1.42e-05, | |
| "loss": 0.2657, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.233009708737864, | |
| "grad_norm": 0.19442842880711014, | |
| "learning_rate": 1.4160000000000002e-05, | |
| "loss": 0.2724, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2351672060409924, | |
| "grad_norm": 0.1981714390416587, | |
| "learning_rate": 1.412e-05, | |
| "loss": 0.285, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.237324703344121, | |
| "grad_norm": 0.20521747405247104, | |
| "learning_rate": 1.408e-05, | |
| "loss": 0.266, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2394822006472492, | |
| "grad_norm": 0.20356927339863315, | |
| "learning_rate": 1.4040000000000001e-05, | |
| "loss": 0.287, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.2416396979503777, | |
| "grad_norm": 0.19892125410866876, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.2835, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.243797195253506, | |
| "grad_norm": 0.19475194941115584, | |
| "learning_rate": 1.396e-05, | |
| "loss": 0.2804, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.2459546925566345, | |
| "grad_norm": 0.19481027849756127, | |
| "learning_rate": 1.3919999999999999e-05, | |
| "loss": 0.2867, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2481121898597625, | |
| "grad_norm": 0.20053383340444886, | |
| "learning_rate": 1.3880000000000001e-05, | |
| "loss": 0.2796, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.250269687162891, | |
| "grad_norm": 0.18996447992057114, | |
| "learning_rate": 1.384e-05, | |
| "loss": 0.2694, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2524271844660193, | |
| "grad_norm": 0.19590330294229358, | |
| "learning_rate": 1.3800000000000002e-05, | |
| "loss": 0.2792, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.2545846817691477, | |
| "grad_norm": 0.20196142682316945, | |
| "learning_rate": 1.376e-05, | |
| "loss": 0.2781, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.256742179072276, | |
| "grad_norm": 0.18937385936130618, | |
| "learning_rate": 1.3719999999999999e-05, | |
| "loss": 0.2765, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.2588996763754046, | |
| "grad_norm": 0.20180588042942285, | |
| "learning_rate": 1.3680000000000001e-05, | |
| "loss": 0.2884, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.261057173678533, | |
| "grad_norm": 0.19623045696350683, | |
| "learning_rate": 1.364e-05, | |
| "loss": 0.2901, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.2632146709816614, | |
| "grad_norm": 0.20787520367981113, | |
| "learning_rate": 1.3600000000000002e-05, | |
| "loss": 0.2855, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.26537216828479, | |
| "grad_norm": 0.1962467077184201, | |
| "learning_rate": 1.356e-05, | |
| "loss": 0.279, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.267529665587918, | |
| "grad_norm": 0.1995464508539786, | |
| "learning_rate": 1.352e-05, | |
| "loss": 0.2805, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.269687162891046, | |
| "grad_norm": 0.19046158544146177, | |
| "learning_rate": 1.3480000000000001e-05, | |
| "loss": 0.2684, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.2718446601941746, | |
| "grad_norm": 0.19663063830525268, | |
| "learning_rate": 1.344e-05, | |
| "loss": 0.2815, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.274002157497303, | |
| "grad_norm": 0.1999712561033732, | |
| "learning_rate": 1.3400000000000002e-05, | |
| "loss": 0.2879, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.2761596548004315, | |
| "grad_norm": 0.19284565246163382, | |
| "learning_rate": 1.336e-05, | |
| "loss": 0.2835, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.27831715210356, | |
| "grad_norm": 0.1939460481831784, | |
| "learning_rate": 1.3320000000000001e-05, | |
| "loss": 0.2852, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.2804746494066883, | |
| "grad_norm": 0.17751216415258458, | |
| "learning_rate": 1.3280000000000002e-05, | |
| "loss": 0.2553, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.2826321467098167, | |
| "grad_norm": 0.19432324788325034, | |
| "learning_rate": 1.324e-05, | |
| "loss": 0.283, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.284789644012945, | |
| "grad_norm": 0.18964666635161925, | |
| "learning_rate": 1.32e-05, | |
| "loss": 0.2659, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.286947141316073, | |
| "grad_norm": 0.19072254885569248, | |
| "learning_rate": 1.316e-05, | |
| "loss": 0.283, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.2891046386192015, | |
| "grad_norm": 0.19930445238105082, | |
| "learning_rate": 1.3120000000000001e-05, | |
| "loss": 0.2783, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.29126213592233, | |
| "grad_norm": 0.19310586069082192, | |
| "learning_rate": 1.308e-05, | |
| "loss": 0.2633, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.2934196332254584, | |
| "grad_norm": 0.19118685628714194, | |
| "learning_rate": 1.3039999999999999e-05, | |
| "loss": 0.2836, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.295577130528587, | |
| "grad_norm": 0.19463719617986558, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.2817, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.2977346278317152, | |
| "grad_norm": 0.19904862455983952, | |
| "learning_rate": 1.296e-05, | |
| "loss": 0.269, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2998921251348436, | |
| "grad_norm": 0.18016591793334202, | |
| "learning_rate": 1.2920000000000002e-05, | |
| "loss": 0.2573, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.302049622437972, | |
| "grad_norm": 0.1870821714575586, | |
| "learning_rate": 1.288e-05, | |
| "loss": 0.2704, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3042071197411005, | |
| "grad_norm": 0.1958000410540272, | |
| "learning_rate": 1.2839999999999999e-05, | |
| "loss": 0.2804, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.3063646170442285, | |
| "grad_norm": 0.19053367756271009, | |
| "learning_rate": 1.2800000000000001e-05, | |
| "loss": 0.2793, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.308522114347357, | |
| "grad_norm": 0.21616331965307153, | |
| "learning_rate": 1.276e-05, | |
| "loss": 0.2711, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.3106796116504853, | |
| "grad_norm": 0.1889243925400608, | |
| "learning_rate": 1.2720000000000002e-05, | |
| "loss": 0.2861, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3128371089536137, | |
| "grad_norm": 0.18365302232747097, | |
| "learning_rate": 1.268e-05, | |
| "loss": 0.2785, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.314994606256742, | |
| "grad_norm": 0.21361395439929864, | |
| "learning_rate": 1.2640000000000003e-05, | |
| "loss": 0.273, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.3171521035598706, | |
| "grad_norm": 0.2076208646434828, | |
| "learning_rate": 1.2600000000000001e-05, | |
| "loss": 0.2933, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.319309600862999, | |
| "grad_norm": 0.2139822291459261, | |
| "learning_rate": 1.256e-05, | |
| "loss": 0.2681, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3214670981661274, | |
| "grad_norm": 0.20701974653718327, | |
| "learning_rate": 1.252e-05, | |
| "loss": 0.2756, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.323624595469256, | |
| "grad_norm": 0.2145284687747517, | |
| "learning_rate": 1.248e-05, | |
| "loss": 0.2747, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.325782092772384, | |
| "grad_norm": 0.21677836163848008, | |
| "learning_rate": 1.244e-05, | |
| "loss": 0.2867, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.3279395900755127, | |
| "grad_norm": 0.1949952709667068, | |
| "learning_rate": 1.24e-05, | |
| "loss": 0.2738, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.3300970873786406, | |
| "grad_norm": 0.20454609129744541, | |
| "learning_rate": 1.236e-05, | |
| "loss": 0.2778, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.332254584681769, | |
| "grad_norm": 0.20493348135141942, | |
| "learning_rate": 1.232e-05, | |
| "loss": 0.2702, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.3344120819848975, | |
| "grad_norm": 0.18716804503177586, | |
| "learning_rate": 1.2280000000000001e-05, | |
| "loss": 0.2632, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.336569579288026, | |
| "grad_norm": 0.19834904622075808, | |
| "learning_rate": 1.224e-05, | |
| "loss": 0.2707, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3387270765911543, | |
| "grad_norm": 0.21807191652834473, | |
| "learning_rate": 1.22e-05, | |
| "loss": 0.2938, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.3408845738942827, | |
| "grad_norm": 0.21076158567002148, | |
| "learning_rate": 1.216e-05, | |
| "loss": 0.2832, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.343042071197411, | |
| "grad_norm": 0.2145838491982327, | |
| "learning_rate": 1.2120000000000001e-05, | |
| "loss": 0.2927, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.3451995685005396, | |
| "grad_norm": 0.19885442417194185, | |
| "learning_rate": 1.2080000000000001e-05, | |
| "loss": 0.2671, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.347357065803668, | |
| "grad_norm": 0.19696825750002114, | |
| "learning_rate": 1.204e-05, | |
| "loss": 0.2737, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.349514563106796, | |
| "grad_norm": 0.18667737946503235, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.28, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3516720604099244, | |
| "grad_norm": 0.1908481063008943, | |
| "learning_rate": 1.196e-05, | |
| "loss": 0.2737, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.353829557713053, | |
| "grad_norm": 0.19318351140026296, | |
| "learning_rate": 1.1920000000000001e-05, | |
| "loss": 0.2757, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.355987055016181, | |
| "grad_norm": 0.1961228433938055, | |
| "learning_rate": 1.1880000000000001e-05, | |
| "loss": 0.2862, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.3581445523193096, | |
| "grad_norm": 0.18057536406104824, | |
| "learning_rate": 1.1840000000000002e-05, | |
| "loss": 0.2596, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.360302049622438, | |
| "grad_norm": 0.20362429416941197, | |
| "learning_rate": 1.18e-05, | |
| "loss": 0.2949, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.3624595469255665, | |
| "grad_norm": 0.1991741123926773, | |
| "learning_rate": 1.1760000000000001e-05, | |
| "loss": 0.2759, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.364617044228695, | |
| "grad_norm": 0.19011374313750604, | |
| "learning_rate": 1.172e-05, | |
| "loss": 0.2803, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.3667745415318233, | |
| "grad_norm": 0.18495183732229703, | |
| "learning_rate": 1.168e-05, | |
| "loss": 0.275, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.3689320388349513, | |
| "grad_norm": 0.183331748886235, | |
| "learning_rate": 1.164e-05, | |
| "loss": 0.2722, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.3710895361380797, | |
| "grad_norm": 0.19464092804866787, | |
| "learning_rate": 1.16e-05, | |
| "loss": 0.2694, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.373247033441208, | |
| "grad_norm": 0.20595615106535306, | |
| "learning_rate": 1.156e-05, | |
| "loss": 0.2765, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.3754045307443366, | |
| "grad_norm": 0.20027835316245082, | |
| "learning_rate": 1.152e-05, | |
| "loss": 0.2916, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.377562028047465, | |
| "grad_norm": 0.18806800378915345, | |
| "learning_rate": 1.148e-05, | |
| "loss": 0.2691, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.3797195253505934, | |
| "grad_norm": 0.44925086549855475, | |
| "learning_rate": 1.144e-05, | |
| "loss": 0.2671, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.381877022653722, | |
| "grad_norm": 0.1956910478238287, | |
| "learning_rate": 1.1400000000000001e-05, | |
| "loss": 0.2828, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.3840345199568502, | |
| "grad_norm": 0.19656830448675866, | |
| "learning_rate": 1.1360000000000001e-05, | |
| "loss": 0.2762, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3861920172599786, | |
| "grad_norm": 0.2012449425045171, | |
| "learning_rate": 1.132e-05, | |
| "loss": 0.2878, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.3883495145631066, | |
| "grad_norm": 0.21769469381273382, | |
| "learning_rate": 1.128e-05, | |
| "loss": 0.2794, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.390507011866235, | |
| "grad_norm": 0.19950183867784813, | |
| "learning_rate": 1.124e-05, | |
| "loss": 0.2888, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.3926645091693635, | |
| "grad_norm": 0.20120005430252597, | |
| "learning_rate": 1.1200000000000001e-05, | |
| "loss": 0.2835, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.394822006472492, | |
| "grad_norm": 0.20369048867315678, | |
| "learning_rate": 1.1160000000000002e-05, | |
| "loss": 0.2842, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.3969795037756203, | |
| "grad_norm": 0.1931772151237011, | |
| "learning_rate": 1.112e-05, | |
| "loss": 0.2857, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.3991370010787487, | |
| "grad_norm": 0.19883071154560317, | |
| "learning_rate": 1.108e-05, | |
| "loss": 0.2653, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.401294498381877, | |
| "grad_norm": 0.19563569996619778, | |
| "learning_rate": 1.1040000000000001e-05, | |
| "loss": 0.2799, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.4034519956850056, | |
| "grad_norm": 0.20341050384160864, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 0.2738, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.405609492988134, | |
| "grad_norm": 0.1945190789786455, | |
| "learning_rate": 1.096e-05, | |
| "loss": 0.2793, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.407766990291262, | |
| "grad_norm": 0.21314612733031374, | |
| "learning_rate": 1.092e-05, | |
| "loss": 0.2841, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.4099244875943904, | |
| "grad_norm": 0.20091792860429103, | |
| "learning_rate": 1.088e-05, | |
| "loss": 0.2852, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.412081984897519, | |
| "grad_norm": 0.1912326765301699, | |
| "learning_rate": 1.084e-05, | |
| "loss": 0.2859, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.414239482200647, | |
| "grad_norm": 0.19350501585802152, | |
| "learning_rate": 1.08e-05, | |
| "loss": 0.2819, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4163969795037756, | |
| "grad_norm": 0.19011528786997062, | |
| "learning_rate": 1.076e-05, | |
| "loss": 0.2684, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.418554476806904, | |
| "grad_norm": 0.19626087415761062, | |
| "learning_rate": 1.072e-05, | |
| "loss": 0.2863, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.4207119741100325, | |
| "grad_norm": 0.18700947119186567, | |
| "learning_rate": 1.0680000000000001e-05, | |
| "loss": 0.2771, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.422869471413161, | |
| "grad_norm": 0.20648852488177072, | |
| "learning_rate": 1.064e-05, | |
| "loss": 0.2834, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4250269687162893, | |
| "grad_norm": 0.2012989868600837, | |
| "learning_rate": 1.06e-05, | |
| "loss": 0.2716, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.4271844660194173, | |
| "grad_norm": 0.199888643532734, | |
| "learning_rate": 1.056e-05, | |
| "loss": 0.2791, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4293419633225457, | |
| "grad_norm": 0.18259826425673448, | |
| "learning_rate": 1.0520000000000001e-05, | |
| "loss": 0.2541, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.431499460625674, | |
| "grad_norm": 0.19649477554378358, | |
| "learning_rate": 1.0480000000000001e-05, | |
| "loss": 0.2801, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.4336569579288025, | |
| "grad_norm": 0.18199564261537443, | |
| "learning_rate": 1.0440000000000002e-05, | |
| "loss": 0.2559, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.435814455231931, | |
| "grad_norm": 0.1952536593316178, | |
| "learning_rate": 1.04e-05, | |
| "loss": 0.2745, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4379719525350594, | |
| "grad_norm": 0.19876226657601323, | |
| "learning_rate": 1.036e-05, | |
| "loss": 0.2742, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.440129449838188, | |
| "grad_norm": 0.199938722386871, | |
| "learning_rate": 1.0320000000000001e-05, | |
| "loss": 0.2877, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.4422869471413162, | |
| "grad_norm": 0.1793115472237926, | |
| "learning_rate": 1.0280000000000002e-05, | |
| "loss": 0.2672, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.20872212600922832, | |
| "learning_rate": 1.024e-05, | |
| "loss": 0.2776, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.4466019417475726, | |
| "grad_norm": 0.19129962341137918, | |
| "learning_rate": 1.02e-05, | |
| "loss": 0.2819, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.448759439050701, | |
| "grad_norm": 0.19282605224929167, | |
| "learning_rate": 1.016e-05, | |
| "loss": 0.2864, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.4509169363538295, | |
| "grad_norm": 0.19659894723021457, | |
| "learning_rate": 1.012e-05, | |
| "loss": 0.2886, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.453074433656958, | |
| "grad_norm": 0.19381386784264035, | |
| "learning_rate": 1.008e-05, | |
| "loss": 0.288, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4552319309600863, | |
| "grad_norm": 0.18628308033727628, | |
| "learning_rate": 1.004e-05, | |
| "loss": 0.2739, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.4573894282632147, | |
| "grad_norm": 0.21441635507962467, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2932, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.459546925566343, | |
| "grad_norm": 0.20534818966463134, | |
| "learning_rate": 9.96e-06, | |
| "loss": 0.2855, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.4617044228694716, | |
| "grad_norm": 0.1849300035193185, | |
| "learning_rate": 9.92e-06, | |
| "loss": 0.2632, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.4638619201726, | |
| "grad_norm": 0.18852281346545738, | |
| "learning_rate": 9.88e-06, | |
| "loss": 0.278, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.466019417475728, | |
| "grad_norm": 0.18627944823678463, | |
| "learning_rate": 9.84e-06, | |
| "loss": 0.275, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4681769147788564, | |
| "grad_norm": 0.2045693903724477, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 0.2712, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.470334412081985, | |
| "grad_norm": 0.1989352745180615, | |
| "learning_rate": 9.760000000000001e-06, | |
| "loss": 0.2832, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.472491909385113, | |
| "grad_norm": 0.19608690680269333, | |
| "learning_rate": 9.72e-06, | |
| "loss": 0.2765, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.4746494066882416, | |
| "grad_norm": 0.18376170966118, | |
| "learning_rate": 9.68e-06, | |
| "loss": 0.2877, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.47680690399137, | |
| "grad_norm": 0.1830683736706619, | |
| "learning_rate": 9.640000000000001e-06, | |
| "loss": 0.273, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.4789644012944985, | |
| "grad_norm": 0.19774471106842714, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.2768, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.481121898597627, | |
| "grad_norm": 0.5289386953378491, | |
| "learning_rate": 9.560000000000002e-06, | |
| "loss": 0.2859, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.4832793959007553, | |
| "grad_norm": 0.18054827974203572, | |
| "learning_rate": 9.52e-06, | |
| "loss": 0.2739, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4854368932038833, | |
| "grad_norm": 0.19041823794360682, | |
| "learning_rate": 9.48e-06, | |
| "loss": 0.2715, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.4875943905070117, | |
| "grad_norm": 0.19696127404865826, | |
| "learning_rate": 9.44e-06, | |
| "loss": 0.2749, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.48975188781014, | |
| "grad_norm": 0.20729223436965724, | |
| "learning_rate": 9.4e-06, | |
| "loss": 0.2665, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.4919093851132685, | |
| "grad_norm": 0.2151672552731983, | |
| "learning_rate": 9.36e-06, | |
| "loss": 0.2771, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.494066882416397, | |
| "grad_norm": 0.19970041745608494, | |
| "learning_rate": 9.32e-06, | |
| "loss": 0.2844, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.4962243797195254, | |
| "grad_norm": 0.1875452071423268, | |
| "learning_rate": 9.28e-06, | |
| "loss": 0.269, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.498381877022654, | |
| "grad_norm": 0.1815111230711524, | |
| "learning_rate": 9.24e-06, | |
| "loss": 0.2776, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.500539374325782, | |
| "grad_norm": 0.21522418787260655, | |
| "learning_rate": 9.2e-06, | |
| "loss": 0.2643, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.5026968716289106, | |
| "grad_norm": 0.21059659615733978, | |
| "learning_rate": 9.16e-06, | |
| "loss": 0.2859, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.5048543689320386, | |
| "grad_norm": 0.21572007660549064, | |
| "learning_rate": 9.12e-06, | |
| "loss": 0.2837, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5070118662351675, | |
| "grad_norm": 0.19174617150175122, | |
| "learning_rate": 9.080000000000001e-06, | |
| "loss": 0.2664, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.5091693635382954, | |
| "grad_norm": 0.18370659572207848, | |
| "learning_rate": 9.04e-06, | |
| "loss": 0.2799, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.511326860841424, | |
| "grad_norm": 0.18956236987690317, | |
| "learning_rate": 9e-06, | |
| "loss": 0.2736, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.5134843581445523, | |
| "grad_norm": 0.21218350316632872, | |
| "learning_rate": 8.96e-06, | |
| "loss": 0.2745, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.5156418554476807, | |
| "grad_norm": 0.20564251196662126, | |
| "learning_rate": 8.920000000000001e-06, | |
| "loss": 0.2728, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.517799352750809, | |
| "grad_norm": 0.18190663215352182, | |
| "learning_rate": 8.880000000000001e-06, | |
| "loss": 0.2717, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5199568500539375, | |
| "grad_norm": 0.19047962603852486, | |
| "learning_rate": 8.840000000000002e-06, | |
| "loss": 0.274, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.522114347357066, | |
| "grad_norm": 0.1956042804734501, | |
| "learning_rate": 8.8e-06, | |
| "loss": 0.2688, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.524271844660194, | |
| "grad_norm": 0.1960949651558405, | |
| "learning_rate": 8.76e-06, | |
| "loss": 0.2824, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.526429341963323, | |
| "grad_norm": 0.1880455744302837, | |
| "learning_rate": 8.720000000000001e-06, | |
| "loss": 0.2731, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.528586839266451, | |
| "grad_norm": 0.20463391348180632, | |
| "learning_rate": 8.68e-06, | |
| "loss": 0.2874, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.530744336569579, | |
| "grad_norm": 0.1868158566005862, | |
| "learning_rate": 8.64e-06, | |
| "loss": 0.2733, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.5329018338727076, | |
| "grad_norm": 0.19201921577442338, | |
| "learning_rate": 8.599999999999999e-06, | |
| "loss": 0.2707, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.535059331175836, | |
| "grad_norm": 0.1870377402294841, | |
| "learning_rate": 8.56e-06, | |
| "loss": 0.2705, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.5372168284789645, | |
| "grad_norm": 0.20226457534658343, | |
| "learning_rate": 8.52e-06, | |
| "loss": 0.2642, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.539374325782093, | |
| "grad_norm": 0.1957674767179278, | |
| "learning_rate": 8.48e-06, | |
| "loss": 0.2891, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.5415318230852213, | |
| "grad_norm": 0.18492027429247337, | |
| "learning_rate": 8.44e-06, | |
| "loss": 0.2724, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.5436893203883493, | |
| "grad_norm": 0.18451597966638716, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 0.2881, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.545846817691478, | |
| "grad_norm": 0.18042456868864143, | |
| "learning_rate": 8.36e-06, | |
| "loss": 0.2809, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.548004314994606, | |
| "grad_norm": 0.1939726883551459, | |
| "learning_rate": 8.32e-06, | |
| "loss": 0.2707, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.5501618122977345, | |
| "grad_norm": 0.19107636821986104, | |
| "learning_rate": 8.28e-06, | |
| "loss": 0.2797, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.552319309600863, | |
| "grad_norm": 0.18956795249933478, | |
| "learning_rate": 8.24e-06, | |
| "loss": 0.2705, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.5544768069039914, | |
| "grad_norm": 0.19186426285857633, | |
| "learning_rate": 8.200000000000001e-06, | |
| "loss": 0.2715, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.55663430420712, | |
| "grad_norm": 0.19503695734156132, | |
| "learning_rate": 8.160000000000001e-06, | |
| "loss": 0.2757, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.558791801510248, | |
| "grad_norm": 0.17801355493098456, | |
| "learning_rate": 8.12e-06, | |
| "loss": 0.2695, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.5609492988133766, | |
| "grad_norm": 0.18569837697870945, | |
| "learning_rate": 8.08e-06, | |
| "loss": 0.2788, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.5631067961165046, | |
| "grad_norm": 0.18209260370415475, | |
| "learning_rate": 8.040000000000001e-06, | |
| "loss": 0.2834, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.5652642934196335, | |
| "grad_norm": 0.20862033912842837, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.2787, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5674217907227614, | |
| "grad_norm": 0.18909747904684734, | |
| "learning_rate": 7.96e-06, | |
| "loss": 0.2712, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.56957928802589, | |
| "grad_norm": 0.21129244434665395, | |
| "learning_rate": 7.92e-06, | |
| "loss": 0.287, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.5717367853290183, | |
| "grad_norm": 0.18022619488696826, | |
| "learning_rate": 7.879999999999999e-06, | |
| "loss": 0.2813, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.5738942826321467, | |
| "grad_norm": 0.19852098535134896, | |
| "learning_rate": 7.84e-06, | |
| "loss": 0.2862, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.576051779935275, | |
| "grad_norm": 0.19617888636362643, | |
| "learning_rate": 7.8e-06, | |
| "loss": 0.2844, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.5782092772384035, | |
| "grad_norm": 0.18969668381525537, | |
| "learning_rate": 7.76e-06, | |
| "loss": 0.2891, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.580366774541532, | |
| "grad_norm": 0.18430145815088345, | |
| "learning_rate": 7.72e-06, | |
| "loss": 0.2721, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.58252427184466, | |
| "grad_norm": 0.18320017834506805, | |
| "learning_rate": 7.68e-06, | |
| "loss": 0.2774, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.584681769147789, | |
| "grad_norm": 0.1892382939282894, | |
| "learning_rate": 7.64e-06, | |
| "loss": 0.2759, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.5868392664509168, | |
| "grad_norm": 0.1880248772535744, | |
| "learning_rate": 7.6e-06, | |
| "loss": 0.2797, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.588996763754045, | |
| "grad_norm": 0.18324546736309735, | |
| "learning_rate": 7.5600000000000005e-06, | |
| "loss": 0.2939, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.5911542610571736, | |
| "grad_norm": 0.1821909206802706, | |
| "learning_rate": 7.520000000000001e-06, | |
| "loss": 0.2797, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.593311758360302, | |
| "grad_norm": 0.18785236372663316, | |
| "learning_rate": 7.480000000000001e-06, | |
| "loss": 0.2798, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.5954692556634305, | |
| "grad_norm": 0.18862019277830552, | |
| "learning_rate": 7.44e-06, | |
| "loss": 0.268, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.597626752966559, | |
| "grad_norm": 0.17802704840377132, | |
| "learning_rate": 7.4e-06, | |
| "loss": 0.2653, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.5997842502696873, | |
| "grad_norm": 0.18808394376760307, | |
| "learning_rate": 7.36e-06, | |
| "loss": 0.2819, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6019417475728153, | |
| "grad_norm": 0.1736414972830234, | |
| "learning_rate": 7.32e-06, | |
| "loss": 0.2755, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.604099244875944, | |
| "grad_norm": 0.19798820811833512, | |
| "learning_rate": 7.280000000000001e-06, | |
| "loss": 0.2766, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.606256742179072, | |
| "grad_norm": 0.19360358509115017, | |
| "learning_rate": 7.240000000000001e-06, | |
| "loss": 0.278, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.6084142394822005, | |
| "grad_norm": 0.1819274054858843, | |
| "learning_rate": 7.2e-06, | |
| "loss": 0.2748, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.610571736785329, | |
| "grad_norm": 0.19153275592635163, | |
| "learning_rate": 7.16e-06, | |
| "loss": 0.2865, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.6127292340884574, | |
| "grad_norm": 0.18106937603713005, | |
| "learning_rate": 7.1200000000000004e-06, | |
| "loss": 0.2836, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.614886731391586, | |
| "grad_norm": 0.18316357873796607, | |
| "learning_rate": 7.080000000000001e-06, | |
| "loss": 0.2703, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.617044228694714, | |
| "grad_norm": 0.19012692093079211, | |
| "learning_rate": 7.04e-06, | |
| "loss": 0.283, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.6192017259978426, | |
| "grad_norm": 0.1886991400739291, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 0.2951, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.6213592233009706, | |
| "grad_norm": 0.17872080834424994, | |
| "learning_rate": 6.9599999999999994e-06, | |
| "loss": 0.275, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.6235167206040995, | |
| "grad_norm": 0.19038957823975905, | |
| "learning_rate": 6.92e-06, | |
| "loss": 0.276, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.6256742179072274, | |
| "grad_norm": 0.18366617360792217, | |
| "learning_rate": 6.88e-06, | |
| "loss": 0.2866, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.627831715210356, | |
| "grad_norm": 0.18194261875821086, | |
| "learning_rate": 6.840000000000001e-06, | |
| "loss": 0.2789, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.6299892125134843, | |
| "grad_norm": 0.18748953872554902, | |
| "learning_rate": 6.800000000000001e-06, | |
| "loss": 0.2833, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.6321467098166127, | |
| "grad_norm": 0.18281890368924675, | |
| "learning_rate": 6.76e-06, | |
| "loss": 0.2712, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.634304207119741, | |
| "grad_norm": 0.18225058596967514, | |
| "learning_rate": 6.72e-06, | |
| "loss": 0.2784, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.6364617044228695, | |
| "grad_norm": 0.1825800678446187, | |
| "learning_rate": 6.68e-06, | |
| "loss": 0.2724, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.638619201725998, | |
| "grad_norm": 0.19363871055752166, | |
| "learning_rate": 6.640000000000001e-06, | |
| "loss": 0.2684, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.6407766990291264, | |
| "grad_norm": 0.1944748649154887, | |
| "learning_rate": 6.6e-06, | |
| "loss": 0.2784, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.642934196332255, | |
| "grad_norm": 0.1881742065564678, | |
| "learning_rate": 6.560000000000001e-06, | |
| "loss": 0.2756, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.6450916936353828, | |
| "grad_norm": 0.18392054853177556, | |
| "learning_rate": 6.519999999999999e-06, | |
| "loss": 0.2796, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.647249190938511, | |
| "grad_norm": 0.18906848548661837, | |
| "learning_rate": 6.48e-06, | |
| "loss": 0.2761, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.6494066882416396, | |
| "grad_norm": 0.1770937107224099, | |
| "learning_rate": 6.44e-06, | |
| "loss": 0.2764, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.651564185544768, | |
| "grad_norm": 0.1919361386776459, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 0.2762, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.6537216828478964, | |
| "grad_norm": 0.1898063423812771, | |
| "learning_rate": 6.360000000000001e-06, | |
| "loss": 0.279, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.655879180151025, | |
| "grad_norm": 0.18359024005891844, | |
| "learning_rate": 6.320000000000001e-06, | |
| "loss": 0.2895, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6580366774541533, | |
| "grad_norm": 0.18905016504891056, | |
| "learning_rate": 6.28e-06, | |
| "loss": 0.2841, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.6601941747572817, | |
| "grad_norm": 0.18962361851782872, | |
| "learning_rate": 6.24e-06, | |
| "loss": 0.266, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.66235167206041, | |
| "grad_norm": 0.18363662298141045, | |
| "learning_rate": 6.2e-06, | |
| "loss": 0.2874, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.664509169363538, | |
| "grad_norm": 0.20630491074122706, | |
| "learning_rate": 6.16e-06, | |
| "loss": 0.2906, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.18135071779569467, | |
| "learning_rate": 6.12e-06, | |
| "loss": 0.285, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.668824163969795, | |
| "grad_norm": 0.18372632774369446, | |
| "learning_rate": 6.08e-06, | |
| "loss": 0.2739, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6709816612729234, | |
| "grad_norm": 0.18428383879403848, | |
| "learning_rate": 6.040000000000001e-06, | |
| "loss": 0.2875, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.6731391585760518, | |
| "grad_norm": 0.18460310424536336, | |
| "learning_rate": 6e-06, | |
| "loss": 0.2556, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.67529665587918, | |
| "grad_norm": 0.18947805389880149, | |
| "learning_rate": 5.9600000000000005e-06, | |
| "loss": 0.2798, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.6774541531823086, | |
| "grad_norm": 0.19780847857755182, | |
| "learning_rate": 5.920000000000001e-06, | |
| "loss": 0.2675, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.679611650485437, | |
| "grad_norm": 0.18499409934277794, | |
| "learning_rate": 5.8800000000000005e-06, | |
| "loss": 0.2806, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.6817691477885655, | |
| "grad_norm": 0.18316817509358638, | |
| "learning_rate": 5.84e-06, | |
| "loss": 0.2765, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.6839266450916934, | |
| "grad_norm": 0.17478366686579072, | |
| "learning_rate": 5.8e-06, | |
| "loss": 0.2644, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.686084142394822, | |
| "grad_norm": 0.19358311792535488, | |
| "learning_rate": 5.76e-06, | |
| "loss": 0.264, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.6882416396979503, | |
| "grad_norm": 0.1817491973984989, | |
| "learning_rate": 5.72e-06, | |
| "loss": 0.2776, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.6903991370010787, | |
| "grad_norm": 0.17836599511127066, | |
| "learning_rate": 5.680000000000001e-06, | |
| "loss": 0.2729, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.692556634304207, | |
| "grad_norm": 0.20855396251969097, | |
| "learning_rate": 5.64e-06, | |
| "loss": 0.2797, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.6947141316073355, | |
| "grad_norm": 0.17710319375165606, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.2664, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.696871628910464, | |
| "grad_norm": 0.18182561629022773, | |
| "learning_rate": 5.56e-06, | |
| "loss": 0.285, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.6990291262135924, | |
| "grad_norm": 0.18254255786329926, | |
| "learning_rate": 5.5200000000000005e-06, | |
| "loss": 0.2668, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.701186623516721, | |
| "grad_norm": 0.18071712621398686, | |
| "learning_rate": 5.48e-06, | |
| "loss": 0.2645, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.7033441208198488, | |
| "grad_norm": 0.17992767145304728, | |
| "learning_rate": 5.44e-06, | |
| "loss": 0.2747, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.705501618122977, | |
| "grad_norm": 0.184558078685827, | |
| "learning_rate": 5.4e-06, | |
| "loss": 0.2726, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.7076591154261056, | |
| "grad_norm": 0.1848059441306179, | |
| "learning_rate": 5.36e-06, | |
| "loss": 0.2829, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.709816612729234, | |
| "grad_norm": 0.17640374705147477, | |
| "learning_rate": 5.32e-06, | |
| "loss": 0.2701, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.7119741100323624, | |
| "grad_norm": 0.21805384839708805, | |
| "learning_rate": 5.28e-06, | |
| "loss": 0.2716, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.714131607335491, | |
| "grad_norm": 0.20101963800895353, | |
| "learning_rate": 5.240000000000001e-06, | |
| "loss": 0.2886, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.7162891046386193, | |
| "grad_norm": 0.1804114201116908, | |
| "learning_rate": 5.2e-06, | |
| "loss": 0.2776, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7184466019417477, | |
| "grad_norm": 0.1774653180001705, | |
| "learning_rate": 5.1600000000000006e-06, | |
| "loss": 0.2826, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.720604099244876, | |
| "grad_norm": 0.17688387986830606, | |
| "learning_rate": 5.12e-06, | |
| "loss": 0.2689, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.722761596548004, | |
| "grad_norm": 0.18501330626919668, | |
| "learning_rate": 5.08e-06, | |
| "loss": 0.2703, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.724919093851133, | |
| "grad_norm": 0.17773374479207837, | |
| "learning_rate": 5.04e-06, | |
| "loss": 0.2697, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.727076591154261, | |
| "grad_norm": 0.18714014475865487, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2659, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.7292340884573894, | |
| "grad_norm": 0.1891342823049284, | |
| "learning_rate": 4.96e-06, | |
| "loss": 0.2689, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.7313915857605178, | |
| "grad_norm": 0.18120794072745916, | |
| "learning_rate": 4.92e-06, | |
| "loss": 0.2801, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.733549083063646, | |
| "grad_norm": 0.18421472259058802, | |
| "learning_rate": 4.880000000000001e-06, | |
| "loss": 0.2742, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.7357065803667746, | |
| "grad_norm": 0.19424882942881327, | |
| "learning_rate": 4.84e-06, | |
| "loss": 0.2835, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.737864077669903, | |
| "grad_norm": 0.19043478378237688, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.2689, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.7400215749730314, | |
| "grad_norm": 0.1849206683939454, | |
| "learning_rate": 4.76e-06, | |
| "loss": 0.2761, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.7421790722761594, | |
| "grad_norm": 0.18522900805651904, | |
| "learning_rate": 4.72e-06, | |
| "loss": 0.2767, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.7443365695792883, | |
| "grad_norm": 0.17248319008493151, | |
| "learning_rate": 4.68e-06, | |
| "loss": 0.2668, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.7464940668824163, | |
| "grad_norm": 0.18993609725197919, | |
| "learning_rate": 4.64e-06, | |
| "loss": 0.2744, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.7486515641855447, | |
| "grad_norm": 0.17727961540010734, | |
| "learning_rate": 4.6e-06, | |
| "loss": 0.2708, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.750809061488673, | |
| "grad_norm": 0.17989663911750914, | |
| "learning_rate": 4.56e-06, | |
| "loss": 0.2748, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.7529665587918015, | |
| "grad_norm": 0.18993560472210555, | |
| "learning_rate": 4.52e-06, | |
| "loss": 0.2876, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.75512405609493, | |
| "grad_norm": 0.18474539106255636, | |
| "learning_rate": 4.48e-06, | |
| "loss": 0.2767, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.7572815533980584, | |
| "grad_norm": 0.18191823825028888, | |
| "learning_rate": 4.440000000000001e-06, | |
| "loss": 0.2854, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.759439050701187, | |
| "grad_norm": 0.18478282075534863, | |
| "learning_rate": 4.4e-06, | |
| "loss": 0.2888, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7615965480043148, | |
| "grad_norm": 0.17755688039996642, | |
| "learning_rate": 4.360000000000001e-06, | |
| "loss": 0.2737, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.7637540453074436, | |
| "grad_norm": 0.18419934859887713, | |
| "learning_rate": 4.32e-06, | |
| "loss": 0.2687, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.7659115426105716, | |
| "grad_norm": 0.1900384282711362, | |
| "learning_rate": 4.28e-06, | |
| "loss": 0.2759, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.7680690399137, | |
| "grad_norm": 0.1784962757580093, | |
| "learning_rate": 4.24e-06, | |
| "loss": 0.2716, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7702265372168284, | |
| "grad_norm": 0.1809036819922652, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 0.27, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.772384034519957, | |
| "grad_norm": 0.18282579091259332, | |
| "learning_rate": 4.16e-06, | |
| "loss": 0.2865, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.7745415318230853, | |
| "grad_norm": 0.18438768981642326, | |
| "learning_rate": 4.12e-06, | |
| "loss": 0.2774, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.7766990291262137, | |
| "grad_norm": 0.19152300030259864, | |
| "learning_rate": 4.080000000000001e-06, | |
| "loss": 0.2796, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.778856526429342, | |
| "grad_norm": 0.17285666874457525, | |
| "learning_rate": 4.04e-06, | |
| "loss": 0.2754, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.78101402373247, | |
| "grad_norm": 0.18414001866694285, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.274, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.783171521035599, | |
| "grad_norm": 0.17520099229494396, | |
| "learning_rate": 3.96e-06, | |
| "loss": 0.2766, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.785329018338727, | |
| "grad_norm": 0.1806154595467803, | |
| "learning_rate": 3.92e-06, | |
| "loss": 0.2801, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7874865156418553, | |
| "grad_norm": 0.18980187456754713, | |
| "learning_rate": 3.88e-06, | |
| "loss": 0.2727, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.7896440129449838, | |
| "grad_norm": 0.19597770737066833, | |
| "learning_rate": 3.84e-06, | |
| "loss": 0.285, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.791801510248112, | |
| "grad_norm": 0.18063854460198395, | |
| "learning_rate": 3.8e-06, | |
| "loss": 0.2752, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.7939590075512406, | |
| "grad_norm": 0.17977161179376194, | |
| "learning_rate": 3.7600000000000004e-06, | |
| "loss": 0.278, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.796116504854369, | |
| "grad_norm": 0.17338820852578288, | |
| "learning_rate": 3.72e-06, | |
| "loss": 0.2595, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.7982740021574974, | |
| "grad_norm": 0.18386654166694605, | |
| "learning_rate": 3.68e-06, | |
| "loss": 0.2767, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.8004314994606254, | |
| "grad_norm": 0.17901186263657326, | |
| "learning_rate": 3.6400000000000003e-06, | |
| "loss": 0.2797, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.8025889967637543, | |
| "grad_norm": 0.17993331313633365, | |
| "learning_rate": 3.6e-06, | |
| "loss": 0.274, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8047464940668823, | |
| "grad_norm": 1.7365736687950526, | |
| "learning_rate": 3.5600000000000002e-06, | |
| "loss": 0.2749, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.8069039913700107, | |
| "grad_norm": 0.18738038459179354, | |
| "learning_rate": 3.52e-06, | |
| "loss": 0.2829, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.809061488673139, | |
| "grad_norm": 0.17958359868903084, | |
| "learning_rate": 3.4799999999999997e-06, | |
| "loss": 0.2774, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.8112189859762675, | |
| "grad_norm": 0.17556904314160943, | |
| "learning_rate": 3.44e-06, | |
| "loss": 0.2751, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.813376483279396, | |
| "grad_norm": 0.1788953079572772, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 0.2797, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.8155339805825244, | |
| "grad_norm": 0.1881320873873, | |
| "learning_rate": 3.36e-06, | |
| "loss": 0.2873, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.8176914778856528, | |
| "grad_norm": 0.17951722014152932, | |
| "learning_rate": 3.3200000000000004e-06, | |
| "loss": 0.2776, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.8198489751887807, | |
| "grad_norm": 0.17798122918873352, | |
| "learning_rate": 3.2800000000000004e-06, | |
| "loss": 0.2783, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.8220064724919096, | |
| "grad_norm": 0.18442039725945125, | |
| "learning_rate": 3.24e-06, | |
| "loss": 0.2878, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.8241639697950376, | |
| "grad_norm": 0.18259364011672352, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 0.2884, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.826321467098166, | |
| "grad_norm": 0.1829848041342495, | |
| "learning_rate": 3.1600000000000007e-06, | |
| "loss": 0.2849, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.8284789644012944, | |
| "grad_norm": 0.179094433479786, | |
| "learning_rate": 3.12e-06, | |
| "loss": 0.2908, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.830636461704423, | |
| "grad_norm": 0.17681991868248295, | |
| "learning_rate": 3.08e-06, | |
| "loss": 0.2807, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.8327939590075513, | |
| "grad_norm": 0.17971050304421513, | |
| "learning_rate": 3.04e-06, | |
| "loss": 0.2848, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.8349514563106797, | |
| "grad_norm": 0.1828969018946432, | |
| "learning_rate": 3e-06, | |
| "loss": 0.2912, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.837108953613808, | |
| "grad_norm": 0.1751282473237561, | |
| "learning_rate": 2.9600000000000005e-06, | |
| "loss": 0.2695, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.839266450916936, | |
| "grad_norm": 0.17477314465096877, | |
| "learning_rate": 2.92e-06, | |
| "loss": 0.2759, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.841423948220065, | |
| "grad_norm": 0.17636269064952895, | |
| "learning_rate": 2.88e-06, | |
| "loss": 0.2864, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.843581445523193, | |
| "grad_norm": 0.1818185234275195, | |
| "learning_rate": 2.8400000000000003e-06, | |
| "loss": 0.29, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.8457389428263213, | |
| "grad_norm": 0.18916576451978248, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 0.2795, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.8478964401294498, | |
| "grad_norm": 0.16973846926356626, | |
| "learning_rate": 2.7600000000000003e-06, | |
| "loss": 0.2699, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.850053937432578, | |
| "grad_norm": 0.17182169354600235, | |
| "learning_rate": 2.72e-06, | |
| "loss": 0.2791, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.8522114347357066, | |
| "grad_norm": 0.1770412391282775, | |
| "learning_rate": 2.68e-06, | |
| "loss": 0.2765, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.854368932038835, | |
| "grad_norm": 0.17466980330096582, | |
| "learning_rate": 2.64e-06, | |
| "loss": 0.2623, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.8565264293419634, | |
| "grad_norm": 0.174928731160995, | |
| "learning_rate": 2.6e-06, | |
| "loss": 0.2656, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.858683926645092, | |
| "grad_norm": 0.18175215429014172, | |
| "learning_rate": 2.56e-06, | |
| "loss": 0.2819, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.8608414239482203, | |
| "grad_norm": 0.1749725285284474, | |
| "learning_rate": 2.52e-06, | |
| "loss": 0.2725, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.8629989212513482, | |
| "grad_norm": 0.17461652174068218, | |
| "learning_rate": 2.48e-06, | |
| "loss": 0.2696, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8651564185544767, | |
| "grad_norm": 0.1811005220241736, | |
| "learning_rate": 2.4400000000000004e-06, | |
| "loss": 0.2742, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.867313915857605, | |
| "grad_norm": 0.17722407880597277, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.2773, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.8694714131607335, | |
| "grad_norm": 0.18478937048714053, | |
| "learning_rate": 2.36e-06, | |
| "loss": 0.2762, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.871628910463862, | |
| "grad_norm": 0.1864309200002423, | |
| "learning_rate": 2.32e-06, | |
| "loss": 0.2765, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8737864077669903, | |
| "grad_norm": 0.18835737178532266, | |
| "learning_rate": 2.28e-06, | |
| "loss": 0.3021, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.8759439050701188, | |
| "grad_norm": 0.18323666534169097, | |
| "learning_rate": 2.24e-06, | |
| "loss": 0.2741, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.878101402373247, | |
| "grad_norm": 0.17560962160831747, | |
| "learning_rate": 2.2e-06, | |
| "loss": 0.2828, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.8802588996763756, | |
| "grad_norm": 0.18666287588971897, | |
| "learning_rate": 2.16e-06, | |
| "loss": 0.2954, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8824163969795036, | |
| "grad_norm": 0.17167493193172903, | |
| "learning_rate": 2.12e-06, | |
| "loss": 0.272, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.884573894282632, | |
| "grad_norm": 0.17629098716034136, | |
| "learning_rate": 2.08e-06, | |
| "loss": 0.2889, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.8867313915857604, | |
| "grad_norm": 0.18406413731232454, | |
| "learning_rate": 2.0400000000000004e-06, | |
| "loss": 0.2939, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.18163509827918725, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.2761, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.8910463861920173, | |
| "grad_norm": 0.18223323103128686, | |
| "learning_rate": 1.96e-06, | |
| "loss": 0.2838, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.8932038834951457, | |
| "grad_norm": 0.18297522132977273, | |
| "learning_rate": 1.92e-06, | |
| "loss": 0.2859, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.895361380798274, | |
| "grad_norm": 0.1863689788725593, | |
| "learning_rate": 1.8800000000000002e-06, | |
| "loss": 0.2662, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.8975188781014025, | |
| "grad_norm": 0.17488508490885735, | |
| "learning_rate": 1.84e-06, | |
| "loss": 0.2756, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.899676375404531, | |
| "grad_norm": 0.17992252244376258, | |
| "learning_rate": 1.8e-06, | |
| "loss": 0.264, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.901833872707659, | |
| "grad_norm": 0.17566030078045597, | |
| "learning_rate": 1.76e-06, | |
| "loss": 0.2696, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.9039913700107873, | |
| "grad_norm": 0.17910981646975624, | |
| "learning_rate": 1.72e-06, | |
| "loss": 0.2812, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.9061488673139158, | |
| "grad_norm": 0.18324253604522966, | |
| "learning_rate": 1.68e-06, | |
| "loss": 0.2759, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.908306364617044, | |
| "grad_norm": 0.18154686495512268, | |
| "learning_rate": 1.6400000000000002e-06, | |
| "loss": 0.2948, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.9104638619201726, | |
| "grad_norm": 0.1856157405155338, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.2842, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.912621359223301, | |
| "grad_norm": 0.167599852032292, | |
| "learning_rate": 1.56e-06, | |
| "loss": 0.2566, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.9147788565264294, | |
| "grad_norm": 0.17436662357542593, | |
| "learning_rate": 1.52e-06, | |
| "loss": 0.2787, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.916936353829558, | |
| "grad_norm": 0.1728537122922309, | |
| "learning_rate": 1.4800000000000002e-06, | |
| "loss": 0.2843, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.9190938511326863, | |
| "grad_norm": 0.17797631599058847, | |
| "learning_rate": 1.44e-06, | |
| "loss": 0.2651, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.9212513484358142, | |
| "grad_norm": 0.1770523988484935, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 0.2675, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.9234088457389427, | |
| "grad_norm": 0.19407900415997797, | |
| "learning_rate": 1.36e-06, | |
| "loss": 0.2845, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.925566343042071, | |
| "grad_norm": 0.1788656382635837, | |
| "learning_rate": 1.32e-06, | |
| "loss": 0.2704, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.9277238403451995, | |
| "grad_norm": 0.18340763904947952, | |
| "learning_rate": 1.28e-06, | |
| "loss": 0.2789, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.929881337648328, | |
| "grad_norm": 0.17707734315166593, | |
| "learning_rate": 1.24e-06, | |
| "loss": 0.2822, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.9320388349514563, | |
| "grad_norm": 0.17856073770387085, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.2727, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.9341963322545848, | |
| "grad_norm": 0.18303142619049678, | |
| "learning_rate": 1.16e-06, | |
| "loss": 0.2714, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.936353829557713, | |
| "grad_norm": 0.17597510232889213, | |
| "learning_rate": 1.12e-06, | |
| "loss": 0.278, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.9385113268608416, | |
| "grad_norm": 0.18381504160428017, | |
| "learning_rate": 1.08e-06, | |
| "loss": 0.2793, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.9406688241639696, | |
| "grad_norm": 0.16872753735299875, | |
| "learning_rate": 1.04e-06, | |
| "loss": 0.2623, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9428263214670984, | |
| "grad_norm": 0.17739253303853683, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.2828, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.9449838187702264, | |
| "grad_norm": 0.17698033237379485, | |
| "learning_rate": 9.6e-07, | |
| "loss": 0.2775, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.947141316073355, | |
| "grad_norm": 0.17104830030403248, | |
| "learning_rate": 9.2e-07, | |
| "loss": 0.2748, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.9492988133764833, | |
| "grad_norm": 0.17749083061324694, | |
| "learning_rate": 8.8e-07, | |
| "loss": 0.2794, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.9514563106796117, | |
| "grad_norm": 0.1798254311534882, | |
| "learning_rate": 8.4e-07, | |
| "loss": 0.2788, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.95361380798274, | |
| "grad_norm": 0.1784770144640976, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.2677, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.9557713052858685, | |
| "grad_norm": 0.17694547600035745, | |
| "learning_rate": 7.6e-07, | |
| "loss": 0.2948, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.957928802588997, | |
| "grad_norm": 0.1766645173046717, | |
| "learning_rate": 7.2e-07, | |
| "loss": 0.2752, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.960086299892125, | |
| "grad_norm": 0.17231184142391467, | |
| "learning_rate": 6.8e-07, | |
| "loss": 0.2708, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 2.9622437971952538, | |
| "grad_norm": 0.17699841769345795, | |
| "learning_rate": 6.4e-07, | |
| "loss": 0.283, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9644012944983817, | |
| "grad_norm": 0.17906935863892173, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.2805, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.96655879180151, | |
| "grad_norm": 0.18194456358375216, | |
| "learning_rate": 5.6e-07, | |
| "loss": 0.2784, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9687162891046386, | |
| "grad_norm": 0.16849620824352807, | |
| "learning_rate": 5.2e-07, | |
| "loss": 0.2754, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 2.970873786407767, | |
| "grad_norm": 0.17761203976543524, | |
| "learning_rate": 4.8e-07, | |
| "loss": 0.2708, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.9730312837108954, | |
| "grad_norm": 0.17640781813626127, | |
| "learning_rate": 4.4e-07, | |
| "loss": 0.2834, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 2.975188781014024, | |
| "grad_norm": 0.17331909600308287, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 0.2743, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9773462783171523, | |
| "grad_norm": 0.17381405226314825, | |
| "learning_rate": 3.6e-07, | |
| "loss": 0.2782, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 2.9795037756202802, | |
| "grad_norm": 0.17830336979426112, | |
| "learning_rate": 3.2e-07, | |
| "loss": 0.2758, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.981661272923409, | |
| "grad_norm": 0.16766789787935527, | |
| "learning_rate": 2.8e-07, | |
| "loss": 0.2651, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 2.983818770226537, | |
| "grad_norm": 0.18328063807292563, | |
| "learning_rate": 2.4e-07, | |
| "loss": 0.2894, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9859762675296655, | |
| "grad_norm": 0.17725873997381406, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 0.2812, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.988133764832794, | |
| "grad_norm": 0.18575248175533493, | |
| "learning_rate": 1.6e-07, | |
| "loss": 0.2762, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.9902912621359223, | |
| "grad_norm": 0.1738477732474402, | |
| "learning_rate": 1.2e-07, | |
| "loss": 0.2754, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 2.9924487594390508, | |
| "grad_norm": 0.17133851982383583, | |
| "learning_rate": 8e-08, | |
| "loss": 0.2612, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.994606256742179, | |
| "grad_norm": 0.18020219397411835, | |
| "learning_rate": 4e-08, | |
| "loss": 0.2814, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 2.994606256742179, | |
| "step": 1389, | |
| "total_flos": 1.5436844662748348e+19, | |
| "train_loss": 0.0, | |
| "train_runtime": 1.4945, | |
| "train_samples_per_second": 14884.231, | |
| "train_steps_per_second": 929.386 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1389, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5436844662748348e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |