| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9983155530600785, | |
| "eval_steps": 500, | |
| "global_step": 1335, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0022459292532285235, | |
| "grad_norm": 5.7204437255859375, | |
| "learning_rate": 7.462686567164179e-08, | |
| "loss": 0.8234, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004491858506457047, | |
| "grad_norm": 5.842348575592041, | |
| "learning_rate": 1.4925373134328358e-07, | |
| "loss": 0.8425, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00673778775968557, | |
| "grad_norm": 6.015719413757324, | |
| "learning_rate": 2.2388059701492537e-07, | |
| "loss": 0.886, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008983717012914094, | |
| "grad_norm": 5.857845783233643, | |
| "learning_rate": 2.9850746268656716e-07, | |
| "loss": 0.8574, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.011229646266142616, | |
| "grad_norm": 5.838263988494873, | |
| "learning_rate": 3.7313432835820895e-07, | |
| "loss": 0.8673, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01347557551937114, | |
| "grad_norm": 5.752118110656738, | |
| "learning_rate": 4.4776119402985074e-07, | |
| "loss": 0.8437, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.015721504772599662, | |
| "grad_norm": 5.738947868347168, | |
| "learning_rate": 5.223880597014925e-07, | |
| "loss": 0.8768, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.017967434025828188, | |
| "grad_norm": 5.7029314041137695, | |
| "learning_rate": 5.970149253731343e-07, | |
| "loss": 0.8455, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02021336327905671, | |
| "grad_norm": 5.459065914154053, | |
| "learning_rate": 6.716417910447762e-07, | |
| "loss": 0.8427, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.022459292532285232, | |
| "grad_norm": 5.500589370727539, | |
| "learning_rate": 7.462686567164179e-07, | |
| "loss": 0.8739, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024705221785513758, | |
| "grad_norm": 5.371927738189697, | |
| "learning_rate": 8.208955223880598e-07, | |
| "loss": 0.8339, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02695115103874228, | |
| "grad_norm": 4.383749008178711, | |
| "learning_rate": 8.955223880597015e-07, | |
| "loss": 0.8146, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 4.2294511795043945, | |
| "learning_rate": 9.701492537313434e-07, | |
| "loss": 0.8006, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.031443009545199324, | |
| "grad_norm": 3.9959418773651123, | |
| "learning_rate": 1.044776119402985e-06, | |
| "loss": 0.7986, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.033688938798427846, | |
| "grad_norm": 4.030126094818115, | |
| "learning_rate": 1.119402985074627e-06, | |
| "loss": 0.808, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.035934868051656375, | |
| "grad_norm": 2.3749942779541016, | |
| "learning_rate": 1.1940298507462686e-06, | |
| "loss": 0.7639, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0381807973048849, | |
| "grad_norm": 2.266770362854004, | |
| "learning_rate": 1.2686567164179105e-06, | |
| "loss": 0.7588, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.04042672655811342, | |
| "grad_norm": 2.169877767562866, | |
| "learning_rate": 1.3432835820895524e-06, | |
| "loss": 0.7664, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04267265581134194, | |
| "grad_norm": 2.0016181468963623, | |
| "learning_rate": 1.417910447761194e-06, | |
| "loss": 0.7452, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.044918585064570464, | |
| "grad_norm": 1.9403204917907715, | |
| "learning_rate": 1.4925373134328358e-06, | |
| "loss": 0.7691, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.047164514317798986, | |
| "grad_norm": 1.8641579151153564, | |
| "learning_rate": 1.5671641791044779e-06, | |
| "loss": 0.7745, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.049410443571027515, | |
| "grad_norm": 1.87736177444458, | |
| "learning_rate": 1.6417910447761196e-06, | |
| "loss": 0.72, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05165637282425604, | |
| "grad_norm": 2.7966866493225098, | |
| "learning_rate": 1.7164179104477613e-06, | |
| "loss": 0.7302, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.05390230207748456, | |
| "grad_norm": 2.9194653034210205, | |
| "learning_rate": 1.791044776119403e-06, | |
| "loss": 0.7144, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05614823133071308, | |
| "grad_norm": 2.9114489555358887, | |
| "learning_rate": 1.865671641791045e-06, | |
| "loss": 0.7111, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 2.698354482650757, | |
| "learning_rate": 1.9402985074626867e-06, | |
| "loss": 0.7052, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.060640089837170126, | |
| "grad_norm": 2.5505008697509766, | |
| "learning_rate": 2.0149253731343284e-06, | |
| "loss": 0.7071, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.06288601909039865, | |
| "grad_norm": 2.1805033683776855, | |
| "learning_rate": 2.08955223880597e-06, | |
| "loss": 0.7041, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06513194834362718, | |
| "grad_norm": 1.668395757675171, | |
| "learning_rate": 2.1641791044776118e-06, | |
| "loss": 0.6815, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06737787759685569, | |
| "grad_norm": 1.138392448425293, | |
| "learning_rate": 2.238805970149254e-06, | |
| "loss": 0.6862, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06962380685008422, | |
| "grad_norm": 1.057366132736206, | |
| "learning_rate": 2.3134328358208956e-06, | |
| "loss": 0.6672, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.07186973610331275, | |
| "grad_norm": 1.0795561075210571, | |
| "learning_rate": 2.3880597014925373e-06, | |
| "loss": 0.6714, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07411566535654127, | |
| "grad_norm": 1.074954628944397, | |
| "learning_rate": 2.4626865671641794e-06, | |
| "loss": 0.6706, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0763615946097698, | |
| "grad_norm": 1.0592774152755737, | |
| "learning_rate": 2.537313432835821e-06, | |
| "loss": 0.6721, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07860752386299831, | |
| "grad_norm": 0.8358403444290161, | |
| "learning_rate": 2.6119402985074627e-06, | |
| "loss": 0.6425, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08085345311622684, | |
| "grad_norm": 0.8717141151428223, | |
| "learning_rate": 2.686567164179105e-06, | |
| "loss": 0.6505, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08309938236945537, | |
| "grad_norm": 0.7979157567024231, | |
| "learning_rate": 2.7611940298507465e-06, | |
| "loss": 0.6386, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.08534531162268388, | |
| "grad_norm": 0.6813825368881226, | |
| "learning_rate": 2.835820895522388e-06, | |
| "loss": 0.6352, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08759124087591241, | |
| "grad_norm": 0.6683703064918518, | |
| "learning_rate": 2.9104477611940303e-06, | |
| "loss": 0.644, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08983717012914093, | |
| "grad_norm": 0.6825925707817078, | |
| "learning_rate": 2.9850746268656716e-06, | |
| "loss": 0.6352, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09208309938236946, | |
| "grad_norm": 0.7220752239227295, | |
| "learning_rate": 3.0597014925373137e-06, | |
| "loss": 0.6207, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.09432902863559797, | |
| "grad_norm": 0.7097088694572449, | |
| "learning_rate": 3.1343283582089558e-06, | |
| "loss": 0.6177, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0965749578888265, | |
| "grad_norm": 0.6021708250045776, | |
| "learning_rate": 3.208955223880597e-06, | |
| "loss": 0.6188, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09882088714205503, | |
| "grad_norm": 0.5546464920043945, | |
| "learning_rate": 3.283582089552239e-06, | |
| "loss": 0.6101, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10106681639528355, | |
| "grad_norm": 0.5791826248168945, | |
| "learning_rate": 3.3582089552238813e-06, | |
| "loss": 0.6079, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10331274564851207, | |
| "grad_norm": 0.6221138834953308, | |
| "learning_rate": 3.4328358208955225e-06, | |
| "loss": 0.6047, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10555867490174059, | |
| "grad_norm": 0.5765758752822876, | |
| "learning_rate": 3.5074626865671646e-06, | |
| "loss": 0.5965, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.10780460415496912, | |
| "grad_norm": 0.47714346647262573, | |
| "learning_rate": 3.582089552238806e-06, | |
| "loss": 0.5977, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11005053340819765, | |
| "grad_norm": 0.5033997893333435, | |
| "learning_rate": 3.656716417910448e-06, | |
| "loss": 0.6066, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.11229646266142616, | |
| "grad_norm": 0.4991725981235504, | |
| "learning_rate": 3.73134328358209e-06, | |
| "loss": 0.5852, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11454239191465469, | |
| "grad_norm": 0.4966943562030792, | |
| "learning_rate": 3.8059701492537314e-06, | |
| "loss": 0.5846, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 0.4513320326805115, | |
| "learning_rate": 3.8805970149253735e-06, | |
| "loss": 0.5637, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11903425042111174, | |
| "grad_norm": 0.47153928875923157, | |
| "learning_rate": 3.955223880597015e-06, | |
| "loss": 0.5814, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.12128017967434025, | |
| "grad_norm": 0.5067244172096252, | |
| "learning_rate": 4.029850746268657e-06, | |
| "loss": 0.588, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12352610892756878, | |
| "grad_norm": 0.4318973124027252, | |
| "learning_rate": 4.104477611940299e-06, | |
| "loss": 0.5852, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1257720381807973, | |
| "grad_norm": 0.41859719157218933, | |
| "learning_rate": 4.17910447761194e-06, | |
| "loss": 0.5788, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12801796743402583, | |
| "grad_norm": 0.4497435986995697, | |
| "learning_rate": 4.253731343283583e-06, | |
| "loss": 0.5746, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.13026389668725435, | |
| "grad_norm": 0.407840371131897, | |
| "learning_rate": 4.3283582089552236e-06, | |
| "loss": 0.5848, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13250982594048288, | |
| "grad_norm": 0.3589821457862854, | |
| "learning_rate": 4.402985074626866e-06, | |
| "loss": 0.5799, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.13475575519371139, | |
| "grad_norm": 0.4474234879016876, | |
| "learning_rate": 4.477611940298508e-06, | |
| "loss": 0.5768, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13700168444693991, | |
| "grad_norm": 0.38281363248825073, | |
| "learning_rate": 4.5522388059701495e-06, | |
| "loss": 0.5684, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.13924761370016844, | |
| "grad_norm": 0.34512782096862793, | |
| "learning_rate": 4.626865671641791e-06, | |
| "loss": 0.5835, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.14149354295339697, | |
| "grad_norm": 0.32510608434677124, | |
| "learning_rate": 4.701492537313434e-06, | |
| "loss": 0.5811, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1437394722066255, | |
| "grad_norm": 0.40574586391448975, | |
| "learning_rate": 4.7761194029850745e-06, | |
| "loss": 0.5594, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 0.3952745497226715, | |
| "learning_rate": 4.850746268656717e-06, | |
| "loss": 0.5674, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14823133071308253, | |
| "grad_norm": 0.3393004834651947, | |
| "learning_rate": 4.925373134328359e-06, | |
| "loss": 0.5471, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15047725996631106, | |
| "grad_norm": 0.3402893543243408, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5689, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1527231892195396, | |
| "grad_norm": 0.31731945276260376, | |
| "learning_rate": 5.074626865671642e-06, | |
| "loss": 0.5588, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15496911847276812, | |
| "grad_norm": 0.2877805829048157, | |
| "learning_rate": 5.149253731343285e-06, | |
| "loss": 0.5567, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.15721504772599662, | |
| "grad_norm": 0.3303472399711609, | |
| "learning_rate": 5.2238805970149255e-06, | |
| "loss": 0.5624, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15946097697922515, | |
| "grad_norm": 0.3219895660877228, | |
| "learning_rate": 5.298507462686567e-06, | |
| "loss": 0.5522, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.16170690623245368, | |
| "grad_norm": 0.29180029034614563, | |
| "learning_rate": 5.37313432835821e-06, | |
| "loss": 0.544, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1639528354856822, | |
| "grad_norm": 0.30961552262306213, | |
| "learning_rate": 5.447761194029851e-06, | |
| "loss": 0.5462, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.16619876473891074, | |
| "grad_norm": 0.3001321852207184, | |
| "learning_rate": 5.522388059701493e-06, | |
| "loss": 0.5479, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16844469399213924, | |
| "grad_norm": 0.29555678367614746, | |
| "learning_rate": 5.597014925373134e-06, | |
| "loss": 0.5646, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17069062324536777, | |
| "grad_norm": 0.344656765460968, | |
| "learning_rate": 5.671641791044776e-06, | |
| "loss": 0.5475, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729365524985963, | |
| "grad_norm": 0.3049803078174591, | |
| "learning_rate": 5.746268656716418e-06, | |
| "loss": 0.5457, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 0.2782682180404663, | |
| "learning_rate": 5.820895522388061e-06, | |
| "loss": 0.5558, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17742841100505333, | |
| "grad_norm": 0.33001065254211426, | |
| "learning_rate": 5.895522388059702e-06, | |
| "loss": 0.5692, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.17967434025828186, | |
| "grad_norm": 0.26358768343925476, | |
| "learning_rate": 5.970149253731343e-06, | |
| "loss": 0.5419, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18192026951151039, | |
| "grad_norm": 0.2817039489746094, | |
| "learning_rate": 6.044776119402986e-06, | |
| "loss": 0.5661, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.18416619876473891, | |
| "grad_norm": 0.2643490135669708, | |
| "learning_rate": 6.119402985074627e-06, | |
| "loss": 0.5362, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18641212801796744, | |
| "grad_norm": 0.2636040151119232, | |
| "learning_rate": 6.194029850746269e-06, | |
| "loss": 0.5394, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.18865805727119594, | |
| "grad_norm": 0.251675546169281, | |
| "learning_rate": 6.2686567164179116e-06, | |
| "loss": 0.5379, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19090398652442447, | |
| "grad_norm": 0.26983481645584106, | |
| "learning_rate": 6.343283582089553e-06, | |
| "loss": 0.5389, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.193149915777653, | |
| "grad_norm": 0.2974947690963745, | |
| "learning_rate": 6.417910447761194e-06, | |
| "loss": 0.5342, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.19539584503088153, | |
| "grad_norm": 0.3126147389411926, | |
| "learning_rate": 6.492537313432837e-06, | |
| "loss": 0.537, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.19764177428411006, | |
| "grad_norm": 0.27590620517730713, | |
| "learning_rate": 6.567164179104478e-06, | |
| "loss": 0.5507, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19988770353733856, | |
| "grad_norm": 0.32750827074050903, | |
| "learning_rate": 6.64179104477612e-06, | |
| "loss": 0.5361, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2021336327905671, | |
| "grad_norm": 0.2821713984012604, | |
| "learning_rate": 6.7164179104477625e-06, | |
| "loss": 0.5273, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20437956204379562, | |
| "grad_norm": 0.3005189597606659, | |
| "learning_rate": 6.791044776119403e-06, | |
| "loss": 0.5436, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.20662549129702415, | |
| "grad_norm": 0.28068017959594727, | |
| "learning_rate": 6.865671641791045e-06, | |
| "loss": 0.5305, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20887142055025268, | |
| "grad_norm": 0.28698408603668213, | |
| "learning_rate": 6.9402985074626876e-06, | |
| "loss": 0.5388, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.21111734980348118, | |
| "grad_norm": 0.3307916820049286, | |
| "learning_rate": 7.014925373134329e-06, | |
| "loss": 0.5191, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2133632790567097, | |
| "grad_norm": 0.2854793667793274, | |
| "learning_rate": 7.089552238805971e-06, | |
| "loss": 0.5222, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21560920830993824, | |
| "grad_norm": 0.3629694879055023, | |
| "learning_rate": 7.164179104477612e-06, | |
| "loss": 0.5451, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21785513756316677, | |
| "grad_norm": 0.313763827085495, | |
| "learning_rate": 7.238805970149254e-06, | |
| "loss": 0.5322, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2201010668163953, | |
| "grad_norm": 0.30298125743865967, | |
| "learning_rate": 7.313432835820896e-06, | |
| "loss": 0.5089, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2223469960696238, | |
| "grad_norm": 0.34473463892936707, | |
| "learning_rate": 7.3880597014925385e-06, | |
| "loss": 0.5444, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.22459292532285233, | |
| "grad_norm": 0.2840663194656372, | |
| "learning_rate": 7.46268656716418e-06, | |
| "loss": 0.5433, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22683885457608086, | |
| "grad_norm": 0.32824480533599854, | |
| "learning_rate": 7.537313432835821e-06, | |
| "loss": 0.5149, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.22908478382930939, | |
| "grad_norm": 0.31232303380966187, | |
| "learning_rate": 7.611940298507463e-06, | |
| "loss": 0.5415, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2313307130825379, | |
| "grad_norm": 0.2765471935272217, | |
| "learning_rate": 7.686567164179105e-06, | |
| "loss": 0.5208, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 0.31149113178253174, | |
| "learning_rate": 7.761194029850747e-06, | |
| "loss": 0.5417, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23582257158899494, | |
| "grad_norm": 0.3036503195762634, | |
| "learning_rate": 7.835820895522389e-06, | |
| "loss": 0.5259, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23806850084222347, | |
| "grad_norm": 0.2747598886489868, | |
| "learning_rate": 7.91044776119403e-06, | |
| "loss": 0.5257, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.240314430095452, | |
| "grad_norm": 0.27585095167160034, | |
| "learning_rate": 7.985074626865672e-06, | |
| "loss": 0.5304, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2425603593486805, | |
| "grad_norm": 0.3225706219673157, | |
| "learning_rate": 8.059701492537314e-06, | |
| "loss": 0.533, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24480628860190903, | |
| "grad_norm": 0.30163803696632385, | |
| "learning_rate": 8.134328358208955e-06, | |
| "loss": 0.5128, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.24705221785513756, | |
| "grad_norm": 0.30006369948387146, | |
| "learning_rate": 8.208955223880599e-06, | |
| "loss": 0.5087, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2492981471083661, | |
| "grad_norm": 0.36344826221466064, | |
| "learning_rate": 8.283582089552239e-06, | |
| "loss": 0.5229, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2515440763615946, | |
| "grad_norm": 0.3036467730998993, | |
| "learning_rate": 8.35820895522388e-06, | |
| "loss": 0.5232, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2537900056148231, | |
| "grad_norm": 0.3324042856693268, | |
| "learning_rate": 8.432835820895524e-06, | |
| "loss": 0.5257, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.25603593486805165, | |
| "grad_norm": 0.3443598449230194, | |
| "learning_rate": 8.507462686567165e-06, | |
| "loss": 0.5173, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2582818641212802, | |
| "grad_norm": 0.3419680595397949, | |
| "learning_rate": 8.582089552238807e-06, | |
| "loss": 0.514, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2605277933745087, | |
| "grad_norm": 0.3660188615322113, | |
| "learning_rate": 8.656716417910447e-06, | |
| "loss": 0.5137, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.26277372262773724, | |
| "grad_norm": 0.322307825088501, | |
| "learning_rate": 8.73134328358209e-06, | |
| "loss": 0.5221, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.26501965188096577, | |
| "grad_norm": 0.3525477945804596, | |
| "learning_rate": 8.805970149253732e-06, | |
| "loss": 0.5302, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2672655811341943, | |
| "grad_norm": 0.39976975321769714, | |
| "learning_rate": 8.880597014925374e-06, | |
| "loss": 0.5115, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.26951151038742277, | |
| "grad_norm": 0.30590498447418213, | |
| "learning_rate": 8.955223880597016e-06, | |
| "loss": 0.5251, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2717574396406513, | |
| "grad_norm": 0.3515385389328003, | |
| "learning_rate": 9.029850746268657e-06, | |
| "loss": 0.5154, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.27400336889387983, | |
| "grad_norm": 0.37321946024894714, | |
| "learning_rate": 9.104477611940299e-06, | |
| "loss": 0.5075, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.27624929814710836, | |
| "grad_norm": 0.3113161623477936, | |
| "learning_rate": 9.17910447761194e-06, | |
| "loss": 0.5172, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.2784952274003369, | |
| "grad_norm": 0.35777148604393005, | |
| "learning_rate": 9.253731343283582e-06, | |
| "loss": 0.5187, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2807411566535654, | |
| "grad_norm": 0.2908802926540375, | |
| "learning_rate": 9.328358208955226e-06, | |
| "loss": 0.5181, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.28298708590679394, | |
| "grad_norm": 0.3901764452457428, | |
| "learning_rate": 9.402985074626867e-06, | |
| "loss": 0.5323, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2852330151600225, | |
| "grad_norm": 0.3103543519973755, | |
| "learning_rate": 9.477611940298507e-06, | |
| "loss": 0.5035, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.287478944413251, | |
| "grad_norm": 0.32105693221092224, | |
| "learning_rate": 9.552238805970149e-06, | |
| "loss": 0.5166, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28972487366647953, | |
| "grad_norm": 0.3075639605522156, | |
| "learning_rate": 9.626865671641792e-06, | |
| "loss": 0.5238, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 0.31366583704948425, | |
| "learning_rate": 9.701492537313434e-06, | |
| "loss": 0.5054, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.29421673217293653, | |
| "grad_norm": 0.31075215339660645, | |
| "learning_rate": 9.776119402985076e-06, | |
| "loss": 0.5093, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.29646266142616506, | |
| "grad_norm": 0.3048778474330902, | |
| "learning_rate": 9.850746268656717e-06, | |
| "loss": 0.4938, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2987085906793936, | |
| "grad_norm": 0.3239855468273163, | |
| "learning_rate": 9.925373134328359e-06, | |
| "loss": 0.5204, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3009545199326221, | |
| "grad_norm": 0.30303385853767395, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5097, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30320044918585065, | |
| "grad_norm": 0.3344568908214569, | |
| "learning_rate": 9.999982893802117e-06, | |
| "loss": 0.5095, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3054463784390792, | |
| "grad_norm": 0.3649601340293884, | |
| "learning_rate": 9.999931575325515e-06, | |
| "loss": 0.502, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.3643246591091156, | |
| "learning_rate": 9.999846044921342e-06, | |
| "loss": 0.5145, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.30993823694553624, | |
| "grad_norm": 0.31586742401123047, | |
| "learning_rate": 9.999726303174833e-06, | |
| "loss": 0.5064, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3121841661987647, | |
| "grad_norm": 0.39719676971435547, | |
| "learning_rate": 9.999572350905325e-06, | |
| "loss": 0.5018, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.31443009545199324, | |
| "grad_norm": 0.3038526177406311, | |
| "learning_rate": 9.999384189166227e-06, | |
| "loss": 0.5147, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.31667602470522177, | |
| "grad_norm": 0.38959312438964844, | |
| "learning_rate": 9.999161819245036e-06, | |
| "loss": 0.499, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3189219539584503, | |
| "grad_norm": 0.29253315925598145, | |
| "learning_rate": 9.998905242663313e-06, | |
| "loss": 0.5097, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.32116788321167883, | |
| "grad_norm": 0.2925349175930023, | |
| "learning_rate": 9.998614461176676e-06, | |
| "loss": 0.5084, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.32341381246490736, | |
| "grad_norm": 0.317200630903244, | |
| "learning_rate": 9.998289476774792e-06, | |
| "loss": 0.5341, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3256597417181359, | |
| "grad_norm": 0.3577384352684021, | |
| "learning_rate": 9.997930291681362e-06, | |
| "loss": 0.4969, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3279056709713644, | |
| "grad_norm": 0.31183212995529175, | |
| "learning_rate": 9.997536908354101e-06, | |
| "loss": 0.514, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.33015160022459294, | |
| "grad_norm": 0.35079729557037354, | |
| "learning_rate": 9.997109329484725e-06, | |
| "loss": 0.5114, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3323975294778215, | |
| "grad_norm": 0.3374541401863098, | |
| "learning_rate": 9.996647557998935e-06, | |
| "loss": 0.5103, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.33464345873104995, | |
| "grad_norm": 0.3685130774974823, | |
| "learning_rate": 9.996151597056391e-06, | |
| "loss": 0.507, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3368893879842785, | |
| "grad_norm": 0.3940074145793915, | |
| "learning_rate": 9.9956214500507e-06, | |
| "loss": 0.5236, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.339135317237507, | |
| "grad_norm": 0.36086133122444153, | |
| "learning_rate": 9.995057120609376e-06, | |
| "loss": 0.4958, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.34138124649073553, | |
| "grad_norm": 0.3247486650943756, | |
| "learning_rate": 9.994458612593835e-06, | |
| "loss": 0.5065, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34362717574396406, | |
| "grad_norm": 0.3979952335357666, | |
| "learning_rate": 9.993825930099355e-06, | |
| "loss": 0.5075, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3458731049971926, | |
| "grad_norm": 0.3016234040260315, | |
| "learning_rate": 9.993159077455053e-06, | |
| "loss": 0.5206, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3481190342504211, | |
| "grad_norm": 0.3936152458190918, | |
| "learning_rate": 9.992458059223852e-06, | |
| "loss": 0.4939, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 0.34936875104904175, | |
| "learning_rate": 9.991722880202457e-06, | |
| "loss": 0.4979, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3526108927568782, | |
| "grad_norm": 0.3858884572982788, | |
| "learning_rate": 9.990953545421314e-06, | |
| "loss": 0.5087, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.35485682201010665, | |
| "grad_norm": 0.3864620327949524, | |
| "learning_rate": 9.990150060144582e-06, | |
| "loss": 0.5127, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3571027512633352, | |
| "grad_norm": 0.37431252002716064, | |
| "learning_rate": 9.98931242987009e-06, | |
| "loss": 0.5209, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3593486805165637, | |
| "grad_norm": 0.3798641562461853, | |
| "learning_rate": 9.988440660329308e-06, | |
| "loss": 0.4979, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.36159460976979224, | |
| "grad_norm": 0.3777308464050293, | |
| "learning_rate": 9.9875347574873e-06, | |
| "loss": 0.5266, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.36384053902302077, | |
| "grad_norm": 0.4040112793445587, | |
| "learning_rate": 9.986594727542684e-06, | |
| "loss": 0.4973, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3660864682762493, | |
| "grad_norm": 0.3550376892089844, | |
| "learning_rate": 9.985620576927601e-06, | |
| "loss": 0.5111, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.36833239752947783, | |
| "grad_norm": 0.3775559663772583, | |
| "learning_rate": 9.984612312307653e-06, | |
| "loss": 0.5175, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.37057832678270636, | |
| "grad_norm": 0.42534682154655457, | |
| "learning_rate": 9.98356994058187e-06, | |
| "loss": 0.4971, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3728242560359349, | |
| "grad_norm": 0.34163060784339905, | |
| "learning_rate": 9.98249346888266e-06, | |
| "loss": 0.5151, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3750701852891634, | |
| "grad_norm": 0.3757290244102478, | |
| "learning_rate": 9.981382904575754e-06, | |
| "loss": 0.5018, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3773161145423919, | |
| "grad_norm": 0.39487966895103455, | |
| "learning_rate": 9.98023825526017e-06, | |
| "loss": 0.5074, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3795620437956204, | |
| "grad_norm": 0.4693453013896942, | |
| "learning_rate": 9.979059528768146e-06, | |
| "loss": 0.5118, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.38180797304884895, | |
| "grad_norm": 0.3660091161727905, | |
| "learning_rate": 9.977846733165092e-06, | |
| "loss": 0.5019, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3840539023020775, | |
| "grad_norm": 0.41689541935920715, | |
| "learning_rate": 9.976599876749537e-06, | |
| "loss": 0.4806, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.386299831555306, | |
| "grad_norm": 0.3330056071281433, | |
| "learning_rate": 9.975318968053071e-06, | |
| "loss": 0.5003, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.38854576080853453, | |
| "grad_norm": 0.3664453625679016, | |
| "learning_rate": 9.974004015840284e-06, | |
| "loss": 0.4913, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.39079169006176306, | |
| "grad_norm": 0.35467249155044556, | |
| "learning_rate": 9.972655029108711e-06, | |
| "loss": 0.491, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3930376193149916, | |
| "grad_norm": 0.3422984480857849, | |
| "learning_rate": 9.971272017088762e-06, | |
| "loss": 0.4964, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3952835485682201, | |
| "grad_norm": 0.30467477440834045, | |
| "learning_rate": 9.969854989243672e-06, | |
| "loss": 0.4958, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.39752947782144865, | |
| "grad_norm": 0.3571237623691559, | |
| "learning_rate": 9.968403955269422e-06, | |
| "loss": 0.5043, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.3997754070746771, | |
| "grad_norm": 0.40359944105148315, | |
| "learning_rate": 9.966918925094682e-06, | |
| "loss": 0.502, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.40202133632790565, | |
| "grad_norm": 0.2942937910556793, | |
| "learning_rate": 9.96539990888074e-06, | |
| "loss": 0.5113, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4042672655811342, | |
| "grad_norm": 0.3451298773288727, | |
| "learning_rate": 9.963846917021433e-06, | |
| "loss": 0.4895, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4065131948343627, | |
| "grad_norm": 0.32071009278297424, | |
| "learning_rate": 9.962259960143076e-06, | |
| "loss": 0.4917, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 0.29624050855636597, | |
| "learning_rate": 9.96063904910439e-06, | |
| "loss": 0.516, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.41100505334081977, | |
| "grad_norm": 0.3379235863685608, | |
| "learning_rate": 9.958984194996419e-06, | |
| "loss": 0.4936, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4132509825940483, | |
| "grad_norm": 0.3338676989078522, | |
| "learning_rate": 9.957295409142474e-06, | |
| "loss": 0.494, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.41549691184727683, | |
| "grad_norm": 0.3495246469974518, | |
| "learning_rate": 9.955572703098035e-06, | |
| "loss": 0.4887, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.41774284110050536, | |
| "grad_norm": 0.33925801515579224, | |
| "learning_rate": 9.95381608865068e-06, | |
| "loss": 0.5041, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41998877035373383, | |
| "grad_norm": 0.3868575692176819, | |
| "learning_rate": 9.952025577820009e-06, | |
| "loss": 0.4985, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.42223469960696236, | |
| "grad_norm": 0.34473907947540283, | |
| "learning_rate": 9.950201182857555e-06, | |
| "loss": 0.5065, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4244806288601909, | |
| "grad_norm": 0.3982524573802948, | |
| "learning_rate": 9.948342916246702e-06, | |
| "loss": 0.5017, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4267265581134194, | |
| "grad_norm": 0.40433281660079956, | |
| "learning_rate": 9.9464507907026e-06, | |
| "loss": 0.5036, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42897248736664795, | |
| "grad_norm": 0.29866451025009155, | |
| "learning_rate": 9.94452481917208e-06, | |
| "loss": 0.4861, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.4312184166198765, | |
| "grad_norm": 0.37620702385902405, | |
| "learning_rate": 9.94256501483356e-06, | |
| "loss": 0.4792, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.433464345873105, | |
| "grad_norm": 0.30438610911369324, | |
| "learning_rate": 9.940571391096962e-06, | |
| "loss": 0.504, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.43571027512633353, | |
| "grad_norm": 0.32881197333335876, | |
| "learning_rate": 9.938543961603616e-06, | |
| "loss": 0.5008, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 0.31999659538269043, | |
| "learning_rate": 9.936482740226163e-06, | |
| "loss": 0.4868, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4402021336327906, | |
| "grad_norm": 0.3441828489303589, | |
| "learning_rate": 9.93438774106847e-06, | |
| "loss": 0.5055, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.44244806288601907, | |
| "grad_norm": 0.29661545157432556, | |
| "learning_rate": 9.932258978465523e-06, | |
| "loss": 0.4673, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4446939921392476, | |
| "grad_norm": 0.38478636741638184, | |
| "learning_rate": 9.930096466983337e-06, | |
| "loss": 0.4869, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4469399213924761, | |
| "grad_norm": 0.3225785493850708, | |
| "learning_rate": 9.92790022141885e-06, | |
| "loss": 0.4814, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.44918585064570465, | |
| "grad_norm": 0.3994785249233246, | |
| "learning_rate": 9.925670256799829e-06, | |
| "loss": 0.4929, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4514317798989332, | |
| "grad_norm": 0.3152889311313629, | |
| "learning_rate": 9.923406588384759e-06, | |
| "loss": 0.4843, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.4536777091521617, | |
| "grad_norm": 0.38969138264656067, | |
| "learning_rate": 9.921109231662744e-06, | |
| "loss": 0.513, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.45592363840539024, | |
| "grad_norm": 0.38721248507499695, | |
| "learning_rate": 9.9187782023534e-06, | |
| "loss": 0.4894, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.45816956765861877, | |
| "grad_norm": 0.38004323840141296, | |
| "learning_rate": 9.916413516406746e-06, | |
| "loss": 0.4987, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4604154969118473, | |
| "grad_norm": 0.40154218673706055, | |
| "learning_rate": 9.914015190003096e-06, | |
| "loss": 0.4848, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.4626614261650758, | |
| "grad_norm": 0.37615618109703064, | |
| "learning_rate": 9.911583239552949e-06, | |
| "loss": 0.5083, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4649073554183043, | |
| "grad_norm": 0.4611421227455139, | |
| "learning_rate": 9.909117681696874e-06, | |
| "loss": 0.4799, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 0.49794813990592957, | |
| "learning_rate": 9.906618533305401e-06, | |
| "loss": 0.4892, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.46939921392476136, | |
| "grad_norm": 0.40189069509506226, | |
| "learning_rate": 9.904085811478901e-06, | |
| "loss": 0.4797, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4716451431779899, | |
| "grad_norm": 0.37438878417015076, | |
| "learning_rate": 9.901519533547468e-06, | |
| "loss": 0.4826, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4738910724312184, | |
| "grad_norm": 0.3949896991252899, | |
| "learning_rate": 9.898919717070808e-06, | |
| "loss": 0.4995, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.47613700168444695, | |
| "grad_norm": 0.3877430856227875, | |
| "learning_rate": 9.896286379838109e-06, | |
| "loss": 0.4787, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4783829309376755, | |
| "grad_norm": 0.3562919497489929, | |
| "learning_rate": 9.893619539867926e-06, | |
| "loss": 0.5, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.480628860190904, | |
| "grad_norm": 0.34773513674736023, | |
| "learning_rate": 9.890919215408059e-06, | |
| "loss": 0.4755, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.48287478944413254, | |
| "grad_norm": 0.42745330929756165, | |
| "learning_rate": 9.888185424935418e-06, | |
| "loss": 0.4921, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.485120718697361, | |
| "grad_norm": 0.34176507592201233, | |
| "learning_rate": 9.885418187155909e-06, | |
| "loss": 0.4995, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48736664795058954, | |
| "grad_norm": 0.4287734031677246, | |
| "learning_rate": 9.882617521004298e-06, | |
| "loss": 0.4962, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.48961257720381807, | |
| "grad_norm": 0.4167402684688568, | |
| "learning_rate": 9.879783445644086e-06, | |
| "loss": 0.4956, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4918585064570466, | |
| "grad_norm": 0.40856555104255676, | |
| "learning_rate": 9.876915980467373e-06, | |
| "loss": 0.491, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4941044357102751, | |
| "grad_norm": 0.43443533778190613, | |
| "learning_rate": 9.874015145094733e-06, | |
| "loss": 0.4948, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49635036496350365, | |
| "grad_norm": 0.4324890971183777, | |
| "learning_rate": 9.871080959375067e-06, | |
| "loss": 0.5015, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4985962942167322, | |
| "grad_norm": 0.4211356043815613, | |
| "learning_rate": 9.868113443385483e-06, | |
| "loss": 0.491, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5008422234699607, | |
| "grad_norm": 0.34874603152275085, | |
| "learning_rate": 9.865112617431146e-06, | |
| "loss": 0.4802, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5030881527231892, | |
| "grad_norm": 0.41246911883354187, | |
| "learning_rate": 9.862078502045145e-06, | |
| "loss": 0.4851, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5053340819764177, | |
| "grad_norm": 0.3335956931114197, | |
| "learning_rate": 9.85901111798835e-06, | |
| "loss": 0.495, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5075800112296462, | |
| "grad_norm": 0.4276493191719055, | |
| "learning_rate": 9.855910486249276e-06, | |
| "loss": 0.5064, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5098259404828748, | |
| "grad_norm": 0.3431427776813507, | |
| "learning_rate": 9.852776628043928e-06, | |
| "loss": 0.5033, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5120718697361033, | |
| "grad_norm": 0.368875652551651, | |
| "learning_rate": 9.849609564815668e-06, | |
| "loss": 0.4892, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5143177989893318, | |
| "grad_norm": 0.4343670904636383, | |
| "learning_rate": 9.846409318235056e-06, | |
| "loss": 0.4877, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5165637282425604, | |
| "grad_norm": 0.358761191368103, | |
| "learning_rate": 9.843175910199715e-06, | |
| "loss": 0.4766, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5188096574957889, | |
| "grad_norm": 0.4277135133743286, | |
| "learning_rate": 9.839909362834174e-06, | |
| "loss": 0.4981, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5210555867490174, | |
| "grad_norm": 0.37256282567977905, | |
| "learning_rate": 9.836609698489714e-06, | |
| "loss": 0.5042, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.523301516002246, | |
| "grad_norm": 0.3928300142288208, | |
| "learning_rate": 9.833276939744217e-06, | |
| "loss": 0.4798, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 0.36464980244636536, | |
| "learning_rate": 9.829911109402017e-06, | |
| "loss": 0.4999, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.527793374508703, | |
| "grad_norm": 0.4434768855571747, | |
| "learning_rate": 9.82651223049374e-06, | |
| "loss": 0.4933, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5300393037619315, | |
| "grad_norm": 0.3624848425388336, | |
| "learning_rate": 9.82308032627614e-06, | |
| "loss": 0.4999, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5322852330151601, | |
| "grad_norm": 0.41842374205589294, | |
| "learning_rate": 9.819615420231954e-06, | |
| "loss": 0.4871, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5345311622683886, | |
| "grad_norm": 0.40757784247398376, | |
| "learning_rate": 9.816117536069724e-06, | |
| "loss": 0.4846, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5367770915216171, | |
| "grad_norm": 0.5392343401908875, | |
| "learning_rate": 9.812586697723658e-06, | |
| "loss": 0.4878, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5390230207748455, | |
| "grad_norm": 0.38242799043655396, | |
| "learning_rate": 9.809022929353436e-06, | |
| "loss": 0.4855, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5412689500280741, | |
| "grad_norm": 0.42983102798461914, | |
| "learning_rate": 9.805426255344071e-06, | |
| "loss": 0.4909, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5435148792813026, | |
| "grad_norm": 0.408312052488327, | |
| "learning_rate": 9.801796700305732e-06, | |
| "loss": 0.4954, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5457608085345311, | |
| "grad_norm": 0.3748157024383545, | |
| "learning_rate": 9.798134289073571e-06, | |
| "loss": 0.4844, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5480067377877597, | |
| "grad_norm": 0.39674103260040283, | |
| "learning_rate": 9.794439046707562e-06, | |
| "loss": 0.4893, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5502526670409882, | |
| "grad_norm": 0.3584100604057312, | |
| "learning_rate": 9.790710998492325e-06, | |
| "loss": 0.4663, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5524985962942167, | |
| "grad_norm": 0.33988258242607117, | |
| "learning_rate": 9.786950169936948e-06, | |
| "loss": 0.4744, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5547445255474452, | |
| "grad_norm": 0.4141857624053955, | |
| "learning_rate": 9.783156586774826e-06, | |
| "loss": 0.491, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5569904548006738, | |
| "grad_norm": 0.344392329454422, | |
| "learning_rate": 9.779330274963473e-06, | |
| "loss": 0.5052, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5592363840539023, | |
| "grad_norm": 0.3439772129058838, | |
| "learning_rate": 9.775471260684346e-06, | |
| "loss": 0.4859, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5614823133071308, | |
| "grad_norm": 0.31984543800354004, | |
| "learning_rate": 9.771579570342668e-06, | |
| "loss": 0.509, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5637282425603594, | |
| "grad_norm": 0.3450314402580261, | |
| "learning_rate": 9.767655230567252e-06, | |
| "loss": 0.4793, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5659741718135879, | |
| "grad_norm": 0.3397728502750397, | |
| "learning_rate": 9.763698268210312e-06, | |
| "loss": 0.4749, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5682201010668164, | |
| "grad_norm": 0.31943392753601074, | |
| "learning_rate": 9.759708710347275e-06, | |
| "loss": 0.4718, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.570466030320045, | |
| "grad_norm": 0.3831331431865692, | |
| "learning_rate": 9.755686584276614e-06, | |
| "loss": 0.484, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5727119595732735, | |
| "grad_norm": 0.27558228373527527, | |
| "learning_rate": 9.751631917519637e-06, | |
| "loss": 0.4838, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.574957888826502, | |
| "grad_norm": 0.392098069190979, | |
| "learning_rate": 9.747544737820322e-06, | |
| "loss": 0.4844, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5772038180797305, | |
| "grad_norm": 0.29363974928855896, | |
| "learning_rate": 9.743425073145109e-06, | |
| "loss": 0.4993, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5794497473329591, | |
| "grad_norm": 0.3312382400035858, | |
| "learning_rate": 9.739272951682716e-06, | |
| "loss": 0.4812, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5816956765861875, | |
| "grad_norm": 0.34420520067214966, | |
| "learning_rate": 9.735088401843948e-06, | |
| "loss": 0.4744, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 0.29115816950798035, | |
| "learning_rate": 9.730871452261502e-06, | |
| "loss": 0.4755, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5861875350926445, | |
| "grad_norm": 0.3523911237716675, | |
| "learning_rate": 9.726622131789766e-06, | |
| "loss": 0.4918, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5884334643458731, | |
| "grad_norm": 0.3150189220905304, | |
| "learning_rate": 9.722340469504628e-06, | |
| "loss": 0.4846, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5906793935991016, | |
| "grad_norm": 0.3749271333217621, | |
| "learning_rate": 9.718026494703269e-06, | |
| "loss": 0.48, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5929253228523301, | |
| "grad_norm": 0.30882978439331055, | |
| "learning_rate": 9.713680236903979e-06, | |
| "loss": 0.4632, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5951712521055587, | |
| "grad_norm": 0.378319650888443, | |
| "learning_rate": 9.70930172584593e-06, | |
| "loss": 0.4876, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5974171813587872, | |
| "grad_norm": 0.2804391384124756, | |
| "learning_rate": 9.704890991488994e-06, | |
| "loss": 0.4682, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5996631106120157, | |
| "grad_norm": 0.3175744414329529, | |
| "learning_rate": 9.70044806401353e-06, | |
| "loss": 0.4932, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6019090398652442, | |
| "grad_norm": 0.3088872730731964, | |
| "learning_rate": 9.695972973820176e-06, | |
| "loss": 0.4758, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6041549691184728, | |
| "grad_norm": 0.2943213880062103, | |
| "learning_rate": 9.691465751529645e-06, | |
| "loss": 0.4995, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6064008983717013, | |
| "grad_norm": 0.3486208915710449, | |
| "learning_rate": 9.68692642798251e-06, | |
| "loss": 0.4686, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6086468276249298, | |
| "grad_norm": 0.37442758679389954, | |
| "learning_rate": 9.682355034238997e-06, | |
| "loss": 0.4918, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6108927568781584, | |
| "grad_norm": 0.5018337368965149, | |
| "learning_rate": 9.677751601578773e-06, | |
| "loss": 0.4793, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6131386861313869, | |
| "grad_norm": 0.3704725205898285, | |
| "learning_rate": 9.67311616150073e-06, | |
| "loss": 0.482, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.3328251540660858, | |
| "learning_rate": 9.668448745722772e-06, | |
| "loss": 0.4815, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.617630544637844, | |
| "grad_norm": 0.374489963054657, | |
| "learning_rate": 9.663749386181593e-06, | |
| "loss": 0.4765, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6198764738910725, | |
| "grad_norm": 0.3103203773498535, | |
| "learning_rate": 9.65901811503246e-06, | |
| "loss": 0.4773, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.622122403144301, | |
| "grad_norm": 0.45630261301994324, | |
| "learning_rate": 9.654254964649e-06, | |
| "loss": 0.4814, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.6243683323975294, | |
| "grad_norm": 0.32191282510757446, | |
| "learning_rate": 9.649459967622972e-06, | |
| "loss": 0.4876, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.626614261650758, | |
| "grad_norm": 0.4367053210735321, | |
| "learning_rate": 9.644633156764038e-06, | |
| "loss": 0.4826, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.6288601909039865, | |
| "grad_norm": 0.3019036650657654, | |
| "learning_rate": 9.639774565099555e-06, | |
| "loss": 0.4707, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.631106120157215, | |
| "grad_norm": 0.3420458137989044, | |
| "learning_rate": 9.634884225874335e-06, | |
| "loss": 0.4989, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6333520494104435, | |
| "grad_norm": 0.39009857177734375, | |
| "learning_rate": 9.629962172550419e-06, | |
| "loss": 0.4756, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6355979786636721, | |
| "grad_norm": 0.32310977578163147, | |
| "learning_rate": 9.625008438806857e-06, | |
| "loss": 0.4722, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6378439079169006, | |
| "grad_norm": 0.36968040466308594, | |
| "learning_rate": 9.620023058539467e-06, | |
| "loss": 0.4797, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6400898371701291, | |
| "grad_norm": 0.34846970438957214, | |
| "learning_rate": 9.615006065860611e-06, | |
| "loss": 0.471, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 0.3717726767063141, | |
| "learning_rate": 9.609957495098957e-06, | |
| "loss": 0.4669, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6445816956765862, | |
| "grad_norm": 0.3212599456310272, | |
| "learning_rate": 9.604877380799244e-06, | |
| "loss": 0.4702, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.6468276249298147, | |
| "grad_norm": 0.3086533844470978, | |
| "learning_rate": 9.59976575772205e-06, | |
| "loss": 0.4734, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6490735541830432, | |
| "grad_norm": 0.37244805693626404, | |
| "learning_rate": 9.594622660843547e-06, | |
| "loss": 0.4867, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.6513194834362718, | |
| "grad_norm": 0.327836275100708, | |
| "learning_rate": 9.58944812535527e-06, | |
| "loss": 0.4903, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6535654126895003, | |
| "grad_norm": 0.3715110421180725, | |
| "learning_rate": 9.58424218666387e-06, | |
| "loss": 0.4847, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.6558113419427288, | |
| "grad_norm": 0.4033578932285309, | |
| "learning_rate": 9.579004880390872e-06, | |
| "loss": 0.4785, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6580572711959574, | |
| "grad_norm": 0.4034516513347626, | |
| "learning_rate": 9.573736242372436e-06, | |
| "loss": 0.4707, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6603032004491859, | |
| "grad_norm": 0.3724893033504486, | |
| "learning_rate": 9.56843630865911e-06, | |
| "loss": 0.4895, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6625491297024144, | |
| "grad_norm": 0.3855060935020447, | |
| "learning_rate": 9.563105115515579e-06, | |
| "loss": 0.4751, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.664795058955643, | |
| "grad_norm": 0.3687981963157654, | |
| "learning_rate": 9.557742699420419e-06, | |
| "loss": 0.4779, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6670409882088714, | |
| "grad_norm": 0.31713899970054626, | |
| "learning_rate": 9.552349097065851e-06, | |
| "loss": 0.4889, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6692869174620999, | |
| "grad_norm": 0.3856634199619293, | |
| "learning_rate": 9.546924345357488e-06, | |
| "loss": 0.4747, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6715328467153284, | |
| "grad_norm": 0.3156067728996277, | |
| "learning_rate": 9.54146848141408e-06, | |
| "loss": 0.4772, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.673778775968557, | |
| "grad_norm": 0.33510684967041016, | |
| "learning_rate": 9.53598154256726e-06, | |
| "loss": 0.472, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6760247052217855, | |
| "grad_norm": 0.42198294401168823, | |
| "learning_rate": 9.530463566361296e-06, | |
| "loss": 0.4947, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.678270634475014, | |
| "grad_norm": 0.32931357622146606, | |
| "learning_rate": 9.524914590552825e-06, | |
| "loss": 0.4862, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6805165637282425, | |
| "grad_norm": 0.33701708912849426, | |
| "learning_rate": 9.519334653110597e-06, | |
| "loss": 0.5042, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6827624929814711, | |
| "grad_norm": 0.3782896101474762, | |
| "learning_rate": 9.513723792215217e-06, | |
| "loss": 0.4858, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6850084222346996, | |
| "grad_norm": 0.3276413381099701, | |
| "learning_rate": 9.508082046258884e-06, | |
| "loss": 0.473, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6872543514879281, | |
| "grad_norm": 0.3396032452583313, | |
| "learning_rate": 9.502409453845127e-06, | |
| "loss": 0.4978, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6895002807411567, | |
| "grad_norm": 0.38355326652526855, | |
| "learning_rate": 9.496706053788545e-06, | |
| "loss": 0.4695, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6917462099943852, | |
| "grad_norm": 0.3016837537288666, | |
| "learning_rate": 9.490971885114529e-06, | |
| "loss": 0.4868, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6939921392476137, | |
| "grad_norm": 0.3403872549533844, | |
| "learning_rate": 9.48520698705901e-06, | |
| "loss": 0.4964, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6962380685008422, | |
| "grad_norm": 0.33010175824165344, | |
| "learning_rate": 9.479411399068183e-06, | |
| "loss": 0.4675, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6984839977540708, | |
| "grad_norm": 0.36622872948646545, | |
| "learning_rate": 9.473585160798239e-06, | |
| "loss": 0.489, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 0.37846261262893677, | |
| "learning_rate": 9.46772831211509e-06, | |
| "loss": 0.4702, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7029758562605278, | |
| "grad_norm": 0.2969339191913605, | |
| "learning_rate": 9.461840893094103e-06, | |
| "loss": 0.4824, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7052217855137564, | |
| "grad_norm": 0.42460620403289795, | |
| "learning_rate": 9.45592294401982e-06, | |
| "loss": 0.4654, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7074677147669849, | |
| "grad_norm": 0.31242653727531433, | |
| "learning_rate": 9.449974505385682e-06, | |
| "loss": 0.4732, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.7097136440202133, | |
| "grad_norm": 0.3350578546524048, | |
| "learning_rate": 9.44399561789376e-06, | |
| "loss": 0.4834, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7119595732734418, | |
| "grad_norm": 0.3971409499645233, | |
| "learning_rate": 9.437986322454462e-06, | |
| "loss": 0.485, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7142055025266704, | |
| "grad_norm": 0.3148505389690399, | |
| "learning_rate": 9.43194666018627e-06, | |
| "loss": 0.4965, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7164514317798989, | |
| "grad_norm": 0.3623645603656769, | |
| "learning_rate": 9.425876672415448e-06, | |
| "loss": 0.4847, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7186973610331274, | |
| "grad_norm": 0.33705249428749084, | |
| "learning_rate": 9.419776400675758e-06, | |
| "loss": 0.4834, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.720943290286356, | |
| "grad_norm": 0.3334520161151886, | |
| "learning_rate": 9.413645886708185e-06, | |
| "loss": 0.4728, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7231892195395845, | |
| "grad_norm": 0.3809893727302551, | |
| "learning_rate": 9.40748517246064e-06, | |
| "loss": 0.4738, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.725435148792813, | |
| "grad_norm": 0.3264145851135254, | |
| "learning_rate": 9.401294300087682e-06, | |
| "loss": 0.4776, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7276810780460415, | |
| "grad_norm": 0.3935585916042328, | |
| "learning_rate": 9.39507331195023e-06, | |
| "loss": 0.473, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 0.38635513186454773, | |
| "learning_rate": 9.388822250615264e-06, | |
| "loss": 0.4649, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7321729365524986, | |
| "grad_norm": 0.41219913959503174, | |
| "learning_rate": 9.382541158855538e-06, | |
| "loss": 0.4593, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7344188658057271, | |
| "grad_norm": 0.35313233733177185, | |
| "learning_rate": 9.376230079649295e-06, | |
| "loss": 0.4695, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.7366647950589557, | |
| "grad_norm": 0.48907920718193054, | |
| "learning_rate": 9.369889056179961e-06, | |
| "loss": 0.4615, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7389107243121842, | |
| "grad_norm": 0.32115358114242554, | |
| "learning_rate": 9.363518131835857e-06, | |
| "loss": 0.4806, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.7411566535654127, | |
| "grad_norm": 0.4651142358779907, | |
| "learning_rate": 9.357117350209901e-06, | |
| "loss": 0.4823, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7434025828186412, | |
| "grad_norm": 0.37610235810279846, | |
| "learning_rate": 9.350686755099307e-06, | |
| "loss": 0.476, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.7456485120718698, | |
| "grad_norm": 0.3762288987636566, | |
| "learning_rate": 9.344226390505288e-06, | |
| "loss": 0.4878, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7478944413250983, | |
| "grad_norm": 0.34318727254867554, | |
| "learning_rate": 9.337736300632754e-06, | |
| "loss": 0.4823, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.7501403705783268, | |
| "grad_norm": 0.3277176320552826, | |
| "learning_rate": 9.331216529890009e-06, | |
| "loss": 0.492, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7523862998315554, | |
| "grad_norm": 0.3363962471485138, | |
| "learning_rate": 9.324667122888452e-06, | |
| "loss": 0.477, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7546322290847838, | |
| "grad_norm": 0.34611254930496216, | |
| "learning_rate": 9.318088124442259e-06, | |
| "loss": 0.4622, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7568781583380123, | |
| "grad_norm": 0.4872119724750519, | |
| "learning_rate": 9.311479579568091e-06, | |
| "loss": 0.4704, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 0.30356013774871826, | |
| "learning_rate": 9.30484153348478e-06, | |
| "loss": 0.4826, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7613700168444694, | |
| "grad_norm": 0.3759292662143707, | |
| "learning_rate": 9.298174031613019e-06, | |
| "loss": 0.4771, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.7636159460976979, | |
| "grad_norm": 0.4052506387233734, | |
| "learning_rate": 9.291477119575048e-06, | |
| "loss": 0.4747, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7658618753509264, | |
| "grad_norm": 0.40775245428085327, | |
| "learning_rate": 9.28475084319435e-06, | |
| "loss": 0.4963, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.768107804604155, | |
| "grad_norm": 0.34407731890678406, | |
| "learning_rate": 9.277995248495328e-06, | |
| "loss": 0.472, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7703537338573835, | |
| "grad_norm": 0.4342804253101349, | |
| "learning_rate": 9.271210381703e-06, | |
| "loss": 0.4633, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.772599663110612, | |
| "grad_norm": 0.325330913066864, | |
| "learning_rate": 9.264396289242676e-06, | |
| "loss": 0.4859, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7748455923638405, | |
| "grad_norm": 0.4626711905002594, | |
| "learning_rate": 9.25755301773964e-06, | |
| "loss": 0.457, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7770915216170691, | |
| "grad_norm": 0.34164246916770935, | |
| "learning_rate": 9.250680614018837e-06, | |
| "loss": 0.4748, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7793374508702976, | |
| "grad_norm": 0.3387359082698822, | |
| "learning_rate": 9.243779125104544e-06, | |
| "loss": 0.4862, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7815833801235261, | |
| "grad_norm": 0.40897244215011597, | |
| "learning_rate": 9.236848598220055e-06, | |
| "loss": 0.4739, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7838293093767547, | |
| "grad_norm": 0.37918272614479065, | |
| "learning_rate": 9.229889080787357e-06, | |
| "loss": 0.4717, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.7860752386299832, | |
| "grad_norm": 0.4629786014556885, | |
| "learning_rate": 9.222900620426802e-06, | |
| "loss": 0.4939, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7883211678832117, | |
| "grad_norm": 0.42090147733688354, | |
| "learning_rate": 9.215883264956786e-06, | |
| "loss": 0.4776, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7905670971364402, | |
| "grad_norm": 0.3530665338039398, | |
| "learning_rate": 9.208837062393416e-06, | |
| "loss": 0.4875, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7928130263896688, | |
| "grad_norm": 0.4339233338832855, | |
| "learning_rate": 9.201762060950185e-06, | |
| "loss": 0.4484, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7950589556428973, | |
| "grad_norm": 0.3293563425540924, | |
| "learning_rate": 9.194658309037647e-06, | |
| "loss": 0.4757, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7973048848961257, | |
| "grad_norm": 0.3879033923149109, | |
| "learning_rate": 9.187525855263071e-06, | |
| "loss": 0.4816, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7995508141493542, | |
| "grad_norm": 0.36516231298446655, | |
| "learning_rate": 9.180364748430127e-06, | |
| "loss": 0.4598, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8017967434025828, | |
| "grad_norm": 0.3673107326030731, | |
| "learning_rate": 9.173175037538539e-06, | |
| "loss": 0.4731, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.8040426726558113, | |
| "grad_norm": 0.38570478558540344, | |
| "learning_rate": 9.165956771783751e-06, | |
| "loss": 0.4744, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8062886019090398, | |
| "grad_norm": 0.42901894450187683, | |
| "learning_rate": 9.1587100005566e-06, | |
| "loss": 0.4842, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.8085345311622684, | |
| "grad_norm": 0.39992624521255493, | |
| "learning_rate": 9.151434773442963e-06, | |
| "loss": 0.475, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8107804604154969, | |
| "grad_norm": 0.4681251347064972, | |
| "learning_rate": 9.144131140223434e-06, | |
| "loss": 0.4886, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.8130263896687254, | |
| "grad_norm": 0.35085222125053406, | |
| "learning_rate": 9.136799150872967e-06, | |
| "loss": 0.4861, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.815272318921954, | |
| "grad_norm": 0.42589834332466125, | |
| "learning_rate": 9.129438855560551e-06, | |
| "loss": 0.4668, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 0.38507068157196045, | |
| "learning_rate": 9.122050304648849e-06, | |
| "loss": 0.4766, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.819764177428411, | |
| "grad_norm": 0.375751256942749, | |
| "learning_rate": 9.114633548693868e-06, | |
| "loss": 0.4816, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8220101066816395, | |
| "grad_norm": 0.503512442111969, | |
| "learning_rate": 9.107188638444606e-06, | |
| "loss": 0.4746, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8242560359348681, | |
| "grad_norm": 0.34955278038978577, | |
| "learning_rate": 9.099715624842707e-06, | |
| "loss": 0.4734, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.8265019651880966, | |
| "grad_norm": 0.37166303396224976, | |
| "learning_rate": 9.09221455902211e-06, | |
| "loss": 0.4635, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8287478944413251, | |
| "grad_norm": 0.32505786418914795, | |
| "learning_rate": 9.0846854923087e-06, | |
| "loss": 0.4716, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.8309938236945537, | |
| "grad_norm": 0.3304513096809387, | |
| "learning_rate": 9.077128476219963e-06, | |
| "loss": 0.4648, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8332397529477822, | |
| "grad_norm": 0.32548874616622925, | |
| "learning_rate": 9.06954356246462e-06, | |
| "loss": 0.4628, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.8354856822010107, | |
| "grad_norm": 0.351330041885376, | |
| "learning_rate": 9.061930802942286e-06, | |
| "loss": 0.4848, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8377316114542392, | |
| "grad_norm": 0.3573990762233734, | |
| "learning_rate": 9.054290249743113e-06, | |
| "loss": 0.4762, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.8399775407074677, | |
| "grad_norm": 0.32974398136138916, | |
| "learning_rate": 9.046621955147423e-06, | |
| "loss": 0.4751, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8422234699606962, | |
| "grad_norm": 0.31952598690986633, | |
| "learning_rate": 9.03892597162536e-06, | |
| "loss": 0.4652, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8444693992139247, | |
| "grad_norm": 0.33405670523643494, | |
| "learning_rate": 9.031202351836539e-06, | |
| "loss": 0.4712, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8467153284671532, | |
| "grad_norm": 0.41173166036605835, | |
| "learning_rate": 9.02345114862966e-06, | |
| "loss": 0.4644, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.8489612577203818, | |
| "grad_norm": 0.3065979480743408, | |
| "learning_rate": 9.01567241504217e-06, | |
| "loss": 0.4685, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8512071869736103, | |
| "grad_norm": 0.38998886942863464, | |
| "learning_rate": 9.007866204299896e-06, | |
| "loss": 0.4836, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.8534531162268388, | |
| "grad_norm": 0.3278312683105469, | |
| "learning_rate": 9.000032569816668e-06, | |
| "loss": 0.482, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8556990454800674, | |
| "grad_norm": 0.389222115278244, | |
| "learning_rate": 8.992171565193968e-06, | |
| "loss": 0.4642, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.8579449747332959, | |
| "grad_norm": 0.3489379584789276, | |
| "learning_rate": 8.984283244220558e-06, | |
| "loss": 0.4961, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8601909039865244, | |
| "grad_norm": 0.38780078291893005, | |
| "learning_rate": 8.976367660872104e-06, | |
| "loss": 0.4858, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.862436833239753, | |
| "grad_norm": 0.3673154413700104, | |
| "learning_rate": 8.968424869310828e-06, | |
| "loss": 0.4691, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8646827624929815, | |
| "grad_norm": 0.36734986305236816, | |
| "learning_rate": 8.960454923885111e-06, | |
| "loss": 0.4622, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.86692869174621, | |
| "grad_norm": 0.3670867085456848, | |
| "learning_rate": 8.95245787912914e-06, | |
| "loss": 0.4835, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8691746209994385, | |
| "grad_norm": 0.33945947885513306, | |
| "learning_rate": 8.944433789762523e-06, | |
| "loss": 0.4756, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.8714205502526671, | |
| "grad_norm": 0.37823382019996643, | |
| "learning_rate": 8.93638271068993e-06, | |
| "loss": 0.4927, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8736664795058956, | |
| "grad_norm": 0.3298521935939789, | |
| "learning_rate": 8.9283046970007e-06, | |
| "loss": 0.4639, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.33418142795562744, | |
| "learning_rate": 8.92019980396847e-06, | |
| "loss": 0.4559, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8781583380123527, | |
| "grad_norm": 0.32573068141937256, | |
| "learning_rate": 8.912068087050807e-06, | |
| "loss": 0.4599, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8804042672655812, | |
| "grad_norm": 0.2992747724056244, | |
| "learning_rate": 8.90390960188881e-06, | |
| "loss": 0.4699, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8826501965188096, | |
| "grad_norm": 0.419653981924057, | |
| "learning_rate": 8.895724404306745e-06, | |
| "loss": 0.4644, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8848961257720381, | |
| "grad_norm": 0.34604114294052124, | |
| "learning_rate": 8.887512550311655e-06, | |
| "loss": 0.4758, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8871420550252667, | |
| "grad_norm": 0.30816447734832764, | |
| "learning_rate": 8.879274096092983e-06, | |
| "loss": 0.4709, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8893879842784952, | |
| "grad_norm": 0.3544372320175171, | |
| "learning_rate": 8.871009098022176e-06, | |
| "loss": 0.4903, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8916339135317237, | |
| "grad_norm": 0.3021892011165619, | |
| "learning_rate": 8.862717612652316e-06, | |
| "loss": 0.4576, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8938798427849522, | |
| "grad_norm": 0.33287468552589417, | |
| "learning_rate": 8.854399696717713e-06, | |
| "loss": 0.4823, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8961257720381808, | |
| "grad_norm": 0.2934684455394745, | |
| "learning_rate": 8.846055407133539e-06, | |
| "loss": 0.4619, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8983717012914093, | |
| "grad_norm": 0.37255221605300903, | |
| "learning_rate": 8.837684800995417e-06, | |
| "loss": 0.4567, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9006176305446378, | |
| "grad_norm": 0.3295063376426697, | |
| "learning_rate": 8.829287935579046e-06, | |
| "loss": 0.4667, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.9028635597978664, | |
| "grad_norm": 0.38328802585601807, | |
| "learning_rate": 8.820864868339804e-06, | |
| "loss": 0.4735, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.9051094890510949, | |
| "grad_norm": 0.36380237340927124, | |
| "learning_rate": 8.812415656912353e-06, | |
| "loss": 0.4918, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.9073554183043234, | |
| "grad_norm": 0.3465980887413025, | |
| "learning_rate": 8.803940359110246e-06, | |
| "loss": 0.4798, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.909601347557552, | |
| "grad_norm": 0.35272216796875, | |
| "learning_rate": 8.79543903292553e-06, | |
| "loss": 0.4724, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.9118472768107805, | |
| "grad_norm": 0.38653409481048584, | |
| "learning_rate": 8.786911736528352e-06, | |
| "loss": 0.4559, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.914093206064009, | |
| "grad_norm": 0.35222503542900085, | |
| "learning_rate": 8.778358528266562e-06, | |
| "loss": 0.4586, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.9163391353172375, | |
| "grad_norm": 0.31955739855766296, | |
| "learning_rate": 8.769779466665309e-06, | |
| "loss": 0.4748, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9185850645704661, | |
| "grad_norm": 0.30488333106040955, | |
| "learning_rate": 8.761174610426642e-06, | |
| "loss": 0.467, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.9208309938236946, | |
| "grad_norm": 0.268274188041687, | |
| "learning_rate": 8.75254401842911e-06, | |
| "loss": 0.481, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.35750773549079895, | |
| "learning_rate": 8.74388774972736e-06, | |
| "loss": 0.4931, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.9253228523301515, | |
| "grad_norm": 0.27234843373298645, | |
| "learning_rate": 8.73520586355173e-06, | |
| "loss": 0.4709, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9275687815833801, | |
| "grad_norm": 0.31700101494789124, | |
| "learning_rate": 8.726498419307844e-06, | |
| "loss": 0.4618, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.9298147108366086, | |
| "grad_norm": 0.27126544713974, | |
| "learning_rate": 8.71776547657621e-06, | |
| "loss": 0.4663, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9320606400898371, | |
| "grad_norm": 0.27281293272972107, | |
| "learning_rate": 8.709007095111805e-06, | |
| "loss": 0.4641, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 0.32400500774383545, | |
| "learning_rate": 8.70022333484367e-06, | |
| "loss": 0.4703, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9365524985962942, | |
| "grad_norm": 0.34988343715667725, | |
| "learning_rate": 8.691414255874506e-06, | |
| "loss": 0.4912, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.9387984278495227, | |
| "grad_norm": 0.2996358573436737, | |
| "learning_rate": 8.682579918480247e-06, | |
| "loss": 0.4605, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9410443571027512, | |
| "grad_norm": 0.3629034757614136, | |
| "learning_rate": 8.673720383109666e-06, | |
| "loss": 0.4881, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.9432902863559798, | |
| "grad_norm": 0.3697206377983093, | |
| "learning_rate": 8.664835710383949e-06, | |
| "loss": 0.4693, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9455362156092083, | |
| "grad_norm": 0.2857604920864105, | |
| "learning_rate": 8.655925961096284e-06, | |
| "loss": 0.46, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.9477821448624368, | |
| "grad_norm": 0.3731415569782257, | |
| "learning_rate": 8.64699119621144e-06, | |
| "loss": 0.4781, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9500280741156654, | |
| "grad_norm": 0.2709653675556183, | |
| "learning_rate": 8.638031476865366e-06, | |
| "loss": 0.4582, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.9522740033688939, | |
| "grad_norm": 0.3546141982078552, | |
| "learning_rate": 8.629046864364751e-06, | |
| "loss": 0.468, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9545199326221224, | |
| "grad_norm": 0.30327171087265015, | |
| "learning_rate": 8.62003742018662e-06, | |
| "loss": 0.4668, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.956765861875351, | |
| "grad_norm": 0.3272528052330017, | |
| "learning_rate": 8.611003205977905e-06, | |
| "loss": 0.4579, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9590117911285795, | |
| "grad_norm": 0.3644426167011261, | |
| "learning_rate": 8.601944283555033e-06, | |
| "loss": 0.4644, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.961257720381808, | |
| "grad_norm": 0.3664405941963196, | |
| "learning_rate": 8.592860714903488e-06, | |
| "loss": 0.4789, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9635036496350365, | |
| "grad_norm": 0.4094981551170349, | |
| "learning_rate": 8.583752562177401e-06, | |
| "loss": 0.4533, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.9657495788882651, | |
| "grad_norm": 0.3394399881362915, | |
| "learning_rate": 8.574619887699115e-06, | |
| "loss": 0.452, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9679955081414935, | |
| "grad_norm": 0.3262495696544647, | |
| "learning_rate": 8.565462753958767e-06, | |
| "loss": 0.47, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.970241437394722, | |
| "grad_norm": 0.3226722776889801, | |
| "learning_rate": 8.556281223613851e-06, | |
| "loss": 0.459, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9724873666479505, | |
| "grad_norm": 0.28685227036476135, | |
| "learning_rate": 8.5470753594888e-06, | |
| "loss": 0.4404, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.9747332959011791, | |
| "grad_norm": 0.32768598198890686, | |
| "learning_rate": 8.537845224574546e-06, | |
| "loss": 0.4849, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9769792251544076, | |
| "grad_norm": 0.2841854393482208, | |
| "learning_rate": 8.528590882028094e-06, | |
| "loss": 0.4686, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9792251544076361, | |
| "grad_norm": 0.29862478375434875, | |
| "learning_rate": 8.519312395172093e-06, | |
| "loss": 0.4707, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9814710836608647, | |
| "grad_norm": 0.30814310908317566, | |
| "learning_rate": 8.510009827494392e-06, | |
| "loss": 0.477, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.9837170129140932, | |
| "grad_norm": 0.3032066822052002, | |
| "learning_rate": 8.500683242647617e-06, | |
| "loss": 0.4638, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9859629421673217, | |
| "grad_norm": 0.3458973169326782, | |
| "learning_rate": 8.491332704448734e-06, | |
| "loss": 0.4756, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9882088714205502, | |
| "grad_norm": 0.30614790320396423, | |
| "learning_rate": 8.481958276878602e-06, | |
| "loss": 0.4856, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9904548006737788, | |
| "grad_norm": 0.3345167338848114, | |
| "learning_rate": 8.472560024081546e-06, | |
| "loss": 0.4613, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 0.3257136344909668, | |
| "learning_rate": 8.463138010364918e-06, | |
| "loss": 0.4786, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9949466591802358, | |
| "grad_norm": 0.3315941393375397, | |
| "learning_rate": 8.453692300198648e-06, | |
| "loss": 0.4654, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9971925884334644, | |
| "grad_norm": 0.32225826382637024, | |
| "learning_rate": 8.444222958214812e-06, | |
| "loss": 0.4765, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9994385176866929, | |
| "grad_norm": 0.3224077820777893, | |
| "learning_rate": 8.434730049207184e-06, | |
| "loss": 0.4593, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.0016844469399213, | |
| "grad_norm": 0.6709184646606445, | |
| "learning_rate": 8.425213638130798e-06, | |
| "loss": 0.7572, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.00393037619315, | |
| "grad_norm": 0.4798668920993805, | |
| "learning_rate": 8.415673790101495e-06, | |
| "loss": 0.472, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.0061763054463784, | |
| "grad_norm": 0.37248560786247253, | |
| "learning_rate": 8.40611057039549e-06, | |
| "loss": 0.448, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.008422234699607, | |
| "grad_norm": 0.45663711428642273, | |
| "learning_rate": 8.396524044448913e-06, | |
| "loss": 0.4495, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.0106681639528354, | |
| "grad_norm": 0.38295912742614746, | |
| "learning_rate": 8.386914277857365e-06, | |
| "loss": 0.4203, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.012914093206064, | |
| "grad_norm": 0.44765421748161316, | |
| "learning_rate": 8.37728133637548e-06, | |
| "loss": 0.4774, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.0151600224592925, | |
| "grad_norm": 0.35290607810020447, | |
| "learning_rate": 8.367625285916454e-06, | |
| "loss": 0.4205, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0174059517125211, | |
| "grad_norm": 0.4127921462059021, | |
| "learning_rate": 8.357946192551611e-06, | |
| "loss": 0.4512, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.0196518809657495, | |
| "grad_norm": 0.4858415126800537, | |
| "learning_rate": 8.348244122509949e-06, | |
| "loss": 0.4631, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0218978102189782, | |
| "grad_norm": 0.40491798520088196, | |
| "learning_rate": 8.338519142177679e-06, | |
| "loss": 0.4365, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.0241437394722066, | |
| "grad_norm": 0.34673023223876953, | |
| "learning_rate": 8.328771318097773e-06, | |
| "loss": 0.4477, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0263896687254352, | |
| "grad_norm": 0.40387821197509766, | |
| "learning_rate": 8.319000716969518e-06, | |
| "loss": 0.4611, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.0286355979786637, | |
| "grad_norm": 0.34297940135002136, | |
| "learning_rate": 8.309207405648047e-06, | |
| "loss": 0.4474, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0308815272318923, | |
| "grad_norm": 0.3807845711708069, | |
| "learning_rate": 8.299391451143887e-06, | |
| "loss": 0.469, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.0331274564851207, | |
| "grad_norm": 0.3148818612098694, | |
| "learning_rate": 8.289552920622505e-06, | |
| "loss": 0.4526, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.0353733857383491, | |
| "grad_norm": 0.34133604168891907, | |
| "learning_rate": 8.27969188140384e-06, | |
| "loss": 0.4645, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.0376193149915778, | |
| "grad_norm": 0.3762519657611847, | |
| "learning_rate": 8.269808400961845e-06, | |
| "loss": 0.4483, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0398652442448062, | |
| "grad_norm": 0.46112120151519775, | |
| "learning_rate": 8.259902546924032e-06, | |
| "loss": 0.4667, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.0421111734980348, | |
| "grad_norm": 4.490077972412109, | |
| "learning_rate": 8.249974387071e-06, | |
| "loss": 0.4467, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0443571027512633, | |
| "grad_norm": 4.129928112030029, | |
| "learning_rate": 8.240023989335975e-06, | |
| "loss": 0.463, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.046603032004492, | |
| "grad_norm": 0.6177784204483032, | |
| "learning_rate": 8.230051421804346e-06, | |
| "loss": 0.4552, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0488489612577203, | |
| "grad_norm": 1.1695165634155273, | |
| "learning_rate": 8.220056752713198e-06, | |
| "loss": 0.4519, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.051094890510949, | |
| "grad_norm": 0.5390977263450623, | |
| "learning_rate": 8.210040050450846e-06, | |
| "loss": 0.473, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0533408197641774, | |
| "grad_norm": 0.4115554392337799, | |
| "learning_rate": 8.20000138355637e-06, | |
| "loss": 0.446, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.055586749017406, | |
| "grad_norm": 0.4782909154891968, | |
| "learning_rate": 8.189940820719136e-06, | |
| "loss": 0.4574, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0578326782706344, | |
| "grad_norm": 0.4880026876926422, | |
| "learning_rate": 8.179858430778334e-06, | |
| "loss": 0.4549, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.060078607523863, | |
| "grad_norm": 0.4663502275943756, | |
| "learning_rate": 8.169754282722508e-06, | |
| "loss": 0.4533, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0623245367770915, | |
| "grad_norm": 0.4719676077365875, | |
| "learning_rate": 8.159628445689083e-06, | |
| "loss": 0.4507, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.0645704660303201, | |
| "grad_norm": 0.37671101093292236, | |
| "learning_rate": 8.149480988963884e-06, | |
| "loss": 0.4445, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0668163952835485, | |
| "grad_norm": 0.4894201457500458, | |
| "learning_rate": 8.139311981980675e-06, | |
| "loss": 0.4425, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.0690623245367772, | |
| "grad_norm": 1.3329061269760132, | |
| "learning_rate": 8.129121494320673e-06, | |
| "loss": 0.4334, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0713082537900056, | |
| "grad_norm": 0.4755379557609558, | |
| "learning_rate": 8.118909595712077e-06, | |
| "loss": 0.4596, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.073554183043234, | |
| "grad_norm": 0.3152107894420624, | |
| "learning_rate": 8.108676356029593e-06, | |
| "loss": 0.4773, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0758001122964627, | |
| "grad_norm": 0.40582582354545593, | |
| "learning_rate": 8.098421845293946e-06, | |
| "loss": 0.436, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.078046041549691, | |
| "grad_norm": 0.333881676197052, | |
| "learning_rate": 8.088146133671415e-06, | |
| "loss": 0.4441, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0802919708029197, | |
| "grad_norm": 0.36508119106292725, | |
| "learning_rate": 8.077849291473339e-06, | |
| "loss": 0.445, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.0825379000561481, | |
| "grad_norm": 0.40846577286720276, | |
| "learning_rate": 8.067531389155652e-06, | |
| "loss": 0.4652, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0847838293093768, | |
| "grad_norm": 0.29027220606803894, | |
| "learning_rate": 8.057192497318383e-06, | |
| "loss": 0.432, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.0870297585626052, | |
| "grad_norm": 0.3959558606147766, | |
| "learning_rate": 8.046832686705179e-06, | |
| "loss": 0.475, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0892756878158338, | |
| "grad_norm": 0.2976958453655243, | |
| "learning_rate": 8.036452028202837e-06, | |
| "loss": 0.437, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.0915216170690623, | |
| "grad_norm": 0.26725515723228455, | |
| "learning_rate": 8.026050592840788e-06, | |
| "loss": 0.4279, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.093767546322291, | |
| "grad_norm": 0.3430537283420563, | |
| "learning_rate": 8.015628451790642e-06, | |
| "loss": 0.4596, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.0960134755755193, | |
| "grad_norm": 0.28370511531829834, | |
| "learning_rate": 8.00518567636568e-06, | |
| "loss": 0.4457, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.098259404828748, | |
| "grad_norm": 0.3284716308116913, | |
| "learning_rate": 7.994722338020375e-06, | |
| "loss": 0.4424, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.1005053340819764, | |
| "grad_norm": 0.30496740341186523, | |
| "learning_rate": 7.984238508349901e-06, | |
| "loss": 0.4534, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.102751263335205, | |
| "grad_norm": 0.3204284608364105, | |
| "learning_rate": 7.973734259089644e-06, | |
| "loss": 0.4559, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.1049971925884334, | |
| "grad_norm": 0.28355643153190613, | |
| "learning_rate": 7.963209662114714e-06, | |
| "loss": 0.4683, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.107243121841662, | |
| "grad_norm": 0.2843816578388214, | |
| "learning_rate": 7.952664789439443e-06, | |
| "loss": 0.4605, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.1094890510948905, | |
| "grad_norm": 0.2756952941417694, | |
| "learning_rate": 7.942099713216902e-06, | |
| "loss": 0.4218, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.1117349803481191, | |
| "grad_norm": 0.27619650959968567, | |
| "learning_rate": 7.931514505738408e-06, | |
| "loss": 0.4309, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.1139809096013475, | |
| "grad_norm": 0.31005722284317017, | |
| "learning_rate": 7.92090923943302e-06, | |
| "loss": 0.4478, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1162268388545762, | |
| "grad_norm": 0.26537370681762695, | |
| "learning_rate": 7.910283986867051e-06, | |
| "loss": 0.4721, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.1184727681078046, | |
| "grad_norm": 0.3197883665561676, | |
| "learning_rate": 7.89963882074357e-06, | |
| "loss": 0.4371, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.120718697361033, | |
| "grad_norm": 0.27182987332344055, | |
| "learning_rate": 7.888973813901909e-06, | |
| "loss": 0.454, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.1229646266142617, | |
| "grad_norm": 0.36007192730903625, | |
| "learning_rate": 7.87828903931715e-06, | |
| "loss": 0.4666, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.12521055586749, | |
| "grad_norm": 0.2985324263572693, | |
| "learning_rate": 7.867584570099642e-06, | |
| "loss": 0.4463, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.1274564851207187, | |
| "grad_norm": 0.30184683203697205, | |
| "learning_rate": 7.856860479494492e-06, | |
| "loss": 0.4582, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1297024143739471, | |
| "grad_norm": 0.2989865839481354, | |
| "learning_rate": 7.846116840881069e-06, | |
| "loss": 0.4557, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.1319483436271758, | |
| "grad_norm": 0.2534805238246918, | |
| "learning_rate": 7.835353727772491e-06, | |
| "loss": 0.4058, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1341942728804042, | |
| "grad_norm": 0.35043448209762573, | |
| "learning_rate": 7.82457121381514e-06, | |
| "loss": 0.49, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.1364402021336328, | |
| "grad_norm": 0.2577075660228729, | |
| "learning_rate": 7.81376937278814e-06, | |
| "loss": 0.4293, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1386861313868613, | |
| "grad_norm": 0.3364856541156769, | |
| "learning_rate": 7.802948278602866e-06, | |
| "loss": 0.4755, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.14093206064009, | |
| "grad_norm": 0.282972514629364, | |
| "learning_rate": 7.792108005302426e-06, | |
| "loss": 0.4537, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1431779898933183, | |
| "grad_norm": 0.26607781648635864, | |
| "learning_rate": 7.781248627061166e-06, | |
| "loss": 0.4228, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.145423919146547, | |
| "grad_norm": 0.3014846742153168, | |
| "learning_rate": 7.770370218184156e-06, | |
| "loss": 0.4455, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1476698483997754, | |
| "grad_norm": 0.27567797899246216, | |
| "learning_rate": 7.75947285310668e-06, | |
| "loss": 0.482, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.149915777653004, | |
| "grad_norm": 0.2605037987232208, | |
| "learning_rate": 7.748556606393732e-06, | |
| "loss": 0.4284, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1521617069062324, | |
| "grad_norm": 0.3069257140159607, | |
| "learning_rate": 7.737621552739501e-06, | |
| "loss": 0.4571, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.154407636159461, | |
| "grad_norm": 0.3215087354183197, | |
| "learning_rate": 7.726667766966866e-06, | |
| "loss": 0.4502, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1566535654126895, | |
| "grad_norm": 0.31216177344322205, | |
| "learning_rate": 7.71569532402688e-06, | |
| "loss": 0.4565, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.158899494665918, | |
| "grad_norm": 0.3760012984275818, | |
| "learning_rate": 7.70470429899825e-06, | |
| "loss": 0.4362, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1611454239191465, | |
| "grad_norm": 0.33376315236091614, | |
| "learning_rate": 7.69369476708684e-06, | |
| "loss": 0.4724, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.1633913531723752, | |
| "grad_norm": 0.2877935469150543, | |
| "learning_rate": 7.682666803625138e-06, | |
| "loss": 0.4453, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1656372824256036, | |
| "grad_norm": 0.33166879415512085, | |
| "learning_rate": 7.671620484071758e-06, | |
| "loss": 0.4585, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.167883211678832, | |
| "grad_norm": 0.2634395360946655, | |
| "learning_rate": 7.66055588401091e-06, | |
| "loss": 0.4302, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1701291409320607, | |
| "grad_norm": 0.28289881348609924, | |
| "learning_rate": 7.649473079151888e-06, | |
| "loss": 0.4303, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.172375070185289, | |
| "grad_norm": 0.29282352328300476, | |
| "learning_rate": 7.638372145328554e-06, | |
| "loss": 0.4395, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1746209994385177, | |
| "grad_norm": 0.27824363112449646, | |
| "learning_rate": 7.627253158498819e-06, | |
| "loss": 0.445, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.1768669286917461, | |
| "grad_norm": 0.3538764715194702, | |
| "learning_rate": 7.616116194744114e-06, | |
| "loss": 0.4612, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1791128579449748, | |
| "grad_norm": 0.26989635825157166, | |
| "learning_rate": 7.604961330268885e-06, | |
| "loss": 0.4544, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.1813587871982032, | |
| "grad_norm": 0.32161369919776917, | |
| "learning_rate": 7.593788641400057e-06, | |
| "loss": 0.4405, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1836047164514318, | |
| "grad_norm": 0.27198460698127747, | |
| "learning_rate": 7.582598204586522e-06, | |
| "loss": 0.4759, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.1858506457046603, | |
| "grad_norm": 0.365715891122818, | |
| "learning_rate": 7.571390096398611e-06, | |
| "loss": 0.4433, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.188096574957889, | |
| "grad_norm": 0.2920300364494324, | |
| "learning_rate": 7.56016439352757e-06, | |
| "loss": 0.4536, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.1903425042111173, | |
| "grad_norm": 0.3396730422973633, | |
| "learning_rate": 7.548921172785038e-06, | |
| "loss": 0.4604, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.192588433464346, | |
| "grad_norm": 0.3063504695892334, | |
| "learning_rate": 7.537660511102516e-06, | |
| "loss": 0.4371, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.1948343627175744, | |
| "grad_norm": 0.30634409189224243, | |
| "learning_rate": 7.526382485530848e-06, | |
| "loss": 0.4547, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.197080291970803, | |
| "grad_norm": 0.28994691371917725, | |
| "learning_rate": 7.51508717323969e-06, | |
| "loss": 0.4474, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.1993262212240314, | |
| "grad_norm": 0.31030574440956116, | |
| "learning_rate": 7.5037746515169795e-06, | |
| "loss": 0.4382, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.20157215047726, | |
| "grad_norm": 0.29604753851890564, | |
| "learning_rate": 7.492444997768412e-06, | |
| "loss": 0.4641, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.2038180797304885, | |
| "grad_norm": 0.305606484413147, | |
| "learning_rate": 7.481098289516906e-06, | |
| "loss": 0.45, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.206064008983717, | |
| "grad_norm": 0.28157690167427063, | |
| "learning_rate": 7.469734604402076e-06, | |
| "loss": 0.447, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.2083099382369455, | |
| "grad_norm": 0.31427818536758423, | |
| "learning_rate": 7.4583540201797015e-06, | |
| "loss": 0.4486, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.210555867490174, | |
| "grad_norm": 0.3320254683494568, | |
| "learning_rate": 7.446956614721191e-06, | |
| "loss": 0.4491, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.2128017967434026, | |
| "grad_norm": 0.2562301456928253, | |
| "learning_rate": 7.435542466013057e-06, | |
| "loss": 0.4262, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.215047725996631, | |
| "grad_norm": 0.2971283495426178, | |
| "learning_rate": 7.424111652156369e-06, | |
| "loss": 0.4471, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.2172936552498597, | |
| "grad_norm": 0.3181101977825165, | |
| "learning_rate": 7.412664251366239e-06, | |
| "loss": 0.4607, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.219539584503088, | |
| "grad_norm": 0.3226609230041504, | |
| "learning_rate": 7.401200341971263e-06, | |
| "loss": 0.4556, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.2217855137563167, | |
| "grad_norm": 0.3116491734981537, | |
| "learning_rate": 7.389720002413003e-06, | |
| "loss": 0.4349, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2240314430095451, | |
| "grad_norm": 0.33195728063583374, | |
| "learning_rate": 7.378223311245447e-06, | |
| "loss": 0.4371, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.2262773722627738, | |
| "grad_norm": 0.27619820833206177, | |
| "learning_rate": 7.3667103471344585e-06, | |
| "loss": 0.4381, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2285233015160022, | |
| "grad_norm": 0.29046374559402466, | |
| "learning_rate": 7.355181188857258e-06, | |
| "loss": 0.4515, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.31919410824775696, | |
| "learning_rate": 7.343635915301872e-06, | |
| "loss": 0.4488, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2330151600224593, | |
| "grad_norm": 0.3814048171043396, | |
| "learning_rate": 7.33207460546659e-06, | |
| "loss": 0.4749, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.235261089275688, | |
| "grad_norm": 0.3012455403804779, | |
| "learning_rate": 7.3204973384594365e-06, | |
| "loss": 0.4498, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2375070185289163, | |
| "grad_norm": 0.35009750723838806, | |
| "learning_rate": 7.3089041934976216e-06, | |
| "loss": 0.469, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.239752947782145, | |
| "grad_norm": 0.24640639126300812, | |
| "learning_rate": 7.297295249906992e-06, | |
| "loss": 0.4148, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2419988770353734, | |
| "grad_norm": 0.315764844417572, | |
| "learning_rate": 7.285670587121508e-06, | |
| "loss": 0.4464, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.2442448062886018, | |
| "grad_norm": 0.2749885618686676, | |
| "learning_rate": 7.274030284682679e-06, | |
| "loss": 0.4436, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2464907355418304, | |
| "grad_norm": 0.2588658630847931, | |
| "learning_rate": 7.262374422239033e-06, | |
| "loss": 0.4639, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.248736664795059, | |
| "grad_norm": 0.3450206518173218, | |
| "learning_rate": 7.250703079545566e-06, | |
| "loss": 0.4403, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.2509825940482875, | |
| "grad_norm": 0.264999657869339, | |
| "learning_rate": 7.2390163364631945e-06, | |
| "loss": 0.4634, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.253228523301516, | |
| "grad_norm": 0.28712841868400574, | |
| "learning_rate": 7.22731427295822e-06, | |
| "loss": 0.4049, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2554744525547445, | |
| "grad_norm": 0.2988751530647278, | |
| "learning_rate": 7.215596969101762e-06, | |
| "loss": 0.4507, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.2577203818079732, | |
| "grad_norm": 0.29097434878349304, | |
| "learning_rate": 7.2038645050692315e-06, | |
| "loss": 0.4418, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2599663110612016, | |
| "grad_norm": 0.2874724268913269, | |
| "learning_rate": 7.192116961139769e-06, | |
| "loss": 0.4603, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.26221224031443, | |
| "grad_norm": 0.2682914435863495, | |
| "learning_rate": 7.180354417695696e-06, | |
| "loss": 0.4487, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2644581695676587, | |
| "grad_norm": 0.29097360372543335, | |
| "learning_rate": 7.168576955221975e-06, | |
| "loss": 0.4323, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.266704098820887, | |
| "grad_norm": 0.28627073764801025, | |
| "learning_rate": 7.1567846543056445e-06, | |
| "loss": 0.4651, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2689500280741157, | |
| "grad_norm": 0.29652172327041626, | |
| "learning_rate": 7.144977595635278e-06, | |
| "loss": 0.4369, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.2711959573273441, | |
| "grad_norm": 0.2619209885597229, | |
| "learning_rate": 7.133155860000429e-06, | |
| "loss": 0.4486, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2734418865805728, | |
| "grad_norm": 0.28887611627578735, | |
| "learning_rate": 7.121319528291077e-06, | |
| "loss": 0.4568, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.2756878158338012, | |
| "grad_norm": 0.27822646498680115, | |
| "learning_rate": 7.109468681497076e-06, | |
| "loss": 0.4434, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2779337450870298, | |
| "grad_norm": 0.3062475323677063, | |
| "learning_rate": 7.097603400707595e-06, | |
| "loss": 0.4635, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.2801796743402583, | |
| "grad_norm": 0.27848997712135315, | |
| "learning_rate": 7.0857237671105735e-06, | |
| "loss": 0.4504, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2824256035934867, | |
| "grad_norm": 0.2792271375656128, | |
| "learning_rate": 7.0738298619921565e-06, | |
| "loss": 0.4364, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.2846715328467153, | |
| "grad_norm": 0.28332486748695374, | |
| "learning_rate": 7.06192176673614e-06, | |
| "loss": 0.4722, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.286917462099944, | |
| "grad_norm": 0.2763806879520416, | |
| "learning_rate": 7.0499995628234195e-06, | |
| "loss": 0.4313, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.2891633913531724, | |
| "grad_norm": 0.2765560746192932, | |
| "learning_rate": 7.038063331831425e-06, | |
| "loss": 0.4414, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2914093206064008, | |
| "grad_norm": 0.2661452889442444, | |
| "learning_rate": 7.026113155433569e-06, | |
| "loss": 0.4559, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.2936552498596294, | |
| "grad_norm": 0.2632508873939514, | |
| "learning_rate": 7.0141491153986856e-06, | |
| "loss": 0.4591, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.295901179112858, | |
| "grad_norm": 0.24122297763824463, | |
| "learning_rate": 7.002171293590467e-06, | |
| "loss": 0.4396, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.2981471083660865, | |
| "grad_norm": 0.2598783075809479, | |
| "learning_rate": 6.990179771966911e-06, | |
| "loss": 0.4138, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.300393037619315, | |
| "grad_norm": 0.2668991982936859, | |
| "learning_rate": 6.978174632579754e-06, | |
| "loss": 0.4601, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.3026389668725435, | |
| "grad_norm": 0.2742937505245209, | |
| "learning_rate": 6.966155957573911e-06, | |
| "loss": 0.4214, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.304884896125772, | |
| "grad_norm": 0.31684938073158264, | |
| "learning_rate": 6.954123829186917e-06, | |
| "loss": 0.4655, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.3071308253790006, | |
| "grad_norm": 0.2928871810436249, | |
| "learning_rate": 6.9420783297483575e-06, | |
| "loss": 0.4494, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.309376754632229, | |
| "grad_norm": 0.32177117466926575, | |
| "learning_rate": 6.930019541679314e-06, | |
| "loss": 0.441, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.3116226838854577, | |
| "grad_norm": 0.3396602272987366, | |
| "learning_rate": 6.917947547491789e-06, | |
| "loss": 0.4638, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.313868613138686, | |
| "grad_norm": 0.3194241225719452, | |
| "learning_rate": 6.9058624297881525e-06, | |
| "loss": 0.4381, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.3161145423919147, | |
| "grad_norm": 0.3782861828804016, | |
| "learning_rate": 6.893764271260572e-06, | |
| "loss": 0.4582, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3183604716451431, | |
| "grad_norm": 0.2568625807762146, | |
| "learning_rate": 6.881653154690445e-06, | |
| "loss": 0.4211, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.3206064008983718, | |
| "grad_norm": 0.3422069847583771, | |
| "learning_rate": 6.869529162947831e-06, | |
| "loss": 0.4402, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3228523301516002, | |
| "grad_norm": 0.30332496762275696, | |
| "learning_rate": 6.857392378990895e-06, | |
| "loss": 0.4683, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.3250982594048288, | |
| "grad_norm": 0.32135963439941406, | |
| "learning_rate": 6.845242885865324e-06, | |
| "loss": 0.4586, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3273441886580573, | |
| "grad_norm": 0.32824966311454773, | |
| "learning_rate": 6.833080766703776e-06, | |
| "loss": 0.458, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.3295901179112857, | |
| "grad_norm": 0.3076978921890259, | |
| "learning_rate": 6.820906104725293e-06, | |
| "loss": 0.4597, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3318360471645143, | |
| "grad_norm": 0.2813679873943329, | |
| "learning_rate": 6.808718983234748e-06, | |
| "loss": 0.4311, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.334081976417743, | |
| "grad_norm": 0.3031136095523834, | |
| "learning_rate": 6.796519485622267e-06, | |
| "loss": 0.4575, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3363279056709714, | |
| "grad_norm": 0.30593588948249817, | |
| "learning_rate": 6.7843076953626555e-06, | |
| "loss": 0.4269, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.3385738349241998, | |
| "grad_norm": 0.29532647132873535, | |
| "learning_rate": 6.7720836960148376e-06, | |
| "loss": 0.437, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3408197641774284, | |
| "grad_norm": 0.2953149378299713, | |
| "learning_rate": 6.7598475712212695e-06, | |
| "loss": 0.4429, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.343065693430657, | |
| "grad_norm": 0.2667207419872284, | |
| "learning_rate": 6.747599404707382e-06, | |
| "loss": 0.4464, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3453116226838855, | |
| "grad_norm": 0.35777759552001953, | |
| "learning_rate": 6.735339280281001e-06, | |
| "loss": 0.4632, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.347557551937114, | |
| "grad_norm": 0.26391759514808655, | |
| "learning_rate": 6.72306728183177e-06, | |
| "loss": 0.4384, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3498034811903425, | |
| "grad_norm": 0.3116670846939087, | |
| "learning_rate": 6.710783493330583e-06, | |
| "loss": 0.4627, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.352049410443571, | |
| "grad_norm": 0.2874084413051605, | |
| "learning_rate": 6.698487998829007e-06, | |
| "loss": 0.4705, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3542953396967996, | |
| "grad_norm": 0.2724873125553131, | |
| "learning_rate": 6.686180882458705e-06, | |
| "loss": 0.4129, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.356541268950028, | |
| "grad_norm": 0.3315389156341553, | |
| "learning_rate": 6.673862228430867e-06, | |
| "loss": 0.4471, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3587871982032567, | |
| "grad_norm": 0.32733264565467834, | |
| "learning_rate": 6.661532121035624e-06, | |
| "loss": 0.4529, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.361033127456485, | |
| "grad_norm": 0.31259867548942566, | |
| "learning_rate": 6.649190644641482e-06, | |
| "loss": 0.4225, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3632790567097137, | |
| "grad_norm": 0.3450546860694885, | |
| "learning_rate": 6.636837883694735e-06, | |
| "loss": 0.4468, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.3655249859629421, | |
| "grad_norm": 0.33732178807258606, | |
| "learning_rate": 6.624473922718888e-06, | |
| "loss": 0.4607, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3677709152161706, | |
| "grad_norm": 0.2904933989048004, | |
| "learning_rate": 6.6120988463140925e-06, | |
| "loss": 0.4242, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.3700168444693992, | |
| "grad_norm": 0.30185356736183167, | |
| "learning_rate": 6.599712739156546e-06, | |
| "loss": 0.4398, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3722627737226278, | |
| "grad_norm": 0.2974070906639099, | |
| "learning_rate": 6.587315685997931e-06, | |
| "loss": 0.4482, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.3745087029758563, | |
| "grad_norm": 0.3085421919822693, | |
| "learning_rate": 6.574907771664826e-06, | |
| "loss": 0.4338, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3767546322290847, | |
| "grad_norm": 0.2998266816139221, | |
| "learning_rate": 6.5624890810581225e-06, | |
| "loss": 0.4387, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.3790005614823133, | |
| "grad_norm": 0.39851927757263184, | |
| "learning_rate": 6.5500596991524556e-06, | |
| "loss": 0.4531, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.381246490735542, | |
| "grad_norm": 0.2550167143344879, | |
| "learning_rate": 6.537619710995611e-06, | |
| "loss": 0.4192, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.3834924199887704, | |
| "grad_norm": 0.4163671135902405, | |
| "learning_rate": 6.525169201707946e-06, | |
| "loss": 0.4707, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3857383492419988, | |
| "grad_norm": 0.3337157666683197, | |
| "learning_rate": 6.512708256481814e-06, | |
| "loss": 0.4429, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.3879842784952274, | |
| "grad_norm": 0.43529441952705383, | |
| "learning_rate": 6.500236960580973e-06, | |
| "loss": 0.4496, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3902302077484558, | |
| "grad_norm": 0.26580479741096497, | |
| "learning_rate": 6.487755399340005e-06, | |
| "loss": 0.4069, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.3924761370016845, | |
| "grad_norm": 0.3973635137081146, | |
| "learning_rate": 6.475263658163729e-06, | |
| "loss": 0.4457, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.394722066254913, | |
| "grad_norm": 0.42304566502571106, | |
| "learning_rate": 6.462761822526627e-06, | |
| "loss": 0.4589, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.3969679955081415, | |
| "grad_norm": 0.3066060543060303, | |
| "learning_rate": 6.450249977972247e-06, | |
| "loss": 0.4118, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.39921392476137, | |
| "grad_norm": 0.4160257577896118, | |
| "learning_rate": 6.437728210112626e-06, | |
| "loss": 0.4471, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.4014598540145986, | |
| "grad_norm": 0.34768301248550415, | |
| "learning_rate": 6.4251966046277e-06, | |
| "loss": 0.4369, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.403705783267827, | |
| "grad_norm": 0.34642931818962097, | |
| "learning_rate": 6.412655247264718e-06, | |
| "loss": 0.4467, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.4059517125210557, | |
| "grad_norm": 0.3499101400375366, | |
| "learning_rate": 6.4001042238376534e-06, | |
| "loss": 0.4241, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.408197641774284, | |
| "grad_norm": 0.40661197900772095, | |
| "learning_rate": 6.387543620226626e-06, | |
| "loss": 0.4675, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.4104435710275127, | |
| "grad_norm": 0.3330638110637665, | |
| "learning_rate": 6.374973522377303e-06, | |
| "loss": 0.4507, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.4126895002807411, | |
| "grad_norm": 0.3860412538051605, | |
| "learning_rate": 6.362394016300315e-06, | |
| "loss": 0.4555, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.4149354295339696, | |
| "grad_norm": 0.3007884621620178, | |
| "learning_rate": 6.3498051880706726e-06, | |
| "loss": 0.4482, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.4171813587871982, | |
| "grad_norm": 0.3595775365829468, | |
| "learning_rate": 6.337207123827169e-06, | |
| "loss": 0.4325, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.4194272880404268, | |
| "grad_norm": 0.3329215943813324, | |
| "learning_rate": 6.324599909771798e-06, | |
| "loss": 0.4644, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4216732172936553, | |
| "grad_norm": 0.2800936698913574, | |
| "learning_rate": 6.311983632169157e-06, | |
| "loss": 0.429, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.4239191465468837, | |
| "grad_norm": 0.3583846688270569, | |
| "learning_rate": 6.299358377345864e-06, | |
| "loss": 0.4461, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.4261650758001123, | |
| "grad_norm": 0.3122238218784332, | |
| "learning_rate": 6.2867242316899615e-06, | |
| "loss": 0.4805, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.428411005053341, | |
| "grad_norm": 0.325324684381485, | |
| "learning_rate": 6.2740812816503264e-06, | |
| "loss": 0.4169, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4306569343065694, | |
| "grad_norm": 0.28409814834594727, | |
| "learning_rate": 6.261429613736082e-06, | |
| "loss": 0.4567, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.4329028635597978, | |
| "grad_norm": 0.29375067353248596, | |
| "learning_rate": 6.248769314516002e-06, | |
| "loss": 0.4465, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4351487928130264, | |
| "grad_norm": 0.3233538866043091, | |
| "learning_rate": 6.2361004706179195e-06, | |
| "loss": 0.4702, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.4373947220662548, | |
| "grad_norm": 0.2539404332637787, | |
| "learning_rate": 6.223423168728136e-06, | |
| "loss": 0.4403, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4396406513194835, | |
| "grad_norm": 0.26419639587402344, | |
| "learning_rate": 6.210737495590825e-06, | |
| "loss": 0.4324, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.441886580572712, | |
| "grad_norm": 0.25423571467399597, | |
| "learning_rate": 6.198043538007441e-06, | |
| "loss": 0.4401, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4441325098259405, | |
| "grad_norm": 0.3024260997772217, | |
| "learning_rate": 6.185341382836121e-06, | |
| "loss": 0.4618, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.446378439079169, | |
| "grad_norm": 0.27369245886802673, | |
| "learning_rate": 6.1726311169911e-06, | |
| "loss": 0.4423, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4486243683323976, | |
| "grad_norm": 0.2825721204280853, | |
| "learning_rate": 6.159912827442107e-06, | |
| "loss": 0.4416, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.450870297585626, | |
| "grad_norm": 0.29679155349731445, | |
| "learning_rate": 6.147186601213773e-06, | |
| "loss": 0.4949, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4531162268388544, | |
| "grad_norm": 0.30457913875579834, | |
| "learning_rate": 6.134452525385035e-06, | |
| "loss": 0.4387, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.455362156092083, | |
| "grad_norm": 0.26383036375045776, | |
| "learning_rate": 6.12171068708854e-06, | |
| "loss": 0.4454, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4576080853453117, | |
| "grad_norm": 0.3353641629219055, | |
| "learning_rate": 6.108961173510052e-06, | |
| "loss": 0.4302, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.4598540145985401, | |
| "grad_norm": 0.2700467109680176, | |
| "learning_rate": 6.096204071887854e-06, | |
| "loss": 0.4459, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4620999438517686, | |
| "grad_norm": 0.2580196261405945, | |
| "learning_rate": 6.083439469512146e-06, | |
| "loss": 0.4426, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.4643458731049972, | |
| "grad_norm": 0.2723543643951416, | |
| "learning_rate": 6.0706674537244535e-06, | |
| "loss": 0.4379, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4665918023582258, | |
| "grad_norm": 0.2748951017856598, | |
| "learning_rate": 6.057888111917028e-06, | |
| "loss": 0.4498, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.4688377316114543, | |
| "grad_norm": 0.2623066008090973, | |
| "learning_rate": 6.0451015315322515e-06, | |
| "loss": 0.4373, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4710836608646827, | |
| "grad_norm": 0.2672736644744873, | |
| "learning_rate": 6.032307800062032e-06, | |
| "loss": 0.4409, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.4733295901179113, | |
| "grad_norm": 0.2850876450538635, | |
| "learning_rate": 6.019507005047209e-06, | |
| "loss": 0.4612, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4755755193711397, | |
| "grad_norm": 0.30435261130332947, | |
| "learning_rate": 6.0066992340769606e-06, | |
| "loss": 0.4716, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.4778214486243684, | |
| "grad_norm": 0.24608232080936432, | |
| "learning_rate": 5.993884574788186e-06, | |
| "loss": 0.4315, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4800673778775968, | |
| "grad_norm": 0.2793516516685486, | |
| "learning_rate": 5.981063114864928e-06, | |
| "loss": 0.4404, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.4823133071308254, | |
| "grad_norm": 0.2838444113731384, | |
| "learning_rate": 5.96823494203776e-06, | |
| "loss": 0.4339, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4845592363840538, | |
| "grad_norm": 0.2751578092575073, | |
| "learning_rate": 5.955400144083183e-06, | |
| "loss": 0.4555, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.4868051656372825, | |
| "grad_norm": 0.312559574842453, | |
| "learning_rate": 5.942558808823039e-06, | |
| "loss": 0.4512, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.489051094890511, | |
| "grad_norm": 0.2821672260761261, | |
| "learning_rate": 5.929711024123894e-06, | |
| "loss": 0.4523, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.4912970241437395, | |
| "grad_norm": 0.2883569896221161, | |
| "learning_rate": 5.916856877896447e-06, | |
| "loss": 0.425, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.493542953396968, | |
| "grad_norm": 0.2930947244167328, | |
| "learning_rate": 5.903996458094928e-06, | |
| "loss": 0.4528, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.4957888826501966, | |
| "grad_norm": 0.2596952021121979, | |
| "learning_rate": 5.89112985271649e-06, | |
| "loss": 0.448, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.498034811903425, | |
| "grad_norm": 0.2668738067150116, | |
| "learning_rate": 5.878257149800609e-06, | |
| "loss": 0.4581, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.5002807411566534, | |
| "grad_norm": 0.2872879207134247, | |
| "learning_rate": 5.865378437428491e-06, | |
| "loss": 0.4565, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.502526670409882, | |
| "grad_norm": 0.27810871601104736, | |
| "learning_rate": 5.8524938037224555e-06, | |
| "loss": 0.4348, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.5047725996631107, | |
| "grad_norm": 0.29902833700180054, | |
| "learning_rate": 5.83960333684534e-06, | |
| "loss": 0.4692, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.5070185289163391, | |
| "grad_norm": 0.271638959646225, | |
| "learning_rate": 5.826707124999893e-06, | |
| "loss": 0.4315, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.5092644581695676, | |
| "grad_norm": 0.301960825920105, | |
| "learning_rate": 5.813805256428177e-06, | |
| "loss": 0.4393, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.5115103874227962, | |
| "grad_norm": 0.28544798493385315, | |
| "learning_rate": 5.800897819410961e-06, | |
| "loss": 0.4597, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.5137563166760248, | |
| "grad_norm": 0.2677849531173706, | |
| "learning_rate": 5.787984902267111e-06, | |
| "loss": 0.4204, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.5160022459292533, | |
| "grad_norm": 0.2877887189388275, | |
| "learning_rate": 5.775066593352994e-06, | |
| "loss": 0.4491, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.5182481751824817, | |
| "grad_norm": 0.27290868759155273, | |
| "learning_rate": 5.762142981061869e-06, | |
| "loss": 0.4318, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5204941044357103, | |
| "grad_norm": 0.2793848514556885, | |
| "learning_rate": 5.749214153823284e-06, | |
| "loss": 0.4631, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.522740033688939, | |
| "grad_norm": 0.27665579319000244, | |
| "learning_rate": 5.736280200102471e-06, | |
| "loss": 0.441, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.5249859629421674, | |
| "grad_norm": 0.26563090085983276, | |
| "learning_rate": 5.723341208399737e-06, | |
| "loss": 0.4231, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.5272318921953958, | |
| "grad_norm": 0.28303608298301697, | |
| "learning_rate": 5.7103972672498645e-06, | |
| "loss": 0.4622, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5294778214486242, | |
| "grad_norm": 0.2486550211906433, | |
| "learning_rate": 5.697448465221499e-06, | |
| "loss": 0.4509, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.5317237507018528, | |
| "grad_norm": 0.26522529125213623, | |
| "learning_rate": 5.684494890916551e-06, | |
| "loss": 0.4512, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.5339696799550815, | |
| "grad_norm": 0.2896977365016937, | |
| "learning_rate": 5.6715366329695805e-06, | |
| "loss": 0.4344, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.53621560920831, | |
| "grad_norm": 0.28568655252456665, | |
| "learning_rate": 5.658573780047197e-06, | |
| "loss": 0.4713, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.2812296152114868, | |
| "learning_rate": 5.645606420847454e-06, | |
| "loss": 0.4279, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.540707467714767, | |
| "grad_norm": 0.2628013789653778, | |
| "learning_rate": 5.632634644099235e-06, | |
| "loss": 0.4428, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5429533969679956, | |
| "grad_norm": 0.27226313948631287, | |
| "learning_rate": 5.6196585385616505e-06, | |
| "loss": 0.4366, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.545199326221224, | |
| "grad_norm": 0.2939417362213135, | |
| "learning_rate": 5.606678193023436e-06, | |
| "loss": 0.4775, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5474452554744524, | |
| "grad_norm": 0.3119971752166748, | |
| "learning_rate": 5.593693696302333e-06, | |
| "loss": 0.4658, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.549691184727681, | |
| "grad_norm": 0.23466715216636658, | |
| "learning_rate": 5.580705137244488e-06, | |
| "loss": 0.4282, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5519371139809097, | |
| "grad_norm": 0.32123836874961853, | |
| "learning_rate": 5.567712604723846e-06, | |
| "loss": 0.4383, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.5541830432341381, | |
| "grad_norm": 0.28472721576690674, | |
| "learning_rate": 5.5547161876415435e-06, | |
| "loss": 0.444, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5564289724873666, | |
| "grad_norm": 0.3107893168926239, | |
| "learning_rate": 5.54171597492529e-06, | |
| "loss": 0.4578, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.5586749017405952, | |
| "grad_norm": 0.29814159870147705, | |
| "learning_rate": 5.52871205552877e-06, | |
| "loss": 0.4509, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5609208309938238, | |
| "grad_norm": 0.2612503170967102, | |
| "learning_rate": 5.515704518431033e-06, | |
| "loss": 0.4284, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.5631667602470523, | |
| "grad_norm": 0.2933982014656067, | |
| "learning_rate": 5.50269345263588e-06, | |
| "loss": 0.4382, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5654126895002807, | |
| "grad_norm": 0.24303555488586426, | |
| "learning_rate": 5.489678947171255e-06, | |
| "loss": 0.4317, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.5676586187535093, | |
| "grad_norm": 0.25020086765289307, | |
| "learning_rate": 5.4766610910886445e-06, | |
| "loss": 0.4396, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5699045480067377, | |
| "grad_norm": 0.2751081883907318, | |
| "learning_rate": 5.4636399734624534e-06, | |
| "loss": 0.4557, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.5721504772599664, | |
| "grad_norm": 0.26188722252845764, | |
| "learning_rate": 5.450615683389408e-06, | |
| "loss": 0.4092, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5743964065131948, | |
| "grad_norm": 0.31535235047340393, | |
| "learning_rate": 5.437588309987945e-06, | |
| "loss": 0.4918, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.5766423357664232, | |
| "grad_norm": 0.2722760736942291, | |
| "learning_rate": 5.424557942397593e-06, | |
| "loss": 0.4208, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5788882650196518, | |
| "grad_norm": 0.3277275562286377, | |
| "learning_rate": 5.411524669778369e-06, | |
| "loss": 0.4578, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.5811341942728805, | |
| "grad_norm": 0.24588078260421753, | |
| "learning_rate": 5.398488581310172e-06, | |
| "loss": 0.4456, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.583380123526109, | |
| "grad_norm": 0.2953939139842987, | |
| "learning_rate": 5.385449766192164e-06, | |
| "loss": 0.4503, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.5856260527793373, | |
| "grad_norm": 0.2831403613090515, | |
| "learning_rate": 5.372408313642168e-06, | |
| "loss": 0.4403, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.587871982032566, | |
| "grad_norm": 0.2721308767795563, | |
| "learning_rate": 5.359364312896047e-06, | |
| "loss": 0.4243, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.5901179112857946, | |
| "grad_norm": 0.283263623714447, | |
| "learning_rate": 5.346317853207108e-06, | |
| "loss": 0.4658, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.592363840539023, | |
| "grad_norm": 0.2844542860984802, | |
| "learning_rate": 5.333269023845478e-06, | |
| "loss": 0.4366, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.5946097697922514, | |
| "grad_norm": 0.2929394245147705, | |
| "learning_rate": 5.320217914097498e-06, | |
| "loss": 0.4604, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.59685569904548, | |
| "grad_norm": 0.2344074845314026, | |
| "learning_rate": 5.307164613265119e-06, | |
| "loss": 0.4172, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.5991016282987087, | |
| "grad_norm": 0.2725594639778137, | |
| "learning_rate": 5.294109210665275e-06, | |
| "loss": 0.4322, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.6013475575519371, | |
| "grad_norm": 0.27773675322532654, | |
| "learning_rate": 5.281051795629289e-06, | |
| "loss": 0.454, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.6035934868051656, | |
| "grad_norm": 0.28249219059944153, | |
| "learning_rate": 5.26799245750225e-06, | |
| "loss": 0.4216, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.6058394160583942, | |
| "grad_norm": 0.3116042912006378, | |
| "learning_rate": 5.254931285642406e-06, | |
| "loss": 0.4531, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.6080853453116228, | |
| "grad_norm": 0.2770937383174896, | |
| "learning_rate": 5.2418683694205574e-06, | |
| "loss": 0.4509, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.6103312745648513, | |
| "grad_norm": 0.3380868136882782, | |
| "learning_rate": 5.228803798219432e-06, | |
| "loss": 0.4492, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.6125772038180797, | |
| "grad_norm": 0.2792224586009979, | |
| "learning_rate": 5.215737661433087e-06, | |
| "loss": 0.4572, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.614823133071308, | |
| "grad_norm": 0.2865675389766693, | |
| "learning_rate": 5.20267004846629e-06, | |
| "loss": 0.441, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.6170690623245367, | |
| "grad_norm": 0.29234838485717773, | |
| "learning_rate": 5.189601048733912e-06, | |
| "loss": 0.4337, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6193149915777654, | |
| "grad_norm": 0.2698359489440918, | |
| "learning_rate": 5.17653075166031e-06, | |
| "loss": 0.45, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.6215609208309938, | |
| "grad_norm": 0.3000829517841339, | |
| "learning_rate": 5.16345924667872e-06, | |
| "loss": 0.4387, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6238068500842222, | |
| "grad_norm": 0.3248939514160156, | |
| "learning_rate": 5.150386623230643e-06, | |
| "loss": 0.4733, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.6260527793374508, | |
| "grad_norm": 0.24670802056789398, | |
| "learning_rate": 5.137312970765232e-06, | |
| "loss": 0.4398, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6282987085906795, | |
| "grad_norm": 0.28131037950515747, | |
| "learning_rate": 5.12423837873868e-06, | |
| "loss": 0.4413, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.630544637843908, | |
| "grad_norm": 0.2791185677051544, | |
| "learning_rate": 5.1111629366136115e-06, | |
| "loss": 0.4213, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6327905670971363, | |
| "grad_norm": 0.2748776972293854, | |
| "learning_rate": 5.0980867338584675e-06, | |
| "loss": 0.4322, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.635036496350365, | |
| "grad_norm": 0.22908659279346466, | |
| "learning_rate": 5.08500985994689e-06, | |
| "loss": 0.4316, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6372824256035936, | |
| "grad_norm": 0.27406492829322815, | |
| "learning_rate": 5.071932404357119e-06, | |
| "loss": 0.4355, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.639528354856822, | |
| "grad_norm": 0.2680008113384247, | |
| "learning_rate": 5.058854456571372e-06, | |
| "loss": 0.4607, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.6417742841100504, | |
| "grad_norm": 0.24551571905612946, | |
| "learning_rate": 5.045776106075232e-06, | |
| "loss": 0.4165, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.644020213363279, | |
| "grad_norm": 0.2642151117324829, | |
| "learning_rate": 5.032697442357039e-06, | |
| "loss": 0.4548, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6462661426165077, | |
| "grad_norm": 0.2644287049770355, | |
| "learning_rate": 5.019618554907279e-06, | |
| "loss": 0.4169, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.6485120718697361, | |
| "grad_norm": 0.24862082302570343, | |
| "learning_rate": 5.0065395332179666e-06, | |
| "loss": 0.4308, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6507580011229646, | |
| "grad_norm": 0.2720666825771332, | |
| "learning_rate": 4.993460466782034e-06, | |
| "loss": 0.465, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.6530039303761932, | |
| "grad_norm": 0.355563759803772, | |
| "learning_rate": 4.9803814450927214e-06, | |
| "loss": 0.4572, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6552498596294218, | |
| "grad_norm": 0.22143852710723877, | |
| "learning_rate": 4.967302557642962e-06, | |
| "loss": 0.4203, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.6574957888826503, | |
| "grad_norm": 0.30551275610923767, | |
| "learning_rate": 4.954223893924771e-06, | |
| "loss": 0.4391, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6597417181358787, | |
| "grad_norm": 0.23899058997631073, | |
| "learning_rate": 4.94114554342863e-06, | |
| "loss": 0.4523, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.661987647389107, | |
| "grad_norm": 0.23506562411785126, | |
| "learning_rate": 4.928067595642882e-06, | |
| "loss": 0.444, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6642335766423357, | |
| "grad_norm": 0.2739086151123047, | |
| "learning_rate": 4.91499014005311e-06, | |
| "loss": 0.4283, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.6664795058955644, | |
| "grad_norm": 0.22991512715816498, | |
| "learning_rate": 4.901913266141534e-06, | |
| "loss": 0.4277, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6687254351487928, | |
| "grad_norm": 0.28890857100486755, | |
| "learning_rate": 4.888837063386391e-06, | |
| "loss": 0.4633, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.6709713644020212, | |
| "grad_norm": 0.25182008743286133, | |
| "learning_rate": 4.875761621261322e-06, | |
| "loss": 0.4591, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6732172936552498, | |
| "grad_norm": 0.259389191865921, | |
| "learning_rate": 4.862687029234769e-06, | |
| "loss": 0.4407, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.6754632229084785, | |
| "grad_norm": 0.24826925992965698, | |
| "learning_rate": 4.849613376769358e-06, | |
| "loss": 0.4582, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.677709152161707, | |
| "grad_norm": 0.31528979539871216, | |
| "learning_rate": 4.83654075332128e-06, | |
| "loss": 0.4321, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.6799550814149353, | |
| "grad_norm": 0.24880996346473694, | |
| "learning_rate": 4.8234692483396915e-06, | |
| "loss": 0.4298, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.682201010668164, | |
| "grad_norm": 0.2553097903728485, | |
| "learning_rate": 4.81039895126609e-06, | |
| "loss": 0.4359, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.6844469399213926, | |
| "grad_norm": 0.2735806107521057, | |
| "learning_rate": 4.797329951533712e-06, | |
| "loss": 0.4513, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.686692869174621, | |
| "grad_norm": 0.2573295831680298, | |
| "learning_rate": 4.784262338566915e-06, | |
| "loss": 0.4431, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.6889387984278494, | |
| "grad_norm": 0.25200626254081726, | |
| "learning_rate": 4.77119620178057e-06, | |
| "loss": 0.453, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.691184727681078, | |
| "grad_norm": 0.24043521285057068, | |
| "learning_rate": 4.758131630579446e-06, | |
| "loss": 0.4097, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.6934306569343067, | |
| "grad_norm": 0.27149125933647156, | |
| "learning_rate": 4.745068714357595e-06, | |
| "loss": 0.4415, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6956765861875351, | |
| "grad_norm": 0.2776370942592621, | |
| "learning_rate": 4.7320075424977515e-06, | |
| "loss": 0.4653, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.6979225154407636, | |
| "grad_norm": 0.29149848222732544, | |
| "learning_rate": 4.718948204370713e-06, | |
| "loss": 0.4206, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.700168444693992, | |
| "grad_norm": 0.27004140615463257, | |
| "learning_rate": 4.705890789334726e-06, | |
| "loss": 0.4717, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.7024143739472206, | |
| "grad_norm": 0.27363502979278564, | |
| "learning_rate": 4.692835386734884e-06, | |
| "loss": 0.4262, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.7046603032004493, | |
| "grad_norm": 0.27881062030792236, | |
| "learning_rate": 4.679782085902503e-06, | |
| "loss": 0.4562, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.7069062324536777, | |
| "grad_norm": 0.2494436502456665, | |
| "learning_rate": 4.6667309761545245e-06, | |
| "loss": 0.4537, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.709152161706906, | |
| "grad_norm": 0.2262820154428482, | |
| "learning_rate": 4.6536821467928926e-06, | |
| "loss": 0.3919, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.7113980909601347, | |
| "grad_norm": 0.25715264678001404, | |
| "learning_rate": 4.6406356871039534e-06, | |
| "loss": 0.4665, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.7136440202133634, | |
| "grad_norm": 0.26350539922714233, | |
| "learning_rate": 4.627591686357835e-06, | |
| "loss": 0.4623, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.7158899494665918, | |
| "grad_norm": 0.23280011117458344, | |
| "learning_rate": 4.6145502338078365e-06, | |
| "loss": 0.4195, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.7181358787198202, | |
| "grad_norm": 0.25985339283943176, | |
| "learning_rate": 4.60151141868983e-06, | |
| "loss": 0.4449, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.7203818079730488, | |
| "grad_norm": 0.2784518599510193, | |
| "learning_rate": 4.5884753302216315e-06, | |
| "loss": 0.491, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7226277372262775, | |
| "grad_norm": 0.2532546818256378, | |
| "learning_rate": 4.575442057602408e-06, | |
| "loss": 0.4271, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.724873666479506, | |
| "grad_norm": 0.270094633102417, | |
| "learning_rate": 4.562411690012057e-06, | |
| "loss": 0.4388, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7271195957327343, | |
| "grad_norm": 0.2802513837814331, | |
| "learning_rate": 4.549384316610593e-06, | |
| "loss": 0.4443, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.729365524985963, | |
| "grad_norm": 0.2635841965675354, | |
| "learning_rate": 4.536360026537548e-06, | |
| "loss": 0.4262, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7316114542391916, | |
| "grad_norm": 0.25495946407318115, | |
| "learning_rate": 4.523338908911358e-06, | |
| "loss": 0.4558, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.73385738349242, | |
| "grad_norm": 0.25492119789123535, | |
| "learning_rate": 4.510321052828745e-06, | |
| "loss": 0.4478, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7361033127456484, | |
| "grad_norm": 0.2536661922931671, | |
| "learning_rate": 4.497306547364123e-06, | |
| "loss": 0.473, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.738349241998877, | |
| "grad_norm": 0.23842228949069977, | |
| "learning_rate": 4.484295481568968e-06, | |
| "loss": 0.434, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7405951712521057, | |
| "grad_norm": 0.26309531927108765, | |
| "learning_rate": 4.471287944471231e-06, | |
| "loss": 0.4383, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.7428411005053341, | |
| "grad_norm": 0.2441006749868393, | |
| "learning_rate": 4.458284025074711e-06, | |
| "loss": 0.4548, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7450870297585626, | |
| "grad_norm": 0.2809121608734131, | |
| "learning_rate": 4.4452838123584565e-06, | |
| "loss": 0.4373, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.747332959011791, | |
| "grad_norm": 0.2502027153968811, | |
| "learning_rate": 4.432287395276155e-06, | |
| "loss": 0.4721, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7495788882650196, | |
| "grad_norm": 0.2655166685581207, | |
| "learning_rate": 4.419294862755515e-06, | |
| "loss": 0.4245, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.7518248175182483, | |
| "grad_norm": 0.2757239043712616, | |
| "learning_rate": 4.406306303697669e-06, | |
| "loss": 0.4414, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7540707467714767, | |
| "grad_norm": 0.23585571348667145, | |
| "learning_rate": 4.393321806976565e-06, | |
| "loss": 0.4397, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.756316676024705, | |
| "grad_norm": 0.25489094853401184, | |
| "learning_rate": 4.380341461438349e-06, | |
| "loss": 0.4496, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7585626052779337, | |
| "grad_norm": 0.2948884665966034, | |
| "learning_rate": 4.3673653559007676e-06, | |
| "loss": 0.4521, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.7608085345311624, | |
| "grad_norm": 0.26162976026535034, | |
| "learning_rate": 4.354393579152547e-06, | |
| "loss": 0.409, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7630544637843908, | |
| "grad_norm": 0.27988922595977783, | |
| "learning_rate": 4.3414262199528045e-06, | |
| "loss": 0.4661, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.7653003930376192, | |
| "grad_norm": 0.3011482059955597, | |
| "learning_rate": 4.328463367030421e-06, | |
| "loss": 0.4586, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7675463222908478, | |
| "grad_norm": 0.27512040734291077, | |
| "learning_rate": 4.315505109083451e-06, | |
| "loss": 0.4452, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.7697922515440765, | |
| "grad_norm": 0.22836817800998688, | |
| "learning_rate": 4.302551534778504e-06, | |
| "loss": 0.4213, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.772038180797305, | |
| "grad_norm": 0.3237468898296356, | |
| "learning_rate": 4.289602732750138e-06, | |
| "loss": 0.4307, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.7742841100505333, | |
| "grad_norm": 0.2781298756599426, | |
| "learning_rate": 4.276658791600264e-06, | |
| "loss": 0.428, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.776530039303762, | |
| "grad_norm": 0.26471009850502014, | |
| "learning_rate": 4.26371979989753e-06, | |
| "loss": 0.4424, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.7787759685569906, | |
| "grad_norm": 0.25274160504341125, | |
| "learning_rate": 4.250785846176716e-06, | |
| "loss": 0.4272, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.781021897810219, | |
| "grad_norm": 0.2389991134405136, | |
| "learning_rate": 4.237857018938132e-06, | |
| "loss": 0.4469, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.7832678270634474, | |
| "grad_norm": 0.2341649830341339, | |
| "learning_rate": 4.224933406647008e-06, | |
| "loss": 0.4175, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7855137563166759, | |
| "grad_norm": 0.2746540606021881, | |
| "learning_rate": 4.212015097732891e-06, | |
| "loss": 0.4406, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.7877596855699045, | |
| "grad_norm": 0.2597159445285797, | |
| "learning_rate": 4.1991021805890394e-06, | |
| "loss": 0.4579, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7900056148231331, | |
| "grad_norm": 0.2421720176935196, | |
| "learning_rate": 4.186194743571823e-06, | |
| "loss": 0.4247, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.7922515440763616, | |
| "grad_norm": 0.25346839427948, | |
| "learning_rate": 4.173292875000108e-06, | |
| "loss": 0.4471, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.79449747332959, | |
| "grad_norm": 0.2318015843629837, | |
| "learning_rate": 4.1603966631546634e-06, | |
| "loss": 0.4357, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.7967434025828186, | |
| "grad_norm": 0.23157362639904022, | |
| "learning_rate": 4.147506196277546e-06, | |
| "loss": 0.4507, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7989893318360473, | |
| "grad_norm": 0.2407248169183731, | |
| "learning_rate": 4.13462156257151e-06, | |
| "loss": 0.4502, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.8012352610892757, | |
| "grad_norm": 0.24326087534427643, | |
| "learning_rate": 4.121742850199391e-06, | |
| "loss": 0.4505, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.803481190342504, | |
| "grad_norm": 0.23502765595912933, | |
| "learning_rate": 4.108870147283512e-06, | |
| "loss": 0.4407, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.8057271195957327, | |
| "grad_norm": 0.28090357780456543, | |
| "learning_rate": 4.0960035419050745e-06, | |
| "loss": 0.4359, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.8079730488489614, | |
| "grad_norm": 0.22931216657161713, | |
| "learning_rate": 4.083143122103554e-06, | |
| "loss": 0.4145, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.8102189781021898, | |
| "grad_norm": 0.24223902821540833, | |
| "learning_rate": 4.070288975876107e-06, | |
| "loss": 0.4556, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.8124649073554182, | |
| "grad_norm": 0.2725001871585846, | |
| "learning_rate": 4.0574411911769625e-06, | |
| "loss": 0.4639, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.8147108366086468, | |
| "grad_norm": 0.24160481989383698, | |
| "learning_rate": 4.044599855916817e-06, | |
| "loss": 0.4609, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.8169567658618755, | |
| "grad_norm": 0.23829206824302673, | |
| "learning_rate": 4.031765057962243e-06, | |
| "loss": 0.427, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.819202695115104, | |
| "grad_norm": 0.2611043155193329, | |
| "learning_rate": 4.018936885135074e-06, | |
| "loss": 0.4584, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8214486243683323, | |
| "grad_norm": 0.2420017123222351, | |
| "learning_rate": 4.006115425211816e-06, | |
| "loss": 0.4084, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.823694553621561, | |
| "grad_norm": 0.2647510766983032, | |
| "learning_rate": 3.993300765923042e-06, | |
| "loss": 0.453, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8259404828747896, | |
| "grad_norm": 0.218390554189682, | |
| "learning_rate": 3.980492994952792e-06, | |
| "loss": 0.4203, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.828186412128018, | |
| "grad_norm": 0.3060971200466156, | |
| "learning_rate": 3.967692199937971e-06, | |
| "loss": 0.4673, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8304323413812464, | |
| "grad_norm": 0.2392362505197525, | |
| "learning_rate": 3.95489846846775e-06, | |
| "loss": 0.436, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.8326782706344749, | |
| "grad_norm": 0.22931107878684998, | |
| "learning_rate": 3.9421118880829735e-06, | |
| "loss": 0.4058, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8349241998877035, | |
| "grad_norm": 0.30072271823883057, | |
| "learning_rate": 3.929332546275547e-06, | |
| "loss": 0.4499, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.8371701291409321, | |
| "grad_norm": 0.22911213338375092, | |
| "learning_rate": 3.916560530487854e-06, | |
| "loss": 0.4453, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8394160583941606, | |
| "grad_norm": 0.2482576072216034, | |
| "learning_rate": 3.9037959281121474e-06, | |
| "loss": 0.4288, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.841661987647389, | |
| "grad_norm": 0.24556680023670197, | |
| "learning_rate": 3.891038826489949e-06, | |
| "loss": 0.4389, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8439079169006176, | |
| "grad_norm": 0.22505217790603638, | |
| "learning_rate": 3.878289312911462e-06, | |
| "loss": 0.4505, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.2489365190267563, | |
| "learning_rate": 3.865547474614967e-06, | |
| "loss": 0.4563, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8483997754070747, | |
| "grad_norm": 0.2253488451242447, | |
| "learning_rate": 3.852813398786228e-06, | |
| "loss": 0.4099, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.850645704660303, | |
| "grad_norm": 0.2778521478176117, | |
| "learning_rate": 3.840087172557894e-06, | |
| "loss": 0.4527, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8528916339135317, | |
| "grad_norm": 0.22189773619174957, | |
| "learning_rate": 3.8273688830089005e-06, | |
| "loss": 0.4205, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.8551375631667604, | |
| "grad_norm": 0.2973972260951996, | |
| "learning_rate": 3.8146586171638803e-06, | |
| "loss": 0.4554, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.8573834924199888, | |
| "grad_norm": 0.21712501347064972, | |
| "learning_rate": 3.801956461992561e-06, | |
| "loss": 0.4249, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.8596294216732172, | |
| "grad_norm": 0.22984138131141663, | |
| "learning_rate": 3.7892625044091747e-06, | |
| "loss": 0.4383, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8618753509264458, | |
| "grad_norm": 0.23754611611366272, | |
| "learning_rate": 3.776576831271865e-06, | |
| "loss": 0.4669, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.8641212801796745, | |
| "grad_norm": 0.2339145392179489, | |
| "learning_rate": 3.7638995293820817e-06, | |
| "loss": 0.4167, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.866367209432903, | |
| "grad_norm": 0.2435954064130783, | |
| "learning_rate": 3.7512306854839993e-06, | |
| "loss": 0.4558, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.8686131386861313, | |
| "grad_norm": 0.23407921195030212, | |
| "learning_rate": 3.73857038626392e-06, | |
| "loss": 0.4524, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.87085906793936, | |
| "grad_norm": 0.22388103604316711, | |
| "learning_rate": 3.725918718349675e-06, | |
| "loss": 0.4319, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.8731049971925884, | |
| "grad_norm": 0.2634325623512268, | |
| "learning_rate": 3.713275768310041e-06, | |
| "loss": 0.4501, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.875350926445817, | |
| "grad_norm": 0.2393648326396942, | |
| "learning_rate": 3.7006416226541375e-06, | |
| "loss": 0.4365, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.8775968556990454, | |
| "grad_norm": 0.24255381524562836, | |
| "learning_rate": 3.6880163678308443e-06, | |
| "loss": 0.4521, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8798427849522739, | |
| "grad_norm": 0.2714441120624542, | |
| "learning_rate": 3.6754000902282026e-06, | |
| "loss": 0.4426, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.8820887142055025, | |
| "grad_norm": 0.24428033828735352, | |
| "learning_rate": 3.6627928761728315e-06, | |
| "loss": 0.4381, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8843346434587311, | |
| "grad_norm": 0.2361423224210739, | |
| "learning_rate": 3.65019481192933e-06, | |
| "loss": 0.4353, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.8865805727119596, | |
| "grad_norm": 0.28072136640548706, | |
| "learning_rate": 3.637605983699687e-06, | |
| "loss": 0.4555, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.888826501965188, | |
| "grad_norm": 0.2483406364917755, | |
| "learning_rate": 3.6250264776226995e-06, | |
| "loss": 0.4162, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.8910724312184166, | |
| "grad_norm": 0.2832973301410675, | |
| "learning_rate": 3.612456379773376e-06, | |
| "loss": 0.4573, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8933183604716453, | |
| "grad_norm": 0.2808990776538849, | |
| "learning_rate": 3.599895776162347e-06, | |
| "loss": 0.4322, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.8955642897248737, | |
| "grad_norm": 0.24022577702999115, | |
| "learning_rate": 3.5873447527352852e-06, | |
| "loss": 0.4376, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.897810218978102, | |
| "grad_norm": 0.2281453162431717, | |
| "learning_rate": 3.574803395372301e-06, | |
| "loss": 0.44, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.9000561482313307, | |
| "grad_norm": 0.26453354954719543, | |
| "learning_rate": 3.562271789887375e-06, | |
| "loss": 0.4462, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.9023020774845594, | |
| "grad_norm": 0.23726797103881836, | |
| "learning_rate": 3.5497500220277535e-06, | |
| "loss": 0.4221, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.9045480067377878, | |
| "grad_norm": 0.24078302085399628, | |
| "learning_rate": 3.537238177473375e-06, | |
| "loss": 0.4357, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.9067939359910162, | |
| "grad_norm": 0.2368990182876587, | |
| "learning_rate": 3.524736341836272e-06, | |
| "loss": 0.4401, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.9090398652442448, | |
| "grad_norm": 0.22038300335407257, | |
| "learning_rate": 3.5122446006599988e-06, | |
| "loss": 0.4522, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.9112857944974735, | |
| "grad_norm": 0.23867258429527283, | |
| "learning_rate": 3.499763039419028e-06, | |
| "loss": 0.4319, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.913531723750702, | |
| "grad_norm": 0.2527855336666107, | |
| "learning_rate": 3.4872917435181862e-06, | |
| "loss": 0.444, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.9157776530039303, | |
| "grad_norm": 0.23553407192230225, | |
| "learning_rate": 3.474830798292054e-06, | |
| "loss": 0.4408, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.9180235822571587, | |
| "grad_norm": 0.26365795731544495, | |
| "learning_rate": 3.462380289004391e-06, | |
| "loss": 0.4466, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.9202695115103874, | |
| "grad_norm": 0.2614414393901825, | |
| "learning_rate": 3.4499403008475474e-06, | |
| "loss": 0.4437, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.922515440763616, | |
| "grad_norm": 0.25481751561164856, | |
| "learning_rate": 3.437510918941879e-06, | |
| "loss": 0.4401, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9247613700168444, | |
| "grad_norm": 0.24284076690673828, | |
| "learning_rate": 3.4250922283351762e-06, | |
| "loss": 0.439, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.9270072992700729, | |
| "grad_norm": 0.21187108755111694, | |
| "learning_rate": 3.4126843140020697e-06, | |
| "loss": 0.4261, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9292532285233015, | |
| "grad_norm": 0.2579561769962311, | |
| "learning_rate": 3.400287260843454e-06, | |
| "loss": 0.4456, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.9314991577765301, | |
| "grad_norm": 0.24703598022460938, | |
| "learning_rate": 3.3879011536859095e-06, | |
| "loss": 0.4333, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9337450870297586, | |
| "grad_norm": 0.2076679766178131, | |
| "learning_rate": 3.3755260772811135e-06, | |
| "loss": 0.4251, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.935991016282987, | |
| "grad_norm": 0.25392812490463257, | |
| "learning_rate": 3.3631621163052673e-06, | |
| "loss": 0.4618, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9382369455362156, | |
| "grad_norm": 0.23472177982330322, | |
| "learning_rate": 3.350809355358518e-06, | |
| "loss": 0.4396, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.9404828747894443, | |
| "grad_norm": 0.2323472946882248, | |
| "learning_rate": 3.3384678789643754e-06, | |
| "loss": 0.4465, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9427288040426727, | |
| "grad_norm": 0.21967169642448425, | |
| "learning_rate": 3.3261377715691355e-06, | |
| "loss": 0.4525, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.944974733295901, | |
| "grad_norm": 0.2311394363641739, | |
| "learning_rate": 3.313819117541297e-06, | |
| "loss": 0.4151, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9472206625491297, | |
| "grad_norm": 0.2546962797641754, | |
| "learning_rate": 3.3015120011709955e-06, | |
| "loss": 0.4423, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.9494665918023584, | |
| "grad_norm": 0.23539777100086212, | |
| "learning_rate": 3.289216506669419e-06, | |
| "loss": 0.44, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9517125210555868, | |
| "grad_norm": 0.21500107645988464, | |
| "learning_rate": 3.2769327181682307e-06, | |
| "loss": 0.4144, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.9539584503088152, | |
| "grad_norm": 0.24959760904312134, | |
| "learning_rate": 3.264660719719001e-06, | |
| "loss": 0.4371, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9562043795620438, | |
| "grad_norm": 0.2544858753681183, | |
| "learning_rate": 3.2524005952926195e-06, | |
| "loss": 0.4499, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.9584503088152723, | |
| "grad_norm": 0.23261022567749023, | |
| "learning_rate": 3.2401524287787317e-06, | |
| "loss": 0.4587, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.960696238068501, | |
| "grad_norm": 0.25908032059669495, | |
| "learning_rate": 3.2279163039851637e-06, | |
| "loss": 0.4268, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.9629421673217293, | |
| "grad_norm": 0.24189579486846924, | |
| "learning_rate": 3.2156923046373444e-06, | |
| "loss": 0.4386, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.9651880965749577, | |
| "grad_norm": 0.2793926000595093, | |
| "learning_rate": 3.2034805143777353e-06, | |
| "loss": 0.4601, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.9674340258281864, | |
| "grad_norm": 0.24393576383590698, | |
| "learning_rate": 3.191281016765253e-06, | |
| "loss": 0.4385, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.969679955081415, | |
| "grad_norm": 0.2667008340358734, | |
| "learning_rate": 3.179093895274709e-06, | |
| "loss": 0.4381, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.9719258843346434, | |
| "grad_norm": 0.2440071552991867, | |
| "learning_rate": 3.1669192332962264e-06, | |
| "loss": 0.4057, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.9741718135878719, | |
| "grad_norm": 0.2677282392978668, | |
| "learning_rate": 3.1547571141346756e-06, | |
| "loss": 0.4554, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.9764177428411005, | |
| "grad_norm": 0.23887000977993011, | |
| "learning_rate": 3.142607621009107e-06, | |
| "loss": 0.4177, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9786636720943291, | |
| "grad_norm": 0.2643425762653351, | |
| "learning_rate": 3.1304708370521695e-06, | |
| "loss": 0.4624, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.9809096013475576, | |
| "grad_norm": 0.2131706178188324, | |
| "learning_rate": 3.118346845309556e-06, | |
| "loss": 0.413, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.983155530600786, | |
| "grad_norm": 0.2590519189834595, | |
| "learning_rate": 3.1062357287394284e-06, | |
| "loss": 0.4617, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.9854014598540146, | |
| "grad_norm": 0.23261459171772003, | |
| "learning_rate": 3.094137570211847e-06, | |
| "loss": 0.434, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9876473891072433, | |
| "grad_norm": 0.22255754470825195, | |
| "learning_rate": 3.082052452508213e-06, | |
| "loss": 0.4351, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.9898933183604717, | |
| "grad_norm": 0.22782853245735168, | |
| "learning_rate": 3.0699804583206882e-06, | |
| "loss": 0.4316, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9921392476137, | |
| "grad_norm": 0.2501652240753174, | |
| "learning_rate": 3.057921670251644e-06, | |
| "loss": 0.455, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.9943851768669287, | |
| "grad_norm": 0.2316114753484726, | |
| "learning_rate": 3.045876170813084e-06, | |
| "loss": 0.4451, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9966311061201574, | |
| "grad_norm": 0.22861182689666748, | |
| "learning_rate": 3.0338440424260897e-06, | |
| "loss": 0.4362, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.9988770353733858, | |
| "grad_norm": 0.24283848702907562, | |
| "learning_rate": 3.021825367420248e-06, | |
| "loss": 0.4348, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.001122964626614, | |
| "grad_norm": 0.5297620892524719, | |
| "learning_rate": 3.0098202280330907e-06, | |
| "loss": 0.7405, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.0033688938798426, | |
| "grad_norm": 0.25243425369262695, | |
| "learning_rate": 2.997828706409534e-06, | |
| "loss": 0.4301, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.0056148231330715, | |
| "grad_norm": 0.3185347318649292, | |
| "learning_rate": 2.985850884601316e-06, | |
| "loss": 0.4232, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.0078607523863, | |
| "grad_norm": 0.26330360770225525, | |
| "learning_rate": 2.9738868445664314e-06, | |
| "loss": 0.4381, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.0101066816395283, | |
| "grad_norm": 0.23436835408210754, | |
| "learning_rate": 2.961936668168577e-06, | |
| "loss": 0.4191, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.0123526108927567, | |
| "grad_norm": 0.31119340658187866, | |
| "learning_rate": 2.950000437176582e-06, | |
| "loss": 0.4259, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.0145985401459856, | |
| "grad_norm": 0.2767098844051361, | |
| "learning_rate": 2.9380782332638614e-06, | |
| "loss": 0.4282, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.016844469399214, | |
| "grad_norm": 0.22678621113300323, | |
| "learning_rate": 2.9261701380078443e-06, | |
| "loss": 0.4133, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.0190903986524424, | |
| "grad_norm": 0.274517297744751, | |
| "learning_rate": 2.9142762328894273e-06, | |
| "loss": 0.4231, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.021336327905671, | |
| "grad_norm": 0.29050254821777344, | |
| "learning_rate": 2.9023965992924076e-06, | |
| "loss": 0.4387, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0235822571588993, | |
| "grad_norm": 0.24458545446395874, | |
| "learning_rate": 2.8905313185029267e-06, | |
| "loss": 0.4048, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.025828186412128, | |
| "grad_norm": 0.26588353514671326, | |
| "learning_rate": 2.878680471708924e-06, | |
| "loss": 0.4159, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0280741156653566, | |
| "grad_norm": 0.24073943495750427, | |
| "learning_rate": 2.8668441399995712e-06, | |
| "loss": 0.4311, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.030320044918585, | |
| "grad_norm": 0.2562435567378998, | |
| "learning_rate": 2.8550224043647236e-06, | |
| "loss": 0.4232, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.0325659741718134, | |
| "grad_norm": 0.2863386273384094, | |
| "learning_rate": 2.843215345694359e-06, | |
| "loss": 0.4466, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.0348119034250423, | |
| "grad_norm": 0.24601112306118011, | |
| "learning_rate": 2.831423044778027e-06, | |
| "loss": 0.4139, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0370578326782707, | |
| "grad_norm": 0.22661253809928894, | |
| "learning_rate": 2.8196455823043047e-06, | |
| "loss": 0.4174, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.039303761931499, | |
| "grad_norm": 0.25296610593795776, | |
| "learning_rate": 2.8078830388602318e-06, | |
| "loss": 0.432, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0415496911847275, | |
| "grad_norm": 0.2235630601644516, | |
| "learning_rate": 2.7961354949307677e-06, | |
| "loss": 0.4261, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.0437956204379564, | |
| "grad_norm": 0.2354028970003128, | |
| "learning_rate": 2.784403030898239e-06, | |
| "loss": 0.4229, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.046041549691185, | |
| "grad_norm": 0.2226496785879135, | |
| "learning_rate": 2.772685727041783e-06, | |
| "loss": 0.4193, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.048287478944413, | |
| "grad_norm": 0.23907402157783508, | |
| "learning_rate": 2.760983663536806e-06, | |
| "loss": 0.4494, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0505334081976416, | |
| "grad_norm": 0.23263433575630188, | |
| "learning_rate": 2.7492969204544356e-06, | |
| "loss": 0.41, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.0527793374508705, | |
| "grad_norm": 0.24426434934139252, | |
| "learning_rate": 2.7376255777609674e-06, | |
| "loss": 0.4356, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.055025266704099, | |
| "grad_norm": 0.2100609987974167, | |
| "learning_rate": 2.7259697153173207e-06, | |
| "loss": 0.4112, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.0572711959573273, | |
| "grad_norm": 0.2561478018760681, | |
| "learning_rate": 2.7143294128784934e-06, | |
| "loss": 0.4565, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.0595171252105557, | |
| "grad_norm": 0.23428645730018616, | |
| "learning_rate": 2.7027047500930098e-06, | |
| "loss": 0.4124, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.0617630544637846, | |
| "grad_norm": 0.22505903244018555, | |
| "learning_rate": 2.6910958065023805e-06, | |
| "loss": 0.4285, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.064008983717013, | |
| "grad_norm": 0.2354445606470108, | |
| "learning_rate": 2.6795026615405635e-06, | |
| "loss": 0.4326, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.0662549129702414, | |
| "grad_norm": 0.22063247859477997, | |
| "learning_rate": 2.6679253945334096e-06, | |
| "loss": 0.4098, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.06850084222347, | |
| "grad_norm": 0.25319838523864746, | |
| "learning_rate": 2.65636408469813e-06, | |
| "loss": 0.427, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.0707467714766983, | |
| "grad_norm": 0.2321866899728775, | |
| "learning_rate": 2.6448188111427426e-06, | |
| "loss": 0.3939, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.072992700729927, | |
| "grad_norm": 0.22791002690792084, | |
| "learning_rate": 2.633289652865544e-06, | |
| "loss": 0.4375, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.0752386299831556, | |
| "grad_norm": 0.21649421751499176, | |
| "learning_rate": 2.6217766887545558e-06, | |
| "loss": 0.434, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.077484559236384, | |
| "grad_norm": 0.2443019300699234, | |
| "learning_rate": 2.6102799975869976e-06, | |
| "loss": 0.407, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.0797304884896124, | |
| "grad_norm": 0.2476467788219452, | |
| "learning_rate": 2.5987996580287397e-06, | |
| "loss": 0.4238, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.0819764177428413, | |
| "grad_norm": 0.21861916780471802, | |
| "learning_rate": 2.5873357486337626e-06, | |
| "loss": 0.4365, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.0842223469960697, | |
| "grad_norm": 0.23719200491905212, | |
| "learning_rate": 2.5758883478436304e-06, | |
| "loss": 0.3978, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.086468276249298, | |
| "grad_norm": 0.26309382915496826, | |
| "learning_rate": 2.564457533986944e-06, | |
| "loss": 0.4282, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.0887142055025265, | |
| "grad_norm": 0.23072056472301483, | |
| "learning_rate": 2.5530433852788095e-06, | |
| "loss": 0.44, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.0909601347557554, | |
| "grad_norm": 0.23523831367492676, | |
| "learning_rate": 2.541645979820301e-06, | |
| "loss": 0.4089, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.093206064008984, | |
| "grad_norm": 0.20197081565856934, | |
| "learning_rate": 2.5302653955979257e-06, | |
| "loss": 0.3742, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.095451993262212, | |
| "grad_norm": 0.24736276268959045, | |
| "learning_rate": 2.518901710483095e-06, | |
| "loss": 0.447, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.0976979225154406, | |
| "grad_norm": 0.23071594536304474, | |
| "learning_rate": 2.5075550022315885e-06, | |
| "loss": 0.4195, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0999438517686695, | |
| "grad_norm": 0.21248017251491547, | |
| "learning_rate": 2.4962253484830197e-06, | |
| "loss": 0.4163, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.102189781021898, | |
| "grad_norm": 0.20917271077632904, | |
| "learning_rate": 2.4849128267603106e-06, | |
| "loss": 0.4017, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.1044357102751263, | |
| "grad_norm": 0.23887436091899872, | |
| "learning_rate": 2.4736175144691543e-06, | |
| "loss": 0.443, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.1066816395283547, | |
| "grad_norm": 0.23489055037498474, | |
| "learning_rate": 2.4623394888974863e-06, | |
| "loss": 0.4361, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.108927568781583, | |
| "grad_norm": 0.21189194917678833, | |
| "learning_rate": 2.451078827214964e-06, | |
| "loss": 0.3941, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.111173498034812, | |
| "grad_norm": 0.22420427203178406, | |
| "learning_rate": 2.4398356064724298e-06, | |
| "loss": 0.4345, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.1134194272880404, | |
| "grad_norm": 0.2321353554725647, | |
| "learning_rate": 2.4286099036013904e-06, | |
| "loss": 0.4527, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.115665356541269, | |
| "grad_norm": 0.224471315741539, | |
| "learning_rate": 2.417401795413478e-06, | |
| "loss": 0.395, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.1179112857944973, | |
| "grad_norm": 0.24702583253383636, | |
| "learning_rate": 2.4062113585999452e-06, | |
| "loss": 0.4491, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.120157215047726, | |
| "grad_norm": 0.21472668647766113, | |
| "learning_rate": 2.395038669731117e-06, | |
| "loss": 0.4342, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.1224031443009546, | |
| "grad_norm": 0.22108450531959534, | |
| "learning_rate": 2.3838838052558867e-06, | |
| "loss": 0.4183, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.124649073554183, | |
| "grad_norm": 0.2732450067996979, | |
| "learning_rate": 2.372746841501184e-06, | |
| "loss": 0.4166, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1268950028074114, | |
| "grad_norm": 0.21384459733963013, | |
| "learning_rate": 2.3616278546714464e-06, | |
| "loss": 0.4534, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.1291409320606403, | |
| "grad_norm": 0.20551139116287231, | |
| "learning_rate": 2.350526920848113e-06, | |
| "loss": 0.4005, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.1313868613138687, | |
| "grad_norm": 0.24042649567127228, | |
| "learning_rate": 2.339444115989093e-06, | |
| "loss": 0.4474, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.133632790567097, | |
| "grad_norm": 0.2255227416753769, | |
| "learning_rate": 2.3283795159282443e-06, | |
| "loss": 0.4203, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1358787198203255, | |
| "grad_norm": 0.20983435213565826, | |
| "learning_rate": 2.3173331963748646e-06, | |
| "loss": 0.4102, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.1381246490735544, | |
| "grad_norm": 0.2264542430639267, | |
| "learning_rate": 2.306305232913163e-06, | |
| "loss": 0.4155, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.140370578326783, | |
| "grad_norm": 0.24919871985912323, | |
| "learning_rate": 2.2952957010017506e-06, | |
| "loss": 0.4175, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.142616507580011, | |
| "grad_norm": 0.21375016868114471, | |
| "learning_rate": 2.2843046759731206e-06, | |
| "loss": 0.4367, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1448624368332396, | |
| "grad_norm": 0.2692919671535492, | |
| "learning_rate": 2.273332233033134e-06, | |
| "loss": 0.4246, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.147108366086468, | |
| "grad_norm": 0.22732344269752502, | |
| "learning_rate": 2.2623784472605016e-06, | |
| "loss": 0.4229, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.149354295339697, | |
| "grad_norm": 0.23208336532115936, | |
| "learning_rate": 2.2514433936062714e-06, | |
| "loss": 0.4367, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.1516002245929253, | |
| "grad_norm": 0.24797451496124268, | |
| "learning_rate": 2.2405271468933224e-06, | |
| "loss": 0.4062, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.2095116823911667, | |
| "learning_rate": 2.2296297818158458e-06, | |
| "loss": 0.4208, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.156092083099382, | |
| "grad_norm": 0.2208539843559265, | |
| "learning_rate": 2.218751372938834e-06, | |
| "loss": 0.4243, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.158338012352611, | |
| "grad_norm": 0.2577050030231476, | |
| "learning_rate": 2.2078919946975753e-06, | |
| "loss": 0.438, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.1605839416058394, | |
| "grad_norm": 0.23505088686943054, | |
| "learning_rate": 2.1970517213971367e-06, | |
| "loss": 0.4164, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.162829870859068, | |
| "grad_norm": 0.212454691529274, | |
| "learning_rate": 2.186230627211861e-06, | |
| "loss": 0.4239, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.1650758001122963, | |
| "grad_norm": 0.22702592611312866, | |
| "learning_rate": 2.175428786184861e-06, | |
| "loss": 0.4094, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.167321729365525, | |
| "grad_norm": 0.2175099104642868, | |
| "learning_rate": 2.1646462722275085e-06, | |
| "loss": 0.411, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.1695676586187536, | |
| "grad_norm": 0.22848990559577942, | |
| "learning_rate": 2.1538831591189317e-06, | |
| "loss": 0.4353, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.171813587871982, | |
| "grad_norm": 0.22559164464473724, | |
| "learning_rate": 2.1431395205055085e-06, | |
| "loss": 0.4398, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.1740595171252104, | |
| "grad_norm": 0.19590629637241364, | |
| "learning_rate": 2.1324154299003597e-06, | |
| "loss": 0.4266, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1763054463784393, | |
| "grad_norm": 0.2409408986568451, | |
| "learning_rate": 2.121710960682851e-06, | |
| "loss": 0.4286, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.1785513756316677, | |
| "grad_norm": 0.2229623794555664, | |
| "learning_rate": 2.111026186098092e-06, | |
| "loss": 0.4492, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.180797304884896, | |
| "grad_norm": 0.20039457082748413, | |
| "learning_rate": 2.1003611792564288e-06, | |
| "loss": 0.4213, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.1830432341381245, | |
| "grad_norm": 0.2124020755290985, | |
| "learning_rate": 2.0897160131329508e-06, | |
| "loss": 0.4235, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1852891633913534, | |
| "grad_norm": 0.23414523899555206, | |
| "learning_rate": 2.079090760566982e-06, | |
| "loss": 0.4318, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.187535092644582, | |
| "grad_norm": 0.24739769101142883, | |
| "learning_rate": 2.0684854942615946e-06, | |
| "loss": 0.4196, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.18978102189781, | |
| "grad_norm": 0.22191846370697021, | |
| "learning_rate": 2.0579002867830987e-06, | |
| "loss": 0.4345, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.1920269511510386, | |
| "grad_norm": 0.21731607615947723, | |
| "learning_rate": 2.0473352105605583e-06, | |
| "loss": 0.4056, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1942728804042675, | |
| "grad_norm": 0.2345353066921234, | |
| "learning_rate": 2.0367903378852876e-06, | |
| "loss": 0.428, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.196518809657496, | |
| "grad_norm": 0.23607279360294342, | |
| "learning_rate": 2.0262657409103565e-06, | |
| "loss": 0.4514, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.1987647389107243, | |
| "grad_norm": 0.21260501444339752, | |
| "learning_rate": 2.0157614916501e-06, | |
| "loss": 0.411, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.2010106681639527, | |
| "grad_norm": 0.2566327750682831, | |
| "learning_rate": 2.0052776619796265e-06, | |
| "loss": 0.4125, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.203256597417181, | |
| "grad_norm": 0.23026920855045319, | |
| "learning_rate": 1.9948143236343226e-06, | |
| "loss": 0.4223, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.20550252667041, | |
| "grad_norm": 0.21213333308696747, | |
| "learning_rate": 1.9843715482093613e-06, | |
| "loss": 0.4035, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.2077484559236384, | |
| "grad_norm": 0.22886443138122559, | |
| "learning_rate": 1.9739494071592143e-06, | |
| "loss": 0.4215, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.209994385176867, | |
| "grad_norm": 0.2419017106294632, | |
| "learning_rate": 1.9635479717971656e-06, | |
| "loss": 0.4185, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.2122403144300953, | |
| "grad_norm": 0.22518743574619293, | |
| "learning_rate": 1.953167313294821e-06, | |
| "loss": 0.4334, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.214486243683324, | |
| "grad_norm": 0.23835636675357819, | |
| "learning_rate": 1.9428075026816186e-06, | |
| "loss": 0.432, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.2167321729365526, | |
| "grad_norm": 0.23428522050380707, | |
| "learning_rate": 1.9324686108443487e-06, | |
| "loss": 0.4255, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.218978102189781, | |
| "grad_norm": 0.2030942142009735, | |
| "learning_rate": 1.9221507085266617e-06, | |
| "loss": 0.4117, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.2212240314430094, | |
| "grad_norm": 0.2084757536649704, | |
| "learning_rate": 1.9118538663285874e-06, | |
| "loss": 0.4233, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.2234699606962383, | |
| "grad_norm": 0.209101602435112, | |
| "learning_rate": 1.9015781547060552e-06, | |
| "loss": 0.3821, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.2257158899494667, | |
| "grad_norm": 0.22239622473716736, | |
| "learning_rate": 1.8913236439704085e-06, | |
| "loss": 0.4382, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.227961819202695, | |
| "grad_norm": 0.21578405797481537, | |
| "learning_rate": 1.881090404287924e-06, | |
| "loss": 0.415, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.2302077484559235, | |
| "grad_norm": 0.2138473093509674, | |
| "learning_rate": 1.8708785056793276e-06, | |
| "loss": 0.4217, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.2324536777091524, | |
| "grad_norm": 0.20428405702114105, | |
| "learning_rate": 1.8606880180193265e-06, | |
| "loss": 0.405, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.234699606962381, | |
| "grad_norm": 0.20017048716545105, | |
| "learning_rate": 1.850519011036117e-06, | |
| "loss": 0.4286, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.236945536215609, | |
| "grad_norm": 0.21324189007282257, | |
| "learning_rate": 1.840371554310918e-06, | |
| "loss": 0.4253, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.2391914654688376, | |
| "grad_norm": 0.21719501912593842, | |
| "learning_rate": 1.8302457172774929e-06, | |
| "loss": 0.4175, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.241437394722066, | |
| "grad_norm": 0.19842517375946045, | |
| "learning_rate": 1.8201415692216673e-06, | |
| "loss": 0.4131, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.243683323975295, | |
| "grad_norm": 0.19860929250717163, | |
| "learning_rate": 1.8100591792808652e-06, | |
| "loss": 0.4093, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.2459292532285233, | |
| "grad_norm": 0.20503376424312592, | |
| "learning_rate": 1.7999986164436312e-06, | |
| "loss": 0.4346, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.2481751824817517, | |
| "grad_norm": 0.2113645225763321, | |
| "learning_rate": 1.7899599495491532e-06, | |
| "loss": 0.4244, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.25042111173498, | |
| "grad_norm": 0.21410761773586273, | |
| "learning_rate": 1.7799432472868038e-06, | |
| "loss": 0.4226, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.252667040988209, | |
| "grad_norm": 0.2089909315109253, | |
| "learning_rate": 1.769948578195656e-06, | |
| "loss": 0.4032, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.2549129702414374, | |
| "grad_norm": 0.20202669501304626, | |
| "learning_rate": 1.7599760106640263e-06, | |
| "loss": 0.4177, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.257158899494666, | |
| "grad_norm": 0.21100208163261414, | |
| "learning_rate": 1.7500256129290005e-06, | |
| "loss": 0.4445, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.2594048287478943, | |
| "grad_norm": 0.22142833471298218, | |
| "learning_rate": 1.740097453075969e-06, | |
| "loss": 0.4252, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.261650758001123, | |
| "grad_norm": 0.20687736570835114, | |
| "learning_rate": 1.7301915990381568e-06, | |
| "loss": 0.4301, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.2638966872543516, | |
| "grad_norm": 0.21982485055923462, | |
| "learning_rate": 1.7203081185961624e-06, | |
| "loss": 0.4307, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.26614261650758, | |
| "grad_norm": 0.21791280806064606, | |
| "learning_rate": 1.7104470793774959e-06, | |
| "loss": 0.4517, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.2683885457608084, | |
| "grad_norm": 0.20038799941539764, | |
| "learning_rate": 1.700608548856113e-06, | |
| "loss": 0.4109, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2706344750140373, | |
| "grad_norm": 0.22775229811668396, | |
| "learning_rate": 1.6907925943519532e-06, | |
| "loss": 0.4219, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.2728804042672657, | |
| "grad_norm": 0.21923872828483582, | |
| "learning_rate": 1.6809992830304827e-06, | |
| "loss": 0.4278, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.275126333520494, | |
| "grad_norm": 0.20975294709205627, | |
| "learning_rate": 1.671228681902229e-06, | |
| "loss": 0.4241, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.2773722627737225, | |
| "grad_norm": 0.20294855535030365, | |
| "learning_rate": 1.6614808578223235e-06, | |
| "loss": 0.407, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.279618192026951, | |
| "grad_norm": 0.2194415181875229, | |
| "learning_rate": 1.6517558774900517e-06, | |
| "loss": 0.4304, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.28186412128018, | |
| "grad_norm": 0.21036003530025482, | |
| "learning_rate": 1.642053807448389e-06, | |
| "loss": 0.43, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.284110050533408, | |
| "grad_norm": 0.2032628208398819, | |
| "learning_rate": 1.6323747140835484e-06, | |
| "loss": 0.4567, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.2863559797866366, | |
| "grad_norm": 0.2219427227973938, | |
| "learning_rate": 1.6227186636245218e-06, | |
| "loss": 0.418, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.2886019090398655, | |
| "grad_norm": 0.227565735578537, | |
| "learning_rate": 1.613085722142636e-06, | |
| "loss": 0.4321, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.290847838293094, | |
| "grad_norm": 0.19647327065467834, | |
| "learning_rate": 1.60347595555109e-06, | |
| "loss": 0.4233, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2930937675463223, | |
| "grad_norm": 0.20968946814537048, | |
| "learning_rate": 1.593889429604511e-06, | |
| "loss": 0.4558, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.2953396967995507, | |
| "grad_norm": 0.22316963970661163, | |
| "learning_rate": 1.5843262098985051e-06, | |
| "loss": 0.425, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.297585626052779, | |
| "grad_norm": 0.22763703763484955, | |
| "learning_rate": 1.5747863618692044e-06, | |
| "loss": 0.4291, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.299831555306008, | |
| "grad_norm": 0.198600634932518, | |
| "learning_rate": 1.5652699507928166e-06, | |
| "loss": 0.4273, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.3020774845592364, | |
| "grad_norm": 0.20251289010047913, | |
| "learning_rate": 1.5557770417851886e-06, | |
| "loss": 0.4173, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.304323413812465, | |
| "grad_norm": 0.2171899527311325, | |
| "learning_rate": 1.5463076998013533e-06, | |
| "loss": 0.4439, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.3065693430656933, | |
| "grad_norm": 0.22569715976715088, | |
| "learning_rate": 1.5368619896350828e-06, | |
| "loss": 0.4294, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.308815272318922, | |
| "grad_norm": 0.2233586460351944, | |
| "learning_rate": 1.527439975918455e-06, | |
| "loss": 0.439, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.3110612015721506, | |
| "grad_norm": 0.2036871761083603, | |
| "learning_rate": 1.5180417231214001e-06, | |
| "loss": 0.3983, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.313307130825379, | |
| "grad_norm": 0.21388086676597595, | |
| "learning_rate": 1.5086672955512672e-06, | |
| "loss": 0.4523, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.3155530600786074, | |
| "grad_norm": 0.21578197181224823, | |
| "learning_rate": 1.4993167573523821e-06, | |
| "loss": 0.4105, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.317798989331836, | |
| "grad_norm": 0.20119976997375488, | |
| "learning_rate": 1.4899901725056093e-06, | |
| "loss": 0.401, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.3200449185850647, | |
| "grad_norm": 0.19066974520683289, | |
| "learning_rate": 1.4806876048279095e-06, | |
| "loss": 0.416, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.322290847838293, | |
| "grad_norm": 0.205108642578125, | |
| "learning_rate": 1.471409117971907e-06, | |
| "loss": 0.4203, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.3245367770915215, | |
| "grad_norm": 0.2170393466949463, | |
| "learning_rate": 1.462154775425455e-06, | |
| "loss": 0.4322, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.3267827063447504, | |
| "grad_norm": 0.19994419813156128, | |
| "learning_rate": 1.4529246405112002e-06, | |
| "loss": 0.4364, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.329028635597979, | |
| "grad_norm": 0.1924624890089035, | |
| "learning_rate": 1.4437187763861487e-06, | |
| "loss": 0.4234, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.331274564851207, | |
| "grad_norm": 0.21115237474441528, | |
| "learning_rate": 1.4345372460412348e-06, | |
| "loss": 0.398, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.3335204941044356, | |
| "grad_norm": 0.21371133625507355, | |
| "learning_rate": 1.425380112300887e-06, | |
| "loss": 0.4537, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.335766423357664, | |
| "grad_norm": 0.19634070992469788, | |
| "learning_rate": 1.416247437822601e-06, | |
| "loss": 0.4035, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.338012352610893, | |
| "grad_norm": 0.21442176401615143, | |
| "learning_rate": 1.4071392850965126e-06, | |
| "loss": 0.4507, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.3402582818641213, | |
| "grad_norm": 0.20288068056106567, | |
| "learning_rate": 1.398055716444967e-06, | |
| "loss": 0.4034, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3425042111173497, | |
| "grad_norm": 0.20702247321605682, | |
| "learning_rate": 1.388996794022095e-06, | |
| "loss": 0.4336, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.344750140370578, | |
| "grad_norm": 0.23960836231708527, | |
| "learning_rate": 1.3799625798133814e-06, | |
| "loss": 0.4464, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.346996069623807, | |
| "grad_norm": 0.20227837562561035, | |
| "learning_rate": 1.3709531356352512e-06, | |
| "loss": 0.4085, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.3492419988770354, | |
| "grad_norm": 0.21481740474700928, | |
| "learning_rate": 1.3619685231346358e-06, | |
| "loss": 0.4478, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.351487928130264, | |
| "grad_norm": 0.19349761307239532, | |
| "learning_rate": 1.3530088037885608e-06, | |
| "loss": 0.4202, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.3537338573834923, | |
| "grad_norm": 0.21037468314170837, | |
| "learning_rate": 1.3440740389037198e-06, | |
| "loss": 0.4613, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.3559797866367207, | |
| "grad_norm": 0.19390463829040527, | |
| "learning_rate": 1.3351642896160522e-06, | |
| "loss": 0.3749, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.3582257158899496, | |
| "grad_norm": 0.19581280648708344, | |
| "learning_rate": 1.3262796168903342e-06, | |
| "loss": 0.4362, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.360471645143178, | |
| "grad_norm": 0.21136844158172607, | |
| "learning_rate": 1.317420081519754e-06, | |
| "loss": 0.4499, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.3627175743964064, | |
| "grad_norm": 0.19537141919136047, | |
| "learning_rate": 1.3085857441254956e-06, | |
| "loss": 0.4137, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.3649635036496353, | |
| "grad_norm": 0.20818866789340973, | |
| "learning_rate": 1.2997766651563316e-06, | |
| "loss": 0.4093, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.3672094329028637, | |
| "grad_norm": 0.21565309166908264, | |
| "learning_rate": 1.2909929048881976e-06, | |
| "loss": 0.4468, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.369455362156092, | |
| "grad_norm": 0.19632428884506226, | |
| "learning_rate": 1.2822345234237915e-06, | |
| "loss": 0.4116, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.3717012914093205, | |
| "grad_norm": 0.2132972776889801, | |
| "learning_rate": 1.2735015806921563e-06, | |
| "loss": 0.4218, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.373947220662549, | |
| "grad_norm": 0.19294115900993347, | |
| "learning_rate": 1.264794136448272e-06, | |
| "loss": 0.4081, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.376193149915778, | |
| "grad_norm": 0.21431930363178253, | |
| "learning_rate": 1.2561122502726424e-06, | |
| "loss": 0.4433, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.378439079169006, | |
| "grad_norm": 0.21879686415195465, | |
| "learning_rate": 1.247455981570892e-06, | |
| "loss": 0.4266, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.3806850084222346, | |
| "grad_norm": 0.2212316393852234, | |
| "learning_rate": 1.2388253895733598e-06, | |
| "loss": 0.4508, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.382930937675463, | |
| "grad_norm": 0.19899475574493408, | |
| "learning_rate": 1.2302205333346923e-06, | |
| "loss": 0.4252, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.385176866928692, | |
| "grad_norm": 0.24420271813869476, | |
| "learning_rate": 1.2216414717334378e-06, | |
| "loss": 0.436, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3874227961819203, | |
| "grad_norm": 0.19686606526374817, | |
| "learning_rate": 1.213088263471649e-06, | |
| "loss": 0.3885, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.3896687254351487, | |
| "grad_norm": 0.19978255033493042, | |
| "learning_rate": 1.2045609670744729e-06, | |
| "loss": 0.4507, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.391914654688377, | |
| "grad_norm": 0.20148004591464996, | |
| "learning_rate": 1.1960596408897562e-06, | |
| "loss": 0.4287, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.394160583941606, | |
| "grad_norm": 0.1934734582901001, | |
| "learning_rate": 1.1875843430876484e-06, | |
| "loss": 0.4145, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.3964065131948344, | |
| "grad_norm": 0.19930601119995117, | |
| "learning_rate": 1.1791351316601962e-06, | |
| "loss": 0.4234, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.398652442448063, | |
| "grad_norm": 0.2159959226846695, | |
| "learning_rate": 1.1707120644209557e-06, | |
| "loss": 0.4407, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.4008983717012913, | |
| "grad_norm": 0.20033979415893555, | |
| "learning_rate": 1.162315199004585e-06, | |
| "loss": 0.4142, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.40314430095452, | |
| "grad_norm": 0.21157881617546082, | |
| "learning_rate": 1.153944592866464e-06, | |
| "loss": 0.4211, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.4053902302077486, | |
| "grad_norm": 0.19438238441944122, | |
| "learning_rate": 1.1456003032822882e-06, | |
| "loss": 0.4439, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.407636159460977, | |
| "grad_norm": 0.19469432532787323, | |
| "learning_rate": 1.1372823873476857e-06, | |
| "loss": 0.4035, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.4098820887142054, | |
| "grad_norm": 0.19877132773399353, | |
| "learning_rate": 1.128990901977825e-06, | |
| "loss": 0.4334, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.412128017967434, | |
| "grad_norm": 0.1978437304496765, | |
| "learning_rate": 1.1207259039070183e-06, | |
| "loss": 0.4471, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.4143739472206627, | |
| "grad_norm": 0.1895316243171692, | |
| "learning_rate": 1.1124874496883454e-06, | |
| "loss": 0.4103, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.416619876473891, | |
| "grad_norm": 0.18528953194618225, | |
| "learning_rate": 1.104275595693256e-06, | |
| "loss": 0.4137, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.4188658057271195, | |
| "grad_norm": 0.20490433275699615, | |
| "learning_rate": 1.096090398111192e-06, | |
| "loss": 0.4598, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.421111734980348, | |
| "grad_norm": 0.19539935886859894, | |
| "learning_rate": 1.087931912949195e-06, | |
| "loss": 0.3935, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.423357664233577, | |
| "grad_norm": 0.20176726579666138, | |
| "learning_rate": 1.0798001960315313e-06, | |
| "loss": 0.4247, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.425603593486805, | |
| "grad_norm": 0.194259911775589, | |
| "learning_rate": 1.071695302999302e-06, | |
| "loss": 0.425, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.4278495227400336, | |
| "grad_norm": 0.2146841585636139, | |
| "learning_rate": 1.0636172893100704e-06, | |
| "loss": 0.4366, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.430095451993262, | |
| "grad_norm": 0.20084460079669952, | |
| "learning_rate": 1.0555662102374764e-06, | |
| "loss": 0.4355, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.432341381246491, | |
| "grad_norm": 0.1886490285396576, | |
| "learning_rate": 1.0475421208708626e-06, | |
| "loss": 0.4014, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.4345873104997193, | |
| "grad_norm": 0.19774523377418518, | |
| "learning_rate": 1.0395450761148911e-06, | |
| "loss": 0.4074, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4368332397529477, | |
| "grad_norm": 0.2086760252714157, | |
| "learning_rate": 1.031575130689173e-06, | |
| "loss": 0.4258, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.439079169006176, | |
| "grad_norm": 0.20382975041866302, | |
| "learning_rate": 1.0236323391278958e-06, | |
| "loss": 0.4247, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.441325098259405, | |
| "grad_norm": 0.20429746806621552, | |
| "learning_rate": 1.0157167557794433e-06, | |
| "loss": 0.4215, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.4435710275126334, | |
| "grad_norm": 0.18974192440509796, | |
| "learning_rate": 1.0078284348060318e-06, | |
| "loss": 0.4119, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.445816956765862, | |
| "grad_norm": 0.21000362932682037, | |
| "learning_rate": 9.999674301833328e-07, | |
| "loss": 0.4524, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.4480628860190903, | |
| "grad_norm": 0.1856634020805359, | |
| "learning_rate": 9.921337957001059e-07, | |
| "loss": 0.3757, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4503088152723187, | |
| "grad_norm": 0.1969255656003952, | |
| "learning_rate": 9.843275849578305e-07, | |
| "loss": 0.4292, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.4525547445255476, | |
| "grad_norm": 0.21311074495315552, | |
| "learning_rate": 9.765488513703414e-07, | |
| "loss": 0.426, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.454800673778776, | |
| "grad_norm": 0.19789327681064606, | |
| "learning_rate": 9.68797648163462e-07, | |
| "loss": 0.429, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.4570466030320044, | |
| "grad_norm": 0.19123998284339905, | |
| "learning_rate": 9.610740283746395e-07, | |
| "loss": 0.4161, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4592925322852333, | |
| "grad_norm": 0.1999826729297638, | |
| "learning_rate": 9.533780448525792e-07, | |
| "loss": 0.4232, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.20449966192245483, | |
| "learning_rate": 9.457097502568896e-07, | |
| "loss": 0.4478, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.46378439079169, | |
| "grad_norm": 0.2035766839981079, | |
| "learning_rate": 9.380691970577144e-07, | |
| "loss": 0.4434, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.4660303200449185, | |
| "grad_norm": 0.200229674577713, | |
| "learning_rate": 9.304564375353814e-07, | |
| "loss": 0.3788, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.468276249298147, | |
| "grad_norm": 0.19415318965911865, | |
| "learning_rate": 9.228715237800395e-07, | |
| "loss": 0.4382, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.470522178551376, | |
| "grad_norm": 0.21206416189670563, | |
| "learning_rate": 9.153145076913006e-07, | |
| "loss": 0.4445, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.472768107804604, | |
| "grad_norm": 0.19867388904094696, | |
| "learning_rate": 9.077854409778913e-07, | |
| "loss": 0.4104, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.4750140370578326, | |
| "grad_norm": 0.202217236161232, | |
| "learning_rate": 9.002843751572943e-07, | |
| "loss": 0.4641, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.477259966311061, | |
| "grad_norm": 0.1925583928823471, | |
| "learning_rate": 8.928113615553946e-07, | |
| "loss": 0.4218, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.47950589556429, | |
| "grad_norm": 0.20704378187656403, | |
| "learning_rate": 8.853664513061333e-07, | |
| "loss": 0.4178, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4817518248175183, | |
| "grad_norm": 0.1998777687549591, | |
| "learning_rate": 8.779496953511519e-07, | |
| "loss": 0.4299, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.4839977540707467, | |
| "grad_norm": 0.2032717913389206, | |
| "learning_rate": 8.705611444394496e-07, | |
| "loss": 0.4173, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.486243683323975, | |
| "grad_norm": 0.21000362932682037, | |
| "learning_rate": 8.632008491270316e-07, | |
| "loss": 0.4336, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.4884896125772036, | |
| "grad_norm": 0.19644078612327576, | |
| "learning_rate": 8.558688597765668e-07, | |
| "loss": 0.4197, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4907355418304324, | |
| "grad_norm": 0.19872646033763885, | |
| "learning_rate": 8.485652265570376e-07, | |
| "loss": 0.427, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.492981471083661, | |
| "grad_norm": 0.1902010142803192, | |
| "learning_rate": 8.412899994434015e-07, | |
| "loss": 0.4204, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4952274003368893, | |
| "grad_norm": 0.19192348420619965, | |
| "learning_rate": 8.340432282162492e-07, | |
| "loss": 0.4235, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.497473329590118, | |
| "grad_norm": 0.20328937470912933, | |
| "learning_rate": 8.268249624614622e-07, | |
| "loss": 0.4191, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.4997192588433466, | |
| "grad_norm": 0.19253182411193848, | |
| "learning_rate": 8.19635251569873e-07, | |
| "loss": 0.3998, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.501965188096575, | |
| "grad_norm": 0.20604483783245087, | |
| "learning_rate": 8.1247414473693e-07, | |
| "loss": 0.4568, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.5042111173498034, | |
| "grad_norm": 0.188734769821167, | |
| "learning_rate": 8.053416909623557e-07, | |
| "loss": 0.421, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.506457046603032, | |
| "grad_norm": 0.19376307725906372, | |
| "learning_rate": 7.982379390498157e-07, | |
| "loss": 0.3927, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.5087029758562607, | |
| "grad_norm": 0.20353421568870544, | |
| "learning_rate": 7.911629376065849e-07, | |
| "loss": 0.4049, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.510948905109489, | |
| "grad_norm": 0.20604628324508667, | |
| "learning_rate": 7.841167350432144e-07, | |
| "loss": 0.4351, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.5131948343627175, | |
| "grad_norm": 0.19524161517620087, | |
| "learning_rate": 7.770993795731984e-07, | |
| "loss": 0.4201, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.5154407636159464, | |
| "grad_norm": 0.18067501485347748, | |
| "learning_rate": 7.701109192126438e-07, | |
| "loss": 0.4176, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.517686692869175, | |
| "grad_norm": 0.2033979743719101, | |
| "learning_rate": 7.631514017799451e-07, | |
| "loss": 0.4368, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.519932622122403, | |
| "grad_norm": 0.19285623729228973, | |
| "learning_rate": 7.56220874895458e-07, | |
| "loss": 0.3991, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.5221785513756316, | |
| "grad_norm": 0.2072119414806366, | |
| "learning_rate": 7.493193859811643e-07, | |
| "loss": 0.439, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.52442448062886, | |
| "grad_norm": 0.1932649165391922, | |
| "learning_rate": 7.424469822603613e-07, | |
| "loss": 0.4049, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.5266704098820885, | |
| "grad_norm": 0.19807684421539307, | |
| "learning_rate": 7.356037107573255e-07, | |
| "loss": 0.417, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.5289163391353173, | |
| "grad_norm": 0.19324353337287903, | |
| "learning_rate": 7.287896182970011e-07, | |
| "loss": 0.4432, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5311622683885457, | |
| "grad_norm": 0.18392372131347656, | |
| "learning_rate": 7.220047515046729e-07, | |
| "loss": 0.3841, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.533408197641774, | |
| "grad_norm": 0.209947869181633, | |
| "learning_rate": 7.152491568056524e-07, | |
| "loss": 0.4411, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.535654126895003, | |
| "grad_norm": 0.2061583697795868, | |
| "learning_rate": 7.085228804249538e-07, | |
| "loss": 0.4309, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.5379000561482314, | |
| "grad_norm": 0.20045273005962372, | |
| "learning_rate": 7.018259683869827e-07, | |
| "loss": 0.4388, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.54014598540146, | |
| "grad_norm": 0.22213779389858246, | |
| "learning_rate": 6.9515846651522e-07, | |
| "loss": 0.4372, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.5423919146546883, | |
| "grad_norm": 0.20819616317749023, | |
| "learning_rate": 6.885204204319096e-07, | |
| "loss": 0.4334, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5446378439079167, | |
| "grad_norm": 0.1961192786693573, | |
| "learning_rate": 6.819118755577419e-07, | |
| "loss": 0.4276, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.5468837731611456, | |
| "grad_norm": 0.19298788905143738, | |
| "learning_rate": 6.753328771115503e-07, | |
| "loss": 0.4254, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.549129702414374, | |
| "grad_norm": 0.17879649996757507, | |
| "learning_rate": 6.687834701099921e-07, | |
| "loss": 0.3883, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.5513756316676024, | |
| "grad_norm": 0.19765320420265198, | |
| "learning_rate": 6.622636993672477e-07, | |
| "loss": 0.4365, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5536215609208313, | |
| "grad_norm": 0.19494295120239258, | |
| "learning_rate": 6.557736094947137e-07, | |
| "loss": 0.4137, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.5558674901740597, | |
| "grad_norm": 0.21241888403892517, | |
| "learning_rate": 6.493132449006939e-07, | |
| "loss": 0.4415, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.558113419427288, | |
| "grad_norm": 0.19840067625045776, | |
| "learning_rate": 6.428826497900992e-07, | |
| "loss": 0.459, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.5603593486805165, | |
| "grad_norm": 0.19528187811374664, | |
| "learning_rate": 6.364818681641438e-07, | |
| "loss": 0.4057, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.562605277933745, | |
| "grad_norm": 0.20458662509918213, | |
| "learning_rate": 6.301109438200403e-07, | |
| "loss": 0.442, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.5648512071869733, | |
| "grad_norm": 0.2054254561662674, | |
| "learning_rate": 6.237699203507058e-07, | |
| "loss": 0.4237, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.567097136440202, | |
| "grad_norm": 0.2081318199634552, | |
| "learning_rate": 6.174588411444621e-07, | |
| "loss": 0.4252, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.5693430656934306, | |
| "grad_norm": 0.19667313992977142, | |
| "learning_rate": 6.111777493847365e-07, | |
| "loss": 0.4285, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.571588994946659, | |
| "grad_norm": 0.1907162368297577, | |
| "learning_rate": 6.0492668804977e-07, | |
| "loss": 0.4135, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.573834924199888, | |
| "grad_norm": 0.1859651803970337, | |
| "learning_rate": 5.987056999123175e-07, | |
| "loss": 0.4227, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5760808534531163, | |
| "grad_norm": 0.20672091841697693, | |
| "learning_rate": 5.925148275393621e-07, | |
| "loss": 0.424, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.5783267827063447, | |
| "grad_norm": 0.19329291582107544, | |
| "learning_rate": 5.863541132918171e-07, | |
| "loss": 0.4062, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.580572711959573, | |
| "grad_norm": 0.2025369554758072, | |
| "learning_rate": 5.802235993242428e-07, | |
| "loss": 0.458, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.5828186412128016, | |
| "grad_norm": 0.20467379689216614, | |
| "learning_rate": 5.741233275845537e-07, | |
| "loss": 0.4513, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5850645704660304, | |
| "grad_norm": 0.2135162204504013, | |
| "learning_rate": 5.680533398137305e-07, | |
| "loss": 0.3892, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.587310499719259, | |
| "grad_norm": 0.19036920368671417, | |
| "learning_rate": 5.620136775455387e-07, | |
| "loss": 0.4193, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5895564289724873, | |
| "grad_norm": 0.17486929893493652, | |
| "learning_rate": 5.560043821062421e-07, | |
| "loss": 0.39, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.591802358225716, | |
| "grad_norm": 0.19415879249572754, | |
| "learning_rate": 5.50025494614318e-07, | |
| "loss": 0.4548, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5940482874789446, | |
| "grad_norm": 0.1955343335866928, | |
| "learning_rate": 5.440770559801817e-07, | |
| "loss": 0.4209, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.596294216732173, | |
| "grad_norm": 0.22082816064357758, | |
| "learning_rate": 5.381591069058973e-07, | |
| "loss": 0.4281, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5985401459854014, | |
| "grad_norm": 0.19918213784694672, | |
| "learning_rate": 5.322716878849104e-07, | |
| "loss": 0.4192, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.60078607523863, | |
| "grad_norm": 0.19099506735801697, | |
| "learning_rate": 5.264148392017621e-07, | |
| "loss": 0.4085, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.6030320044918582, | |
| "grad_norm": 0.20114244520664215, | |
| "learning_rate": 5.205886009318184e-07, | |
| "loss": 0.4239, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.605277933745087, | |
| "grad_norm": 0.19720801711082458, | |
| "learning_rate": 5.147930129409928e-07, | |
| "loss": 0.4299, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.6075238629983155, | |
| "grad_norm": 0.19777406752109528, | |
| "learning_rate": 5.090281148854737e-07, | |
| "loss": 0.431, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.609769792251544, | |
| "grad_norm": 0.19977416098117828, | |
| "learning_rate": 5.032939462114572e-07, | |
| "loss": 0.4257, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.612015721504773, | |
| "grad_norm": 0.20614181458950043, | |
| "learning_rate": 4.975905461548725e-07, | |
| "loss": 0.437, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.614261650758001, | |
| "grad_norm": 0.1861875206232071, | |
| "learning_rate": 4.919179537411161e-07, | |
| "loss": 0.4164, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.6165075800112296, | |
| "grad_norm": 0.19667655229568481, | |
| "learning_rate": 4.862762077847844e-07, | |
| "loss": 0.4375, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.618753509264458, | |
| "grad_norm": 0.18777360022068024, | |
| "learning_rate": 4.806653468894051e-07, | |
| "loss": 0.4238, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.6209994385176865, | |
| "grad_norm": 0.18164758384227753, | |
| "learning_rate": 4.750854094471757e-07, | |
| "loss": 0.3991, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.6232453677709153, | |
| "grad_norm": 0.1905893087387085, | |
| "learning_rate": 4.695364336387037e-07, | |
| "loss": 0.4175, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.6254912970241437, | |
| "grad_norm": 0.19531551003456116, | |
| "learning_rate": 4.6401845743273945e-07, | |
| "loss": 0.4588, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.627737226277372, | |
| "grad_norm": 0.1983010172843933, | |
| "learning_rate": 4.585315185859218e-07, | |
| "loss": 0.4121, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.629983155530601, | |
| "grad_norm": 0.18379969894886017, | |
| "learning_rate": 4.53075654642513e-07, | |
| "loss": 0.4074, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.6322290847838294, | |
| "grad_norm": 0.1939253956079483, | |
| "learning_rate": 4.476509029341497e-07, | |
| "loss": 0.4521, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.634475014037058, | |
| "grad_norm": 0.19147953391075134, | |
| "learning_rate": 4.422573005795827e-07, | |
| "loss": 0.4376, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.6367209432902863, | |
| "grad_norm": 0.19624711573123932, | |
| "learning_rate": 4.368948844844223e-07, | |
| "loss": 0.4182, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6389668725435147, | |
| "grad_norm": 0.18469464778900146, | |
| "learning_rate": 4.3156369134089103e-07, | |
| "loss": 0.4354, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.6412128017967436, | |
| "grad_norm": 0.19770587980747223, | |
| "learning_rate": 4.262637576275641e-07, | |
| "loss": 0.4047, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.643458731049972, | |
| "grad_norm": 0.1849193125963211, | |
| "learning_rate": 4.209951196091294e-07, | |
| "loss": 0.4088, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.6457046603032004, | |
| "grad_norm": 0.2000664472579956, | |
| "learning_rate": 4.1575781333613176e-07, | |
| "loss": 0.4539, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.647950589556429, | |
| "grad_norm": 0.19104914367198944, | |
| "learning_rate": 4.1055187464473125e-07, | |
| "loss": 0.4097, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.6501965188096577, | |
| "grad_norm": 0.19243937730789185, | |
| "learning_rate": 4.0537733915645474e-07, | |
| "loss": 0.4218, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.652442448062886, | |
| "grad_norm": 0.19347138702869415, | |
| "learning_rate": 4.00234242277952e-07, | |
| "loss": 0.4278, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.6546883773161145, | |
| "grad_norm": 0.18277958035469055, | |
| "learning_rate": 3.951226192007568e-07, | |
| "loss": 0.4373, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.656934306569343, | |
| "grad_norm": 0.18322674930095673, | |
| "learning_rate": 3.900425049010437e-07, | |
| "loss": 0.4349, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.6591802358225713, | |
| "grad_norm": 0.19357453286647797, | |
| "learning_rate": 3.8499393413938937e-07, | |
| "loss": 0.4287, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6614261650758, | |
| "grad_norm": 0.19136710464954376, | |
| "learning_rate": 3.799769414605342e-07, | |
| "loss": 0.4154, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.6636720943290286, | |
| "grad_norm": 0.18795958161354065, | |
| "learning_rate": 3.7499156119314537e-07, | |
| "loss": 0.4077, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.665918023582257, | |
| "grad_norm": 0.18998844921588898, | |
| "learning_rate": 3.700378274495825e-07, | |
| "loss": 0.4125, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 2.668163952835486, | |
| "grad_norm": 0.194740891456604, | |
| "learning_rate": 3.6511577412566665e-07, | |
| "loss": 0.439, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.6704098820887143, | |
| "grad_norm": 0.18627774715423584, | |
| "learning_rate": 3.602254349004447e-07, | |
| "loss": 0.4139, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 2.6726558113419427, | |
| "grad_norm": 0.20535503327846527, | |
| "learning_rate": 3.553668432359625e-07, | |
| "loss": 0.441, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.674901740595171, | |
| "grad_norm": 0.18549248576164246, | |
| "learning_rate": 3.5054003237702916e-07, | |
| "loss": 0.441, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 2.6771476698483996, | |
| "grad_norm": 0.17974409461021423, | |
| "learning_rate": 3.45745035351e-07, | |
| "loss": 0.3985, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.6793935991016284, | |
| "grad_norm": 0.194856658577919, | |
| "learning_rate": 3.4098188496754057e-07, | |
| "loss": 0.4406, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 2.681639528354857, | |
| "grad_norm": 0.1955060213804245, | |
| "learning_rate": 3.362506138184085e-07, | |
| "loss": 0.4168, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6838854576080853, | |
| "grad_norm": 0.18493853509426117, | |
| "learning_rate": 3.3155125427722814e-07, | |
| "loss": 0.4128, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 2.686131386861314, | |
| "grad_norm": 0.19132456183433533, | |
| "learning_rate": 3.268838384992695e-07, | |
| "loss": 0.4372, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.6883773161145426, | |
| "grad_norm": 0.18947117030620575, | |
| "learning_rate": 3.2224839842122713e-07, | |
| "loss": 0.4166, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 2.690623245367771, | |
| "grad_norm": 0.17782782018184662, | |
| "learning_rate": 3.1764496576100425e-07, | |
| "loss": 0.3997, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6928691746209994, | |
| "grad_norm": 0.19115474820137024, | |
| "learning_rate": 3.1307357201749157e-07, | |
| "loss": 0.4568, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 2.695115103874228, | |
| "grad_norm": 0.18287594616413116, | |
| "learning_rate": 3.0853424847035573e-07, | |
| "loss": 0.4024, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6973610331274562, | |
| "grad_norm": 0.20194946229457855, | |
| "learning_rate": 3.040270261798245e-07, | |
| "loss": 0.4233, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 2.699606962380685, | |
| "grad_norm": 0.18246972560882568, | |
| "learning_rate": 2.995519359864707e-07, | |
| "loss": 0.4282, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.7018528916339135, | |
| "grad_norm": 0.17514237761497498, | |
| "learning_rate": 2.9510900851100646e-07, | |
| "loss": 0.4079, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 2.704098820887142, | |
| "grad_norm": 0.18999601900577545, | |
| "learning_rate": 2.90698274154072e-07, | |
| "loss": 0.4401, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.706344750140371, | |
| "grad_norm": 0.1866077333688736, | |
| "learning_rate": 2.863197630960224e-07, | |
| "loss": 0.4019, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 2.708590679393599, | |
| "grad_norm": 0.18696747720241547, | |
| "learning_rate": 2.81973505296731e-07, | |
| "loss": 0.4247, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.7108366086468276, | |
| "grad_norm": 0.1890602558851242, | |
| "learning_rate": 2.776595304953739e-07, | |
| "loss": 0.4345, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 2.713082537900056, | |
| "grad_norm": 0.21192647516727448, | |
| "learning_rate": 2.7337786821023503e-07, | |
| "loss": 0.4338, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.7153284671532845, | |
| "grad_norm": 0.19118010997772217, | |
| "learning_rate": 2.691285477384986e-07, | |
| "loss": 0.4223, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 2.7175743964065133, | |
| "grad_norm": 0.1966598927974701, | |
| "learning_rate": 2.6491159815605294e-07, | |
| "loss": 0.4268, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.7198203256597417, | |
| "grad_norm": 0.18716298043727875, | |
| "learning_rate": 2.6072704831728633e-07, | |
| "loss": 0.4214, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 2.72206625491297, | |
| "grad_norm": 0.19453807175159454, | |
| "learning_rate": 2.5657492685489283e-07, | |
| "loss": 0.4527, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.724312184166199, | |
| "grad_norm": 0.18477098643779755, | |
| "learning_rate": 2.5245526217967887e-07, | |
| "loss": 0.3948, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 2.7265581134194274, | |
| "grad_norm": 0.19278430938720703, | |
| "learning_rate": 2.4836808248036305e-07, | |
| "loss": 0.4088, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.728804042672656, | |
| "grad_norm": 0.18528202176094055, | |
| "learning_rate": 2.443134157233873e-07, | |
| "loss": 0.4136, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.7310499719258843, | |
| "grad_norm": 0.18257422745227814, | |
| "learning_rate": 2.40291289652726e-07, | |
| "loss": 0.4362, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7332959011791127, | |
| "grad_norm": 0.18132422864437103, | |
| "learning_rate": 2.363017317896904e-07, | |
| "loss": 0.402, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 2.735541830432341, | |
| "grad_norm": 0.17809224128723145, | |
| "learning_rate": 2.323447694327491e-07, | |
| "loss": 0.4177, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.73778775968557, | |
| "grad_norm": 0.19087287783622742, | |
| "learning_rate": 2.284204296573328e-07, | |
| "loss": 0.4402, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 2.7400336889387984, | |
| "grad_norm": 0.17470994591712952, | |
| "learning_rate": 2.2452873931565534e-07, | |
| "loss": 0.4098, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.742279618192027, | |
| "grad_norm": 0.18862077593803406, | |
| "learning_rate": 2.2066972503652807e-07, | |
| "loss": 0.4231, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 2.7445255474452557, | |
| "grad_norm": 0.19681653380393982, | |
| "learning_rate": 2.1684341322517343e-07, | |
| "loss": 0.4438, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.746771476698484, | |
| "grad_norm": 0.193466454744339, | |
| "learning_rate": 2.130498300630518e-07, | |
| "loss": 0.4131, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 2.7490174059517125, | |
| "grad_norm": 0.1851184368133545, | |
| "learning_rate": 2.092890015076765e-07, | |
| "loss": 0.4047, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.751263335204941, | |
| "grad_norm": 0.192936971783638, | |
| "learning_rate": 2.0556095329243853e-07, | |
| "loss": 0.4201, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.7535092644581693, | |
| "grad_norm": 0.19225548207759857, | |
| "learning_rate": 2.0186571092642992e-07, | |
| "loss": 0.4182, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.755755193711398, | |
| "grad_norm": 0.1829329878091812, | |
| "learning_rate": 1.9820329969426954e-07, | |
| "loss": 0.4217, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 2.7580011229646266, | |
| "grad_norm": 0.18259641528129578, | |
| "learning_rate": 1.9457374465592927e-07, | |
| "loss": 0.4343, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.760247052217855, | |
| "grad_norm": 0.1908586025238037, | |
| "learning_rate": 1.9097707064656523e-07, | |
| "loss": 0.4135, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 2.762492981471084, | |
| "grad_norm": 0.18594199419021606, | |
| "learning_rate": 1.8741330227634412e-07, | |
| "loss": 0.4226, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7647389107243123, | |
| "grad_norm": 0.19256974756717682, | |
| "learning_rate": 1.8388246393027552e-07, | |
| "loss": 0.4267, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 2.7669848399775407, | |
| "grad_norm": 0.2004840224981308, | |
| "learning_rate": 1.8038457976804812e-07, | |
| "loss": 0.4255, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.19169549643993378, | |
| "learning_rate": 1.76919673723861e-07, | |
| "loss": 0.3957, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 2.7714766984839976, | |
| "grad_norm": 0.20605385303497314, | |
| "learning_rate": 1.7348776950626146e-07, | |
| "loss": 0.4388, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.7737226277372264, | |
| "grad_norm": 0.18344692885875702, | |
| "learning_rate": 1.7008889059798306e-07, | |
| "loss": 0.422, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.775968556990455, | |
| "grad_norm": 0.17997150123119354, | |
| "learning_rate": 1.66723060255784e-07, | |
| "loss": 0.4304, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.7782144862436833, | |
| "grad_norm": 0.18032418191432953, | |
| "learning_rate": 1.633903015102878e-07, | |
| "loss": 0.4372, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 2.7804604154969117, | |
| "grad_norm": 0.1851246953010559, | |
| "learning_rate": 1.600906371658262e-07, | |
| "loss": 0.4099, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7827063447501406, | |
| "grad_norm": 0.19547966122627258, | |
| "learning_rate": 1.568240898002843e-07, | |
| "loss": 0.4284, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 2.784952274003369, | |
| "grad_norm": 0.20962592959403992, | |
| "learning_rate": 1.5359068176494462e-07, | |
| "loss": 0.4296, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.7871982032565974, | |
| "grad_norm": 0.17490459978580475, | |
| "learning_rate": 1.5039043518433383e-07, | |
| "loss": 0.3977, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 2.789444132509826, | |
| "grad_norm": 0.1864641159772873, | |
| "learning_rate": 1.4722337195607228e-07, | |
| "loss": 0.3936, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7916900617630542, | |
| "grad_norm": 0.19050495326519012, | |
| "learning_rate": 1.4408951375072477e-07, | |
| "loss": 0.4443, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 2.793935991016283, | |
| "grad_norm": 0.1971900761127472, | |
| "learning_rate": 1.4098888201165005e-07, | |
| "loss": 0.453, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.7961819202695115, | |
| "grad_norm": 0.19153332710266113, | |
| "learning_rate": 1.3792149795485655e-07, | |
| "loss": 0.4088, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.79842784952274, | |
| "grad_norm": 0.19257591664791107, | |
| "learning_rate": 1.348873825688557e-07, | |
| "loss": 0.4256, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.800673778775969, | |
| "grad_norm": 0.18633553385734558, | |
| "learning_rate": 1.3188655661451833e-07, | |
| "loss": 0.4187, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 2.802919708029197, | |
| "grad_norm": 0.18796589970588684, | |
| "learning_rate": 1.2891904062493355e-07, | |
| "loss": 0.44, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.8051656372824256, | |
| "grad_norm": 0.19225618243217468, | |
| "learning_rate": 1.259848549052689e-07, | |
| "loss": 0.4402, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 2.807411566535654, | |
| "grad_norm": 0.185172900557518, | |
| "learning_rate": 1.2308401953262716e-07, | |
| "loss": 0.4066, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.8096574957888825, | |
| "grad_norm": 0.17786043882369995, | |
| "learning_rate": 1.2021655435591472e-07, | |
| "loss": 0.4176, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 2.8119034250421113, | |
| "grad_norm": 0.19384820759296417, | |
| "learning_rate": 1.1738247899570287e-07, | |
| "loss": 0.4345, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.8141493542953397, | |
| "grad_norm": 0.18932607769966125, | |
| "learning_rate": 1.145818128440923e-07, | |
| "loss": 0.4076, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 2.816395283548568, | |
| "grad_norm": 0.20454899966716766, | |
| "learning_rate": 1.1181457506458271e-07, | |
| "loss": 0.4638, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.8186412128017966, | |
| "grad_norm": 0.17342036962509155, | |
| "learning_rate": 1.0908078459194227e-07, | |
| "loss": 0.3839, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.8208871420550254, | |
| "grad_norm": 0.20639826357364655, | |
| "learning_rate": 1.0638046013207337e-07, | |
| "loss": 0.4386, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.823133071308254, | |
| "grad_norm": 0.20575068891048431, | |
| "learning_rate": 1.0371362016189158e-07, | |
| "loss": 0.4155, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 2.8253790005614823, | |
| "grad_norm": 0.1837739795446396, | |
| "learning_rate": 1.0108028292919237e-07, | |
| "loss": 0.4209, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.8276249298147107, | |
| "grad_norm": 0.1831589937210083, | |
| "learning_rate": 9.848046645253184e-08, | |
| "loss": 0.4171, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 2.829870859067939, | |
| "grad_norm": 0.20222926139831543, | |
| "learning_rate": 9.591418852109957e-08, | |
| "loss": 0.4118, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.832116788321168, | |
| "grad_norm": 0.1949760466814041, | |
| "learning_rate": 9.338146669459925e-08, | |
| "loss": 0.4126, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 2.8343627175743964, | |
| "grad_norm": 0.1802796870470047, | |
| "learning_rate": 9.088231830312655e-08, | |
| "loss": 0.4435, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.836608646827625, | |
| "grad_norm": 0.19096410274505615, | |
| "learning_rate": 8.841676044705261e-08, | |
| "loss": 0.4398, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 2.8388545760808537, | |
| "grad_norm": 0.1780502051115036, | |
| "learning_rate": 8.598480999690573e-08, | |
| "loss": 0.4135, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.841100505334082, | |
| "grad_norm": 0.18318232893943787, | |
| "learning_rate": 8.358648359325539e-08, | |
| "loss": 0.4294, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.8433464345873105, | |
| "grad_norm": 0.186601459980011, | |
| "learning_rate": 8.122179764660121e-08, | |
| "loss": 0.4435, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.845592363840539, | |
| "grad_norm": 0.19515588879585266, | |
| "learning_rate": 7.889076833725695e-08, | |
| "loss": 0.4068, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 2.8478382930937673, | |
| "grad_norm": 0.1878891885280609, | |
| "learning_rate": 7.659341161524225e-08, | |
| "loss": 0.4538, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.850084222346996, | |
| "grad_norm": 0.18124721944332123, | |
| "learning_rate": 7.432974320017216e-08, | |
| "loss": 0.4121, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 2.8523301516002246, | |
| "grad_norm": 0.19087855517864227, | |
| "learning_rate": 7.209977858115058e-08, | |
| "loss": 0.4439, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.854576080853453, | |
| "grad_norm": 0.17772875726222992, | |
| "learning_rate": 6.990353301666475e-08, | |
| "loss": 0.4262, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 2.856822010106682, | |
| "grad_norm": 0.1787647157907486, | |
| "learning_rate": 6.774102153447814e-08, | |
| "loss": 0.4057, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8590679393599103, | |
| "grad_norm": 0.20238277316093445, | |
| "learning_rate": 6.561225893153112e-08, | |
| "loss": 0.4361, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 2.8613138686131387, | |
| "grad_norm": 0.1737276315689087, | |
| "learning_rate": 6.351725977383704e-08, | |
| "loss": 0.3966, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.863559797866367, | |
| "grad_norm": 0.18557517230510712, | |
| "learning_rate": 6.14560383963847e-08, | |
| "loss": 0.438, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.8658057271195956, | |
| "grad_norm": 0.19200386106967926, | |
| "learning_rate": 5.94286089030377e-08, | |
| "loss": 0.4359, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.868051656372824, | |
| "grad_norm": 0.17796795070171356, | |
| "learning_rate": 5.743498516644019e-08, | |
| "loss": 0.4051, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 2.870297585626053, | |
| "grad_norm": 0.19527527689933777, | |
| "learning_rate": 5.547518082792136e-08, | |
| "loss": 0.4301, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8725435148792813, | |
| "grad_norm": 0.1781342327594757, | |
| "learning_rate": 5.354920929740048e-08, | |
| "loss": 0.4249, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 2.8747894441325097, | |
| "grad_norm": 0.1727127581834793, | |
| "learning_rate": 5.1657083753299256e-08, | |
| "loss": 0.4137, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8770353733857386, | |
| "grad_norm": 0.18090222775936127, | |
| "learning_rate": 4.979881714244628e-08, | |
| "loss": 0.4256, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 2.879281302638967, | |
| "grad_norm": 0.18660210072994232, | |
| "learning_rate": 4.797442217999215e-08, | |
| "loss": 0.4152, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.8815272318921954, | |
| "grad_norm": 0.19095072150230408, | |
| "learning_rate": 4.618391134932121e-08, | |
| "loss": 0.4072, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 2.883773161145424, | |
| "grad_norm": 0.1802065372467041, | |
| "learning_rate": 4.442729690196657e-08, | |
| "loss": 0.4397, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8860190903986522, | |
| "grad_norm": 0.1801634430885315, | |
| "learning_rate": 4.270459085752687e-08, | |
| "loss": 0.4234, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.888265019651881, | |
| "grad_norm": 0.17258504033088684, | |
| "learning_rate": 4.101580500358082e-08, | |
| "loss": 0.4047, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8905109489051095, | |
| "grad_norm": 0.194522425532341, | |
| "learning_rate": 3.936095089561165e-08, | |
| "loss": 0.4544, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 2.892756878158338, | |
| "grad_norm": 0.18839098513126373, | |
| "learning_rate": 3.774003985692387e-08, | |
| "loss": 0.4202, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.895002807411567, | |
| "grad_norm": 0.18398089706897736, | |
| "learning_rate": 3.615308297856668e-08, | |
| "loss": 0.4098, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 2.897248736664795, | |
| "grad_norm": 0.1946476548910141, | |
| "learning_rate": 3.4600091119260106e-08, | |
| "loss": 0.449, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8994946659180236, | |
| "grad_norm": 0.186300590634346, | |
| "learning_rate": 3.308107490531842e-08, | |
| "loss": 0.4285, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 2.901740595171252, | |
| "grad_norm": 0.18534432351589203, | |
| "learning_rate": 3.159604473057909e-08, | |
| "loss": 0.4392, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.9039865244244805, | |
| "grad_norm": 0.18315456807613373, | |
| "learning_rate": 3.0145010756328364e-08, | |
| "loss": 0.4178, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 2.906232453677709, | |
| "grad_norm": 0.1906488984823227, | |
| "learning_rate": 2.8727982911238017e-08, | |
| "loss": 0.4339, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.9084783829309377, | |
| "grad_norm": 0.18358033895492554, | |
| "learning_rate": 2.73449708912904e-08, | |
| "loss": 0.4031, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.910724312184166, | |
| "grad_norm": 0.19111478328704834, | |
| "learning_rate": 2.599598415971627e-08, | |
| "loss": 0.423, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.9129702414373946, | |
| "grad_norm": 0.17649492621421814, | |
| "learning_rate": 2.4681031946929834e-08, | |
| "loss": 0.4165, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 2.9152161706906234, | |
| "grad_norm": 0.190648153424263, | |
| "learning_rate": 2.340012325046326e-08, | |
| "loss": 0.408, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.917462099943852, | |
| "grad_norm": 0.17262622714042664, | |
| "learning_rate": 2.2153266834908927e-08, | |
| "loss": 0.4148, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 2.9197080291970803, | |
| "grad_norm": 0.18755358457565308, | |
| "learning_rate": 2.0940471231855052e-08, | |
| "loss": 0.4272, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.9219539584503087, | |
| "grad_norm": 0.19861868023872375, | |
| "learning_rate": 1.9761744739830723e-08, | |
| "loss": 0.4661, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 2.924199887703537, | |
| "grad_norm": 0.18785429000854492, | |
| "learning_rate": 1.86170954242465e-08, | |
| "loss": 0.4185, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.926445816956766, | |
| "grad_norm": 0.1750560849905014, | |
| "learning_rate": 1.750653111734224e-08, | |
| "loss": 0.4075, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 2.9286917462099944, | |
| "grad_norm": 0.18948881328105927, | |
| "learning_rate": 1.643005941813103e-08, | |
| "loss": 0.4398, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.930937675463223, | |
| "grad_norm": 0.17896808683872223, | |
| "learning_rate": 1.538768769234811e-08, | |
| "loss": 0.4188, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.9331836047164517, | |
| "grad_norm": 0.17966261506080627, | |
| "learning_rate": 1.4379423072399812e-08, | |
| "loss": 0.4168, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.93542953396968, | |
| "grad_norm": 0.1799083948135376, | |
| "learning_rate": 1.3405272457315822e-08, | |
| "loss": 0.4184, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 2.9376754632229085, | |
| "grad_norm": 0.18236926198005676, | |
| "learning_rate": 1.2465242512701425e-08, | |
| "loss": 0.3994, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.939921392476137, | |
| "grad_norm": 0.18373502790927887, | |
| "learning_rate": 1.155933967069256e-08, | |
| "loss": 0.4164, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 2.9421673217293653, | |
| "grad_norm": 0.18677166104316711, | |
| "learning_rate": 1.068757012990973e-08, | |
| "loss": 0.4351, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.944413250982594, | |
| "grad_norm": 0.17726090550422668, | |
| "learning_rate": 9.8499398554186e-09, | |
| "loss": 0.4065, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 2.9466591802358226, | |
| "grad_norm": 0.1915796846151352, | |
| "learning_rate": 9.046454578686136e-09, | |
| "loss": 0.4182, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.948905109489051, | |
| "grad_norm": 0.1905493140220642, | |
| "learning_rate": 8.277119797543975e-09, | |
| "loss": 0.4316, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 2.9511510387422795, | |
| "grad_norm": 0.17604538798332214, | |
| "learning_rate": 7.541940776149559e-09, | |
| "loss": 0.4251, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.9533969679955083, | |
| "grad_norm": 0.1901237815618515, | |
| "learning_rate": 6.840922544948947e-09, | |
| "loss": 0.436, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.9556428972487367, | |
| "grad_norm": 0.1710127294063568, | |
| "learning_rate": 6.174069900646285e-09, | |
| "loss": 0.3952, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.957888826501965, | |
| "grad_norm": 0.18744228780269623, | |
| "learning_rate": 5.541387406165499e-09, | |
| "loss": 0.4292, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 2.9601347557551936, | |
| "grad_norm": 0.1872478723526001, | |
| "learning_rate": 4.942879390624766e-09, | |
| "loss": 0.452, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.962380685008422, | |
| "grad_norm": 0.18257497251033783, | |
| "learning_rate": 4.378549949301536e-09, | |
| "loss": 0.4161, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 2.964626614261651, | |
| "grad_norm": 0.1956602931022644, | |
| "learning_rate": 3.848402943608664e-09, | |
| "loss": 0.4329, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9668725435148793, | |
| "grad_norm": 0.18602769076824188, | |
| "learning_rate": 3.352442001066103e-09, | |
| "loss": 0.4274, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 2.9691184727681077, | |
| "grad_norm": 0.17961065471172333, | |
| "learning_rate": 2.8906705152759175e-09, | |
| "loss": 0.3989, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9713644020213366, | |
| "grad_norm": 0.1952294111251831, | |
| "learning_rate": 2.4630916459000844e-09, | |
| "loss": 0.4381, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 2.973610331274565, | |
| "grad_norm": 0.19028258323669434, | |
| "learning_rate": 2.069708318638286e-09, | |
| "loss": 0.422, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9758562605277934, | |
| "grad_norm": 0.182929128408432, | |
| "learning_rate": 1.7105232252079274e-09, | |
| "loss": 0.3971, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.978102189781022, | |
| "grad_norm": 0.19286341965198517, | |
| "learning_rate": 1.3855388233247057e-09, | |
| "loss": 0.4152, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9803481190342502, | |
| "grad_norm": 0.1891261488199234, | |
| "learning_rate": 1.0947573366881791e-09, | |
| "loss": 0.4364, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.982594048287479, | |
| "grad_norm": 0.18247157335281372, | |
| "learning_rate": 8.381807549645571e-10, | |
| "loss": 0.3987, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9848399775407075, | |
| "grad_norm": 0.19202907383441925, | |
| "learning_rate": 6.158108337733782e-10, | |
| "loss": 0.4559, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.987085906793936, | |
| "grad_norm": 0.17795370519161224, | |
| "learning_rate": 4.2764909467696293e-10, | |
| "loss": 0.4122, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.9893318360471643, | |
| "grad_norm": 0.19224773347377777, | |
| "learning_rate": 2.736968251670913e-10, | |
| "loss": 0.4322, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.991577765300393, | |
| "grad_norm": 0.19133056700229645, | |
| "learning_rate": 1.5395507866000637e-10, | |
| "loss": 0.4018, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9938236945536216, | |
| "grad_norm": 0.1867966204881668, | |
| "learning_rate": 6.842467448531231e-11, | |
| "loss": 0.4393, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.99606962380685, | |
| "grad_norm": 0.17837318778038025, | |
| "learning_rate": 1.7106197883753894e-11, | |
| "loss": 0.4112, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9983155530600785, | |
| "grad_norm": 0.185623437166214, | |
| "learning_rate": 0.0, | |
| "loss": 0.4139, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.9983155530600785, | |
| "step": 1335, | |
| "total_flos": 4617447504347136.0, | |
| "train_loss": 0.46386746891428915, | |
| "train_runtime": 117690.8189, | |
| "train_samples_per_second": 2.905, | |
| "train_steps_per_second": 0.011 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1335, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4617447504347136.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |