| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.955414012738854, | |
| "eval_steps": 500, | |
| "global_step": 364, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01910828025477707, | |
| "grad_norm": 0.3258431400870058, | |
| "learning_rate": 5.405405405405406e-09, | |
| "loss": 0.2596, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.03821656050955414, | |
| "grad_norm": 0.2865253531051695, | |
| "learning_rate": 1.0810810810810811e-08, | |
| "loss": 0.2512, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.05732484076433121, | |
| "grad_norm": 0.30892863941613385, | |
| "learning_rate": 1.6216216216216218e-08, | |
| "loss": 0.2584, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.07643312101910828, | |
| "grad_norm": 0.3145090623295482, | |
| "learning_rate": 2.1621621621621623e-08, | |
| "loss": 0.2562, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.09554140127388536, | |
| "grad_norm": 0.3250498078418424, | |
| "learning_rate": 2.7027027027027028e-08, | |
| "loss": 0.2557, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.11464968152866242, | |
| "grad_norm": 0.3048291606019023, | |
| "learning_rate": 3.2432432432432436e-08, | |
| "loss": 0.2429, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.1337579617834395, | |
| "grad_norm": 0.2813946488902139, | |
| "learning_rate": 3.783783783783784e-08, | |
| "loss": 0.244, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.15286624203821655, | |
| "grad_norm": 0.2951556246869404, | |
| "learning_rate": 4.3243243243243246e-08, | |
| "loss": 0.2475, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.17197452229299362, | |
| "grad_norm": 0.31375967109328035, | |
| "learning_rate": 4.864864864864865e-08, | |
| "loss": 0.2548, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.1910828025477707, | |
| "grad_norm": 0.320646972442393, | |
| "learning_rate": 5.4054054054054056e-08, | |
| "loss": 0.2593, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.21019108280254778, | |
| "grad_norm": 0.3101235439496355, | |
| "learning_rate": 5.945945945945946e-08, | |
| "loss": 0.2606, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.22929936305732485, | |
| "grad_norm": 0.29711763418174875, | |
| "learning_rate": 6.486486486486487e-08, | |
| "loss": 0.2517, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.2484076433121019, | |
| "grad_norm": 0.3071305484501633, | |
| "learning_rate": 7.027027027027027e-08, | |
| "loss": 0.2494, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.267515923566879, | |
| "grad_norm": 0.2845310181092547, | |
| "learning_rate": 7.567567567567568e-08, | |
| "loss": 0.2491, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.28662420382165604, | |
| "grad_norm": 0.3203987747997849, | |
| "learning_rate": 8.108108108108108e-08, | |
| "loss": 0.2543, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.3057324840764331, | |
| "grad_norm": 0.28456107510027123, | |
| "learning_rate": 8.648648648648649e-08, | |
| "loss": 0.258, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.3248407643312102, | |
| "grad_norm": 0.27941243424990353, | |
| "learning_rate": 9.189189189189189e-08, | |
| "loss": 0.2574, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.34394904458598724, | |
| "grad_norm": 0.2930471474625414, | |
| "learning_rate": 9.72972972972973e-08, | |
| "loss": 0.2743, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.3630573248407643, | |
| "grad_norm": 0.30494204356688037, | |
| "learning_rate": 1.027027027027027e-07, | |
| "loss": 0.269, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.3821656050955414, | |
| "grad_norm": 0.30231424553130715, | |
| "learning_rate": 1.0810810810810811e-07, | |
| "loss": 0.2557, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.4012738853503185, | |
| "grad_norm": 0.3015266138690226, | |
| "learning_rate": 1.135135135135135e-07, | |
| "loss": 0.2523, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.42038216560509556, | |
| "grad_norm": 0.3043409829723809, | |
| "learning_rate": 1.1891891891891891e-07, | |
| "loss": 0.2547, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.4394904458598726, | |
| "grad_norm": 0.29444734831015446, | |
| "learning_rate": 1.2432432432432432e-07, | |
| "loss": 0.2517, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.4585987261146497, | |
| "grad_norm": 0.297937003697346, | |
| "learning_rate": 1.2972972972972974e-07, | |
| "loss": 0.2434, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.47770700636942676, | |
| "grad_norm": 0.3027152740282303, | |
| "learning_rate": 1.3513513513513512e-07, | |
| "loss": 0.2538, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.4968152866242038, | |
| "grad_norm": 0.3085528948055293, | |
| "learning_rate": 1.4054054054054055e-07, | |
| "loss": 0.2455, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.5159235668789809, | |
| "grad_norm": 0.30288755432020104, | |
| "learning_rate": 1.4594594594594595e-07, | |
| "loss": 0.2642, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.535031847133758, | |
| "grad_norm": 0.2888257868462652, | |
| "learning_rate": 1.5135135135135135e-07, | |
| "loss": 0.2535, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.554140127388535, | |
| "grad_norm": 0.31240869601074167, | |
| "learning_rate": 1.5675675675675675e-07, | |
| "loss": 0.2574, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.5732484076433121, | |
| "grad_norm": 0.28779468390754065, | |
| "learning_rate": 1.6216216216216215e-07, | |
| "loss": 0.2562, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5923566878980892, | |
| "grad_norm": 0.3026300082985163, | |
| "learning_rate": 1.6756756756756755e-07, | |
| "loss": 0.2489, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.6114649681528662, | |
| "grad_norm": 0.29410215399776, | |
| "learning_rate": 1.7297297297297298e-07, | |
| "loss": 0.2513, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.6305732484076433, | |
| "grad_norm": 0.2967099409221429, | |
| "learning_rate": 1.7837837837837836e-07, | |
| "loss": 0.2605, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.6496815286624203, | |
| "grad_norm": 0.28890784109752826, | |
| "learning_rate": 1.8378378378378379e-07, | |
| "loss": 0.2488, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.6687898089171974, | |
| "grad_norm": 0.3228375228965286, | |
| "learning_rate": 1.891891891891892e-07, | |
| "loss": 0.2629, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.6878980891719745, | |
| "grad_norm": 0.30208391730174905, | |
| "learning_rate": 1.945945945945946e-07, | |
| "loss": 0.2662, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.7070063694267515, | |
| "grad_norm": 0.2895031799209941, | |
| "learning_rate": 2e-07, | |
| "loss": 0.2536, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.7261146496815286, | |
| "grad_norm": 0.3036410296650111, | |
| "learning_rate": 1.999953850085163e-07, | |
| "loss": 0.2654, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.7452229299363057, | |
| "grad_norm": 0.30128310490546484, | |
| "learning_rate": 1.999815404600282e-07, | |
| "loss": 0.2471, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7643312101910829, | |
| "grad_norm": 0.3079903978462585, | |
| "learning_rate": 1.999584676323851e-07, | |
| "loss": 0.256, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.7834394904458599, | |
| "grad_norm": 0.2917481324310926, | |
| "learning_rate": 1.9992616865520512e-07, | |
| "loss": 0.2499, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.802547770700637, | |
| "grad_norm": 0.31471272570274456, | |
| "learning_rate": 1.998846465096783e-07, | |
| "loss": 0.2641, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.821656050955414, | |
| "grad_norm": 0.3152199748266918, | |
| "learning_rate": 1.9983390502829166e-07, | |
| "loss": 0.2593, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.8407643312101911, | |
| "grad_norm": 0.29879027476626574, | |
| "learning_rate": 1.9977394889447523e-07, | |
| "loss": 0.2686, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.8598726114649682, | |
| "grad_norm": 0.3172098054667475, | |
| "learning_rate": 1.9970478364216996e-07, | |
| "loss": 0.2408, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.8789808917197452, | |
| "grad_norm": 0.29042679625683193, | |
| "learning_rate": 1.996264156553169e-07, | |
| "loss": 0.2664, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.8980891719745223, | |
| "grad_norm": 0.30019512901289386, | |
| "learning_rate": 1.9953885216726785e-07, | |
| "loss": 0.2508, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.9171974522292994, | |
| "grad_norm": 0.28586435210675887, | |
| "learning_rate": 1.9944210126011788e-07, | |
| "loss": 0.2522, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.9363057324840764, | |
| "grad_norm": 0.30773911686783834, | |
| "learning_rate": 1.9933617186395914e-07, | |
| "loss": 0.2502, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.9554140127388535, | |
| "grad_norm": 0.31371477716041124, | |
| "learning_rate": 1.9922107375605698e-07, | |
| "loss": 0.2619, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9745222929936306, | |
| "grad_norm": 0.2950309178219575, | |
| "learning_rate": 1.990968175599471e-07, | |
| "loss": 0.2613, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.9936305732484076, | |
| "grad_norm": 0.3320695491566013, | |
| "learning_rate": 1.9896341474445524e-07, | |
| "loss": 0.2541, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.0127388535031847, | |
| "grad_norm": 0.2902283899600383, | |
| "learning_rate": 1.9882087762263852e-07, | |
| "loss": 0.2485, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 1.0318471337579618, | |
| "grad_norm": 0.30704788847785364, | |
| "learning_rate": 1.9866921935064905e-07, | |
| "loss": 0.2579, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.0509554140127388, | |
| "grad_norm": 0.2786908392929887, | |
| "learning_rate": 1.9850845392651947e-07, | |
| "loss": 0.2638, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.070063694267516, | |
| "grad_norm": 0.317768378195936, | |
| "learning_rate": 1.983385961888711e-07, | |
| "loss": 0.2483, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.089171974522293, | |
| "grad_norm": 0.2999290392989265, | |
| "learning_rate": 1.981596618155441e-07, | |
| "loss": 0.2604, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.10828025477707, | |
| "grad_norm": 0.31058999539375254, | |
| "learning_rate": 1.9797166732215075e-07, | |
| "loss": 0.2619, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.127388535031847, | |
| "grad_norm": 0.3064782254040447, | |
| "learning_rate": 1.977746300605507e-07, | |
| "loss": 0.2607, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.1464968152866242, | |
| "grad_norm": 0.3002412022740287, | |
| "learning_rate": 1.9756856821724967e-07, | |
| "loss": 0.2617, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.1656050955414012, | |
| "grad_norm": 0.29772159876550963, | |
| "learning_rate": 1.9735350081172067e-07, | |
| "loss": 0.2481, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.1847133757961783, | |
| "grad_norm": 0.31356783559096124, | |
| "learning_rate": 1.9712944769464862e-07, | |
| "loss": 0.2587, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.2038216560509554, | |
| "grad_norm": 0.2888581149656851, | |
| "learning_rate": 1.9689642954609806e-07, | |
| "loss": 0.2538, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.2229299363057324, | |
| "grad_norm": 0.30385671289298677, | |
| "learning_rate": 1.966544678736044e-07, | |
| "loss": 0.2527, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.2420382165605095, | |
| "grad_norm": 0.2973647308882802, | |
| "learning_rate": 1.9640358501018882e-07, | |
| "loss": 0.2479, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.2611464968152866, | |
| "grad_norm": 0.3095848016456744, | |
| "learning_rate": 1.961438041122969e-07, | |
| "loss": 0.2482, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.2802547770700636, | |
| "grad_norm": 0.3515887000957586, | |
| "learning_rate": 1.9587514915766122e-07, | |
| "loss": 0.253, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.2993630573248407, | |
| "grad_norm": 0.2931897201883793, | |
| "learning_rate": 1.9559764494308834e-07, | |
| "loss": 0.2457, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.3184713375796178, | |
| "grad_norm": 0.31283649147238096, | |
| "learning_rate": 1.9531131708217004e-07, | |
| "loss": 0.2554, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.3375796178343948, | |
| "grad_norm": 0.30295461967215936, | |
| "learning_rate": 1.9501619200291905e-07, | |
| "loss": 0.2561, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.356687898089172, | |
| "grad_norm": 0.2910098967517895, | |
| "learning_rate": 1.9471229694533e-07, | |
| "loss": 0.264, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.3757961783439492, | |
| "grad_norm": 0.29074377848027877, | |
| "learning_rate": 1.9439965995886488e-07, | |
| "loss": 0.2579, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.394904458598726, | |
| "grad_norm": 0.2913116920333876, | |
| "learning_rate": 1.9407830989986428e-07, | |
| "loss": 0.2515, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.4140127388535033, | |
| "grad_norm": 0.28025336367518355, | |
| "learning_rate": 1.9374827642888395e-07, | |
| "loss": 0.2592, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.4331210191082802, | |
| "grad_norm": 0.308381777961692, | |
| "learning_rate": 1.9340959000795706e-07, | |
| "loss": 0.2559, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.4522292993630574, | |
| "grad_norm": 0.29320496586078204, | |
| "learning_rate": 1.9306228189778253e-07, | |
| "loss": 0.2658, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.4713375796178343, | |
| "grad_norm": 0.32221369887070983, | |
| "learning_rate": 1.927063841548398e-07, | |
| "loss": 0.2646, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.4904458598726116, | |
| "grad_norm": 0.299728133006886, | |
| "learning_rate": 1.923419296284299e-07, | |
| "loss": 0.2581, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.5095541401273884, | |
| "grad_norm": 0.3209108114734401, | |
| "learning_rate": 1.919689519576436e-07, | |
| "loss": 0.2567, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.5286624203821657, | |
| "grad_norm": 0.2804129315244473, | |
| "learning_rate": 1.9158748556825634e-07, | |
| "loss": 0.253, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.5477707006369426, | |
| "grad_norm": 0.3045816243631572, | |
| "learning_rate": 1.911975656695509e-07, | |
| "loss": 0.2584, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.5668789808917198, | |
| "grad_norm": 0.3126990342175922, | |
| "learning_rate": 1.907992282510675e-07, | |
| "loss": 0.2605, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.5859872611464967, | |
| "grad_norm": 0.30614700686477747, | |
| "learning_rate": 1.90392510079282e-07, | |
| "loss": 0.2548, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.605095541401274, | |
| "grad_norm": 0.2971554730236426, | |
| "learning_rate": 1.8997744869421245e-07, | |
| "loss": 0.2354, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.6242038216560508, | |
| "grad_norm": 0.2988208824377952, | |
| "learning_rate": 1.8955408240595392e-07, | |
| "loss": 0.2441, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.643312101910828, | |
| "grad_norm": 0.2947992742232842, | |
| "learning_rate": 1.8912245029114278e-07, | |
| "loss": 0.2503, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.662420382165605, | |
| "grad_norm": 0.3147211833492056, | |
| "learning_rate": 1.8868259218934966e-07, | |
| "loss": 0.2529, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.6815286624203822, | |
| "grad_norm": 0.3150826072001003, | |
| "learning_rate": 1.882345486994024e-07, | |
| "loss": 0.2492, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.700636942675159, | |
| "grad_norm": 0.29922927789546194, | |
| "learning_rate": 1.877783611756389e-07, | |
| "loss": 0.2657, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.7197452229299364, | |
| "grad_norm": 0.2841068444708262, | |
| "learning_rate": 1.8731407172408987e-07, | |
| "loss": 0.2503, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.7388535031847132, | |
| "grad_norm": 0.3000862442323796, | |
| "learning_rate": 1.8684172319859257e-07, | |
| "loss": 0.2553, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.7579617834394905, | |
| "grad_norm": 0.3058443181160777, | |
| "learning_rate": 1.863613591968355e-07, | |
| "loss": 0.2572, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.7770700636942676, | |
| "grad_norm": 0.31867483871696056, | |
| "learning_rate": 1.8587302405633417e-07, | |
| "loss": 0.2527, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.7961783439490446, | |
| "grad_norm": 0.3025042298897706, | |
| "learning_rate": 1.8537676285033885e-07, | |
| "loss": 0.2595, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.8152866242038217, | |
| "grad_norm": 0.2923220305017809, | |
| "learning_rate": 1.848726213836744e-07, | |
| "loss": 0.2577, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.8343949044585988, | |
| "grad_norm": 0.3003765605795671, | |
| "learning_rate": 1.8436064618851224e-07, | |
| "loss": 0.2521, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.8535031847133758, | |
| "grad_norm": 0.317610406351691, | |
| "learning_rate": 1.8384088452007576e-07, | |
| "loss": 0.2502, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.872611464968153, | |
| "grad_norm": 0.2887579767892906, | |
| "learning_rate": 1.8331338435227837e-07, | |
| "loss": 0.2586, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.89171974522293, | |
| "grad_norm": 0.3197071782148506, | |
| "learning_rate": 1.8277819437329574e-07, | |
| "loss": 0.267, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.910828025477707, | |
| "grad_norm": 0.2849069609408447, | |
| "learning_rate": 1.8223536398107174e-07, | |
| "loss": 0.2485, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.929936305732484, | |
| "grad_norm": 0.2887703170078543, | |
| "learning_rate": 1.8168494327875916e-07, | |
| "loss": 0.2499, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.9490445859872612, | |
| "grad_norm": 0.3161969478865281, | |
| "learning_rate": 1.8112698307009504e-07, | |
| "loss": 0.2505, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.9681528662420382, | |
| "grad_norm": 0.287901366472073, | |
| "learning_rate": 1.8056153485471165e-07, | |
| "loss": 0.2668, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.9872611464968153, | |
| "grad_norm": 0.3012560634417445, | |
| "learning_rate": 1.7998865082338287e-07, | |
| "loss": 0.2565, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.0063694267515926, | |
| "grad_norm": 0.29276615150168056, | |
| "learning_rate": 1.7940838385320732e-07, | |
| "loss": 0.2533, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.0254777070063694, | |
| "grad_norm": 0.30402861564543393, | |
| "learning_rate": 1.788207875027274e-07, | |
| "loss": 0.2525, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.0445859872611467, | |
| "grad_norm": 0.2967173967659541, | |
| "learning_rate": 1.7822591600698629e-07, | |
| "loss": 0.2532, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 2.0636942675159236, | |
| "grad_norm": 0.30366484369127167, | |
| "learning_rate": 1.7762382427252165e-07, | |
| "loss": 0.2573, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 2.082802547770701, | |
| "grad_norm": 0.2878188158407926, | |
| "learning_rate": 1.7701456787229803e-07, | |
| "loss": 0.2602, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 2.1019108280254777, | |
| "grad_norm": 0.2743059722128589, | |
| "learning_rate": 1.7639820304057742e-07, | |
| "loss": 0.2554, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.121019108280255, | |
| "grad_norm": 0.29808343098853324, | |
| "learning_rate": 1.7577478666772882e-07, | |
| "loss": 0.253, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 2.140127388535032, | |
| "grad_norm": 0.2875587315242635, | |
| "learning_rate": 1.7514437629497717e-07, | |
| "loss": 0.2488, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.159235668789809, | |
| "grad_norm": 0.30351361854827635, | |
| "learning_rate": 1.7450703010909262e-07, | |
| "loss": 0.2562, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.178343949044586, | |
| "grad_norm": 0.30394129590401453, | |
| "learning_rate": 1.738628069370195e-07, | |
| "loss": 0.2607, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.1974522292993632, | |
| "grad_norm": 0.32285950783403167, | |
| "learning_rate": 1.7321176624044687e-07, | |
| "loss": 0.2503, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.21656050955414, | |
| "grad_norm": 0.3120864765033612, | |
| "learning_rate": 1.7255396811032013e-07, | |
| "loss": 0.2509, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.2356687898089174, | |
| "grad_norm": 0.3236920273170069, | |
| "learning_rate": 1.718894732612947e-07, | |
| "loss": 0.2502, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.254777070063694, | |
| "grad_norm": 0.29570882874175936, | |
| "learning_rate": 1.7121834302613186e-07, | |
| "loss": 0.2639, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.2738853503184715, | |
| "grad_norm": 0.31678397348330944, | |
| "learning_rate": 1.7054063935003812e-07, | |
| "loss": 0.2496, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.2929936305732483, | |
| "grad_norm": 0.2988770843936996, | |
| "learning_rate": 1.6985642478494727e-07, | |
| "loss": 0.2507, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.3121019108280256, | |
| "grad_norm": 0.3109434916642981, | |
| "learning_rate": 1.6916576248374716e-07, | |
| "loss": 0.2616, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.3312101910828025, | |
| "grad_norm": 0.3261007418875524, | |
| "learning_rate": 1.684687161944506e-07, | |
| "loss": 0.2554, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.3503184713375798, | |
| "grad_norm": 0.31440782489796776, | |
| "learning_rate": 1.6776535025431129e-07, | |
| "loss": 0.2559, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.3694267515923566, | |
| "grad_norm": 0.30410776957586444, | |
| "learning_rate": 1.6705572958388573e-07, | |
| "loss": 0.2603, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.388535031847134, | |
| "grad_norm": 0.30666376843334026, | |
| "learning_rate": 1.6633991968104092e-07, | |
| "loss": 0.2439, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.4076433121019107, | |
| "grad_norm": 0.3133700400691411, | |
| "learning_rate": 1.6561798661490902e-07, | |
| "loss": 0.2514, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.426751592356688, | |
| "grad_norm": 0.30312163468190945, | |
| "learning_rate": 1.6488999701978902e-07, | |
| "loss": 0.2522, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.445859872611465, | |
| "grad_norm": 0.2980517023756582, | |
| "learning_rate": 1.6415601808899658e-07, | |
| "loss": 0.2634, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.464968152866242, | |
| "grad_norm": 0.29620642402954184, | |
| "learning_rate": 1.63416117568662e-07, | |
| "loss": 0.2523, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.484076433121019, | |
| "grad_norm": 0.30847707783645134, | |
| "learning_rate": 1.6267036375147723e-07, | |
| "loss": 0.2519, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.5031847133757963, | |
| "grad_norm": 0.3150233916320143, | |
| "learning_rate": 1.6191882547039266e-07, | |
| "loss": 0.257, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.522292993630573, | |
| "grad_norm": 0.27358778066993983, | |
| "learning_rate": 1.6116157209226352e-07, | |
| "loss": 0.2647, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.5414012738853504, | |
| "grad_norm": 0.3019435375253268, | |
| "learning_rate": 1.6039867351144777e-07, | |
| "loss": 0.2647, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.5605095541401273, | |
| "grad_norm": 0.30895517117616256, | |
| "learning_rate": 1.5963020014335436e-07, | |
| "loss": 0.2559, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.5796178343949046, | |
| "grad_norm": 0.31282726024916446, | |
| "learning_rate": 1.5885622291794428e-07, | |
| "loss": 0.2473, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.5987261146496814, | |
| "grad_norm": 0.29386800602210383, | |
| "learning_rate": 1.580768132731837e-07, | |
| "loss": 0.2564, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.6178343949044587, | |
| "grad_norm": 0.2994659441023476, | |
| "learning_rate": 1.5729204314845e-07, | |
| "loss": 0.2593, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 2.6369426751592355, | |
| "grad_norm": 0.306461349101831, | |
| "learning_rate": 1.56501984977892e-07, | |
| "loss": 0.2618, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 2.656050955414013, | |
| "grad_norm": 0.28736145386635525, | |
| "learning_rate": 1.5570671168374436e-07, | |
| "loss": 0.2702, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 2.6751592356687897, | |
| "grad_norm": 0.2862972968768037, | |
| "learning_rate": 1.5490629666959666e-07, | |
| "loss": 0.2604, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.694267515923567, | |
| "grad_norm": 0.3188610660653441, | |
| "learning_rate": 1.5410081381361829e-07, | |
| "loss": 0.2671, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 2.713375796178344, | |
| "grad_norm": 0.3094794846413303, | |
| "learning_rate": 1.5329033746173973e-07, | |
| "loss": 0.2535, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.732484076433121, | |
| "grad_norm": 0.29891576281966575, | |
| "learning_rate": 1.5247494242079021e-07, | |
| "loss": 0.252, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.7515923566878984, | |
| "grad_norm": 0.292028133356272, | |
| "learning_rate": 1.5165470395159313e-07, | |
| "loss": 0.2517, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.770700636942675, | |
| "grad_norm": 0.3015119959336549, | |
| "learning_rate": 1.5082969776201945e-07, | |
| "loss": 0.2485, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.789808917197452, | |
| "grad_norm": 0.30946055407183126, | |
| "learning_rate": 1.5e-07, | |
| "loss": 0.2549, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 2.8089171974522293, | |
| "grad_norm": 0.3011288987792503, | |
| "learning_rate": 1.4916568724649686e-07, | |
| "loss": 0.2526, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 2.8280254777070066, | |
| "grad_norm": 0.27935366900331016, | |
| "learning_rate": 1.4832683650843506e-07, | |
| "loss": 0.2569, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 2.8471337579617835, | |
| "grad_norm": 0.3053623378204874, | |
| "learning_rate": 1.4748352521159491e-07, | |
| "loss": 0.2543, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 2.8662420382165603, | |
| "grad_norm": 0.30986894267799603, | |
| "learning_rate": 1.4663583119346538e-07, | |
| "loss": 0.2414, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.8853503184713376, | |
| "grad_norm": 0.3029701849877518, | |
| "learning_rate": 1.4578383269606002e-07, | |
| "loss": 0.2645, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 2.904458598726115, | |
| "grad_norm": 0.3007935845890958, | |
| "learning_rate": 1.4492760835869502e-07, | |
| "loss": 0.2524, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.9235668789808917, | |
| "grad_norm": 0.3027359482846415, | |
| "learning_rate": 1.4406723721073087e-07, | |
| "loss": 0.2399, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.9426751592356686, | |
| "grad_norm": 0.286593283803398, | |
| "learning_rate": 1.4320279866427796e-07, | |
| "loss": 0.2491, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.961783439490446, | |
| "grad_norm": 0.2975044719847865, | |
| "learning_rate": 1.4233437250686693e-07, | |
| "loss": 0.2556, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.980891719745223, | |
| "grad_norm": 0.29788132002693174, | |
| "learning_rate": 1.4146203889408418e-07, | |
| "loss": 0.242, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.2947537039751411, | |
| "learning_rate": 1.4058587834217354e-07, | |
| "loss": 0.2584, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 3.0191082802547773, | |
| "grad_norm": 0.29945926345973045, | |
| "learning_rate": 1.397059717206048e-07, | |
| "loss": 0.2591, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 3.038216560509554, | |
| "grad_norm": 0.3078621269226234, | |
| "learning_rate": 1.3882240024460924e-07, | |
| "loss": 0.2587, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 3.0573248407643314, | |
| "grad_norm": 0.30621687808211867, | |
| "learning_rate": 1.3793524546768356e-07, | |
| "loss": 0.2603, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.0764331210191083, | |
| "grad_norm": 0.30958764885598344, | |
| "learning_rate": 1.370445892740626e-07, | |
| "loss": 0.2594, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 3.0955414012738856, | |
| "grad_norm": 0.3117099403903537, | |
| "learning_rate": 1.361505138711613e-07, | |
| "loss": 0.2538, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 3.1146496815286624, | |
| "grad_norm": 0.31084130617285904, | |
| "learning_rate": 1.3525310178198706e-07, | |
| "loss": 0.2658, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 3.1337579617834397, | |
| "grad_norm": 0.30690130397546955, | |
| "learning_rate": 1.343524358375229e-07, | |
| "loss": 0.2495, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 3.1528662420382165, | |
| "grad_norm": 0.290775795962111, | |
| "learning_rate": 1.3344859916908204e-07, | |
| "loss": 0.2574, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 3.171974522292994, | |
| "grad_norm": 0.31192756387768816, | |
| "learning_rate": 1.325416752006351e-07, | |
| "loss": 0.2548, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 3.1910828025477707, | |
| "grad_norm": 0.2970090514175917, | |
| "learning_rate": 1.3163174764110982e-07, | |
| "loss": 0.248, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 3.210191082802548, | |
| "grad_norm": 0.2904565168466135, | |
| "learning_rate": 1.3071890047666496e-07, | |
| "loss": 0.2469, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 3.229299363057325, | |
| "grad_norm": 0.30572059607999896, | |
| "learning_rate": 1.2980321796293835e-07, | |
| "loss": 0.2433, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 3.248407643312102, | |
| "grad_norm": 0.3080588666919199, | |
| "learning_rate": 1.288847846172701e-07, | |
| "loss": 0.2455, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.267515923566879, | |
| "grad_norm": 0.33087482625160974, | |
| "learning_rate": 1.2796368521090143e-07, | |
| "loss": 0.2563, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 3.286624203821656, | |
| "grad_norm": 0.29813533166624945, | |
| "learning_rate": 1.270400047611508e-07, | |
| "loss": 0.2486, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 3.305732484076433, | |
| "grad_norm": 0.3086312021030327, | |
| "learning_rate": 1.261138285235663e-07, | |
| "loss": 0.2458, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 3.3248407643312103, | |
| "grad_norm": 0.29331773923085075, | |
| "learning_rate": 1.2518524198405698e-07, | |
| "loss": 0.2709, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 3.343949044585987, | |
| "grad_norm": 0.3389129034608298, | |
| "learning_rate": 1.2425433085100222e-07, | |
| "loss": 0.249, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.3630573248407645, | |
| "grad_norm": 0.27583808400445303, | |
| "learning_rate": 1.2332118104734109e-07, | |
| "loss": 0.2593, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 3.3821656050955413, | |
| "grad_norm": 0.30026419416491845, | |
| "learning_rate": 1.223858787026415e-07, | |
| "loss": 0.2571, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 3.4012738853503186, | |
| "grad_norm": 0.2870756139104833, | |
| "learning_rate": 1.2144851014515054e-07, | |
| "loss": 0.2433, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.4203821656050954, | |
| "grad_norm": 0.2872438134366015, | |
| "learning_rate": 1.2050916189382645e-07, | |
| "loss": 0.2612, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 3.4394904458598727, | |
| "grad_norm": 0.3163865718075819, | |
| "learning_rate": 1.195679206503528e-07, | |
| "loss": 0.2549, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.4585987261146496, | |
| "grad_norm": 0.2978921734506274, | |
| "learning_rate": 1.1862487329113604e-07, | |
| "loss": 0.2622, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 3.477707006369427, | |
| "grad_norm": 0.2948938802787196, | |
| "learning_rate": 1.1768010685928685e-07, | |
| "loss": 0.2556, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.4968152866242037, | |
| "grad_norm": 0.31770785254670114, | |
| "learning_rate": 1.1673370855658591e-07, | |
| "loss": 0.2564, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 3.515923566878981, | |
| "grad_norm": 0.3077187664088862, | |
| "learning_rate": 1.1578576573543539e-07, | |
| "loss": 0.2603, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 3.535031847133758, | |
| "grad_norm": 0.27940138197404885, | |
| "learning_rate": 1.1483636589079626e-07, | |
| "loss": 0.2537, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 3.554140127388535, | |
| "grad_norm": 0.30920602027740407, | |
| "learning_rate": 1.138855966521124e-07, | |
| "loss": 0.2605, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 3.573248407643312, | |
| "grad_norm": 0.2878030111034016, | |
| "learning_rate": 1.1293354577522263e-07, | |
| "loss": 0.2642, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 3.5923566878980893, | |
| "grad_norm": 0.31242615729718354, | |
| "learning_rate": 1.1198030113426074e-07, | |
| "loss": 0.2689, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 3.611464968152866, | |
| "grad_norm": 0.2948055240186696, | |
| "learning_rate": 1.110259507135447e-07, | |
| "loss": 0.268, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.6305732484076434, | |
| "grad_norm": 0.2983537474983395, | |
| "learning_rate": 1.1007058259945583e-07, | |
| "loss": 0.25, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.6496815286624202, | |
| "grad_norm": 0.30237646104024896, | |
| "learning_rate": 1.0911428497230832e-07, | |
| "loss": 0.2398, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 3.6687898089171975, | |
| "grad_norm": 0.29700896373492647, | |
| "learning_rate": 1.0815714609821025e-07, | |
| "loss": 0.2568, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.6878980891719744, | |
| "grad_norm": 0.30679225984995734, | |
| "learning_rate": 1.071992543209167e-07, | |
| "loss": 0.2401, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 3.7070063694267517, | |
| "grad_norm": 0.3007632644983776, | |
| "learning_rate": 1.0624069805367557e-07, | |
| "loss": 0.2477, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 3.7261146496815285, | |
| "grad_norm": 0.3112671485408903, | |
| "learning_rate": 1.0528156577106702e-07, | |
| "loss": 0.2463, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 3.745222929936306, | |
| "grad_norm": 0.30401064479289264, | |
| "learning_rate": 1.0432194600083739e-07, | |
| "loss": 0.2574, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 3.7643312101910826, | |
| "grad_norm": 0.2851374502628386, | |
| "learning_rate": 1.0336192731572803e-07, | |
| "loss": 0.2582, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 3.78343949044586, | |
| "grad_norm": 0.2914379090084522, | |
| "learning_rate": 1.0240159832530007e-07, | |
| "loss": 0.2555, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 3.802547770700637, | |
| "grad_norm": 0.33178330655356186, | |
| "learning_rate": 1.0144104766775572e-07, | |
| "loss": 0.2614, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 3.821656050955414, | |
| "grad_norm": 0.31640502862571734, | |
| "learning_rate": 1.0048036400175708e-07, | |
| "loss": 0.235, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.840764331210191, | |
| "grad_norm": 0.3161044897697338, | |
| "learning_rate": 9.951963599824293e-08, | |
| "loss": 0.2433, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 3.859872611464968, | |
| "grad_norm": 0.2960452451709703, | |
| "learning_rate": 9.855895233224429e-08, | |
| "loss": 0.2589, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 3.8789808917197455, | |
| "grad_norm": 0.2881678617442765, | |
| "learning_rate": 9.759840167469994e-08, | |
| "loss": 0.257, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 3.8980891719745223, | |
| "grad_norm": 0.3020128060320192, | |
| "learning_rate": 9.663807268427197e-08, | |
| "loss": 0.2489, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 3.917197452229299, | |
| "grad_norm": 0.303673255967667, | |
| "learning_rate": 9.567805399916259e-08, | |
| "loss": 0.2552, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 3.9363057324840764, | |
| "grad_norm": 0.3029517107073107, | |
| "learning_rate": 9.471843422893297e-08, | |
| "loss": 0.2545, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 3.9554140127388537, | |
| "grad_norm": 0.29991781394420536, | |
| "learning_rate": 9.375930194632446e-08, | |
| "loss": 0.2541, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 3.9745222929936306, | |
| "grad_norm": 0.29801939082138607, | |
| "learning_rate": 9.28007456790833e-08, | |
| "loss": 0.2651, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 3.9936305732484074, | |
| "grad_norm": 0.3107836552290268, | |
| "learning_rate": 9.184285390178977e-08, | |
| "loss": 0.245, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 4.012738853503185, | |
| "grad_norm": 0.31402034579974897, | |
| "learning_rate": 9.088571502769167e-08, | |
| "loss": 0.261, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.031847133757962, | |
| "grad_norm": 0.29505379895390366, | |
| "learning_rate": 8.992941740054417e-08, | |
| "loss": 0.2435, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 4.050955414012739, | |
| "grad_norm": 0.29509128808113044, | |
| "learning_rate": 8.897404928645527e-08, | |
| "loss": 0.2445, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 4.070063694267516, | |
| "grad_norm": 0.29570344259163295, | |
| "learning_rate": 8.801969886573929e-08, | |
| "loss": 0.2698, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 4.089171974522293, | |
| "grad_norm": 0.29346662152654746, | |
| "learning_rate": 8.706645422477737e-08, | |
| "loss": 0.2597, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 4.10828025477707, | |
| "grad_norm": 0.2805185175645672, | |
| "learning_rate": 8.611440334788762e-08, | |
| "loss": 0.2586, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 4.127388535031847, | |
| "grad_norm": 0.30742002754181397, | |
| "learning_rate": 8.516363410920375e-08, | |
| "loss": 0.2478, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 4.146496815286624, | |
| "grad_norm": 0.2850884458824681, | |
| "learning_rate": 8.42142342645646e-08, | |
| "loss": 0.2602, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 4.165605095541402, | |
| "grad_norm": 0.306269262929328, | |
| "learning_rate": 8.326629144341405e-08, | |
| "loss": 0.2515, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 4.1847133757961785, | |
| "grad_norm": 0.3018034076236653, | |
| "learning_rate": 8.231989314071316e-08, | |
| "loss": 0.2471, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 4.203821656050955, | |
| "grad_norm": 0.3185658018423822, | |
| "learning_rate": 8.137512670886396e-08, | |
| "loss": 0.2615, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.222929936305732, | |
| "grad_norm": 0.29628833966395685, | |
| "learning_rate": 8.04320793496472e-08, | |
| "loss": 0.2577, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 4.24203821656051, | |
| "grad_norm": 0.2922501668352887, | |
| "learning_rate": 7.949083810617357e-08, | |
| "loss": 0.2705, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 4.261146496815287, | |
| "grad_norm": 0.2961392897242003, | |
| "learning_rate": 7.855148985484945e-08, | |
| "loss": 0.2499, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 4.280254777070064, | |
| "grad_norm": 0.2953084567404899, | |
| "learning_rate": 7.761412129735851e-08, | |
| "loss": 0.2529, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 4.2993630573248405, | |
| "grad_norm": 0.31348462098681923, | |
| "learning_rate": 7.667881895265893e-08, | |
| "loss": 0.2458, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 4.318471337579618, | |
| "grad_norm": 0.30412264596940947, | |
| "learning_rate": 7.574566914899778e-08, | |
| "loss": 0.2548, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 4.337579617834395, | |
| "grad_norm": 0.2847001566971413, | |
| "learning_rate": 7.481475801594301e-08, | |
| "loss": 0.2534, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 4.356687898089172, | |
| "grad_norm": 0.2853350717625523, | |
| "learning_rate": 7.38861714764337e-08, | |
| "loss": 0.2422, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 4.375796178343949, | |
| "grad_norm": 0.29979993619083456, | |
| "learning_rate": 7.29599952388492e-08, | |
| "loss": 0.2592, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 4.3949044585987265, | |
| "grad_norm": 0.3007849327371836, | |
| "learning_rate": 7.203631478909857e-08, | |
| "loss": 0.2487, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 4.414012738853503, | |
| "grad_norm": 0.2964965887396194, | |
| "learning_rate": 7.111521538272996e-08, | |
| "loss": 0.2591, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 4.43312101910828, | |
| "grad_norm": 0.29153090184611197, | |
| "learning_rate": 7.019678203706163e-08, | |
| "loss": 0.2506, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 4.452229299363057, | |
| "grad_norm": 0.31293925272597667, | |
| "learning_rate": 6.928109952333506e-08, | |
| "loss": 0.2545, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 4.471337579617835, | |
| "grad_norm": 0.31681568074468586, | |
| "learning_rate": 6.836825235889018e-08, | |
| "loss": 0.2566, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 4.490445859872612, | |
| "grad_norm": 0.3303594370705204, | |
| "learning_rate": 6.74583247993649e-08, | |
| "loss": 0.2528, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 4.509554140127388, | |
| "grad_norm": 0.3117652488003975, | |
| "learning_rate": 6.655140083091793e-08, | |
| "loss": 0.2467, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 4.528662420382165, | |
| "grad_norm": 0.3169598110655772, | |
| "learning_rate": 6.56475641624771e-08, | |
| "loss": 0.2639, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 4.547770700636943, | |
| "grad_norm": 0.3243563781200814, | |
| "learning_rate": 6.474689821801294e-08, | |
| "loss": 0.2687, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 4.56687898089172, | |
| "grad_norm": 0.289124780632532, | |
| "learning_rate": 6.384948612883871e-08, | |
| "loss": 0.2603, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 4.585987261146497, | |
| "grad_norm": 0.29440728893533646, | |
| "learning_rate": 6.29554107259374e-08, | |
| "loss": 0.2477, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.6050955414012735, | |
| "grad_norm": 0.28967915725187654, | |
| "learning_rate": 6.206475453231643e-08, | |
| "loss": 0.2498, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 4.624203821656051, | |
| "grad_norm": 0.3078511207653716, | |
| "learning_rate": 6.117759975539074e-08, | |
| "loss": 0.2536, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 4.643312101910828, | |
| "grad_norm": 0.3087724727800373, | |
| "learning_rate": 6.029402827939519e-08, | |
| "loss": 0.2475, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 4.662420382165605, | |
| "grad_norm": 0.30886308506307386, | |
| "learning_rate": 5.941412165782644e-08, | |
| "loss": 0.2646, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 4.681528662420382, | |
| "grad_norm": 0.30050126959049084, | |
| "learning_rate": 5.853796110591582e-08, | |
| "loss": 0.2516, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 4.7006369426751595, | |
| "grad_norm": 0.2863534353646146, | |
| "learning_rate": 5.7665627493133084e-08, | |
| "loss": 0.254, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 4.719745222929936, | |
| "grad_norm": 0.31186833842781353, | |
| "learning_rate": 5.6797201335722055e-08, | |
| "loss": 0.2636, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 4.738853503184713, | |
| "grad_norm": 0.33081843113519477, | |
| "learning_rate": 5.593276278926912e-08, | |
| "loss": 0.2439, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 4.757961783439491, | |
| "grad_norm": 0.3034226509534758, | |
| "learning_rate": 5.5072391641305003e-08, | |
| "loss": 0.2547, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 4.777070063694268, | |
| "grad_norm": 0.29722750131690423, | |
| "learning_rate": 5.4216167303939996e-08, | |
| "loss": 0.2526, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.796178343949045, | |
| "grad_norm": 0.2988145357986916, | |
| "learning_rate": 5.33641688065346e-08, | |
| "loss": 0.2547, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 4.8152866242038215, | |
| "grad_norm": 0.30646135021295096, | |
| "learning_rate": 5.251647478840511e-08, | |
| "loss": 0.2484, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 4.834394904458598, | |
| "grad_norm": 0.3157144375006897, | |
| "learning_rate": 5.167316349156494e-08, | |
| "loss": 0.2419, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 4.853503184713376, | |
| "grad_norm": 0.2983724975662055, | |
| "learning_rate": 5.0834312753503117e-08, | |
| "loss": 0.2589, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 4.872611464968153, | |
| "grad_norm": 0.3102633335125285, | |
| "learning_rate": 5.000000000000002e-08, | |
| "loss": 0.2599, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 4.89171974522293, | |
| "grad_norm": 0.29723998911569716, | |
| "learning_rate": 4.9170302237980564e-08, | |
| "loss": 0.2457, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 4.9108280254777075, | |
| "grad_norm": 0.3054372275805486, | |
| "learning_rate": 4.8345296048406856e-08, | |
| "loss": 0.2538, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 4.929936305732484, | |
| "grad_norm": 0.30363830684403537, | |
| "learning_rate": 4.752505757920977e-08, | |
| "loss": 0.2486, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 4.949044585987261, | |
| "grad_norm": 0.2898474470428452, | |
| "learning_rate": 4.6709662538260266e-08, | |
| "loss": 0.2581, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 4.968152866242038, | |
| "grad_norm": 0.2910012884854503, | |
| "learning_rate": 4.5899186186381725e-08, | |
| "loss": 0.2537, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.987261146496815, | |
| "grad_norm": 0.3149542929423315, | |
| "learning_rate": 4.5093703330403374e-08, | |
| "loss": 0.2535, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 5.006369426751593, | |
| "grad_norm": 0.3175825995451886, | |
| "learning_rate": 4.429328831625565e-08, | |
| "loss": 0.2585, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 5.025477707006369, | |
| "grad_norm": 0.3236452474738274, | |
| "learning_rate": 4.3498015022108e-08, | |
| "loss": 0.2653, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 5.044585987261146, | |
| "grad_norm": 0.29626391836670807, | |
| "learning_rate": 4.270795685155001e-08, | |
| "loss": 0.257, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 5.063694267515924, | |
| "grad_norm": 0.3026250583365394, | |
| "learning_rate": 4.1923186726816305e-08, | |
| "loss": 0.2559, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 5.082802547770701, | |
| "grad_norm": 0.2832298751120326, | |
| "learning_rate": 4.114377708205571e-08, | |
| "loss": 0.2627, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 5.101910828025478, | |
| "grad_norm": 0.29799965486813934, | |
| "learning_rate": 4.036979985664566e-08, | |
| "loss": 0.2494, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 5.1210191082802545, | |
| "grad_norm": 0.30594749582433756, | |
| "learning_rate": 3.9601326488552255e-08, | |
| "loss": 0.258, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 5.140127388535032, | |
| "grad_norm": 0.2762446500911052, | |
| "learning_rate": 3.883842790773647e-08, | |
| "loss": 0.2427, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 5.159235668789809, | |
| "grad_norm": 0.30019745369817896, | |
| "learning_rate": 3.808117452960734e-08, | |
| "loss": 0.2547, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 5.178343949044586, | |
| "grad_norm": 0.29753102050585134, | |
| "learning_rate": 3.732963624852274e-08, | |
| "loss": 0.2535, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 5.197452229299363, | |
| "grad_norm": 0.2935625407698436, | |
| "learning_rate": 3.658388243133804e-08, | |
| "loss": 0.2587, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 5.2165605095541405, | |
| "grad_norm": 0.3209655263508771, | |
| "learning_rate": 3.584398191100341e-08, | |
| "loss": 0.2452, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 5.235668789808917, | |
| "grad_norm": 0.31820460210115087, | |
| "learning_rate": 3.5110002980210973e-08, | |
| "loss": 0.2432, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 5.254777070063694, | |
| "grad_norm": 0.2992144995803717, | |
| "learning_rate": 3.438201338509098e-08, | |
| "loss": 0.2431, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 5.273885350318471, | |
| "grad_norm": 0.31045639532084096, | |
| "learning_rate": 3.366008031895904e-08, | |
| "loss": 0.2545, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 5.292993630573249, | |
| "grad_norm": 0.29354822656777496, | |
| "learning_rate": 3.294427041611425e-08, | |
| "loss": 0.2396, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 5.312101910828026, | |
| "grad_norm": 0.30124303336940966, | |
| "learning_rate": 3.223464974568874e-08, | |
| "loss": 0.2477, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 5.3312101910828025, | |
| "grad_norm": 0.30605544638967863, | |
| "learning_rate": 3.15312838055494e-08, | |
| "loss": 0.2591, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 5.350318471337579, | |
| "grad_norm": 0.31776982294402606, | |
| "learning_rate": 3.083423751625281e-08, | |
| "loss": 0.2515, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 5.369426751592357, | |
| "grad_norm": 0.30233438292813775, | |
| "learning_rate": 3.014357521505273e-08, | |
| "loss": 0.2609, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 5.388535031847134, | |
| "grad_norm": 0.29933329063857167, | |
| "learning_rate": 2.9459360649961896e-08, | |
| "loss": 0.2378, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 5.407643312101911, | |
| "grad_norm": 0.3093582978729572, | |
| "learning_rate": 2.878165697386812e-08, | |
| "loss": 0.2542, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 5.426751592356688, | |
| "grad_norm": 0.2992731993684574, | |
| "learning_rate": 2.811052673870534e-08, | |
| "loss": 0.2411, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 5.445859872611465, | |
| "grad_norm": 0.3240374627908033, | |
| "learning_rate": 2.7446031889679888e-08, | |
| "loss": 0.2483, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 5.464968152866242, | |
| "grad_norm": 0.3012591950583537, | |
| "learning_rate": 2.6788233759553138e-08, | |
| "loss": 0.2594, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 5.484076433121019, | |
| "grad_norm": 0.3232535802137296, | |
| "learning_rate": 2.61371930629805e-08, | |
| "loss": 0.2424, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 5.503184713375796, | |
| "grad_norm": 0.3024568075598484, | |
| "learning_rate": 2.549296989090738e-08, | |
| "loss": 0.2637, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 5.522292993630574, | |
| "grad_norm": 0.2862988052808537, | |
| "learning_rate": 2.4855623705022788e-08, | |
| "loss": 0.2656, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 5.54140127388535, | |
| "grad_norm": 0.30274335948775416, | |
| "learning_rate": 2.4225213332271198e-08, | |
| "loss": 0.2625, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 5.560509554140127, | |
| "grad_norm": 0.30372044645832685, | |
| "learning_rate": 2.3601796959422582e-08, | |
| "loss": 0.2534, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 5.579617834394904, | |
| "grad_norm": 0.2932251760911936, | |
| "learning_rate": 2.2985432127701942e-08, | |
| "loss": 0.2609, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 5.598726114649682, | |
| "grad_norm": 0.3003519183393173, | |
| "learning_rate": 2.237617572747834e-08, | |
| "loss": 0.2586, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 5.617834394904459, | |
| "grad_norm": 0.29295961875583704, | |
| "learning_rate": 2.1774083993013716e-08, | |
| "loss": 0.2514, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 5.6369426751592355, | |
| "grad_norm": 0.30284063679621004, | |
| "learning_rate": 2.117921249727258e-08, | |
| "loss": 0.2517, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 5.656050955414012, | |
| "grad_norm": 0.2944923570429743, | |
| "learning_rate": 2.0591616146792702e-08, | |
| "loss": 0.2571, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 5.67515923566879, | |
| "grad_norm": 0.2926540183721099, | |
| "learning_rate": 2.001134917661713e-08, | |
| "loss": 0.2699, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 5.694267515923567, | |
| "grad_norm": 0.28358364929517027, | |
| "learning_rate": 1.9438465145288373e-08, | |
| "loss": 0.2607, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 5.713375796178344, | |
| "grad_norm": 0.3058440366761672, | |
| "learning_rate": 1.8873016929904938e-08, | |
| "loss": 0.2545, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 5.732484076433121, | |
| "grad_norm": 0.2956033323033416, | |
| "learning_rate": 1.831505672124083e-08, | |
| "loss": 0.2441, | |
| "step": 300 | |
| }, | |
    {
      "epoch": 5.751592356687898,
      "grad_norm": 0.3260543878256,
      "learning_rate": 1.776463601892825e-08,
      "loss": 0.2498,
      "step": 301
    },
    {
      "epoch": 5.770700636942675,
      "grad_norm": 0.2815433176272766,
      "learning_rate": 1.7221805626704277e-08,
      "loss": 0.2561,
      "step": 302
    },
    {
      "epoch": 5.789808917197452,
      "grad_norm": 0.32147374978023563,
      "learning_rate": 1.6686615647721637e-08,
      "loss": 0.2507,
      "step": 303
    },
    {
      "epoch": 5.80891719745223,
      "grad_norm": 0.3136030754385911,
      "learning_rate": 1.615911547992426e-08,
      "loss": 0.2591,
      "step": 304
    },
    {
      "epoch": 5.828025477707007,
      "grad_norm": 0.2925892696690556,
      "learning_rate": 1.5639353811487744e-08,
      "loss": 0.2487,
      "step": 305
    },
    {
      "epoch": 5.8471337579617835,
      "grad_norm": 0.30292693895644507,
      "learning_rate": 1.5127378616325602e-08,
      "loss": 0.2514,
      "step": 306
    },
    {
      "epoch": 5.86624203821656,
      "grad_norm": 0.31710406869483737,
      "learning_rate": 1.4623237149661139e-08,
      "loss": 0.2629,
      "step": 307
    },
    {
      "epoch": 5.885350318471337,
      "grad_norm": 0.30143729522096213,
      "learning_rate": 1.4126975943665842e-08,
      "loss": 0.2388,
      "step": 308
    },
    {
      "epoch": 5.904458598726115,
      "grad_norm": 0.2928856492144717,
      "learning_rate": 1.3638640803164514e-08,
      "loss": 0.2591,
      "step": 309
    },
    {
      "epoch": 5.923566878980892,
      "grad_norm": 0.30225338228365756,
      "learning_rate": 1.3158276801407431e-08,
      "loss": 0.2549,
      "step": 310
    },
    {
      "epoch": 5.942675159235669,
      "grad_norm": 0.31999882843186456,
      "learning_rate": 1.268592827591014e-08,
      "loss": 0.2552,
      "step": 311
    },
    {
      "epoch": 5.961783439490446,
      "grad_norm": 0.3116709540538741,
      "learning_rate": 1.2221638824361069e-08,
      "loss": 0.2561,
      "step": 312
    },
    {
      "epoch": 5.980891719745223,
      "grad_norm": 0.2852514751876125,
      "learning_rate": 1.1765451300597573e-08,
      "loss": 0.2494,
      "step": 313
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.30127697140383475,
      "learning_rate": 1.131740781065037e-08,
      "loss": 0.2677,
      "step": 314
    },
    {
      "epoch": 6.019108280254777,
      "grad_norm": 0.299470849511753,
      "learning_rate": 1.0877549708857225e-08,
      "loss": 0.2492,
      "step": 315
    },
    {
      "epoch": 6.038216560509555,
      "grad_norm": 0.29525030906520566,
      "learning_rate": 1.0445917594046071e-08,
      "loss": 0.2573,
      "step": 316
    },
    {
      "epoch": 6.057324840764331,
      "grad_norm": 0.309895318768612,
      "learning_rate": 1.0022551305787563e-08,
      "loss": 0.2478,
      "step": 317
    },
    {
      "epoch": 6.076433121019108,
      "grad_norm": 0.3116428679714083,
      "learning_rate": 9.607489920717981e-09,
      "loss": 0.2616,
      "step": 318
    },
    {
      "epoch": 6.095541401273885,
      "grad_norm": 0.28485740043622854,
      "learning_rate": 9.200771748932512e-09,
      "loss": 0.23,
      "step": 319
    },
    {
      "epoch": 6.114649681528663,
      "grad_norm": 0.30867096345949346,
      "learning_rate": 8.802434330449127e-09,
      "loss": 0.2423,
      "step": 320
    },
    {
      "epoch": 6.13375796178344,
      "grad_norm": 0.29735720879032396,
      "learning_rate": 8.412514431743656e-09,
      "loss": 0.2506,
      "step": 321
    },
    {
      "epoch": 6.1528662420382165,
      "grad_norm": 0.296939152603047,
      "learning_rate": 8.031048042356392e-09,
      "loss": 0.2518,
      "step": 322
    },
    {
      "epoch": 6.171974522292993,
      "grad_norm": 0.3047288976089965,
      "learning_rate": 7.65807037157007e-09,
      "loss": 0.2571,
      "step": 323
    },
    {
      "epoch": 6.191082802547771,
      "grad_norm": 0.30499824231597183,
      "learning_rate": 7.293615845160195e-09,
      "loss": 0.2492,
      "step": 324
    },
    {
      "epoch": 6.210191082802548,
      "grad_norm": 0.28149771596925177,
      "learning_rate": 6.9377181022174604e-09,
      "loss": 0.2486,
      "step": 325
    },
    {
      "epoch": 6.229299363057325,
      "grad_norm": 0.29778166813478346,
      "learning_rate": 6.590409992042956e-09,
      "loss": 0.253,
      "step": 326
    },
    {
      "epoch": 6.248407643312102,
      "grad_norm": 0.3094630597200761,
      "learning_rate": 6.25172357111603e-09,
      "loss": 0.2552,
      "step": 327
    },
    {
      "epoch": 6.267515923566879,
      "grad_norm": 0.3190922374282261,
      "learning_rate": 5.921690100135712e-09,
      "loss": 0.2585,
      "step": 328
    },
    {
      "epoch": 6.286624203821656,
      "grad_norm": 0.3193448427368392,
      "learning_rate": 5.600340041135132e-09,
      "loss": 0.2566,
      "step": 329
    },
    {
      "epoch": 6.305732484076433,
      "grad_norm": 0.30874010954068026,
      "learning_rate": 5.2877030546700115e-09,
      "loss": 0.2476,
      "step": 330
    },
    {
      "epoch": 6.32484076433121,
      "grad_norm": 0.29530151136694954,
      "learning_rate": 4.9838079970809245e-09,
      "loss": 0.2526,
      "step": 331
    },
    {
      "epoch": 6.343949044585988,
      "grad_norm": 0.29201059542728436,
      "learning_rate": 4.688682917829967e-09,
      "loss": 0.2743,
      "step": 332
    },
    {
      "epoch": 6.3630573248407645,
      "grad_norm": 0.29014727881359437,
      "learning_rate": 4.402355056911655e-09,
      "loss": 0.2506,
      "step": 333
    },
    {
      "epoch": 6.382165605095541,
      "grad_norm": 0.30021554157449915,
      "learning_rate": 4.124850842338778e-09,
      "loss": 0.2658,
      "step": 334
    },
    {
      "epoch": 6.401273885350318,
      "grad_norm": 0.29191739918759346,
      "learning_rate": 3.856195887703095e-09,
      "loss": 0.2526,
      "step": 335
    },
    {
      "epoch": 6.420382165605096,
      "grad_norm": 0.2970091109257102,
      "learning_rate": 3.5964149898111585e-09,
      "loss": 0.2515,
      "step": 336
    },
    {
      "epoch": 6.439490445859873,
      "grad_norm": 0.301615837755627,
      "learning_rate": 3.345532126395578e-09,
      "loss": 0.2525,
      "step": 337
    },
    {
      "epoch": 6.45859872611465,
      "grad_norm": 0.29483374535498386,
      "learning_rate": 3.103570453901938e-09,
      "loss": 0.2518,
      "step": 338
    },
    {
      "epoch": 6.477707006369426,
      "grad_norm": 0.31193863122862164,
      "learning_rate": 2.8705523053513814e-09,
      "loss": 0.2581,
      "step": 339
    },
    {
      "epoch": 6.496815286624204,
      "grad_norm": 0.3102645257081597,
      "learning_rate": 2.6464991882793277e-09,
      "loss": 0.2596,
      "step": 340
    },
    {
      "epoch": 6.515923566878981,
      "grad_norm": 0.29720330855238497,
      "learning_rate": 2.4314317827503373e-09,
      "loss": 0.2482,
      "step": 341
    },
    {
      "epoch": 6.535031847133758,
      "grad_norm": 0.30554779244411534,
      "learning_rate": 2.2253699394493065e-09,
      "loss": 0.2495,
      "step": 342
    },
    {
      "epoch": 6.554140127388535,
      "grad_norm": 0.3132780643873276,
      "learning_rate": 2.0283326778492536e-09,
      "loss": 0.2445,
      "step": 343
    },
    {
      "epoch": 6.573248407643312,
      "grad_norm": 0.2930656726881392,
      "learning_rate": 1.8403381844558808e-09,
      "loss": 0.2538,
      "step": 344
    },
    {
      "epoch": 6.592356687898089,
      "grad_norm": 0.29674947300302884,
      "learning_rate": 1.661403811128903e-09,
      "loss": 0.2668,
      "step": 345
    },
    {
      "epoch": 6.611464968152866,
      "grad_norm": 0.3075402793564451,
      "learning_rate": 1.4915460734805096e-09,
      "loss": 0.269,
      "step": 346
    },
    {
      "epoch": 6.630573248407643,
      "grad_norm": 0.28966161314222383,
      "learning_rate": 1.3307806493509377e-09,
      "loss": 0.247,
      "step": 347
    },
    {
      "epoch": 6.649681528662421,
      "grad_norm": 0.31994849636539213,
      "learning_rate": 1.1791223773614634e-09,
      "loss": 0.2594,
      "step": 348
    },
    {
      "epoch": 6.6687898089171975,
      "grad_norm": 0.3026013256083666,
      "learning_rate": 1.036585255544764e-09,
      "loss": 0.2638,
      "step": 349
    },
    {
      "epoch": 6.687898089171974,
      "grad_norm": 0.3132269351064342,
      "learning_rate": 9.031824400528854e-10,
      "loss": 0.2528,
      "step": 350
    },
    {
      "epoch": 6.707006369426751,
      "grad_norm": 0.2882624273113626,
      "learning_rate": 7.789262439430012e-10,
      "loss": 0.2469,
      "step": 351
    },
    {
      "epoch": 6.726114649681529,
      "grad_norm": 0.3064618613259807,
      "learning_rate": 6.638281360408338e-10,
      "loss": 0.2574,
      "step": 352
    },
    {
      "epoch": 6.745222929936306,
      "grad_norm": 0.30382975584991506,
      "learning_rate": 5.578987398821344e-10,
      "loss": 0.2493,
      "step": 353
    },
    {
      "epoch": 6.764331210191083,
      "grad_norm": 0.2902011016409033,
      "learning_rate": 4.611478327321339e-10,
      "loss": 0.2605,
      "step": 354
    },
    {
      "epoch": 6.7834394904458595,
      "grad_norm": 0.29282370941155056,
      "learning_rate": 3.735843446830866e-10,
      "loss": 0.2531,
      "step": 355
    },
    {
      "epoch": 6.802547770700637,
      "grad_norm": 0.3078439684683759,
      "learning_rate": 2.952163578300193e-10,
      "loss": 0.2473,
      "step": 356
    },
    {
      "epoch": 6.821656050955414,
      "grad_norm": 0.3087832606093336,
      "learning_rate": 2.2605110552477157e-10,
      "loss": 0.2672,
      "step": 357
    },
    {
      "epoch": 6.840764331210191,
      "grad_norm": 0.3098671142360526,
      "learning_rate": 1.6609497170834154e-10,
      "loss": 0.2569,
      "step": 358
    },
    {
      "epoch": 6.859872611464969,
      "grad_norm": 0.29429296975358027,
      "learning_rate": 1.1535349032167907e-10,
      "loss": 0.2546,
      "step": 359
    },
    {
      "epoch": 6.8789808917197455,
      "grad_norm": 0.31345754430512796,
      "learning_rate": 7.38313447948724e-11,
      "loss": 0.2639,
      "step": 360
    },
    {
      "epoch": 6.898089171974522,
      "grad_norm": 0.2921055018298526,
      "learning_rate": 4.153236761488266e-11,
      "loss": 0.2612,
      "step": 361
    },
    {
      "epoch": 6.917197452229299,
      "grad_norm": 0.28576822025063914,
      "learning_rate": 1.8459539971804605e-11,
      "loss": 0.2473,
      "step": 362
    },
    {
      "epoch": 6.936305732484076,
      "grad_norm": 0.2826191180491914,
      "learning_rate": 4.614991483686825e-12,
      "loss": 0.2395,
      "step": 363
    },
    {
      "epoch": 6.955414012738854,
      "grad_norm": 0.33090612760402355,
      "learning_rate": 0.0,
      "loss": 0.2531,
      "step": 364
    },
    {
      "epoch": 6.955414012738854,
      "step": 364,
      "total_flos": 3.947982283988664e+17,
      "train_loss": 0.25456836733680505,
      "train_runtime": 5658.0504,
      "train_samples_per_second": 6.186,
      "train_steps_per_second": 0.064
    }
  ],
  "logging_steps": 1,
  "max_steps": 364,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.947982283988664e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}