| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 460, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008695652173913044, |
| "grad_norm": 33.915157318115234, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 2.3126, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.017391304347826087, |
| "grad_norm": 35.09430694580078, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 2.4328, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02608695652173913, |
| "grad_norm": 33.54511260986328, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 2.2895, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.034782608695652174, |
| "grad_norm": 34.5639762878418, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 2.376, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.043478260869565216, |
| "grad_norm": 34.895896911621094, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 2.4092, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05217391304347826, |
| "grad_norm": 33.44582748413086, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 2.3196, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06086956521739131, |
| "grad_norm": 34.687496185302734, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 2.3925, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 34.72901153564453, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 2.3679, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0782608695652174, |
| "grad_norm": 34.008853912353516, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 2.2935, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 33.60919189453125, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 2.2791, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09565217391304348, |
| "grad_norm": 32.73677444458008, |
| "learning_rate": 5.5e-07, |
| "loss": 2.172, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.10434782608695652, |
| "grad_norm": 32.384212493896484, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 2.1622, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11304347826086956, |
| "grad_norm": 34.02764129638672, |
| "learning_rate": 6.5e-07, |
| "loss": 2.2014, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.12173913043478261, |
| "grad_norm": 33.0348014831543, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 2.1775, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.13043478260869565, |
| "grad_norm": 32.571834564208984, |
| "learning_rate": 7.5e-07, |
| "loss": 2.0714, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 33.0487174987793, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 2.0866, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.14782608695652175, |
| "grad_norm": 30.354747772216797, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.8247, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1565217391304348, |
| "grad_norm": 29.680463790893555, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.7154, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.16521739130434782, |
| "grad_norm": 29.8133544921875, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.6374, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 30.196664810180664, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.5918, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1826086956521739, |
| "grad_norm": 29.475982666015625, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.3912, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.19130434782608696, |
| "grad_norm": 30.726896286010742, |
| "learning_rate": 1.1e-06, |
| "loss": 1.2958, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 30.79201889038086, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.1146, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 30.13853645324707, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.9209, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.21739130434782608, |
| "grad_norm": 29.31069564819336, |
| "learning_rate": 1.25e-06, |
| "loss": 0.7969, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.22608695652173913, |
| "grad_norm": 27.00128746032715, |
| "learning_rate": 1.3e-06, |
| "loss": 0.653, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.23478260869565218, |
| "grad_norm": 27.202844619750977, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 0.4821, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.24347826086956523, |
| "grad_norm": 23.356842041015625, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.3932, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.25217391304347825, |
| "grad_norm": 16.253108978271484, |
| "learning_rate": 1.45e-06, |
| "loss": 0.2794, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 11.343944549560547, |
| "learning_rate": 1.5e-06, |
| "loss": 0.2122, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26956521739130435, |
| "grad_norm": 6.002540111541748, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 0.1371, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 4.205584526062012, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.1406, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.28695652173913044, |
| "grad_norm": 3.3316493034362793, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.1172, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.2956521739130435, |
| "grad_norm": 2.546919822692871, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.1013, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.30434782608695654, |
| "grad_norm": 1.871219515800476, |
| "learning_rate": 1.75e-06, |
| "loss": 0.1, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3130434782608696, |
| "grad_norm": 1.7010533809661865, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.0903, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3217391304347826, |
| "grad_norm": 1.7138007879257202, |
| "learning_rate": 1.85e-06, |
| "loss": 0.0882, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.33043478260869563, |
| "grad_norm": 1.897299885749817, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.0926, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3391304347826087, |
| "grad_norm": 1.2912218570709229, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.08, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 1.2403124570846558, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0851, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3565217391304348, |
| "grad_norm": 1.0497726202011108, |
| "learning_rate": 2.05e-06, |
| "loss": 0.0854, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.3652173913043478, |
| "grad_norm": 1.2289705276489258, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.0706, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3739130434782609, |
| "grad_norm": 1.0778898000717163, |
| "learning_rate": 2.15e-06, |
| "loss": 0.0735, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.3826086956521739, |
| "grad_norm": 1.101493239402771, |
| "learning_rate": 2.2e-06, |
| "loss": 0.0736, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.391304347826087, |
| "grad_norm": 1.746185064315796, |
| "learning_rate": 2.25e-06, |
| "loss": 0.0973, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.2611403465270996, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.0684, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.40869565217391307, |
| "grad_norm": 1.09405517578125, |
| "learning_rate": 2.35e-06, |
| "loss": 0.0698, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 1.1591057777404785, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.0757, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.4260869565217391, |
| "grad_norm": 0.9820723533630371, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.0721, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 1.1460777521133423, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0849, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4434782608695652, |
| "grad_norm": 0.951232373714447, |
| "learning_rate": 2.55e-06, |
| "loss": 0.0707, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.45217391304347826, |
| "grad_norm": 1.0160707235336304, |
| "learning_rate": 2.6e-06, |
| "loss": 0.0695, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.4608695652173913, |
| "grad_norm": 1.0926896333694458, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.0815, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.46956521739130436, |
| "grad_norm": 0.8407694697380066, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.0718, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.4782608695652174, |
| "grad_norm": 0.9152198433876038, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.0733, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 0.8152011632919312, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.0827, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.4956521739130435, |
| "grad_norm": 0.8188056349754333, |
| "learning_rate": 2.85e-06, |
| "loss": 0.0707, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5043478260869565, |
| "grad_norm": 0.8655344843864441, |
| "learning_rate": 2.9e-06, |
| "loss": 0.0696, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.5130434782608696, |
| "grad_norm": 0.8174591660499573, |
| "learning_rate": 2.95e-06, |
| "loss": 0.0718, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 1.2075238227844238, |
| "learning_rate": 3e-06, |
| "loss": 0.0817, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5304347826086957, |
| "grad_norm": 0.7282372117042542, |
| "learning_rate": 3.05e-06, |
| "loss": 0.0675, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5391304347826087, |
| "grad_norm": 0.8066464066505432, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.0638, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5478260869565217, |
| "grad_norm": 0.9105910062789917, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.0714, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 1.090287208557129, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.0742, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5652173913043478, |
| "grad_norm": 0.7674099802970886, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.0594, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5739130434782609, |
| "grad_norm": 0.9247289299964905, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.0671, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5826086956521739, |
| "grad_norm": 0.7552309632301331, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.0609, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.591304347826087, |
| "grad_norm": 1.1822036504745483, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.07, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.805238664150238, |
| "learning_rate": 3.45e-06, |
| "loss": 0.0637, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 1.0068074464797974, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0659, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6173913043478261, |
| "grad_norm": 0.7666197419166565, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.0641, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 0.8774266242980957, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.0639, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6347826086956522, |
| "grad_norm": 1.4135913848876953, |
| "learning_rate": 3.65e-06, |
| "loss": 0.0716, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6434782608695652, |
| "grad_norm": 1.052467942237854, |
| "learning_rate": 3.7e-06, |
| "loss": 0.0617, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 0.6905954480171204, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.0655, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6608695652173913, |
| "grad_norm": 0.8480051755905151, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.055, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.6695652173913044, |
| "grad_norm": 0.8274970054626465, |
| "learning_rate": 3.85e-06, |
| "loss": 0.0634, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.6782608695652174, |
| "grad_norm": 0.8180427551269531, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.0591, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6869565217391305, |
| "grad_norm": 1.117491602897644, |
| "learning_rate": 3.95e-06, |
| "loss": 0.0596, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.80575031042099, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0587, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7043478260869566, |
| "grad_norm": 1.1225630044937134, |
| "learning_rate": 4.05e-06, |
| "loss": 0.0571, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.7130434782608696, |
| "grad_norm": 0.9180626273155212, |
| "learning_rate": 4.1e-06, |
| "loss": 0.0514, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7217391304347827, |
| "grad_norm": 0.9540777206420898, |
| "learning_rate": 4.15e-06, |
| "loss": 0.0539, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7304347826086957, |
| "grad_norm": 1.032495379447937, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.0593, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7391304347826086, |
| "grad_norm": 0.8488896489143372, |
| "learning_rate": 4.25e-06, |
| "loss": 0.0508, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7478260869565218, |
| "grad_norm": 0.7575100660324097, |
| "learning_rate": 4.3e-06, |
| "loss": 0.0604, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.7565217391304347, |
| "grad_norm": 0.8140726089477539, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.0538, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 0.8753012418746948, |
| "learning_rate": 4.4e-06, |
| "loss": 0.0627, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.7739130434782608, |
| "grad_norm": 0.7341794967651367, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.0517, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.8983039259910583, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0553, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7913043478260869, |
| "grad_norm": 0.8660378456115723, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.0577, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.8682013154029846, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.0504, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.808695652173913, |
| "grad_norm": 1.1238961219787598, |
| "learning_rate": 4.65e-06, |
| "loss": 0.0466, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.8173913043478261, |
| "grad_norm": 0.9690432548522949, |
| "learning_rate": 4.7e-06, |
| "loss": 0.0506, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.8260869565217391, |
| "grad_norm": 0.8743138313293457, |
| "learning_rate": 4.75e-06, |
| "loss": 0.052, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 1.1654411554336548, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.0541, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8434782608695652, |
| "grad_norm": 0.9813936948776245, |
| "learning_rate": 4.85e-06, |
| "loss": 0.0511, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8521739130434782, |
| "grad_norm": 0.8186525702476501, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.0524, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.8608695652173913, |
| "grad_norm": 0.7779029607772827, |
| "learning_rate": 4.95e-06, |
| "loss": 0.0414, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.7967627048492432, |
| "learning_rate": 5e-06, |
| "loss": 0.0424, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8782608695652174, |
| "grad_norm": 0.9512422680854797, |
| "learning_rate": 4.999964559102694e-06, |
| "loss": 0.0459, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.8869565217391304, |
| "grad_norm": 1.0316367149353027, |
| "learning_rate": 4.999858237415621e-06, |
| "loss": 0.0415, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.8956521739130435, |
| "grad_norm": 0.955311119556427, |
| "learning_rate": 4.999681037953289e-06, |
| "loss": 0.0462, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 0.8455808758735657, |
| "learning_rate": 4.999432965739786e-06, |
| "loss": 0.045, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.9130434782608695, |
| "grad_norm": 0.9638617038726807, |
| "learning_rate": 4.999114027808632e-06, |
| "loss": 0.0454, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9217391304347826, |
| "grad_norm": 0.9750456213951111, |
| "learning_rate": 4.998724233202585e-06, |
| "loss": 0.0411, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.9304347826086956, |
| "grad_norm": 1.0203043222427368, |
| "learning_rate": 4.998263592973382e-06, |
| "loss": 0.0467, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.9391304347826087, |
| "grad_norm": 1.0116546154022217, |
| "learning_rate": 4.9977321201814235e-06, |
| "loss": 0.0432, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.9478260869565217, |
| "grad_norm": 0.8726145029067993, |
| "learning_rate": 4.997129829895409e-06, |
| "loss": 0.0482, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.7527220845222473, |
| "learning_rate": 4.996456739191905e-06, |
| "loss": 0.04, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9652173913043478, |
| "grad_norm": 0.8785611391067505, |
| "learning_rate": 4.995712867154863e-06, |
| "loss": 0.0483, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 0.9951519966125488, |
| "learning_rate": 4.994898234875075e-06, |
| "loss": 0.0454, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.9826086956521739, |
| "grad_norm": 1.0761841535568237, |
| "learning_rate": 4.9940128654495826e-06, |
| "loss": 0.0375, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.991304347826087, |
| "grad_norm": 0.8400607109069824, |
| "learning_rate": 4.9930567839810125e-06, |
| "loss": 0.0369, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9820336103439331, |
| "learning_rate": 4.992030017576876e-06, |
| "loss": 0.0323, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.008695652173913, |
| "grad_norm": 0.8266397714614868, |
| "learning_rate": 4.990932595348788e-06, |
| "loss": 0.0301, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.017391304347826, |
| "grad_norm": 0.9163568615913391, |
| "learning_rate": 4.989764548411654e-06, |
| "loss": 0.0284, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.0260869565217392, |
| "grad_norm": 1.0225435495376587, |
| "learning_rate": 4.988525909882779e-06, |
| "loss": 0.0229, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.0347826086956522, |
| "grad_norm": 1.2318713665008545, |
| "learning_rate": 4.987216714880929e-06, |
| "loss": 0.0304, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.0434782608695652, |
| "grad_norm": 1.0533114671707153, |
| "learning_rate": 4.9858370005253435e-06, |
| "loss": 0.0333, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.0521739130434782, |
| "grad_norm": 1.0429824590682983, |
| "learning_rate": 4.9843868059346725e-06, |
| "loss": 0.0265, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.0608695652173914, |
| "grad_norm": 0.9843570590019226, |
| "learning_rate": 4.982866172225876e-06, |
| "loss": 0.0306, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0695652173913044, |
| "grad_norm": 0.8079569935798645, |
| "learning_rate": 4.981275142513049e-06, |
| "loss": 0.0227, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.0782608695652174, |
| "grad_norm": 0.8699679970741272, |
| "learning_rate": 4.979613761906212e-06, |
| "loss": 0.0244, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.0869565217391304, |
| "grad_norm": 0.87395179271698, |
| "learning_rate": 4.977882077510018e-06, |
| "loss": 0.0238, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0956521739130434, |
| "grad_norm": 1.1012191772460938, |
| "learning_rate": 4.9760801384224274e-06, |
| "loss": 0.0265, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.1043478260869566, |
| "grad_norm": 0.6216080784797668, |
| "learning_rate": 4.97420799573331e-06, |
| "loss": 0.0161, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.1130434782608696, |
| "grad_norm": 0.9586366415023804, |
| "learning_rate": 4.972265702523001e-06, |
| "loss": 0.0226, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.1217391304347826, |
| "grad_norm": 1.3069987297058105, |
| "learning_rate": 4.970253313860788e-06, |
| "loss": 0.0213, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "grad_norm": 1.4542529582977295, |
| "learning_rate": 4.968170886803361e-06, |
| "loss": 0.0242, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.1391304347826088, |
| "grad_norm": 1.221802830696106, |
| "learning_rate": 4.966018480393189e-06, |
| "loss": 0.0244, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.1478260869565218, |
| "grad_norm": 0.8851010203361511, |
| "learning_rate": 4.9637961556568405e-06, |
| "loss": 0.0231, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.1565217391304348, |
| "grad_norm": 0.7256351709365845, |
| "learning_rate": 4.961503975603263e-06, |
| "loss": 0.016, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.1652173913043478, |
| "grad_norm": 0.9153109192848206, |
| "learning_rate": 4.959142005221991e-06, |
| "loss": 0.0212, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 0.9752025604248047, |
| "learning_rate": 4.956710311481303e-06, |
| "loss": 0.0213, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.182608695652174, |
| "grad_norm": 0.9055240154266357, |
| "learning_rate": 4.954208963326327e-06, |
| "loss": 0.0147, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.191304347826087, |
| "grad_norm": 0.6810442805290222, |
| "learning_rate": 4.951638031677081e-06, |
| "loss": 0.0218, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.8518972396850586, |
| "learning_rate": 4.948997589426463e-06, |
| "loss": 0.0161, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.208695652173913, |
| "grad_norm": 0.9990325570106506, |
| "learning_rate": 4.94628771143819e-06, |
| "loss": 0.0134, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.2173913043478262, |
| "grad_norm": 0.86704421043396, |
| "learning_rate": 4.943508474544667e-06, |
| "loss": 0.0152, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.2260869565217392, |
| "grad_norm": 0.7871954441070557, |
| "learning_rate": 4.940659957544813e-06, |
| "loss": 0.014, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.2347826086956522, |
| "grad_norm": 1.038091778755188, |
| "learning_rate": 4.937742241201826e-06, |
| "loss": 0.0191, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.2434782608695651, |
| "grad_norm": 0.9917914867401123, |
| "learning_rate": 4.934755408240896e-06, |
| "loss": 0.0156, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.2521739130434781, |
| "grad_norm": 0.9815549254417419, |
| "learning_rate": 4.931699543346854e-06, |
| "loss": 0.0164, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.2608695652173914, |
| "grad_norm": 1.0097852945327759, |
| "learning_rate": 4.928574733161775e-06, |
| "loss": 0.0229, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.2695652173913043, |
| "grad_norm": 0.767440676689148, |
| "learning_rate": 4.925381066282522e-06, |
| "loss": 0.0144, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.2782608695652173, |
| "grad_norm": 0.883701503276825, |
| "learning_rate": 4.922118633258229e-06, |
| "loss": 0.0177, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.2869565217391306, |
| "grad_norm": 0.9894592761993408, |
| "learning_rate": 4.918787526587739e-06, |
| "loss": 0.0217, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.2956521739130435, |
| "grad_norm": 0.8317010998725891, |
| "learning_rate": 4.9153878407169815e-06, |
| "loss": 0.0162, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 0.4255380928516388, |
| "learning_rate": 4.911919672036291e-06, |
| "loss": 0.0107, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.3130434782608695, |
| "grad_norm": 0.614287793636322, |
| "learning_rate": 4.908383118877672e-06, |
| "loss": 0.0165, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.3217391304347825, |
| "grad_norm": 0.6787184476852417, |
| "learning_rate": 4.904778281512022e-06, |
| "loss": 0.0144, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.3304347826086955, |
| "grad_norm": 0.8526840209960938, |
| "learning_rate": 4.901105262146275e-06, |
| "loss": 0.0089, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.3391304347826087, |
| "grad_norm": 1.5631215572357178, |
| "learning_rate": 4.897364164920515e-06, |
| "loss": 0.0168, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 1.2826169729232788, |
| "learning_rate": 4.8935550959050135e-06, |
| "loss": 0.017, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.3565217391304347, |
| "grad_norm": 0.875330924987793, |
| "learning_rate": 4.889678163097233e-06, |
| "loss": 0.0144, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.365217391304348, |
| "grad_norm": 0.8514088988304138, |
| "learning_rate": 4.885733476418752e-06, |
| "loss": 0.0142, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.373913043478261, |
| "grad_norm": 1.1872806549072266, |
| "learning_rate": 4.8817211477121615e-06, |
| "loss": 0.0153, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.382608695652174, |
| "grad_norm": 0.7295001745223999, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.0085, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.391304347826087, |
| "grad_norm": 0.7863972187042236, |
| "learning_rate": 4.8734940211709535e-06, |
| "loss": 0.0121, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 1.0080970525741577, |
| "learning_rate": 4.8692794565977335e-06, |
| "loss": 0.0107, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.4086956521739131, |
| "grad_norm": 1.3618495464324951, |
| "learning_rate": 4.864997716512584e-06, |
| "loss": 0.0157, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.4173913043478261, |
| "grad_norm": 1.3909306526184082, |
| "learning_rate": 4.8606489223144744e-06, |
| "loss": 0.0127, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.4260869565217391, |
| "grad_norm": 0.8300015330314636, |
| "learning_rate": 4.8562331973035396e-06, |
| "loss": 0.0114, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.434782608695652, |
| "grad_norm": 1.1622614860534668, |
| "learning_rate": 4.851750666677583e-06, |
| "loss": 0.0155, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.4434782608695653, |
| "grad_norm": 0.9247199296951294, |
| "learning_rate": 4.847201457528533e-06, |
| "loss": 0.0121, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.4521739130434783, |
| "grad_norm": 1.0048704147338867, |
| "learning_rate": 4.842585698838832e-06, |
| "loss": 0.0154, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.4608695652173913, |
| "grad_norm": 1.0037480592727661, |
| "learning_rate": 4.837903521477784e-06, |
| "loss": 0.0138, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.4695652173913043, |
| "grad_norm": 0.8620800971984863, |
| "learning_rate": 4.833155058197842e-06, |
| "loss": 0.0145, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "grad_norm": 0.8210186958312988, |
| "learning_rate": 4.828340443630847e-06, |
| "loss": 0.0175, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.4869565217391305, |
| "grad_norm": 0.5821270942687988, |
| "learning_rate": 4.823459814284205e-06, |
| "loss": 0.0103, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.4956521739130435, |
| "grad_norm": 0.622521698474884, |
| "learning_rate": 4.818513308537025e-06, |
| "loss": 0.008, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.5043478260869565, |
| "grad_norm": 1.004116415977478, |
| "learning_rate": 4.813501066636188e-06, |
| "loss": 0.011, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.5130434782608697, |
| "grad_norm": 0.7552922964096069, |
| "learning_rate": 4.808423230692374e-06, |
| "loss": 0.0073, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 0.5907073616981506, |
| "learning_rate": 4.8032799446760326e-06, |
| "loss": 0.0083, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.5304347826086957, |
| "grad_norm": 0.40509092807769775, |
| "learning_rate": 4.798071354413302e-06, |
| "loss": 0.0067, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.5391304347826087, |
| "grad_norm": 1.027583122253418, |
| "learning_rate": 4.792797607581872e-06, |
| "loss": 0.0023, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.5478260869565217, |
| "grad_norm": 1.1601897478103638, |
| "learning_rate": 4.787458853706798e-06, |
| "loss": 0.0091, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.5565217391304347, |
| "grad_norm": 0.9880419373512268, |
| "learning_rate": 4.7820552441562625e-06, |
| "loss": 0.009, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.5652173913043477, |
| "grad_norm": 1.218587875366211, |
| "learning_rate": 4.7765869321372835e-06, |
| "loss": 0.0082, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.5739130434782609, |
| "grad_norm": 1.20319664478302, |
| "learning_rate": 4.771054072691367e-06, |
| "loss": 0.0123, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.5826086956521739, |
| "grad_norm": 0.43793171644210815, |
| "learning_rate": 4.7654568226901165e-06, |
| "loss": 0.0028, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.591304347826087, |
| "grad_norm": 0.5504114627838135, |
| "learning_rate": 4.759795340830782e-06, |
| "loss": 0.0054, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 2.2305221557617188, |
| "learning_rate": 4.754069787631761e-06, |
| "loss": 0.0094, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.608695652173913, |
| "grad_norm": 0.8716424703598022, |
| "learning_rate": 4.7482803254280485e-06, |
| "loss": 0.0093, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.617391304347826, |
| "grad_norm": 1.02736496925354, |
| "learning_rate": 4.742427118366632e-06, |
| "loss": 0.01, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.626086956521739, |
| "grad_norm": 0.6050184965133667, |
| "learning_rate": 4.736510332401841e-06, |
| "loss": 0.0054, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.634782608695652, |
| "grad_norm": 0.6020815372467041, |
| "learning_rate": 4.730530135290638e-06, |
| "loss": 0.0075, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.643478260869565, |
| "grad_norm": 1.0702062845230103, |
| "learning_rate": 4.724486696587862e-06, |
| "loss": 0.0058, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "grad_norm": 1.1906393766403198, |
| "learning_rate": 4.718380187641429e-06, |
| "loss": 0.0061, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.6608695652173913, |
| "grad_norm": 0.36051103472709656, |
| "learning_rate": 4.712210781587463e-06, |
| "loss": 0.0027, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.6695652173913045, |
| "grad_norm": 0.5215916037559509, |
| "learning_rate": 4.705978653345392e-06, |
| "loss": 0.0061, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.6782608695652175, |
| "grad_norm": 0.2960876524448395, |
| "learning_rate": 4.699683979612991e-06, |
| "loss": 0.0038, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.6869565217391305, |
| "grad_norm": 0.48366671800613403, |
| "learning_rate": 4.693326938861367e-06, |
| "loss": 0.0045, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 0.4141710698604584, |
| "learning_rate": 4.686907711329903e-06, |
| "loss": 0.0055, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.7043478260869565, |
| "grad_norm": 1.1449170112609863, |
| "learning_rate": 4.680426479021147e-06, |
| "loss": 0.0081, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.7130434782608694, |
| "grad_norm": 0.2535400390625, |
| "learning_rate": 4.67388342569565e-06, |
| "loss": 0.0027, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.7217391304347827, |
| "grad_norm": 0.2910863161087036, |
| "learning_rate": 4.667278736866755e-06, |
| "loss": 0.0026, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.7304347826086957, |
| "grad_norm": 0.7556977272033691, |
| "learning_rate": 4.660612599795343e-06, |
| "loss": 0.0059, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 0.45072904229164124, |
| "learning_rate": 4.653885203484516e-06, |
| "loss": 0.0039, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.7478260869565219, |
| "grad_norm": 0.45628419518470764, |
| "learning_rate": 4.647096738674243e-06, |
| "loss": 0.0017, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.7565217391304349, |
| "grad_norm": 0.31578192114830017, |
| "learning_rate": 4.640247397835953e-06, |
| "loss": 0.0021, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.7652173913043478, |
| "grad_norm": 0.6553907990455627, |
| "learning_rate": 4.633337375167074e-06, |
| "loss": 0.0035, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.7739130434782608, |
| "grad_norm": 0.39887183904647827, |
| "learning_rate": 4.626366866585528e-06, |
| "loss": 0.0027, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.7826086956521738, |
| "grad_norm": 0.5914686918258667, |
| "learning_rate": 4.619336069724177e-06, |
| "loss": 0.0052, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.7913043478260868, |
| "grad_norm": 0.47106510400772095, |
| "learning_rate": 4.612245183925225e-06, |
| "loss": 0.0041, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.4898064136505127, |
| "learning_rate": 4.605094410234551e-06, |
| "loss": 0.0016, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.808695652173913, |
| "grad_norm": 0.584793210029602, |
| "learning_rate": 4.597883951396027e-06, |
| "loss": 0.002, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.8173913043478263, |
| "grad_norm": 0.5560891628265381, |
| "learning_rate": 4.590614011845758e-06, |
| "loss": 0.0039, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "grad_norm": 0.5522558689117432, |
| "learning_rate": 4.583284797706288e-06, |
| "loss": 0.0023, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.8347826086956522, |
| "grad_norm": 0.6028457283973694, |
| "learning_rate": 4.575896516780757e-06, |
| "loss": 0.0033, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.8434782608695652, |
| "grad_norm": 0.7641825079917908, |
| "learning_rate": 4.568449378547011e-06, |
| "loss": 0.0061, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.8521739130434782, |
| "grad_norm": 2.1374592781066895, |
| "learning_rate": 4.560943594151657e-06, |
| "loss": 0.0064, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.8608695652173912, |
| "grad_norm": 1.233405351638794, |
| "learning_rate": 4.553379376404085e-06, |
| "loss": 0.003, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 0.3678194582462311, |
| "learning_rate": 4.5457569397704226e-06, |
| "loss": 0.0028, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.8782608695652174, |
| "grad_norm": 0.21607166528701782, |
| "learning_rate": 4.538076500367469e-06, |
| "loss": 0.0011, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.8869565217391304, |
| "grad_norm": 0.13464906811714172, |
| "learning_rate": 4.530338275956553e-06, |
| "loss": 0.0006, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.8956521739130436, |
| "grad_norm": 2.7032434940338135, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.0057, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.9043478260869566, |
| "grad_norm": 0.549523651599884, |
| "learning_rate": 4.514689351341751e-06, |
| "loss": 0.0026, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.9130434782608696, |
| "grad_norm": 0.26894959807395935, |
| "learning_rate": 4.506779094827409e-06, |
| "loss": 0.0017, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.9217391304347826, |
| "grad_norm": 0.09632059931755066, |
| "learning_rate": 4.498811940671615e-06, |
| "loss": 0.001, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.9304347826086956, |
| "grad_norm": 0.23288941383361816, |
| "learning_rate": 4.49078811476484e-06, |
| "loss": 0.0017, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.9391304347826086, |
| "grad_norm": 0.3012441396713257, |
| "learning_rate": 4.482707844604359e-06, |
| "loss": 0.0038, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.9478260869565216, |
| "grad_norm": 0.2756352126598358, |
| "learning_rate": 4.474571359287791e-06, |
| "loss": 0.0012, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.9565217391304348, |
| "grad_norm": 0.24359366297721863, |
| "learning_rate": 4.466378889506607e-06, |
| "loss": 0.0013, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.9652173913043478, |
| "grad_norm": 0.1803927719593048, |
| "learning_rate": 4.458130667539592e-06, |
| "loss": 0.0017, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.973913043478261, |
| "grad_norm": 0.5478237271308899, |
| "learning_rate": 4.449826927246257e-06, |
| "loss": 0.0024, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.982608695652174, |
| "grad_norm": 0.1454983353614807, |
| "learning_rate": 4.441467904060207e-06, |
| "loss": 0.0007, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.991304347826087, |
| "grad_norm": 0.7362433671951294, |
| "learning_rate": 4.4330538349824684e-06, |
| "loss": 0.0086, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.12488367408514023, |
| "learning_rate": 4.424584958574766e-06, |
| "loss": 0.0014, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.008695652173913, |
| "grad_norm": 0.15276969969272614, |
| "learning_rate": 4.4160615149527646e-06, |
| "loss": 0.0015, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.017391304347826, |
| "grad_norm": 0.19061294198036194, |
| "learning_rate": 4.407483745779256e-06, |
| "loss": 0.0007, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.026086956521739, |
| "grad_norm": 0.31356436014175415, |
| "learning_rate": 4.39885189425731e-06, |
| "loss": 0.0008, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.034782608695652, |
| "grad_norm": 0.3470340669155121, |
| "learning_rate": 4.3901662051233755e-06, |
| "loss": 0.002, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.0434782608695654, |
| "grad_norm": 0.37595614790916443, |
| "learning_rate": 4.381426924640346e-06, |
| "loss": 0.0013, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.0521739130434784, |
| "grad_norm": 0.08062059432268143, |
| "learning_rate": 4.372634300590578e-06, |
| "loss": 0.0004, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.0608695652173914, |
| "grad_norm": 0.643486499786377, |
| "learning_rate": 4.363788582268857e-06, |
| "loss": 0.0012, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.0695652173913044, |
| "grad_norm": 0.06273315846920013, |
| "learning_rate": 4.35489002047534e-06, |
| "loss": 0.0003, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.0782608695652174, |
| "grad_norm": 0.20653115212917328, |
| "learning_rate": 4.345938867508439e-06, |
| "loss": 0.0013, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.0869565217391304, |
| "grad_norm": 0.09760613739490509, |
| "learning_rate": 4.336935377157668e-06, |
| "loss": 0.0004, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.0956521739130434, |
| "grad_norm": 0.31116411089897156, |
| "learning_rate": 4.32787980469645e-06, |
| "loss": 0.0008, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.1043478260869564, |
| "grad_norm": 0.24201999604701996, |
| "learning_rate": 4.318772406874873e-06, |
| "loss": 0.0005, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.1130434782608694, |
| "grad_norm": 0.3692140579223633, |
| "learning_rate": 4.309613441912421e-06, |
| "loss": 0.0028, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.121739130434783, |
| "grad_norm": 0.2164740115404129, |
| "learning_rate": 4.30040316949064e-06, |
| "loss": 0.0014, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.130434782608696, |
| "grad_norm": 0.05342044681310654, |
| "learning_rate": 4.291141850745788e-06, |
| "loss": 0.0002, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.139130434782609, |
| "grad_norm": 0.19637945294380188, |
| "learning_rate": 4.281829748261422e-06, |
| "loss": 0.0014, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.1478260869565218, |
| "grad_norm": 0.12176893651485443, |
| "learning_rate": 4.272467126060954e-06, |
| "loss": 0.0003, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.1565217391304348, |
| "grad_norm": 0.15314780175685883, |
| "learning_rate": 4.263054249600172e-06, |
| "loss": 0.0005, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.1652173913043478, |
| "grad_norm": 0.08518808335065842, |
| "learning_rate": 4.253591385759705e-06, |
| "loss": 0.0003, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 0.2676379680633545, |
| "learning_rate": 4.244078802837462e-06, |
| "loss": 0.0008, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.1826086956521737, |
| "grad_norm": 0.16651524603366852, |
| "learning_rate": 4.234516770541023e-06, |
| "loss": 0.0004, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.1913043478260867, |
| "grad_norm": 0.10508158802986145, |
| "learning_rate": 4.224905559979991e-06, |
| "loss": 0.0005, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.18296688795089722, |
| "learning_rate": 4.215245443658307e-06, |
| "loss": 0.0008, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.208695652173913, |
| "grad_norm": 0.1019248366355896, |
| "learning_rate": 4.205536695466524e-06, |
| "loss": 0.0004, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.217391304347826, |
| "grad_norm": 0.31611302495002747, |
| "learning_rate": 4.1957795906740404e-06, |
| "loss": 0.0031, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.226086956521739, |
| "grad_norm": 0.1458199918270111, |
| "learning_rate": 4.1859744059212945e-06, |
| "loss": 0.0006, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.234782608695652, |
| "grad_norm": 0.11204175651073456, |
| "learning_rate": 4.176121419211924e-06, |
| "loss": 0.0001, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.243478260869565, |
| "grad_norm": 0.1589597463607788, |
| "learning_rate": 4.16622090990488e-06, |
| "loss": 0.0004, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.252173913043478, |
| "grad_norm": 0.17105410993099213, |
| "learning_rate": 4.15627315870651e-06, |
| "loss": 0.001, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.260869565217391, |
| "grad_norm": 0.025157850235700607, |
| "learning_rate": 4.146278447662597e-06, |
| "loss": 0.0001, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.269565217391304, |
| "grad_norm": 0.3840286135673523, |
| "learning_rate": 4.136237060150363e-06, |
| "loss": 0.0013, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.2782608695652176, |
| "grad_norm": 0.4167248606681824, |
| "learning_rate": 4.126149280870434e-06, |
| "loss": 0.0009, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.2869565217391306, |
| "grad_norm": 0.07005563378334045, |
| "learning_rate": 4.116015395838772e-06, |
| "loss": 0.0001, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.2956521739130435, |
| "grad_norm": 0.33618828654289246, |
| "learning_rate": 4.105835692378557e-06, |
| "loss": 0.0005, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.3043478260869565, |
| "grad_norm": 0.22151310741901398, |
| "learning_rate": 4.095610459112051e-06, |
| "loss": 0.0007, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.3130434782608695, |
| "grad_norm": 0.6059427261352539, |
| "learning_rate": 4.0853399859524066e-06, |
| "loss": 0.0043, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.3217391304347825, |
| "grad_norm": 0.28740134835243225, |
| "learning_rate": 4.075024564095452e-06, |
| "loss": 0.0005, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.3304347826086955, |
| "grad_norm": 0.3348411023616791, |
| "learning_rate": 4.064664486011433e-06, |
| "loss": 0.0005, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.3391304347826085, |
| "grad_norm": 0.024883409962058067, |
| "learning_rate": 4.05426004543672e-06, |
| "loss": 0.0001, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 0.01836530677974224, |
| "learning_rate": 4.04381153736548e-06, |
| "loss": 0.0001, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.356521739130435, |
| "grad_norm": 0.09869615733623505, |
| "learning_rate": 4.033319258041316e-06, |
| "loss": 0.0002, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.365217391304348, |
| "grad_norm": 0.026213495060801506, |
| "learning_rate": 4.022783504948862e-06, |
| "loss": 0.0001, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.373913043478261, |
| "grad_norm": 0.010184271261096, |
| "learning_rate": 4.012204576805352e-06, |
| "loss": 0.0, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.382608695652174, |
| "grad_norm": 0.5611284971237183, |
| "learning_rate": 4.001582773552153e-06, |
| "loss": 0.0026, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.391304347826087, |
| "grad_norm": 0.16219332814216614, |
| "learning_rate": 3.990918396346254e-06, |
| "loss": 0.0002, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.016482144594192505, |
| "learning_rate": 3.9802117475517335e-06, |
| "loss": 0.0, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.408695652173913, |
| "grad_norm": 0.7226945757865906, |
| "learning_rate": 3.969463130731183e-06, |
| "loss": 0.0036, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.417391304347826, |
| "grad_norm": 0.14231452345848083, |
| "learning_rate": 3.958672850637104e-06, |
| "loss": 0.0001, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.426086956521739, |
| "grad_norm": 0.3705194890499115, |
| "learning_rate": 3.947841213203262e-06, |
| "loss": 0.0007, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.4347826086956523, |
| "grad_norm": 0.2774112820625305, |
| "learning_rate": 3.936968525536018e-06, |
| "loss": 0.0025, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.4434782608695653, |
| "grad_norm": 0.4041725695133209, |
| "learning_rate": 3.926055095905616e-06, |
| "loss": 0.0015, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.4521739130434783, |
| "grad_norm": 0.2777579128742218, |
| "learning_rate": 3.9151012337374495e-06, |
| "loss": 0.0013, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.4608695652173913, |
| "grad_norm": 0.2716004550457001, |
| "learning_rate": 3.9041072496032805e-06, |
| "loss": 0.001, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.4695652173913043, |
| "grad_norm": 0.06586720794439316, |
| "learning_rate": 3.893073455212438e-06, |
| "loss": 0.0003, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.4782608695652173, |
| "grad_norm": 0.22377079725265503, |
| "learning_rate": 3.882000163402984e-06, |
| "loss": 0.0006, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.4869565217391303, |
| "grad_norm": 0.6000126600265503, |
| "learning_rate": 3.870887688132834e-06, |
| "loss": 0.0015, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.4956521739130437, |
| "grad_norm": 0.2133096158504486, |
| "learning_rate": 3.859736344470866e-06, |
| "loss": 0.0008, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.5043478260869563, |
| "grad_norm": 0.4249497652053833, |
| "learning_rate": 3.8485464485879785e-06, |
| "loss": 0.0024, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.5130434782608697, |
| "grad_norm": 0.1991584748029709, |
| "learning_rate": 3.837318317748134e-06, |
| "loss": 0.0017, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 0.21226167678833008, |
| "learning_rate": 3.826052270299356e-06, |
| "loss": 0.0021, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.5304347826086957, |
| "grad_norm": 0.1490064263343811, |
| "learning_rate": 3.814748625664711e-06, |
| "loss": 0.0005, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.5391304347826087, |
| "grad_norm": 0.05340861901640892, |
| "learning_rate": 3.8034077043332463e-06, |
| "loss": 0.0002, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.5478260869565217, |
| "grad_norm": 0.05676361918449402, |
| "learning_rate": 3.7920298278509028e-06, |
| "loss": 0.0002, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.5565217391304347, |
| "grad_norm": 0.033280279487371445, |
| "learning_rate": 3.7806153188114027e-06, |
| "loss": 0.0001, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.5652173913043477, |
| "grad_norm": 0.13904324173927307, |
| "learning_rate": 3.7691645008471e-06, |
| "loss": 0.0008, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.573913043478261, |
| "grad_norm": 0.2639399766921997, |
| "learning_rate": 3.7576776986198064e-06, |
| "loss": 0.0012, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.5826086956521737, |
| "grad_norm": 0.5102531909942627, |
| "learning_rate": 3.7461552378115833e-06, |
| "loss": 0.0018, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.591304347826087, |
| "grad_norm": 0.34142470359802246, |
| "learning_rate": 3.734597445115511e-06, |
| "loss": 0.0013, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.13264404237270355, |
| "learning_rate": 3.7230046482264256e-06, |
| "loss": 0.0004, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.608695652173913, |
| "grad_norm": 0.036736320704221725, |
| "learning_rate": 3.711377175831626e-06, |
| "loss": 0.0002, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.617391304347826, |
| "grad_norm": 0.1738046258687973, |
| "learning_rate": 3.6997153576015552e-06, |
| "loss": 0.0014, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.626086956521739, |
| "grad_norm": 0.12404807657003403, |
| "learning_rate": 3.6880195241804567e-06, |
| "loss": 0.0006, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.634782608695652, |
| "grad_norm": 0.02482556365430355, |
| "learning_rate": 3.676290007176994e-06, |
| "loss": 0.0001, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.643478260869565, |
| "grad_norm": 0.05156688019633293, |
| "learning_rate": 3.6645271391548542e-06, |
| "loss": 0.0004, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.6521739130434785, |
| "grad_norm": 0.08253592997789383, |
| "learning_rate": 3.652731253623315e-06, |
| "loss": 0.0007, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.660869565217391, |
| "grad_norm": 0.10988730192184448, |
| "learning_rate": 3.6409026850277908e-06, |
| "loss": 0.0008, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.6695652173913045, |
| "grad_norm": 0.13908977806568146, |
| "learning_rate": 3.6290417687403485e-06, |
| "loss": 0.0009, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.6782608695652175, |
| "grad_norm": 0.0466957651078701, |
| "learning_rate": 3.617148841050202e-06, |
| "loss": 0.0002, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.6869565217391305, |
| "grad_norm": 0.18122495710849762, |
| "learning_rate": 3.6052242391541746e-06, |
| "loss": 0.0017, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.6956521739130435, |
| "grad_norm": 0.1811421513557434, |
| "learning_rate": 3.593268301147139e-06, |
| "loss": 0.0007, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.7043478260869565, |
| "grad_norm": 0.11010754853487015, |
| "learning_rate": 3.5812813660124313e-06, |
| "loss": 0.0004, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.7130434782608694, |
| "grad_norm": 0.19528372585773468, |
| "learning_rate": 3.5692637736122427e-06, |
| "loss": 0.0011, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.7217391304347824, |
| "grad_norm": 0.29341739416122437, |
| "learning_rate": 3.5572158646779787e-06, |
| "loss": 0.0009, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.730434782608696, |
| "grad_norm": 0.13021045923233032, |
| "learning_rate": 3.5451379808006014e-06, |
| "loss": 0.0003, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.7391304347826084, |
| "grad_norm": 0.17587915062904358, |
| "learning_rate": 3.5330304644209456e-06, |
| "loss": 0.0002, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.747826086956522, |
| "grad_norm": 0.06901020556688309, |
| "learning_rate": 3.520893658820007e-06, |
| "loss": 0.0003, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.756521739130435, |
| "grad_norm": 0.06725924462080002, |
| "learning_rate": 3.50872790810921e-06, |
| "loss": 0.0002, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.765217391304348, |
| "grad_norm": 0.49958744645118713, |
| "learning_rate": 3.4965335572206516e-06, |
| "loss": 0.0011, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.773913043478261, |
| "grad_norm": 0.04429077357053757, |
| "learning_rate": 3.484310951897323e-06, |
| "loss": 0.0001, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.782608695652174, |
| "grad_norm": 0.027020800858736038, |
| "learning_rate": 3.4720604386833024e-06, |
| "loss": 0.0001, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.791304347826087, |
| "grad_norm": 0.7644746899604797, |
| "learning_rate": 3.459782364913935e-06, |
| "loss": 0.0015, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.07157191634178162, |
| "learning_rate": 3.447477078705983e-06, |
| "loss": 0.0003, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.8086956521739133, |
| "grad_norm": 0.009031714871525764, |
| "learning_rate": 3.4351449289477543e-06, |
| "loss": 0.0, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.8173913043478263, |
| "grad_norm": 0.02938353642821312, |
| "learning_rate": 3.4227862652892106e-06, |
| "loss": 0.0001, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.8260869565217392, |
| "grad_norm": 0.2537288963794708, |
| "learning_rate": 3.410401438132056e-06, |
| "loss": 0.0009, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.8347826086956522, |
| "grad_norm": 0.2668483555316925, |
| "learning_rate": 3.3979907986197996e-06, |
| "loss": 0.0004, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.8434782608695652, |
| "grad_norm": 0.276434987783432, |
| "learning_rate": 3.385554698627803e-06, |
| "loss": 0.0005, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.8521739130434782, |
| "grad_norm": 0.01908428780734539, |
| "learning_rate": 3.3730934907532997e-06, |
| "loss": 0.0, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.860869565217391, |
| "grad_norm": 0.09876787662506104, |
| "learning_rate": 3.3606075283054005e-06, |
| "loss": 0.0002, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.869565217391304, |
| "grad_norm": 0.24581114947795868, |
| "learning_rate": 3.3480971652950757e-06, |
| "loss": 0.0001, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.878260869565217, |
| "grad_norm": 0.028530647978186607, |
| "learning_rate": 3.3355627564251185e-06, |
| "loss": 0.0001, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.8869565217391306, |
| "grad_norm": 0.036605849862098694, |
| "learning_rate": 3.3230046570800866e-06, |
| "loss": 0.0001, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.8956521739130436, |
| "grad_norm": 0.19084055721759796, |
| "learning_rate": 3.3104232233162272e-06, |
| "loss": 0.0003, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.9043478260869566, |
| "grad_norm": 0.02695636637508869, |
| "learning_rate": 3.2978188118513814e-06, |
| "loss": 0.0001, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.9130434782608696, |
| "grad_norm": 0.046026572585105896, |
| "learning_rate": 3.2851917800548726e-06, |
| "loss": 0.0001, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.9217391304347826, |
| "grad_norm": 0.22791646420955658, |
| "learning_rate": 3.272542485937369e-06, |
| "loss": 0.0003, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.9304347826086956, |
| "grad_norm": 0.021017853170633316, |
| "learning_rate": 3.259871288140738e-06, |
| "loss": 0.0, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.9391304347826086, |
| "grad_norm": 0.009931406937539577, |
| "learning_rate": 3.247178545927876e-06, |
| "loss": 0.0, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.9478260869565216, |
| "grad_norm": 0.2339964658021927, |
| "learning_rate": 3.234464619172522e-06, |
| "loss": 0.0002, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.9565217391304346, |
| "grad_norm": 0.018152186647057533, |
| "learning_rate": 3.221729868349053e-06, |
| "loss": 0.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.965217391304348, |
| "grad_norm": 0.013054460287094116, |
| "learning_rate": 3.208974654522266e-06, |
| "loss": 0.0, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.973913043478261, |
| "grad_norm": 0.24946050345897675, |
| "learning_rate": 3.1961993393371405e-06, |
| "loss": 0.0003, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.982608695652174, |
| "grad_norm": 0.30698102712631226, |
| "learning_rate": 3.183404285008582e-06, |
| "loss": 0.001, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.991304347826087, |
| "grad_norm": 0.1075379028916359, |
| "learning_rate": 3.1705898543111576e-06, |
| "loss": 0.0005, |
| "step": 344 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.2779870927333832, |
| "learning_rate": 3.157756410568803e-06, |
| "loss": 0.0008, |
| "step": 345 |
| }, |
| { |
| "epoch": 3.008695652173913, |
| "grad_norm": 0.1857200264930725, |
| "learning_rate": 3.14490431764453e-06, |
| "loss": 0.0002, |
| "step": 346 |
| }, |
| { |
| "epoch": 3.017391304347826, |
| "grad_norm": 0.05824211612343788, |
| "learning_rate": 3.132033939930101e-06, |
| "loss": 0.0001, |
| "step": 347 |
| }, |
| { |
| "epoch": 3.026086956521739, |
| "grad_norm": 0.032336920499801636, |
| "learning_rate": 3.1191456423357047e-06, |
| "loss": 0.0001, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.034782608695652, |
| "grad_norm": 0.14255207777023315, |
| "learning_rate": 3.106239790279606e-06, |
| "loss": 0.0002, |
| "step": 349 |
| }, |
| { |
| "epoch": 3.0434782608695654, |
| "grad_norm": 0.047609731554985046, |
| "learning_rate": 3.093316749677788e-06, |
| "loss": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.0521739130434784, |
| "grad_norm": 0.06523387879133224, |
| "learning_rate": 3.0803768869335726e-06, |
| "loss": 0.0001, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.0608695652173914, |
| "grad_norm": 0.01587139070034027, |
| "learning_rate": 3.0674205689272378e-06, |
| "loss": 0.0, |
| "step": 352 |
| }, |
| { |
| "epoch": 3.0695652173913044, |
| "grad_norm": 0.02261805161833763, |
| "learning_rate": 3.054448163005613e-06, |
| "loss": 0.0001, |
| "step": 353 |
| }, |
| { |
| "epoch": 3.0782608695652174, |
| "grad_norm": 0.01384738925844431, |
| "learning_rate": 3.041460036971664e-06, |
| "loss": 0.0, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.0869565217391304, |
| "grad_norm": 0.03285490348935127, |
| "learning_rate": 3.028456559074061e-06, |
| "loss": 0.0001, |
| "step": 355 |
| }, |
| { |
| "epoch": 3.0956521739130434, |
| "grad_norm": 0.0027383833657950163, |
| "learning_rate": 3.0154380979967456e-06, |
| "loss": 0.0, |
| "step": 356 |
| }, |
| { |
| "epoch": 3.1043478260869564, |
| "grad_norm": 0.18999581038951874, |
| "learning_rate": 3.0024050228484713e-06, |
| "loss": 0.0002, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.1130434782608694, |
| "grad_norm": 0.09708679467439651, |
| "learning_rate": 2.9893577031523403e-06, |
| "loss": 0.0005, |
| "step": 358 |
| }, |
| { |
| "epoch": 3.121739130434783, |
| "grad_norm": 0.1416860669851303, |
| "learning_rate": 2.976296508835326e-06, |
| "loss": 0.0001, |
| "step": 359 |
| }, |
| { |
| "epoch": 3.130434782608696, |
| "grad_norm": 0.08077018707990646, |
| "learning_rate": 2.963221810217786e-06, |
| "loss": 0.0001, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.139130434782609, |
| "grad_norm": 0.0910787582397461, |
| "learning_rate": 2.9501339780029614e-06, |
| "loss": 0.0005, |
| "step": 361 |
| }, |
| { |
| "epoch": 3.1478260869565218, |
| "grad_norm": 0.08710524439811707, |
| "learning_rate": 2.937033383266466e-06, |
| "loss": 0.0004, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.1565217391304348, |
| "grad_norm": 0.00581876328215003, |
| "learning_rate": 2.923920397445766e-06, |
| "loss": 0.0, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.1652173913043478, |
| "grad_norm": 0.2737768292427063, |
| "learning_rate": 2.910795392329649e-06, |
| "loss": 0.0005, |
| "step": 364 |
| }, |
| { |
| "epoch": 3.1739130434782608, |
| "grad_norm": 0.003364310134202242, |
| "learning_rate": 2.8976587400476804e-06, |
| "loss": 0.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 3.1826086956521737, |
| "grad_norm": 0.004156508948653936, |
| "learning_rate": 2.884510813059657e-06, |
| "loss": 0.0, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.1913043478260867, |
| "grad_norm": 0.01153493206948042, |
| "learning_rate": 2.871351984145042e-06, |
| "loss": 0.0, |
| "step": 367 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.003978746011853218, |
| "learning_rate": 2.8581826263923993e-06, |
| "loss": 0.0, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.208695652173913, |
| "grad_norm": 0.007011134643107653, |
| "learning_rate": 2.8450031131888147e-06, |
| "loss": 0.0, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.217391304347826, |
| "grad_norm": 0.0030873024370521307, |
| "learning_rate": 2.8318138182093053e-06, |
| "loss": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.226086956521739, |
| "grad_norm": 0.005853947252035141, |
| "learning_rate": 2.8186151154062314e-06, |
| "loss": 0.0, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.234782608695652, |
| "grad_norm": 0.005672338884323835, |
| "learning_rate": 2.8054073789986884e-06, |
| "loss": 0.0, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.243478260869565, |
| "grad_norm": 0.002228250727057457, |
| "learning_rate": 2.792190983461902e-06, |
| "loss": 0.0, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.252173913043478, |
| "grad_norm": 0.03767574205994606, |
| "learning_rate": 2.7789663035166035e-06, |
| "loss": 0.0001, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.260869565217391, |
| "grad_norm": 0.04303343966603279, |
| "learning_rate": 2.7657337141184137e-06, |
| "loss": 0.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.269565217391304, |
| "grad_norm": 1.011793613433838, |
| "learning_rate": 2.7524935904472056e-06, |
| "loss": 0.0003, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.2782608695652176, |
| "grad_norm": 0.003984061535447836, |
| "learning_rate": 2.73924630789647e-06, |
| "loss": 0.0, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.2869565217391306, |
| "grad_norm": 0.0033289589919149876, |
| "learning_rate": 2.7259922420626705e-06, |
| "loss": 0.0, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.2956521739130435, |
| "grad_norm": 0.008105777204036713, |
| "learning_rate": 2.7127317687345973e-06, |
| "loss": 0.0, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.3043478260869565, |
| "grad_norm": 0.0022414308041334152, |
| "learning_rate": 2.699465263882708e-06, |
| "loss": 0.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.3130434782608695, |
| "grad_norm": 0.004690825939178467, |
| "learning_rate": 2.686193103648472e-06, |
| "loss": 0.0, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.3217391304347825, |
| "grad_norm": 0.07751515507698059, |
| "learning_rate": 2.672915664333704e-06, |
| "loss": 0.0004, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.3304347826086955, |
| "grad_norm": 0.0041257767006754875, |
| "learning_rate": 2.6596333223898934e-06, |
| "loss": 0.0, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.3391304347826085, |
| "grad_norm": 0.001377386855892837, |
| "learning_rate": 2.6463464544075344e-06, |
| "loss": 0.0, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.3478260869565215, |
| "grad_norm": 0.08914496004581451, |
| "learning_rate": 2.6330554371054466e-06, |
| "loss": 0.0001, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.356521739130435, |
| "grad_norm": 0.2693479359149933, |
| "learning_rate": 2.6197606473200924e-06, |
| "loss": 0.0002, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.365217391304348, |
| "grad_norm": 0.009363976307213306, |
| "learning_rate": 2.6064624619948966e-06, |
| "loss": 0.0, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.373913043478261, |
| "grad_norm": 0.0025562141090631485, |
| "learning_rate": 2.593161258169554e-06, |
| "loss": 0.0, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.382608695652174, |
| "grad_norm": 0.031549256294965744, |
| "learning_rate": 2.579857412969345e-06, |
| "loss": 0.0, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.391304347826087, |
| "grad_norm": 0.1631297767162323, |
| "learning_rate": 2.5665513035944373e-06, |
| "loss": 0.0004, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.0037248400039970875, |
| "learning_rate": 2.5532433073091967e-06, |
| "loss": 0.0, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.408695652173913, |
| "grad_norm": 0.010381661355495453, |
| "learning_rate": 2.539933801431487e-06, |
| "loss": 0.0, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.417391304347826, |
| "grad_norm": 0.01955062709748745, |
| "learning_rate": 2.5266231633219733e-06, |
| "loss": 0.0, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.426086956521739, |
| "grad_norm": 0.00635969964787364, |
| "learning_rate": 2.513311770373421e-06, |
| "loss": 0.0, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.4347826086956523, |
| "grad_norm": 0.07282191514968872, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0001, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.4434782608695653, |
| "grad_norm": 0.00482774805277586, |
| "learning_rate": 2.4866882296265797e-06, |
| "loss": 0.0, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.4521739130434783, |
| "grad_norm": 0.006832276936620474, |
| "learning_rate": 2.473376836678028e-06, |
| "loss": 0.0, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.4608695652173913, |
| "grad_norm": 0.0018247866537421942, |
| "learning_rate": 2.4600661985685132e-06, |
| "loss": 0.0, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.4695652173913043, |
| "grad_norm": 0.056768789887428284, |
| "learning_rate": 2.446756692690804e-06, |
| "loss": 0.0001, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.4782608695652173, |
| "grad_norm": 0.013176783919334412, |
| "learning_rate": 2.4334486964055635e-06, |
| "loss": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.4869565217391303, |
| "grad_norm": 0.012747037224471569, |
| "learning_rate": 2.4201425870306566e-06, |
| "loss": 0.0, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.4956521739130437, |
| "grad_norm": 0.013687407597899437, |
| "learning_rate": 2.406838741830446e-06, |
| "loss": 0.0, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.5043478260869563, |
| "grad_norm": 0.23240718245506287, |
| "learning_rate": 2.393537538005104e-06, |
| "loss": 0.0001, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.5130434782608697, |
| "grad_norm": 0.008161540143191814, |
| "learning_rate": 2.380239352679908e-06, |
| "loss": 0.0, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.5217391304347827, |
| "grad_norm": 0.05203310027718544, |
| "learning_rate": 2.3669445628945543e-06, |
| "loss": 0.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.5304347826086957, |
| "grad_norm": 0.007062139920890331, |
| "learning_rate": 2.3536535455924656e-06, |
| "loss": 0.0, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.5391304347826087, |
| "grad_norm": 0.01181522011756897, |
| "learning_rate": 2.340366677610107e-06, |
| "loss": 0.0, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.5478260869565217, |
| "grad_norm": 0.00283506466075778, |
| "learning_rate": 2.327084335666297e-06, |
| "loss": 0.0, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.5565217391304347, |
| "grad_norm": 0.0024429778568446636, |
| "learning_rate": 2.313806896351529e-06, |
| "loss": 0.0, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.5652173913043477, |
| "grad_norm": 0.004740845412015915, |
| "learning_rate": 2.300534736117292e-06, |
| "loss": 0.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.573913043478261, |
| "grad_norm": 0.04463421553373337, |
| "learning_rate": 2.2872682312654035e-06, |
| "loss": 0.0002, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.5826086956521737, |
| "grad_norm": 0.005315855145454407, |
| "learning_rate": 2.2740077579373303e-06, |
| "loss": 0.0, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.591304347826087, |
| "grad_norm": 0.002299713436514139, |
| "learning_rate": 2.2607536921035313e-06, |
| "loss": 0.0, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.01868036948144436, |
| "learning_rate": 2.247506409552795e-06, |
| "loss": 0.0, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.608695652173913, |
| "grad_norm": 0.01010120939463377, |
| "learning_rate": 2.234266285881587e-06, |
| "loss": 0.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.617391304347826, |
| "grad_norm": 0.005287417210638523, |
| "learning_rate": 2.221033696483397e-06, |
| "loss": 0.0, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.626086956521739, |
| "grad_norm": 0.005691695027053356, |
| "learning_rate": 2.2078090165380992e-06, |
| "loss": 0.0, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.634782608695652, |
| "grad_norm": 0.0028878487646579742, |
| "learning_rate": 2.194592621001311e-06, |
| "loss": 0.0, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.643478260869565, |
| "grad_norm": 0.0035773522686213255, |
| "learning_rate": 2.1813848845937695e-06, |
| "loss": 0.0, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.6521739130434785, |
| "grad_norm": 0.0027882216963917017, |
| "learning_rate": 2.1681861817906955e-06, |
| "loss": 0.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.660869565217391, |
| "grad_norm": 0.00462096743285656, |
| "learning_rate": 2.1549968868111866e-06, |
| "loss": 0.0, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.6695652173913045, |
| "grad_norm": 0.00283925817348063, |
| "learning_rate": 2.141817373607601e-06, |
| "loss": 0.0, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.6782608695652175, |
| "grad_norm": 0.002093710470944643, |
| "learning_rate": 2.1286480158549583e-06, |
| "loss": 0.0, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.6869565217391305, |
| "grad_norm": 0.0020851960871368647, |
| "learning_rate": 2.1154891869403436e-06, |
| "loss": 0.0, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.6956521739130435, |
| "grad_norm": 0.005665747448801994, |
| "learning_rate": 2.1023412599523204e-06, |
| "loss": 0.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.7043478260869565, |
| "grad_norm": 0.00797071773558855, |
| "learning_rate": 2.089204607670352e-06, |
| "loss": 0.0, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.7130434782608694, |
| "grad_norm": 0.0011778065236285329, |
| "learning_rate": 2.0760796025542342e-06, |
| "loss": 0.0, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.7217391304347824, |
| "grad_norm": 0.0023867737036198378, |
| "learning_rate": 2.0629666167335344e-06, |
| "loss": 0.0, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.730434782608696, |
| "grad_norm": 0.00638067489489913, |
| "learning_rate": 2.0498660219970395e-06, |
| "loss": 0.0, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.7391304347826084, |
| "grad_norm": 0.012454311363399029, |
| "learning_rate": 2.0367781897822147e-06, |
| "loss": 0.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.747826086956522, |
| "grad_norm": 0.002240461064502597, |
| "learning_rate": 2.0237034911646745e-06, |
| "loss": 0.0, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.756521739130435, |
| "grad_norm": 0.001390191144309938, |
| "learning_rate": 2.0106422968476606e-06, |
| "loss": 0.0, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.765217391304348, |
| "grad_norm": 0.003424042835831642, |
| "learning_rate": 1.9975949771515296e-06, |
| "loss": 0.0, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.773913043478261, |
| "grad_norm": 0.0024630806874483824, |
| "learning_rate": 1.9845619020032552e-06, |
| "loss": 0.0, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.782608695652174, |
| "grad_norm": 0.0031529609113931656, |
| "learning_rate": 1.9715434409259393e-06, |
| "loss": 0.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.791304347826087, |
| "grad_norm": 0.005373111926019192, |
| "learning_rate": 1.958539963028337e-06, |
| "loss": 0.0, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.0037128408439457417, |
| "learning_rate": 1.9455518369943873e-06, |
| "loss": 0.0, |
| "step": 437 |
| }, |
| { |
| "epoch": 3.8086956521739133, |
| "grad_norm": 0.002308671362698078, |
| "learning_rate": 1.9325794310727626e-06, |
| "loss": 0.0, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.8173913043478263, |
| "grad_norm": 0.002887170063331723, |
| "learning_rate": 1.9196231130664282e-06, |
| "loss": 0.0, |
| "step": 439 |
| }, |
| { |
| "epoch": 3.8260869565217392, |
| "grad_norm": 0.003565647406503558, |
| "learning_rate": 1.906683250322213e-06, |
| "loss": 0.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.8347826086956522, |
| "grad_norm": 0.0023865115363150835, |
| "learning_rate": 1.8937602097203945e-06, |
| "loss": 0.0, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.8434782608695652, |
| "grad_norm": 0.004892145283520222, |
| "learning_rate": 1.8808543576642966e-06, |
| "loss": 0.0, |
| "step": 442 |
| }, |
| { |
| "epoch": 3.8521739130434782, |
| "grad_norm": 0.0018583799246698618, |
| "learning_rate": 1.8679660600698996e-06, |
| "loss": 0.0, |
| "step": 443 |
| }, |
| { |
| "epoch": 3.860869565217391, |
| "grad_norm": 0.15015952289104462, |
| "learning_rate": 1.8550956823554708e-06, |
| "loss": 0.0003, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.869565217391304, |
| "grad_norm": 0.0015204929513856769, |
| "learning_rate": 1.8422435894311973e-06, |
| "loss": 0.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.878260869565217, |
| "grad_norm": 0.0579560324549675, |
| "learning_rate": 1.8294101456888433e-06, |
| "loss": 0.0001, |
| "step": 446 |
| }, |
| { |
| "epoch": 3.8869565217391306, |
| "grad_norm": 0.002745965728536248, |
| "learning_rate": 1.8165957149914182e-06, |
| "loss": 0.0, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.8956521739130436, |
| "grad_norm": 0.00269194389693439, |
| "learning_rate": 1.8038006606628599e-06, |
| "loss": 0.0, |
| "step": 448 |
| }, |
| { |
| "epoch": 3.9043478260869566, |
| "grad_norm": 0.003470352618023753, |
| "learning_rate": 1.7910253454777346e-06, |
| "loss": 0.0, |
| "step": 449 |
| }, |
| { |
| "epoch": 3.9130434782608696, |
| "grad_norm": 0.007889937609434128, |
| "learning_rate": 1.7782701316509482e-06, |
| "loss": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.9217391304347826, |
| "grad_norm": 0.002997358562424779, |
| "learning_rate": 1.7655353808274795e-06, |
| "loss": 0.0, |
| "step": 451 |
| }, |
| { |
| "epoch": 3.9304347826086956, |
| "grad_norm": 0.06782340258359909, |
| "learning_rate": 1.752821454072124e-06, |
| "loss": 0.0003, |
| "step": 452 |
| }, |
| { |
| "epoch": 3.9391304347826086, |
| "grad_norm": 0.001661222893744707, |
| "learning_rate": 1.7401287118592626e-06, |
| "loss": 0.0, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.9478260869565216, |
| "grad_norm": 0.0512867271900177, |
| "learning_rate": 1.7274575140626318e-06, |
| "loss": 0.0, |
| "step": 454 |
| }, |
| { |
| "epoch": 3.9565217391304346, |
| "grad_norm": 0.0020310841500759125, |
| "learning_rate": 1.7148082199451288e-06, |
| "loss": 0.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.965217391304348, |
| "grad_norm": 0.0014185906620696187, |
| "learning_rate": 1.7021811881486186e-06, |
| "loss": 0.0, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.973913043478261, |
| "grad_norm": 0.00783043447881937, |
| "learning_rate": 1.6895767766837734e-06, |
| "loss": 0.0, |
| "step": 457 |
| }, |
| { |
| "epoch": 3.982608695652174, |
| "grad_norm": 0.015537451021373272, |
| "learning_rate": 1.6769953429199142e-06, |
| "loss": 0.0, |
| "step": 458 |
| }, |
| { |
| "epoch": 3.991304347826087, |
| "grad_norm": 0.003787545021623373, |
| "learning_rate": 1.6644372435748823e-06, |
| "loss": 0.0, |
| "step": 459 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.0017769169062376022, |
| "learning_rate": 1.6519028347049242e-06, |
| "loss": 0.0, |
| "step": 460 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 690, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 115, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1611348682774938e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|