{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8433179723502304,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018433179723502304,
      "grad_norm": 2.02589750289917,
      "learning_rate": 1.8404907975460125e-07,
      "loss": 1.9595,
      "step": 1
    },
    {
      "epoch": 0.003686635944700461,
      "grad_norm": 1.9972498416900635,
      "learning_rate": 3.680981595092025e-07,
      "loss": 1.9238,
      "step": 2
    },
    {
      "epoch": 0.005529953917050691,
      "grad_norm": 1.9820266962051392,
      "learning_rate": 5.521472392638038e-07,
      "loss": 1.9764,
      "step": 3
    },
    {
      "epoch": 0.007373271889400922,
      "grad_norm": 1.9296741485595703,
      "learning_rate": 7.36196319018405e-07,
      "loss": 1.9161,
      "step": 4
    },
    {
      "epoch": 0.009216589861751152,
      "grad_norm": 2.019129514694214,
      "learning_rate": 9.202453987730061e-07,
      "loss": 1.9629,
      "step": 5
    },
    {
      "epoch": 0.011059907834101382,
      "grad_norm": 1.9039007425308228,
      "learning_rate": 1.1042944785276075e-06,
      "loss": 1.9158,
      "step": 6
    },
    {
      "epoch": 0.012903225806451613,
      "grad_norm": 1.9537602663040161,
      "learning_rate": 1.2883435582822088e-06,
      "loss": 1.9572,
      "step": 7
    },
    {
      "epoch": 0.014746543778801843,
      "grad_norm": 1.9474399089813232,
      "learning_rate": 1.47239263803681e-06,
      "loss": 1.9453,
      "step": 8
    },
    {
      "epoch": 0.016589861751152075,
      "grad_norm": 1.8985520601272583,
      "learning_rate": 1.656441717791411e-06,
      "loss": 1.9377,
      "step": 9
    },
    {
      "epoch": 0.018433179723502304,
      "grad_norm": 1.9258599281311035,
      "learning_rate": 1.8404907975460122e-06,
      "loss": 1.9312,
      "step": 10
    },
    {
      "epoch": 0.020276497695852536,
      "grad_norm": 1.8695780038833618,
      "learning_rate": 2.0245398773006137e-06,
      "loss": 1.9273,
      "step": 11
    },
    {
      "epoch": 0.022119815668202765,
      "grad_norm": 1.8287005424499512,
      "learning_rate": 2.208588957055215e-06,
      "loss": 1.8951,
      "step": 12
    },
    {
      "epoch": 0.023963133640552997,
      "grad_norm": 1.7955673933029175,
      "learning_rate": 2.392638036809816e-06,
      "loss": 1.9054,
      "step": 13
    },
    {
      "epoch": 0.025806451612903226,
      "grad_norm": 1.8065942525863647,
      "learning_rate": 2.5766871165644175e-06,
      "loss": 1.9343,
      "step": 14
    },
    {
      "epoch": 0.027649769585253458,
      "grad_norm": 1.7342883348464966,
      "learning_rate": 2.7607361963190186e-06,
      "loss": 1.8964,
      "step": 15
    },
    {
      "epoch": 0.029493087557603687,
      "grad_norm": 1.7930785417556763,
      "learning_rate": 2.94478527607362e-06,
      "loss": 1.9385,
      "step": 16
    },
    {
      "epoch": 0.03133640552995392,
      "grad_norm": 1.6384600400924683,
      "learning_rate": 3.1288343558282206e-06,
      "loss": 1.9145,
      "step": 17
    },
    {
      "epoch": 0.03317972350230415,
      "grad_norm": 1.5618699789047241,
      "learning_rate": 3.312883435582822e-06,
      "loss": 1.9392,
      "step": 18
    },
    {
      "epoch": 0.035023041474654376,
      "grad_norm": 1.5488923788070679,
      "learning_rate": 3.496932515337423e-06,
      "loss": 1.8919,
      "step": 19
    },
    {
      "epoch": 0.03686635944700461,
      "grad_norm": 1.3122743368148804,
      "learning_rate": 3.6809815950920245e-06,
      "loss": 1.8735,
      "step": 20
    },
    {
      "epoch": 0.03870967741935484,
      "grad_norm": 1.2238839864730835,
      "learning_rate": 3.865030674846626e-06,
      "loss": 1.9463,
      "step": 21
    },
    {
      "epoch": 0.04055299539170507,
      "grad_norm": 1.0287063121795654,
      "learning_rate": 4.049079754601227e-06,
      "loss": 1.8726,
      "step": 22
    },
    {
      "epoch": 0.0423963133640553,
      "grad_norm": 0.8679629564285278,
      "learning_rate": 4.233128834355828e-06,
      "loss": 1.8418,
      "step": 23
    },
    {
      "epoch": 0.04423963133640553,
      "grad_norm": 0.7079375386238098,
      "learning_rate": 4.41717791411043e-06,
      "loss": 1.8251,
      "step": 24
    },
    {
      "epoch": 0.04608294930875576,
      "grad_norm": 0.9220630526542664,
      "learning_rate": 4.601226993865031e-06,
      "loss": 1.7802,
      "step": 25
    },
    {
      "epoch": 0.047926267281105994,
      "grad_norm": 0.8715929985046387,
      "learning_rate": 4.785276073619632e-06,
      "loss": 1.8777,
      "step": 26
    },
    {
      "epoch": 0.04976958525345622,
      "grad_norm": 0.9550811052322388,
      "learning_rate": 4.969325153374233e-06,
      "loss": 1.8371,
      "step": 27
    },
    {
      "epoch": 0.05161290322580645,
      "grad_norm": 0.90284264087677,
      "learning_rate": 5.153374233128835e-06,
      "loss": 1.9045,
      "step": 28
    },
    {
      "epoch": 0.053456221198156684,
      "grad_norm": 0.8552197217941284,
      "learning_rate": 5.337423312883436e-06,
      "loss": 1.8053,
      "step": 29
    },
    {
      "epoch": 0.055299539170506916,
      "grad_norm": 0.8264591693878174,
      "learning_rate": 5.521472392638037e-06,
      "loss": 1.8451,
      "step": 30
    },
    {
      "epoch": 0.05714285714285714,
      "grad_norm": 0.8775638341903687,
      "learning_rate": 5.7055214723926385e-06,
      "loss": 1.8552,
      "step": 31
    },
    {
      "epoch": 0.05898617511520737,
      "grad_norm": 0.807953417301178,
      "learning_rate": 5.88957055214724e-06,
      "loss": 1.8164,
      "step": 32
    },
    {
      "epoch": 0.060829493087557605,
      "grad_norm": 0.7050386071205139,
      "learning_rate": 6.0736196319018406e-06,
      "loss": 1.7819,
      "step": 33
    },
    {
      "epoch": 0.06267281105990784,
      "grad_norm": 0.6614200472831726,
      "learning_rate": 6.257668711656441e-06,
      "loss": 1.8147,
      "step": 34
    },
    {
      "epoch": 0.06451612903225806,
      "grad_norm": 0.5420933365821838,
      "learning_rate": 6.4417177914110434e-06,
      "loss": 1.7534,
      "step": 35
    },
    {
      "epoch": 0.0663594470046083,
      "grad_norm": 0.42041122913360596,
      "learning_rate": 6.625766871165644e-06,
      "loss": 1.7973,
      "step": 36
    },
    {
      "epoch": 0.06820276497695853,
      "grad_norm": 0.3737146854400635,
      "learning_rate": 6.8098159509202454e-06,
      "loss": 1.8,
      "step": 37
    },
    {
      "epoch": 0.07004608294930875,
      "grad_norm": 0.37116268277168274,
      "learning_rate": 6.993865030674846e-06,
      "loss": 1.713,
      "step": 38
    },
    {
      "epoch": 0.07188940092165899,
      "grad_norm": 0.37946197390556335,
      "learning_rate": 7.177914110429448e-06,
      "loss": 1.7656,
      "step": 39
    },
    {
      "epoch": 0.07373271889400922,
      "grad_norm": 0.39086607098579407,
      "learning_rate": 7.361963190184049e-06,
      "loss": 1.7899,
      "step": 40
    },
    {
      "epoch": 0.07557603686635944,
      "grad_norm": 0.4194660186767578,
      "learning_rate": 7.54601226993865e-06,
      "loss": 1.7247,
      "step": 41
    },
    {
      "epoch": 0.07741935483870968,
      "grad_norm": 0.41651099920272827,
      "learning_rate": 7.730061349693252e-06,
      "loss": 1.7437,
      "step": 42
    },
    {
      "epoch": 0.0792626728110599,
      "grad_norm": 0.4337350130081177,
      "learning_rate": 7.914110429447852e-06,
      "loss": 1.7758,
      "step": 43
    },
    {
      "epoch": 0.08110599078341015,
      "grad_norm": 0.39606988430023193,
      "learning_rate": 8.098159509202455e-06,
      "loss": 1.7264,
      "step": 44
    },
    {
      "epoch": 0.08294930875576037,
      "grad_norm": 0.35638228058815,
      "learning_rate": 8.282208588957055e-06,
      "loss": 1.6925,
      "step": 45
    },
    {
      "epoch": 0.0847926267281106,
      "grad_norm": 0.3481025695800781,
      "learning_rate": 8.466257668711656e-06,
      "loss": 1.7767,
      "step": 46
    },
    {
      "epoch": 0.08663594470046083,
      "grad_norm": 0.3551636040210724,
      "learning_rate": 8.650306748466258e-06,
      "loss": 1.8833,
      "step": 47
    },
    {
      "epoch": 0.08847926267281106,
      "grad_norm": 0.3395654261112213,
      "learning_rate": 8.83435582822086e-06,
      "loss": 1.7341,
      "step": 48
    },
    {
      "epoch": 0.09032258064516129,
      "grad_norm": 0.3334786891937256,
      "learning_rate": 9.01840490797546e-06,
      "loss": 1.7423,
      "step": 49
    },
    {
      "epoch": 0.09216589861751152,
      "grad_norm": 0.3836233615875244,
      "learning_rate": 9.202453987730062e-06,
      "loss": 1.7198,
      "step": 50
    },
    {
      "epoch": 0.09400921658986175,
      "grad_norm": 0.3345714807510376,
      "learning_rate": 9.386503067484664e-06,
      "loss": 1.7294,
      "step": 51
    },
    {
      "epoch": 0.09585253456221199,
      "grad_norm": 0.3579282760620117,
      "learning_rate": 9.570552147239264e-06,
      "loss": 1.7277,
      "step": 52
    },
    {
      "epoch": 0.09769585253456221,
      "grad_norm": 0.3569463789463043,
      "learning_rate": 9.754601226993865e-06,
      "loss": 1.7979,
      "step": 53
    },
    {
      "epoch": 0.09953917050691244,
      "grad_norm": 0.3132180869579315,
      "learning_rate": 9.938650306748466e-06,
      "loss": 1.7392,
      "step": 54
    },
    {
      "epoch": 0.10138248847926268,
      "grad_norm": 0.3075284957885742,
      "learning_rate": 1.0122699386503068e-05,
      "loss": 1.6929,
      "step": 55
    },
    {
      "epoch": 0.1032258064516129,
      "grad_norm": 0.3144418001174927,
      "learning_rate": 1.030674846625767e-05,
      "loss": 1.7455,
      "step": 56
    },
    {
      "epoch": 0.10506912442396313,
      "grad_norm": 0.30215781927108765,
      "learning_rate": 1.0490797546012269e-05,
      "loss": 1.7015,
      "step": 57
    },
    {
      "epoch": 0.10691244239631337,
      "grad_norm": 0.3008810579776764,
      "learning_rate": 1.0674846625766871e-05,
      "loss": 1.702,
      "step": 58
    },
    {
      "epoch": 0.10875576036866359,
      "grad_norm": 0.2983757555484772,
      "learning_rate": 1.0858895705521474e-05,
      "loss": 1.7593,
      "step": 59
    },
    {
      "epoch": 0.11059907834101383,
      "grad_norm": 0.3113389015197754,
      "learning_rate": 1.1042944785276074e-05,
      "loss": 1.7446,
      "step": 60
    },
    {
      "epoch": 0.11244239631336406,
      "grad_norm": 0.2977043390274048,
      "learning_rate": 1.1226993865030675e-05,
      "loss": 1.7666,
      "step": 61
    },
    {
      "epoch": 0.11428571428571428,
      "grad_norm": 0.3056432902812958,
      "learning_rate": 1.1411042944785277e-05,
      "loss": 1.7226,
      "step": 62
    },
    {
      "epoch": 0.11612903225806452,
      "grad_norm": 0.2962339520454407,
      "learning_rate": 1.1595092024539878e-05,
      "loss": 1.7503,
      "step": 63
    },
    {
      "epoch": 0.11797235023041475,
      "grad_norm": 0.30185747146606445,
      "learning_rate": 1.177914110429448e-05,
      "loss": 1.7478,
      "step": 64
    },
    {
      "epoch": 0.11981566820276497,
      "grad_norm": 0.29040098190307617,
      "learning_rate": 1.1963190184049079e-05,
      "loss": 1.779,
      "step": 65
    },
    {
      "epoch": 0.12165898617511521,
      "grad_norm": 0.2708752155303955,
      "learning_rate": 1.2147239263803681e-05,
      "loss": 1.6816,
      "step": 66
    },
    {
      "epoch": 0.12350230414746544,
      "grad_norm": 0.2978411018848419,
      "learning_rate": 1.2331288343558283e-05,
      "loss": 1.7439,
      "step": 67
    },
    {
      "epoch": 0.12534562211981568,
      "grad_norm": 0.28008463978767395,
      "learning_rate": 1.2515337423312882e-05,
      "loss": 1.6918,
      "step": 68
    },
    {
      "epoch": 0.1271889400921659,
      "grad_norm": 0.29284995794296265,
      "learning_rate": 1.2699386503067485e-05,
      "loss": 1.7043,
      "step": 69
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 0.3163938522338867,
      "learning_rate": 1.2883435582822087e-05,
      "loss": 1.7501,
      "step": 70
    },
    {
      "epoch": 0.13087557603686636,
      "grad_norm": 0.28190693259239197,
      "learning_rate": 1.3067484662576687e-05,
      "loss": 1.7396,
      "step": 71
    },
    {
      "epoch": 0.1327188940092166,
      "grad_norm": 0.27740147709846497,
      "learning_rate": 1.3251533742331288e-05,
      "loss": 1.7276,
      "step": 72
    },
    {
      "epoch": 0.13456221198156681,
      "grad_norm": 0.29363468289375305,
      "learning_rate": 1.343558282208589e-05,
      "loss": 1.7103,
      "step": 73
    },
    {
      "epoch": 0.13640552995391705,
      "grad_norm": 0.28017935156822205,
      "learning_rate": 1.3619631901840491e-05,
      "loss": 1.7418,
      "step": 74
    },
    {
      "epoch": 0.1382488479262673,
      "grad_norm": 0.2688934803009033,
      "learning_rate": 1.3803680981595093e-05,
      "loss": 1.7143,
      "step": 75
    },
    {
      "epoch": 0.1400921658986175,
      "grad_norm": 0.28602007031440735,
      "learning_rate": 1.3987730061349692e-05,
      "loss": 1.7034,
      "step": 76
    },
    {
      "epoch": 0.14193548387096774,
      "grad_norm": 0.2809165120124817,
      "learning_rate": 1.4171779141104294e-05,
      "loss": 1.726,
      "step": 77
    },
    {
      "epoch": 0.14377880184331798,
      "grad_norm": 0.2808230519294739,
      "learning_rate": 1.4355828220858897e-05,
      "loss": 1.7184,
      "step": 78
    },
    {
      "epoch": 0.1456221198156682,
      "grad_norm": 0.2852628827095032,
      "learning_rate": 1.4539877300613497e-05,
      "loss": 1.7506,
      "step": 79
    },
    {
      "epoch": 0.14746543778801843,
      "grad_norm": 0.28406837582588196,
      "learning_rate": 1.4723926380368098e-05,
      "loss": 1.667,
      "step": 80
    },
    {
      "epoch": 0.14930875576036867,
      "grad_norm": 0.2859112322330475,
      "learning_rate": 1.49079754601227e-05,
      "loss": 1.7629,
      "step": 81
    },
    {
      "epoch": 0.15115207373271888,
      "grad_norm": 0.2811156213283539,
      "learning_rate": 1.50920245398773e-05,
      "loss": 1.7055,
      "step": 82
    },
    {
      "epoch": 0.15299539170506912,
      "grad_norm": 0.27561160922050476,
      "learning_rate": 1.52760736196319e-05,
      "loss": 1.7149,
      "step": 83
    },
    {
      "epoch": 0.15483870967741936,
      "grad_norm": 0.28458908200263977,
      "learning_rate": 1.5460122699386504e-05,
      "loss": 1.702,
      "step": 84
    },
    {
      "epoch": 0.15668202764976957,
      "grad_norm": 0.2881270945072174,
      "learning_rate": 1.5644171779141104e-05,
      "loss": 1.7771,
      "step": 85
    },
    {
      "epoch": 0.1585253456221198,
      "grad_norm": 0.2840360701084137,
      "learning_rate": 1.5828220858895705e-05,
      "loss": 1.704,
      "step": 86
    },
    {
      "epoch": 0.16036866359447005,
      "grad_norm": 0.28690823912620544,
      "learning_rate": 1.601226993865031e-05,
      "loss": 1.7032,
      "step": 87
    },
    {
      "epoch": 0.1622119815668203,
      "grad_norm": 0.28307586908340454,
      "learning_rate": 1.619631901840491e-05,
      "loss": 1.7504,
      "step": 88
    },
    {
      "epoch": 0.1640552995391705,
      "grad_norm": 0.2825368046760559,
      "learning_rate": 1.638036809815951e-05,
      "loss": 1.6755,
      "step": 89
    },
    {
      "epoch": 0.16589861751152074,
      "grad_norm": 0.2756441533565521,
      "learning_rate": 1.656441717791411e-05,
      "loss": 1.6542,
      "step": 90
    },
    {
      "epoch": 0.16774193548387098,
      "grad_norm": 0.2785171866416931,
      "learning_rate": 1.674846625766871e-05,
      "loss": 1.7041,
      "step": 91
    },
    {
      "epoch": 0.1695852534562212,
      "grad_norm": 0.2845054864883423,
      "learning_rate": 1.693251533742331e-05,
      "loss": 1.664,
      "step": 92
    },
    {
      "epoch": 0.17142857142857143,
      "grad_norm": 0.2794802486896515,
      "learning_rate": 1.7116564417177916e-05,
      "loss": 1.674,
      "step": 93
    },
    {
      "epoch": 0.17327188940092167,
      "grad_norm": 0.30158528685569763,
      "learning_rate": 1.7300613496932516e-05,
      "loss": 1.6702,
      "step": 94
    },
    {
      "epoch": 0.17511520737327188,
      "grad_norm": 0.3102332353591919,
      "learning_rate": 1.7484662576687117e-05,
      "loss": 1.6675,
      "step": 95
    },
    {
      "epoch": 0.17695852534562212,
      "grad_norm": 0.30239519476890564,
      "learning_rate": 1.766871165644172e-05,
      "loss": 1.735,
      "step": 96
    },
    {
      "epoch": 0.17880184331797236,
      "grad_norm": 0.3213733732700348,
      "learning_rate": 1.785276073619632e-05,
      "loss": 1.7476,
      "step": 97
    },
    {
      "epoch": 0.18064516129032257,
      "grad_norm": 0.30181968212127686,
      "learning_rate": 1.803680981595092e-05,
      "loss": 1.7693,
      "step": 98
    },
    {
      "epoch": 0.1824884792626728,
      "grad_norm": 0.29706043004989624,
      "learning_rate": 1.8220858895705523e-05,
      "loss": 1.6862,
      "step": 99
    },
    {
      "epoch": 0.18433179723502305,
      "grad_norm": 0.31851696968078613,
      "learning_rate": 1.8404907975460123e-05,
      "loss": 1.713,
      "step": 100
    },
    {
      "epoch": 0.18617511520737326,
      "grad_norm": 0.2876022756099701,
      "learning_rate": 1.8588957055214724e-05,
      "loss": 1.6564,
      "step": 101
    },
    {
      "epoch": 0.1880184331797235,
      "grad_norm": 0.30441394448280334,
      "learning_rate": 1.8773006134969328e-05,
      "loss": 1.7286,
      "step": 102
    },
    {
      "epoch": 0.18986175115207374,
      "grad_norm": 0.2926851809024811,
      "learning_rate": 1.8957055214723928e-05,
      "loss": 1.6485,
      "step": 103
    },
    {
      "epoch": 0.19170506912442398,
      "grad_norm": 0.2925703525543213,
      "learning_rate": 1.914110429447853e-05,
      "loss": 1.7372,
      "step": 104
    },
    {
      "epoch": 0.1935483870967742,
      "grad_norm": 0.29036155343055725,
      "learning_rate": 1.9325153374233126e-05,
      "loss": 1.6868,
      "step": 105
    },
    {
      "epoch": 0.19539170506912443,
      "grad_norm": 0.3051284849643707,
      "learning_rate": 1.950920245398773e-05,
      "loss": 1.7132,
      "step": 106
    },
    {
      "epoch": 0.19723502304147467,
      "grad_norm": 0.295520156621933,
      "learning_rate": 1.969325153374233e-05,
      "loss": 1.6949,
      "step": 107
    },
    {
      "epoch": 0.19907834101382488,
      "grad_norm": 0.3010528087615967,
      "learning_rate": 1.987730061349693e-05,
      "loss": 1.6948,
      "step": 108
    },
    {
      "epoch": 0.20092165898617512,
      "grad_norm": 0.3026741147041321,
      "learning_rate": 2.0061349693251535e-05,
      "loss": 1.7014,
      "step": 109
    },
    {
      "epoch": 0.20276497695852536,
      "grad_norm": 0.32000070810317993,
      "learning_rate": 2.0245398773006136e-05,
      "loss": 1.709,
      "step": 110
    },
    {
      "epoch": 0.20460829493087557,
      "grad_norm": 0.29906362295150757,
      "learning_rate": 2.0429447852760736e-05,
      "loss": 1.6834,
      "step": 111
    },
    {
      "epoch": 0.2064516129032258,
      "grad_norm": 0.31326961517333984,
      "learning_rate": 2.061349693251534e-05,
      "loss": 1.5777,
      "step": 112
    },
    {
      "epoch": 0.20829493087557605,
      "grad_norm": 0.3156152069568634,
      "learning_rate": 2.0797546012269938e-05,
      "loss": 1.6934,
      "step": 113
    },
    {
      "epoch": 0.21013824884792626,
      "grad_norm": 0.31258904933929443,
      "learning_rate": 2.0981595092024538e-05,
      "loss": 1.6717,
      "step": 114
    },
    {
      "epoch": 0.2119815668202765,
      "grad_norm": 0.30550417304039,
      "learning_rate": 2.1165644171779142e-05,
      "loss": 1.6857,
      "step": 115
    },
    {
      "epoch": 0.21382488479262673,
      "grad_norm": 0.3166547417640686,
      "learning_rate": 2.1349693251533743e-05,
      "loss": 1.6959,
      "step": 116
    },
    {
      "epoch": 0.21566820276497695,
      "grad_norm": 0.30488964915275574,
      "learning_rate": 2.1533742331288343e-05,
      "loss": 1.6764,
      "step": 117
    },
    {
      "epoch": 0.21751152073732719,
      "grad_norm": 0.30913662910461426,
      "learning_rate": 2.1717791411042947e-05,
      "loss": 1.6252,
      "step": 118
    },
    {
      "epoch": 0.21935483870967742,
      "grad_norm": 0.33525151014328003,
      "learning_rate": 2.1901840490797548e-05,
      "loss": 1.6692,
      "step": 119
    },
    {
      "epoch": 0.22119815668202766,
      "grad_norm": 0.329086035490036,
      "learning_rate": 2.208588957055215e-05,
      "loss": 1.6462,
      "step": 120
    },
    {
      "epoch": 0.22304147465437787,
      "grad_norm": 0.3236452341079712,
      "learning_rate": 2.226993865030675e-05,
      "loss": 1.7658,
      "step": 121
    },
    {
      "epoch": 0.2248847926267281,
      "grad_norm": 0.3473586142063141,
      "learning_rate": 2.245398773006135e-05,
      "loss": 1.6979,
      "step": 122
    },
    {
      "epoch": 0.22672811059907835,
      "grad_norm": 0.2963578402996063,
      "learning_rate": 2.263803680981595e-05,
      "loss": 1.692,
      "step": 123
    },
    {
      "epoch": 0.22857142857142856,
      "grad_norm": 0.3080257475376129,
      "learning_rate": 2.2822085889570554e-05,
      "loss": 1.6947,
      "step": 124
    },
    {
      "epoch": 0.2304147465437788,
      "grad_norm": 0.38422197103500366,
      "learning_rate": 2.3006134969325155e-05,
      "loss": 1.7041,
      "step": 125
    },
    {
      "epoch": 0.23225806451612904,
      "grad_norm": 0.30777859687805176,
      "learning_rate": 2.3190184049079755e-05,
      "loss": 1.7073,
      "step": 126
    },
    {
      "epoch": 0.23410138248847925,
      "grad_norm": 0.43660104274749756,
      "learning_rate": 2.337423312883436e-05,
      "loss": 1.6824,
      "step": 127
    },
    {
      "epoch": 0.2359447004608295,
      "grad_norm": 0.30751895904541016,
      "learning_rate": 2.355828220858896e-05,
      "loss": 1.7052,
      "step": 128
    },
    {
      "epoch": 0.23778801843317973,
      "grad_norm": 0.3926418125629425,
      "learning_rate": 2.3742331288343557e-05,
      "loss": 1.6765,
      "step": 129
    },
    {
      "epoch": 0.23963133640552994,
      "grad_norm": 0.32357603311538696,
      "learning_rate": 2.3926380368098158e-05,
      "loss": 1.6804,
      "step": 130
    },
    {
      "epoch": 0.24147465437788018,
      "grad_norm": 0.3306221663951874,
      "learning_rate": 2.411042944785276e-05,
      "loss": 1.709,
      "step": 131
    },
    {
      "epoch": 0.24331797235023042,
      "grad_norm": 0.38641783595085144,
      "learning_rate": 2.4294478527607362e-05,
      "loss": 1.6391,
      "step": 132
    },
    {
      "epoch": 0.24516129032258063,
      "grad_norm": 0.3362821638584137,
      "learning_rate": 2.4478527607361963e-05,
      "loss": 1.6555,
      "step": 133
    },
    {
      "epoch": 0.24700460829493087,
      "grad_norm": 0.37096643447875977,
      "learning_rate": 2.4662576687116567e-05,
      "loss": 1.7266,
      "step": 134
    },
    {
      "epoch": 0.2488479262672811,
      "grad_norm": 0.3151519000530243,
      "learning_rate": 2.4846625766871167e-05,
      "loss": 1.7161,
      "step": 135
    },
    {
      "epoch": 0.25069124423963135,
      "grad_norm": 0.32760757207870483,
      "learning_rate": 2.5030674846625765e-05,
      "loss": 1.641,
      "step": 136
    },
    {
      "epoch": 0.25253456221198156,
      "grad_norm": 0.37584084272384644,
      "learning_rate": 2.521472392638037e-05,
      "loss": 1.7422,
      "step": 137
    },
    {
      "epoch": 0.2543778801843318,
      "grad_norm": 0.31268641352653503,
      "learning_rate": 2.539877300613497e-05,
      "loss": 1.7261,
      "step": 138
    },
    {
      "epoch": 0.25622119815668204,
      "grad_norm": 0.36679184436798096,
      "learning_rate": 2.558282208588957e-05,
      "loss": 1.7018,
      "step": 139
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 0.31372857093811035,
      "learning_rate": 2.5766871165644174e-05,
      "loss": 1.6838,
      "step": 140
    },
    {
      "epoch": 0.25990783410138246,
      "grad_norm": 0.35413920879364014,
      "learning_rate": 2.5950920245398774e-05,
      "loss": 1.7003,
      "step": 141
    },
    {
      "epoch": 0.26175115207373273,
      "grad_norm": 0.3250369727611542,
      "learning_rate": 2.6134969325153375e-05,
      "loss": 1.6799,
      "step": 142
    },
    {
      "epoch": 0.26359447004608294,
      "grad_norm": 0.33544111251831055,
      "learning_rate": 2.631901840490798e-05,
      "loss": 1.6357,
      "step": 143
    },
    {
      "epoch": 0.2654377880184332,
      "grad_norm": 0.31145980954170227,
      "learning_rate": 2.6503067484662576e-05,
      "loss": 1.6062,
      "step": 144
    },
    {
      "epoch": 0.2672811059907834,
      "grad_norm": 0.31925103068351746,
      "learning_rate": 2.6687116564417177e-05,
      "loss": 1.7424,
      "step": 145
    },
    {
      "epoch": 0.26912442396313363,
      "grad_norm": 0.3047640919685364,
      "learning_rate": 2.687116564417178e-05,
      "loss": 1.7195,
      "step": 146
    },
    {
      "epoch": 0.2709677419354839,
      "grad_norm": 0.29877611994743347,
      "learning_rate": 2.705521472392638e-05,
      "loss": 1.6467,
      "step": 147
    },
    {
      "epoch": 0.2728110599078341,
      "grad_norm": 0.31033533811569214,
      "learning_rate": 2.7239263803680982e-05,
      "loss": 1.6234,
      "step": 148
    },
    {
      "epoch": 0.2746543778801843,
      "grad_norm": 0.3081960380077362,
      "learning_rate": 2.7423312883435586e-05,
      "loss": 1.6945,
      "step": 149
    },
    {
      "epoch": 0.2764976958525346,
      "grad_norm": 0.30935296416282654,
      "learning_rate": 2.7607361963190186e-05,
      "loss": 1.6172,
      "step": 150
    },
    {
      "epoch": 0.2783410138248848,
      "grad_norm": 0.3287109136581421,
      "learning_rate": 2.7791411042944787e-05,
      "loss": 1.6771,
      "step": 151
    },
    {
      "epoch": 0.280184331797235,
      "grad_norm": 0.3306177258491516,
      "learning_rate": 2.7975460122699384e-05,
      "loss": 1.6522,
      "step": 152
    },
    {
      "epoch": 0.2820276497695853,
      "grad_norm": 0.30568936467170715,
      "learning_rate": 2.8159509202453988e-05,
      "loss": 1.6456,
      "step": 153
    },
    {
      "epoch": 0.2838709677419355,
      "grad_norm": 0.31628063321113586,
      "learning_rate": 2.834355828220859e-05,
      "loss": 1.6521,
      "step": 154
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.3133561909198761,
      "learning_rate": 2.852760736196319e-05,
      "loss": 1.6517,
      "step": 155
    },
    {
      "epoch": 0.28755760368663597,
      "grad_norm": 0.327334463596344,
      "learning_rate": 2.8711656441717793e-05,
      "loss": 1.6422,
      "step": 156
    },
    {
      "epoch": 0.2894009216589862,
      "grad_norm": 0.3071538209915161,
      "learning_rate": 2.8895705521472394e-05,
      "loss": 1.6693,
      "step": 157
    },
    {
      "epoch": 0.2912442396313364,
      "grad_norm": 0.3373123109340668,
      "learning_rate": 2.9079754601226994e-05,
      "loss": 1.7079,
      "step": 158
    },
    {
      "epoch": 0.29308755760368665,
      "grad_norm": 0.32550349831581116,
      "learning_rate": 2.92638036809816e-05,
      "loss": 1.7135,
      "step": 159
    },
    {
      "epoch": 0.29493087557603687,
      "grad_norm": 0.2980889678001404,
      "learning_rate": 2.9447852760736196e-05,
      "loss": 1.6375,
      "step": 160
    },
    {
      "epoch": 0.2967741935483871,
      "grad_norm": 0.3041258156299591,
      "learning_rate": 2.9631901840490796e-05,
      "loss": 1.6542,
      "step": 161
    },
    {
      "epoch": 0.29861751152073734,
      "grad_norm": 0.32162049412727356,
      "learning_rate": 2.98159509202454e-05,
      "loss": 1.6308,
      "step": 162
    },
    {
      "epoch": 0.30046082949308756,
      "grad_norm": 0.31242045760154724,
      "learning_rate": 3e-05,
      "loss": 1.7211,
      "step": 163
    },
    {
      "epoch": 0.30230414746543777,
      "grad_norm": 0.32431089878082275,
      "learning_rate": 2.9999965416241516e-05,
      "loss": 1.6273,
      "step": 164
    },
    {
      "epoch": 0.30414746543778803,
      "grad_norm": 0.34065431356430054,
      "learning_rate": 2.999986166512553e-05,
      "loss": 1.7136,
      "step": 165
    },
    {
      "epoch": 0.30599078341013825,
      "grad_norm": 0.3061734437942505,
      "learning_rate": 2.9999688747130467e-05,
      "loss": 1.6912,
      "step": 166
    },
    {
      "epoch": 0.30783410138248846,
      "grad_norm": 0.31865042448043823,
      "learning_rate": 2.999944666305367e-05,
      "loss": 1.6703,
      "step": 167
    },
    {
      "epoch": 0.3096774193548387,
      "grad_norm": 0.32718777656555176,
      "learning_rate": 2.999913541401143e-05,
      "loss": 1.5595,
      "step": 168
    },
    {
      "epoch": 0.31152073732718893,
      "grad_norm": 0.31636691093444824,
      "learning_rate": 2.9998755001438975e-05,
      "loss": 1.6433,
      "step": 169
    },
    {
      "epoch": 0.31336405529953915,
      "grad_norm": 0.35521432757377625,
      "learning_rate": 2.999830542709045e-05,
      "loss": 1.6257,
      "step": 170
    },
    {
      "epoch": 0.3152073732718894,
      "grad_norm": 0.34638574719429016,
      "learning_rate": 2.9997786693038913e-05,
      "loss": 1.6341,
      "step": 171
    },
    {
      "epoch": 0.3170506912442396,
      "grad_norm": 0.3070574700832367,
      "learning_rate": 2.9997198801676335e-05,
      "loss": 1.646,
      "step": 172
    },
    {
      "epoch": 0.31889400921658984,
      "grad_norm": 0.3159651458263397,
      "learning_rate": 2.9996541755713585e-05,
      "loss": 1.6753,
      "step": 173
    },
    {
      "epoch": 0.3207373271889401,
      "grad_norm": 0.32200679183006287,
      "learning_rate": 2.999581555818041e-05,
      "loss": 1.6883,
      "step": 174
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 0.30645352602005005,
      "learning_rate": 2.9995020212425432e-05,
      "loss": 1.656,
      "step": 175
    },
    {
      "epoch": 0.3244239631336406,
      "grad_norm": 0.34474891424179077,
      "learning_rate": 2.9994155722116118e-05,
      "loss": 1.725,
      "step": 176
    },
    {
      "epoch": 0.3262672811059908,
      "grad_norm": 0.37686192989349365,
      "learning_rate": 2.999322209123878e-05,
      "loss": 1.7542,
      "step": 177
    },
    {
      "epoch": 0.328110599078341,
      "grad_norm": 0.3260898292064667,
      "learning_rate": 2.9992219324098545e-05,
      "loss": 1.6049,
      "step": 178
    },
    {
      "epoch": 0.32995391705069127,
      "grad_norm": 0.37945932149887085,
      "learning_rate": 2.9991147425319346e-05,
      "loss": 1.637,
      "step": 179
    },
    {
      "epoch": 0.3317972350230415,
      "grad_norm": 0.36349308490753174,
      "learning_rate": 2.9990006399843884e-05,
      "loss": 1.7051,
      "step": 180
    },
    {
      "epoch": 0.3336405529953917,
      "grad_norm": 0.35796141624450684,
      "learning_rate": 2.998879625293362e-05,
      "loss": 1.7094,
      "step": 181
    },
    {
      "epoch": 0.33548387096774196,
      "grad_norm": 0.35643306374549866,
      "learning_rate": 2.9987516990168743e-05,
      "loss": 1.6021,
      "step": 182
    },
    {
      "epoch": 0.33732718894009217,
      "grad_norm": 0.3745727837085724,
      "learning_rate": 2.9986168617448153e-05,
      "loss": 1.6267,
      "step": 183
    },
    {
      "epoch": 0.3391705069124424,
      "grad_norm": 0.32228630781173706,
      "learning_rate": 2.9984751140989417e-05,
      "loss": 1.662,
      "step": 184
    },
    {
      "epoch": 0.34101382488479265,
      "grad_norm": 0.3829532563686371,
      "learning_rate": 2.9983264567328756e-05,
      "loss": 1.6909,
      "step": 185
    },
    {
      "epoch": 0.34285714285714286,
      "grad_norm": 0.3452930152416229,
      "learning_rate": 2.9981708903321017e-05,
      "loss": 1.6502,
      "step": 186
    },
    {
      "epoch": 0.34470046082949307,
      "grad_norm": 0.3659391701221466,
      "learning_rate": 2.9980084156139625e-05,
      "loss": 1.6409,
      "step": 187
    },
    {
      "epoch": 0.34654377880184334,
      "grad_norm": 0.38383013010025024,
      "learning_rate": 2.9978390333276565e-05,
      "loss": 1.6765,
      "step": 188
    },
    {
      "epoch": 0.34838709677419355,
      "grad_norm": 0.34483417868614197,
      "learning_rate": 2.9976627442542325e-05,
      "loss": 1.7486,
      "step": 189
    },
    {
      "epoch": 0.35023041474654376,
      "grad_norm": 0.3946613669395447,
      "learning_rate": 2.997479549206591e-05,
      "loss": 1.6863,
      "step": 190
    },
    {
      "epoch": 0.35207373271889403,
      "grad_norm": 0.3084375262260437,
      "learning_rate": 2.9972894490294738e-05,
      "loss": 1.6223,
      "step": 191
    },
    {
      "epoch": 0.35391705069124424,
      "grad_norm": 0.42278456687927246,
      "learning_rate": 2.9970924445994645e-05,
      "loss": 1.7044,
      "step": 192
    },
    {
      "epoch": 0.35576036866359445,
      "grad_norm": 0.3211970925331116,
      "learning_rate": 2.9968885368249847e-05,
      "loss": 1.6907,
      "step": 193
    },
    {
      "epoch": 0.3576036866359447,
      "grad_norm": 0.3954881727695465,
      "learning_rate": 2.9966777266462863e-05,
      "loss": 1.7002,
      "step": 194
    },
    {
      "epoch": 0.35944700460829493,
      "grad_norm": 0.3460248112678528,
      "learning_rate": 2.9964600150354512e-05,
      "loss": 1.639,
      "step": 195
    },
    {
      "epoch": 0.36129032258064514,
      "grad_norm": 0.3247598707675934,
      "learning_rate": 2.9962354029963835e-05,
      "loss": 1.679,
      "step": 196
    },
    {
      "epoch": 0.3631336405529954,
      "grad_norm": 0.4037436544895172,
      "learning_rate": 2.9960038915648076e-05,
      "loss": 1.7343,
      "step": 197
    },
    {
      "epoch": 0.3649769585253456,
      "grad_norm": 0.3309732973575592,
      "learning_rate": 2.9957654818082615e-05,
      "loss": 1.6759,
      "step": 198
    },
    {
      "epoch": 0.36682027649769583,
      "grad_norm": 0.38870948553085327,
      "learning_rate": 2.9955201748260923e-05,
      "loss": 1.7189,
      "step": 199
    },
    {
      "epoch": 0.3686635944700461,
      "grad_norm": 0.2937754988670349,
      "learning_rate": 2.9952679717494516e-05,
      "loss": 1.6882,
      "step": 200
    },
    {
      "epoch": 0.3705069124423963,
      "grad_norm": 0.3606261909008026,
      "learning_rate": 2.9950088737412898e-05,
      "loss": 1.6536,
      "step": 201
    },
    {
      "epoch": 0.3723502304147465,
      "grad_norm": 0.29472067952156067,
      "learning_rate": 2.9947428819963526e-05,
      "loss": 1.6957,
      "step": 202
    },
    {
      "epoch": 0.3741935483870968,
      "grad_norm": 0.2893930673599243,
      "learning_rate": 2.994469997741171e-05,
      "loss": 1.6434,
      "step": 203
    },
    {
      "epoch": 0.376036866359447,
      "grad_norm": 0.36216485500335693,
      "learning_rate": 2.994190222234061e-05,
      "loss": 1.6897,
      "step": 204
    },
    {
      "epoch": 0.3778801843317972,
      "grad_norm": 0.2846887409687042,
      "learning_rate": 2.9939035567651146e-05,
      "loss": 1.6727,
      "step": 205
    },
    {
      "epoch": 0.3797235023041475,
      "grad_norm": 0.34425318241119385,
      "learning_rate": 2.9936100026561933e-05,
      "loss": 1.6824,
      "step": 206
    },
    {
      "epoch": 0.3815668202764977,
      "grad_norm": 0.31369149684906006,
      "learning_rate": 2.9933095612609253e-05,
      "loss": 1.6703,
      "step": 207
    },
    {
      "epoch": 0.38341013824884795,
      "grad_norm": 0.31020811200141907,
      "learning_rate": 2.993002233964696e-05,
      "loss": 1.7284,
      "step": 208
    },
    {
      "epoch": 0.38525345622119817,
      "grad_norm": 0.3632746934890747,
      "learning_rate": 2.9926880221846435e-05,
      "loss": 1.6617,
      "step": 209
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 0.2814621329307556,
      "learning_rate": 2.9923669273696506e-05,
      "loss": 1.5947,
      "step": 210
    },
    {
      "epoch": 0.38894009216589864,
      "grad_norm": 0.32484638690948486,
      "learning_rate": 2.9920389510003395e-05,
      "loss": 1.6403,
      "step": 211
    },
    {
      "epoch": 0.39078341013824885,
      "grad_norm": 0.31668680906295776,
      "learning_rate": 2.9917040945890638e-05,
      "loss": 1.7241,
      "step": 212
    },
    {
      "epoch": 0.39262672811059907,
      "grad_norm": 0.29519209265708923,
      "learning_rate": 2.9913623596799032e-05,
      "loss": 1.6997,
      "step": 213
    },
    {
      "epoch": 0.39447004608294933,
      "grad_norm": 0.3414634168148041,
      "learning_rate": 2.9910137478486545e-05,
      "loss": 1.6451,
      "step": 214
    },
    {
      "epoch": 0.39631336405529954,
      "grad_norm": 0.2943251132965088,
      "learning_rate": 2.990658260702826e-05,
      "loss": 1.6784,
      "step": 215
    },
    {
      "epoch": 0.39815668202764976,
      "grad_norm": 0.3138771653175354,
      "learning_rate": 2.9902958998816274e-05,
      "loss": 1.7088,
      "step": 216
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.296895831823349,
      "learning_rate": 2.989926667055966e-05,
      "loss": 1.6855,
      "step": 217
    },
    {
      "epoch": 0.40184331797235023,
      "grad_norm": 0.29469338059425354,
      "learning_rate": 2.989550563928436e-05,
      "loss": 1.6437,
      "step": 218
    },
    {
      "epoch": 0.40368663594470044,
      "grad_norm": 0.3117813169956207,
      "learning_rate": 2.9891675922333125e-05,
      "loss": 1.6708,
      "step": 219
    },
    {
      "epoch": 0.4055299539170507,
      "grad_norm": 0.2911444902420044,
      "learning_rate": 2.9887777537365416e-05,
      "loss": 1.6655,
      "step": 220
    },
    {
      "epoch": 0.4073732718894009,
      "grad_norm": 0.29274192452430725,
      "learning_rate": 2.9883810502357346e-05,
      "loss": 1.6737,
      "step": 221
    },
    {
      "epoch": 0.40921658986175113,
      "grad_norm": 0.325166255235672,
      "learning_rate": 2.9879774835601574e-05,
      "loss": 1.6562,
      "step": 222
    },
    {
      "epoch": 0.4110599078341014,
      "grad_norm": 0.3723132312297821,
      "learning_rate": 2.987567055570724e-05,
      "loss": 1.696,
      "step": 223
    },
    {
      "epoch": 0.4129032258064516,
      "grad_norm": 0.2948567867279053,
      "learning_rate": 2.987149768159987e-05,
      "loss": 1.5771,
      "step": 224
    },
    {
      "epoch": 0.4147465437788018,
      "grad_norm": 0.3130621314048767,
      "learning_rate": 2.986725623252128e-05,
      "loss": 1.7274,
      "step": 225
    },
    {
      "epoch": 0.4165898617511521,
      "grad_norm": 0.283635675907135,
      "learning_rate": 2.9862946228029507e-05,
      "loss": 1.6277,
      "step": 226
    },
    {
      "epoch": 0.4184331797235023,
      "grad_norm": 0.2891738712787628,
      "learning_rate": 2.9858567687998702e-05,
      "loss": 1.6161,
      "step": 227
    },
    {
      "epoch": 0.4202764976958525,
      "grad_norm": 0.3050073981285095,
      "learning_rate": 2.9854120632619053e-05,
      "loss": 1.6358,
      "step": 228
    },
    {
      "epoch": 0.4221198156682028,
      "grad_norm": 0.29393428564071655,
      "learning_rate": 2.9849605082396678e-05,
      "loss": 1.7176,
      "step": 229
    },
    {
      "epoch": 0.423963133640553,
      "grad_norm": 0.29933053255081177,
      "learning_rate": 2.9845021058153532e-05,
      "loss": 1.6292,
      "step": 230
    },
    {
      "epoch": 0.4258064516129032,
      "grad_norm": 0.2925868034362793,
      "learning_rate": 2.984036858102732e-05,
      "loss": 1.6453,
      "step": 231
    },
    {
      "epoch": 0.42764976958525347,
      "grad_norm": 0.30412405729293823,
      "learning_rate": 2.98356476724714e-05,
      "loss": 1.7311,
      "step": 232
    },
    {
      "epoch": 0.4294930875576037,
      "grad_norm": 0.29768475890159607,
      "learning_rate": 2.9830858354254672e-05,
      "loss": 1.632,
      "step": 233
    },
    {
      "epoch": 0.4313364055299539,
      "grad_norm": 0.3007776141166687,
      "learning_rate": 2.9826000648461484e-05,
      "loss": 1.6307,
      "step": 234
    },
    {
      "epoch": 0.43317972350230416,
      "grad_norm": 0.32172518968582153,
      "learning_rate": 2.982107457749153e-05,
      "loss": 1.6314,
      "step": 235
    },
    {
      "epoch": 0.43502304147465437,
      "grad_norm": 0.28600960969924927,
      "learning_rate": 2.9816080164059758e-05,
      "loss": 1.6417,
      "step": 236
    },
    {
      "epoch": 0.4368663594470046,
      "grad_norm": 0.2792605757713318,
      "learning_rate": 2.981101743119624e-05,
      "loss": 1.5736,
      "step": 237
    },
    {
      "epoch": 0.43870967741935485,
      "grad_norm": 0.3138410747051239,
      "learning_rate": 2.9805886402246084e-05,
      "loss": 1.6921,
      "step": 238
    },
    {
      "epoch": 0.44055299539170506,
      "grad_norm": 0.2832198739051819,
      "learning_rate": 2.9800687100869334e-05,
      "loss": 1.642,
      "step": 239
    },
    {
      "epoch": 0.4423963133640553,
      "grad_norm": 0.29424023628234863,
      "learning_rate": 2.9795419551040836e-05,
      "loss": 1.6786,
      "step": 240
    },
    {
      "epoch": 0.44423963133640554,
      "grad_norm": 0.30614927411079407,
      "learning_rate": 2.9790083777050148e-05,
      "loss": 1.6565,
      "step": 241
    },
    {
      "epoch": 0.44608294930875575,
      "grad_norm": 0.29164189100265503,
      "learning_rate": 2.9784679803501416e-05,
      "loss": 1.7311,
      "step": 242
    },
    {
      "epoch": 0.447926267281106,
      "grad_norm": 0.29889318346977234,
      "learning_rate": 2.977920765531327e-05,
      "loss": 1.6551,
      "step": 243
    },
    {
      "epoch": 0.4497695852534562,
      "grad_norm": 0.2796136438846588,
      "learning_rate": 2.9773667357718706e-05,
      "loss": 1.6495,
      "step": 244
    },
    {
      "epoch": 0.45161290322580644,
      "grad_norm": 0.3036425709724426,
      "learning_rate": 2.9768058936264967e-05,
      "loss": 1.6847,
      "step": 245
    },
    {
      "epoch": 0.4534562211981567,
      "grad_norm": 0.2765255868434906,
      "learning_rate": 2.976238241681342e-05,
      "loss": 1.642,
      "step": 246
    },
    {
      "epoch": 0.4552995391705069,
      "grad_norm": 0.28592586517333984,
      "learning_rate": 2.9756637825539453e-05,
      "loss": 1.5912,
      "step": 247
    },
    {
      "epoch": 0.45714285714285713,
      "grad_norm": 0.29503703117370605,
      "learning_rate": 2.9750825188932334e-05,
      "loss": 1.6017,
      "step": 248
    },
    {
      "epoch": 0.4589861751152074,
      "grad_norm": 0.2970612645149231,
      "learning_rate": 2.9744944533795112e-05,
      "loss": 1.6603,
      "step": 249
    },
    {
      "epoch": 0.4608294930875576,
      "grad_norm": 0.28196001052856445,
      "learning_rate": 2.973899588724448e-05,
      "loss": 1.6234,
      "step": 250
    },
    {
      "epoch": 0.4626728110599078,
      "grad_norm": 0.28284621238708496,
      "learning_rate": 2.973297927671063e-05,
      "loss": 1.6411,
      "step": 251
    },
    {
      "epoch": 0.4645161290322581,
      "grad_norm": 0.28718075156211853,
      "learning_rate": 2.9726894729937177e-05,
      "loss": 1.6307,
      "step": 252
    },
    {
      "epoch": 0.4663594470046083,
      "grad_norm": 0.28701338171958923,
      "learning_rate": 2.972074227498098e-05,
      "loss": 1.6697,
      "step": 253
    },
    {
      "epoch": 0.4682027649769585,
      "grad_norm": 0.28013625741004944,
      "learning_rate": 2.971452194021204e-05,
      "loss": 1.6441,
      "step": 254
    },
    {
      "epoch": 0.4700460829493088,
      "grad_norm": 0.28640949726104736,
      "learning_rate": 2.9708233754313365e-05,
      "loss": 1.6774,
      "step": 255
    },
    {
      "epoch": 0.471889400921659,
      "grad_norm": 0.2968595027923584,
      "learning_rate": 2.9701877746280843e-05,
      "loss": 1.693,
      "step": 256
    },
    {
      "epoch": 0.4737327188940092,
      "grad_norm": 0.28468331694602966,
      "learning_rate": 2.9695453945423087e-05,
      "loss": 1.6944,
      "step": 257
    },
    {
      "epoch": 0.47557603686635946,
      "grad_norm": 0.31435340642929077,
      "learning_rate": 2.9688962381361317e-05,
      "loss": 1.6628,
      "step": 258
    },
    {
      "epoch": 0.4774193548387097,
      "grad_norm": 0.2782823443412781,
      "learning_rate": 2.968240308402923e-05,
      "loss": 1.6312,
      "step": 259
    },
    {
      "epoch": 0.4792626728110599,
      "grad_norm": 0.288622111082077,
      "learning_rate": 2.967577608367285e-05,
      "loss": 1.6166,
      "step": 260
    },
    {
      "epoch": 0.48110599078341015,
      "grad_norm": 0.2862098217010498,
      "learning_rate": 2.9669081410850378e-05,
      "loss": 1.5918,
      "step": 261
    },
    {
      "epoch": 0.48294930875576036,
      "grad_norm": 0.2974812686443329,
      "learning_rate": 2.966231909643208e-05,
      "loss": 1.6475,
      "step": 262
    },
    {
      "epoch": 0.4847926267281106,
      "grad_norm": 0.31905004382133484,
      "learning_rate": 2.9655489171600118e-05,
      "loss": 1.6218,
      "step": 263
    },
    {
      "epoch": 0.48663594470046084,
      "grad_norm": 0.29999393224716187,
      "learning_rate": 2.9648591667848428e-05,
      "loss": 1.7007,
      "step": 264
    },
    {
      "epoch": 0.48847926267281105,
      "grad_norm": 0.31066837906837463,
      "learning_rate": 2.9641626616982555e-05,
      "loss": 1.6758,
      "step": 265
    },
    {
      "epoch": 0.49032258064516127,
      "grad_norm": 0.30834177136421204,
      "learning_rate": 2.9634594051119515e-05,
      "loss": 1.6889,
      "step": 266
    },
    {
      "epoch": 0.49216589861751153,
      "grad_norm": 0.29685091972351074,
      "learning_rate": 2.9627494002687653e-05,
      "loss": 1.7099,
      "step": 267
    },
    {
      "epoch": 0.49400921658986174,
      "grad_norm": 0.3066437244415283,
      "learning_rate": 2.9620326504426476e-05,
      "loss": 1.6494,
      "step": 268
    },
    {
      "epoch": 0.49585253456221196,
      "grad_norm": 0.28298285603523254,
      "learning_rate": 2.9613091589386526e-05,
      "loss": 1.6435,
      "step": 269
    },
    {
      "epoch": 0.4976958525345622,
      "grad_norm": 0.2950513958930969,
      "learning_rate": 2.9605789290929214e-05,
      "loss": 1.6588,
      "step": 270
    },
    {
      "epoch": 0.49953917050691243,
      "grad_norm": 0.2809874713420868,
      "learning_rate": 2.9598419642726655e-05,
      "loss": 1.6463,
      "step": 271
    },
    {
      "epoch": 0.5013824884792627,
      "grad_norm": 0.29350385069847107,
      "learning_rate": 2.9590982678761544e-05,
      "loss": 1.6022,
      "step": 272
    },
    {
      "epoch": 0.5032258064516129,
      "grad_norm": 0.28711917996406555,
      "learning_rate": 2.958347843332696e-05,
      "loss": 1.6602,
      "step": 273
    },
    {
      "epoch": 0.5050691244239631,
      "grad_norm": 0.2757432162761688,
      "learning_rate": 2.957590694102624e-05,
      "loss": 1.6223,
      "step": 274
    },
    {
      "epoch": 0.5069124423963134,
      "grad_norm": 0.27851778268814087,
      "learning_rate": 2.9568268236772816e-05,
      "loss": 1.6716,
      "step": 275
    },
    {
      "epoch": 0.5087557603686635,
      "grad_norm": 0.28100845217704773,
      "learning_rate": 2.956056235579002e-05,
      "loss": 1.6326,
      "step": 276
    },
    {
      "epoch": 0.5105990783410138,
      "grad_norm": 0.2892681360244751,
      "learning_rate": 2.955278933361097e-05,
      "loss": 1.6584,
      "step": 277
    },
    {
      "epoch": 0.5124423963133641,
      "grad_norm": 0.27751055359840393,
      "learning_rate": 2.9544949206078372e-05,
      "loss": 1.6457,
      "step": 278
    },
    {
      "epoch": 0.5142857142857142,
      "grad_norm": 0.26928141713142395,
      "learning_rate": 2.9537042009344376e-05,
      "loss": 1.6027,
      "step": 279
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 0.26717764139175415,
      "learning_rate": 2.9529067779870385e-05,
      "loss": 1.6157,
      "step": 280
    },
    {
      "epoch": 0.5179723502304148,
      "grad_norm": 0.26703840494155884,
      "learning_rate": 2.952102655442692e-05,
      "loss": 1.6148,
      "step": 281
    },
    {
      "epoch": 0.5198156682027649,
      "grad_norm": 0.2838272452354431,
      "learning_rate": 2.9512918370093407e-05,
      "loss": 1.6785,
      "step": 282
    },
    {
      "epoch": 0.5216589861751152,
      "grad_norm": 0.281730592250824,
      "learning_rate": 2.950474326425805e-05,
      "loss": 1.5828,
      "step": 283
    },
    {
      "epoch": 0.5235023041474655,
      "grad_norm": 0.359958678483963,
      "learning_rate": 2.949650127461764e-05,
      "loss": 1.6606,
      "step": 284
    },
    {
      "epoch": 0.5253456221198156,
      "grad_norm": 0.276723712682724,
      "learning_rate": 2.948819243917737e-05,
      "loss": 1.7019,
      "step": 285
    },
    {
      "epoch": 0.5271889400921659,
      "grad_norm": 0.28240787982940674,
      "learning_rate": 2.947981679625067e-05,
      "loss": 1.7214,
      "step": 286
    },
    {
      "epoch": 0.5290322580645161,
      "grad_norm": 0.278328001499176,
      "learning_rate": 2.947137438445904e-05,
      "loss": 1.6599,
      "step": 287
    },
    {
      "epoch": 0.5308755760368664,
      "grad_norm": 0.2821551561355591,
      "learning_rate": 2.9462865242731856e-05,
      "loss": 1.6602,
      "step": 288
    },
    {
      "epoch": 0.5327188940092166,
      "grad_norm": 0.2736065685749054,
      "learning_rate": 2.9454289410306202e-05,
      "loss": 1.5909,
      "step": 289
    },
    {
      "epoch": 0.5345622119815668,
      "grad_norm": 0.32163509726524353,
      "learning_rate": 2.944564692672667e-05,
      "loss": 1.7039,
      "step": 290
    },
    {
      "epoch": 0.5364055299539171,
      "grad_norm": 0.2821354568004608,
      "learning_rate": 2.9436937831845217e-05,
      "loss": 1.6789,
      "step": 291
    },
    {
      "epoch": 0.5382488479262673,
      "grad_norm": 0.30141276121139526,
      "learning_rate": 2.942816216582093e-05,
      "loss": 1.6341,
      "step": 292
    },
    {
      "epoch": 0.5400921658986175,
      "grad_norm": 0.2816147208213806,
      "learning_rate": 2.9419319969119875e-05,
      "loss": 1.5926,
      "step": 293
    },
    {
      "epoch": 0.5419354838709678,
      "grad_norm": 0.2912384569644928,
      "learning_rate": 2.9410411282514913e-05,
      "loss": 1.6507,
      "step": 294
    },
    {
      "epoch": 0.543778801843318,
      "grad_norm": 0.3174484670162201,
      "learning_rate": 2.940143614708549e-05,
      "loss": 1.6504,
      "step": 295
    },
    {
      "epoch": 0.5456221198156682,
      "grad_norm": 0.2888404130935669,
      "learning_rate": 2.939239460421746e-05,
      "loss": 1.6762,
      "step": 296
    },
    {
      "epoch": 0.5474654377880185,
      "grad_norm": 0.31422436237335205,
      "learning_rate": 2.93832866956029e-05,
      "loss": 1.6301,
      "step": 297
    },
    {
      "epoch": 0.5493087557603686,
      "grad_norm": 0.3254394829273224,
      "learning_rate": 2.9374112463239896e-05,
      "loss": 1.7101,
      "step": 298
    },
    {
      "epoch": 0.5511520737327189,
      "grad_norm": 0.3167421817779541,
      "learning_rate": 2.9364871949432378e-05,
      "loss": 1.6871,
      "step": 299
    },
    {
      "epoch": 0.5529953917050692,
      "grad_norm": 0.3247944712638855,
      "learning_rate": 2.9355565196789906e-05,
      "loss": 1.7028,
      "step": 300
    },
    {
      "epoch": 0.5548387096774193,
      "grad_norm": 0.27095088362693787,
      "learning_rate": 2.9346192248227476e-05,
      "loss": 1.6229,
      "step": 301
    },
    {
      "epoch": 0.5566820276497696,
      "grad_norm": 0.31862378120422363,
      "learning_rate": 2.9336753146965327e-05,
      "loss": 1.666,
      "step": 302
    },
    {
      "epoch": 0.5585253456221199,
      "grad_norm": 0.28205209970474243,
      "learning_rate": 2.9327247936528742e-05,
      "loss": 1.5925,
      "step": 303
    },
    {
      "epoch": 0.56036866359447,
      "grad_norm": 0.29554882645606995,
      "learning_rate": 2.9317676660747837e-05,
      "loss": 1.6605,
      "step": 304
    },
    {
      "epoch": 0.5622119815668203,
      "grad_norm": 0.2860583961009979,
      "learning_rate": 2.9308039363757372e-05,
      "loss": 1.6371,
      "step": 305
    },
    {
      "epoch": 0.5640552995391706,
      "grad_norm": 0.27211877703666687,
      "learning_rate": 2.9298336089996538e-05,
      "loss": 1.7176,
      "step": 306
    },
    {
      "epoch": 0.5658986175115207,
      "grad_norm": 0.3138637840747833,
      "learning_rate": 2.9288566884208766e-05,
      "loss": 1.6378,
      "step": 307
    },
    {
      "epoch": 0.567741935483871,
      "grad_norm": 0.2751595973968506,
      "learning_rate": 2.9278731791441497e-05,
      "loss": 1.6313,
      "step": 308
    },
    {
      "epoch": 0.5695852534562212,
      "grad_norm": 0.28140899538993835,
      "learning_rate": 2.9268830857045997e-05,
      "loss": 1.6114,
      "step": 309
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.2734344005584717,
      "learning_rate": 2.9258864126677132e-05,
      "loss": 1.6438,
      "step": 310
    },
    {
      "epoch": 0.5732718894009217,
      "grad_norm": 0.30163639783859253,
      "learning_rate": 2.9248831646293174e-05,
      "loss": 1.6521,
      "step": 311
    },
    {
      "epoch": 0.5751152073732719,
      "grad_norm": 0.28159695863723755,
      "learning_rate": 2.9238733462155564e-05,
      "loss": 1.6399,
      "step": 312
    },
    {
      "epoch": 0.5769585253456221,
      "grad_norm": 0.2891719341278076,
      "learning_rate": 2.9228569620828735e-05,
      "loss": 1.6316,
      "step": 313
    },
    {
      "epoch": 0.5788018433179724,
      "grad_norm": 0.27711349725723267,
      "learning_rate": 2.921834016917986e-05,
      "loss": 1.5787,
      "step": 314
    },
    {
      "epoch": 0.5806451612903226,
      "grad_norm": 0.2825881540775299,
      "learning_rate": 2.920804515437865e-05,
      "loss": 1.6223,
      "step": 315
    },
    {
      "epoch": 0.5824884792626728,
      "grad_norm": 0.2809242010116577,
      "learning_rate": 2.9197684623897157e-05,
      "loss": 1.6368,
      "step": 316
    },
    {
      "epoch": 0.584331797235023,
      "grad_norm": 0.2902085781097412,
      "learning_rate": 2.9187258625509518e-05,
      "loss": 1.6855,
      "step": 317
    },
    {
      "epoch": 0.5861751152073733,
      "grad_norm": 0.2923787236213684,
      "learning_rate": 2.917676720729177e-05,
      "loss": 1.6448,
      "step": 318
    },
    {
      "epoch": 0.5880184331797235,
      "grad_norm": 0.2834003269672394,
      "learning_rate": 2.916621041762159e-05,
      "loss": 1.6295,
      "step": 319
    },
    {
      "epoch": 0.5898617511520737,
      "grad_norm": 0.2824580669403076,
      "learning_rate": 2.9155588305178113e-05,
      "loss": 1.5738,
      "step": 320
    },
    {
      "epoch": 0.591705069124424,
      "grad_norm": 0.30301326513290405,
      "learning_rate": 2.9144900918941687e-05,
      "loss": 1.6247,
      "step": 321
    },
    {
      "epoch": 0.5935483870967742,
      "grad_norm": 0.2766891121864319,
      "learning_rate": 2.9134148308193637e-05,
      "loss": 1.7135,
      "step": 322
    },
    {
      "epoch": 0.5953917050691244,
      "grad_norm": 0.2816697061061859,
      "learning_rate": 2.9123330522516053e-05,
      "loss": 1.6522,
      "step": 323
    },
    {
      "epoch": 0.5972350230414747,
      "grad_norm": 0.28478461503982544,
      "learning_rate": 2.9112447611791563e-05,
      "loss": 1.6347,
      "step": 324
    },
    {
      "epoch": 0.5990783410138248,
      "grad_norm": 0.27743953466415405,
      "learning_rate": 2.9101499626203102e-05,
      "loss": 1.6071,
      "step": 325
    },
    {
      "epoch": 0.6009216589861751,
      "grad_norm": 0.27698153257369995,
      "learning_rate": 2.9090486616233654e-05,
      "loss": 1.6191,
      "step": 326
    },
    {
      "epoch": 0.6027649769585254,
      "grad_norm": 0.2867109477519989,
      "learning_rate": 2.907940863266607e-05,
      "loss": 1.6427,
      "step": 327
    },
    {
      "epoch": 0.6046082949308755,
      "grad_norm": 0.26966315507888794,
      "learning_rate": 2.906826572658278e-05,
      "loss": 1.5825,
      "step": 328
    },
    {
      "epoch": 0.6064516129032258,
      "grad_norm": 0.2749760150909424,
      "learning_rate": 2.9057057949365602e-05,
      "loss": 1.6189,
      "step": 329
    },
    {
      "epoch": 0.6082949308755761,
      "grad_norm": 0.30331194400787354,
      "learning_rate": 2.904578535269547e-05,
      "loss": 1.6485,
      "step": 330
    },
    {
      "epoch": 0.6101382488479262,
      "grad_norm": 0.2790120244026184,
      "learning_rate": 2.9034447988552227e-05,
      "loss": 1.6874,
      "step": 331
    },
    {
      "epoch": 0.6119815668202765,
      "grad_norm": 0.2863958477973938,
      "learning_rate": 2.902304590921435e-05,
      "loss": 1.6805,
      "step": 332
    },
    {
      "epoch": 0.6138248847926268,
      "grad_norm": 0.28442642092704773,
      "learning_rate": 2.9011579167258756e-05,
      "loss": 1.6611,
      "step": 333
    },
    {
      "epoch": 0.6156682027649769,
      "grad_norm": 0.27127203345298767,
      "learning_rate": 2.90000478155605e-05,
      "loss": 1.5686,
      "step": 334
    },
    {
      "epoch": 0.6175115207373272,
      "grad_norm": 0.28976717591285706,
      "learning_rate": 2.8988451907292594e-05,
      "loss": 1.6636,
      "step": 335
    },
    {
      "epoch": 0.6193548387096774,
      "grad_norm": 0.2731335461139679,
      "learning_rate": 2.8976791495925704e-05,
      "loss": 1.7131,
      "step": 336
    },
    {
      "epoch": 0.6211981566820276,
      "grad_norm": 0.2786687910556793,
      "learning_rate": 2.896506663522795e-05,
      "loss": 1.6664,
      "step": 337
    },
    {
      "epoch": 0.6230414746543779,
      "grad_norm": 0.2858924865722656,
      "learning_rate": 2.8953277379264633e-05,
      "loss": 1.6567,
      "step": 338
    },
    {
      "epoch": 0.6248847926267281,
      "grad_norm": 0.2715083956718445,
      "learning_rate": 2.8941423782397987e-05,
      "loss": 1.6504,
      "step": 339
    },
    {
      "epoch": 0.6267281105990783,
      "grad_norm": 0.2730218470096588,
      "learning_rate": 2.892950589928694e-05,
      "loss": 1.6381,
      "step": 340
    },
    {
      "epoch": 0.6285714285714286,
      "grad_norm": 0.2796657383441925,
      "learning_rate": 2.8917523784886846e-05,
      "loss": 1.6845,
      "step": 341
    },
    {
      "epoch": 0.6304147465437788,
      "grad_norm": 0.28790879249572754,
      "learning_rate": 2.890547749444925e-05,
      "loss": 1.6751,
      "step": 342
    },
    {
      "epoch": 0.632258064516129,
      "grad_norm": 0.27353277802467346,
      "learning_rate": 2.8893367083521616e-05,
      "loss": 1.6247,
      "step": 343
    },
    {
      "epoch": 0.6341013824884792,
      "grad_norm": 0.2717505395412445,
      "learning_rate": 2.888119260794708e-05,
      "loss": 1.6086,
      "step": 344
    },
    {
      "epoch": 0.6359447004608295,
      "grad_norm": 0.27940690517425537,
      "learning_rate": 2.8868954123864194e-05,
      "loss": 1.653,
      "step": 345
    },
    {
      "epoch": 0.6377880184331797,
      "grad_norm": 0.265103280544281,
      "learning_rate": 2.885665168770666e-05,
      "loss": 1.6432,
      "step": 346
    },
    {
      "epoch": 0.6396313364055299,
      "grad_norm": 0.26981207728385925,
      "learning_rate": 2.8844285356203074e-05,
      "loss": 1.6346,
      "step": 347
    },
    {
      "epoch": 0.6414746543778802,
      "grad_norm": 0.2731129229068756,
      "learning_rate": 2.8831855186376672e-05,
      "loss": 1.6907,
      "step": 348
    },
    {
      "epoch": 0.6433179723502304,
      "grad_norm": 0.2778747081756592,
      "learning_rate": 2.8819361235545047e-05,
      "loss": 1.699,
      "step": 349
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 0.27246907353401184,
      "learning_rate": 2.8806803561319903e-05,
      "loss": 1.6464,
      "step": 350
    },
    {
      "epoch": 0.6470046082949309,
      "grad_norm": 0.2664584517478943,
      "learning_rate": 2.8794182221606784e-05,
      "loss": 1.5384,
      "step": 351
    },
    {
      "epoch": 0.6488479262672812,
      "grad_norm": 0.2673085033893585,
      "learning_rate": 2.878149727460481e-05,
      "loss": 1.571,
      "step": 352
    },
    {
      "epoch": 0.6506912442396313,
      "grad_norm": 0.28247496485710144,
      "learning_rate": 2.876874877880639e-05,
      "loss": 1.5831,
      "step": 353
    },
    {
      "epoch": 0.6525345622119816,
      "grad_norm": 0.2843359112739563,
      "learning_rate": 2.8755936792996987e-05,
      "loss": 1.6923,
      "step": 354
    },
    {
      "epoch": 0.6543778801843319,
      "grad_norm": 0.27128326892852783,
      "learning_rate": 2.8743061376254813e-05,
      "loss": 1.6356,
      "step": 355
    },
    {
      "epoch": 0.656221198156682,
      "grad_norm": 0.2898774743080139,
      "learning_rate": 2.873012258795057e-05,
      "loss": 1.6479,
      "step": 356
    },
    {
      "epoch": 0.6580645161290323,
      "grad_norm": 0.28426289558410645,
      "learning_rate": 2.8717120487747193e-05,
      "loss": 1.629,
      "step": 357
    },
    {
      "epoch": 0.6599078341013825,
      "grad_norm": 0.2633942663669586,
      "learning_rate": 2.870405513559954e-05,
      "loss": 1.5984,
      "step": 358
    },
    {
      "epoch": 0.6617511520737327,
      "grad_norm": 0.31980207562446594,
      "learning_rate": 2.8690926591754142e-05,
      "loss": 1.5927,
      "step": 359
    },
    {
      "epoch": 0.663594470046083,
      "grad_norm": 0.2780967056751251,
      "learning_rate": 2.8677734916748927e-05,
      "loss": 1.6457,
      "step": 360
    },
    {
      "epoch": 0.6654377880184332,
      "grad_norm": 0.3023218810558319,
      "learning_rate": 2.866448017141291e-05,
      "loss": 1.6161,
      "step": 361
    },
    {
      "epoch": 0.6672811059907834,
      "grad_norm": 0.2830914556980133,
      "learning_rate": 2.865116241686595e-05,
      "loss": 1.6897,
      "step": 362
    },
    {
      "epoch": 0.6691244239631337,
      "grad_norm": 0.29188162088394165,
      "learning_rate": 2.863778171451845e-05,
      "loss": 1.6293,
      "step": 363
    },
    {
      "epoch": 0.6709677419354839,
      "grad_norm": 0.3087140619754791,
      "learning_rate": 2.8624338126071073e-05,
      "loss": 1.6143,
      "step": 364
    },
    {
      "epoch": 0.6728110599078341,
      "grad_norm": 0.26930543780326843,
      "learning_rate": 2.861083171351446e-05,
      "loss": 1.5878,
      "step": 365
    },
    {
      "epoch": 0.6746543778801843,
      "grad_norm": 0.30629977583885193,
      "learning_rate": 2.8597262539128947e-05,
      "loss": 1.6939,
      "step": 366
    },
    {
      "epoch": 0.6764976958525346,
      "grad_norm": 0.2783128321170807,
      "learning_rate": 2.858363066548427e-05,
      "loss": 1.66,
      "step": 367
    },
    {
      "epoch": 0.6783410138248848,
| "grad_norm": 0.27261894941329956, |
| "learning_rate": 2.856993615543929e-05, |
| "loss": 1.6183, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.680184331797235, |
| "grad_norm": 0.29020169377326965, |
| "learning_rate": 2.8556179072141693e-05, |
| "loss": 1.6187, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.6820276497695853, |
| "grad_norm": 0.2697452902793884, |
| "learning_rate": 2.8542359479027693e-05, |
| "loss": 1.5613, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6838709677419355, |
| "grad_norm": 0.27904996275901794, |
| "learning_rate": 2.8528477439821753e-05, |
| "loss": 1.6317, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.2748742997646332, |
| "learning_rate": 2.8514533018536286e-05, |
| "loss": 1.6531, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.687557603686636, |
| "grad_norm": 0.30121949315071106, |
| "learning_rate": 2.8500526279471362e-05, |
| "loss": 1.6045, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6894009216589861, |
| "grad_norm": 0.26736557483673096, |
| "learning_rate": 2.8486457287214403e-05, |
| "loss": 1.6746, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6912442396313364, |
| "grad_norm": 0.2701917886734009, |
| "learning_rate": 2.8472326106639896e-05, |
| "loss": 1.6882, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6930875576036867, |
| "grad_norm": 0.2666465640068054, |
| "learning_rate": 2.8458132802909075e-05, |
| "loss": 1.5905, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6949308755760368, |
| "grad_norm": 0.2787630259990692, |
| "learning_rate": 2.8443877441469653e-05, |
| "loss": 1.6351, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6967741935483871, |
| "grad_norm": 0.2640557289123535, |
| "learning_rate": 2.8429560088055502e-05, |
| "loss": 1.6291, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6986175115207374, |
| "grad_norm": 0.2768750488758087, |
| "learning_rate": 2.8415180808686326e-05, |
| "loss": 1.6113, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7004608294930875, |
| "grad_norm": 0.27341142296791077, |
| "learning_rate": 2.84007396696674e-05, |
| "loss": 1.6397, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7023041474654378, |
| "grad_norm": 0.2810218334197998, |
| "learning_rate": 2.8386236737589244e-05, |
| "loss": 1.6255, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7041474654377881, |
| "grad_norm": 0.2625837028026581, |
| "learning_rate": 2.8371672079327304e-05, |
| "loss": 1.5909, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7059907834101382, |
| "grad_norm": 0.28470954298973083, |
| "learning_rate": 2.835704576204167e-05, |
| "loss": 1.668, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7078341013824885, |
| "grad_norm": 0.2870181202888489, |
| "learning_rate": 2.8342357853176742e-05, |
| "loss": 1.655, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7096774193548387, |
| "grad_norm": 0.2737495005130768, |
| "learning_rate": 2.8327608420460933e-05, |
| "loss": 1.6606, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7115207373271889, |
| "grad_norm": 0.2716140151023865, |
| "learning_rate": 2.8312797531906346e-05, |
| "loss": 1.6487, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7133640552995392, |
| "grad_norm": 0.2937834560871124, |
| "learning_rate": 2.8297925255808484e-05, |
| "loss": 1.5784, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7152073732718894, |
| "grad_norm": 0.25758716464042664, |
| "learning_rate": 2.82829916607459e-05, |
| "loss": 1.5551, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7170506912442396, |
| "grad_norm": 0.26855534315109253, |
| "learning_rate": 2.826799681557991e-05, |
| "loss": 1.6242, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7188940092165899, |
| "grad_norm": 0.29118281602859497, |
| "learning_rate": 2.8252940789454268e-05, |
| "loss": 1.6404, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7207373271889401, |
| "grad_norm": 0.27645623683929443, |
| "learning_rate": 2.823782365179482e-05, |
| "loss": 1.7135, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7225806451612903, |
| "grad_norm": 0.2710598409175873, |
| "learning_rate": 2.822264547230924e-05, |
| "loss": 1.6472, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7244239631336405, |
| "grad_norm": 0.2802148163318634, |
| "learning_rate": 2.820740632098665e-05, |
| "loss": 1.5996, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7262672811059908, |
| "grad_norm": 0.28701063990592957, |
| "learning_rate": 2.8192106268097336e-05, |
| "loss": 1.6192, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.728110599078341, |
| "grad_norm": 0.28330931067466736, |
| "learning_rate": 2.8176745384192417e-05, |
| "loss": 1.6183, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7299539170506912, |
| "grad_norm": 0.26512524485588074, |
| "learning_rate": 2.8161323740103495e-05, |
| "loss": 1.6092, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7317972350230415, |
| "grad_norm": 0.27630165219306946, |
| "learning_rate": 2.814584140694237e-05, |
| "loss": 1.6938, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7336405529953917, |
| "grad_norm": 0.2831325829029083, |
| "learning_rate": 2.8130298456100667e-05, |
| "loss": 1.6665, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7354838709677419, |
| "grad_norm": 0.2725171744823456, |
| "learning_rate": 2.811469495924955e-05, |
| "loss": 1.6464, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7373271889400922, |
| "grad_norm": 0.2628806233406067, |
| "learning_rate": 2.8099030988339353e-05, |
| "loss": 1.5455, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7391705069124423, |
| "grad_norm": 0.26690706610679626, |
| "learning_rate": 2.8083306615599283e-05, |
| "loss": 1.6348, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7410138248847926, |
| "grad_norm": 0.2794962227344513, |
| "learning_rate": 2.8067521913537047e-05, |
| "loss": 1.6365, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.2658675014972687, |
| "learning_rate": 2.8051676954938574e-05, |
| "loss": 1.6348, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.744700460829493, |
| "grad_norm": 0.27006804943084717, |
| "learning_rate": 2.8035771812867613e-05, |
| "loss": 1.6384, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7465437788018433, |
| "grad_norm": 0.2776554822921753, |
| "learning_rate": 2.801980656066545e-05, |
| "loss": 1.6978, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7483870967741936, |
| "grad_norm": 0.2728254497051239, |
| "learning_rate": 2.8003781271950535e-05, |
| "loss": 1.6924, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7502304147465437, |
| "grad_norm": 0.2657453715801239, |
| "learning_rate": 2.7987696020618163e-05, |
| "loss": 1.5734, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.752073732718894, |
| "grad_norm": 0.26352396607398987, |
| "learning_rate": 2.7971550880840138e-05, |
| "loss": 1.6261, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7539170506912443, |
| "grad_norm": 0.2759632468223572, |
| "learning_rate": 2.79553459270644e-05, |
| "loss": 1.6264, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7557603686635944, |
| "grad_norm": 0.27137723565101624, |
| "learning_rate": 2.7939081234014708e-05, |
| "loss": 1.6432, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7576036866359447, |
| "grad_norm": 0.26721593737602234, |
| "learning_rate": 2.7922756876690298e-05, |
| "loss": 1.6903, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.759447004608295, |
| "grad_norm": 0.2769939601421356, |
| "learning_rate": 2.790637293036552e-05, |
| "loss": 1.6626, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7612903225806451, |
| "grad_norm": 0.2946414351463318, |
| "learning_rate": 2.7889929470589494e-05, |
| "loss": 1.6489, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7631336405529954, |
| "grad_norm": 0.27718386054039, |
| "learning_rate": 2.7873426573185777e-05, |
| "loss": 1.664, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7649769585253456, |
| "grad_norm": 0.2768406271934509, |
| "learning_rate": 2.7856864314251994e-05, |
| "loss": 1.6475, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7668202764976959, |
| "grad_norm": 0.2640882134437561, |
| "learning_rate": 2.78402427701595e-05, |
| "loss": 1.6332, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7686635944700461, |
| "grad_norm": 0.26694199442863464, |
| "learning_rate": 2.782356201755303e-05, |
| "loss": 1.6633, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7705069124423963, |
| "grad_norm": 0.26702558994293213, |
| "learning_rate": 2.780682213335033e-05, |
| "loss": 1.6281, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7723502304147466, |
| "grad_norm": 0.2785816490650177, |
| "learning_rate": 2.7790023194741812e-05, |
| "loss": 1.6733, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 0.27653270959854126, |
| "learning_rate": 2.7773165279190206e-05, |
| "loss": 1.6269, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.776036866359447, |
| "grad_norm": 0.27750319242477417, |
| "learning_rate": 2.7756248464430186e-05, |
| "loss": 1.6292, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7778801843317973, |
| "grad_norm": 0.2917342185974121, |
| "learning_rate": 2.7739272828468022e-05, |
| "loss": 1.6159, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7797235023041474, |
| "grad_norm": 0.26250651478767395, |
| "learning_rate": 2.7722238449581227e-05, |
| "loss": 1.6183, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.7815668202764977, |
| "grad_norm": 0.28476646542549133, |
| "learning_rate": 2.7705145406318167e-05, |
| "loss": 1.7191, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.783410138248848, |
| "grad_norm": 0.2587452232837677, |
| "learning_rate": 2.7687993777497747e-05, |
| "loss": 1.5733, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7852534562211981, |
| "grad_norm": 0.2674744427204132, |
| "learning_rate": 2.7670783642208996e-05, |
| "loss": 1.6225, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7870967741935484, |
| "grad_norm": 0.2689223289489746, |
| "learning_rate": 2.7653515079810744e-05, |
| "loss": 1.6964, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7889400921658987, |
| "grad_norm": 0.26800140738487244, |
| "learning_rate": 2.7636188169931217e-05, |
| "loss": 1.6728, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7907834101382488, |
| "grad_norm": 0.27116596698760986, |
| "learning_rate": 2.7618802992467718e-05, |
| "loss": 1.6971, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7926267281105991, |
| "grad_norm": 0.26951897144317627, |
| "learning_rate": 2.760135962758621e-05, |
| "loss": 1.6763, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7944700460829494, |
| "grad_norm": 0.26193779706954956, |
| "learning_rate": 2.7583858155720977e-05, |
| "loss": 1.6291, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7963133640552995, |
| "grad_norm": 0.27084240317344666, |
| "learning_rate": 2.756629865757424e-05, |
| "loss": 1.6718, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7981566820276498, |
| "grad_norm": 0.2781943380832672, |
| "learning_rate": 2.7548681214115798e-05, |
| "loss": 1.5723, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.27311035990715027, |
| "learning_rate": 2.7531005906582628e-05, |
| "loss": 1.6833, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8018433179723502, |
| "grad_norm": 0.27680864930152893, |
| "learning_rate": 2.7513272816478554e-05, |
| "loss": 1.6166, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8036866359447005, |
| "grad_norm": 0.26824140548706055, |
| "learning_rate": 2.7495482025573817e-05, |
| "loss": 1.6716, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8055299539170507, |
| "grad_norm": 0.27081307768821716, |
| "learning_rate": 2.7477633615904744e-05, |
| "loss": 1.6573, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8073732718894009, |
| "grad_norm": 0.2916286587715149, |
| "learning_rate": 2.7459727669773344e-05, |
| "loss": 1.6609, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8092165898617512, |
| "grad_norm": 0.2842768132686615, |
| "learning_rate": 2.7441764269746946e-05, |
| "loss": 1.6401, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8110599078341014, |
| "grad_norm": 0.2689710557460785, |
| "learning_rate": 2.7423743498657794e-05, |
| "loss": 1.6313, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8129032258064516, |
| "grad_norm": 0.28198468685150146, |
| "learning_rate": 2.7405665439602695e-05, |
| "loss": 1.729, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8147465437788018, |
| "grad_norm": 0.28444287180900574, |
| "learning_rate": 2.7387530175942604e-05, |
| "loss": 1.6436, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8165898617511521, |
| "grad_norm": 0.289588987827301, |
| "learning_rate": 2.7369337791302272e-05, |
| "loss": 1.6492, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8184331797235023, |
| "grad_norm": 0.2716604173183441, |
| "learning_rate": 2.7351088369569833e-05, |
| "loss": 1.6683, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8202764976958525, |
| "grad_norm": 0.2785453796386719, |
| "learning_rate": 2.7332781994896438e-05, |
| "loss": 1.6595, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8221198156682028, |
| "grad_norm": 0.26734933257102966, |
| "learning_rate": 2.7314418751695845e-05, |
| "loss": 1.6208, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.823963133640553, |
| "grad_norm": 0.27588117122650146, |
| "learning_rate": 2.7295998724644058e-05, |
| "loss": 1.6085, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8258064516129032, |
| "grad_norm": 0.26697295904159546, |
| "learning_rate": 2.7277521998678904e-05, |
| "loss": 1.6348, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8276497695852535, |
| "grad_norm": 0.27423718571662903, |
| "learning_rate": 2.725898865899967e-05, |
| "loss": 1.6787, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8294930875576036, |
| "grad_norm": 0.26683908700942993, |
| "learning_rate": 2.72403987910667e-05, |
| "loss": 1.6271, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8313364055299539, |
| "grad_norm": 0.26200321316719055, |
| "learning_rate": 2.722175248060099e-05, |
| "loss": 1.6035, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8331797235023042, |
| "grad_norm": 0.2699339985847473, |
| "learning_rate": 2.7203049813583803e-05, |
| "loss": 1.5928, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8350230414746543, |
| "grad_norm": 0.27287527918815613, |
| "learning_rate": 2.7184290876256278e-05, |
| "loss": 1.6073, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8368663594470046, |
| "grad_norm": 0.2751379907131195, |
| "learning_rate": 2.716547575511903e-05, |
| "loss": 1.6385, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8387096774193549, |
| "grad_norm": 0.2756018340587616, |
| "learning_rate": 2.714660453693173e-05, |
| "loss": 1.6921, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.840552995391705, |
| "grad_norm": 0.28198951482772827, |
| "learning_rate": 2.7127677308712733e-05, |
| "loss": 1.6651, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8423963133640553, |
| "grad_norm": 0.28402063250541687, |
| "learning_rate": 2.710869415773867e-05, |
| "loss": 1.5813, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8442396313364056, |
| "grad_norm": 0.29829660058021545, |
| "learning_rate": 2.7089655171544026e-05, |
| "loss": 1.6971, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8460829493087557, |
| "grad_norm": 0.2694368362426758, |
| "learning_rate": 2.707056043792077e-05, |
| "loss": 1.6268, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.847926267281106, |
| "grad_norm": 0.2761029303073883, |
| "learning_rate": 2.705141004491792e-05, |
| "loss": 1.6883, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8497695852534562, |
| "grad_norm": 0.280799001455307, |
| "learning_rate": 2.703220408084115e-05, |
| "loss": 1.6409, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.8516129032258064, |
| "grad_norm": 0.2578011453151703, |
| "learning_rate": 2.7012942634252384e-05, |
| "loss": 1.5454, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8534562211981567, |
| "grad_norm": 0.30007144808769226, |
| "learning_rate": 2.6993625793969383e-05, |
| "loss": 1.6845, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8552995391705069, |
| "grad_norm": 0.26995283365249634, |
| "learning_rate": 2.697425364906534e-05, |
| "loss": 1.6339, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.3060062527656555, |
| "learning_rate": 2.6954826288868463e-05, |
| "loss": 1.6226, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8589861751152074, |
| "grad_norm": 0.27042827010154724, |
| "learning_rate": 2.693534380296158e-05, |
| "loss": 1.5902, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8608294930875576, |
| "grad_norm": 0.2701798975467682, |
| "learning_rate": 2.6915806281181688e-05, |
| "loss": 1.6444, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.8626728110599078, |
| "grad_norm": 0.2839266061782837, |
| "learning_rate": 2.6896213813619592e-05, |
| "loss": 1.631, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.864516129032258, |
| "grad_norm": 0.2714848816394806, |
| "learning_rate": 2.6876566490619437e-05, |
| "loss": 1.5984, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.8663594470046083, |
| "grad_norm": 0.26444998383522034, |
| "learning_rate": 2.685686440277833e-05, |
| "loss": 1.6318, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8682027649769585, |
| "grad_norm": 0.28271374106407166, |
| "learning_rate": 2.6837107640945904e-05, |
| "loss": 1.6931, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8700460829493087, |
| "grad_norm": 0.26922810077667236, |
| "learning_rate": 2.681729629622391e-05, |
| "loss": 1.5986, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.871889400921659, |
| "grad_norm": 0.2678123712539673, |
| "learning_rate": 2.6797430459965766e-05, |
| "loss": 1.6511, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.8737327188940092, |
| "grad_norm": 0.2775745093822479, |
| "learning_rate": 2.6777510223776187e-05, |
| "loss": 1.6248, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8755760368663594, |
| "grad_norm": 0.2708311378955841, |
| "learning_rate": 2.6757535679510727e-05, |
| "loss": 1.6032, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8774193548387097, |
| "grad_norm": 0.27130743861198425, |
| "learning_rate": 2.6737506919275363e-05, |
| "loss": 1.6658, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8792626728110599, |
| "grad_norm": 0.32221710681915283, |
| "learning_rate": 2.6717424035426054e-05, |
| "loss": 1.6324, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.8811059907834101, |
| "grad_norm": 0.27446186542510986, |
| "learning_rate": 2.6697287120568364e-05, |
| "loss": 1.6608, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8829493087557604, |
| "grad_norm": 0.2836889922618866, |
| "learning_rate": 2.6677096267556984e-05, |
| "loss": 1.5962, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.8847926267281107, |
| "grad_norm": 0.29300132393836975, |
| "learning_rate": 2.6656851569495316e-05, |
| "loss": 1.6496, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8866359447004608, |
| "grad_norm": 0.2937772572040558, |
| "learning_rate": 2.6636553119735066e-05, |
| "loss": 1.6164, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8884792626728111, |
| "grad_norm": 0.30279168486595154, |
| "learning_rate": 2.6616201011875792e-05, |
| "loss": 1.6317, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8903225806451613, |
| "grad_norm": 0.2589039206504822, |
| "learning_rate": 2.6595795339764478e-05, |
| "loss": 1.6076, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8921658986175115, |
| "grad_norm": 0.29674026370048523, |
| "learning_rate": 2.6575336197495098e-05, |
| "loss": 1.6106, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8940092165898618, |
| "grad_norm": 0.2732203006744385, |
| "learning_rate": 2.6554823679408195e-05, |
| "loss": 1.6597, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.895852534562212, |
| "grad_norm": 0.26109176874160767, |
| "learning_rate": 2.653425788009043e-05, |
| "loss": 1.5526, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8976958525345622, |
| "grad_norm": 0.3179691433906555, |
| "learning_rate": 2.6513638894374158e-05, |
| "loss": 1.687, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8995391705069125, |
| "grad_norm": 0.26771122217178345, |
| "learning_rate": 2.6492966817336977e-05, |
| "loss": 1.619, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9013824884792627, |
| "grad_norm": 0.2821449637413025, |
| "learning_rate": 2.6472241744301304e-05, |
| "loss": 1.5945, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9032258064516129, |
| "grad_norm": 0.28389427065849304, |
| "learning_rate": 2.645146377083393e-05, |
| "loss": 1.5934, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9050691244239631, |
| "grad_norm": 0.25905099511146545, |
| "learning_rate": 2.6430632992745577e-05, |
| "loss": 1.6376, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9069124423963134, |
| "grad_norm": 0.2909289598464966, |
| "learning_rate": 2.6409749506090456e-05, |
| "loss": 1.6398, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9087557603686636, |
| "grad_norm": 0.30152249336242676, |
| "learning_rate": 2.638881340716583e-05, |
| "loss": 1.7085, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9105990783410138, |
| "grad_norm": 0.2955312430858612, |
| "learning_rate": 2.6367824792511565e-05, |
| "loss": 1.663, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9124423963133641, |
| "grad_norm": 0.3120115101337433, |
| "learning_rate": 2.6346783758909683e-05, |
| "loss": 1.6809, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.26377126574516296, |
| "learning_rate": 2.632569040338392e-05, |
| "loss": 1.6235, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.9161290322580645, |
| "grad_norm": 0.2728709876537323, |
| "learning_rate": 2.6304544823199282e-05, |
| "loss": 1.6263, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9179723502304148, |
| "grad_norm": 0.28993189334869385, |
| "learning_rate": 2.6283347115861586e-05, |
| "loss": 1.6395, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.919815668202765, |
| "grad_norm": 0.27762892842292786, |
| "learning_rate": 2.6262097379117015e-05, |
| "loss": 1.6613, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9216589861751152, |
| "grad_norm": 0.26995396614074707, |
| "learning_rate": 2.624079571095167e-05, |
| "loss": 1.6483, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9235023041474655, |
| "grad_norm": 0.27732783555984497, |
| "learning_rate": 2.6219442209591123e-05, |
| "loss": 1.6918, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.9253456221198156, |
| "grad_norm": 0.28199324011802673, |
| "learning_rate": 2.619803697349994e-05, |
| "loss": 1.6222, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9271889400921659, |
| "grad_norm": 0.28517088294029236, |
| "learning_rate": 2.6176580101381273e-05, |
| "loss": 1.6017, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9290322580645162, |
| "grad_norm": 0.2852960526943207, |
| "learning_rate": 2.6155071692176348e-05, |
| "loss": 1.6117, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9308755760368663, |
| "grad_norm": 0.2706369459629059, |
| "learning_rate": 2.613351184506405e-05, |
| "loss": 1.6483, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9327188940092166, |
| "grad_norm": 0.31629517674446106, |
| "learning_rate": 2.6111900659460455e-05, |
| "loss": 1.63, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9345622119815669, |
| "grad_norm": 0.2743918001651764, |
| "learning_rate": 2.6090238235018365e-05, |
| "loss": 1.617, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.936405529953917, |
| "grad_norm": 0.26636356115341187, |
| "learning_rate": 2.6068524671626856e-05, |
| "loss": 1.6671, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9382488479262673, |
| "grad_norm": 0.2727503478527069, |
| "learning_rate": 2.6046760069410806e-05, |
| "loss": 1.6101, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.9400921658986175, |
| "grad_norm": 0.27820461988449097, |
| "learning_rate": 2.6024944528730453e-05, |
| "loss": 1.5903, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9419354838709677, |
| "grad_norm": 0.2720506191253662, |
| "learning_rate": 2.6003078150180922e-05, |
| "loss": 1.6722, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.943778801843318, |
| "grad_norm": 0.2729189097881317, |
| "learning_rate": 2.598116103459174e-05, |
| "loss": 1.6232, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9456221198156682, |
| "grad_norm": 0.2625363767147064, |
| "learning_rate": 2.595919328302641e-05, |
| "loss": 1.5969, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9474654377880184, |
| "grad_norm": 0.26478803157806396, |
| "learning_rate": 2.5937174996781927e-05, |
| "loss": 1.5817, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9493087557603687, |
| "grad_norm": 0.28460946679115295, |
| "learning_rate": 2.5915106277388293e-05, |
| "loss": 1.5845, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9511520737327189, |
| "grad_norm": 0.2615947127342224, |
| "learning_rate": 2.5892987226608082e-05, |
| "loss": 1.6227, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.9529953917050691, |
| "grad_norm": 0.25825098156929016, |
| "learning_rate": 2.5870817946435953e-05, |
| "loss": 1.5853, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.9548387096774194, |
| "grad_norm": 0.2917359471321106, |
| "learning_rate": 2.5848598539098164e-05, |
| "loss": 1.6514, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9566820276497696, |
| "grad_norm": 0.25932732224464417, |
| "learning_rate": 2.5826329107052144e-05, |
| "loss": 1.603, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9585253456221198, |
| "grad_norm": 0.25399070978164673, |
| "learning_rate": 2.5804009752985975e-05, |
| "loss": 1.6073, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.96036866359447, |
| "grad_norm": 0.28060877323150635, |
| "learning_rate": 2.5781640579817946e-05, |
| "loss": 1.6337, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.9622119815668203, |
| "grad_norm": 0.28021928668022156, |
| "learning_rate": 2.5759221690696062e-05, |
| "loss": 1.6345, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9640552995391705, |
| "grad_norm": 0.27043914794921875, |
| "learning_rate": 2.573675318899759e-05, |
| "loss": 1.6471, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9658986175115207, |
| "grad_norm": 0.2874245345592499, |
| "learning_rate": 2.5714235178328554e-05, |
| "loss": 1.6632, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.2559823989868164, |
| "learning_rate": 2.5691667762523284e-05, |
| "loss": 1.6133, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9695852534562212, |
| "grad_norm": 0.2857271432876587, |
| "learning_rate": 2.566905104564393e-05, |
| "loss": 1.628, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.27135902643203735, |
| "learning_rate": 2.564638513197995e-05, |
| "loss": 1.5717, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.9732718894009217, |
| "grad_norm": 0.25891175866127014, |
| "learning_rate": 2.562367012604769e-05, |
| "loss": 1.6104, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9751152073732718, |
| "grad_norm": 0.27881482243537903, |
| "learning_rate": 2.5600906132589846e-05, |
| "loss": 1.6325, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.9769585253456221, |
| "grad_norm": 0.2634258270263672, |
| "learning_rate": 2.557809325657501e-05, |
| "loss": 1.605, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9788018433179724, |
| "grad_norm": 0.2687437832355499, |
| "learning_rate": 2.555523160319719e-05, |
| "loss": 1.6302, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.9806451612903225, |
| "grad_norm": 0.27842116355895996, |
| "learning_rate": 2.5532321277875305e-05, |
| "loss": 1.6813, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9824884792626728, |
| "grad_norm": 0.26950204372406006, |
| "learning_rate": 2.5509362386252702e-05, |
| "loss": 1.6166, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9843317972350231, |
| "grad_norm": 0.2951159179210663, |
| "learning_rate": 2.5486355034196686e-05, |
| "loss": 1.669, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9861751152073732, |
| "grad_norm": 0.2618483603000641, |
| "learning_rate": 2.5463299327798015e-05, |
| "loss": 1.6714, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9880184331797235, |
| "grad_norm": 0.25726941227912903, |
| "learning_rate": 2.544019537337043e-05, |
| "loss": 1.6314, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9898617511520738, |
| "grad_norm": 0.2638774812221527, |
| "learning_rate": 2.541704327745013e-05, |
| "loss": 1.6458, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9917050691244239, |
| "grad_norm": 0.2752140760421753, |
| "learning_rate": 2.539384314679532e-05, |
| "loss": 1.6564, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9935483870967742, |
| "grad_norm": 0.27124327421188354, |
| "learning_rate": 2.5370595088385696e-05, |
| "loss": 1.6071, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.9953917050691244, |
| "grad_norm": 0.26434096693992615, |
| "learning_rate": 2.5347299209421955e-05, |
| "loss": 1.611, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9972350230414746, |
| "grad_norm": 0.2855331003665924, |
| "learning_rate": 2.53239556173253e-05, |
| "loss": 1.6311, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9990783410138249, |
| "grad_norm": 0.2693633437156677, |
| "learning_rate": 2.530056441973696e-05, |
| "loss": 1.5659, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.0009216589861751, |
| "grad_norm": 0.26715287566185, |
| "learning_rate": 2.5277125724517665e-05, |
| "loss": 1.6523, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.0027649769585254, |
| "grad_norm": 0.30740463733673096, |
| "learning_rate": 2.525363963974717e-05, |
| "loss": 1.655, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.0046082949308757, |
| "grad_norm": 0.2541782557964325, |
| "learning_rate": 2.523010627372376e-05, |
| "loss": 1.5368, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.0064516129032257, |
| "grad_norm": 0.27700361609458923, |
| "learning_rate": 2.520652573496373e-05, |
| "loss": 1.6131, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.008294930875576, |
| "grad_norm": 0.2821163237094879, |
| "learning_rate": 2.51828981322009e-05, |
| "loss": 1.6299, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.0101382488479262, |
| "grad_norm": 0.26375892758369446, |
| "learning_rate": 2.5159223574386117e-05, |
| "loss": 1.6282, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.0119815668202765, |
| "grad_norm": 0.26267147064208984, |
| "learning_rate": 2.513550217068673e-05, |
| "loss": 1.6306, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.0138248847926268, |
| "grad_norm": 0.2676134407520294, |
| "learning_rate": 2.5111734030486127e-05, |
| "loss": 1.6022, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.015668202764977, |
| "grad_norm": 0.27351808547973633, |
| "learning_rate": 2.508791926338317e-05, |
| "loss": 1.6113, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.017511520737327, |
| "grad_norm": 0.2688004970550537, |
| "learning_rate": 2.5064057979191766e-05, |
| "loss": 1.6101, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.0193548387096774, |
| "grad_norm": 0.26147159934043884, |
| "learning_rate": 2.5040150287940286e-05, |
| "loss": 1.611, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.0211981566820276, |
| "grad_norm": 0.272300660610199, |
| "learning_rate": 2.5016196299871115e-05, |
| "loss": 1.6068, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.023041474654378, |
| "grad_norm": 0.26897957921028137, |
| "learning_rate": 2.49921961254401e-05, |
| "loss": 1.6466, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.0248847926267282, |
| "grad_norm": 0.26811644434928894, |
| "learning_rate": 2.496814987531609e-05, |
| "loss": 1.651, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.0267281105990784, |
| "grad_norm": 0.265045702457428, |
| "learning_rate": 2.4944057660380363e-05, |
| "loss": 1.6455, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.0285714285714285, |
| "grad_norm": 0.2766599953174591, |
| "learning_rate": 2.4919919591726175e-05, |
| "loss": 1.6231, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.0304147465437787, |
| "grad_norm": 0.27361610531806946, |
| "learning_rate": 2.489573578065821e-05, |
| "loss": 1.6258, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.032258064516129, |
| "grad_norm": 0.26939770579338074, |
| "learning_rate": 2.487150633869207e-05, |
| "loss": 1.5856, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0341013824884793, |
| "grad_norm": 0.28874027729034424, |
| "learning_rate": 2.484723137755379e-05, |
| "loss": 1.5899, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.0359447004608295, |
| "grad_norm": 0.2698168158531189, |
| "learning_rate": 2.482291100917928e-05, |
| "loss": 1.7224, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.0377880184331798, |
| "grad_norm": 0.26175767183303833, |
| "learning_rate": 2.4798545345713837e-05, |
| "loss": 1.6187, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.0396313364055298, |
| "grad_norm": 0.27548477053642273, |
| "learning_rate": 2.4774134499511636e-05, |
| "loss": 1.7049, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.0414746543778801, |
| "grad_norm": 0.257304847240448, |
| "learning_rate": 2.4749678583135175e-05, |
| "loss": 1.5474, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.0433179723502304, |
| "grad_norm": 0.266632080078125, |
| "learning_rate": 2.472517770935479e-05, |
| "loss": 1.623, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.0451612903225806, |
| "grad_norm": 0.2716248631477356, |
| "learning_rate": 2.4700631991148126e-05, |
| "loss": 1.5814, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.047004608294931, |
| "grad_norm": 0.2753863036632538, |
| "learning_rate": 2.46760415416996e-05, |
| "loss": 1.644, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.0488479262672812, |
| "grad_norm": 0.2726069390773773, |
| "learning_rate": 2.465140647439991e-05, |
| "loss": 1.6133, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.0506912442396312, |
| "grad_norm": 0.28230923414230347, |
| "learning_rate": 2.4626726902845477e-05, |
| "loss": 1.6963, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0525345622119815, |
| "grad_norm": 0.25983119010925293, |
| "learning_rate": 2.4602002940837948e-05, |
| "loss": 1.5626, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.0543778801843318, |
| "grad_norm": 0.2676817774772644, |
| "learning_rate": 2.4577234702383666e-05, |
| "loss": 1.5422, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.056221198156682, |
| "grad_norm": 0.26019108295440674, |
| "learning_rate": 2.4552422301693128e-05, |
| "loss": 1.5826, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.0580645161290323, |
| "grad_norm": 0.26668256521224976, |
| "learning_rate": 2.452756585318048e-05, |
| "loss": 1.596, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.0599078341013826, |
| "grad_norm": 0.28593432903289795, |
| "learning_rate": 2.4502665471462983e-05, |
| "loss": 1.6028, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0617511520737328, |
| "grad_norm": 0.2791599929332733, |
| "learning_rate": 2.447772127136046e-05, |
| "loss": 1.5927, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.0635944700460829, |
| "grad_norm": 0.28675881028175354, |
| "learning_rate": 2.4452733367894816e-05, |
| "loss": 1.5879, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.0654377880184331, |
| "grad_norm": 0.29501160979270935, |
| "learning_rate": 2.4427701876289465e-05, |
| "loss": 1.5583, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.0672811059907834, |
| "grad_norm": 0.2674134075641632, |
| "learning_rate": 2.440262691196881e-05, |
| "loss": 1.6205, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.0691244239631337, |
| "grad_norm": 0.32356998324394226, |
| "learning_rate": 2.437750859055773e-05, |
| "loss": 1.6112, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.070967741935484, |
| "grad_norm": 0.2775920629501343, |
| "learning_rate": 2.4352347027881003e-05, |
| "loss": 1.6036, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.072811059907834, |
| "grad_norm": 0.28417059779167175, |
| "learning_rate": 2.4327142339962827e-05, |
| "loss": 1.6073, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.0746543778801843, |
| "grad_norm": 0.316342294216156, |
| "learning_rate": 2.430189464302625e-05, |
| "loss": 1.6312, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.0764976958525345, |
| "grad_norm": 0.2634347081184387, |
| "learning_rate": 2.4276604053492636e-05, |
| "loss": 1.6042, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.0783410138248848, |
| "grad_norm": 0.2889562249183655, |
| "learning_rate": 2.425127068798113e-05, |
| "loss": 1.586, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.080184331797235, |
| "grad_norm": 0.2724316716194153, |
| "learning_rate": 2.422589466330814e-05, |
| "loss": 1.6629, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.0820276497695853, |
| "grad_norm": 0.263497531414032, |
| "learning_rate": 2.4200476096486774e-05, |
| "loss": 1.5843, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.0838709677419356, |
| "grad_norm": 0.27481377124786377, |
| "learning_rate": 2.4175015104726306e-05, |
| "loss": 1.6378, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.0857142857142856, |
| "grad_norm": 0.28347697854042053, |
| "learning_rate": 2.414951180543164e-05, |
| "loss": 1.7082, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.087557603686636, |
| "grad_norm": 0.2818866968154907, |
| "learning_rate": 2.4123966316202768e-05, |
| "loss": 1.5482, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0894009216589862, |
| "grad_norm": 0.26917752623558044, |
| "learning_rate": 2.4098378754834227e-05, |
| "loss": 1.6042, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.0912442396313364, |
| "grad_norm": 0.2925183176994324, |
| "learning_rate": 2.4072749239314565e-05, |
| "loss": 1.5839, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0930875576036867, |
| "grad_norm": 0.2812125086784363, |
| "learning_rate": 2.4047077887825765e-05, |
| "loss": 1.5705, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.094930875576037, |
| "grad_norm": 0.2660687565803528, |
| "learning_rate": 2.402136481874275e-05, |
| "loss": 1.6325, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.096774193548387, |
| "grad_norm": 0.308992862701416, |
| "learning_rate": 2.399561015063278e-05, |
| "loss": 1.5755, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.0986175115207373, |
| "grad_norm": 0.2750917971134186, |
| "learning_rate": 2.3969814002254965e-05, |
| "loss": 1.6258, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.1004608294930875, |
| "grad_norm": 0.277424156665802, |
| "learning_rate": 2.3943976492559675e-05, |
| "loss": 1.6046, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.1023041474654378, |
| "grad_norm": 0.2793235182762146, |
| "learning_rate": 2.3918097740687987e-05, |
| "loss": 1.6198, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.104147465437788, |
| "grad_norm": 0.30500084161758423, |
| "learning_rate": 2.3892177865971183e-05, |
| "loss": 1.5345, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.1059907834101383, |
| "grad_norm": 0.2782265841960907, |
| "learning_rate": 2.386621698793015e-05, |
| "loss": 1.6041, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1078341013824884, |
| "grad_norm": 0.2845817506313324, |
| "learning_rate": 2.3840215226274847e-05, |
| "loss": 1.5975, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.1096774193548387, |
| "grad_norm": 0.31969916820526123, |
| "learning_rate": 2.3814172700903775e-05, |
| "loss": 1.6021, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.111520737327189, |
| "grad_norm": 0.26726001501083374, |
| "learning_rate": 2.3788089531903372e-05, |
| "loss": 1.5317, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.1133640552995392, |
| "grad_norm": 0.2735467553138733, |
| "learning_rate": 2.3761965839547515e-05, |
| "loss": 1.5867, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.1152073732718895, |
| "grad_norm": 0.31699496507644653, |
| "learning_rate": 2.3735801744296934e-05, |
| "loss": 1.6256, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.1170506912442397, |
| "grad_norm": 0.27312713861465454, |
| "learning_rate": 2.3709597366798662e-05, |
| "loss": 1.6208, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.1188940092165898, |
| "grad_norm": 0.2782924473285675, |
| "learning_rate": 2.3683352827885472e-05, |
| "loss": 1.6535, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.12073732718894, |
| "grad_norm": 0.30579322576522827, |
| "learning_rate": 2.365706824857535e-05, |
| "loss": 1.606, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.1225806451612903, |
| "grad_norm": 0.28099164366722107, |
| "learning_rate": 2.3630743750070892e-05, |
| "loss": 1.5968, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.1244239631336406, |
| "grad_norm": 0.27450433373451233, |
| "learning_rate": 2.360437945375878e-05, |
| "loss": 1.6303, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.1262672811059908, |
| "grad_norm": 0.27543413639068604, |
| "learning_rate": 2.3577975481209214e-05, |
| "loss": 1.6004, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.128110599078341, |
| "grad_norm": 0.27525603771209717, |
| "learning_rate": 2.3551531954175335e-05, |
| "loss": 1.6507, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.1299539170506911, |
| "grad_norm": 0.26268866658210754, |
| "learning_rate": 2.3525048994592684e-05, |
| "loss": 1.5314, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.1317972350230414, |
| "grad_norm": 0.2847149968147278, |
| "learning_rate": 2.3498526724578637e-05, |
| "loss": 1.5997, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.1336405529953917, |
| "grad_norm": 0.2706824243068695, |
| "learning_rate": 2.3471965266431824e-05, |
| "loss": 1.6192, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.135483870967742, |
| "grad_norm": 0.28432345390319824, |
| "learning_rate": 2.3445364742631592e-05, |
| "loss": 1.5632, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.1373271889400922, |
| "grad_norm": 0.2760394811630249, |
| "learning_rate": 2.3418725275837413e-05, |
| "loss": 1.6104, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.1391705069124425, |
| "grad_norm": 0.26547399163246155, |
| "learning_rate": 2.3392046988888345e-05, |
| "loss": 1.5942, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.1410138248847925, |
| "grad_norm": 0.28383272886276245, |
| "learning_rate": 2.3365330004802443e-05, |
| "loss": 1.6284, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.2690708339214325, |
| "learning_rate": 2.33385744467762e-05, |
| "loss": 1.5903, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.144700460829493, |
| "grad_norm": 0.2783527970314026, |
| "learning_rate": 2.331178043818399e-05, |
| "loss": 1.6339, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.1465437788018433, |
| "grad_norm": 0.2668425738811493, |
| "learning_rate": 2.328494810257748e-05, |
| "loss": 1.5174, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.1483870967741936, |
| "grad_norm": 0.2701328992843628, |
| "learning_rate": 2.3258077563685072e-05, |
| "loss": 1.6126, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.1502304147465439, |
| "grad_norm": 0.26650527119636536, |
| "learning_rate": 2.3231168945411326e-05, |
| "loss": 1.5872, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.1520737327188941, |
| "grad_norm": 0.27082017064094543, |
| "learning_rate": 2.320422237183641e-05, |
| "loss": 1.6007, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.1539170506912442, |
| "grad_norm": 0.28844255208969116, |
| "learning_rate": 2.317723796721547e-05, |
| "loss": 1.5988, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.1557603686635944, |
| "grad_norm": 0.26136210560798645, |
| "learning_rate": 2.315021585597815e-05, |
| "loss": 1.5385, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.1576036866359447, |
| "grad_norm": 0.28600579500198364, |
| "learning_rate": 2.3123156162727923e-05, |
| "loss": 1.6156, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.159447004608295, |
| "grad_norm": 0.27295541763305664, |
| "learning_rate": 2.3096059012241583e-05, |
| "loss": 1.5353, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.1612903225806452, |
| "grad_norm": 0.2765044867992401, |
| "learning_rate": 2.3068924529468638e-05, |
| "loss": 1.6577, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1631336405529953, |
| "grad_norm": 0.28675732016563416, |
| "learning_rate": 2.3041752839530735e-05, |
| "loss": 1.6112, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.1649769585253456, |
| "grad_norm": 0.2791755795478821, |
| "learning_rate": 2.3014544067721096e-05, |
| "loss": 1.5268, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.1668202764976958, |
| "grad_norm": 0.2844353914260864, |
| "learning_rate": 2.298729833950394e-05, |
| "loss": 1.5635, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.168663594470046, |
| "grad_norm": 0.2774640917778015, |
| "learning_rate": 2.2960015780513893e-05, |
| "loss": 1.6243, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.1705069124423964, |
| "grad_norm": 0.27996620535850525, |
| "learning_rate": 2.2932696516555396e-05, |
| "loss": 1.5647, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.1723502304147466, |
| "grad_norm": 0.2877368927001953, |
| "learning_rate": 2.2905340673602184e-05, |
| "loss": 1.5705, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.1741935483870969, |
| "grad_norm": 0.2850167751312256, |
| "learning_rate": 2.287794837779662e-05, |
| "loss": 1.6524, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.176036866359447, |
| "grad_norm": 0.2903353273868561, |
| "learning_rate": 2.2850519755449183e-05, |
| "loss": 1.57, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.1778801843317972, |
| "grad_norm": 0.2763937711715698, |
| "learning_rate": 2.282305493303785e-05, |
| "loss": 1.6409, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.1797235023041475, |
| "grad_norm": 0.2943311929702759, |
| "learning_rate": 2.2795554037207528e-05, |
| "loss": 1.6925, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1815668202764977, |
| "grad_norm": 0.2771497070789337, |
| "learning_rate": 2.2768017194769466e-05, |
| "loss": 1.5796, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.183410138248848, |
| "grad_norm": 0.26944899559020996, |
| "learning_rate": 2.2740444532700657e-05, |
| "loss": 1.6039, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.185253456221198, |
| "grad_norm": 0.2843589782714844, |
| "learning_rate": 2.271283617814328e-05, |
| "loss": 1.6457, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.1870967741935483, |
| "grad_norm": 0.2836996614933014, |
| "learning_rate": 2.268519225840409e-05, |
| "loss": 1.5728, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.1889400921658986, |
| "grad_norm": 0.28848952054977417, |
| "learning_rate": 2.2657512900953832e-05, |
| "loss": 1.617, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.1907834101382488, |
| "grad_norm": 0.2769070267677307, |
| "learning_rate": 2.2629798233426677e-05, |
| "loss": 1.6127, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.192626728110599, |
| "grad_norm": 0.2685301601886749, |
| "learning_rate": 2.26020483836196e-05, |
| "loss": 1.5747, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.1944700460829494, |
| "grad_norm": 0.2858518660068512, |
| "learning_rate": 2.2574263479491816e-05, |
| "loss": 1.6335, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.1963133640552996, |
| "grad_norm": 0.27150848507881165, |
| "learning_rate": 2.2546443649164186e-05, |
| "loss": 1.5749, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.1981566820276497, |
| "grad_norm": 0.27711644768714905, |
| "learning_rate": 2.2518589020918612e-05, |
| "loss": 1.6022, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.27468806505203247, |
| "learning_rate": 2.2490699723197454e-05, |
| "loss": 1.6034, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.2018433179723502, |
| "grad_norm": 0.2741892635822296, |
| "learning_rate": 2.2462775884602954e-05, |
| "loss": 1.6301, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.2036866359447005, |
| "grad_norm": 0.2700754702091217, |
| "learning_rate": 2.243481763389661e-05, |
| "loss": 1.5741, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.2055299539170508, |
| "grad_norm": 0.28095367550849915, |
| "learning_rate": 2.24068250999986e-05, |
| "loss": 1.5861, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.2073732718894008, |
| "grad_norm": 0.27654770016670227, |
| "learning_rate": 2.2378798411987218e-05, |
| "loss": 1.6016, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.209216589861751, |
| "grad_norm": 0.27032870054244995, |
| "learning_rate": 2.2350737699098203e-05, |
| "loss": 1.6194, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.2110599078341013, |
| "grad_norm": 0.2779473066329956, |
| "learning_rate": 2.2322643090724218e-05, |
| "loss": 1.6285, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.2129032258064516, |
| "grad_norm": 0.2657751739025116, |
| "learning_rate": 2.229451471641422e-05, |
| "loss": 1.6217, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.2147465437788019, |
| "grad_norm": 0.27549269795417786, |
| "learning_rate": 2.226635270587286e-05, |
| "loss": 1.5244, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.2165898617511521, |
| "grad_norm": 0.2700861692428589, |
| "learning_rate": 2.2238157188959893e-05, |
| "loss": 1.5988, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.2184331797235024, |
| "grad_norm": 0.27551552653312683, |
| "learning_rate": 2.2209928295689582e-05, |
| "loss": 1.6695, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.2202764976958524, |
| "grad_norm": 0.27419742941856384, |
| "learning_rate": 2.2181666156230082e-05, |
| "loss": 1.5763, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.2221198156682027, |
| "grad_norm": 0.26684898138046265, |
| "learning_rate": 2.2153370900902872e-05, |
| "loss": 1.5866, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.223963133640553, |
| "grad_norm": 0.2631971836090088, |
| "learning_rate": 2.2125042660182115e-05, |
| "loss": 1.555, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.2258064516129032, |
| "grad_norm": 0.26897555589675903, |
| "learning_rate": 2.2096681564694087e-05, |
| "loss": 1.6108, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.2276497695852535, |
| "grad_norm": 0.2753186523914337, |
| "learning_rate": 2.2068287745216552e-05, |
| "loss": 1.6178, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.2294930875576038, |
| "grad_norm": 0.272468626499176, |
| "learning_rate": 2.203986133267818e-05, |
| "loss": 1.5851, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.2313364055299538, |
| "grad_norm": 0.28555935621261597, |
| "learning_rate": 2.2011402458157935e-05, |
| "loss": 1.657, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.233179723502304, |
| "grad_norm": 0.2571600079536438, |
| "learning_rate": 2.198291125288445e-05, |
| "loss": 1.5385, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.2350230414746544, |
| "grad_norm": 0.2795640528202057, |
| "learning_rate": 2.1954387848235455e-05, |
| "loss": 1.5856, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2368663594470046, |
| "grad_norm": 0.2699826657772064, |
| "learning_rate": 2.1925832375737168e-05, |
| "loss": 1.587, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.238709677419355, |
| "grad_norm": 0.27142706513404846, |
| "learning_rate": 2.1897244967063653e-05, |
| "loss": 1.6016, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.2405529953917052, |
| "grad_norm": 0.2868463099002838, |
| "learning_rate": 2.1868625754036256e-05, |
| "loss": 1.5758, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.2423963133640552, |
| "grad_norm": 0.2734906077384949, |
| "learning_rate": 2.1839974868622956e-05, |
| "loss": 1.5834, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.2442396313364055, |
| "grad_norm": 0.27085962891578674, |
| "learning_rate": 2.1811292442937808e-05, |
| "loss": 1.5689, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.2460829493087557, |
| "grad_norm": 0.2795475125312805, |
| "learning_rate": 2.1782578609240286e-05, |
| "loss": 1.5531, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.247926267281106, |
| "grad_norm": 0.27187928557395935, |
| "learning_rate": 2.1753833499934694e-05, |
| "loss": 1.5728, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.2497695852534563, |
| "grad_norm": 0.2647460103034973, |
| "learning_rate": 2.1725057247569552e-05, |
| "loss": 1.5917, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.2516129032258063, |
| "grad_norm": 0.2762637436389923, |
| "learning_rate": 2.1696249984836993e-05, |
| "loss": 1.6209, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.2534562211981566, |
| "grad_norm": 0.27510347962379456, |
| "learning_rate": 2.166741184457214e-05, |
| "loss": 1.6489, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.2552995391705069, |
| "grad_norm": 0.2649478316307068, |
| "learning_rate": 2.1638542959752485e-05, |
| "loss": 1.5935, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.2571428571428571, |
| "grad_norm": 0.263662189245224, |
| "learning_rate": 2.160964346349731e-05, |
| "loss": 1.6304, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.2589861751152074, |
| "grad_norm": 0.280752956867218, |
| "learning_rate": 2.1580713489067043e-05, |
| "loss": 1.6311, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.2608294930875577, |
| "grad_norm": 0.2848096191883087, |
| "learning_rate": 2.155175316986265e-05, |
| "loss": 1.6682, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.262672811059908, |
| "grad_norm": 0.2672868072986603, |
| "learning_rate": 2.1522762639425012e-05, |
| "loss": 1.5798, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.2645161290322582, |
| "grad_norm": 0.2747519612312317, |
| "learning_rate": 2.1493742031434343e-05, |
| "loss": 1.5585, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.2663594470046082, |
| "grad_norm": 0.27021849155426025, |
| "learning_rate": 2.1464691479709534e-05, |
| "loss": 1.5789, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.2682027649769585, |
| "grad_norm": 0.26815730333328247, |
| "learning_rate": 2.1435611118207546e-05, |
| "loss": 1.564, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.2700460829493088, |
| "grad_norm": 0.2694461941719055, |
| "learning_rate": 2.140650108102281e-05, |
| "loss": 1.5709, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.271889400921659, |
| "grad_norm": 0.27626311779022217, |
| "learning_rate": 2.137736150238659e-05, |
| "loss": 1.6146, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2737327188940093, |
| "grad_norm": 0.2797856628894806, |
| "learning_rate": 2.1348192516666376e-05, |
| "loss": 1.6126, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.2755760368663593, |
| "grad_norm": 0.2678052484989166, |
| "learning_rate": 2.1318994258365253e-05, |
| "loss": 1.5817, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.2774193548387096, |
| "grad_norm": 0.2734876275062561, |
| "learning_rate": 2.128976686212129e-05, |
| "loss": 1.5634, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.2792626728110599, |
| "grad_norm": 0.2710317373275757, |
| "learning_rate": 2.1260510462706914e-05, |
| "loss": 1.6467, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.2811059907834101, |
| "grad_norm": 0.2857086956501007, |
| "learning_rate": 2.12312251950283e-05, |
| "loss": 1.5887, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.2829493087557604, |
| "grad_norm": 0.26261481642723083, |
| "learning_rate": 2.120191119412472e-05, |
| "loss": 1.6167, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.2847926267281107, |
| "grad_norm": 0.26819396018981934, |
| "learning_rate": 2.117256859516795e-05, |
| "loss": 1.5946, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.286635944700461, |
| "grad_norm": 0.2797357439994812, |
| "learning_rate": 2.1143197533461655e-05, |
| "loss": 1.5888, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.288479262672811, |
| "grad_norm": 0.2809047996997833, |
| "learning_rate": 2.1113798144440712e-05, |
| "loss": 1.5984, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.2903225806451613, |
| "grad_norm": 0.2751614451408386, |
| "learning_rate": 2.108437056367064e-05, |
| "loss": 1.6601, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2921658986175115, |
| "grad_norm": 0.26571017503738403, |
| "learning_rate": 2.1054914926846957e-05, |
| "loss": 1.5355, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.2940092165898618, |
| "grad_norm": 0.3031296133995056, |
| "learning_rate": 2.1025431369794546e-05, |
| "loss": 1.6608, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.295852534562212, |
| "grad_norm": 0.28314271569252014, |
| "learning_rate": 2.0995920028467027e-05, |
| "loss": 1.6063, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.297695852534562, |
| "grad_norm": 0.27367842197418213, |
| "learning_rate": 2.096638103894616e-05, |
| "loss": 1.5948, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.2995391705069124, |
| "grad_norm": 0.2784302234649658, |
| "learning_rate": 2.0936814537441173e-05, |
| "loss": 1.5953, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.3013824884792626, |
| "grad_norm": 0.2915882468223572, |
| "learning_rate": 2.0907220660288166e-05, |
| "loss": 1.5376, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.303225806451613, |
| "grad_norm": 0.26861122250556946, |
| "learning_rate": 2.087759954394948e-05, |
| "loss": 1.6224, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.3050691244239632, |
| "grad_norm": 0.2893337309360504, |
| "learning_rate": 2.084795132501304e-05, |
| "loss": 1.6294, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.3069124423963134, |
| "grad_norm": 0.2968802750110626, |
| "learning_rate": 2.081827614019177e-05, |
| "loss": 1.5813, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.3087557603686637, |
| "grad_norm": 0.2807689309120178, |
| "learning_rate": 2.0788574126322928e-05, |
| "loss": 1.596, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.3105990783410137, |
| "grad_norm": 0.27932870388031006, |
| "learning_rate": 2.0758845420367474e-05, |
| "loss": 1.5958, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.312442396313364, |
| "grad_norm": 0.3168034553527832, |
| "learning_rate": 2.0729090159409467e-05, |
| "loss": 1.5696, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.3142857142857143, |
| "grad_norm": 0.2953466475009918, |
| "learning_rate": 2.0699308480655397e-05, |
| "loss": 1.5669, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.3161290322580645, |
| "grad_norm": 0.26360318064689636, |
| "learning_rate": 2.06695005214336e-05, |
| "loss": 1.6061, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.3179723502304148, |
| "grad_norm": 0.31883785128593445, |
| "learning_rate": 2.0639666419193565e-05, |
| "loss": 1.6457, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.3198156682027649, |
| "grad_norm": 0.30148056149482727, |
| "learning_rate": 2.0609806311505345e-05, |
| "loss": 1.6045, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.3216589861751151, |
| "grad_norm": 0.2783588767051697, |
| "learning_rate": 2.057992033605891e-05, |
| "loss": 1.6246, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.3235023041474654, |
| "grad_norm": 0.2826476991176605, |
| "learning_rate": 2.0550008630663507e-05, |
| "loss": 1.6577, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.3253456221198157, |
| "grad_norm": 0.32222914695739746, |
| "learning_rate": 2.0520071333247025e-05, |
| "loss": 1.6668, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.327188940092166, |
| "grad_norm": 0.2784786522388458, |
| "learning_rate": 2.049010858185537e-05, |
| "loss": 1.6636, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.3290322580645162, |
| "grad_norm": 0.27896296977996826, |
| "learning_rate": 2.0460120514651814e-05, |
| "loss": 1.5561, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.3308755760368665, |
| "grad_norm": 0.2953389585018158, |
| "learning_rate": 2.0430107269916368e-05, |
| "loss": 1.5208, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.3327188940092167, |
| "grad_norm": 0.27998074889183044, |
| "learning_rate": 2.0400068986045142e-05, |
| "loss": 1.6206, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.3345622119815668, |
| "grad_norm": 0.2782033383846283, |
| "learning_rate": 2.03700058015497e-05, |
| "loss": 1.6127, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.336405529953917, |
| "grad_norm": 0.2825208008289337, |
| "learning_rate": 2.0339917855056428e-05, |
| "loss": 1.5904, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.3382488479262673, |
| "grad_norm": 0.2724984586238861, |
| "learning_rate": 2.0309805285305905e-05, |
| "loss": 1.5929, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.3400921658986176, |
| "grad_norm": 0.2638327479362488, |
| "learning_rate": 2.0279668231152233e-05, |
| "loss": 1.5806, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.3419354838709676, |
| "grad_norm": 0.27951404452323914, |
| "learning_rate": 2.024950683156243e-05, |
| "loss": 1.6097, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.3437788018433179, |
| "grad_norm": 0.2717166841030121, |
| "learning_rate": 2.021932122561577e-05, |
| "loss": 1.5724, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.3456221198156681, |
| "grad_norm": 0.2744804620742798, |
| "learning_rate": 2.0189111552503142e-05, |
| "loss": 1.6343, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3474654377880184, |
| "grad_norm": 0.2739951014518738, |
| "learning_rate": 2.015887795152643e-05, |
| "loss": 1.609, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.3493087557603687, |
| "grad_norm": 0.2745543420314789, |
| "learning_rate": 2.0128620562097834e-05, |
| "loss": 1.634, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.351152073732719, |
| "grad_norm": 0.2853536903858185, |
| "learning_rate": 2.009833952373925e-05, |
| "loss": 1.6915, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.3529953917050692, |
| "grad_norm": 0.2606966495513916, |
| "learning_rate": 2.0068034976081637e-05, |
| "loss": 1.5641, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.3548387096774195, |
| "grad_norm": 0.2707135081291199, |
| "learning_rate": 2.0037707058864343e-05, |
| "loss": 1.5901, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.3566820276497695, |
| "grad_norm": 0.2738732397556305, |
| "learning_rate": 2.0007355911934473e-05, |
| "loss": 1.5878, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.3585253456221198, |
| "grad_norm": 0.27152132987976074, |
| "learning_rate": 1.997698167524628e-05, |
| "loss": 1.6212, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.36036866359447, |
| "grad_norm": 0.279140442609787, |
| "learning_rate": 1.9946584488860454e-05, |
| "loss": 1.5909, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.3622119815668203, |
| "grad_norm": 0.2838742434978485, |
| "learning_rate": 1.9916164492943518e-05, |
| "loss": 1.6337, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.3640552995391704, |
| "grad_norm": 0.2730039358139038, |
| "learning_rate": 1.9885721827767185e-05, |
| "loss": 1.674, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.3658986175115206, |
| "grad_norm": 0.27932366728782654, |
| "learning_rate": 1.9855256633707692e-05, |
| "loss": 1.6264, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.367741935483871, |
| "grad_norm": 0.2915544807910919, |
| "learning_rate": 1.9824769051245157e-05, |
| "loss": 1.6138, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.3695852534562212, |
| "grad_norm": 0.2782309949398041, |
| "learning_rate": 1.979425922096294e-05, |
| "loss": 1.6153, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.3714285714285714, |
| "grad_norm": 0.29362061619758606, |
| "learning_rate": 1.976372728354699e-05, |
| "loss": 1.6308, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.3732718894009217, |
| "grad_norm": 0.2930099368095398, |
| "learning_rate": 1.9733173379785188e-05, |
| "loss": 1.5748, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.375115207373272, |
| "grad_norm": 0.27453601360321045, |
| "learning_rate": 1.9702597650566723e-05, |
| "loss": 1.5993, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.3769585253456222, |
| "grad_norm": 0.281548410654068, |
| "learning_rate": 1.9672000236881397e-05, |
| "loss": 1.6467, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.3788018433179723, |
| "grad_norm": 0.28580373525619507, |
| "learning_rate": 1.9641381279819028e-05, |
| "loss": 1.6643, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.3806451612903226, |
| "grad_norm": 0.2795877754688263, |
| "learning_rate": 1.9610740920568764e-05, |
| "loss": 1.6006, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.3824884792626728, |
| "grad_norm": 0.27349579334259033, |
| "learning_rate": 1.9580079300418444e-05, |
| "loss": 1.654, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.384331797235023, |
| "grad_norm": 0.27888205647468567, |
| "learning_rate": 1.954939656075394e-05, |
| "loss": 1.6131, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.3861751152073734, |
| "grad_norm": 0.27265042066574097, |
| "learning_rate": 1.9518692843058514e-05, |
| "loss": 1.6203, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.3880184331797234, |
| "grad_norm": 0.2736769914627075, |
| "learning_rate": 1.9487968288912164e-05, |
| "loss": 1.6011, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.3898617511520737, |
| "grad_norm": 0.27037787437438965, |
| "learning_rate": 1.9457223039990963e-05, |
| "loss": 1.5475, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.391705069124424, |
| "grad_norm": 0.27303317189216614, |
| "learning_rate": 1.942645723806641e-05, |
| "loss": 1.6335, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.3935483870967742, |
| "grad_norm": 0.27159225940704346, |
| "learning_rate": 1.9395671025004777e-05, |
| "loss": 1.5606, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.3953917050691245, |
| "grad_norm": 0.2682175934314728, |
| "learning_rate": 1.936486454276647e-05, |
| "loss": 1.555, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.3972350230414747, |
| "grad_norm": 0.27098003029823303, |
| "learning_rate": 1.9334037933405337e-05, |
| "loss": 1.5385, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.399078341013825, |
| "grad_norm": 0.28845879435539246, |
| "learning_rate": 1.9303191339068048e-05, |
| "loss": 1.6211, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.400921658986175, |
| "grad_norm": 0.2876651883125305, |
| "learning_rate": 1.9272324901993436e-05, |
| "loss": 1.6319, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.4027649769585253, |
| "grad_norm": 0.2796455919742584, |
| "learning_rate": 1.9241438764511805e-05, |
| "loss": 1.6263, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.4046082949308756, |
| "grad_norm": 0.27480548620224, |
| "learning_rate": 1.9210533069044334e-05, |
| "loss": 1.613, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.4064516129032258, |
| "grad_norm": 0.28065502643585205, |
| "learning_rate": 1.9179607958102356e-05, |
| "loss": 1.5789, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.4082949308755761, |
| "grad_norm": 0.28731146454811096, |
| "learning_rate": 1.9148663574286757e-05, |
| "loss": 1.5297, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.4101382488479262, |
| "grad_norm": 0.2914833724498749, |
| "learning_rate": 1.911770006028728e-05, |
| "loss": 1.5977, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.4119815668202764, |
| "grad_norm": 0.300193190574646, |
| "learning_rate": 1.908671755888188e-05, |
| "loss": 1.6296, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.4138248847926267, |
| "grad_norm": 0.29380565881729126, |
| "learning_rate": 1.9055716212936075e-05, |
| "loss": 1.6149, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.415668202764977, |
| "grad_norm": 0.32037287950515747, |
| "learning_rate": 1.9024696165402272e-05, |
| "loss": 1.6513, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.4175115207373272, |
| "grad_norm": 0.29914116859436035, |
| "learning_rate": 1.899365755931911e-05, |
| "loss": 1.5963, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.4193548387096775, |
| "grad_norm": 0.26687341928482056, |
| "learning_rate": 1.8962600537810824e-05, |
| "loss": 1.536, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.4211981566820278, |
| "grad_norm": 0.28808560967445374, |
| "learning_rate": 1.893152524408653e-05, |
| "loss": 1.6214, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.4230414746543778, |
| "grad_norm": 0.28396207094192505, |
| "learning_rate": 1.8900431821439644e-05, |
| "loss": 1.6478, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.424884792626728, |
| "grad_norm": 0.2826618552207947, |
| "learning_rate": 1.886932041324714e-05, |
| "loss": 1.5832, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.4267281105990783, |
| "grad_norm": 0.2729291617870331, |
| "learning_rate": 1.883819116296895e-05, |
| "loss": 1.5696, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.28617042303085327, |
| "learning_rate": 1.880704421414726e-05, |
| "loss": 1.606, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.4304147465437789, |
| "grad_norm": 0.30494067072868347, |
| "learning_rate": 1.8775879710405893e-05, |
| "loss": 1.6557, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.432258064516129, |
| "grad_norm": 0.2694535255432129, |
| "learning_rate": 1.8744697795449588e-05, |
| "loss": 1.5447, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.4341013824884792, |
| "grad_norm": 0.2713358998298645, |
| "learning_rate": 1.8713498613063403e-05, |
| "loss": 1.5635, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.4359447004608294, |
| "grad_norm": 0.27467259764671326, |
| "learning_rate": 1.8682282307111988e-05, |
| "loss": 1.6066, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.4377880184331797, |
| "grad_norm": 0.2681735157966614, |
| "learning_rate": 1.865104902153898e-05, |
| "loss": 1.5669, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.43963133640553, |
| "grad_norm": 0.2794997990131378, |
| "learning_rate": 1.8619798900366298e-05, |
| "loss": 1.6059, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.4414746543778802, |
| "grad_norm": 0.27287647128105164, |
| "learning_rate": 1.8588532087693485e-05, |
| "loss": 1.5776, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.4433179723502305, |
| "grad_norm": 0.2879515290260315, |
| "learning_rate": 1.8557248727697068e-05, |
| "loss": 1.6362, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.4451612903225808, |
| "grad_norm": 0.28515344858169556, |
| "learning_rate": 1.852594896462987e-05, |
| "loss": 1.5876, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.4470046082949308, |
| "grad_norm": 0.2730424702167511, |
| "learning_rate": 1.849463294282035e-05, |
| "loss": 1.5707, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.448847926267281, |
| "grad_norm": 0.27559229731559753, |
| "learning_rate": 1.8463300806671936e-05, |
| "loss": 1.5538, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.4506912442396314, |
| "grad_norm": 0.28002747893333435, |
| "learning_rate": 1.8431952700662375e-05, |
| "loss": 1.6236, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.4525345622119816, |
| "grad_norm": 0.28345340490341187, |
| "learning_rate": 1.840058876934303e-05, |
| "loss": 1.6436, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.4543778801843317, |
| "grad_norm": 0.26679205894470215, |
| "learning_rate": 1.8369209157338262e-05, |
| "loss": 1.49, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.456221198156682, |
| "grad_norm": 0.2743077874183655, |
| "learning_rate": 1.8337814009344716e-05, |
| "loss": 1.592, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4580645161290322, |
| "grad_norm": 0.2735365927219391, |
| "learning_rate": 1.83064034701307e-05, |
| "loss": 1.5771, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.4599078341013825, |
| "grad_norm": 0.27030524611473083, |
| "learning_rate": 1.8274977684535478e-05, |
| "loss": 1.5751, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.4617511520737327, |
| "grad_norm": 0.28183117508888245, |
| "learning_rate": 1.824353679746861e-05, |
| "loss": 1.5485, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.463594470046083, |
| "grad_norm": 0.2746550738811493, |
| "learning_rate": 1.821208095390931e-05, |
| "loss": 1.5738, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.4654377880184333, |
| "grad_norm": 0.2818034887313843, |
| "learning_rate": 1.8180610298905758e-05, |
| "loss": 1.6364, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.4672811059907835, |
| "grad_norm": 0.2696681320667267, |
| "learning_rate": 1.8149124977574417e-05, |
| "loss": 1.6, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.4691244239631336, |
| "grad_norm": 0.2887495458126068, |
| "learning_rate": 1.8117625135099386e-05, |
| "loss": 1.6686, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.4709677419354839, |
| "grad_norm": 0.2815185785293579, |
| "learning_rate": 1.8086110916731724e-05, |
| "loss": 1.6131, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.4728110599078341, |
| "grad_norm": 0.27256280183792114, |
| "learning_rate": 1.805458246778878e-05, |
| "loss": 1.5867, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.4746543778801844, |
| "grad_norm": 0.2779887914657593, |
| "learning_rate": 1.802303993365353e-05, |
| "loss": 1.5557, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4764976958525344, |
| "grad_norm": 0.27857843041419983, |
| "learning_rate": 1.7991483459773887e-05, |
| "loss": 1.6668, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.4783410138248847, |
| "grad_norm": 0.26913055777549744, |
| "learning_rate": 1.795991319166204e-05, |
| "loss": 1.6072, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.480184331797235, |
| "grad_norm": 0.2818872630596161, |
| "learning_rate": 1.79283292748938e-05, |
| "loss": 1.5957, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.4820276497695852, |
| "grad_norm": 0.2774152159690857, |
| "learning_rate": 1.7896731855107908e-05, |
| "loss": 1.5923, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.4838709677419355, |
| "grad_norm": 0.2798556983470917, |
| "learning_rate": 1.7865121078005365e-05, |
| "loss": 1.5798, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.4857142857142858, |
| "grad_norm": 0.2692021429538727, |
| "learning_rate": 1.7833497089348772e-05, |
| "loss": 1.6172, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.487557603686636, |
| "grad_norm": 0.267347514629364, |
| "learning_rate": 1.780186003496164e-05, |
| "loss": 1.6114, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.4894009216589863, |
| "grad_norm": 0.2735111713409424, |
| "learning_rate": 1.7770210060727748e-05, |
| "loss": 1.5757, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.4912442396313363, |
| "grad_norm": 0.27940452098846436, |
| "learning_rate": 1.7738547312590426e-05, |
| "loss": 1.6085, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.4930875576036866, |
| "grad_norm": 0.2675354480743408, |
| "learning_rate": 1.770687193655192e-05, |
| "loss": 1.6032, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.4949308755760369, |
| "grad_norm": 0.276353120803833, |
| "learning_rate": 1.7675184078672714e-05, |
| "loss": 1.6087, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.4967741935483871, |
| "grad_norm": 0.276836097240448, |
| "learning_rate": 1.7643483885070827e-05, |
| "loss": 1.6077, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.4986175115207372, |
| "grad_norm": 0.27562999725341797, |
| "learning_rate": 1.7611771501921174e-05, |
| "loss": 1.6598, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.5004608294930875, |
| "grad_norm": 0.2816264033317566, |
| "learning_rate": 1.7580047075454877e-05, |
| "loss": 1.6591, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.5023041474654377, |
| "grad_norm": 0.2649102807044983, |
| "learning_rate": 1.7548310751958588e-05, |
| "loss": 1.5475, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.504147465437788, |
| "grad_norm": 0.26588475704193115, |
| "learning_rate": 1.751656267777382e-05, |
| "loss": 1.6011, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.5059907834101383, |
| "grad_norm": 0.2885441482067108, |
| "learning_rate": 1.748480299929627e-05, |
| "loss": 1.6321, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.5078341013824885, |
| "grad_norm": 0.2782214283943176, |
| "learning_rate": 1.7453031862975146e-05, |
| "loss": 1.5943, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.5096774193548388, |
| "grad_norm": 0.2720677852630615, |
| "learning_rate": 1.742124941531249e-05, |
| "loss": 1.5845, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.511520737327189, |
| "grad_norm": 0.27073559165000916, |
| "learning_rate": 1.73894558028625e-05, |
| "loss": 1.6024, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.5133640552995393, |
| "grad_norm": 0.2795216739177704, |
| "learning_rate": 1.7357651172230852e-05, |
| "loss": 1.5477, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.5152073732718894, |
| "grad_norm": 0.27710703015327454, |
| "learning_rate": 1.7325835670074044e-05, |
| "loss": 1.6505, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.5170506912442396, |
| "grad_norm": 0.2705666124820709, |
| "learning_rate": 1.729400944309869e-05, |
| "loss": 1.5482, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.51889400921659, |
| "grad_norm": 0.27922290563583374, |
| "learning_rate": 1.7262172638060865e-05, |
| "loss": 1.6243, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.52073732718894, |
| "grad_norm": 0.27905556559562683, |
| "learning_rate": 1.7230325401765415e-05, |
| "loss": 1.5902, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.5225806451612902, |
| "grad_norm": 0.2822147607803345, |
| "learning_rate": 1.7198467881065292e-05, |
| "loss": 1.6411, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.5244239631336405, |
| "grad_norm": 0.27511006593704224, |
| "learning_rate": 1.7166600222860876e-05, |
| "loss": 1.6384, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.5262672811059907, |
| "grad_norm": 0.27862241864204407, |
| "learning_rate": 1.713472257409928e-05, |
| "loss": 1.5762, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.528110599078341, |
| "grad_norm": 0.27879902720451355, |
| "learning_rate": 1.7102835081773686e-05, |
| "loss": 1.6238, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.5299539170506913, |
| "grad_norm": 0.29454904794692993, |
| "learning_rate": 1.707093789292269e-05, |
| "loss": 1.6545, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.5317972350230415, |
| "grad_norm": 0.2856805920600891, |
| "learning_rate": 1.7039031154629567e-05, |
| "loss": 1.5693, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.5336405529953918, |
| "grad_norm": 0.27673226594924927, |
| "learning_rate": 1.700711501402164e-05, |
| "loss": 1.5427, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.535483870967742, |
| "grad_norm": 0.29206371307373047, |
| "learning_rate": 1.6975189618269592e-05, |
| "loss": 1.6024, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.5373271889400921, |
| "grad_norm": 0.2787723243236542, |
| "learning_rate": 1.6943255114586788e-05, |
| "loss": 1.5581, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.5391705069124424, |
| "grad_norm": 0.27273157238960266, |
| "learning_rate": 1.6911311650228574e-05, |
| "loss": 1.5769, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.5410138248847927, |
| "grad_norm": 0.2830120325088501, |
| "learning_rate": 1.687935937249163e-05, |
| "loss": 1.5915, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.5428571428571427, |
| "grad_norm": 0.27038127183914185, |
| "learning_rate": 1.6847398428713256e-05, |
| "loss": 1.5609, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.544700460829493, |
| "grad_norm": 0.2764512896537781, |
| "learning_rate": 1.681542896627075e-05, |
| "loss": 1.6441, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.5465437788018432, |
| "grad_norm": 0.27623921632766724, |
| "learning_rate": 1.678345113258065e-05, |
| "loss": 1.6269, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.5483870967741935, |
| "grad_norm": 0.26323872804641724, |
| "learning_rate": 1.6751465075098115e-05, |
| "loss": 1.5342, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5502304147465438, |
| "grad_norm": 0.27324774861335754, |
| "learning_rate": 1.6719470941316228e-05, |
| "loss": 1.6072, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.552073732718894, |
| "grad_norm": 0.2702793776988983, |
| "learning_rate": 1.668746887876531e-05, |
| "loss": 1.5937, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.5539170506912443, |
| "grad_norm": 0.2792288362979889, |
| "learning_rate": 1.6655459035012237e-05, |
| "loss": 1.5874, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.5557603686635946, |
| "grad_norm": 0.2696288824081421, |
| "learning_rate": 1.662344155765977e-05, |
| "loss": 1.5788, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.5576036866359448, |
| "grad_norm": 0.2754669189453125, |
| "learning_rate": 1.659141659434587e-05, |
| "loss": 1.6263, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.5594470046082949, |
| "grad_norm": 0.2766124904155731, |
| "learning_rate": 1.655938429274302e-05, |
| "loss": 1.6164, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.5612903225806452, |
| "grad_norm": 0.2690373361110687, |
| "learning_rate": 1.6527344800557534e-05, |
| "loss": 1.5735, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.5631336405529954, |
| "grad_norm": 0.27754339575767517, |
| "learning_rate": 1.6495298265528883e-05, |
| "loss": 1.6258, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.5649769585253455, |
| "grad_norm": 0.27151575684547424, |
| "learning_rate": 1.646324483542902e-05, |
| "loss": 1.6568, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.5668202764976957, |
| "grad_norm": 0.28297892212867737, |
| "learning_rate": 1.64311846580617e-05, |
| "loss": 1.6342, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.568663594470046, |
| "grad_norm": 0.27492207288742065, |
| "learning_rate": 1.639911788126177e-05, |
| "loss": 1.665, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.5705069124423963, |
| "grad_norm": 0.2828580439090729, |
| "learning_rate": 1.6367044652894515e-05, |
| "loss": 1.5696, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.5723502304147465, |
| "grad_norm": 0.2775357961654663, |
| "learning_rate": 1.6334965120854986e-05, |
| "loss": 1.6489, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.5741935483870968, |
| "grad_norm": 0.28201982378959656, |
| "learning_rate": 1.6302879433067274e-05, |
| "loss": 1.6067, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.576036866359447, |
| "grad_norm": 0.273805171251297, |
| "learning_rate": 1.6270787737483877e-05, |
| "loss": 1.636, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.5778801843317973, |
| "grad_norm": 0.27378591895103455, |
| "learning_rate": 1.623869018208499e-05, |
| "loss": 1.5383, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.5797235023041476, |
| "grad_norm": 0.2734542787075043, |
| "learning_rate": 1.6206586914877816e-05, |
| "loss": 1.5959, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.5815668202764976, |
| "grad_norm": 0.2755284905433655, |
| "learning_rate": 1.6174478083895922e-05, |
| "loss": 1.5877, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.583410138248848, |
| "grad_norm": 0.2930653393268585, |
| "learning_rate": 1.6142363837198504e-05, |
| "loss": 1.6763, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.5852534562211982, |
| "grad_norm": 0.28103727102279663, |
| "learning_rate": 1.6110244322869746e-05, |
| "loss": 1.6154, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5870967741935482, |
| "grad_norm": 0.2758486568927765, |
| "learning_rate": 1.607811968901812e-05, |
| "loss": 1.5919, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.5889400921658985, |
| "grad_norm": 0.27371156215667725, |
| "learning_rate": 1.6045990083775703e-05, |
| "loss": 1.6255, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.5907834101382488, |
| "grad_norm": 0.2752973139286041, |
| "learning_rate": 1.6013855655297498e-05, |
| "loss": 1.6017, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.592626728110599, |
| "grad_norm": 0.26971954107284546, |
| "learning_rate": 1.5981716551760735e-05, |
| "loss": 1.5115, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.5944700460829493, |
| "grad_norm": 0.2720949053764343, |
| "learning_rate": 1.5949572921364226e-05, |
| "loss": 1.573, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.5963133640552996, |
| "grad_norm": 0.2740275263786316, |
| "learning_rate": 1.5917424912327644e-05, |
| "loss": 1.6024, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.5981566820276498, |
| "grad_norm": 0.2787199318408966, |
| "learning_rate": 1.5885272672890842e-05, |
| "loss": 1.6263, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.2864430248737335, |
| "learning_rate": 1.58531163513132e-05, |
| "loss": 1.6032, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.6018433179723504, |
| "grad_norm": 0.26860958337783813, |
| "learning_rate": 1.5820956095872914e-05, |
| "loss": 1.6096, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.6036866359447006, |
| "grad_norm": 0.27173295617103577, |
| "learning_rate": 1.5788792054866314e-05, |
| "loss": 1.5589, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.6055299539170507, |
| "grad_norm": 0.27752557396888733, |
| "learning_rate": 1.5756624376607193e-05, |
| "loss": 1.5585, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.607373271889401, |
| "grad_norm": 0.2819308042526245, |
| "learning_rate": 1.5724453209426108e-05, |
| "loss": 1.578, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.6092165898617512, |
| "grad_norm": 0.2847549617290497, |
| "learning_rate": 1.5692278701669712e-05, |
| "loss": 1.6011, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.6110599078341012, |
| "grad_norm": 0.27277764678001404, |
| "learning_rate": 1.566010100170007e-05, |
| "loss": 1.571, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 0.2827809751033783, |
| "learning_rate": 1.5627920257893934e-05, |
| "loss": 1.5961, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.6147465437788018, |
| "grad_norm": 0.28699159622192383, |
| "learning_rate": 1.5595736618642126e-05, |
| "loss": 1.6229, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.616589861751152, |
| "grad_norm": 0.2775685787200928, |
| "learning_rate": 1.5563550232348813e-05, |
| "loss": 1.5469, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.6184331797235023, |
| "grad_norm": 0.2769649922847748, |
| "learning_rate": 1.553136124743081e-05, |
| "loss": 1.591, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.6202764976958526, |
| "grad_norm": 0.26347190141677856, |
| "learning_rate": 1.5499169812316937e-05, |
| "loss": 1.5349, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.6221198156682028, |
| "grad_norm": 0.2721926271915436, |
| "learning_rate": 1.5466976075447295e-05, |
| "loss": 1.5831, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.6239631336405531, |
| "grad_norm": 0.2716809809207916, |
| "learning_rate": 1.5434780185272616e-05, |
| "loss": 1.5148, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.6258064516129034, |
| "grad_norm": 0.2808883786201477, |
| "learning_rate": 1.5402582290253547e-05, |
| "loss": 1.57, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.6276497695852534, |
| "grad_norm": 0.2672334313392639, |
| "learning_rate": 1.537038253885998e-05, |
| "loss": 1.5688, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.6294930875576037, |
| "grad_norm": 0.27970781922340393, |
| "learning_rate": 1.533818107957038e-05, |
| "loss": 1.5723, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.631336405529954, |
| "grad_norm": 0.2877601087093353, |
| "learning_rate": 1.5305978060871083e-05, |
| "loss": 1.6315, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.633179723502304, |
| "grad_norm": 0.2766900658607483, |
| "learning_rate": 1.5273773631255602e-05, |
| "loss": 1.632, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.6350230414746543, |
| "grad_norm": 0.26466235518455505, |
| "learning_rate": 1.524156793922396e-05, |
| "loss": 1.546, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.6368663594470045, |
| "grad_norm": 0.27303576469421387, |
| "learning_rate": 1.5209361133282022e-05, |
| "loss": 1.574, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.6387096774193548, |
| "grad_norm": 0.28713706135749817, |
| "learning_rate": 1.517715336194077e-05, |
| "loss": 1.6679, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.640552995391705, |
| "grad_norm": 0.271852046251297, |
| "learning_rate": 1.5144944773715635e-05, |
| "loss": 1.5074, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.6423963133640553, |
| "grad_norm": 0.2695780098438263, |
| "learning_rate": 1.511273551712583e-05, |
| "loss": 1.5874, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.6442396313364056, |
| "grad_norm": 0.27548617124557495, |
| "learning_rate": 1.5080525740693635e-05, |
| "loss": 1.5366, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.6460829493087559, |
| "grad_norm": 0.27405300736427307, |
| "learning_rate": 1.5048315592943743e-05, |
| "loss": 1.6149, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.6479262672811061, |
| "grad_norm": 0.2837236821651459, |
| "learning_rate": 1.5016105222402546e-05, |
| "loss": 1.574, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.6497695852534562, |
| "grad_norm": 0.274830162525177, |
| "learning_rate": 1.4983894777597461e-05, |
| "loss": 1.6566, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.6516129032258065, |
| "grad_norm": 0.2712487578392029, |
| "learning_rate": 1.495168440705626e-05, |
| "loss": 1.5382, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.6534562211981567, |
| "grad_norm": 0.26845115423202515, |
| "learning_rate": 1.4919474259306362e-05, |
| "loss": 1.5384, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.6552995391705068, |
| "grad_norm": 0.2784380316734314, |
| "learning_rate": 1.4887264482874173e-05, |
| "loss": 1.5575, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.657142857142857, |
| "grad_norm": 0.2723720967769623, |
| "learning_rate": 1.4855055226284367e-05, |
| "loss": 1.5714, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.6589861751152073, |
| "grad_norm": 0.2713598608970642, |
| "learning_rate": 1.4822846638059234e-05, |
| "loss": 1.5896, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6608294930875576, |
| "grad_norm": 0.28683602809906006, |
| "learning_rate": 1.4790638866717984e-05, |
| "loss": 1.6283, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.6626728110599078, |
| "grad_norm": 0.2853665053844452, |
| "learning_rate": 1.4758432060776044e-05, |
| "loss": 1.5921, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.664516129032258, |
| "grad_norm": 0.279269278049469, |
| "learning_rate": 1.4726226368744404e-05, |
| "loss": 1.595, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.6663594470046084, |
| "grad_norm": 0.2805596590042114, |
| "learning_rate": 1.4694021939128925e-05, |
| "loss": 1.589, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.6682027649769586, |
| "grad_norm": 0.2930944859981537, |
| "learning_rate": 1.466181892042962e-05, |
| "loss": 1.6052, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.670046082949309, |
| "grad_norm": 0.27433332800865173, |
| "learning_rate": 1.462961746114002e-05, |
| "loss": 1.6106, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.671889400921659, |
| "grad_norm": 0.29066669940948486, |
| "learning_rate": 1.4597417709746454e-05, |
| "loss": 1.6252, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.6737327188940092, |
| "grad_norm": 0.2851257026195526, |
| "learning_rate": 1.4565219814727388e-05, |
| "loss": 1.5807, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.6755760368663595, |
| "grad_norm": 0.2750568389892578, |
| "learning_rate": 1.4533023924552706e-05, |
| "loss": 1.5701, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.6774193548387095, |
| "grad_norm": 0.28285452723503113, |
| "learning_rate": 1.4500830187683066e-05, |
| "loss": 1.6027, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.6792626728110598, |
| "grad_norm": 0.28591400384902954, |
| "learning_rate": 1.4468638752569193e-05, |
| "loss": 1.6226, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.68110599078341, |
| "grad_norm": 0.279861718416214, |
| "learning_rate": 1.4436449767651191e-05, |
| "loss": 1.5525, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.6829493087557603, |
| "grad_norm": 0.26922115683555603, |
| "learning_rate": 1.4404263381357873e-05, |
| "loss": 1.5962, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.6847926267281106, |
| "grad_norm": 0.2725696265697479, |
| "learning_rate": 1.437207974210607e-05, |
| "loss": 1.5739, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.6866359447004609, |
| "grad_norm": 0.280691921710968, |
| "learning_rate": 1.4339898998299936e-05, |
| "loss": 1.6128, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.6884792626728111, |
| "grad_norm": 0.27789539098739624, |
| "learning_rate": 1.4307721298330284e-05, |
| "loss": 1.5863, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.6903225806451614, |
| "grad_norm": 0.2718709707260132, |
| "learning_rate": 1.4275546790573895e-05, |
| "loss": 1.5724, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.6921658986175117, |
| "grad_norm": 0.2684226930141449, |
| "learning_rate": 1.4243375623392808e-05, |
| "loss": 1.5473, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.6940092165898617, |
| "grad_norm": 0.28611379861831665, |
| "learning_rate": 1.4211207945133685e-05, |
| "loss": 1.6016, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.695852534562212, |
| "grad_norm": 0.28246620297431946, |
| "learning_rate": 1.417904390412709e-05, |
| "loss": 1.5635, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6976958525345622, |
| "grad_norm": 0.2792202830314636, |
| "learning_rate": 1.41468836486868e-05, |
| "loss": 1.5302, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.6995391705069123, |
| "grad_norm": 0.2743517756462097, |
| "learning_rate": 1.411472732710916e-05, |
| "loss": 1.5837, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.7013824884792625, |
| "grad_norm": 0.28153350949287415, |
| "learning_rate": 1.4082575087672363e-05, |
| "loss": 1.6095, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.7032258064516128, |
| "grad_norm": 0.30301469564437866, |
| "learning_rate": 1.4050427078635777e-05, |
| "loss": 1.5882, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.705069124423963, |
| "grad_norm": 0.27385833859443665, |
| "learning_rate": 1.4018283448239266e-05, |
| "loss": 1.5997, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.7069124423963133, |
| "grad_norm": 0.2762070298194885, |
| "learning_rate": 1.398614434470251e-05, |
| "loss": 1.5694, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.7087557603686636, |
| "grad_norm": 0.27967438101768494, |
| "learning_rate": 1.3954009916224299e-05, |
| "loss": 1.6191, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.7105990783410139, |
| "grad_norm": 0.2745825946331024, |
| "learning_rate": 1.3921880310981878e-05, |
| "loss": 1.5921, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.7124423963133641, |
| "grad_norm": 0.2806471586227417, |
| "learning_rate": 1.3889755677130253e-05, |
| "loss": 1.6333, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.28569644689559937, |
| "learning_rate": 1.3857636162801499e-05, |
| "loss": 1.6297, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.7161290322580647, |
| "grad_norm": 0.27753588557243347, |
| "learning_rate": 1.3825521916104082e-05, |
| "loss": 1.5691, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.7179723502304147, |
| "grad_norm": 0.2785869538784027, |
| "learning_rate": 1.3793413085122183e-05, |
| "loss": 1.6432, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.719815668202765, |
| "grad_norm": 0.2777843773365021, |
| "learning_rate": 1.3761309817915017e-05, |
| "loss": 1.6167, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.7216589861751153, |
| "grad_norm": 0.2760448157787323, |
| "learning_rate": 1.3729212262516124e-05, |
| "loss": 1.6157, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.7235023041474653, |
| "grad_norm": 0.2856210172176361, |
| "learning_rate": 1.3697120566932727e-05, |
| "loss": 1.5703, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.7253456221198156, |
| "grad_norm": 0.27198657393455505, |
| "learning_rate": 1.3665034879145022e-05, |
| "loss": 1.6183, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.7271889400921658, |
| "grad_norm": 0.2798546254634857, |
| "learning_rate": 1.3632955347105487e-05, |
| "loss": 1.6312, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.729032258064516, |
| "grad_norm": 0.2872222065925598, |
| "learning_rate": 1.3600882118738232e-05, |
| "loss": 1.6336, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.7308755760368664, |
| "grad_norm": 0.2799721360206604, |
| "learning_rate": 1.3568815341938303e-05, |
| "loss": 1.6183, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.7327188940092166, |
| "grad_norm": 0.2797256410121918, |
| "learning_rate": 1.3536755164570977e-05, |
| "loss": 1.6386, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.734562211981567, |
| "grad_norm": 0.2698177099227905, |
| "learning_rate": 1.3504701734471117e-05, |
| "loss": 1.5957, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.7364055299539172, |
| "grad_norm": 0.2770727872848511, |
| "learning_rate": 1.3472655199442473e-05, |
| "loss": 1.5977, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.7382488479262674, |
| "grad_norm": 0.2778874635696411, |
| "learning_rate": 1.3440615707256984e-05, |
| "loss": 1.5497, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.7400921658986175, |
| "grad_norm": 0.27074167132377625, |
| "learning_rate": 1.340858340565413e-05, |
| "loss": 1.5602, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.7419354838709677, |
| "grad_norm": 0.2706243395805359, |
| "learning_rate": 1.3376558442340233e-05, |
| "loss": 1.5655, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.743778801843318, |
| "grad_norm": 0.27816662192344666, |
| "learning_rate": 1.3344540964987766e-05, |
| "loss": 1.5792, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.745622119815668, |
| "grad_norm": 0.27811580896377563, |
| "learning_rate": 1.331253112123469e-05, |
| "loss": 1.6535, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.7474654377880183, |
| "grad_norm": 0.28679028153419495, |
| "learning_rate": 1.3280529058683778e-05, |
| "loss": 1.5837, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.7493087557603686, |
| "grad_norm": 0.2821553647518158, |
| "learning_rate": 1.3248534924901887e-05, |
| "loss": 1.5671, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.7511520737327189, |
| "grad_norm": 0.27457195520401, |
| "learning_rate": 1.3216548867419352e-05, |
| "loss": 1.6098, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.7529953917050691, |
| "grad_norm": 0.27999773621559143, |
| "learning_rate": 1.3184571033729253e-05, |
| "loss": 1.5503, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.7548387096774194, |
| "grad_norm": 0.27848124504089355, |
| "learning_rate": 1.3152601571286746e-05, |
| "loss": 1.5739, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.7566820276497697, |
| "grad_norm": 0.2803337275981903, |
| "learning_rate": 1.3120640627508376e-05, |
| "loss": 1.5847, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.75852534562212, |
| "grad_norm": 0.27653852105140686, |
| "learning_rate": 1.3088688349771425e-05, |
| "loss": 1.6444, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.7603686635944702, |
| "grad_norm": 0.27364403009414673, |
| "learning_rate": 1.3056744885413216e-05, |
| "loss": 1.603, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.7622119815668202, |
| "grad_norm": 0.286454439163208, |
| "learning_rate": 1.3024810381730409e-05, |
| "loss": 1.6084, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.7640552995391705, |
| "grad_norm": 0.27595254778862, |
| "learning_rate": 1.2992884985978363e-05, |
| "loss": 1.6451, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.7658986175115208, |
| "grad_norm": 0.27956917881965637, |
| "learning_rate": 1.2960968845370443e-05, |
| "loss": 1.5732, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.7677419354838708, |
| "grad_norm": 0.2734554708003998, |
| "learning_rate": 1.2929062107077315e-05, |
| "loss": 1.5397, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.769585253456221, |
| "grad_norm": 0.27924489974975586, |
| "learning_rate": 1.2897164918226311e-05, |
| "loss": 1.552, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.7714285714285714, |
| "grad_norm": 0.26942330598831177, |
| "learning_rate": 1.2865277425900725e-05, |
| "loss": 1.5747, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.7732718894009216, |
| "grad_norm": 0.2752183675765991, |
| "learning_rate": 1.2833399777139128e-05, |
| "loss": 1.5018, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.7751152073732719, |
| "grad_norm": 0.29580366611480713, |
| "learning_rate": 1.2801532118934708e-05, |
| "loss": 1.5727, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.7769585253456222, |
| "grad_norm": 0.2765806019306183, |
| "learning_rate": 1.276967459823459e-05, |
| "loss": 1.5706, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.7788018433179724, |
| "grad_norm": 0.2774519622325897, |
| "learning_rate": 1.273782736193914e-05, |
| "loss": 1.6598, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.7806451612903227, |
| "grad_norm": 0.27766409516334534, |
| "learning_rate": 1.2705990556901311e-05, |
| "loss": 1.584, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.782488479262673, |
| "grad_norm": 0.2720947265625, |
| "learning_rate": 1.2674164329925961e-05, |
| "loss": 1.5988, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.784331797235023, |
| "grad_norm": 0.2917366623878479, |
| "learning_rate": 1.2642348827769152e-05, |
| "loss": 1.5834, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.7861751152073733, |
| "grad_norm": 0.2705937623977661, |
| "learning_rate": 1.2610544197137502e-05, |
| "loss": 1.5643, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.7880184331797235, |
| "grad_norm": 0.2769400477409363, |
| "learning_rate": 1.257875058468751e-05, |
| "loss": 1.6284, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7898617511520736, |
| "grad_norm": 0.28370919823646545, |
| "learning_rate": 1.2546968137024856e-05, |
| "loss": 1.6223, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.7917050691244238, |
| "grad_norm": 0.27669093012809753, |
| "learning_rate": 1.251519700070373e-05, |
| "loss": 1.5396, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.793548387096774, |
| "grad_norm": 0.272204726934433, |
| "learning_rate": 1.2483437322226178e-05, |
| "loss": 1.5131, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.7953917050691244, |
| "grad_norm": 0.27512574195861816, |
| "learning_rate": 1.2451689248041416e-05, |
| "loss": 1.6107, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.7972350230414746, |
| "grad_norm": 0.2767089307308197, |
| "learning_rate": 1.2419952924545125e-05, |
| "loss": 1.5571, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.799078341013825, |
| "grad_norm": 0.2801191806793213, |
| "learning_rate": 1.2388228498078827e-05, |
| "loss": 1.6405, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.8009216589861752, |
| "grad_norm": 0.27706649899482727, |
| "learning_rate": 1.2356516114929176e-05, |
| "loss": 1.6042, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.8027649769585254, |
| "grad_norm": 0.2814716100692749, |
| "learning_rate": 1.2324815921327288e-05, |
| "loss": 1.6023, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.8046082949308757, |
| "grad_norm": 0.2865283191204071, |
| "learning_rate": 1.2293128063448078e-05, |
| "loss": 1.5884, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.8064516129032258, |
| "grad_norm": 0.26941248774528503, |
| "learning_rate": 1.2261452687409576e-05, |
| "loss": 1.603, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.808294930875576, |
| "grad_norm": 0.2709951400756836, |
| "learning_rate": 1.2229789939272253e-05, |
| "loss": 1.5548, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.8101382488479263, |
| "grad_norm": 0.28433525562286377, |
| "learning_rate": 1.2198139965038356e-05, |
| "loss": 1.6292, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.8119815668202763, |
| "grad_norm": 0.2793513536453247, |
| "learning_rate": 1.2166502910651232e-05, |
| "loss": 1.6037, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.8138248847926266, |
| "grad_norm": 0.2772260308265686, |
| "learning_rate": 1.2134878921994634e-05, |
| "loss": 1.5707, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.8156682027649769, |
| "grad_norm": 0.2733345031738281, |
| "learning_rate": 1.210326814489209e-05, |
| "loss": 1.5857, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.8175115207373271, |
| "grad_norm": 0.27603036165237427, |
| "learning_rate": 1.2071670725106203e-05, |
| "loss": 1.533, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.8193548387096774, |
| "grad_norm": 0.282755047082901, |
| "learning_rate": 1.2040086808337965e-05, |
| "loss": 1.5974, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.8211981566820277, |
| "grad_norm": 0.2730749249458313, |
| "learning_rate": 1.2008516540226115e-05, |
| "loss": 1.5904, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.823041474654378, |
| "grad_norm": 0.28955116868019104, |
| "learning_rate": 1.1976960066346474e-05, |
| "loss": 1.5456, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.8248847926267282, |
| "grad_norm": 0.2911273241043091, |
| "learning_rate": 1.194541753221122e-05, |
| "loss": 1.5873, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.8267281105990785, |
| "grad_norm": 0.2708721458911896, |
| "learning_rate": 1.1913889083268278e-05, |
| "loss": 1.517, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.8285714285714287, |
| "grad_norm": 0.27571648359298706, |
| "learning_rate": 1.1882374864900616e-05, |
| "loss": 1.5257, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.8304147465437788, |
| "grad_norm": 0.2964298725128174, |
| "learning_rate": 1.1850875022425587e-05, |
| "loss": 1.5693, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.832258064516129, |
| "grad_norm": 0.2749471664428711, |
| "learning_rate": 1.1819389701094241e-05, |
| "loss": 1.5939, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.8341013824884793, |
| "grad_norm": 0.27718687057495117, |
| "learning_rate": 1.1787919046090686e-05, |
| "loss": 1.501, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.8359447004608294, |
| "grad_norm": 0.2840654253959656, |
| "learning_rate": 1.1756463202531392e-05, |
| "loss": 1.6314, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.8377880184331796, |
| "grad_norm": 0.27605095505714417, |
| "learning_rate": 1.1725022315464528e-05, |
| "loss": 1.5555, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.83963133640553, |
| "grad_norm": 0.30078697204589844, |
| "learning_rate": 1.16935965298693e-05, |
| "loss": 1.6408, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.8414746543778802, |
| "grad_norm": 0.2889043092727661, |
| "learning_rate": 1.1662185990655285e-05, |
| "loss": 1.5627, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.8433179723502304, |
| "grad_norm": 0.2852741777896881, |
| "learning_rate": 1.1630790842661742e-05, |
| "loss": 1.5737, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1626, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2068108629966848e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
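The record above appears to be a Hugging Face Transformers `Trainer` state checkpoint (`trainer_state.json`): each `log_history` entry stores the step, training loss, learning rate, and gradient norm logged every step (`logging_steps: 1.0`) up to `global_step` 1000 of `max_steps` 1626. A minimal sketch of how one might inspect it, assuming the JSON is saved as `trainer_state.json`; the file name and the plotting choices are illustrative, not part of the original log:

```python
# Minimal sketch: load the trainer state and plot the logged loss and
# learning-rate curves against the training step.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval entries, if any, differ).
history = [h for h in state["log_history"] if "loss" in h]
steps = [h["step"] for h in history]
losses = [h["loss"] for h in history]
lrs = [h["learning_rate"] for h in history]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
fig.savefig("training_curves.png")
```

Plotted this way, the log shows the warmup phase visible at the start of the file (learning rate climbing from ~1.8e-07) giving way to the decay phase in this section (~2.19e-05 down to ~1.16e-05), with the loss settling around 1.55-1.65 by step 1000.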