| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9846938775510203, |
| "eval_steps": 500, |
| "global_step": 390, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007653061224489796, |
| "grad_norm": 7.900599139656812, |
| "learning_rate": 2.564102564102564e-07, |
| "loss": 1.2547, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.015306122448979591, |
| "grad_norm": 8.119600107369656, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 1.2846, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02295918367346939, |
| "grad_norm": 7.716599625963762, |
| "learning_rate": 7.692307692307694e-07, |
| "loss": 1.2358, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.030612244897959183, |
| "grad_norm": 8.053428990837991, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 1.2664, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.03826530612244898, |
| "grad_norm": 7.669927836204581, |
| "learning_rate": 1.282051282051282e-06, |
| "loss": 1.2258, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04591836734693878, |
| "grad_norm": 7.202844535122422, |
| "learning_rate": 1.5384615384615387e-06, |
| "loss": 1.2287, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05357142857142857, |
| "grad_norm": 6.242569470438793, |
| "learning_rate": 1.794871794871795e-06, |
| "loss": 1.2074, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.061224489795918366, |
| "grad_norm": 5.60650986860704, |
| "learning_rate": 2.0512820512820513e-06, |
| "loss": 1.1714, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.06887755102040816, |
| "grad_norm": 5.288779120719776, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 1.1896, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.07653061224489796, |
| "grad_norm": 2.9734467693838242, |
| "learning_rate": 2.564102564102564e-06, |
| "loss": 1.1187, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08418367346938775, |
| "grad_norm": 2.6447321687920935, |
| "learning_rate": 2.8205128205128207e-06, |
| "loss": 1.1054, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.09183673469387756, |
| "grad_norm": 2.500691487000675, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 1.1232, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.09948979591836735, |
| "grad_norm": 3.7896412560526667, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.0521, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 4.330435607713412, |
| "learning_rate": 3.58974358974359e-06, |
| "loss": 1.0695, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.11479591836734694, |
| "grad_norm": 4.065967025129537, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 1.0492, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.12244897959183673, |
| "grad_norm": 3.8218626260606254, |
| "learning_rate": 4.102564102564103e-06, |
| "loss": 1.0507, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13010204081632654, |
| "grad_norm": 2.913408611179218, |
| "learning_rate": 4.358974358974359e-06, |
| "loss": 1.0352, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1377551020408163, |
| "grad_norm": 2.181426061973388, |
| "learning_rate": 4.615384615384616e-06, |
| "loss": 0.9931, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.14540816326530612, |
| "grad_norm": 2.231160799643045, |
| "learning_rate": 4.871794871794872e-06, |
| "loss": 0.9909, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.15306122448979592, |
| "grad_norm": 1.8886705253423084, |
| "learning_rate": 5.128205128205128e-06, |
| "loss": 0.9585, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.16071428571428573, |
| "grad_norm": 1.4487020320934674, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 0.9512, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1683673469387755, |
| "grad_norm": 1.1474113193070334, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 0.9211, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.1760204081632653, |
| "grad_norm": 1.1082161392111283, |
| "learning_rate": 5.897435897435898e-06, |
| "loss": 0.9276, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1836734693877551, |
| "grad_norm": 1.0650174913457753, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.8952, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1913265306122449, |
| "grad_norm": 1.0639688813292578, |
| "learning_rate": 6.410256410256412e-06, |
| "loss": 0.9, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1989795918367347, |
| "grad_norm": 1.026817161303438, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.91, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2066326530612245, |
| "grad_norm": 0.8417599731227993, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 0.8814, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.6981856946789826, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 0.8919, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.22193877551020408, |
| "grad_norm": 0.8305699974027309, |
| "learning_rate": 7.435897435897437e-06, |
| "loss": 0.8725, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.22959183673469388, |
| "grad_norm": 0.8347298264686718, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.8632, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2372448979591837, |
| "grad_norm": 0.7073808513621134, |
| "learning_rate": 7.948717948717949e-06, |
| "loss": 0.8851, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.24489795918367346, |
| "grad_norm": 0.696082003876517, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 0.8703, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.25255102040816324, |
| "grad_norm": 0.6487862374432037, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 0.8635, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.2602040816326531, |
| "grad_norm": 0.6887600459571754, |
| "learning_rate": 8.717948717948719e-06, |
| "loss": 0.857, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 0.6265375198163766, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 0.8674, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2755102040816326, |
| "grad_norm": 0.5853042860058177, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.8643, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.28316326530612246, |
| "grad_norm": 0.5778280304486835, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 0.8524, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.29081632653061223, |
| "grad_norm": 0.47109648165463025, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 0.8345, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.29846938775510207, |
| "grad_norm": 0.6185653217679288, |
| "learning_rate": 1e-05, |
| "loss": 0.8383, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.30612244897959184, |
| "grad_norm": 0.47985971027522895, |
| "learning_rate": 9.999799726899261e-06, |
| "loss": 0.8445, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3137755102040816, |
| "grad_norm": 0.5168786180126856, |
| "learning_rate": 9.999198923640774e-06, |
| "loss": 0.8339, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 0.4697815414710563, |
| "learning_rate": 9.998197638354428e-06, |
| "loss": 0.8395, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.32908163265306123, |
| "grad_norm": 0.43778336023032777, |
| "learning_rate": 9.996795951252427e-06, |
| "loss": 0.8447, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.336734693877551, |
| "grad_norm": 0.4556959273145522, |
| "learning_rate": 9.994993974622863e-06, |
| "loss": 0.8388, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.34438775510204084, |
| "grad_norm": 0.4348649136417768, |
| "learning_rate": 9.992791852820709e-06, |
| "loss": 0.8107, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.3520408163265306, |
| "grad_norm": 0.3789542620181641, |
| "learning_rate": 9.990189762256275e-06, |
| "loss": 0.8345, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3596938775510204, |
| "grad_norm": 0.38275331303639004, |
| "learning_rate": 9.987187911381059e-06, |
| "loss": 0.8341, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.3673469387755102, |
| "grad_norm": 0.4117058990117418, |
| "learning_rate": 9.983786540671052e-06, |
| "loss": 0.8221, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.4382241759510747, |
| "learning_rate": 9.979985922607476e-06, |
| "loss": 0.8122, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.3826530612244898, |
| "grad_norm": 0.3762436724149032, |
| "learning_rate": 9.975786361654959e-06, |
| "loss": 0.8266, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3903061224489796, |
| "grad_norm": 0.41057346443737175, |
| "learning_rate": 9.971188194237141e-06, |
| "loss": 0.8088, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3979591836734694, |
| "grad_norm": 0.4047860097370241, |
| "learning_rate": 9.966191788709716e-06, |
| "loss": 0.8104, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.40561224489795916, |
| "grad_norm": 0.33943575533118764, |
| "learning_rate": 9.960797545330936e-06, |
| "loss": 0.7912, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.413265306122449, |
| "grad_norm": 0.4320896078070966, |
| "learning_rate": 9.955005896229543e-06, |
| "loss": 0.812, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.42091836734693877, |
| "grad_norm": 0.36490629588097284, |
| "learning_rate": 9.948817305370145e-06, |
| "loss": 0.817, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.3811135120016499, |
| "learning_rate": 9.942232268516051e-06, |
| "loss": 0.8101, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.4362244897959184, |
| "grad_norm": 0.3406018259398481, |
| "learning_rate": 9.935251313189564e-06, |
| "loss": 0.7987, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.44387755102040816, |
| "grad_norm": 0.41568382541199506, |
| "learning_rate": 9.927874998629714e-06, |
| "loss": 0.8122, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.45153061224489793, |
| "grad_norm": 0.37378450752963704, |
| "learning_rate": 9.920103915747452e-06, |
| "loss": 0.8113, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.45918367346938777, |
| "grad_norm": 0.38855892151900484, |
| "learning_rate": 9.911938687078324e-06, |
| "loss": 0.7859, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46683673469387754, |
| "grad_norm": 0.4427608830523002, |
| "learning_rate": 9.9033799667326e-06, |
| "loss": 0.802, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.4744897959183674, |
| "grad_norm": 0.35381982973714704, |
| "learning_rate": 9.89442844034286e-06, |
| "loss": 0.8253, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.48214285714285715, |
| "grad_norm": 0.39211658446728886, |
| "learning_rate": 9.885084825009085e-06, |
| "loss": 0.8105, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.4897959183673469, |
| "grad_norm": 0.40435166675919726, |
| "learning_rate": 9.875349869241202e-06, |
| "loss": 0.7953, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.49744897959183676, |
| "grad_norm": 0.3549380749010608, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.8013, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5051020408163265, |
| "grad_norm": 0.30865050646134456, |
| "learning_rate": 9.854709087130261e-06, |
| "loss": 0.7995, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.5127551020408163, |
| "grad_norm": 0.37493039088229285, |
| "learning_rate": 9.843804914304578e-06, |
| "loss": 0.7896, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.5204081632653061, |
| "grad_norm": 0.35445041874178557, |
| "learning_rate": 9.83251270794707e-06, |
| "loss": 0.7825, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.5280612244897959, |
| "grad_norm": 0.37028381618760225, |
| "learning_rate": 9.820833372667813e-06, |
| "loss": 0.8065, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.3401014836367619, |
| "learning_rate": 9.80876784408948e-06, |
| "loss": 0.7736, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5433673469387755, |
| "grad_norm": 0.38135639736053345, |
| "learning_rate": 9.796317088772402e-06, |
| "loss": 0.8006, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.5510204081632653, |
| "grad_norm": 0.3449580873725305, |
| "learning_rate": 9.783482104137127e-06, |
| "loss": 0.793, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.5586734693877551, |
| "grad_norm": 0.39773824101732064, |
| "learning_rate": 9.770263918384523e-06, |
| "loss": 0.7628, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.5663265306122449, |
| "grad_norm": 0.36658929825409153, |
| "learning_rate": 9.75666359041341e-06, |
| "loss": 0.7849, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.5739795918367347, |
| "grad_norm": 0.40174916793470977, |
| "learning_rate": 9.742682209735727e-06, |
| "loss": 0.7813, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5816326530612245, |
| "grad_norm": 0.3675910724991157, |
| "learning_rate": 9.728320896389263e-06, |
| "loss": 0.7717, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.5892857142857143, |
| "grad_norm": 0.35241618551974574, |
| "learning_rate": 9.713580800847917e-06, |
| "loss": 0.7964, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.5969387755102041, |
| "grad_norm": 0.42227511752715124, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.783, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6045918367346939, |
| "grad_norm": 0.41099008300592765, |
| "learning_rate": 9.682969016701357e-06, |
| "loss": 0.8124, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 0.3588362815681614, |
| "learning_rate": 9.66709978038292e-06, |
| "loss": 0.8027, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6198979591836735, |
| "grad_norm": 0.44603372884723097, |
| "learning_rate": 9.650856666246693e-06, |
| "loss": 0.7884, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.6275510204081632, |
| "grad_norm": 0.36738127885232164, |
| "learning_rate": 9.63424097551621e-06, |
| "loss": 0.7914, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.6352040816326531, |
| "grad_norm": 0.3461400435266106, |
| "learning_rate": 9.617254039261835e-06, |
| "loss": 0.7698, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.3746146031615924, |
| "learning_rate": 9.599897218294122e-06, |
| "loss": 0.7958, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.6505102040816326, |
| "grad_norm": 0.37460698420532035, |
| "learning_rate": 9.582171903054815e-06, |
| "loss": 0.7677, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6581632653061225, |
| "grad_norm": 0.3433298671011665, |
| "learning_rate": 9.564079513505455e-06, |
| "loss": 0.8015, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.6658163265306123, |
| "grad_norm": 0.3482571197001568, |
| "learning_rate": 9.54562149901362e-06, |
| "loss": 0.7638, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.673469387755102, |
| "grad_norm": 0.40316278701128183, |
| "learning_rate": 9.526799338236828e-06, |
| "loss": 0.7843, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.6811224489795918, |
| "grad_norm": 0.3555197534077026, |
| "learning_rate": 9.507614539004082e-06, |
| "loss": 0.7925, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.6887755102040817, |
| "grad_norm": 0.38348347862843785, |
| "learning_rate": 9.488068638195072e-06, |
| "loss": 0.7894, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6964285714285714, |
| "grad_norm": 0.3578221543396146, |
| "learning_rate": 9.468163201617063e-06, |
| "loss": 0.7625, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.7040816326530612, |
| "grad_norm": 0.3512021435202505, |
| "learning_rate": 9.447899823879456e-06, |
| "loss": 0.7664, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7117346938775511, |
| "grad_norm": 0.359766111570709, |
| "learning_rate": 9.427280128266049e-06, |
| "loss": 0.7827, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.7193877551020408, |
| "grad_norm": 0.3342360288463418, |
| "learning_rate": 9.406305766604996e-06, |
| "loss": 0.7868, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.7270408163265306, |
| "grad_norm": 0.32321038061189955, |
| "learning_rate": 9.384978419136469e-06, |
| "loss": 0.7898, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7346938775510204, |
| "grad_norm": 0.34171882937229003, |
| "learning_rate": 9.363299794378072e-06, |
| "loss": 0.7665, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.7423469387755102, |
| "grad_norm": 0.35118533636970833, |
| "learning_rate": 9.34127162898797e-06, |
| "loss": 0.7726, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.3370944528784506, |
| "learning_rate": 9.318895687625752e-06, |
| "loss": 0.7835, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.7576530612244898, |
| "grad_norm": 0.29470217608260335, |
| "learning_rate": 9.296173762811084e-06, |
| "loss": 0.7777, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.7653061224489796, |
| "grad_norm": 0.3454016966220543, |
| "learning_rate": 9.273107674780102e-06, |
| "loss": 0.789, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7729591836734694, |
| "grad_norm": 0.39511779117121837, |
| "learning_rate": 9.249699271339594e-06, |
| "loss": 0.7603, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.7806122448979592, |
| "grad_norm": 0.38047057845879517, |
| "learning_rate": 9.225950427718974e-06, |
| "loss": 0.7825, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.7882653061224489, |
| "grad_norm": 0.4086929427189167, |
| "learning_rate": 9.201863046420065e-06, |
| "loss": 0.7736, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.7959183673469388, |
| "grad_norm": 0.40106266010702624, |
| "learning_rate": 9.177439057064684e-06, |
| "loss": 0.7846, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 0.4228915644292435, |
| "learning_rate": 9.152680416240059e-06, |
| "loss": 0.7666, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.8112244897959183, |
| "grad_norm": 0.3146448558809326, |
| "learning_rate": 9.1275891073421e-06, |
| "loss": 0.7657, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.8188775510204082, |
| "grad_norm": 0.43451140759881535, |
| "learning_rate": 9.102167140416503e-06, |
| "loss": 0.7709, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.826530612244898, |
| "grad_norm": 0.33837608510390754, |
| "learning_rate": 9.076416551997721e-06, |
| "loss": 0.7744, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.8341836734693877, |
| "grad_norm": 0.37565083198791915, |
| "learning_rate": 9.050339404945834e-06, |
| "loss": 0.7827, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.8418367346938775, |
| "grad_norm": 0.365977104631186, |
| "learning_rate": 9.023937788281278e-06, |
| "loss": 0.7834, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8494897959183674, |
| "grad_norm": 0.4486855497610728, |
| "learning_rate": 8.997213817017508e-06, |
| "loss": 0.7644, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.3817323486497425, |
| "learning_rate": 8.970169631991556e-06, |
| "loss": 0.779, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.8647959183673469, |
| "grad_norm": 0.38664365638910025, |
| "learning_rate": 8.942807399692543e-06, |
| "loss": 0.7688, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.8724489795918368, |
| "grad_norm": 0.37215420738034083, |
| "learning_rate": 8.915129312088112e-06, |
| "loss": 0.7757, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.8801020408163265, |
| "grad_norm": 0.35426054376993094, |
| "learning_rate": 8.88713758644883e-06, |
| "loss": 0.7674, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8877551020408163, |
| "grad_norm": 0.4195483369441139, |
| "learning_rate": 8.858834465170576e-06, |
| "loss": 0.7677, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.8954081632653061, |
| "grad_norm": 0.41930027175437984, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.7568, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.9030612244897959, |
| "grad_norm": 0.3279073397597126, |
| "learning_rate": 8.801303129827352e-06, |
| "loss": 0.7822, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.9107142857142857, |
| "grad_norm": 0.357892031294145, |
| "learning_rate": 8.772079524553951e-06, |
| "loss": 0.7534, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.9183673469387755, |
| "grad_norm": 0.3165448329232979, |
| "learning_rate": 8.742553740855507e-06, |
| "loss": 0.7739, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9260204081632653, |
| "grad_norm": 0.3383676190282577, |
| "learning_rate": 8.712728144020118e-06, |
| "loss": 0.7603, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.9336734693877551, |
| "grad_norm": 0.3208003771597576, |
| "learning_rate": 8.682605123353685e-06, |
| "loss": 0.7629, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.9413265306122449, |
| "grad_norm": 0.32756065557104463, |
| "learning_rate": 8.652187091988516e-06, |
| "loss": 0.7751, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.9489795918367347, |
| "grad_norm": 0.3144217008561782, |
| "learning_rate": 8.621476486689991e-06, |
| "loss": 0.7617, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.9566326530612245, |
| "grad_norm": 0.34740205977487404, |
| "learning_rate": 8.590475767661371e-06, |
| "loss": 0.7715, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 0.36365085803217173, |
| "learning_rate": 8.559187418346703e-06, |
| "loss": 0.7702, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.9719387755102041, |
| "grad_norm": 0.3242477226265553, |
| "learning_rate": 8.527613945231886e-06, |
| "loss": 0.7486, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.9795918367346939, |
| "grad_norm": 0.33767240411889204, |
| "learning_rate": 8.495757877643857e-06, |
| "loss": 0.7539, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.9872448979591837, |
| "grad_norm": 0.3728149497460986, |
| "learning_rate": 8.463621767547998e-06, |
| "loss": 0.7823, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.9948979591836735, |
| "grad_norm": 0.3037904703658018, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.7676, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.0025510204081634, |
| "grad_norm": 0.3657607958037338, |
| "learning_rate": 8.398519739657997e-06, |
| "loss": 0.7447, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.010204081632653, |
| "grad_norm": 0.32702463131138215, |
| "learning_rate": 8.36555903713785e-06, |
| "loss": 0.7192, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.0178571428571428, |
| "grad_norm": 0.3241261143033043, |
| "learning_rate": 8.332328722240072e-06, |
| "loss": 0.7463, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.0255102040816326, |
| "grad_norm": 0.34857830092581826, |
| "learning_rate": 8.298831457019943e-06, |
| "loss": 0.7477, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.0331632653061225, |
| "grad_norm": 0.34054768296119625, |
| "learning_rate": 8.265069924917925e-06, |
| "loss": 0.7205, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0408163265306123, |
| "grad_norm": 0.3456929422423653, |
| "learning_rate": 8.231046830544716e-06, |
| "loss": 0.74, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.0484693877551021, |
| "grad_norm": 0.3535133441712295, |
| "learning_rate": 8.196764899464552e-06, |
| "loss": 0.7269, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.0561224489795917, |
| "grad_norm": 0.3547241666178161, |
| "learning_rate": 8.162226877976886e-06, |
| "loss": 0.7554, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.0637755102040816, |
| "grad_norm": 0.3107043446143115, |
| "learning_rate": 8.127435532896388e-06, |
| "loss": 0.7288, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.3373069931944795, |
| "learning_rate": 8.092393651331275e-06, |
| "loss": 0.7355, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0790816326530612, |
| "grad_norm": 0.3279896770891133, |
| "learning_rate": 8.057104040460062e-06, |
| "loss": 0.7385, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.086734693877551, |
| "grad_norm": 0.3261929334088148, |
| "learning_rate": 8.021569527306663e-06, |
| "loss": 0.728, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.094387755102041, |
| "grad_norm": 0.3174495615043381, |
| "learning_rate": 7.985792958513932e-06, |
| "loss": 0.7408, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.1020408163265305, |
| "grad_norm": 0.3067789820058703, |
| "learning_rate": 7.949777200115617e-06, |
| "loss": 0.7227, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.1096938775510203, |
| "grad_norm": 0.35433845526552205, |
| "learning_rate": 7.913525137306756e-06, |
| "loss": 0.7287, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.1173469387755102, |
| "grad_norm": 0.34741723337985486, |
| "learning_rate": 7.877039674212569e-06, |
| "loss": 0.7258, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.3280286780388698, |
| "learning_rate": 7.84032373365578e-06, |
| "loss": 0.7262, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.1326530612244898, |
| "grad_norm": 0.30769648626514395, |
| "learning_rate": 7.803380256922495e-06, |
| "loss": 0.7281, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.1403061224489797, |
| "grad_norm": 0.3815259137143696, |
| "learning_rate": 7.76621220352657e-06, |
| "loss": 0.7189, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.1479591836734695, |
| "grad_norm": 0.3003966668112163, |
| "learning_rate": 7.728822550972523e-06, |
| "loss": 0.7372, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.155612244897959, |
| "grad_norm": 0.3730853217142341, |
| "learning_rate": 7.69121429451702e-06, |
| "loss": 0.7327, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.163265306122449, |
| "grad_norm": 0.3077673668382533, |
| "learning_rate": 7.65339044692891e-06, |
| "loss": 0.7243, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.1709183673469388, |
| "grad_norm": 0.3419341487539606, |
| "learning_rate": 7.615354038247889e-06, |
| "loss": 0.7132, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.1785714285714286, |
| "grad_norm": 0.4263455028715549, |
| "learning_rate": 7.577108115541761e-06, |
| "loss": 0.7136, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.1862244897959184, |
| "grad_norm": 0.3659660917830282, |
| "learning_rate": 7.53865574266234e-06, |
| "loss": 0.7423, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.193877551020408, |
| "grad_norm": 0.38942680340360186, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.7194, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.2015306122448979, |
| "grad_norm": 0.4077561265291119, |
| "learning_rate": 7.461143984236925e-06, |
| "loss": 0.7152, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.2091836734693877, |
| "grad_norm": 0.31673843482064173, |
| "learning_rate": 7.422090808099014e-06, |
| "loss": 0.7164, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.2168367346938775, |
| "grad_norm": 0.3411891380728348, |
| "learning_rate": 7.382843600106539e-06, |
| "loss": 0.7232, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 0.35077489655944777, |
| "learning_rate": 7.343405504323519e-06, |
| "loss": 0.7306, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2321428571428572, |
| "grad_norm": 0.35048005407769184, |
| "learning_rate": 7.303779680105844e-06, |
| "loss": 0.7252, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.239795918367347, |
| "grad_norm": 0.3074506613629546, |
| "learning_rate": 7.263969301848188e-06, |
| "loss": 0.7401, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.2474489795918366, |
| "grad_norm": 0.3522836731747551, |
| "learning_rate": 7.223977558729707e-06, |
| "loss": 0.7235, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.2551020408163265, |
| "grad_norm": 0.320879835567969, |
| "learning_rate": 7.183807654458565e-06, |
| "loss": 0.7269, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.2627551020408163, |
| "grad_norm": 0.3369884807808652, |
| "learning_rate": 7.143462807015271e-06, |
| "loss": 0.7314, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.2704081632653061, |
| "grad_norm": 0.3106327480512219, |
| "learning_rate": 7.102946248394908e-06, |
| "loss": 0.7311, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.278061224489796, |
| "grad_norm": 0.3350969859669365, |
| "learning_rate": 7.0622612243482035e-06, |
| "loss": 0.7454, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.32981543690241394, |
| "learning_rate": 7.021410994121525e-06, |
| "loss": 0.716, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.2933673469387754, |
| "grad_norm": 0.3124947696287792, |
| "learning_rate": 6.980398830195785e-06, |
| "loss": 0.7227, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.3010204081632653, |
| "grad_norm": 0.29800502106565513, |
| "learning_rate": 6.939228018024275e-06, |
| "loss": 0.7334, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.308673469387755, |
| "grad_norm": 0.33815713937721337, |
| "learning_rate": 6.897901855769483e-06, |
| "loss": 0.7138, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.316326530612245, |
| "grad_norm": 0.307107701344449, |
| "learning_rate": 6.856423654038868e-06, |
| "loss": 0.7267, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.3239795918367347, |
| "grad_norm": 0.3373642844242084, |
| "learning_rate": 6.814796735619664e-06, |
| "loss": 0.7144, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.3316326530612246, |
| "grad_norm": 0.2967492516461531, |
| "learning_rate": 6.773024435212678e-06, |
| "loss": 0.7139, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.3392857142857144, |
| "grad_norm": 0.2944971616996988, |
| "learning_rate": 6.731110099165165e-06, |
| "loss": 0.7417, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.346938775510204, |
| "grad_norm": 0.293601269092346, |
| "learning_rate": 6.689057085202737e-06, |
| "loss": 0.7292, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.3545918367346939, |
| "grad_norm": 0.2857412083314722, |
| "learning_rate": 6.646868762160399e-06, |
| "loss": 0.7169, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.3622448979591837, |
| "grad_norm": 0.2793697266770511, |
| "learning_rate": 6.6045485097126585e-06, |
| "loss": 0.723, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.3698979591836735, |
| "grad_norm": 0.28168414558063304, |
| "learning_rate": 6.562099718102788e-06, |
| "loss": 0.7111, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.3775510204081631, |
| "grad_norm": 0.31727867501147977, |
| "learning_rate": 6.519525787871235e-06, |
| "loss": 0.728, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.385204081632653, |
| "grad_norm": 0.298603631519449, |
| "learning_rate": 6.476830129583207e-06, |
| "loss": 0.7136, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.3928571428571428, |
| "grad_norm": 0.29156749391023734, |
| "learning_rate": 6.434016163555452e-06, |
| "loss": 0.7229, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.4005102040816326, |
| "grad_norm": 0.3216966381557731, |
| "learning_rate": 6.391087319582264e-06, |
| "loss": 0.7329, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.4081632653061225, |
| "grad_norm": 0.3100392635441308, |
| "learning_rate": 6.34804703666072e-06, |
| "loss": 0.6948, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.4158163265306123, |
| "grad_norm": 0.3205090950935673, |
| "learning_rate": 6.304898762715187e-06, |
| "loss": 0.7185, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.4234693877551021, |
| "grad_norm": 0.31408394255073874, |
| "learning_rate": 6.261645954321109e-06, |
| "loss": 0.7155, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.431122448979592, |
| "grad_norm": 0.3615372791973604, |
| "learning_rate": 6.21829207642811e-06, |
| "loss": 0.72, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.4387755102040816, |
| "grad_norm": 0.3179800138909638, |
| "learning_rate": 6.1748406020824115e-06, |
| "loss": 0.7215, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.4464285714285714, |
| "grad_norm": 0.39197778035917613, |
| "learning_rate": 6.131295012148613e-06, |
| "loss": 0.7161, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.4540816326530612, |
| "grad_norm": 0.3475079137600484, |
| "learning_rate": 6.087658795030838e-06, |
| "loss": 0.7212, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.461734693877551, |
| "grad_norm": 0.3303825779966534, |
| "learning_rate": 6.043935446393294e-06, |
| "loss": 0.7138, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.469387755102041, |
| "grad_norm": 0.3397781093231611, |
| "learning_rate": 6.000128468880223e-06, |
| "loss": 0.7357, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.4770408163265305, |
| "grad_norm": 0.28796995234216377, |
| "learning_rate": 5.956241371835312e-06, |
| "loss": 0.7329, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.4846938775510203, |
| "grad_norm": 0.33155770096938897, |
| "learning_rate": 5.912277671020564e-06, |
| "loss": 0.7301, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.4923469387755102, |
| "grad_norm": 0.3094759577498115, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.6932, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.3318537172693951, |
| "learning_rate": 5.824134551530783e-06, |
| "loss": 0.7326, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.5076530612244898, |
| "grad_norm": 0.30879561481901263, |
| "learning_rate": 5.77996219393409e-06, |
| "loss": 0.7195, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.5153061224489797, |
| "grad_norm": 0.3303797600701955, |
| "learning_rate": 5.735727354158581e-06, |
| "loss": 0.7353, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.5229591836734695, |
| "grad_norm": 0.29685163959288646, |
| "learning_rate": 5.6914335758236665e-06, |
| "loss": 0.7262, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 0.3006334315846896, |
| "learning_rate": 5.647084407270277e-06, |
| "loss": 0.725, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.538265306122449, |
| "grad_norm": 0.3181086382843995, |
| "learning_rate": 5.6026834012766155e-06, |
| "loss": 0.717, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.5459183673469388, |
| "grad_norm": 0.2828538681954745, |
| "learning_rate": 5.5582341147735396e-06, |
| "loss": 0.7134, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.5535714285714286, |
| "grad_norm": 0.2949909914868706, |
| "learning_rate": 5.5137401085596224e-06, |
| "loss": 0.7334, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.5612244897959182, |
| "grad_norm": 0.3090417583811012, |
| "learning_rate": 5.469204947015897e-06, |
| "loss": 0.7189, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.568877551020408, |
| "grad_norm": 0.28811678419113473, |
| "learning_rate": 5.424632197820325e-06, |
| "loss": 0.6947, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.5765306122448979, |
| "grad_norm": 0.29198617120064, |
| "learning_rate": 5.380025431661981e-06, |
| "loss": 0.7146, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.5841836734693877, |
| "grad_norm": 0.30576199585440467, |
| "learning_rate": 5.335388221955012e-06, |
| "loss": 0.7131, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.5918367346938775, |
| "grad_norm": 0.2812362549753351, |
| "learning_rate": 5.290724144552379e-06, |
| "loss": 0.7221, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.5994897959183674, |
| "grad_norm": 0.2760501677497026, |
| "learning_rate": 5.246036777459391e-06, |
| "loss": 0.7141, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.6071428571428572, |
| "grad_norm": 0.2688411176432346, |
| "learning_rate": 5.201329700547077e-06, |
| "loss": 0.7154, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.614795918367347, |
| "grad_norm": 0.30444010616896106, |
| "learning_rate": 5.156606495265402e-06, |
| "loss": 0.7185, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.6224489795918369, |
| "grad_norm": 0.2834241043348135, |
| "learning_rate": 5.111870744356366e-06, |
| "loss": 0.6954, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.6301020408163265, |
| "grad_norm": 0.2839078787011765, |
| "learning_rate": 5.067126031566988e-06, |
| "loss": 0.7199, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.6377551020408163, |
| "grad_norm": 0.2894314952948115, |
| "learning_rate": 5.022375941362218e-06, |
| "loss": 0.7206, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.6454081632653061, |
| "grad_norm": 0.30105116531481235, |
| "learning_rate": 4.977624058637783e-06, |
| "loss": 0.7104, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.6530612244897958, |
| "grad_norm": 0.30603121168713104, |
| "learning_rate": 4.932873968433014e-06, |
| "loss": 0.7214, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.6607142857142856, |
| "grad_norm": 0.2897952522073487, |
| "learning_rate": 4.8881292556436355e-06, |
| "loss": 0.7157, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.6683673469387754, |
| "grad_norm": 0.3064132815038337, |
| "learning_rate": 4.8433935047346e-06, |
| "loss": 0.7129, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.6760204081632653, |
| "grad_norm": 0.2729773805617788, |
| "learning_rate": 4.798670299452926e-06, |
| "loss": 0.7228, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.683673469387755, |
| "grad_norm": 0.2860890205936976, |
| "learning_rate": 4.75396322254061e-06, |
| "loss": 0.7329, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.691326530612245, |
| "grad_norm": 0.28789253491280553, |
| "learning_rate": 4.7092758554476215e-06, |
| "loss": 0.714, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.6989795918367347, |
| "grad_norm": 0.28710361826225933, |
| "learning_rate": 4.664611778044988e-06, |
| "loss": 0.7269, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.7066326530612246, |
| "grad_norm": 0.2607466160182967, |
| "learning_rate": 4.619974568338021e-06, |
| "loss": 0.7254, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.3109562373122327, |
| "learning_rate": 4.575367802179675e-06, |
| "loss": 0.7266, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.7219387755102042, |
| "grad_norm": 0.2671326490442337, |
| "learning_rate": 4.530795052984104e-06, |
| "loss": 0.7402, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.7295918367346939, |
| "grad_norm": 0.3024449623652671, |
| "learning_rate": 4.48625989144038e-06, |
| "loss": 0.7165, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.7372448979591837, |
| "grad_norm": 0.29297628220674093, |
| "learning_rate": 4.441765885226462e-06, |
| "loss": 0.7233, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.7448979591836735, |
| "grad_norm": 0.2703897681423039, |
| "learning_rate": 4.397316598723385e-06, |
| "loss": 0.729, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.7525510204081631, |
| "grad_norm": 0.30928488016271816, |
| "learning_rate": 4.352915592729723e-06, |
| "loss": 0.7242, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.760204081632653, |
| "grad_norm": 0.2706286337944358, |
| "learning_rate": 4.308566424176336e-06, |
| "loss": 0.7154, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.7678571428571428, |
| "grad_norm": 0.260276440616788, |
| "learning_rate": 4.264272645841419e-06, |
| "loss": 0.7079, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.7755102040816326, |
| "grad_norm": 0.27550251553605226, |
| "learning_rate": 4.220037806065911e-06, |
| "loss": 0.727, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.7831632653061225, |
| "grad_norm": 0.27167673630299904, |
| "learning_rate": 4.175865448469219e-06, |
| "loss": 0.7284, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.7908163265306123, |
| "grad_norm": 0.27870909240366326, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.7228, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.7984693877551021, |
| "grad_norm": 0.25737644360984807, |
| "learning_rate": 4.087722328979437e-06, |
| "loss": 0.7248, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.806122448979592, |
| "grad_norm": 0.2678752125292958, |
| "learning_rate": 4.043758628164688e-06, |
| "loss": 0.7276, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.8137755102040818, |
| "grad_norm": 0.2937753087166277, |
| "learning_rate": 3.999871531119779e-06, |
| "loss": 0.7172, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.8214285714285714, |
| "grad_norm": 0.2935500143060239, |
| "learning_rate": 3.956064553606708e-06, |
| "loss": 0.7096, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.8290816326530612, |
| "grad_norm": 0.2872174760470752, |
| "learning_rate": 3.912341204969164e-06, |
| "loss": 0.7085, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 0.27257890683898406, |
| "learning_rate": 3.86870498785139e-06, |
| "loss": 0.7079, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.8443877551020407, |
| "grad_norm": 0.2569539726278704, |
| "learning_rate": 3.825159397917589e-06, |
| "loss": 0.7196, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.8520408163265305, |
| "grad_norm": 0.27151411294136946, |
| "learning_rate": 3.781707923571891e-06, |
| "loss": 0.7026, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.8596938775510203, |
| "grad_norm": 0.27930041542818224, |
| "learning_rate": 3.7383540456788915e-06, |
| "loss": 0.7268, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.8673469387755102, |
| "grad_norm": 0.26864299220619914, |
| "learning_rate": 3.695101237284815e-06, |
| "loss": 0.7248, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.27555317317712996, |
| "learning_rate": 3.6519529633392825e-06, |
| "loss": 0.7084, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.8826530612244898, |
| "grad_norm": 0.2804770635369131, |
| "learning_rate": 3.6089126804177373e-06, |
| "loss": 0.7355, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.8903061224489797, |
| "grad_norm": 0.273320036360711, |
| "learning_rate": 3.5659838364445505e-06, |
| "loss": 0.7243, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.8979591836734695, |
| "grad_norm": 0.28634409896833934, |
| "learning_rate": 3.523169870416795e-06, |
| "loss": 0.7144, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.9056122448979593, |
| "grad_norm": 0.2731707658644953, |
| "learning_rate": 3.480474212128766e-06, |
| "loss": 0.7085, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.913265306122449, |
| "grad_norm": 0.2856724418711564, |
| "learning_rate": 3.4379002818972122e-06, |
| "loss": 0.6994, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.9209183673469388, |
| "grad_norm": 0.25169725455713693, |
| "learning_rate": 3.3954514902873427e-06, |
| "loss": 0.7198, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.2707259735033453, |
| "learning_rate": 3.3531312378396026e-06, |
| "loss": 0.7195, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.9362244897959182, |
| "grad_norm": 0.2575058815163588, |
| "learning_rate": 3.310942914797265e-06, |
| "loss": 0.7221, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.943877551020408, |
| "grad_norm": 0.2624601348833616, |
| "learning_rate": 3.2688899008348386e-06, |
| "loss": 0.7098, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.9515306122448979, |
| "grad_norm": 0.26759811934604133, |
| "learning_rate": 3.226975564787322e-06, |
| "loss": 0.715, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.9591836734693877, |
| "grad_norm": 0.26105450865585245, |
| "learning_rate": 3.1852032643803377e-06, |
| "loss": 0.7115, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.9668367346938775, |
| "grad_norm": 0.2679020391433801, |
| "learning_rate": 3.143576345961132e-06, |
| "loss": 0.7108, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.9744897959183674, |
| "grad_norm": 0.2619436598503336, |
| "learning_rate": 3.1020981442305187e-06, |
| "loss": 0.7177, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.9821428571428572, |
| "grad_norm": 0.28254050960476124, |
| "learning_rate": 3.0607719819757264e-06, |
| "loss": 0.714, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.989795918367347, |
| "grad_norm": 0.25168812703672494, |
| "learning_rate": 3.019601169804216e-06, |
| "loss": 0.6937, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.9974489795918369, |
| "grad_norm": 0.25916481234256045, |
| "learning_rate": 2.978589005878476e-06, |
| "loss": 0.728, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.0051020408163267, |
| "grad_norm": 0.29079133938515395, |
| "learning_rate": 2.937738775651798e-06, |
| "loss": 0.712, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.0127551020408165, |
| "grad_norm": 0.30679942022688345, |
| "learning_rate": 2.8970537516050935e-06, |
| "loss": 0.6888, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.020408163265306, |
| "grad_norm": 0.26247554064017004, |
| "learning_rate": 2.8565371929847286e-06, |
| "loss": 0.6857, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.0280612244897958, |
| "grad_norm": 0.25777996123866176, |
| "learning_rate": 2.816192345541437e-06, |
| "loss": 0.67, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.0357142857142856, |
| "grad_norm": 0.27157311348157104, |
| "learning_rate": 2.776022441270295e-06, |
| "loss": 0.677, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.0433673469387754, |
| "grad_norm": 0.2906167681301434, |
| "learning_rate": 2.736030698151815e-06, |
| "loss": 0.6901, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.0510204081632653, |
| "grad_norm": 0.2682203092623702, |
| "learning_rate": 2.6962203198941587e-06, |
| "loss": 0.6799, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.058673469387755, |
| "grad_norm": 0.26952637185558564, |
| "learning_rate": 2.656594495676482e-06, |
| "loss": 0.6867, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.066326530612245, |
| "grad_norm": 0.2976523762247378, |
| "learning_rate": 2.6171563998934605e-06, |
| "loss": 0.6798, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.0739795918367347, |
| "grad_norm": 0.2952839578045796, |
| "learning_rate": 2.577909191900988e-06, |
| "loss": 0.6775, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.0816326530612246, |
| "grad_norm": 0.26694864359766846, |
| "learning_rate": 2.5388560157630765e-06, |
| "loss": 0.687, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.0892857142857144, |
| "grad_norm": 0.2846824142546979, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.6933, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.0969387755102042, |
| "grad_norm": 0.2603852023863315, |
| "learning_rate": 2.4613442573376625e-06, |
| "loss": 0.6932, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.104591836734694, |
| "grad_norm": 0.25484664824654074, |
| "learning_rate": 2.422891884458241e-06, |
| "loss": 0.686, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.1122448979591835, |
| "grad_norm": 0.26504213354211553, |
| "learning_rate": 2.384645961752113e-06, |
| "loss": 0.6794, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.1198979591836733, |
| "grad_norm": 0.25708180544743425, |
| "learning_rate": 2.346609553071093e-06, |
| "loss": 0.6757, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.127551020408163, |
| "grad_norm": 0.2495286574798052, |
| "learning_rate": 2.308785705482982e-06, |
| "loss": 0.6904, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.135204081632653, |
| "grad_norm": 0.258995496102722, |
| "learning_rate": 2.2711774490274767e-06, |
| "loss": 0.6816, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.26920967816385, |
| "learning_rate": 2.2337877964734324e-06, |
| "loss": 0.6798, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.1505102040816326, |
| "grad_norm": 0.25763183428685615, |
| "learning_rate": 2.1966197430775056e-06, |
| "loss": 0.6861, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.1581632653061225, |
| "grad_norm": 0.2476928851183991, |
| "learning_rate": 2.159676266344222e-06, |
| "loss": 0.6929, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.1658163265306123, |
| "grad_norm": 0.2509285445340273, |
| "learning_rate": 2.122960325787432e-06, |
| "loss": 0.6828, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.173469387755102, |
| "grad_norm": 0.2507947715753146, |
| "learning_rate": 2.086474862693244e-06, |
| "loss": 0.6871, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.181122448979592, |
| "grad_norm": 0.2630733156124699, |
| "learning_rate": 2.050222799884387e-06, |
| "loss": 0.6799, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.188775510204082, |
| "grad_norm": 0.26301976727268556, |
| "learning_rate": 2.0142070414860704e-06, |
| "loss": 0.6858, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.1964285714285716, |
| "grad_norm": 0.24578041007237145, |
| "learning_rate": 1.9784304726933384e-06, |
| "loss": 0.6787, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.204081632653061, |
| "grad_norm": 0.2461786949708224, |
| "learning_rate": 1.942895959539939e-06, |
| "loss": 0.6994, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.211734693877551, |
| "grad_norm": 0.24930437639531394, |
| "learning_rate": 1.9076063486687256e-06, |
| "loss": 0.6958, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.2193877551020407, |
| "grad_norm": 0.24702757253020044, |
| "learning_rate": 1.8725644671036125e-06, |
| "loss": 0.6841, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.2270408163265305, |
| "grad_norm": 0.24810798041544824, |
| "learning_rate": 1.8377731220231144e-06, |
| "loss": 0.6817, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.2346938775510203, |
| "grad_norm": 0.2398463945344876, |
| "learning_rate": 1.803235100535452e-06, |
| "loss": 0.6928, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.24234693877551, |
| "grad_norm": 0.25536410312526053, |
| "learning_rate": 1.7689531694552863e-06, |
| "loss": 0.6924, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.24193948267148485, |
| "learning_rate": 1.7349300750820758e-06, |
| "loss": 0.7014, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.25765306122449, |
| "grad_norm": 0.25314351545479186, |
| "learning_rate": 1.7011685429800596e-06, |
| "loss": 0.6829, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.2653061224489797, |
| "grad_norm": 0.2557447381261113, |
| "learning_rate": 1.6676712777599275e-06, |
| "loss": 0.686, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.2729591836734695, |
| "grad_norm": 0.24015367436036683, |
| "learning_rate": 1.6344409628621482e-06, |
| "loss": 0.6977, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.2806122448979593, |
| "grad_norm": 0.24549848915323214, |
| "learning_rate": 1.6014802603420044e-06, |
| "loss": 0.6929, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.288265306122449, |
| "grad_norm": 0.2554793618240299, |
| "learning_rate": 1.5687918106563326e-06, |
| "loss": 0.6917, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.295918367346939, |
| "grad_norm": 0.23893288637534513, |
| "learning_rate": 1.5363782324520033e-06, |
| "loss": 0.6919, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.3035714285714284, |
| "grad_norm": 0.2564714188554446, |
| "learning_rate": 1.504242122356143e-06, |
| "loss": 0.7097, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.311224489795918, |
| "grad_norm": 0.24283921622086807, |
| "learning_rate": 1.4723860547681163e-06, |
| "loss": 0.6849, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.318877551020408, |
| "grad_norm": 0.26173546523391394, |
| "learning_rate": 1.4408125816532981e-06, |
| "loss": 0.6993, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.326530612244898, |
| "grad_norm": 0.25486241688912814, |
| "learning_rate": 1.4095242323386305e-06, |
| "loss": 0.6788, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.3341836734693877, |
| "grad_norm": 0.2361817323877386, |
| "learning_rate": 1.3785235133100088e-06, |
| "loss": 0.6905, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.3418367346938775, |
| "grad_norm": 0.24162144820607892, |
| "learning_rate": 1.347812908011485e-06, |
| "loss": 0.6841, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.3494897959183674, |
| "grad_norm": 0.2490597468001364, |
| "learning_rate": 1.3173948766463146e-06, |
| "loss": 0.6802, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 0.25398677763039623, |
| "learning_rate": 1.2872718559798852e-06, |
| "loss": 0.6886, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.364795918367347, |
| "grad_norm": 0.23490252476576118, |
| "learning_rate": 1.257446259144494e-06, |
| "loss": 0.6868, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.372448979591837, |
| "grad_norm": 0.23763836546961506, |
| "learning_rate": 1.2279204754460494e-06, |
| "loss": 0.6941, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.3801020408163267, |
| "grad_norm": 0.2355524230958337, |
| "learning_rate": 1.1986968701726492e-06, |
| "loss": 0.6915, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.387755102040816, |
| "grad_norm": 0.24752535017353638, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.6921, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.395408163265306, |
| "grad_norm": 0.2471974082020202, |
| "learning_rate": 1.141165534829425e-06, |
| "loss": 0.6893, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.4030612244897958, |
| "grad_norm": 0.23381448120221882, |
| "learning_rate": 1.1128624135511712e-06, |
| "loss": 0.6731, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.4107142857142856, |
| "grad_norm": 0.2389896648493918, |
| "learning_rate": 1.0848706879118893e-06, |
| "loss": 0.6969, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.4183673469387754, |
| "grad_norm": 0.24224169159980133, |
| "learning_rate": 1.057192600307456e-06, |
| "loss": 0.6955, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.4260204081632653, |
| "grad_norm": 0.24100521101649902, |
| "learning_rate": 1.0298303680084448e-06, |
| "loss": 0.6896, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.433673469387755, |
| "grad_norm": 0.2344749626752725, |
| "learning_rate": 1.0027861829824953e-06, |
| "loss": 0.6819, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.441326530612245, |
| "grad_norm": 0.2319344660343658, |
| "learning_rate": 9.760622117187234e-07, |
| "loss": 0.6786, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 0.23391479387830102, |
| "learning_rate": 9.496605950541676e-07, |
| "loss": 0.7039, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.4566326530612246, |
| "grad_norm": 0.23186779062668114, |
| "learning_rate": 9.235834480022788e-07, |
| "loss": 0.688, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.4642857142857144, |
| "grad_norm": 0.23669253409024169, |
| "learning_rate": 8.978328595834984e-07, |
| "loss": 0.7044, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.4719387755102042, |
| "grad_norm": 0.24077368080825076, |
| "learning_rate": 8.724108926579e-07, |
| "loss": 0.6872, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.479591836734694, |
| "grad_norm": 0.2388246013777763, |
| "learning_rate": 8.473195837599419e-07, |
| "loss": 0.6916, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.487244897959184, |
| "grad_norm": 0.24710155251980784, |
| "learning_rate": 8.225609429353187e-07, |
| "loss": 0.684, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.4948979591836733, |
| "grad_norm": 0.24220633252180443, |
| "learning_rate": 7.981369535799354e-07, |
| "loss": 0.6879, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.502551020408163, |
| "grad_norm": 0.2302557057062771, |
| "learning_rate": 7.740495722810271e-07, |
| "loss": 0.7039, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.510204081632653, |
| "grad_norm": 0.24237334538109565, |
| "learning_rate": 7.50300728660407e-07, |
| "loss": 0.6929, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.517857142857143, |
| "grad_norm": 0.23815797746653947, |
| "learning_rate": 7.26892325219899e-07, |
| "loss": 0.6959, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.5255102040816326, |
| "grad_norm": 0.23135128489800952, |
| "learning_rate": 7.03826237188916e-07, |
| "loss": 0.675, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.5331632653061225, |
| "grad_norm": 0.22784611175554673, |
| "learning_rate": 6.811043123742494e-07, |
| "loss": 0.6977, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.5408163265306123, |
| "grad_norm": 0.2405465208719118, |
| "learning_rate": 6.587283710120324e-07, |
| "loss": 0.6749, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.548469387755102, |
| "grad_norm": 0.2309843149804867, |
| "learning_rate": 6.367002056219285e-07, |
| "loss": 0.6909, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.556122448979592, |
| "grad_norm": 0.22895030021484628, |
| "learning_rate": 6.150215808635334e-07, |
| "loss": 0.6832, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.563775510204082, |
| "grad_norm": 0.22696479172732587, |
| "learning_rate": 5.936942333950063e-07, |
| "loss": 0.6746, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.22839398316256343, |
| "learning_rate": 5.727198717339511e-07, |
| "loss": 0.6866, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.579081632653061, |
| "grad_norm": 0.22787946605540033, |
| "learning_rate": 5.521001761205441e-07, |
| "loss": 0.6995, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.586734693877551, |
| "grad_norm": 0.22190853543659503, |
| "learning_rate": 5.318367983829393e-07, |
| "loss": 0.6785, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.5943877551020407, |
| "grad_norm": 0.23020833242127728, |
| "learning_rate": 5.119313618049309e-07, |
| "loss": 0.6965, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.6020408163265305, |
| "grad_norm": 0.24859089494546147, |
| "learning_rate": 4.9238546099592e-07, |
| "loss": 0.6799, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.6096938775510203, |
| "grad_norm": 0.24590659281484603, |
| "learning_rate": 4.732006617631729e-07, |
| "loss": 0.6705, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.61734693877551, |
| "grad_norm": 0.23193659731275013, |
| "learning_rate": 4.54378500986381e-07, |
| "loss": 0.6899, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 0.22818730097649043, |
| "learning_rate": 4.35920486494546e-07, |
| "loss": 0.6735, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.63265306122449, |
| "grad_norm": 0.23430936758279247, |
| "learning_rate": 4.1782809694518533e-07, |
| "loss": 0.691, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.6403061224489797, |
| "grad_norm": 0.22808387025349178, |
| "learning_rate": 4.001027817058789e-07, |
| "loss": 0.6862, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.6479591836734695, |
| "grad_norm": 0.22146676723869588, |
| "learning_rate": 3.8274596073816784e-07, |
| "loss": 0.6863, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.6556122448979593, |
| "grad_norm": 0.22664786178462878, |
| "learning_rate": 3.657590244837911e-07, |
| "loss": 0.6849, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.663265306122449, |
| "grad_norm": 0.23451188263459435, |
| "learning_rate": 3.49143333753309e-07, |
| "loss": 0.6877, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.670918367346939, |
| "grad_norm": 0.24867897398782676, |
| "learning_rate": 3.3290021961708163e-07, |
| "loss": 0.6802, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.678571428571429, |
| "grad_norm": 0.22314324400743446, |
| "learning_rate": 3.1703098329864237e-07, |
| "loss": 0.6824, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.686224489795918, |
| "grad_norm": 0.2236801602770835, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.6928, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.693877551020408, |
| "grad_norm": 0.2385665875720026, |
| "learning_rate": 2.864191991520848e-07, |
| "loss": 0.6846, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.701530612244898, |
| "grad_norm": 0.23375450743740805, |
| "learning_rate": 2.71679103610738e-07, |
| "loss": 0.6909, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.7091836734693877, |
| "grad_norm": 0.22520474139439234, |
| "learning_rate": 2.573177902642726e-07, |
| "loss": 0.6788, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.7168367346938775, |
| "grad_norm": 0.22804273783537157, |
| "learning_rate": 2.4333640958659144e-07, |
| "loss": 0.7087, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.7244897959183674, |
| "grad_norm": 0.22739247348429448, |
| "learning_rate": 2.2973608161547755e-07, |
| "loss": 0.695, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.732142857142857, |
| "grad_norm": 0.23455037703234047, |
| "learning_rate": 2.1651789586287442e-07, |
| "loss": 0.6805, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.739795918367347, |
| "grad_norm": 0.23159720573508338, |
| "learning_rate": 2.0368291122759898e-07, |
| "loss": 0.6891, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.747448979591837, |
| "grad_norm": 0.22563711327867858, |
| "learning_rate": 1.9123215591052014e-07, |
| "loss": 0.6874, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.7551020408163263, |
| "grad_norm": 0.2244203065125607, |
| "learning_rate": 1.7916662733218848e-07, |
| "loss": 0.6779, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.762755102040816, |
| "grad_norm": 0.22559981924958145, |
| "learning_rate": 1.6748729205293024e-07, |
| "loss": 0.6673, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.770408163265306, |
| "grad_norm": 0.23162805532792882, |
| "learning_rate": 1.5619508569542363e-07, |
| "loss": 0.6607, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.7780612244897958, |
| "grad_norm": 0.23717254901729207, |
| "learning_rate": 1.4529091286973994e-07, |
| "loss": 0.6771, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.22534766714917717, |
| "learning_rate": 1.3477564710088097e-07, |
| "loss": 0.6853, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.7933673469387754, |
| "grad_norm": 0.23074216180625626, |
| "learning_rate": 1.2465013075879884e-07, |
| "loss": 0.6898, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.8010204081632653, |
| "grad_norm": 0.23196388562982978, |
| "learning_rate": 1.1491517499091498e-07, |
| "loss": 0.6936, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.808673469387755, |
| "grad_norm": 0.2358477783756292, |
| "learning_rate": 1.055715596571405e-07, |
| "loss": 0.6814, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.816326530612245, |
| "grad_norm": 0.22714752868258517, |
| "learning_rate": 9.662003326740166e-08, |
| "loss": 0.6808, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.8239795918367347, |
| "grad_norm": 0.21518858671804875, |
| "learning_rate": 8.80613129216762e-08, |
| "loss": 0.6754, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.8316326530612246, |
| "grad_norm": 0.23291929200714853, |
| "learning_rate": 7.989608425254924e-08, |
| "loss": 0.6787, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.8392857142857144, |
| "grad_norm": 0.22181996825718708, |
| "learning_rate": 7.212500137028789e-08, |
| "loss": 0.6894, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.8469387755102042, |
| "grad_norm": 0.22193037565442786, |
| "learning_rate": 6.474868681043578e-08, |
| "loss": 0.6698, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.854591836734694, |
| "grad_norm": 0.22934416722405232, |
| "learning_rate": 5.776773148394976e-08, |
| "loss": 0.6952, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.862244897959184, |
| "grad_norm": 0.22584203654514134, |
| "learning_rate": 5.1182694629857145e-08, |
| "loss": 0.6957, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.8698979591836737, |
| "grad_norm": 0.22461460701111055, |
| "learning_rate": 4.499410377045765e-08, |
| "loss": 0.6886, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.877551020408163, |
| "grad_norm": 0.22733876386433874, |
| "learning_rate": 3.9202454669063915e-08, |
| "loss": 0.6977, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.885204081632653, |
| "grad_norm": 0.22962431221391724, |
| "learning_rate": 3.3808211290284886e-08, |
| "loss": 0.698, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.892857142857143, |
| "grad_norm": 0.23151231532840025, |
| "learning_rate": 2.8811805762860578e-08, |
| "loss": 0.6915, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.9005102040816326, |
| "grad_norm": 0.22229087776784215, |
| "learning_rate": 2.4213638345040868e-08, |
| "loss": 0.6814, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.9081632653061225, |
| "grad_norm": 0.2266877035262688, |
| "learning_rate": 2.0014077392525035e-08, |
| "loss": 0.6795, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.9158163265306123, |
| "grad_norm": 0.2250462204085745, |
| "learning_rate": 1.6213459328950355e-08, |
| "loss": 0.6788, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.923469387755102, |
| "grad_norm": 0.23211476979219928, |
| "learning_rate": 1.2812088618942009e-08, |
| "loss": 0.6957, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.931122448979592, |
| "grad_norm": 0.22076310527156465, |
| "learning_rate": 9.810237743724805e-09, |
| "loss": 0.6813, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.938775510204082, |
| "grad_norm": 0.2275087137841747, |
| "learning_rate": 7.2081471792911914e-09, |
| "loss": 0.685, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.946428571428571, |
| "grad_norm": 0.22791650362368526, |
| "learning_rate": 5.006025377138901e-09, |
| "loss": 0.6748, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.954081632653061, |
| "grad_norm": 0.22210915099031533, |
| "learning_rate": 3.204048747573185e-09, |
| "loss": 0.6824, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.961734693877551, |
| "grad_norm": 0.22179326719231918, |
| "learning_rate": 1.8023616455731253e-09, |
| "loss": 0.6791, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.9693877551020407, |
| "grad_norm": 0.21982432832999604, |
| "learning_rate": 8.010763592264381e-10, |
| "loss": 0.6825, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.9770408163265305, |
| "grad_norm": 0.23354611683548948, |
| "learning_rate": 2.0027310073833516e-10, |
| "loss": 0.6797, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.9846938775510203, |
| "grad_norm": 0.22225271783019312, |
| "learning_rate": 0.0, |
| "loss": 0.6847, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.9846938775510203, |
| "step": 390, |
| "total_flos": 1.3384682412693258e+18, |
| "train_loss": 0.0, |
| "train_runtime": 16.2295, |
| "train_samples_per_second": 2317.452, |
| "train_steps_per_second": 24.03 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 390, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3384682412693258e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |