| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 335, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0029850746268656717, | |
| "grad_norm": 8.253611034452266, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 2.2247, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005970149253731343, | |
| "grad_norm": 7.395306126962346, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 2.1255, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008955223880597015, | |
| "grad_norm": 8.26748388260573, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 2.1023, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.011940298507462687, | |
| "grad_norm": 7.1200500158632, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 2.0025, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.014925373134328358, | |
| "grad_norm": 5.9633773670024315, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 1.9076, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01791044776119403, | |
| "grad_norm": 5.546547205735884, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 1.8587, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.020895522388059702, | |
| "grad_norm": 5.042219157986929, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 1.8198, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.023880597014925373, | |
| "grad_norm": 3.5185430665767887, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 1.5117, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.026865671641791045, | |
| "grad_norm": 3.1354746744528867, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 1.4414, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.029850746268656716, | |
| "grad_norm": 2.757132384211869, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 1.4059, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03283582089552239, | |
| "grad_norm": 5.307800727766051, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4367, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03582089552238806, | |
| "grad_norm": 4.364662300190612, | |
| "learning_rate": 9.99976495753613e-06, | |
| "loss": 1.4175, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03880597014925373, | |
| "grad_norm": 3.136010913151125, | |
| "learning_rate": 9.999059852242508e-06, | |
| "loss": 1.297, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.041791044776119404, | |
| "grad_norm": 2.6314270494566716, | |
| "learning_rate": 9.997884750411004e-06, | |
| "loss": 1.178, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04477611940298507, | |
| "grad_norm": 2.8108610676101105, | |
| "learning_rate": 9.996239762521152e-06, | |
| "loss": 1.311, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04776119402985075, | |
| "grad_norm": 2.646130122162001, | |
| "learning_rate": 9.994125043229753e-06, | |
| "loss": 1.1026, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.050746268656716415, | |
| "grad_norm": 2.3026683820565506, | |
| "learning_rate": 9.991540791356342e-06, | |
| "loss": 1.0708, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.05373134328358209, | |
| "grad_norm": 2.2060475680712157, | |
| "learning_rate": 9.98848724986449e-06, | |
| "loss": 1.1484, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.056716417910447764, | |
| "grad_norm": 2.22608658907851, | |
| "learning_rate": 9.98496470583896e-06, | |
| "loss": 1.1819, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05970149253731343, | |
| "grad_norm": 2.012346588234655, | |
| "learning_rate": 9.980973490458728e-06, | |
| "loss": 1.1226, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0626865671641791, | |
| "grad_norm": 2.0805737688444155, | |
| "learning_rate": 9.976513978965829e-06, | |
| "loss": 1.0251, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.06567164179104477, | |
| "grad_norm": 2.384555566832582, | |
| "learning_rate": 9.971586590630094e-06, | |
| "loss": 1.0278, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06865671641791045, | |
| "grad_norm": 2.1582387653704886, | |
| "learning_rate": 9.966191788709716e-06, | |
| "loss": 1.014, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.07164179104477612, | |
| "grad_norm": 2.0561968840026026, | |
| "learning_rate": 9.960330080407712e-06, | |
| "loss": 1.0074, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 2.1634547485342828, | |
| "learning_rate": 9.954002016824226e-06, | |
| "loss": 1.0355, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07761194029850746, | |
| "grad_norm": 2.000183275885172, | |
| "learning_rate": 9.947208192904722e-06, | |
| "loss": 1.0241, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.08059701492537313, | |
| "grad_norm": 1.9928462865297418, | |
| "learning_rate": 9.939949247384046e-06, | |
| "loss": 0.9917, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.08358208955223881, | |
| "grad_norm": 2.1100381727065094, | |
| "learning_rate": 9.93222586272637e-06, | |
| "loss": 1.0393, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08656716417910448, | |
| "grad_norm": 1.9979189088773692, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 1.0134, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08955223880597014, | |
| "grad_norm": 2.1566867938814784, | |
| "learning_rate": 9.915388724114301e-06, | |
| "loss": 0.937, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09253731343283582, | |
| "grad_norm": 2.0548646219588886, | |
| "learning_rate": 9.906276553136924e-06, | |
| "loss": 1.0227, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0955223880597015, | |
| "grad_norm": 2.1094100666663493, | |
| "learning_rate": 9.896703108827758e-06, | |
| "loss": 0.9474, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09850746268656717, | |
| "grad_norm": 2.196878885778515, | |
| "learning_rate": 9.886669291253178e-06, | |
| "loss": 0.8954, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.10149253731343283, | |
| "grad_norm": 1.9709535105103204, | |
| "learning_rate": 9.876176043762467e-06, | |
| "loss": 0.8858, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1044776119402985, | |
| "grad_norm": 2.4157359301325028, | |
| "learning_rate": 9.86522435289912e-06, | |
| "loss": 0.9492, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.10746268656716418, | |
| "grad_norm": 2.0251547989723453, | |
| "learning_rate": 9.853815248308101e-06, | |
| "loss": 0.9819, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.11044776119402985, | |
| "grad_norm": 2.043073108115636, | |
| "learning_rate": 9.841949802639031e-06, | |
| "loss": 0.9856, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.11343283582089553, | |
| "grad_norm": 1.9051508783430386, | |
| "learning_rate": 9.829629131445342e-06, | |
| "loss": 0.8811, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.11641791044776119, | |
| "grad_norm": 1.9720465747167384, | |
| "learning_rate": 9.816854393079402e-06, | |
| "loss": 0.8809, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.11940298507462686, | |
| "grad_norm": 2.115788113435234, | |
| "learning_rate": 9.803626788583603e-06, | |
| "loss": 0.8754, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12238805970149254, | |
| "grad_norm": 2.0483615805890003, | |
| "learning_rate": 9.789947561577445e-06, | |
| "loss": 0.9111, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.1253731343283582, | |
| "grad_norm": 2.0931565027152925, | |
| "learning_rate": 9.775817998140615e-06, | |
| "loss": 0.883, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12835820895522387, | |
| "grad_norm": 2.1619767559285896, | |
| "learning_rate": 9.761239426692077e-06, | |
| "loss": 0.8859, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.13134328358208955, | |
| "grad_norm": 2.1803394559484515, | |
| "learning_rate": 9.74621321786517e-06, | |
| "loss": 0.9756, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.13432835820895522, | |
| "grad_norm": 1.9473692197801435, | |
| "learning_rate": 9.730740784378755e-06, | |
| "loss": 0.8856, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.1373134328358209, | |
| "grad_norm": 1.9440539475047278, | |
| "learning_rate": 9.71482358090438e-06, | |
| "loss": 0.8619, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.14029850746268657, | |
| "grad_norm": 1.9268937369891654, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 0.8958, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.14328358208955225, | |
| "grad_norm": 1.9576735974697994, | |
| "learning_rate": 9.681660891616967e-06, | |
| "loss": 0.9014, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.14626865671641792, | |
| "grad_norm": 2.2502624703010787, | |
| "learning_rate": 9.664418523660004e-06, | |
| "loss": 0.9054, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 1.9625578640953845, | |
| "learning_rate": 9.646737621134112e-06, | |
| "loss": 0.9438, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15223880597014924, | |
| "grad_norm": 2.11803005668703, | |
| "learning_rate": 9.628619846344453e-06, | |
| "loss": 0.933, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.15522388059701492, | |
| "grad_norm": 1.9405175828921, | |
| "learning_rate": 9.610066902669593e-06, | |
| "loss": 0.892, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1582089552238806, | |
| "grad_norm": 1.940879796450495, | |
| "learning_rate": 9.591080534401371e-06, | |
| "loss": 0.8912, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.16119402985074627, | |
| "grad_norm": 2.1315443999032726, | |
| "learning_rate": 9.571662526580898e-06, | |
| "loss": 0.8837, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.16417910447761194, | |
| "grad_norm": 1.9936535994324662, | |
| "learning_rate": 9.551814704830734e-06, | |
| "loss": 0.936, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.16716417910447762, | |
| "grad_norm": 2.119381573598528, | |
| "learning_rate": 9.531538935183252e-06, | |
| "loss": 0.8668, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1701492537313433, | |
| "grad_norm": 2.0767671264680665, | |
| "learning_rate": 9.51083712390519e-06, | |
| "loss": 0.9144, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.17313432835820897, | |
| "grad_norm": 1.7259669938296525, | |
| "learning_rate": 9.48971121731844e-06, | |
| "loss": 0.7978, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1761194029850746, | |
| "grad_norm": 2.2712488027683895, | |
| "learning_rate": 9.468163201617063e-06, | |
| "loss": 0.8566, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.1791044776119403, | |
| "grad_norm": 2.1653559706182692, | |
| "learning_rate": 9.446195102680531e-06, | |
| "loss": 0.8772, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18208955223880596, | |
| "grad_norm": 2.019437924263334, | |
| "learning_rate": 9.423808985883289e-06, | |
| "loss": 0.8102, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.18507462686567164, | |
| "grad_norm": 2.1016655793843895, | |
| "learning_rate": 9.401006955900555e-06, | |
| "loss": 0.8829, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1880597014925373, | |
| "grad_norm": 1.7789596254753794, | |
| "learning_rate": 9.377791156510456e-06, | |
| "loss": 0.8882, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.191044776119403, | |
| "grad_norm": 3.956571817793095, | |
| "learning_rate": 9.35416377039246e-06, | |
| "loss": 0.9216, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.19402985074626866, | |
| "grad_norm": 2.0814813313495764, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 0.8714, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.19701492537313434, | |
| "grad_norm": 2.16440601583317, | |
| "learning_rate": 9.305683161962569e-06, | |
| "loss": 0.9033, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 2.121322858320113, | |
| "learning_rate": 9.280834497651334e-06, | |
| "loss": 0.8866, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.20298507462686566, | |
| "grad_norm": 2.125810496652349, | |
| "learning_rate": 9.255583362184998e-06, | |
| "loss": 0.9084, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.20597014925373133, | |
| "grad_norm": 2.2332151510392926, | |
| "learning_rate": 9.229932129599206e-06, | |
| "loss": 0.8995, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.208955223880597, | |
| "grad_norm": 1.8844452106307579, | |
| "learning_rate": 9.203883211545517e-06, | |
| "loss": 0.8985, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.21194029850746268, | |
| "grad_norm": 2.1170173520344253, | |
| "learning_rate": 9.177439057064684e-06, | |
| "loss": 0.8688, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.21492537313432836, | |
| "grad_norm": 1.9805965194267483, | |
| "learning_rate": 9.150602152356394e-06, | |
| "loss": 0.8909, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.21791044776119403, | |
| "grad_norm": 2.1059921689798116, | |
| "learning_rate": 9.123375020545534e-06, | |
| "loss": 0.9781, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.2208955223880597, | |
| "grad_norm": 1.9571325614730988, | |
| "learning_rate": 9.09576022144496e-06, | |
| "loss": 0.8519, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 2.1730772437003867, | |
| "learning_rate": 9.067760351314838e-06, | |
| "loss": 0.8709, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.22686567164179106, | |
| "grad_norm": 2.236623739502403, | |
| "learning_rate": 9.039378042618556e-06, | |
| "loss": 1.0019, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.2298507462686567, | |
| "grad_norm": 2.049051045380969, | |
| "learning_rate": 9.01061596377522e-06, | |
| "loss": 0.8222, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.23283582089552238, | |
| "grad_norm": 2.0708856760876833, | |
| "learning_rate": 8.981476818908778e-06, | |
| "loss": 0.9427, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.23582089552238805, | |
| "grad_norm": 1.796129619411125, | |
| "learning_rate": 8.951963347593797e-06, | |
| "loss": 0.8325, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.23880597014925373, | |
| "grad_norm": 2.1961650618323687, | |
| "learning_rate": 8.92207832459788e-06, | |
| "loss": 0.9022, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2417910447761194, | |
| "grad_norm": 2.1631152148881023, | |
| "learning_rate": 8.891824559620801e-06, | |
| "loss": 0.8278, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.24477611940298508, | |
| "grad_norm": 2.025147970703757, | |
| "learning_rate": 8.861204897030346e-06, | |
| "loss": 0.7237, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.24776119402985075, | |
| "grad_norm": 1.9375472063035821, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.7376, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.2507462686567164, | |
| "grad_norm": 1.9368693802128247, | |
| "learning_rate": 8.798879428212748e-06, | |
| "loss": 0.8809, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2537313432835821, | |
| "grad_norm": 1.7919474666326674, | |
| "learning_rate": 8.767179481638303e-06, | |
| "loss": 0.827, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.25671641791044775, | |
| "grad_norm": 1.8293764770314178, | |
| "learning_rate": 8.735125356204982e-06, | |
| "loss": 0.8348, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.25970149253731345, | |
| "grad_norm": 2.2466601113818983, | |
| "learning_rate": 8.702720065545024e-06, | |
| "loss": 0.7715, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.2626865671641791, | |
| "grad_norm": 2.007946173229526, | |
| "learning_rate": 8.669966656306176e-06, | |
| "loss": 0.7911, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2656716417910448, | |
| "grad_norm": 1.8905119743926408, | |
| "learning_rate": 8.636868207865244e-06, | |
| "loss": 0.8525, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.26865671641791045, | |
| "grad_norm": 2.216132567769806, | |
| "learning_rate": 8.603427832038574e-06, | |
| "loss": 0.8743, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2716417910447761, | |
| "grad_norm": 2.0337720619135142, | |
| "learning_rate": 8.569648672789496e-06, | |
| "loss": 0.8652, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.2746268656716418, | |
| "grad_norm": 2.117953862074952, | |
| "learning_rate": 8.535533905932739e-06, | |
| "loss": 0.8499, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.27761194029850744, | |
| "grad_norm": 2.013179581976092, | |
| "learning_rate": 8.501086738835843e-06, | |
| "loss": 0.8635, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.28059701492537314, | |
| "grad_norm": 2.148194857680393, | |
| "learning_rate": 8.466310410117622e-06, | |
| "loss": 0.8781, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2835820895522388, | |
| "grad_norm": 2.1704401998940024, | |
| "learning_rate": 8.43120818934367e-06, | |
| "loss": 0.8618, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2865671641791045, | |
| "grad_norm": 2.065149455076995, | |
| "learning_rate": 8.395783376718967e-06, | |
| "loss": 0.8133, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.28955223880597014, | |
| "grad_norm": 2.0953263142220693, | |
| "learning_rate": 8.360039302777614e-06, | |
| "loss": 0.8189, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.29253731343283584, | |
| "grad_norm": 1.8700626853391247, | |
| "learning_rate": 8.323979328069689e-06, | |
| "loss": 0.8338, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2955223880597015, | |
| "grad_norm": 1.886379993426298, | |
| "learning_rate": 8.28760684284532e-06, | |
| "loss": 0.8397, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 4.887769993107663, | |
| "learning_rate": 8.25092526673592e-06, | |
| "loss": 0.9114, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.30149253731343284, | |
| "grad_norm": 2.1831478421086237, | |
| "learning_rate": 8.213938048432697e-06, | |
| "loss": 0.838, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.3044776119402985, | |
| "grad_norm": 2.048417012849108, | |
| "learning_rate": 8.176648665362426e-06, | |
| "loss": 0.7323, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.3074626865671642, | |
| "grad_norm": 2.245486182262689, | |
| "learning_rate": 8.139060623360494e-06, | |
| "loss": 0.8361, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.31044776119402984, | |
| "grad_norm": 2.1131445651881937, | |
| "learning_rate": 8.101177456341301e-06, | |
| "loss": 0.8112, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.31343283582089554, | |
| "grad_norm": 1.9438690743970823, | |
| "learning_rate": 8.063002725966014e-06, | |
| "loss": 0.8172, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.3164179104477612, | |
| "grad_norm": 1.8330162019695133, | |
| "learning_rate": 8.024540021307709e-06, | |
| "loss": 0.7779, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3194029850746269, | |
| "grad_norm": 1.9694508107519921, | |
| "learning_rate": 7.985792958513932e-06, | |
| "loss": 0.8582, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.32238805970149254, | |
| "grad_norm": 1.9336855933985957, | |
| "learning_rate": 7.946765180466725e-06, | |
| "loss": 0.7725, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3253731343283582, | |
| "grad_norm": 2.0505954912615127, | |
| "learning_rate": 7.907460356440133e-06, | |
| "loss": 0.8778, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.3283582089552239, | |
| "grad_norm": 2.2263425751875183, | |
| "learning_rate": 7.86788218175523e-06, | |
| "loss": 0.8253, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.33134328358208953, | |
| "grad_norm": 1.8159778540989733, | |
| "learning_rate": 7.828034377432694e-06, | |
| "loss": 0.8297, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.33432835820895523, | |
| "grad_norm": 1.9962851923382634, | |
| "learning_rate": 7.787920689842965e-06, | |
| "loss": 0.8761, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3373134328358209, | |
| "grad_norm": 1.978659019282353, | |
| "learning_rate": 7.747544890354031e-06, | |
| "loss": 0.8364, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3402985074626866, | |
| "grad_norm": 1.8570435757482742, | |
| "learning_rate": 7.706910774976849e-06, | |
| "loss": 0.7402, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.34328358208955223, | |
| "grad_norm": 1.9873552845505367, | |
| "learning_rate": 7.666022164008458e-06, | |
| "loss": 0.8159, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.34626865671641793, | |
| "grad_norm": 1.8774777253882997, | |
| "learning_rate": 7.624882901672801e-06, | |
| "loss": 0.8035, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3492537313432836, | |
| "grad_norm": 2.042368536282115, | |
| "learning_rate": 7.5834968557593155e-06, | |
| "loss": 0.8852, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3522388059701492, | |
| "grad_norm": 1.9585431439709604, | |
| "learning_rate": 7.541867917259278e-06, | |
| "loss": 0.9395, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.35522388059701493, | |
| "grad_norm": 1.9690363236772885, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.7769, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3582089552238806, | |
| "grad_norm": 1.9997251291932896, | |
| "learning_rate": 7.457897040276853e-06, | |
| "loss": 0.8867, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3611940298507463, | |
| "grad_norm": 1.9091614861364634, | |
| "learning_rate": 7.415562996483193e-06, | |
| "loss": 0.8203, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3641791044776119, | |
| "grad_norm": 2.0154748461484964, | |
| "learning_rate": 7.373001848738203e-06, | |
| "loss": 0.907, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.36716417910447763, | |
| "grad_norm": 1.8850762145928235, | |
| "learning_rate": 7.330217598512696e-06, | |
| "loss": 0.7268, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3701492537313433, | |
| "grad_norm": 2.002378822284434, | |
| "learning_rate": 7.2872142682529045e-06, | |
| "loss": 0.75, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 1.8683346372861278, | |
| "learning_rate": 7.243995901002312e-06, | |
| "loss": 0.7998, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3761194029850746, | |
| "grad_norm": 1.8603961929214259, | |
| "learning_rate": 7.200566560021525e-06, | |
| "loss": 0.7736, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.37910447761194027, | |
| "grad_norm": 2.086337061985904, | |
| "learning_rate": 7.156930328406268e-06, | |
| "loss": 0.8291, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.382089552238806, | |
| "grad_norm": 2.0488227138684203, | |
| "learning_rate": 7.113091308703498e-06, | |
| "loss": 0.8032, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3850746268656716, | |
| "grad_norm": 2.2156317114896504, | |
| "learning_rate": 7.069053622525697e-06, | |
| "loss": 0.9299, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3880597014925373, | |
| "grad_norm": 1.959090575118802, | |
| "learning_rate": 7.0248214101633685e-06, | |
| "loss": 0.7743, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.39104477611940297, | |
| "grad_norm": 2.040790454179876, | |
| "learning_rate": 6.980398830195785e-06, | |
| "loss": 0.9517, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3940298507462687, | |
| "grad_norm": 2.0095877017198394, | |
| "learning_rate": 6.9357900591000034e-06, | |
| "loss": 0.8367, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3970149253731343, | |
| "grad_norm": 2.053685963143701, | |
| "learning_rate": 6.890999290858213e-06, | |
| "loss": 0.908, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.9552949066858203, | |
| "learning_rate": 6.8460307365634225e-06, | |
| "loss": 0.7769, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.40298507462686567, | |
| "grad_norm": 1.9422682262777784, | |
| "learning_rate": 6.800888624023552e-06, | |
| "loss": 0.8613, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4059701492537313, | |
| "grad_norm": 2.0018278352562784, | |
| "learning_rate": 6.755577197363945e-06, | |
| "loss": 0.7314, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.408955223880597, | |
| "grad_norm": 2.0748265555866023, | |
| "learning_rate": 6.710100716628345e-06, | |
| "loss": 0.7806, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.41194029850746267, | |
| "grad_norm": 1.9674200834017632, | |
| "learning_rate": 6.6644634573783825e-06, | |
| "loss": 0.8074, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.41492537313432837, | |
| "grad_norm": 1.9207512896621706, | |
| "learning_rate": 6.618669710291607e-06, | |
| "loss": 0.7148, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.417910447761194, | |
| "grad_norm": 1.905193379967269, | |
| "learning_rate": 6.572723780758069e-06, | |
| "loss": 0.8142, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4208955223880597, | |
| "grad_norm": 2.1965158090202697, | |
| "learning_rate": 6.526629988475567e-06, | |
| "loss": 0.8123, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.42388059701492536, | |
| "grad_norm": 1.8753650627785756, | |
| "learning_rate": 6.4803926670435e-06, | |
| "loss": 0.8388, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.42686567164179107, | |
| "grad_norm": 2.0267686855160156, | |
| "learning_rate": 6.434016163555452e-06, | |
| "loss": 0.8065, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.4298507462686567, | |
| "grad_norm": 1.9454127213119508, | |
| "learning_rate": 6.387504838190479e-06, | |
| "loss": 0.7268, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.43283582089552236, | |
| "grad_norm": 1.9502519457009935, | |
| "learning_rate": 6.340863063803187e-06, | |
| "loss": 0.744, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.43582089552238806, | |
| "grad_norm": 1.9053847115703428, | |
| "learning_rate": 6.294095225512604e-06, | |
| "loss": 0.8448, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.4388059701492537, | |
| "grad_norm": 2.099027132432326, | |
| "learning_rate": 6.247205720289907e-06, | |
| "loss": 0.9298, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.4417910447761194, | |
| "grad_norm": 2.0781658570138153, | |
| "learning_rate": 6.2001989565450305e-06, | |
| "loss": 0.7678, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.44477611940298506, | |
| "grad_norm": 2.017544918765642, | |
| "learning_rate": 6.153079353712201e-06, | |
| "loss": 0.7815, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 2.0613800967577203, | |
| "learning_rate": 6.105851341834439e-06, | |
| "loss": 0.7604, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4507462686567164, | |
| "grad_norm": 1.8534108525240118, | |
| "learning_rate": 6.058519361147055e-06, | |
| "loss": 0.7744, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.4537313432835821, | |
| "grad_norm": 2.0589449906784525, | |
| "learning_rate": 6.011087861660191e-06, | |
| "loss": 0.7458, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.45671641791044776, | |
| "grad_norm": 1.7521074071886629, | |
| "learning_rate": 5.9635613027404495e-06, | |
| "loss": 0.7427, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4597014925373134, | |
| "grad_norm": 1.952094332549687, | |
| "learning_rate": 5.915944152691634e-06, | |
| "loss": 0.7228, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4626865671641791, | |
| "grad_norm": 2.0059139166291162, | |
| "learning_rate": 5.8682408883346535e-06, | |
| "loss": 0.7888, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.46567164179104475, | |
| "grad_norm": 1.9863550415015798, | |
| "learning_rate": 5.820455994586621e-06, | |
| "loss": 0.7431, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.46865671641791046, | |
| "grad_norm": 2.186012143543713, | |
| "learning_rate": 5.772593964039203e-06, | |
| "loss": 0.9548, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4716417910447761, | |
| "grad_norm": 2.0569327383361524, | |
| "learning_rate": 5.724659296536234e-06, | |
| "loss": 0.7608, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4746268656716418, | |
| "grad_norm": 1.8427207627927604, | |
| "learning_rate": 5.6766564987506564e-06, | |
| "loss": 0.728, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.47761194029850745, | |
| "grad_norm": 2.0454749932374874, | |
| "learning_rate": 5.628590083760815e-06, | |
| "loss": 0.7987, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.48059701492537316, | |
| "grad_norm": 1.997088234967905, | |
| "learning_rate": 5.5804645706261515e-06, | |
| "loss": 0.87, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4835820895522388, | |
| "grad_norm": 2.0854705687983897, | |
| "learning_rate": 5.532284483962341e-06, | |
| "loss": 0.7816, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.48656716417910445, | |
| "grad_norm": 2.063156543618824, | |
| "learning_rate": 5.484054353515896e-06, | |
| "loss": 0.7953, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.48955223880597015, | |
| "grad_norm": 2.0601249949949025, | |
| "learning_rate": 5.435778713738292e-06, | |
| "loss": 0.8433, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4925373134328358, | |
| "grad_norm": 1.9082330488593775, | |
| "learning_rate": 5.387462103359655e-06, | |
| "loss": 0.78, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4955223880597015, | |
| "grad_norm": 1.861403808976405, | |
| "learning_rate": 5.339109064962047e-06, | |
| "loss": 0.6939, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.49850746268656715, | |
| "grad_norm": 2.1165013908329393, | |
| "learning_rate": 5.290724144552379e-06, | |
| "loss": 0.6902, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.5014925373134328, | |
| "grad_norm": 2.4674070182101544, | |
| "learning_rate": 5.242311891135016e-06, | |
| "loss": 0.8382, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5044776119402985, | |
| "grad_norm": 1.8675037579070943, | |
| "learning_rate": 5.193876856284085e-06, | |
| "loss": 0.7193, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5074626865671642, | |
| "grad_norm": 2.0252014042338415, | |
| "learning_rate": 5.145423593715558e-06, | |
| "loss": 0.8031, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5104477611940299, | |
| "grad_norm": 1.9108898103865828, | |
| "learning_rate": 5.096956658859122e-06, | |
| "loss": 0.7634, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5134328358208955, | |
| "grad_norm": 2.139726380565598, | |
| "learning_rate": 5.048480608429893e-06, | |
| "loss": 0.7139, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5164179104477612, | |
| "grad_norm": 1.9120901416594451, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7729, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5194029850746269, | |
| "grad_norm": 1.766403288334722, | |
| "learning_rate": 4.951519391570108e-06, | |
| "loss": 0.7792, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 1.875054628847942, | |
| "learning_rate": 4.903043341140879e-06, | |
| "loss": 0.7026, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5253731343283582, | |
| "grad_norm": 2.0649412907897817, | |
| "learning_rate": 4.854576406284443e-06, | |
| "loss": 0.6678, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5283582089552239, | |
| "grad_norm": 1.9766676200083877, | |
| "learning_rate": 4.806123143715916e-06, | |
| "loss": 0.794, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5313432835820896, | |
| "grad_norm": 1.9628257930544168, | |
| "learning_rate": 4.7576881088649865e-06, | |
| "loss": 0.8011, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5343283582089552, | |
| "grad_norm": 1.993236983375051, | |
| "learning_rate": 4.7092758554476215e-06, | |
| "loss": 0.7871, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.5373134328358209, | |
| "grad_norm": 1.8504281412049604, | |
| "learning_rate": 4.660890935037954e-06, | |
| "loss": 0.7736, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5402985074626866, | |
| "grad_norm": 1.9028018031804006, | |
| "learning_rate": 4.6125378966403465e-06, | |
| "loss": 0.7578, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.5432835820895522, | |
| "grad_norm": 1.8659479024734795, | |
| "learning_rate": 4.564221286261709e-06, | |
| "loss": 0.7557, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5462686567164179, | |
| "grad_norm": 2.11712649211751, | |
| "learning_rate": 4.515945646484105e-06, | |
| "loss": 0.8088, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5492537313432836, | |
| "grad_norm": 1.789112706708441, | |
| "learning_rate": 4.467715516037659e-06, | |
| "loss": 0.8087, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5522388059701493, | |
| "grad_norm": 1.9892189797387887, | |
| "learning_rate": 4.4195354293738484e-06, | |
| "loss": 0.8395, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5552238805970149, | |
| "grad_norm": 2.1883581521829316, | |
| "learning_rate": 4.371409916239188e-06, | |
| "loss": 0.7684, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5582089552238806, | |
| "grad_norm": 2.1631251071113176, | |
| "learning_rate": 4.323343501249346e-06, | |
| "loss": 0.736, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5611940298507463, | |
| "grad_norm": 1.9289233899504126, | |
| "learning_rate": 4.275340703463767e-06, | |
| "loss": 0.6405, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.564179104477612, | |
| "grad_norm": 1.9857433587359077, | |
| "learning_rate": 4.227406035960798e-06, | |
| "loss": 0.7959, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5671641791044776, | |
| "grad_norm": 1.8258458953238406, | |
| "learning_rate": 4.17954400541338e-06, | |
| "loss": 0.7396, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5701492537313433, | |
| "grad_norm": 1.9073211443331697, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 0.7894, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.573134328358209, | |
| "grad_norm": 2.1144318330609955, | |
| "learning_rate": 4.084055847308367e-06, | |
| "loss": 0.7564, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5761194029850746, | |
| "grad_norm": 1.9704301567554263, | |
| "learning_rate": 4.036438697259551e-06, | |
| "loss": 0.768, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5791044776119403, | |
| "grad_norm": 2.641711220430738, | |
| "learning_rate": 3.988912138339812e-06, | |
| "loss": 0.7862, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.582089552238806, | |
| "grad_norm": 2.1626033614930247, | |
| "learning_rate": 3.941480638852948e-06, | |
| "loss": 0.7856, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5850746268656717, | |
| "grad_norm": 1.9284923265396965, | |
| "learning_rate": 3.894148658165562e-06, | |
| "loss": 0.7674, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5880597014925373, | |
| "grad_norm": 2.058823111277626, | |
| "learning_rate": 3.8469206462878e-06, | |
| "loss": 0.7663, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.591044776119403, | |
| "grad_norm": 2.1351322093158744, | |
| "learning_rate": 3.7998010434549716e-06, | |
| "loss": 0.8399, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5940298507462687, | |
| "grad_norm": 1.9380000299821818, | |
| "learning_rate": 3.752794279710094e-06, | |
| "loss": 0.7321, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 1.8441505158750409, | |
| "learning_rate": 3.705904774487396e-06, | |
| "loss": 0.774, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.0907330198564313, | |
| "learning_rate": 3.6591369361968127e-06, | |
| "loss": 0.7847, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.6029850746268657, | |
| "grad_norm": 1.9371066770940577, | |
| "learning_rate": 3.6124951618095224e-06, | |
| "loss": 0.748, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.6059701492537314, | |
| "grad_norm": 1.8374945672921557, | |
| "learning_rate": 3.5659838364445505e-06, | |
| "loss": 0.7537, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.608955223880597, | |
| "grad_norm": 2.081262815281359, | |
| "learning_rate": 3.519607332956502e-06, | |
| "loss": 0.815, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6119402985074627, | |
| "grad_norm": 2.074788649262812, | |
| "learning_rate": 3.473370011524435e-06, | |
| "loss": 0.8674, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6149253731343284, | |
| "grad_norm": 1.8698000805877122, | |
| "learning_rate": 3.427276219241933e-06, | |
| "loss": 0.7202, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.6179104477611941, | |
| "grad_norm": 1.9048492100127103, | |
| "learning_rate": 3.3813302897083955e-06, | |
| "loss": 0.8188, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.6208955223880597, | |
| "grad_norm": 1.979184398237071, | |
| "learning_rate": 3.335536542621617e-06, | |
| "loss": 0.7503, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6238805970149254, | |
| "grad_norm": 1.9760275557890097, | |
| "learning_rate": 3.289899283371657e-06, | |
| "loss": 0.7997, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.6268656716417911, | |
| "grad_norm": 2.1124769220840944, | |
| "learning_rate": 3.244422802636057e-06, | |
| "loss": 0.831, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6298507462686567, | |
| "grad_norm": 2.034333485591399, | |
| "learning_rate": 3.1991113759764493e-06, | |
| "loss": 0.7732, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.6328358208955224, | |
| "grad_norm": 1.897048944733691, | |
| "learning_rate": 3.1539692634365788e-06, | |
| "loss": 0.754, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.6358208955223881, | |
| "grad_norm": 1.9618035338024724, | |
| "learning_rate": 3.1090007091417884e-06, | |
| "loss": 0.6767, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.6388059701492538, | |
| "grad_norm": 2.0148083710716764, | |
| "learning_rate": 3.0642099408999982e-06, | |
| "loss": 0.8271, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.6417910447761194, | |
| "grad_norm": 2.283043013050116, | |
| "learning_rate": 3.019601169804216e-06, | |
| "loss": 0.7971, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.6447761194029851, | |
| "grad_norm": 2.11854512509762, | |
| "learning_rate": 2.975178589836632e-06, | |
| "loss": 0.7609, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6477611940298508, | |
| "grad_norm": 1.8298105473763824, | |
| "learning_rate": 2.9309463774743047e-06, | |
| "loss": 0.8506, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.6507462686567164, | |
| "grad_norm": 1.7780813510143145, | |
| "learning_rate": 2.886908691296504e-06, | |
| "loss": 0.7632, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.6537313432835821, | |
| "grad_norm": 1.9126798345749187, | |
| "learning_rate": 2.843069671593734e-06, | |
| "loss": 0.7782, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6567164179104478, | |
| "grad_norm": 1.9350084312265137, | |
| "learning_rate": 2.7994334399784773e-06, | |
| "loss": 0.7372, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6597014925373135, | |
| "grad_norm": 1.9251939137126606, | |
| "learning_rate": 2.7560040989976894e-06, | |
| "loss": 0.7357, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6626865671641791, | |
| "grad_norm": 1.9256071124163803, | |
| "learning_rate": 2.7127857317470967e-06, | |
| "loss": 0.7855, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6656716417910448, | |
| "grad_norm": 2.0598971965285338, | |
| "learning_rate": 2.6697824014873076e-06, | |
| "loss": 0.7324, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.6686567164179105, | |
| "grad_norm": 1.823285258131258, | |
| "learning_rate": 2.626998151261798e-06, | |
| "loss": 0.6943, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 2.7227864446392678, | |
| "learning_rate": 2.5844370035168077e-06, | |
| "loss": 0.7098, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6746268656716418, | |
| "grad_norm": 2.1471535886447035, | |
| "learning_rate": 2.5421029597231476e-06, | |
| "loss": 0.7776, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6776119402985075, | |
| "grad_norm": 2.429389297486461, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.7514, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6805970149253732, | |
| "grad_norm": 2.0726794837198725, | |
| "learning_rate": 2.458132082740724e-06, | |
| "loss": 0.7712, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6835820895522388, | |
| "grad_norm": 2.1998151305942395, | |
| "learning_rate": 2.4165031442406857e-06, | |
| "loss": 0.7435, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6865671641791045, | |
| "grad_norm": 2.02867494365551, | |
| "learning_rate": 2.3751170983272e-06, | |
| "loss": 0.8134, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6895522388059702, | |
| "grad_norm": 1.9535756316081252, | |
| "learning_rate": 2.333977835991545e-06, | |
| "loss": 0.7191, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6925373134328359, | |
| "grad_norm": 2.0484292921284477, | |
| "learning_rate": 2.293089225023152e-06, | |
| "loss": 0.8623, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6955223880597015, | |
| "grad_norm": 2.0146733780323505, | |
| "learning_rate": 2.2524551096459703e-06, | |
| "loss": 0.7654, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6985074626865672, | |
| "grad_norm": 2.007942017411499, | |
| "learning_rate": 2.2120793101570366e-06, | |
| "loss": 0.7197, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.7014925373134329, | |
| "grad_norm": 1.9512262040861592, | |
| "learning_rate": 2.171965622567308e-06, | |
| "loss": 0.795, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7044776119402985, | |
| "grad_norm": 2.0223484864555608, | |
| "learning_rate": 2.132117818244771e-06, | |
| "loss": 0.73, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.7074626865671642, | |
| "grad_norm": 1.9076970970748008, | |
| "learning_rate": 2.0925396435598665e-06, | |
| "loss": 0.7341, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.7104477611940299, | |
| "grad_norm": 2.0099705479490084, | |
| "learning_rate": 2.053234819533276e-06, | |
| "loss": 0.706, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.7134328358208956, | |
| "grad_norm": 2.088602599462484, | |
| "learning_rate": 2.0142070414860704e-06, | |
| "loss": 0.8061, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.7164179104477612, | |
| "grad_norm": 2.1414530746534437, | |
| "learning_rate": 1.9754599786922913e-06, | |
| "loss": 0.7735, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.7194029850746269, | |
| "grad_norm": 1.756460059490162, | |
| "learning_rate": 1.936997274033986e-06, | |
| "loss": 0.652, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.7223880597014926, | |
| "grad_norm": 1.6835259072072402, | |
| "learning_rate": 1.8988225436587005e-06, | |
| "loss": 0.6575, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.7253731343283583, | |
| "grad_norm": 1.9338570463688378, | |
| "learning_rate": 1.8609393766395083e-06, | |
| "loss": 0.8504, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.7283582089552239, | |
| "grad_norm": 2.0279105685865884, | |
| "learning_rate": 1.823351334637576e-06, | |
| "loss": 0.8141, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.7313432835820896, | |
| "grad_norm": 1.9475282359983863, | |
| "learning_rate": 1.7860619515673034e-06, | |
| "loss": 0.7989, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.7343283582089553, | |
| "grad_norm": 2.0357960960016093, | |
| "learning_rate": 1.7490747332640833e-06, | |
| "loss": 0.8093, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.7373134328358208, | |
| "grad_norm": 2.0420189686519072, | |
| "learning_rate": 1.7123931571546826e-06, | |
| "loss": 0.8056, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.7402985074626866, | |
| "grad_norm": 1.7925260574915485, | |
| "learning_rate": 1.6760206719303107e-06, | |
| "loss": 0.7222, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.7432835820895523, | |
| "grad_norm": 1.9144780544797566, | |
| "learning_rate": 1.639960697222388e-06, | |
| "loss": 0.8697, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 1.9085137418012865, | |
| "learning_rate": 1.6042166232810346e-06, | |
| "loss": 0.7573, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7492537313432835, | |
| "grad_norm": 1.7594094715163107, | |
| "learning_rate": 1.5687918106563326e-06, | |
| "loss": 0.719, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.7522388059701492, | |
| "grad_norm": 1.9721722301749733, | |
| "learning_rate": 1.5336895898823801e-06, | |
| "loss": 0.805, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.755223880597015, | |
| "grad_norm": 2.2257145387956148, | |
| "learning_rate": 1.4989132611641576e-06, | |
| "loss": 0.7464, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.7582089552238805, | |
| "grad_norm": 2.2389879292796944, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.7688, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7611940298507462, | |
| "grad_norm": 2.2600400159677445, | |
| "learning_rate": 1.4303513272105057e-06, | |
| "loss": 0.6961, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.764179104477612, | |
| "grad_norm": 1.9828908431355627, | |
| "learning_rate": 1.396572167961427e-06, | |
| "loss": 0.8139, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7671641791044777, | |
| "grad_norm": 2.123631920520936, | |
| "learning_rate": 1.3631317921347564e-06, | |
| "loss": 0.7817, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7701492537313432, | |
| "grad_norm": 1.91427428961712, | |
| "learning_rate": 1.330033343693824e-06, | |
| "loss": 0.7411, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7731343283582089, | |
| "grad_norm": 1.923869516924642, | |
| "learning_rate": 1.297279934454978e-06, | |
| "loss": 0.7464, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.7761194029850746, | |
| "grad_norm": 2.097695064363166, | |
| "learning_rate": 1.264874643795021e-06, | |
| "loss": 0.7835, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7791044776119403, | |
| "grad_norm": 1.866847274620371, | |
| "learning_rate": 1.2328205183616964e-06, | |
| "loss": 0.7576, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7820895522388059, | |
| "grad_norm": 1.9518448868919547, | |
| "learning_rate": 1.2011205717872538e-06, | |
| "loss": 0.8182, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7850746268656716, | |
| "grad_norm": 2.0133337408527483, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 0.7536, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7880597014925373, | |
| "grad_norm": 1.9817193200479692, | |
| "learning_rate": 1.1387951029696543e-06, | |
| "loss": 0.7337, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7910447761194029, | |
| "grad_norm": 1.9081666878508032, | |
| "learning_rate": 1.1081754403792e-06, | |
| "loss": 0.7247, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7940298507462686, | |
| "grad_norm": 1.7424632653916463, | |
| "learning_rate": 1.0779216754021215e-06, | |
| "loss": 0.7659, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7970149253731343, | |
| "grad_norm": 2.1707469563008694, | |
| "learning_rate": 1.0480366524062041e-06, | |
| "loss": 0.7172, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.9358665287041594, | |
| "learning_rate": 1.0185231810912223e-06, | |
| "loss": 0.7672, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.8029850746268656, | |
| "grad_norm": 1.9633165798810948, | |
| "learning_rate": 9.893840362247809e-07, | |
| "loss": 0.7251, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.8059701492537313, | |
| "grad_norm": 1.811709330190077, | |
| "learning_rate": 9.606219573814447e-07, | |
| "loss": 0.7109, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.808955223880597, | |
| "grad_norm": 2.113644270821526, | |
| "learning_rate": 9.322396486851626e-07, | |
| "loss": 0.7905, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.8119402985074626, | |
| "grad_norm": 1.827142174127426, | |
| "learning_rate": 9.042397785550405e-07, | |
| "loss": 0.7433, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.8149253731343283, | |
| "grad_norm": 2.014601055555163, | |
| "learning_rate": 8.766249794544662e-07, | |
| "loss": 0.7755, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.817910447761194, | |
| "grad_norm": 2.091449213566336, | |
| "learning_rate": 8.49397847643606e-07, | |
| "loss": 0.8266, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 2.3792124584042407, | |
| "learning_rate": 8.225609429353187e-07, | |
| "loss": 0.8152, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.8238805970149253, | |
| "grad_norm": 2.236371795361171, | |
| "learning_rate": 7.961167884544852e-07, | |
| "loss": 0.8004, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.826865671641791, | |
| "grad_norm": 1.8675998248443555, | |
| "learning_rate": 7.700678704007947e-07, | |
| "loss": 0.7616, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.8298507462686567, | |
| "grad_norm": 1.8934578283049441, | |
| "learning_rate": 7.444166378150014e-07, | |
| "loss": 0.7384, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.8328358208955224, | |
| "grad_norm": 1.806959761192126, | |
| "learning_rate": 7.191655023486682e-07, | |
| "loss": 0.7859, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.835820895522388, | |
| "grad_norm": 2.0920960597937883, | |
| "learning_rate": 6.94316838037431e-07, | |
| "loss": 0.6582, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8388059701492537, | |
| "grad_norm": 1.9049893572652696, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.7133, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.8417910447761194, | |
| "grad_norm": 2.021598834791704, | |
| "learning_rate": 6.458362296075399e-07, | |
| "loss": 0.6986, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.844776119402985, | |
| "grad_norm": 1.8785681628556798, | |
| "learning_rate": 6.222088434895462e-07, | |
| "loss": 0.7561, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.8477611940298507, | |
| "grad_norm": 1.7981294150989415, | |
| "learning_rate": 5.989930440994451e-07, | |
| "loss": 0.6988, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.8507462686567164, | |
| "grad_norm": 1.9804075333202549, | |
| "learning_rate": 5.76191014116711e-07, | |
| "loss": 0.7793, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.8537313432835821, | |
| "grad_norm": 1.8946196230697367, | |
| "learning_rate": 5.538048973194699e-07, | |
| "loss": 0.7378, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.8567164179104477, | |
| "grad_norm": 4.308097949167914, | |
| "learning_rate": 5.318367983829393e-07, | |
| "loss": 0.7545, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.8597014925373134, | |
| "grad_norm": 1.7900383798917394, | |
| "learning_rate": 5.102887826815589e-07, | |
| "loss": 0.6725, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8626865671641791, | |
| "grad_norm": 1.8973145322914773, | |
| "learning_rate": 4.891628760948114e-07, | |
| "loss": 0.6943, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.8656716417910447, | |
| "grad_norm": 2.0037685592372516, | |
| "learning_rate": 4.6846106481675035e-07, | |
| "loss": 0.7904, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.8686567164179104, | |
| "grad_norm": 1.833347945444079, | |
| "learning_rate": 4.481852951692672e-07, | |
| "loss": 0.6899, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.8716417910447761, | |
| "grad_norm": 2.016728151056703, | |
| "learning_rate": 4.283374734191037e-07, | |
| "loss": 0.8379, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8746268656716418, | |
| "grad_norm": 2.034748614996797, | |
| "learning_rate": 4.089194655986306e-07, | |
| "loss": 0.6884, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.8776119402985074, | |
| "grad_norm": 2.103995265841144, | |
| "learning_rate": 3.899330973304083e-07, | |
| "loss": 0.8119, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8805970149253731, | |
| "grad_norm": 2.085775862013919, | |
| "learning_rate": 3.7138015365554834e-07, | |
| "loss": 0.7218, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8835820895522388, | |
| "grad_norm": 1.859225098794404, | |
| "learning_rate": 3.5326237886588734e-07, | |
| "loss": 0.761, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8865671641791045, | |
| "grad_norm": 1.9937848736813224, | |
| "learning_rate": 3.355814763399973e-07, | |
| "loss": 0.7734, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8895522388059701, | |
| "grad_norm": 1.956060370495559, | |
| "learning_rate": 3.183391083830345e-07, | |
| "loss": 0.7522, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.8925373134328358, | |
| "grad_norm": 2.1663087142629953, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.7592, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 1.8473510328793137, | |
| "learning_rate": 2.8517641909562075e-07, | |
| "loss": 0.7569, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8985074626865671, | |
| "grad_norm": 1.9818158957078058, | |
| "learning_rate": 2.6925921562124867e-07, | |
| "loss": 0.637, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.9014925373134328, | |
| "grad_norm": 1.9920228777916036, | |
| "learning_rate": 2.5378678213483057e-07, | |
| "loss": 0.766, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.9044776119402985, | |
| "grad_norm": 1.960285579935006, | |
| "learning_rate": 2.3876057330792344e-07, | |
| "loss": 0.7499, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.9074626865671642, | |
| "grad_norm": 1.94458918165242, | |
| "learning_rate": 2.2418200185938488e-07, | |
| "loss": 0.7358, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.9104477611940298, | |
| "grad_norm": 2.081159928237178, | |
| "learning_rate": 2.1005243842255552e-07, | |
| "loss": 0.882, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.9134328358208955, | |
| "grad_norm": 1.9550064790232975, | |
| "learning_rate": 1.9637321141639743e-07, | |
| "loss": 0.6894, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.9164179104477612, | |
| "grad_norm": 2.1049771274583486, | |
| "learning_rate": 1.8314560692059836e-07, | |
| "loss": 0.7991, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.9194029850746268, | |
| "grad_norm": 2.0857228086478448, | |
| "learning_rate": 1.7037086855465902e-07, | |
| "loss": 0.7768, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.9223880597014925, | |
| "grad_norm": 1.935207733713808, | |
| "learning_rate": 1.5805019736097105e-07, | |
| "loss": 0.7564, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.9253731343283582, | |
| "grad_norm": 2.072194118865995, | |
| "learning_rate": 1.4618475169190017e-07, | |
| "loss": 0.7604, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.9283582089552239, | |
| "grad_norm": 2.1552417858238493, | |
| "learning_rate": 1.3477564710088097e-07, | |
| "loss": 0.793, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.9313432835820895, | |
| "grad_norm": 1.951097534718573, | |
| "learning_rate": 1.2382395623753484e-07, | |
| "loss": 0.7764, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.9343283582089552, | |
| "grad_norm": 1.9504459470286735, | |
| "learning_rate": 1.1333070874682217e-07, | |
| "loss": 0.7777, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.9373134328358209, | |
| "grad_norm": 2.175522821443901, | |
| "learning_rate": 1.0329689117224262e-07, | |
| "loss": 0.8003, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.9402985074626866, | |
| "grad_norm": 1.896018319659114, | |
| "learning_rate": 9.372344686307655e-08, | |
| "loss": 0.6819, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.9432835820895522, | |
| "grad_norm": 1.8626576145737677, | |
| "learning_rate": 8.461127588570039e-08, | |
| "loss": 0.7407, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.9462686567164179, | |
| "grad_norm": 2.255720786589443, | |
| "learning_rate": 7.59612349389599e-08, | |
| "loss": 0.8032, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.9492537313432836, | |
| "grad_norm": 1.988888031528982, | |
| "learning_rate": 6.777413727363069e-08, | |
| "loss": 0.6934, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.9522388059701492, | |
| "grad_norm": 1.936424618352781, | |
| "learning_rate": 6.005075261595495e-08, | |
| "loss": 0.7447, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.9552238805970149, | |
| "grad_norm": 2.166079975131979, | |
| "learning_rate": 5.279180709527765e-08, | |
| "loss": 0.6995, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.9582089552238806, | |
| "grad_norm": 1.910155391887948, | |
| "learning_rate": 4.599798317577342e-08, | |
| "loss": 0.7723, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.9611940298507463, | |
| "grad_norm": 2.015438544350679, | |
| "learning_rate": 3.9669919592288385e-08, | |
| "loss": 0.7478, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.9641791044776119, | |
| "grad_norm": 1.7894327871602922, | |
| "learning_rate": 3.3808211290284886e-08, | |
| "loss": 0.6971, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.9671641791044776, | |
| "grad_norm": 2.243535756266807, | |
| "learning_rate": 2.8413409369907887e-08, | |
| "loss": 0.8014, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 2.104094911680065, | |
| "learning_rate": 2.3486021034170857e-08, | |
| "loss": 0.7373, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.9731343283582089, | |
| "grad_norm": 1.9746672549193203, | |
| "learning_rate": 1.9026509541272276e-08, | |
| "loss": 0.7462, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.9761194029850746, | |
| "grad_norm": 1.8638633602470398, | |
| "learning_rate": 1.5035294161039882e-08, | |
| "loss": 0.7276, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.9791044776119403, | |
| "grad_norm": 1.9221365215727475, | |
| "learning_rate": 1.1512750135511674e-08, | |
| "loss": 0.6948, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.982089552238806, | |
| "grad_norm": 1.9696506075193372, | |
| "learning_rate": 8.459208643659122e-09, | |
| "loss": 0.8016, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9850746268656716, | |
| "grad_norm": 1.9260689325158074, | |
| "learning_rate": 5.874956770248186e-09, | |
| "loss": 0.772, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.9880597014925373, | |
| "grad_norm": 1.9022494846934377, | |
| "learning_rate": 3.760237478849793e-09, | |
| "loss": 0.639, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.991044776119403, | |
| "grad_norm": 2.1198036949572523, | |
| "learning_rate": 2.1152495889970035e-09, | |
| "loss": 0.7524, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.9940298507462687, | |
| "grad_norm": 2.520497927670345, | |
| "learning_rate": 9.401477574932927e-10, | |
| "loss": 0.7804, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.9970149253731343, | |
| "grad_norm": 1.9463118318794423, | |
| "learning_rate": 2.3504246386918394e-10, | |
| "loss": 0.6975, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.0407770920954924, | |
| "learning_rate": 0.0, | |
| "loss": 0.7463, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 335, | |
| "total_flos": 27958090317824.0, | |
| "train_loss": 0.8481638431549072, | |
| "train_runtime": 15355.884, | |
| "train_samples_per_second": 0.175, | |
| "train_steps_per_second": 0.022 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 335, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2060, | |
| "total_flos": 27958090317824.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |