| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.985163204747774, | |
| "eval_steps": 500, | |
| "global_step": 735, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006782534972445952, | |
| "grad_norm": 7.135720261444181, | |
| "learning_rate": 1.0810810810810812e-06, | |
| "loss": 1.1988, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.013565069944891903, | |
| "grad_norm": 7.10061152713567, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 1.1926, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.020347604917337857, | |
| "grad_norm": 7.012644871929241, | |
| "learning_rate": 3.2432432432432437e-06, | |
| "loss": 1.1873, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.027130139889783807, | |
| "grad_norm": 6.4062690637507425, | |
| "learning_rate": 4.324324324324325e-06, | |
| "loss": 1.1664, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03391267486222976, | |
| "grad_norm": 4.819533337520507, | |
| "learning_rate": 5.405405405405406e-06, | |
| "loss": 1.1316, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.040695209834675714, | |
| "grad_norm": 3.0815069633752463, | |
| "learning_rate": 6.486486486486487e-06, | |
| "loss": 1.0897, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.04747774480712166, | |
| "grad_norm": 4.965671310290254, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 1.0541, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05426027977956761, | |
| "grad_norm": 5.523597681999487, | |
| "learning_rate": 8.64864864864865e-06, | |
| "loss": 1.0812, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06104281475201356, | |
| "grad_norm": 4.7758478538296245, | |
| "learning_rate": 9.729729729729732e-06, | |
| "loss": 1.0615, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.06782534972445951, | |
| "grad_norm": 4.37471758206583, | |
| "learning_rate": 1.0810810810810812e-05, | |
| "loss": 1.0034, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07460788469690546, | |
| "grad_norm": 3.415263530374433, | |
| "learning_rate": 1.1891891891891894e-05, | |
| "loss": 0.9965, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.08139041966935143, | |
| "grad_norm": 2.1249671206986167, | |
| "learning_rate": 1.2972972972972975e-05, | |
| "loss": 0.9672, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.08817295464179738, | |
| "grad_norm": 2.804510150071328, | |
| "learning_rate": 1.4054054054054055e-05, | |
| "loss": 0.9248, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.09495548961424333, | |
| "grad_norm": 2.227059944094318, | |
| "learning_rate": 1.5135135135135138e-05, | |
| "loss": 0.9174, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.10173802458668928, | |
| "grad_norm": 1.9376154207127252, | |
| "learning_rate": 1.6216216216216218e-05, | |
| "loss": 0.9111, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.10852055955913523, | |
| "grad_norm": 1.5867898298795111, | |
| "learning_rate": 1.72972972972973e-05, | |
| "loss": 0.8742, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.11530309453158118, | |
| "grad_norm": 1.824405438277367, | |
| "learning_rate": 1.8378378378378383e-05, | |
| "loss": 0.8663, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.12208562950402713, | |
| "grad_norm": 1.2484939184792223, | |
| "learning_rate": 1.9459459459459463e-05, | |
| "loss": 0.8685, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1288681644764731, | |
| "grad_norm": 1.6950608297204033, | |
| "learning_rate": 2.054054054054054e-05, | |
| "loss": 0.8643, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.13565069944891903, | |
| "grad_norm": 1.4257834623155268, | |
| "learning_rate": 2.1621621621621624e-05, | |
| "loss": 0.8589, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.142433234421365, | |
| "grad_norm": 1.2788529677027458, | |
| "learning_rate": 2.2702702702702705e-05, | |
| "loss": 0.832, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.14921576939381093, | |
| "grad_norm": 1.4653330199646428, | |
| "learning_rate": 2.378378378378379e-05, | |
| "loss": 0.8257, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.1559983043662569, | |
| "grad_norm": 1.1706491825282224, | |
| "learning_rate": 2.4864864864864866e-05, | |
| "loss": 0.8154, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.16278083933870285, | |
| "grad_norm": 1.575724350793329, | |
| "learning_rate": 2.594594594594595e-05, | |
| "loss": 0.83, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.1695633743111488, | |
| "grad_norm": 1.273387962120446, | |
| "learning_rate": 2.702702702702703e-05, | |
| "loss": 0.8327, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.17634590928359475, | |
| "grad_norm": 1.5867731702030288, | |
| "learning_rate": 2.810810810810811e-05, | |
| "loss": 0.8204, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1831284442560407, | |
| "grad_norm": 0.8564117760497798, | |
| "learning_rate": 2.918918918918919e-05, | |
| "loss": 0.8056, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.18991097922848665, | |
| "grad_norm": 1.4715395766174262, | |
| "learning_rate": 3.0270270270270275e-05, | |
| "loss": 0.8055, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.1966935142009326, | |
| "grad_norm": 0.8375306490548892, | |
| "learning_rate": 3.135135135135135e-05, | |
| "loss": 0.7937, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.20347604917337855, | |
| "grad_norm": 1.620124215802785, | |
| "learning_rate": 3.2432432432432436e-05, | |
| "loss": 0.8001, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2102585841458245, | |
| "grad_norm": 1.553317483743022, | |
| "learning_rate": 3.351351351351351e-05, | |
| "loss": 0.7876, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.21704111911827045, | |
| "grad_norm": 1.658909246335399, | |
| "learning_rate": 3.45945945945946e-05, | |
| "loss": 0.7891, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.22382365409071642, | |
| "grad_norm": 1.1987131034054377, | |
| "learning_rate": 3.567567567567568e-05, | |
| "loss": 0.7979, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.23060618906316235, | |
| "grad_norm": 2.2091757491329065, | |
| "learning_rate": 3.6756756756756765e-05, | |
| "loss": 0.7921, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.23738872403560832, | |
| "grad_norm": 2.173619464441479, | |
| "learning_rate": 3.783783783783784e-05, | |
| "loss": 0.7997, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.24417125900805425, | |
| "grad_norm": 1.3174649366078477, | |
| "learning_rate": 3.8918918918918926e-05, | |
| "loss": 0.768, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.2509537939805002, | |
| "grad_norm": 2.560545381997849, | |
| "learning_rate": 4e-05, | |
| "loss": 0.7938, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2577363289529462, | |
| "grad_norm": 1.6460978741311298, | |
| "learning_rate": 4.108108108108108e-05, | |
| "loss": 0.7873, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.2645188639253921, | |
| "grad_norm": 2.1426591768978938, | |
| "learning_rate": 4.2162162162162164e-05, | |
| "loss": 0.781, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.27130139889783805, | |
| "grad_norm": 1.8160361307625077, | |
| "learning_rate": 4.324324324324325e-05, | |
| "loss": 0.7809, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.278083933870284, | |
| "grad_norm": 1.6910805765281172, | |
| "learning_rate": 4.4324324324324325e-05, | |
| "loss": 0.7744, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.28486646884273, | |
| "grad_norm": 1.9801882908067985, | |
| "learning_rate": 4.540540540540541e-05, | |
| "loss": 0.7732, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.29164900381517594, | |
| "grad_norm": 1.3912495139596244, | |
| "learning_rate": 4.6486486486486486e-05, | |
| "loss": 0.778, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.29843153878762185, | |
| "grad_norm": 1.9510858316270183, | |
| "learning_rate": 4.756756756756758e-05, | |
| "loss": 0.7628, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.3052140737600678, | |
| "grad_norm": 1.9755444838845442, | |
| "learning_rate": 4.8648648648648654e-05, | |
| "loss": 0.77, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.3119966087325138, | |
| "grad_norm": 1.7136629431095296, | |
| "learning_rate": 4.972972972972973e-05, | |
| "loss": 0.7776, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.31877914370495974, | |
| "grad_norm": 1.9262891096799022, | |
| "learning_rate": 5.081081081081081e-05, | |
| "loss": 0.7591, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3255616786774057, | |
| "grad_norm": 1.8644045069770525, | |
| "learning_rate": 5.18918918918919e-05, | |
| "loss": 0.7564, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3323442136498516, | |
| "grad_norm": 1.273771704881583, | |
| "learning_rate": 5.2972972972972976e-05, | |
| "loss": 0.7615, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.3391267486222976, | |
| "grad_norm": 2.4421063866094843, | |
| "learning_rate": 5.405405405405406e-05, | |
| "loss": 0.7803, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.34590928359474354, | |
| "grad_norm": 2.8264810278747197, | |
| "learning_rate": 5.513513513513514e-05, | |
| "loss": 0.7614, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.3526918185671895, | |
| "grad_norm": 2.2601831362375036, | |
| "learning_rate": 5.621621621621622e-05, | |
| "loss": 0.7727, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3594743535396354, | |
| "grad_norm": 3.49645659835363, | |
| "learning_rate": 5.7297297297297305e-05, | |
| "loss": 0.7589, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.3662568885120814, | |
| "grad_norm": 2.8843912596082983, | |
| "learning_rate": 5.837837837837838e-05, | |
| "loss": 0.7561, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.37303942348452734, | |
| "grad_norm": 2.7371626553055965, | |
| "learning_rate": 5.945945945945946e-05, | |
| "loss": 0.7468, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3798219584569733, | |
| "grad_norm": 2.4650676398708997, | |
| "learning_rate": 6.054054054054055e-05, | |
| "loss": 0.7501, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.38660449342941927, | |
| "grad_norm": 2.617962450345242, | |
| "learning_rate": 6.162162162162163e-05, | |
| "loss": 0.7504, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3933870284018652, | |
| "grad_norm": 1.9045313378628999, | |
| "learning_rate": 6.27027027027027e-05, | |
| "loss": 0.7443, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.40016956337431114, | |
| "grad_norm": 3.214875277973139, | |
| "learning_rate": 6.378378378378379e-05, | |
| "loss": 0.756, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.4069520983467571, | |
| "grad_norm": 2.607985787054137, | |
| "learning_rate": 6.486486486486487e-05, | |
| "loss": 0.7575, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.41373463331920307, | |
| "grad_norm": 2.462297061139689, | |
| "learning_rate": 6.594594594594596e-05, | |
| "loss": 0.7395, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.420517168291649, | |
| "grad_norm": 1.7928802854683021, | |
| "learning_rate": 6.702702702702703e-05, | |
| "loss": 0.7443, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.42729970326409494, | |
| "grad_norm": 2.8782764835293015, | |
| "learning_rate": 6.810810810810811e-05, | |
| "loss": 0.7574, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.4340822382365409, | |
| "grad_norm": 2.107005343355103, | |
| "learning_rate": 6.91891891891892e-05, | |
| "loss": 0.7524, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.44086477320898687, | |
| "grad_norm": 2.400586779165952, | |
| "learning_rate": 7.027027027027028e-05, | |
| "loss": 0.7539, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.44764730818143283, | |
| "grad_norm": 1.8984648015553665, | |
| "learning_rate": 7.135135135135136e-05, | |
| "loss": 0.7661, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.45442984315387874, | |
| "grad_norm": 2.701931984195687, | |
| "learning_rate": 7.243243243243243e-05, | |
| "loss": 0.7423, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.4612123781263247, | |
| "grad_norm": 3.1216702832116807, | |
| "learning_rate": 7.351351351351353e-05, | |
| "loss": 0.7558, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.46799491309877067, | |
| "grad_norm": 1.7791622339381934, | |
| "learning_rate": 7.45945945945946e-05, | |
| "loss": 0.7338, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.47477744807121663, | |
| "grad_norm": 2.283223713003048, | |
| "learning_rate": 7.567567567567568e-05, | |
| "loss": 0.7512, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.48155998304366254, | |
| "grad_norm": 2.518755689574504, | |
| "learning_rate": 7.675675675675675e-05, | |
| "loss": 0.7371, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.4883425180161085, | |
| "grad_norm": 1.99481044416433, | |
| "learning_rate": 7.783783783783785e-05, | |
| "loss": 0.7478, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.49512505298855447, | |
| "grad_norm": 2.7463149548347183, | |
| "learning_rate": 7.891891891891892e-05, | |
| "loss": 0.7365, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.5019075879610004, | |
| "grad_norm": 2.138101333819343, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7254, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.5086901229334464, | |
| "grad_norm": 2.640544503377941, | |
| "learning_rate": 7.999954822103665e-05, | |
| "loss": 0.7417, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5154726579058924, | |
| "grad_norm": 2.2402410683129688, | |
| "learning_rate": 7.999819289435179e-05, | |
| "loss": 0.7354, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.5222551928783383, | |
| "grad_norm": 2.49341837988937, | |
| "learning_rate": 7.999593405056084e-05, | |
| "loss": 0.738, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.5290377278507842, | |
| "grad_norm": 2.3287753265703253, | |
| "learning_rate": 7.999277174068872e-05, | |
| "loss": 0.7247, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.5358202628232301, | |
| "grad_norm": 2.2524400032207716, | |
| "learning_rate": 7.998870603616864e-05, | |
| "loss": 0.7368, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.5426027977956761, | |
| "grad_norm": 2.686780411560504, | |
| "learning_rate": 7.998373702884062e-05, | |
| "loss": 0.7315, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5493853327681221, | |
| "grad_norm": 1.7256862069861159, | |
| "learning_rate": 7.997786483094931e-05, | |
| "loss": 0.7492, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.556167867740568, | |
| "grad_norm": 2.6274112743201523, | |
| "learning_rate": 7.997108957514146e-05, | |
| "loss": 0.7229, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.562950402713014, | |
| "grad_norm": 2.0731276463095596, | |
| "learning_rate": 7.9963411414463e-05, | |
| "loss": 0.7375, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.56973293768546, | |
| "grad_norm": 2.6126411934480123, | |
| "learning_rate": 7.99548305223555e-05, | |
| "loss": 0.7284, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.5765154726579059, | |
| "grad_norm": 1.4633346753767393, | |
| "learning_rate": 7.994534709265226e-05, | |
| "loss": 0.7248, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5832980076303519, | |
| "grad_norm": 3.4984880607635307, | |
| "learning_rate": 7.993496133957401e-05, | |
| "loss": 0.7517, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.5900805426027977, | |
| "grad_norm": 2.4422440886664147, | |
| "learning_rate": 7.992367349772398e-05, | |
| "loss": 0.7378, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.5968630775752437, | |
| "grad_norm": 1.8295430254229907, | |
| "learning_rate": 7.991148382208265e-05, | |
| "loss": 0.7241, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.6036456125476897, | |
| "grad_norm": 2.9619629036982755, | |
| "learning_rate": 7.989839258800196e-05, | |
| "loss": 0.7422, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.6104281475201356, | |
| "grad_norm": 1.9664968756669656, | |
| "learning_rate": 7.988440009119911e-05, | |
| "loss": 0.751, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6172106824925816, | |
| "grad_norm": 1.6194583900993504, | |
| "learning_rate": 7.986950664774992e-05, | |
| "loss": 0.7346, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.6239932174650276, | |
| "grad_norm": 2.8148837821592796, | |
| "learning_rate": 7.985371259408157e-05, | |
| "loss": 0.7413, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.6307757524374735, | |
| "grad_norm": 1.486533554565982, | |
| "learning_rate": 7.983701828696515e-05, | |
| "loss": 0.7296, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.6375582874099195, | |
| "grad_norm": 3.229587748852655, | |
| "learning_rate": 7.98194241035075e-05, | |
| "loss": 0.7483, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.6443408223823655, | |
| "grad_norm": 2.518619876391423, | |
| "learning_rate": 7.980093044114269e-05, | |
| "loss": 0.7524, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6511233573548114, | |
| "grad_norm": 2.6606747542222915, | |
| "learning_rate": 7.978153771762311e-05, | |
| "loss": 0.7531, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6579058923272573, | |
| "grad_norm": 1.7367257209193254, | |
| "learning_rate": 7.976124637101e-05, | |
| "loss": 0.736, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.6646884272997032, | |
| "grad_norm": 1.8350871328487093, | |
| "learning_rate": 7.974005685966354e-05, | |
| "loss": 0.7248, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.6714709622721492, | |
| "grad_norm": 1.5007282529505472, | |
| "learning_rate": 7.971796966223248e-05, | |
| "loss": 0.7498, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.6782534972445952, | |
| "grad_norm": 1.4248528231943807, | |
| "learning_rate": 7.969498527764341e-05, | |
| "loss": 0.7208, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6850360322170411, | |
| "grad_norm": 2.217134309648403, | |
| "learning_rate": 7.967110422508936e-05, | |
| "loss": 0.7295, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.6918185671894871, | |
| "grad_norm": 2.2429893368943277, | |
| "learning_rate": 7.964632704401823e-05, | |
| "loss": 0.7429, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.698601102161933, | |
| "grad_norm": 1.317977635899577, | |
| "learning_rate": 7.962065429412046e-05, | |
| "loss": 0.7311, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.705383637134379, | |
| "grad_norm": 3.1540990631312718, | |
| "learning_rate": 7.959408655531646e-05, | |
| "loss": 0.7331, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.712166172106825, | |
| "grad_norm": 2.0946781410468382, | |
| "learning_rate": 7.956662442774351e-05, | |
| "loss": 0.7357, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.7189487070792708, | |
| "grad_norm": 3.6514929315006834, | |
| "learning_rate": 7.953826853174218e-05, | |
| "loss": 0.724, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.7257312420517168, | |
| "grad_norm": 2.743797555116305, | |
| "learning_rate": 7.950901950784236e-05, | |
| "loss": 0.7337, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.7325137770241628, | |
| "grad_norm": 2.6025703631899364, | |
| "learning_rate": 7.947887801674872e-05, | |
| "loss": 0.7231, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.7392963119966087, | |
| "grad_norm": 2.33134578145788, | |
| "learning_rate": 7.944784473932583e-05, | |
| "loss": 0.7268, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.7460788469690547, | |
| "grad_norm": 2.369166475084867, | |
| "learning_rate": 7.941592037658279e-05, | |
| "loss": 0.7175, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.7528613819415007, | |
| "grad_norm": 1.5983852946337034, | |
| "learning_rate": 7.93831056496574e-05, | |
| "loss": 0.7215, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.7596439169139466, | |
| "grad_norm": 2.3446201220972696, | |
| "learning_rate": 7.934940129979979e-05, | |
| "loss": 0.7314, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.7664264518863926, | |
| "grad_norm": 1.872428876067427, | |
| "learning_rate": 7.931480808835577e-05, | |
| "loss": 0.7287, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7732089868588385, | |
| "grad_norm": 1.982878649441548, | |
| "learning_rate": 7.927932679674964e-05, | |
| "loss": 0.7193, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.7799915218312844, | |
| "grad_norm": 1.5101882188042894, | |
| "learning_rate": 7.924295822646643e-05, | |
| "loss": 0.7217, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.7867740568037304, | |
| "grad_norm": 1.5820041393952022, | |
| "learning_rate": 7.92057031990339e-05, | |
| "loss": 0.7199, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.7935565917761763, | |
| "grad_norm": 1.5630207257272397, | |
| "learning_rate": 7.91675625560039e-05, | |
| "loss": 0.7141, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.8003391267486223, | |
| "grad_norm": 2.457059696195913, | |
| "learning_rate": 7.91285371589335e-05, | |
| "loss": 0.7082, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.8071216617210683, | |
| "grad_norm": 1.6028005358543407, | |
| "learning_rate": 7.908862788936532e-05, | |
| "loss": 0.7149, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.8139041966935142, | |
| "grad_norm": 1.8116395322587184, | |
| "learning_rate": 7.904783564880779e-05, | |
| "loss": 0.7123, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8206867316659602, | |
| "grad_norm": 2.0950388250383996, | |
| "learning_rate": 7.900616135871474e-05, | |
| "loss": 0.7354, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.8274692666384061, | |
| "grad_norm": 1.8624393412855933, | |
| "learning_rate": 7.896360596046453e-05, | |
| "loss": 0.7156, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.8342518016108521, | |
| "grad_norm": 1.4851216429547307, | |
| "learning_rate": 7.892017041533886e-05, | |
| "loss": 0.7114, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.841034336583298, | |
| "grad_norm": 2.9385975867591765, | |
| "learning_rate": 7.887585570450098e-05, | |
| "loss": 0.7127, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.8478168715557439, | |
| "grad_norm": 1.8479656443451273, | |
| "learning_rate": 7.883066282897362e-05, | |
| "loss": 0.7173, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8545994065281899, | |
| "grad_norm": 2.894143467483064, | |
| "learning_rate": 7.878459280961629e-05, | |
| "loss": 0.7183, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.8613819415006359, | |
| "grad_norm": 2.167706950028001, | |
| "learning_rate": 7.873764668710228e-05, | |
| "loss": 0.7237, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.8681644764730818, | |
| "grad_norm": 2.646470616726372, | |
| "learning_rate": 7.868982552189514e-05, | |
| "loss": 0.7132, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.8749470114455278, | |
| "grad_norm": 2.242758826547572, | |
| "learning_rate": 7.864113039422464e-05, | |
| "loss": 0.7296, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.8817295464179737, | |
| "grad_norm": 2.493893447513006, | |
| "learning_rate": 7.859156240406252e-05, | |
| "loss": 0.7185, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8885120813904197, | |
| "grad_norm": 1.721360057791857, | |
| "learning_rate": 7.854112267109756e-05, | |
| "loss": 0.7244, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.8952946163628657, | |
| "grad_norm": 2.6683450004265583, | |
| "learning_rate": 7.848981233471024e-05, | |
| "loss": 0.7149, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.9020771513353115, | |
| "grad_norm": 2.061224678516105, | |
| "learning_rate": 7.843763255394711e-05, | |
| "loss": 0.7118, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.9088596863077575, | |
| "grad_norm": 1.9936485418092913, | |
| "learning_rate": 7.838458450749452e-05, | |
| "loss": 0.7057, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.9156422212802034, | |
| "grad_norm": 1.5127328421814061, | |
| "learning_rate": 7.833066939365206e-05, | |
| "loss": 0.7043, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.9224247562526494, | |
| "grad_norm": 2.040325487116571, | |
| "learning_rate": 7.827588843030543e-05, | |
| "loss": 0.7087, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.9292072912250954, | |
| "grad_norm": 1.6268257121266954, | |
| "learning_rate": 7.822024285489896e-05, | |
| "loss": 0.7105, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.9359898261975413, | |
| "grad_norm": 2.3150904956674263, | |
| "learning_rate": 7.81637339244077e-05, | |
| "loss": 0.7139, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.9427723611699873, | |
| "grad_norm": 1.841068832751663, | |
| "learning_rate": 7.810636291530893e-05, | |
| "loss": 0.7052, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.9495548961424333, | |
| "grad_norm": 1.9983783549939655, | |
| "learning_rate": 7.804813112355339e-05, | |
| "loss": 0.7115, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9563374311148792, | |
| "grad_norm": 1.9754428939529725, | |
| "learning_rate": 7.798903986453603e-05, | |
| "loss": 0.7142, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.9631199660873251, | |
| "grad_norm": 1.735226023700741, | |
| "learning_rate": 7.792909047306623e-05, | |
| "loss": 0.7205, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.969902501059771, | |
| "grad_norm": 2.184380091482261, | |
| "learning_rate": 7.786828430333769e-05, | |
| "loss": 0.7107, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.976685036032217, | |
| "grad_norm": 1.8757020725047386, | |
| "learning_rate": 7.78066227288978e-05, | |
| "loss": 0.7198, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.983467571004663, | |
| "grad_norm": 1.0079051823953802, | |
| "learning_rate": 7.77441071426167e-05, | |
| "loss": 0.714, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9902501059771089, | |
| "grad_norm": 1.5288723958450625, | |
| "learning_rate": 7.768073895665573e-05, | |
| "loss": 0.7087, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.9970326409495549, | |
| "grad_norm": 2.3013059467419996, | |
| "learning_rate": 7.761651960243554e-05, | |
| "loss": 0.7366, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.0038151759220009, | |
| "grad_norm": 2.269978916078414, | |
| "learning_rate": 7.755145053060378e-05, | |
| "loss": 1.1363, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.0105977108944468, | |
| "grad_norm": 1.6332374526993498, | |
| "learning_rate": 7.748553321100238e-05, | |
| "loss": 0.697, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.0173802458668928, | |
| "grad_norm": 2.0144072844842054, | |
| "learning_rate": 7.741876913263422e-05, | |
| "loss": 0.7169, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0241627808393388, | |
| "grad_norm": 1.9963737399863932, | |
| "learning_rate": 7.735115980362964e-05, | |
| "loss": 0.6942, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.0309453158117847, | |
| "grad_norm": 2.010956402670031, | |
| "learning_rate": 7.728270675121224e-05, | |
| "loss": 0.7108, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.0377278507842307, | |
| "grad_norm": 1.1256640161683924, | |
| "learning_rate": 7.721341152166448e-05, | |
| "loss": 0.706, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.0445103857566767, | |
| "grad_norm": 2.7360045649430287, | |
| "learning_rate": 7.714327568029272e-05, | |
| "loss": 0.7035, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.0512929207291226, | |
| "grad_norm": 1.8273909265523915, | |
| "learning_rate": 7.707230081139184e-05, | |
| "loss": 0.7064, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.0580754557015684, | |
| "grad_norm": 2.155560932773446, | |
| "learning_rate": 7.700048851820946e-05, | |
| "loss": 0.6991, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.0648579906740143, | |
| "grad_norm": 2.1669039610273493, | |
| "learning_rate": 7.692784042290976e-05, | |
| "loss": 0.7099, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.0716405256464603, | |
| "grad_norm": 1.503612527062841, | |
| "learning_rate": 7.685435816653681e-05, | |
| "loss": 0.6973, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.0784230606189062, | |
| "grad_norm": 1.937309301259521, | |
| "learning_rate": 7.678004340897747e-05, | |
| "loss": 0.7049, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.0852055955913522, | |
| "grad_norm": 1.3969091003754739, | |
| "learning_rate": 7.670489782892396e-05, | |
| "loss": 0.6886, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0919881305637982, | |
| "grad_norm": 1.9964168232948958, | |
| "learning_rate": 7.662892312383592e-05, | |
| "loss": 0.6883, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.0987706655362441, | |
| "grad_norm": 1.7569755198383514, | |
| "learning_rate": 7.655212100990195e-05, | |
| "loss": 0.6987, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.10555320050869, | |
| "grad_norm": 1.4447704185089303, | |
| "learning_rate": 7.647449322200108e-05, | |
| "loss": 0.6906, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.112335735481136, | |
| "grad_norm": 1.2497557944098203, | |
| "learning_rate": 7.639604151366339e-05, | |
| "loss": 0.7009, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.119118270453582, | |
| "grad_norm": 2.3116148247793267, | |
| "learning_rate": 7.631676765703042e-05, | |
| "loss": 0.6895, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.125900805426028, | |
| "grad_norm": 1.330812583383646, | |
| "learning_rate": 7.623667344281522e-05, | |
| "loss": 0.7004, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.132683340398474, | |
| "grad_norm": 1.8610464265540336, | |
| "learning_rate": 7.615576068026187e-05, | |
| "loss": 0.7077, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.13946587537092, | |
| "grad_norm": 1.7667897999541395, | |
| "learning_rate": 7.607403119710453e-05, | |
| "loss": 0.6913, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.1462484103433659, | |
| "grad_norm": 1.208336959762333, | |
| "learning_rate": 7.599148683952628e-05, | |
| "loss": 0.6839, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.1530309453158119, | |
| "grad_norm": 1.9033319484816924, | |
| "learning_rate": 7.590812947211733e-05, | |
| "loss": 0.6898, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.1598134802882578, | |
| "grad_norm": 1.213830580906014, | |
| "learning_rate": 7.582396097783294e-05, | |
| "loss": 0.6866, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.1665960152607038, | |
| "grad_norm": 2.309488724064315, | |
| "learning_rate": 7.573898325795086e-05, | |
| "loss": 0.697, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.1733785502331497, | |
| "grad_norm": 1.3337697012970882, | |
| "learning_rate": 7.565319823202838e-05, | |
| "loss": 0.7083, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.1801610852055955, | |
| "grad_norm": 1.644453886623139, | |
| "learning_rate": 7.556660783785904e-05, | |
| "loss": 0.6905, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.1869436201780414, | |
| "grad_norm": 1.3203501723312958, | |
| "learning_rate": 7.547921403142874e-05, | |
| "loss": 0.7048, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.1937261551504874, | |
| "grad_norm": 1.4545383798679439, | |
| "learning_rate": 7.539101878687165e-05, | |
| "loss": 0.6865, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.2005086901229334, | |
| "grad_norm": 2.1879709481996987, | |
| "learning_rate": 7.53020240964256e-05, | |
| "loss": 0.6964, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.2072912250953793, | |
| "grad_norm": 1.8322469465167428, | |
| "learning_rate": 7.5212231970387e-05, | |
| "loss": 0.6921, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.2140737600678253, | |
| "grad_norm": 1.2841938775963582, | |
| "learning_rate": 7.512164443706555e-05, | |
| "loss": 0.6816, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.2208562950402713, | |
| "grad_norm": 3.1832942767833003, | |
| "learning_rate": 7.503026354273834e-05, | |
| "loss": 0.7107, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.2276388300127172, | |
| "grad_norm": 2.223671181736338, | |
| "learning_rate": 7.493809135160367e-05, | |
| "loss": 0.7068, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.2344213649851632, | |
| "grad_norm": 2.5365710286141727, | |
| "learning_rate": 7.484512994573438e-05, | |
| "loss": 0.6997, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.2412038999576092, | |
| "grad_norm": 1.7033409104571895, | |
| "learning_rate": 7.475138142503083e-05, | |
| "loss": 0.6973, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.2479864349300551, | |
| "grad_norm": 2.666703064771071, | |
| "learning_rate": 7.465684790717354e-05, | |
| "loss": 0.6904, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.254768969902501, | |
| "grad_norm": 1.6449288965298707, | |
| "learning_rate": 7.45615315275752e-05, | |
| "loss": 0.7008, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.261551504874947, | |
| "grad_norm": 2.670948821208354, | |
| "learning_rate": 7.446543443933258e-05, | |
| "loss": 0.7187, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.268334039847393, | |
| "grad_norm": 2.052845388192151, | |
| "learning_rate": 7.436855881317784e-05, | |
| "loss": 0.7154, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.275116574819839, | |
| "grad_norm": 2.3638246999276054, | |
| "learning_rate": 7.427090683742947e-05, | |
| "loss": 0.6942, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.281899109792285, | |
| "grad_norm": 2.367916777331452, | |
| "learning_rate": 7.417248071794288e-05, | |
| "loss": 0.6978, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.288681644764731, | |
| "grad_norm": 1.919070978827847, | |
| "learning_rate": 7.40732826780606e-05, | |
| "loss": 0.6778, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.2954641797371766, | |
| "grad_norm": 1.5807447282945328, | |
| "learning_rate": 7.397331495856199e-05, | |
| "loss": 0.6878, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.3022467147096228, | |
| "grad_norm": 1.5725379589793347, | |
| "learning_rate": 7.38725798176127e-05, | |
| "loss": 0.6942, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.3090292496820686, | |
| "grad_norm": 1.5854647888888829, | |
| "learning_rate": 7.37710795307136e-05, | |
| "loss": 0.6942, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.3158117846545148, | |
| "grad_norm": 0.9840396505987244, | |
| "learning_rate": 7.36688163906494e-05, | |
| "loss": 0.6822, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.3225943196269605, | |
| "grad_norm": 1.9293846538238884, | |
| "learning_rate": 7.356579270743689e-05, | |
| "loss": 0.707, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.3293768545994065, | |
| "grad_norm": 1.297177627346237, | |
| "learning_rate": 7.346201080827272e-05, | |
| "loss": 0.6966, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.3361593895718524, | |
| "grad_norm": 2.3709366791443456, | |
| "learning_rate": 7.335747303748079e-05, | |
| "loss": 0.7001, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.3429419245442984, | |
| "grad_norm": 1.7251163867182135, | |
| "learning_rate": 7.325218175645942e-05, | |
| "loss": 0.694, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.3497244595167444, | |
| "grad_norm": 2.088301626825016, | |
| "learning_rate": 7.314613934362788e-05, | |
| "loss": 0.6879, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.3565069944891903, | |
| "grad_norm": 1.9529188107557567, | |
| "learning_rate": 7.303934819437277e-05, | |
| "loss": 0.6944, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.3632895294616363, | |
| "grad_norm": 1.6962527949805855, | |
| "learning_rate": 7.293181072099377e-05, | |
| "loss": 0.6917, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.3700720644340822, | |
| "grad_norm": 1.6169230360470628, | |
| "learning_rate": 7.282352935264934e-05, | |
| "loss": 0.684, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.3768545994065282, | |
| "grad_norm": 1.485732940861678, | |
| "learning_rate": 7.271450653530167e-05, | |
| "loss": 0.6862, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.3836371343789742, | |
| "grad_norm": 1.6704659351121771, | |
| "learning_rate": 7.260474473166154e-05, | |
| "loss": 0.6932, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.3904196693514201, | |
| "grad_norm": 1.5799837137385815, | |
| "learning_rate": 7.249424642113266e-05, | |
| "loss": 0.697, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.397202204323866, | |
| "grad_norm": 1.2977311485642662, | |
| "learning_rate": 7.238301409975561e-05, | |
| "loss": 0.6919, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.403984739296312, | |
| "grad_norm": 1.451392232428334, | |
| "learning_rate": 7.227105028015156e-05, | |
| "loss": 0.699, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.410767274268758, | |
| "grad_norm": 1.2941406989928823, | |
| "learning_rate": 7.21583574914654e-05, | |
| "loss": 0.6708, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.417549809241204, | |
| "grad_norm": 1.6593482637225496, | |
| "learning_rate": 7.204493827930869e-05, | |
| "loss": 0.6871, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.4243323442136497, | |
| "grad_norm": 1.528014842634458, | |
| "learning_rate": 7.193079520570217e-05, | |
| "loss": 0.6777, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.431114879186096, | |
| "grad_norm": 1.2836973493312132, | |
| "learning_rate": 7.18159308490178e-05, | |
| "loss": 0.6741, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.4378974141585417, | |
| "grad_norm": 1.2069873065957852, | |
| "learning_rate": 7.170034780392055e-05, | |
| "loss": 0.6852, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.4446799491309876, | |
| "grad_norm": 1.6652484448685945, | |
| "learning_rate": 7.158404868130988e-05, | |
| "loss": 0.6711, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.4514624841034336, | |
| "grad_norm": 1.2576599603912495, | |
| "learning_rate": 7.14670361082606e-05, | |
| "loss": 0.6849, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.4582450190758796, | |
| "grad_norm": 1.5405832213240351, | |
| "learning_rate": 7.134931272796368e-05, | |
| "loss": 0.691, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.4650275540483255, | |
| "grad_norm": 1.684185898740359, | |
| "learning_rate": 7.123088119966647e-05, | |
| "loss": 0.6868, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.4718100890207715, | |
| "grad_norm": 1.0827904024479826, | |
| "learning_rate": 7.11117441986126e-05, | |
| "loss": 0.6878, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.4785926239932174, | |
| "grad_norm": 1.5453825081747208, | |
| "learning_rate": 7.099190441598161e-05, | |
| "loss": 0.6838, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.4853751589656634, | |
| "grad_norm": 1.0691158300615973, | |
| "learning_rate": 7.087136455882816e-05, | |
| "loss": 0.6738, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.4921576939381094, | |
| "grad_norm": 2.077881844593604, | |
| "learning_rate": 7.07501273500208e-05, | |
| "loss": 0.6912, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.4989402289105553, | |
| "grad_norm": 0.9909091986869899, | |
| "learning_rate": 7.06281955281806e-05, | |
| "loss": 0.6875, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.5057227638830013, | |
| "grad_norm": 1.826572971343745, | |
| "learning_rate": 7.050557184761915e-05, | |
| "loss": 0.6855, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.5125052988554473, | |
| "grad_norm": 1.279113591847188, | |
| "learning_rate": 7.038225907827639e-05, | |
| "loss": 0.6961, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.5192878338278932, | |
| "grad_norm": 1.229774591769691, | |
| "learning_rate": 7.02582600056581e-05, | |
| "loss": 0.6802, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.526070368800339, | |
| "grad_norm": 1.3203007597562126, | |
| "learning_rate": 7.013357743077289e-05, | |
| "loss": 0.6965, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.5328529037727852, | |
| "grad_norm": 1.2862135502175833, | |
| "learning_rate": 7.000821417006898e-05, | |
| "loss": 0.6815, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.539635438745231, | |
| "grad_norm": 1.0628503441369677, | |
| "learning_rate": 6.988217305537058e-05, | |
| "loss": 0.6768, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.546417973717677, | |
| "grad_norm": 1.3535468021784238, | |
| "learning_rate": 6.97554569338139e-05, | |
| "loss": 0.6959, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.5532005086901228, | |
| "grad_norm": 1.1587176301112725, | |
| "learning_rate": 6.962806866778284e-05, | |
| "loss": 0.6821, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.559983043662569, | |
| "grad_norm": 1.6895118703555423, | |
| "learning_rate": 6.950001113484432e-05, | |
| "loss": 0.6832, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.5667655786350148, | |
| "grad_norm": 1.3111534840956083, | |
| "learning_rate": 6.937128722768333e-05, | |
| "loss": 0.6747, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.573548113607461, | |
| "grad_norm": 1.2243130191258123, | |
| "learning_rate": 6.924189985403753e-05, | |
| "loss": 0.69, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.5803306485799067, | |
| "grad_norm": 1.4287675639720405, | |
| "learning_rate": 6.91118519366316e-05, | |
| "loss": 0.6801, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.5871131835523526, | |
| "grad_norm": 0.9267438225948489, | |
| "learning_rate": 6.898114641311122e-05, | |
| "loss": 0.6845, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.5938957185247986, | |
| "grad_norm": 1.3352877429277719, | |
| "learning_rate": 6.884978623597665e-05, | |
| "loss": 0.6993, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.6006782534972446, | |
| "grad_norm": 1.334254924822497, | |
| "learning_rate": 6.871777437251617e-05, | |
| "loss": 0.6817, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.6074607884696905, | |
| "grad_norm": 1.2977369297336303, | |
| "learning_rate": 6.858511380473887e-05, | |
| "loss": 0.6856, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.6142433234421365, | |
| "grad_norm": 1.1900674515806826, | |
| "learning_rate": 6.845180752930749e-05, | |
| "loss": 0.6947, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.6210258584145825, | |
| "grad_norm": 2.7317108581757057, | |
| "learning_rate": 6.831785855747054e-05, | |
| "loss": 0.6985, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.6278083933870284, | |
| "grad_norm": 1.2267690972200642, | |
| "learning_rate": 6.81832699149944e-05, | |
| "loss": 0.6773, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.6345909283594744, | |
| "grad_norm": 3.8760882746190255, | |
| "learning_rate": 6.804804464209498e-05, | |
| "loss": 0.6895, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.6413734633319204, | |
| "grad_norm": 3.5268150192285157, | |
| "learning_rate": 6.791218579336891e-05, | |
| "loss": 0.6994, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.6481559983043663, | |
| "grad_norm": 1.7590143767536321, | |
| "learning_rate": 6.777569643772472e-05, | |
| "loss": 0.683, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.654938533276812, | |
| "grad_norm": 1.7753106738494688, | |
| "learning_rate": 6.763857965831337e-05, | |
| "loss": 0.6811, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.6617210682492582, | |
| "grad_norm": 1.7854517795067268, | |
| "learning_rate": 6.750083855245869e-05, | |
| "loss": 0.6818, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.668503603221704, | |
| "grad_norm": 1.2937361964461016, | |
| "learning_rate": 6.736247623158738e-05, | |
| "loss": 0.6825, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.6752861381941502, | |
| "grad_norm": 1.2452039091600213, | |
| "learning_rate": 6.722349582115872e-05, | |
| "loss": 0.6779, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.682068673166596, | |
| "grad_norm": 1.6654925410640917, | |
| "learning_rate": 6.708390046059402e-05, | |
| "loss": 0.6897, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.688851208139042, | |
| "grad_norm": 1.1695940537064082, | |
| "learning_rate": 6.694369330320563e-05, | |
| "loss": 0.6894, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.6956337431114878, | |
| "grad_norm": 1.790433077118171, | |
| "learning_rate": 6.680287751612576e-05, | |
| "loss": 0.6965, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.702416278083934, | |
| "grad_norm": 1.3940506254821923, | |
| "learning_rate": 6.666145628023495e-05, | |
| "loss": 0.6883, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.7091988130563798, | |
| "grad_norm": 1.5788853280407766, | |
| "learning_rate": 6.651943279009015e-05, | |
| "loss": 0.674, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.7159813480288257, | |
| "grad_norm": 1.135684283004857, | |
| "learning_rate": 6.637681025385261e-05, | |
| "loss": 0.6944, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.7227638830012717, | |
| "grad_norm": 1.4839512076861858, | |
| "learning_rate": 6.62335918932154e-05, | |
| "loss": 0.6752, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.7295464179737177, | |
| "grad_norm": 1.3150087814531235, | |
| "learning_rate": 6.608978094333068e-05, | |
| "loss": 0.6826, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.7363289529461636, | |
| "grad_norm": 1.0720984172971046, | |
| "learning_rate": 6.594538065273652e-05, | |
| "loss": 0.683, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.7431114879186096, | |
| "grad_norm": 1.46543732407952, | |
| "learning_rate": 6.58003942832836e-05, | |
| "loss": 0.6796, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.7498940228910556, | |
| "grad_norm": 0.9292343800735161, | |
| "learning_rate": 6.56548251100615e-05, | |
| "loss": 0.6686, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.7566765578635015, | |
| "grad_norm": 1.5664491391186621, | |
| "learning_rate": 6.550867642132474e-05, | |
| "loss": 0.6912, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.7634590928359475, | |
| "grad_norm": 0.9821764593726072, | |
| "learning_rate": 6.536195151841847e-05, | |
| "loss": 0.6702, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.7702416278083932, | |
| "grad_norm": 1.4298526249936745, | |
| "learning_rate": 6.521465371570393e-05, | |
| "loss": 0.6871, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.7770241627808394, | |
| "grad_norm": 1.2452682220878895, | |
| "learning_rate": 6.506678634048353e-05, | |
| "loss": 0.6677, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.7838066977532852, | |
| "grad_norm": 1.4920349233263195, | |
| "learning_rate": 6.491835273292574e-05, | |
| "loss": 0.6809, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.7905892327257313, | |
| "grad_norm": 1.212833170547237, | |
| "learning_rate": 6.476935624598966e-05, | |
| "loss": 0.6809, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.797371767698177, | |
| "grad_norm": 1.250429726318394, | |
| "learning_rate": 6.461980024534918e-05, | |
| "loss": 0.674, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.8041543026706233, | |
| "grad_norm": 1.0681520589021245, | |
| "learning_rate": 6.446968810931707e-05, | |
| "loss": 0.6767, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.810936837643069, | |
| "grad_norm": 1.470191760394408, | |
| "learning_rate": 6.431902322876855e-05, | |
| "loss": 0.6737, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.8177193726155152, | |
| "grad_norm": 1.1749343585137977, | |
| "learning_rate": 6.416780900706484e-05, | |
| "loss": 0.683, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.824501907587961, | |
| "grad_norm": 1.1727141312870375, | |
| "learning_rate": 6.401604885997614e-05, | |
| "loss": 0.6808, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.831284442560407, | |
| "grad_norm": 1.3495790960778822, | |
| "learning_rate": 6.386374621560455e-05, | |
| "loss": 0.6847, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.8380669775328529, | |
| "grad_norm": 1.075708125802829, | |
| "learning_rate": 6.37109045143066e-05, | |
| "loss": 0.676, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.8448495125052988, | |
| "grad_norm": 1.1555696102312405, | |
| "learning_rate": 6.355752720861559e-05, | |
| "loss": 0.6661, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.8516320474777448, | |
| "grad_norm": 1.383594134484124, | |
| "learning_rate": 6.340361776316349e-05, | |
| "loss": 0.6784, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.8584145824501908, | |
| "grad_norm": 1.063837234136454, | |
| "learning_rate": 6.324917965460279e-05, | |
| "loss": 0.6781, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.8651971174226367, | |
| "grad_norm": 1.173609581411256, | |
| "learning_rate": 6.309421637152794e-05, | |
| "loss": 0.6682, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.8719796523950827, | |
| "grad_norm": 1.0658200919149257, | |
| "learning_rate": 6.29387314143965e-05, | |
| "loss": 0.6878, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.8787621873675286, | |
| "grad_norm": 1.7400399121218273, | |
| "learning_rate": 6.278272829545011e-05, | |
| "loss": 0.6706, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.8855447223399746, | |
| "grad_norm": 0.7229472871914281, | |
| "learning_rate": 6.26262105386351e-05, | |
| "loss": 0.6847, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.8923272573124206, | |
| "grad_norm": 1.0742091230131698, | |
| "learning_rate": 6.246918167952304e-05, | |
| "loss": 0.6756, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.8991097922848663, | |
| "grad_norm": 2.3846707594542456, | |
| "learning_rate": 6.231164526523063e-05, | |
| "loss": 0.6776, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.9058923272573125, | |
| "grad_norm": 1.181568933400366, | |
| "learning_rate": 6.21536048543398e-05, | |
| "loss": 0.686, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.9126748622297582, | |
| "grad_norm": 3.197777220254654, | |
| "learning_rate": 6.199506401681718e-05, | |
| "loss": 0.7007, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.9194573972022044, | |
| "grad_norm": 2.538862401423035, | |
| "learning_rate": 6.183602633393352e-05, | |
| "loss": 0.7031, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.9262399321746502, | |
| "grad_norm": 1.8353914421208335, | |
| "learning_rate": 6.167649539818284e-05, | |
| "loss": 0.6663, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.9330224671470964, | |
| "grad_norm": 1.8245299177531569, | |
| "learning_rate": 6.151647481320114e-05, | |
| "loss": 0.7011, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.939805002119542, | |
| "grad_norm": 2.284062594732862, | |
| "learning_rate": 6.135596819368512e-05, | |
| "loss": 0.701, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.9465875370919883, | |
| "grad_norm": 1.5939451814656143, | |
| "learning_rate": 6.119497916531053e-05, | |
| "loss": 0.6898, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.953370072064434, | |
| "grad_norm": 2.382128196264195, | |
| "learning_rate": 6.103351136465014e-05, | |
| "loss": 0.689, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.96015260703688, | |
| "grad_norm": 2.255809021386828, | |
| "learning_rate": 6.0871568439091743e-05, | |
| "loss": 0.689, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.966935142009326, | |
| "grad_norm": 1.192524289628154, | |
| "learning_rate": 6.070915404675571e-05, | |
| "loss": 0.6709, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.973717676981772, | |
| "grad_norm": 1.2397061535202811, | |
| "learning_rate": 6.0546271856412306e-05, | |
| "loss": 0.687, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.9805002119542179, | |
| "grad_norm": 0.9812283662135952, | |
| "learning_rate": 6.03829255473989e-05, | |
| "loss": 0.677, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.9872827469266638, | |
| "grad_norm": 1.410310313066498, | |
| "learning_rate": 6.0219118809536794e-05, | |
| "loss": 0.6882, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.9940652818991098, | |
| "grad_norm": 0.7865238012992424, | |
| "learning_rate": 6.0054855343047914e-05, | |
| "loss": 0.6769, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.0008478168715556, | |
| "grad_norm": 1.9901508305957583, | |
| "learning_rate": 5.989013885847117e-05, | |
| "loss": 1.0693, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.0076303518440017, | |
| "grad_norm": 1.3663813100431552, | |
| "learning_rate": 5.972497307657869e-05, | |
| "loss": 0.6673, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.0144128868164475, | |
| "grad_norm": 1.1381480345523374, | |
| "learning_rate": 5.955936172829179e-05, | |
| "loss": 0.6676, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 2.0211954217888937, | |
| "grad_norm": 1.496250854193103, | |
| "learning_rate": 5.939330855459661e-05, | |
| "loss": 0.6551, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.0279779567613394, | |
| "grad_norm": 0.9716458462062207, | |
| "learning_rate": 5.922681730645968e-05, | |
| "loss": 0.6723, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 2.0347604917337856, | |
| "grad_norm": 1.501595368602735, | |
| "learning_rate": 5.905989174474319e-05, | |
| "loss": 0.6509, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.0415430267062313, | |
| "grad_norm": 0.849974962385986, | |
| "learning_rate": 5.889253564011999e-05, | |
| "loss": 0.6577, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 2.0483255616786775, | |
| "grad_norm": 1.418902591957157, | |
| "learning_rate": 5.872475277298847e-05, | |
| "loss": 0.6665, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.0551080966511233, | |
| "grad_norm": 0.8446815446152741, | |
| "learning_rate": 5.855654693338711e-05, | |
| "loss": 0.6556, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 2.0618906316235694, | |
| "grad_norm": 1.3364133345670484, | |
| "learning_rate": 5.838792192090889e-05, | |
| "loss": 0.6604, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.068673166596015, | |
| "grad_norm": 0.8691215260428371, | |
| "learning_rate": 5.821888154461549e-05, | |
| "loss": 0.6593, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.0754557015684614, | |
| "grad_norm": 1.2207875769882877, | |
| "learning_rate": 5.8049429622951194e-05, | |
| "loss": 0.6637, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.082238236540907, | |
| "grad_norm": 0.7775512997267359, | |
| "learning_rate": 5.7879569983656694e-05, | |
| "loss": 0.6433, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.0890207715133533, | |
| "grad_norm": 1.2237758983601073, | |
| "learning_rate": 5.770930646368257e-05, | |
| "loss": 0.6681, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.095803306485799, | |
| "grad_norm": 0.6935518063946065, | |
| "learning_rate": 5.7538642909102654e-05, | |
| "loss": 0.6571, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.1025858414582452, | |
| "grad_norm": 1.0509844665628039, | |
| "learning_rate": 5.736758317502714e-05, | |
| "loss": 0.6462, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.109368376430691, | |
| "grad_norm": 0.6501336655556583, | |
| "learning_rate": 5.719613112551546e-05, | |
| "loss": 0.642, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.1161509114031367, | |
| "grad_norm": 0.791518345366507, | |
| "learning_rate": 5.702429063348912e-05, | |
| "loss": 0.6523, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.122933446375583, | |
| "grad_norm": 0.7216839703720191, | |
| "learning_rate": 5.685206558064407e-05, | |
| "loss": 0.6485, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.1297159813480286, | |
| "grad_norm": 1.3490427513390442, | |
| "learning_rate": 5.667945985736308e-05, | |
| "loss": 0.6622, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 2.136498516320475, | |
| "grad_norm": 1.036563799894598, | |
| "learning_rate": 5.6506477362627926e-05, | |
| "loss": 0.6598, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 2.1432810512929206, | |
| "grad_norm": 0.8799949127384937, | |
| "learning_rate": 5.6333122003931186e-05, | |
| "loss": 0.6631, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 2.1500635862653668, | |
| "grad_norm": 0.9027804053204243, | |
| "learning_rate": 5.615939769718809e-05, | |
| "loss": 0.6543, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 2.1568461212378125, | |
| "grad_norm": 1.08723379469273, | |
| "learning_rate": 5.5985308366647985e-05, | |
| "loss": 0.6453, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 2.1636286562102587, | |
| "grad_norm": 1.198788894798635, | |
| "learning_rate": 5.5810857944805744e-05, | |
| "loss": 0.6466, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 2.1704111911827044, | |
| "grad_norm": 1.2287340016478474, | |
| "learning_rate": 5.5636050372312896e-05, | |
| "loss": 0.6776, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.1771937261551506, | |
| "grad_norm": 0.676307375412045, | |
| "learning_rate": 5.546088959788862e-05, | |
| "loss": 0.6517, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 2.1839762611275964, | |
| "grad_norm": 0.8023647253320252, | |
| "learning_rate": 5.528537957823061e-05, | |
| "loss": 0.6565, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 2.1907587961000425, | |
| "grad_norm": 1.4391382554036933, | |
| "learning_rate": 5.510952427792559e-05, | |
| "loss": 0.6587, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.1975413310724883, | |
| "grad_norm": 0.7633580340639281, | |
| "learning_rate": 5.493332766935981e-05, | |
| "loss": 0.6433, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.2043238660449345, | |
| "grad_norm": 1.0303353819004128, | |
| "learning_rate": 5.4756793732629335e-05, | |
| "loss": 0.6578, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.21110640101738, | |
| "grad_norm": 1.3526690779961053, | |
| "learning_rate": 5.45799264554501e-05, | |
| "loss": 0.6499, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.2178889359898264, | |
| "grad_norm": 0.5228459526464206, | |
| "learning_rate": 5.440272983306789e-05, | |
| "loss": 0.6394, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.224671470962272, | |
| "grad_norm": 1.4252351086818837, | |
| "learning_rate": 5.4225207868167994e-05, | |
| "loss": 0.6554, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.231454005934718, | |
| "grad_norm": 0.6950731562608328, | |
| "learning_rate": 5.404736457078489e-05, | |
| "loss": 0.6482, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.238236540907164, | |
| "grad_norm": 0.7644651875362564, | |
| "learning_rate": 5.38692039582116e-05, | |
| "loss": 0.6503, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.24501907587961, | |
| "grad_norm": 0.9023571857971306, | |
| "learning_rate": 5.3690730054908985e-05, | |
| "loss": 0.6468, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.251801610852056, | |
| "grad_norm": 0.9107802448508358, | |
| "learning_rate": 5.3511946892414775e-05, | |
| "loss": 0.6518, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.2585841458245017, | |
| "grad_norm": 1.1127754837574242, | |
| "learning_rate": 5.333285850925256e-05, | |
| "loss": 0.6565, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.265366680796948, | |
| "grad_norm": 0.8282253306938299, | |
| "learning_rate": 5.315346895084056e-05, | |
| "loss": 0.6564, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.2721492157693937, | |
| "grad_norm": 1.14130477616649, | |
| "learning_rate": 5.297378226940019e-05, | |
| "loss": 0.6516, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.27893175074184, | |
| "grad_norm": 1.0103971093744055, | |
| "learning_rate": 5.279380252386461e-05, | |
| "loss": 0.6626, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.9536394369627204, | |
| "learning_rate": 5.2613533779786945e-05, | |
| "loss": 0.6663, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.2924968206867318, | |
| "grad_norm": 0.942528066117481, | |
| "learning_rate": 5.243298010924852e-05, | |
| "loss": 0.6408, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.2992793556591775, | |
| "grad_norm": 1.2117876187133245, | |
| "learning_rate": 5.225214559076683e-05, | |
| "loss": 0.6435, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.3060618906316237, | |
| "grad_norm": 0.8633776831955887, | |
| "learning_rate": 5.207103430920345e-05, | |
| "loss": 0.6395, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.3128444256040694, | |
| "grad_norm": 0.727531307349217, | |
| "learning_rate": 5.1889650355671725e-05, | |
| "loss": 0.654, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.3196269605765156, | |
| "grad_norm": 0.669365998637684, | |
| "learning_rate": 5.1707997827444394e-05, | |
| "loss": 0.6546, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.3264094955489614, | |
| "grad_norm": 0.8312703348077676, | |
| "learning_rate": 5.152608082786098e-05, | |
| "loss": 0.6646, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.3331920305214076, | |
| "grad_norm": 1.0187489492222126, | |
| "learning_rate": 5.1343903466235174e-05, | |
| "loss": 0.648, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.3399745654938533, | |
| "grad_norm": 1.034867027111884, | |
| "learning_rate": 5.116146985776194e-05, | |
| "loss": 0.6593, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.3467571004662995, | |
| "grad_norm": 1.1577447007405788, | |
| "learning_rate": 5.0978784123424617e-05, | |
| "loss": 0.6516, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.3535396354387452, | |
| "grad_norm": 1.0011140671455747, | |
| "learning_rate": 5.0795850389901784e-05, | |
| "loss": 0.6638, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.360322170411191, | |
| "grad_norm": 0.9415943625257757, | |
| "learning_rate": 5.061267278947408e-05, | |
| "loss": 0.6457, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.367104705383637, | |
| "grad_norm": 0.6919138650974885, | |
| "learning_rate": 5.042925545993079e-05, | |
| "loss": 0.6584, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.373887240356083, | |
| "grad_norm": 0.7266901542517143, | |
| "learning_rate": 5.02456025444765e-05, | |
| "loss": 0.6348, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.380669775328529, | |
| "grad_norm": 0.6635241827908179, | |
| "learning_rate": 5.0061718191637394e-05, | |
| "loss": 0.6574, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.387452310300975, | |
| "grad_norm": 0.5653141179051638, | |
| "learning_rate": 4.987760655516757e-05, | |
| "loss": 0.6502, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.394234845273421, | |
| "grad_norm": 0.7007173012785425, | |
| "learning_rate": 4.9693271793955255e-05, | |
| "loss": 0.6468, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.4010173802458668, | |
| "grad_norm": 0.5821543619857267, | |
| "learning_rate": 4.95087180719288e-05, | |
| "loss": 0.6486, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.407799915218313, | |
| "grad_norm": 0.4265984966323706, | |
| "learning_rate": 4.9323949557962684e-05, | |
| "loss": 0.6477, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.4145824501907587, | |
| "grad_norm": 0.6454773425594047, | |
| "learning_rate": 4.913897042578327e-05, | |
| "loss": 0.6479, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.421364985163205, | |
| "grad_norm": 0.615481868155692, | |
| "learning_rate": 4.8953784853874624e-05, | |
| "loss": 0.6605, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.4281475201356506, | |
| "grad_norm": 0.4923342474878763, | |
| "learning_rate": 4.8768397025383996e-05, | |
| "loss": 0.6414, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.434930055108097, | |
| "grad_norm": 0.4236872576222672, | |
| "learning_rate": 4.858281112802745e-05, | |
| "loss": 0.6526, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.4417125900805425, | |
| "grad_norm": 0.4376255004390753, | |
| "learning_rate": 4.83970313539952e-05, | |
| "loss": 0.6503, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.4484951250529887, | |
| "grad_norm": 0.7271305316785697, | |
| "learning_rate": 4.821106189985693e-05, | |
| "loss": 0.6447, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.4552776600254345, | |
| "grad_norm": 1.001429290369445, | |
| "learning_rate": 4.8024906966467e-05, | |
| "loss": 0.6566, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.4620601949978806, | |
| "grad_norm": 1.168304354121221, | |
| "learning_rate": 4.783857075886956e-05, | |
| "loss": 0.6614, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.4688427299703264, | |
| "grad_norm": 0.8021141929873027, | |
| "learning_rate": 4.7652057486203525e-05, | |
| "loss": 0.6446, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.4756252649427726, | |
| "grad_norm": 0.7075159940006046, | |
| "learning_rate": 4.746537136160757e-05, | |
| "loss": 0.6563, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.4824077999152183, | |
| "grad_norm": 0.7183111983007646, | |
| "learning_rate": 4.727851660212487e-05, | |
| "loss": 0.6502, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.489190334887664, | |
| "grad_norm": 0.755055422137936, | |
| "learning_rate": 4.709149742860792e-05, | |
| "loss": 0.6424, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.4959728698601102, | |
| "grad_norm": 0.7253802127810576, | |
| "learning_rate": 4.690431806562311e-05, | |
| "loss": 0.6496, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.5027554048325564, | |
| "grad_norm": 0.5125324963207099, | |
| "learning_rate": 4.6716982741355386e-05, | |
| "loss": 0.6533, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.509537939805002, | |
| "grad_norm": 0.5597998347086172, | |
| "learning_rate": 4.652949568751267e-05, | |
| "loss": 0.6402, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.516320474777448, | |
| "grad_norm": 0.7015467668995325, | |
| "learning_rate": 4.63418611392303e-05, | |
| "loss": 0.6482, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.523103009749894, | |
| "grad_norm": 0.511973221694181, | |
| "learning_rate": 4.615408333497538e-05, | |
| "loss": 0.6522, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.52988554472234, | |
| "grad_norm": 0.4401280437938751, | |
| "learning_rate": 4.5966166516450985e-05, | |
| "loss": 0.6573, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.536668079694786, | |
| "grad_norm": 0.614469648497891, | |
| "learning_rate": 4.577811492850039e-05, | |
| "loss": 0.6598, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.5434506146672318, | |
| "grad_norm": 0.7173941689992674, | |
| "learning_rate": 4.558993281901116e-05, | |
| "loss": 0.6465, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.550233149639678, | |
| "grad_norm": 0.6690409730578648, | |
| "learning_rate": 4.540162443881922e-05, | |
| "loss": 0.6488, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.5570156846121237, | |
| "grad_norm": 0.5848162533642309, | |
| "learning_rate": 4.5213194041612824e-05, | |
| "loss": 0.648, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.56379821958457, | |
| "grad_norm": 0.549981546670745, | |
| "learning_rate": 4.5024645883836426e-05, | |
| "loss": 0.6579, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.5705807545570156, | |
| "grad_norm": 0.556140263499365, | |
| "learning_rate": 4.4835984224594586e-05, | |
| "loss": 0.6621, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.577363289529462, | |
| "grad_norm": 0.5195224064491745, | |
| "learning_rate": 4.464721332555577e-05, | |
| "loss": 0.6512, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.5841458245019076, | |
| "grad_norm": 0.38259318048535124, | |
| "learning_rate": 4.445833745085602e-05, | |
| "loss": 0.6492, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.5909283594743533, | |
| "grad_norm": 0.3534037726943067, | |
| "learning_rate": 4.4269360867002675e-05, | |
| "loss": 0.6589, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.5977108944467995, | |
| "grad_norm": 0.315508715276319, | |
| "learning_rate": 4.408028784277799e-05, | |
| "loss": 0.6493, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.6044934294192457, | |
| "grad_norm": 0.3298656413250479, | |
| "learning_rate": 4.389112264914273e-05, | |
| "loss": 0.6589, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.6112759643916914, | |
| "grad_norm": 0.3016047037119661, | |
| "learning_rate": 4.370186955913962e-05, | |
| "loss": 0.6561, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.618058499364137, | |
| "grad_norm": 0.34845242594318765, | |
| "learning_rate": 4.351253284779692e-05, | |
| "loss": 0.6552, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.6248410343365833, | |
| "grad_norm": 0.38736846664169255, | |
| "learning_rate": 4.332311679203177e-05, | |
| "loss": 0.6364, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.6316235693090295, | |
| "grad_norm": 0.3951534285172612, | |
| "learning_rate": 4.313362567055367e-05, | |
| "loss": 0.6502, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.6384061042814753, | |
| "grad_norm": 0.4657313097001591, | |
| "learning_rate": 4.294406376376771e-05, | |
| "loss": 0.6561, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.645188639253921, | |
| "grad_norm": 0.4131171546046392, | |
| "learning_rate": 4.2754435353677985e-05, | |
| "loss": 0.6567, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.651971174226367, | |
| "grad_norm": 0.5305329556333895, | |
| "learning_rate": 4.2564744723790835e-05, | |
| "loss": 0.6568, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.658753709198813, | |
| "grad_norm": 0.7002749870897172, | |
| "learning_rate": 4.237499615901805e-05, | |
| "loss": 0.6552, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.665536244171259, | |
| "grad_norm": 0.7839360975679, | |
| "learning_rate": 4.218519394558013e-05, | |
| "loss": 0.6333, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.672318779143705, | |
| "grad_norm": 0.7621551938800204, | |
| "learning_rate": 4.199534237090943e-05, | |
| "loss": 0.6501, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.679101314116151, | |
| "grad_norm": 0.6652202397867919, | |
| "learning_rate": 4.1805445723553346e-05, | |
| "loss": 0.648, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.685883849088597, | |
| "grad_norm": 0.4811941000709009, | |
| "learning_rate": 4.1615508293077394e-05, | |
| "loss": 0.6544, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.692666384061043, | |
| "grad_norm": 0.3387872066498026, | |
| "learning_rate": 4.142553436996834e-05, | |
| "loss": 0.6558, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.6994489190334887, | |
| "grad_norm": 0.28441864482912343, | |
| "learning_rate": 4.12355282455373e-05, | |
| "loss": 0.6354, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.706231454005935, | |
| "grad_norm": 0.285244776358457, | |
| "learning_rate": 4.1045494211822756e-05, | |
| "loss": 0.6493, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.7130139889783806, | |
| "grad_norm": 0.3573803066995761, | |
| "learning_rate": 4.085543656149365e-05, | |
| "loss": 0.6397, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.7197965239508264, | |
| "grad_norm": 0.4278142282008419, | |
| "learning_rate": 4.0665359587752394e-05, | |
| "loss": 0.6537, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.7265790589232726, | |
| "grad_norm": 0.49944968397215816, | |
| "learning_rate": 4.04752675842379e-05, | |
| "loss": 0.656, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.7333615938957188, | |
| "grad_norm": 0.4642912024112797, | |
| "learning_rate": 4.028516484492857e-05, | |
| "loss": 0.6424, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.7401441288681645, | |
| "grad_norm": 0.40098112979297407, | |
| "learning_rate": 4.009505566404535e-05, | |
| "loss": 0.6377, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.7469266638406102, | |
| "grad_norm": 0.3708879898016676, | |
| "learning_rate": 3.990494433595466e-05, | |
| "loss": 0.6518, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.7537091988130564, | |
| "grad_norm": 0.40008123571174364, | |
| "learning_rate": 3.9714835155071435e-05, | |
| "loss": 0.6536, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.760491733785502, | |
| "grad_norm": 0.29605943254539613, | |
| "learning_rate": 3.952473241576211e-05, | |
| "loss": 0.6583, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.7672742687579484, | |
| "grad_norm": 0.3406525853896961, | |
| "learning_rate": 3.933464041224761e-05, | |
| "loss": 0.6545, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.774056803730394, | |
| "grad_norm": 0.4722405867001987, | |
| "learning_rate": 3.914456343850637e-05, | |
| "loss": 0.6513, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.7808393387028403, | |
| "grad_norm": 0.5263658831108335, | |
| "learning_rate": 3.895450578817727e-05, | |
| "loss": 0.6548, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.787621873675286, | |
| "grad_norm": 0.5203524120767699, | |
| "learning_rate": 3.8764471754462714e-05, | |
| "loss": 0.654, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.794404408647732, | |
| "grad_norm": 0.3902260359882798, | |
| "learning_rate": 3.857446563003167e-05, | |
| "loss": 0.651, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.801186943620178, | |
| "grad_norm": 0.3931366323184314, | |
| "learning_rate": 3.838449170692262e-05, | |
| "loss": 0.6418, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.807969478592624, | |
| "grad_norm": 0.4496897165040769, | |
| "learning_rate": 3.819455427644666e-05, | |
| "loss": 0.6566, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.81475201356507, | |
| "grad_norm": 0.4843275111384109, | |
| "learning_rate": 3.800465762909057e-05, | |
| "loss": 0.6494, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.821534548537516, | |
| "grad_norm": 0.4284479617853188, | |
| "learning_rate": 3.781480605441989e-05, | |
| "loss": 0.6504, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.828317083509962, | |
| "grad_norm": 0.3505752717784012, | |
| "learning_rate": 3.7625003840981976e-05, | |
| "loss": 0.6515, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.835099618482408, | |
| "grad_norm": 0.2739600638831855, | |
| "learning_rate": 3.743525527620918e-05, | |
| "loss": 0.6482, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.8418821534548537, | |
| "grad_norm": 0.2821715826512274, | |
| "learning_rate": 3.724556464632203e-05, | |
| "loss": 0.6558, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.8486646884272995, | |
| "grad_norm": 0.3001235929581994, | |
| "learning_rate": 3.7055936236232296e-05, | |
| "loss": 0.6283, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.8554472233997457, | |
| "grad_norm": 0.34084577684820244, | |
| "learning_rate": 3.686637432944634e-05, | |
| "loss": 0.6524, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.862229758372192, | |
| "grad_norm": 0.31354392380727714, | |
| "learning_rate": 3.6676883207968226e-05, | |
| "loss": 0.6407, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.8690122933446376, | |
| "grad_norm": 0.29938503730372684, | |
| "learning_rate": 3.648746715220309e-05, | |
| "loss": 0.6431, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.8757948283170833, | |
| "grad_norm": 0.292588496703024, | |
| "learning_rate": 3.6298130440860394e-05, | |
| "loss": 0.6457, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.8825773632895295, | |
| "grad_norm": 0.34211158982130174, | |
| "learning_rate": 3.6108877350857296e-05, | |
| "loss": 0.6413, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.8893598982619753, | |
| "grad_norm": 0.28453013315503256, | |
| "learning_rate": 3.5919712157222014e-05, | |
| "loss": 0.6493, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.8961424332344214, | |
| "grad_norm": 0.25000620848294364, | |
| "learning_rate": 3.573063913299733e-05, | |
| "loss": 0.6471, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.902924968206867, | |
| "grad_norm": 0.32984361513817445, | |
| "learning_rate": 3.554166254914399e-05, | |
| "loss": 0.6525, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.9097075031793134, | |
| "grad_norm": 0.34282163750459993, | |
| "learning_rate": 3.535278667444423e-05, | |
| "loss": 0.6435, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.916490038151759, | |
| "grad_norm": 0.3376932266610861, | |
| "learning_rate": 3.5164015775405414e-05, | |
| "loss": 0.6537, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.9232725731242053, | |
| "grad_norm": 0.2736761009463742, | |
| "learning_rate": 3.4975354116163594e-05, | |
| "loss": 0.6528, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.930055108096651, | |
| "grad_norm": 0.28401811879767763, | |
| "learning_rate": 3.478680595838719e-05, | |
| "loss": 0.6552, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.9368376430690972, | |
| "grad_norm": 0.21207447278197997, | |
| "learning_rate": 3.4598375561180783e-05, | |
| "loss": 0.6477, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.943620178041543, | |
| "grad_norm": 0.3273666184303206, | |
| "learning_rate": 3.441006718098885e-05, | |
| "loss": 0.6415, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.9504027130139887, | |
| "grad_norm": 0.3922599048141651, | |
| "learning_rate": 3.422188507149962e-05, | |
| "loss": 0.66, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.957185247986435, | |
| "grad_norm": 0.33853238933941526, | |
| "learning_rate": 3.403383348354902e-05, | |
| "loss": 0.6392, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.963967782958881, | |
| "grad_norm": 0.2904409798403406, | |
| "learning_rate": 3.3845916665024626e-05, | |
| "loss": 0.6571, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.970750317931327, | |
| "grad_norm": 0.25650378002806623, | |
| "learning_rate": 3.36581388607697e-05, | |
| "loss": 0.658, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.9775328529037726, | |
| "grad_norm": 0.30516122417008495, | |
| "learning_rate": 3.347050431248735e-05, | |
| "loss": 0.6445, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.9843153878762188, | |
| "grad_norm": 0.35193441095679195, | |
| "learning_rate": 3.3283017258644634e-05, | |
| "loss": 0.6514, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.991097922848665, | |
| "grad_norm": 0.26810523993687996, | |
| "learning_rate": 3.30956819343769e-05, | |
| "loss": 0.6479, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.9978804578211107, | |
| "grad_norm": 0.40136478388949387, | |
| "learning_rate": 3.290850257139209e-05, | |
| "loss": 0.7446, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 3.0046629927935564, | |
| "grad_norm": 0.6219962321725231, | |
| "learning_rate": 3.2721483397875135e-05, | |
| "loss": 0.9011, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 3.0114455277660026, | |
| "grad_norm": 0.6301524386828496, | |
| "learning_rate": 3.253462863839243e-05, | |
| "loss": 0.6098, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 3.0182280627384483, | |
| "grad_norm": 0.5440271202067795, | |
| "learning_rate": 3.2347942513796475e-05, | |
| "loss": 0.6367, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 3.0250105977108945, | |
| "grad_norm": 0.5386656826635444, | |
| "learning_rate": 3.216142924113046e-05, | |
| "loss": 0.6285, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 3.0317931326833403, | |
| "grad_norm": 0.4074410048009017, | |
| "learning_rate": 3.1975093033533003e-05, | |
| "loss": 0.6332, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 3.0385756676557865, | |
| "grad_norm": 0.25928533106144735, | |
| "learning_rate": 3.1788938100143086e-05, | |
| "loss": 0.6246, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 3.045358202628232, | |
| "grad_norm": 0.445439070033727, | |
| "learning_rate": 3.160296864600482e-05, | |
| "loss": 0.6193, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 3.0521407376006784, | |
| "grad_norm": 0.46469489323619156, | |
| "learning_rate": 3.1417188871972554e-05, | |
| "loss": 0.6264, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.058923272573124, | |
| "grad_norm": 0.3272979337949089, | |
| "learning_rate": 3.123160297461601e-05, | |
| "loss": 0.631, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 3.0657058075455703, | |
| "grad_norm": 0.3178424540888323, | |
| "learning_rate": 3.104621514612538e-05, | |
| "loss": 0.6344, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 3.072488342518016, | |
| "grad_norm": 0.2754042730785633, | |
| "learning_rate": 3.086102957421672e-05, | |
| "loss": 0.6129, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 3.0792708774904622, | |
| "grad_norm": 0.35442538475012997, | |
| "learning_rate": 3.0676050442037336e-05, | |
| "loss": 0.6269, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 3.086053412462908, | |
| "grad_norm": 0.2797121425291907, | |
| "learning_rate": 3.0491281928071217e-05, | |
| "loss": 0.6221, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 3.092835947435354, | |
| "grad_norm": 0.23765468818900978, | |
| "learning_rate": 3.030672820604476e-05, | |
| "loss": 0.6256, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 3.0996184824078, | |
| "grad_norm": 0.26572260092618777, | |
| "learning_rate": 3.012239344483244e-05, | |
| "loss": 0.6177, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 3.1064010173802457, | |
| "grad_norm": 0.33390446968478465, | |
| "learning_rate": 2.993828180836262e-05, | |
| "loss": 0.6113, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 3.113183552352692, | |
| "grad_norm": 0.2666734723327277, | |
| "learning_rate": 2.9754397455523497e-05, | |
| "loss": 0.6297, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 3.1199660873251376, | |
| "grad_norm": 0.29084625313932944, | |
| "learning_rate": 2.957074454006921e-05, | |
| "loss": 0.6196, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.1267486222975838, | |
| "grad_norm": 0.19046989657100505, | |
| "learning_rate": 2.9387327210525942e-05, | |
| "loss": 0.6188, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 3.1335311572700295, | |
| "grad_norm": 0.338123523854364, | |
| "learning_rate": 2.9204149610098223e-05, | |
| "loss": 0.6281, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 3.1403136922424757, | |
| "grad_norm": 0.22618376500160342, | |
| "learning_rate": 2.902121587657539e-05, | |
| "loss": 0.6157, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 3.1470962272149214, | |
| "grad_norm": 0.35386494541824426, | |
| "learning_rate": 2.8838530142238076e-05, | |
| "loss": 0.626, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 3.1538787621873676, | |
| "grad_norm": 0.2840090224796156, | |
| "learning_rate": 2.865609653376484e-05, | |
| "loss": 0.6199, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 3.1606612971598134, | |
| "grad_norm": 0.2835278774397419, | |
| "learning_rate": 2.8473919172139034e-05, | |
| "loss": 0.635, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 3.1674438321322596, | |
| "grad_norm": 0.2932289544670752, | |
| "learning_rate": 2.8292002172555616e-05, | |
| "loss": 0.6181, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 3.1742263671047053, | |
| "grad_norm": 0.21388735557547445, | |
| "learning_rate": 2.8110349644328275e-05, | |
| "loss": 0.6304, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 3.1810089020771515, | |
| "grad_norm": 0.25484830423076227, | |
| "learning_rate": 2.7928965690796562e-05, | |
| "loss": 0.6351, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 3.187791437049597, | |
| "grad_norm": 0.24843127490388153, | |
| "learning_rate": 2.7747854409233187e-05, | |
| "loss": 0.6301, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.1945739720220434, | |
| "grad_norm": 0.2451683485321557, | |
| "learning_rate": 2.7567019890751493e-05, | |
| "loss": 0.6207, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 3.201356506994489, | |
| "grad_norm": 0.3164960123899796, | |
| "learning_rate": 2.7386466220213065e-05, | |
| "loss": 0.6213, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 3.2081390419669353, | |
| "grad_norm": 0.2697409403652239, | |
| "learning_rate": 2.7206197476135403e-05, | |
| "loss": 0.6212, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 3.214921576939381, | |
| "grad_norm": 0.19669860705803766, | |
| "learning_rate": 2.7026217730599814e-05, | |
| "loss": 0.6284, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 3.2217041119118273, | |
| "grad_norm": 0.23674235246224137, | |
| "learning_rate": 2.6846531049159454e-05, | |
| "loss": 0.627, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 3.228486646884273, | |
| "grad_norm": 0.18982523269613027, | |
| "learning_rate": 2.6667141490747454e-05, | |
| "loss": 0.6236, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 3.2352691818567187, | |
| "grad_norm": 0.20626545109123062, | |
| "learning_rate": 2.648805310758524e-05, | |
| "loss": 0.6242, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 3.242051716829165, | |
| "grad_norm": 0.25273617130117126, | |
| "learning_rate": 2.6309269945091025e-05, | |
| "loss": 0.6135, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 3.2488342518016107, | |
| "grad_norm": 0.19396420036714845, | |
| "learning_rate": 2.6130796041788404e-05, | |
| "loss": 0.6401, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 3.255616786774057, | |
| "grad_norm": 0.26449583344475813, | |
| "learning_rate": 2.5952635429215117e-05, | |
| "loss": 0.6303, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.2623993217465026, | |
| "grad_norm": 0.2880771386228117, | |
| "learning_rate": 2.5774792131832012e-05, | |
| "loss": 0.6213, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 3.269181856718949, | |
| "grad_norm": 0.19301524096425413, | |
| "learning_rate": 2.559727016693212e-05, | |
| "loss": 0.6368, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 3.2759643916913945, | |
| "grad_norm": 0.31002080783316865, | |
| "learning_rate": 2.54200735445499e-05, | |
| "loss": 0.641, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 3.2827469266638407, | |
| "grad_norm": 0.24500950182263187, | |
| "learning_rate": 2.5243206267370685e-05, | |
| "loss": 0.6204, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 3.2895294616362865, | |
| "grad_norm": 0.25623143489960465, | |
| "learning_rate": 2.506667233064021e-05, | |
| "loss": 0.6207, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.2963119966087326, | |
| "grad_norm": 0.2918652621084026, | |
| "learning_rate": 2.489047572207442e-05, | |
| "loss": 0.6371, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.3030945315811784, | |
| "grad_norm": 0.23337516028968874, | |
| "learning_rate": 2.4714620421769394e-05, | |
| "loss": 0.6285, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 3.3098770665536246, | |
| "grad_norm": 0.23619697217842073, | |
| "learning_rate": 2.4539110402111383e-05, | |
| "loss": 0.631, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.3166596015260703, | |
| "grad_norm": 0.2448512796334409, | |
| "learning_rate": 2.436394962768712e-05, | |
| "loss": 0.6276, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.3234421364985165, | |
| "grad_norm": 0.1983649853769521, | |
| "learning_rate": 2.4189142055194276e-05, | |
| "loss": 0.6244, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.3302246714709622, | |
| "grad_norm": 0.21190165986074258, | |
| "learning_rate": 2.4014691633352038e-05, | |
| "loss": 0.6101, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.337007206443408, | |
| "grad_norm": 0.25804827992388285, | |
| "learning_rate": 2.3840602302811923e-05, | |
| "loss": 0.6135, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.343789741415854, | |
| "grad_norm": 0.18856304220278483, | |
| "learning_rate": 2.3666877996068824e-05, | |
| "loss": 0.6288, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.3505722763883004, | |
| "grad_norm": 0.3301373974007395, | |
| "learning_rate": 2.3493522637372087e-05, | |
| "loss": 0.6326, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.357354811360746, | |
| "grad_norm": 0.30315622915732504, | |
| "learning_rate": 2.332054014263692e-05, | |
| "loss": 0.6259, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.364137346333192, | |
| "grad_norm": 0.22646576230809926, | |
| "learning_rate": 2.3147934419355935e-05, | |
| "loss": 0.6245, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.370919881305638, | |
| "grad_norm": 0.30590058038547674, | |
| "learning_rate": 2.2975709366510887e-05, | |
| "loss": 0.6316, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.3777024162780838, | |
| "grad_norm": 0.26486089548607467, | |
| "learning_rate": 2.2803868874484536e-05, | |
| "loss": 0.6383, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.38448495125053, | |
| "grad_norm": 0.209806274707419, | |
| "learning_rate": 2.2632416824972886e-05, | |
| "loss": 0.614, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.3912674862229757, | |
| "grad_norm": 0.3076979703172019, | |
| "learning_rate": 2.2461357090897352e-05, | |
| "loss": 0.6317, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.398050021195422, | |
| "grad_norm": 0.27851643544183735, | |
| "learning_rate": 2.2290693536317444e-05, | |
| "loss": 0.6226, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.4048325561678676, | |
| "grad_norm": 0.26720054263757376, | |
| "learning_rate": 2.212043001634332e-05, | |
| "loss": 0.6306, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.411615091140314, | |
| "grad_norm": 0.3264075780455984, | |
| "learning_rate": 2.1950570377048813e-05, | |
| "loss": 0.6144, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.4183976261127595, | |
| "grad_norm": 0.24476904503391836, | |
| "learning_rate": 2.178111845538453e-05, | |
| "loss": 0.6317, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.4251801610852057, | |
| "grad_norm": 0.3445192467182261, | |
| "learning_rate": 2.1612078079091125e-05, | |
| "loss": 0.6227, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.4319626960576515, | |
| "grad_norm": 0.2248390312896755, | |
| "learning_rate": 2.144345306661291e-05, | |
| "loss": 0.6179, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.4387452310300977, | |
| "grad_norm": 0.24580333907022087, | |
| "learning_rate": 2.1275247227011536e-05, | |
| "loss": 0.6218, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.4455277660025434, | |
| "grad_norm": 0.2333810028522329, | |
| "learning_rate": 2.1107464359880022e-05, | |
| "loss": 0.6313, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.4523103009749896, | |
| "grad_norm": 0.20038699343598237, | |
| "learning_rate": 2.0940108255256823e-05, | |
| "loss": 0.6309, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.4590928359474353, | |
| "grad_norm": 0.19291801763604194, | |
| "learning_rate": 2.0773182693540325e-05, | |
| "loss": 0.6076, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.465875370919881, | |
| "grad_norm": 0.21138321490252843, | |
| "learning_rate": 2.0606691445403398e-05, | |
| "loss": 0.6238, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.4726579058923273, | |
| "grad_norm": 0.2081459000808386, | |
| "learning_rate": 2.0440638271708226e-05, | |
| "loss": 0.6135, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.4794404408647734, | |
| "grad_norm": 0.21359018594893273, | |
| "learning_rate": 2.0275026923421315e-05, | |
| "loss": 0.6347, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.486222975837219, | |
| "grad_norm": 0.262459399100271, | |
| "learning_rate": 2.0109861141528853e-05, | |
| "loss": 0.6289, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.493005510809665, | |
| "grad_norm": 0.24248389999156902, | |
| "learning_rate": 1.9945144656952103e-05, | |
| "loss": 0.6128, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.499788045782111, | |
| "grad_norm": 0.22920015918246003, | |
| "learning_rate": 1.9780881190463222e-05, | |
| "loss": 0.634, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.506570580754557, | |
| "grad_norm": 0.2824421315404272, | |
| "learning_rate": 1.9617074452601113e-05, | |
| "loss": 0.6322, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.513353115727003, | |
| "grad_norm": 0.19432350726010572, | |
| "learning_rate": 1.9453728143587697e-05, | |
| "loss": 0.6231, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.520135650699449, | |
| "grad_norm": 0.2846932920722668, | |
| "learning_rate": 1.9290845953244307e-05, | |
| "loss": 0.625, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.526918185671895, | |
| "grad_norm": 0.16416357169798437, | |
| "learning_rate": 1.9128431560908263e-05, | |
| "loss": 0.6191, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.5337007206443407, | |
| "grad_norm": 0.2747654462569946, | |
| "learning_rate": 1.896648863534988e-05, | |
| "loss": 0.622, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.540483255616787, | |
| "grad_norm": 0.15903407751400953, | |
| "learning_rate": 1.8805020834689487e-05, | |
| "loss": 0.6194, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.5472657905892326, | |
| "grad_norm": 0.20365263598678185, | |
| "learning_rate": 1.864403180631489e-05, | |
| "loss": 0.6255, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.554048325561679, | |
| "grad_norm": 0.16301528679144703, | |
| "learning_rate": 1.8483525186798875e-05, | |
| "loss": 0.6253, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.5608308605341246, | |
| "grad_norm": 0.23098070191792286, | |
| "learning_rate": 1.832350460181717e-05, | |
| "loss": 0.62, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.5676133955065703, | |
| "grad_norm": 0.16673057537934008, | |
| "learning_rate": 1.8163973666066473e-05, | |
| "loss": 0.629, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.5743959304790165, | |
| "grad_norm": 0.19378211117923547, | |
| "learning_rate": 1.8004935983182835e-05, | |
| "loss": 0.6344, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.5811784654514627, | |
| "grad_norm": 0.17643236928174555, | |
| "learning_rate": 1.784639514566021e-05, | |
| "loss": 0.6284, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.5879610004239084, | |
| "grad_norm": 0.19805537148789543, | |
| "learning_rate": 1.7688354734769378e-05, | |
| "loss": 0.6245, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.594743535396354, | |
| "grad_norm": 0.17444243321183842, | |
| "learning_rate": 1.753081832047697e-05, | |
| "loss": 0.6241, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.6015260703688003, | |
| "grad_norm": 0.22629631276174578, | |
| "learning_rate": 1.7373789461364904e-05, | |
| "loss": 0.6296, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.6083086053412465, | |
| "grad_norm": 0.2322655718660276, | |
| "learning_rate": 1.7217271704549913e-05, | |
| "loss": 0.6335, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.6150911403136923, | |
| "grad_norm": 0.20701067571130669, | |
| "learning_rate": 1.7061268585603507e-05, | |
| "loss": 0.6307, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.621873675286138, | |
| "grad_norm": 0.2206959741176191, | |
| "learning_rate": 1.6905783628472074e-05, | |
| "loss": 0.6347, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.628656210258584, | |
| "grad_norm": 0.15861912044210094, | |
| "learning_rate": 1.6750820345397217e-05, | |
| "loss": 0.6265, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.63543874523103, | |
| "grad_norm": 0.2001185331683702, | |
| "learning_rate": 1.659638223683653e-05, | |
| "loss": 0.6287, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.642221280203476, | |
| "grad_norm": 0.1746493458635906, | |
| "learning_rate": 1.644247279138442e-05, | |
| "loss": 0.6361, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.649003815175922, | |
| "grad_norm": 0.17095450468813203, | |
| "learning_rate": 1.628909548569339e-05, | |
| "loss": 0.6251, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.655786350148368, | |
| "grad_norm": 0.15588769920603499, | |
| "learning_rate": 1.6136253784395455e-05, | |
| "loss": 0.629, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.662568885120814, | |
| "grad_norm": 0.14759859028229927, | |
| "learning_rate": 1.5983951140023864e-05, | |
| "loss": 0.6174, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.66935142009326, | |
| "grad_norm": 0.16662793387428904, | |
| "learning_rate": 1.583219099293516e-05, | |
| "loss": 0.6192, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.6761339550657057, | |
| "grad_norm": 0.16325631134586066, | |
| "learning_rate": 1.568097677123146e-05, | |
| "loss": 0.6193, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.682916490038152, | |
| "grad_norm": 0.1590144709587067, | |
| "learning_rate": 1.5530311890682946e-05, | |
| "loss": 0.6126, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.6896990250105977, | |
| "grad_norm": 0.16971629601446409, | |
| "learning_rate": 1.5380199754650838e-05, | |
| "loss": 0.6204, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.6964815599830434, | |
| "grad_norm": 0.17653934494907608, | |
| "learning_rate": 1.5230643754010355e-05, | |
| "loss": 0.634, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.7032640949554896, | |
| "grad_norm": 0.1602687042337523, | |
| "learning_rate": 1.508164726707427e-05, | |
| "loss": 0.6132, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.7100466299279358, | |
| "grad_norm": 0.1853250131338786, | |
| "learning_rate": 1.4933213659516485e-05, | |
| "loss": 0.6263, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.7168291649003815, | |
| "grad_norm": 0.16166905381502983, | |
| "learning_rate": 1.4785346284296078e-05, | |
| "loss": 0.6158, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.7236116998728273, | |
| "grad_norm": 0.19092582748564702, | |
| "learning_rate": 1.4638048481581537e-05, | |
| "loss": 0.6235, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.7303942348452734, | |
| "grad_norm": 0.14148685680959427, | |
| "learning_rate": 1.4491323578675265e-05, | |
| "loss": 0.6137, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.7371767698177196, | |
| "grad_norm": 0.18984864298487686, | |
| "learning_rate": 1.4345174889938514e-05, | |
| "loss": 0.6329, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.7439593047901654, | |
| "grad_norm": 0.14327461316917306, | |
| "learning_rate": 1.4199605716716414e-05, | |
| "loss": 0.6272, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.750741839762611, | |
| "grad_norm": 0.18666689301215467, | |
| "learning_rate": 1.4054619347263487e-05, | |
| "loss": 0.6363, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.7575243747350573, | |
| "grad_norm": 0.14946115608698116, | |
| "learning_rate": 1.3910219056669335e-05, | |
| "loss": 0.6218, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.764306909707503, | |
| "grad_norm": 0.17777432794193568, | |
| "learning_rate": 1.3766408106784601e-05, | |
| "loss": 0.6278, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.771089444679949, | |
| "grad_norm": 0.16174854488646243, | |
| "learning_rate": 1.3623189746147398e-05, | |
| "loss": 0.6227, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.777871979652395, | |
| "grad_norm": 0.15621831808343503, | |
| "learning_rate": 1.3480567209909863e-05, | |
| "loss": 0.6296, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.784654514624841, | |
| "grad_norm": 0.17960424688042897, | |
| "learning_rate": 1.333854371976505e-05, | |
| "loss": 0.6175, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.791437049597287, | |
| "grad_norm": 0.161165540194335, | |
| "learning_rate": 1.319712248387424e-05, | |
| "loss": 0.6259, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.798219584569733, | |
| "grad_norm": 0.18195946037024635, | |
| "learning_rate": 1.3056306696794377e-05, | |
| "loss": 0.6259, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.805002119542179, | |
| "grad_norm": 0.15299855099029444, | |
| "learning_rate": 1.2916099539405997e-05, | |
| "loss": 0.623, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.811784654514625, | |
| "grad_norm": 0.19532294279356358, | |
| "learning_rate": 1.2776504178841291e-05, | |
| "loss": 0.629, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.8185671894870707, | |
| "grad_norm": 0.1505952319707712, | |
| "learning_rate": 1.2637523768412633e-05, | |
| "loss": 0.6239, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.8253497244595165, | |
| "grad_norm": 0.15785614162676143, | |
| "learning_rate": 1.249916144754133e-05, | |
| "loss": 0.6213, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.8321322594319627, | |
| "grad_norm": 0.17665393013957048, | |
| "learning_rate": 1.2361420341686641e-05, | |
| "loss": 0.6205, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.838914794404409, | |
| "grad_norm": 0.16420702224755918, | |
| "learning_rate": 1.22243035622753e-05, | |
| "loss": 0.6221, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.8456973293768546, | |
| "grad_norm": 0.17607498450226258, | |
| "learning_rate": 1.2087814206631095e-05, | |
| "loss": 0.6212, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.8524798643493003, | |
| "grad_norm": 0.19592268500165855, | |
| "learning_rate": 1.1951955357905033e-05, | |
| "loss": 0.6199, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.8592623993217465, | |
| "grad_norm": 0.1402346539755977, | |
| "learning_rate": 1.1816730085005603e-05, | |
| "loss": 0.6198, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.8660449342941927, | |
| "grad_norm": 0.18099131317071607, | |
| "learning_rate": 1.1682141442529473e-05, | |
| "loss": 0.6256, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.8728274692666385, | |
| "grad_norm": 0.19545074620930133, | |
| "learning_rate": 1.1548192470692516e-05, | |
| "loss": 0.6157, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.879610004239084, | |
| "grad_norm": 0.15514995332077897, | |
| "learning_rate": 1.1414886195261135e-05, | |
| "loss": 0.6262, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.8863925392115304, | |
| "grad_norm": 0.1534732088259275, | |
| "learning_rate": 1.128222562748384e-05, | |
| "loss": 0.6218, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.893175074183976, | |
| "grad_norm": 0.16016994591208966, | |
| "learning_rate": 1.1150213764023357e-05, | |
| "loss": 0.6264, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.8999576091564223, | |
| "grad_norm": 0.1393505701166259, | |
| "learning_rate": 1.1018853586888794e-05, | |
| "loss": 0.6217, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.906740144128868, | |
| "grad_norm": 0.13613614708020336, | |
| "learning_rate": 1.0888148063368411e-05, | |
| "loss": 0.6271, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.9135226791013142, | |
| "grad_norm": 0.13989845925554537, | |
| "learning_rate": 1.075810014596248e-05, | |
| "loss": 0.6246, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.92030521407376, | |
| "grad_norm": 0.14295575937313854, | |
| "learning_rate": 1.0628712772316682e-05, | |
| "loss": 0.6186, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.927087749046206, | |
| "grad_norm": 0.14578310176151307, | |
| "learning_rate": 1.04999888651557e-05, | |
| "loss": 0.6237, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.933870284018652, | |
| "grad_norm": 0.1418828430912816, | |
| "learning_rate": 1.0371931332217175e-05, | |
| "loss": 0.6247, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.940652818991098, | |
| "grad_norm": 0.1325595548114853, | |
| "learning_rate": 1.0244543066186102e-05, | |
| "loss": 0.623, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.947435353963544, | |
| "grad_norm": 0.1458164336431073, | |
| "learning_rate": 1.0117826944629425e-05, | |
| "loss": 0.6311, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.9542178889359896, | |
| "grad_norm": 0.1369850394553834, | |
| "learning_rate": 9.991785829931024e-06, | |
| "loss": 0.6352, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.9610004239084358, | |
| "grad_norm": 0.11861951638152013, | |
| "learning_rate": 9.866422569227133e-06, | |
| "loss": 0.616, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.967782958880882, | |
| "grad_norm": 0.12198529312439654, | |
| "learning_rate": 9.74173999434192e-06, | |
| "loss": 0.6267, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.9745654938533277, | |
| "grad_norm": 0.13835745132234695, | |
| "learning_rate": 9.61774092172362e-06, | |
| "loss": 0.6228, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.9813480288257734, | |
| "grad_norm": 0.14469581512913757, | |
| "learning_rate": 9.494428152380868e-06, | |
| "loss": 0.6249, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.9881305637982196, | |
| "grad_norm": 0.14538533970008014, | |
| "learning_rate": 9.371804471819401e-06, | |
| "loss": 0.6327, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.994913098770666, | |
| "grad_norm": 0.12842111782896357, | |
| "learning_rate": 9.249872649979203e-06, | |
| "loss": 0.6303, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 4.001695633743111, | |
| "grad_norm": 0.26900932028906, | |
| "learning_rate": 9.128635441171854e-06, | |
| "loss": 0.947, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 4.008478168715557, | |
| "grad_norm": 0.1546620990829906, | |
| "learning_rate": 9.008095584018406e-06, | |
| "loss": 0.6138, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 4.0152607036880035, | |
| "grad_norm": 0.15420921751444336, | |
| "learning_rate": 8.888255801387417e-06, | |
| "loss": 0.6084, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 4.02204323866045, | |
| "grad_norm": 0.15369650428751042, | |
| "learning_rate": 8.76911880033354e-06, | |
| "loss": 0.6196, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 4.028825773632895, | |
| "grad_norm": 0.1491321631121837, | |
| "learning_rate": 8.65068727203633e-06, | |
| "loss": 0.6193, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 4.035608308605341, | |
| "grad_norm": 0.14296040181465852, | |
| "learning_rate": 8.53296389173941e-06, | |
| "loss": 0.5971, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 4.042390843577787, | |
| "grad_norm": 0.17332216986843843, | |
| "learning_rate": 8.415951318690134e-06, | |
| "loss": 0.6085, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 4.0491733785502335, | |
| "grad_norm": 0.1392839623319017, | |
| "learning_rate": 8.299652196079462e-06, | |
| "loss": 0.6001, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 4.055955913522679, | |
| "grad_norm": 0.13152438443257536, | |
| "learning_rate": 8.184069150982217e-06, | |
| "loss": 0.6105, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 4.062738448495125, | |
| "grad_norm": 0.1534567228015376, | |
| "learning_rate": 8.069204794297843e-06, | |
| "loss": 0.6055, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 4.069520983467571, | |
| "grad_norm": 0.13360654544275674, | |
| "learning_rate": 7.955061720691315e-06, | |
| "loss": 0.6032, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 4.076303518440017, | |
| "grad_norm": 0.12240252460009761, | |
| "learning_rate": 7.841642508534617e-06, | |
| "loss": 0.6123, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 4.083086053412463, | |
| "grad_norm": 0.13532197093706563, | |
| "learning_rate": 7.728949719848464e-06, | |
| "loss": 0.6153, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 4.089868588384909, | |
| "grad_norm": 0.14193336659405498, | |
| "learning_rate": 7.6169859002443954e-06, | |
| "loss": 0.6168, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 4.096651123357355, | |
| "grad_norm": 0.12702143209359149, | |
| "learning_rate": 7.505753578867354e-06, | |
| "loss": 0.6151, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 4.1034336583298, | |
| "grad_norm": 0.13369676075093645, | |
| "learning_rate": 7.395255268338459e-06, | |
| "loss": 0.6166, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 4.1102161933022465, | |
| "grad_norm": 0.1334402641156527, | |
| "learning_rate": 7.285493464698343e-06, | |
| "loss": 0.6103, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 4.116998728274693, | |
| "grad_norm": 0.11322677082673437, | |
| "learning_rate": 7.176470647350675e-06, | |
| "loss": 0.6213, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 4.123781263247139, | |
| "grad_norm": 0.1301924506080975, | |
| "learning_rate": 7.068189279006237e-06, | |
| "loss": 0.6099, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 4.130563798219584, | |
| "grad_norm": 0.13846251217535666, | |
| "learning_rate": 6.960651805627248e-06, | |
| "loss": 0.5961, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 4.13734633319203, | |
| "grad_norm": 0.11426017628960868, | |
| "learning_rate": 6.853860656372125e-06, | |
| "loss": 0.6001, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 4.144128868164477, | |
| "grad_norm": 0.11173851423251971, | |
| "learning_rate": 6.747818243540587e-06, | |
| "loss": 0.6068, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 4.150911403136923, | |
| "grad_norm": 0.13703419453889504, | |
| "learning_rate": 6.642526962519218e-06, | |
| "loss": 0.6083, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 4.157693938109368, | |
| "grad_norm": 0.12103577637533855, | |
| "learning_rate": 6.537989191727292e-06, | |
| "loss": 0.6026, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 4.164476473081814, | |
| "grad_norm": 0.12350144156787667, | |
| "learning_rate": 6.4342072925631125e-06, | |
| "loss": 0.6019, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 4.17125900805426, | |
| "grad_norm": 0.11640263763641474, | |
| "learning_rate": 6.331183609350602e-06, | |
| "loss": 0.5954, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 4.178041543026707, | |
| "grad_norm": 0.10545213840827801, | |
| "learning_rate": 6.228920469286408e-06, | |
| "loss": 0.6117, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 4.184824077999152, | |
| "grad_norm": 0.11495651066805103, | |
| "learning_rate": 6.127420182387314e-06, | |
| "loss": 0.5946, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 4.191606612971598, | |
| "grad_norm": 0.11620572636315642, | |
| "learning_rate": 6.026685041438018e-06, | |
| "loss": 0.6034, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 4.198389147944044, | |
| "grad_norm": 0.10866699027698076, | |
| "learning_rate": 5.926717321939417e-06, | |
| "loss": 0.6048, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 4.2051716829164905, | |
| "grad_norm": 0.1245636539367363, | |
| "learning_rate": 5.827519282057128e-06, | |
| "loss": 0.6037, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 4.211954217888936, | |
| "grad_norm": 0.10644196388055245, | |
| "learning_rate": 5.729093162570545e-06, | |
| "loss": 0.607, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 4.218736752861382, | |
| "grad_norm": 0.10894266432537414, | |
| "learning_rate": 5.631441186822168e-06, | |
| "loss": 0.6135, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 4.225519287833828, | |
| "grad_norm": 0.12336549859525484, | |
| "learning_rate": 5.534565560667426e-06, | |
| "loss": 0.5981, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 4.232301822806273, | |
| "grad_norm": 0.10948607920606619, | |
| "learning_rate": 5.4384684724248096e-06, | |
| "loss": 0.6118, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 4.23908435777872, | |
| "grad_norm": 0.10785742485131251, | |
| "learning_rate": 5.343152092826476e-06, | |
| "loss": 0.6184, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 4.245866892751166, | |
| "grad_norm": 0.1346812509747976, | |
| "learning_rate": 5.2486185749691665e-06, | |
| "loss": 0.6058, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 4.252649427723612, | |
| "grad_norm": 0.10711676011900324, | |
| "learning_rate": 5.154870054265635e-06, | |
| "loss": 0.6071, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 4.259431962696057, | |
| "grad_norm": 0.09686690911531301, | |
| "learning_rate": 5.061908648396338e-06, | |
| "loss": 0.6019, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 4.2662144976685035, | |
| "grad_norm": 0.11286945799474804, | |
| "learning_rate": 4.969736457261669e-06, | |
| "loss": 0.604, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 4.27299703264095, | |
| "grad_norm": 0.12539528001284939, | |
| "learning_rate": 4.878355562934465e-06, | |
| "loss": 0.6103, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.279779567613396, | |
| "grad_norm": 0.11846976963374484, | |
| "learning_rate": 4.787768029613014e-06, | |
| "loss": 0.6102, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 4.286562102585841, | |
| "grad_norm": 0.11455949347213798, | |
| "learning_rate": 4.697975903574419e-06, | |
| "loss": 0.6044, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 4.293344637558287, | |
| "grad_norm": 0.11423245618436771, | |
| "learning_rate": 4.608981213128347e-06, | |
| "loss": 0.6104, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 4.3001271725307335, | |
| "grad_norm": 0.11763066458751498, | |
| "learning_rate": 4.520785968571266e-06, | |
| "loss": 0.6103, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 4.30690970750318, | |
| "grad_norm": 0.12407775475836062, | |
| "learning_rate": 4.433392162140968e-06, | |
| "loss": 0.6127, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 4.313692242475625, | |
| "grad_norm": 0.1177975105922243, | |
| "learning_rate": 4.3468017679716245e-06, | |
| "loss": 0.6258, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 4.320474777448071, | |
| "grad_norm": 0.11062581278413729, | |
| "learning_rate": 4.261016742049155e-06, | |
| "loss": 0.617, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 4.327257312420517, | |
| "grad_norm": 0.11328064594157465, | |
| "learning_rate": 4.176039022167064e-06, | |
| "loss": 0.6086, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 4.3340398473929636, | |
| "grad_norm": 0.12286256944458376, | |
| "learning_rate": 4.0918705278826685e-06, | |
| "loss": 0.6108, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 4.340822382365409, | |
| "grad_norm": 0.11726282791596607, | |
| "learning_rate": 4.008513160473726e-06, | |
| "loss": 0.6122, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 4.347604917337855, | |
| "grad_norm": 0.13243402516995725, | |
| "learning_rate": 3.925968802895477e-06, | |
| "loss": 0.6059, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 4.354387452310301, | |
| "grad_norm": 0.11732415982857679, | |
| "learning_rate": 3.8442393197381456e-06, | |
| "loss": 0.6159, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 4.3611699872827465, | |
| "grad_norm": 0.11432499142874704, | |
| "learning_rate": 3.7633265571847787e-06, | |
| "loss": 0.6053, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 4.367952522255193, | |
| "grad_norm": 0.12438661313712396, | |
| "learning_rate": 3.683232342969594e-06, | |
| "loss": 0.6185, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 4.374735057227639, | |
| "grad_norm": 0.12003558611356997, | |
| "learning_rate": 3.603958486336625e-06, | |
| "loss": 0.6311, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 4.381517592200085, | |
| "grad_norm": 0.10916503150131288, | |
| "learning_rate": 3.5255067779989173e-06, | |
| "loss": 0.6158, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 4.38830012717253, | |
| "grad_norm": 0.11467284610857227, | |
| "learning_rate": 3.4478789900980545e-06, | |
| "loss": 0.6176, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 4.395082662144977, | |
| "grad_norm": 0.11307569011740914, | |
| "learning_rate": 3.371076876164101e-06, | |
| "loss": 0.614, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 4.401865197117423, | |
| "grad_norm": 0.11385086584182105, | |
| "learning_rate": 3.295102171076039e-06, | |
| "loss": 0.6098, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 4.408647732089869, | |
| "grad_norm": 0.13858694628446455, | |
| "learning_rate": 3.2199565910225305e-06, | |
| "loss": 0.5976, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.415430267062314, | |
| "grad_norm": 0.12042166145746055, | |
| "learning_rate": 3.145641833463198e-06, | |
| "loss": 0.6164, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.42221280203476, | |
| "grad_norm": 0.11519596457203823, | |
| "learning_rate": 3.0721595770902435e-06, | |
| "loss": 0.6156, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.428995337007207, | |
| "grad_norm": 0.09905226368345378, | |
| "learning_rate": 2.9995114817905493e-06, | |
| "loss": 0.6001, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.435777871979653, | |
| "grad_norm": 0.11732702729180129, | |
| "learning_rate": 2.927699188608171e-06, | |
| "loss": 0.607, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.442560406952098, | |
| "grad_norm": 0.1179451846081634, | |
| "learning_rate": 2.856724319707289e-06, | |
| "loss": 0.6169, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.449342941924544, | |
| "grad_norm": 0.10442323225441018, | |
| "learning_rate": 2.7865884783355234e-06, | |
| "loss": 0.6179, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.4561254768969905, | |
| "grad_norm": 0.09435389572080079, | |
| "learning_rate": 2.717293248787769e-06, | |
| "loss": 0.6201, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.462908011869436, | |
| "grad_norm": 0.11295536763791658, | |
| "learning_rate": 2.648840196370368e-06, | |
| "loss": 0.6019, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.469690546841882, | |
| "grad_norm": 0.11594274422462209, | |
| "learning_rate": 2.5812308673657825e-06, | |
| "loss": 0.5977, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.476473081814328, | |
| "grad_norm": 0.10182885233809727, | |
| "learning_rate": 2.5144667889976316e-06, | |
| "loss": 0.6017, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.483255616786774, | |
| "grad_norm": 0.10348633393081054, | |
| "learning_rate": 2.4485494693962197e-06, | |
| "loss": 0.6086, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.49003815175922, | |
| "grad_norm": 0.10025894496329021, | |
| "learning_rate": 2.3834803975644772e-06, | |
| "loss": 0.6182, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.496820686731666, | |
| "grad_norm": 0.10187054293463914, | |
| "learning_rate": 2.3192610433442784e-06, | |
| "loss": 0.6074, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.503603221704112, | |
| "grad_norm": 0.09715916393818302, | |
| "learning_rate": 2.2558928573833016e-06, | |
| "loss": 0.6076, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.510385756676558, | |
| "grad_norm": 0.09756303138919038, | |
| "learning_rate": 2.1933772711021995e-06, | |
| "loss": 0.6019, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.5171682916490035, | |
| "grad_norm": 0.09525032911781027, | |
| "learning_rate": 2.1317156966623243e-06, | |
| "loss": 0.5995, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.52395082662145, | |
| "grad_norm": 0.09487242097788966, | |
| "learning_rate": 2.0709095269337755e-06, | |
| "loss": 0.6023, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.530733361593896, | |
| "grad_norm": 0.11476206706719008, | |
| "learning_rate": 2.0109601354639706e-06, | |
| "loss": 0.6143, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.537515896566342, | |
| "grad_norm": 0.09904862194550892, | |
| "learning_rate": 1.9518688764466096e-06, | |
| "loss": 0.6143, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.544298431538787, | |
| "grad_norm": 0.09278301353904947, | |
| "learning_rate": 1.8936370846910845e-06, | |
| "loss": 0.6092, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.5510809665112335, | |
| "grad_norm": 0.10688691601881584, | |
| "learning_rate": 1.8362660755923079e-06, | |
| "loss": 0.6186, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.55786350148368, | |
| "grad_norm": 0.09697916608330605, | |
| "learning_rate": 1.7797571451010442e-06, | |
| "loss": 0.5983, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.564646036456125, | |
| "grad_norm": 0.10471706981067595, | |
| "learning_rate": 1.7241115696945821e-06, | |
| "loss": 0.6132, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.09406802929392503, | |
| "learning_rate": 1.669330606347952e-06, | |
| "loss": 0.5962, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.578211106401017, | |
| "grad_norm": 0.0954037644840885, | |
| "learning_rate": 1.6154154925054878e-06, | |
| "loss": 0.6141, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.5849936413734635, | |
| "grad_norm": 0.10094861062769968, | |
| "learning_rate": 1.5623674460529014e-06, | |
| "loss": 0.6024, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.59177617634591, | |
| "grad_norm": 0.09573172404295381, | |
| "learning_rate": 1.510187665289773e-06, | |
| "loss": 0.6086, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.598558711318355, | |
| "grad_norm": 0.09995771347446304, | |
| "learning_rate": 1.458877328902455e-06, | |
| "loss": 0.6052, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.605341246290801, | |
| "grad_norm": 0.09656263063599439, | |
| "learning_rate": 1.4084375959374864e-06, | |
| "loss": 0.6072, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.612123781263247, | |
| "grad_norm": 0.09822686472765482, | |
| "learning_rate": 1.3588696057753725e-06, | |
| "loss": 0.6089, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.618906316235693, | |
| "grad_norm": 0.09676277658643694, | |
| "learning_rate": 1.3101744781048774e-06, | |
| "loss": 0.6016, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.625688851208139, | |
| "grad_norm": 0.0933744691144583, | |
| "learning_rate": 1.262353312897715e-06, | |
| "loss": 0.6075, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.632471386180585, | |
| "grad_norm": 0.0996442423520143, | |
| "learning_rate": 1.2154071903837107e-06, | |
| "loss": 0.6157, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.639253921153031, | |
| "grad_norm": 0.10109190980066803, | |
| "learning_rate": 1.1693371710263857e-06, | |
| "loss": 0.6049, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.6460364561254766, | |
| "grad_norm": 0.08703236124988406, | |
| "learning_rate": 1.1241442954990301e-06, | |
| "loss": 0.6048, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.652818991097923, | |
| "grad_norm": 0.09280509544326945, | |
| "learning_rate": 1.0798295846611562e-06, | |
| "loss": 0.5989, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.659601526070369, | |
| "grad_norm": 0.09785736700280491, | |
| "learning_rate": 1.0363940395354777e-06, | |
| "loss": 0.6098, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.666384061042815, | |
| "grad_norm": 0.09508110174195833, | |
| "learning_rate": 9.938386412852652e-07, | |
| "loss": 0.6149, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.67316659601526, | |
| "grad_norm": 0.09065950330259104, | |
| "learning_rate": 9.52164351192213e-07, | |
| "loss": 0.6088, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.679949130987707, | |
| "grad_norm": 0.09551867620930947, | |
| "learning_rate": 9.113721106346918e-07, | |
| "loss": 0.5989, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.686731665960153, | |
| "grad_norm": 0.09832290856026324, | |
| "learning_rate": 8.714628410665082e-07, | |
| "loss": 0.6172, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.693514200932599, | |
| "grad_norm": 0.09371892802996978, | |
| "learning_rate": 8.324374439960947e-07, | |
| "loss": 0.6067, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.700296735905044, | |
| "grad_norm": 0.09014551117205812, | |
| "learning_rate": 7.942968009661123e-07, | |
| "loss": 0.6142, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.7070792708774905, | |
| "grad_norm": 0.09724706571681141, | |
| "learning_rate": 7.570417735335733e-07, | |
| "loss": 0.614, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.713861805849937, | |
| "grad_norm": 0.08494611369686998, | |
| "learning_rate": 7.206732032503638e-07, | |
| "loss": 0.5969, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.720644340822382, | |
| "grad_norm": 0.0927927564675014, | |
| "learning_rate": 6.851919116442274e-07, | |
| "loss": 0.6122, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.727426875794828, | |
| "grad_norm": 0.09273724936533222, | |
| "learning_rate": 6.505987002002245e-07, | |
| "loss": 0.6092, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.734209410767274, | |
| "grad_norm": 0.08796818616185165, | |
| "learning_rate": 6.168943503426139e-07, | |
| "loss": 0.6038, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.7409919457397205, | |
| "grad_norm": 0.08686636687902749, | |
| "learning_rate": 5.840796234172085e-07, | |
| "loss": 0.5943, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.747774480712166, | |
| "grad_norm": 0.09236449250012803, | |
| "learning_rate": 5.521552606741765e-07, | |
| "loss": 0.6066, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.754557015684612, | |
| "grad_norm": 0.0926749240950783, | |
| "learning_rate": 5.211219832512893e-07, | |
| "loss": 0.6114, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.761339550657058, | |
| "grad_norm": 0.10499405025384424, | |
| "learning_rate": 4.909804921576466e-07, | |
| "loss": 0.6185, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.768122085629504, | |
| "grad_norm": 0.08848832101586154, | |
| "learning_rate": 4.6173146825782224e-07, | |
| "loss": 0.6067, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.77490462060195, | |
| "grad_norm": 0.09319395321366955, | |
| "learning_rate": 4.3337557225650695e-07, | |
| "loss": 0.6047, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.781687155574396, | |
| "grad_norm": 0.0915059840709739, | |
| "learning_rate": 4.0591344468355666e-07, | |
| "loss": 0.6118, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.788469690546842, | |
| "grad_norm": 0.10729723170818359, | |
| "learning_rate": 3.793457058795591e-07, | |
| "loss": 0.6189, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.795252225519288, | |
| "grad_norm": 0.09285587112058538, | |
| "learning_rate": 3.5367295598178307e-07, | |
| "loss": 0.6085, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.8020347604917335, | |
| "grad_norm": 0.08196537866623092, | |
| "learning_rate": 3.2889577491064693e-07, | |
| "loss": 0.6045, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.80881729546418, | |
| "grad_norm": 0.08720837958758754, | |
| "learning_rate": 3.050147223566091e-07, | |
| "loss": 0.6122, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.815599830436626, | |
| "grad_norm": 0.09158067797742139, | |
| "learning_rate": 2.8203033776752487e-07, | |
| "loss": 0.6033, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.822382365409071, | |
| "grad_norm": 0.09043295389558649, | |
| "learning_rate": 2.599431403364694e-07, | |
| "loss": 0.6094, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.829164900381517, | |
| "grad_norm": 0.09180388224113892, | |
| "learning_rate": 2.3875362899000054e-07, | |
| "loss": 0.604, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.8359474353539635, | |
| "grad_norm": 0.08651605692378189, | |
| "learning_rate": 2.1846228237689226e-07, | |
| "loss": 0.6087, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.84272997032641, | |
| "grad_norm": 0.08511470109771488, | |
| "learning_rate": 1.9906955885732104e-07, | |
| "loss": 0.6091, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.849512505298856, | |
| "grad_norm": 0.08454739481192287, | |
| "learning_rate": 1.8057589649251862e-07, | |
| "loss": 0.6146, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.856295040271301, | |
| "grad_norm": 0.08724704005095565, | |
| "learning_rate": 1.62981713034851e-07, | |
| "loss": 0.6042, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.863077575243747, | |
| "grad_norm": 0.08396574948424247, | |
| "learning_rate": 1.462874059184305e-07, | |
| "loss": 0.6086, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.869860110216194, | |
| "grad_norm": 0.09082130018441244, | |
| "learning_rate": 1.3049335225009175e-07, | |
| "loss": 0.6295, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.876642645188639, | |
| "grad_norm": 0.08722605095693269, | |
| "learning_rate": 1.1559990880089189e-07, | |
| "loss": 0.6085, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.883425180161085, | |
| "grad_norm": 0.0878159462681937, | |
| "learning_rate": 1.0160741199805036e-07, | |
| "loss": 0.5998, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.890207715133531, | |
| "grad_norm": 0.0878736816907958, | |
| "learning_rate": 8.851617791735933e-08, | |
| "loss": 0.6145, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.896990250105977, | |
| "grad_norm": 0.08847166155449576, | |
| "learning_rate": 7.632650227602511e-08, | |
| "loss": 0.604, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.903772785078423, | |
| "grad_norm": 0.08650107449834574, | |
| "learning_rate": 6.503866042599338e-08, | |
| "loss": 0.6187, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.910555320050869, | |
| "grad_norm": 0.09963600285093287, | |
| "learning_rate": 5.4652907347745307e-08, | |
| "loss": 0.6166, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.917337855023315, | |
| "grad_norm": 0.08660963081265105, | |
| "learning_rate": 4.516947764451107e-08, | |
| "loss": 0.6156, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.924120389995761, | |
| "grad_norm": 0.09162932265931903, | |
| "learning_rate": 3.6588585537002955e-08, | |
| "loss": 0.6085, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.930902924968207, | |
| "grad_norm": 0.08533741638096395, | |
| "learning_rate": 2.8910424858543673e-08, | |
| "loss": 0.6026, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.937685459940653, | |
| "grad_norm": 0.08431534489847897, | |
| "learning_rate": 2.213516905070101e-08, | |
| "loss": 0.6012, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.944467994913099, | |
| "grad_norm": 0.08633616914365788, | |
| "learning_rate": 1.6262971159384243e-08, | |
| "loss": 0.6146, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.951250529885545, | |
| "grad_norm": 0.0931259062674402, | |
| "learning_rate": 1.1293963831366939e-08, | |
| "loss": 0.6116, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.9580330648579904, | |
| "grad_norm": 0.08678440176862204, | |
| "learning_rate": 7.228259311293784e-09, | |
| "loss": 0.602, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.964815599830437, | |
| "grad_norm": 0.0942499520961841, | |
| "learning_rate": 4.065949439158168e-09, | |
| "loss": 0.6099, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.971598134802883, | |
| "grad_norm": 0.07935089647201646, | |
| "learning_rate": 1.8071056482105166e-09, | |
| "loss": 0.615, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.978380669775328, | |
| "grad_norm": 0.08485220331271215, | |
| "learning_rate": 4.517789633551317e-10, | |
| "loss": 0.6104, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.985163204747774, | |
| "grad_norm": 0.08704984136629952, | |
| "learning_rate": 0.0, | |
| "loss": 0.6111, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.985163204747774, | |
| "step": 735, | |
| "total_flos": 1.8913738317545603e+19, | |
| "train_loss": 0.673498132682982, | |
| "train_runtime": 162938.9767, | |
| "train_samples_per_second": 2.316, | |
| "train_steps_per_second": 0.005 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 735, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8913738317545603e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |