| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998209489704566, | |
| "eval_steps": 500, | |
| "global_step": 698, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.523809523809525e-07, | |
| "loss": 5.8567, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.904761904761905e-06, | |
| "loss": 6.0371, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 6.0126, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.80952380952381e-06, | |
| "loss": 5.7297, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.761904761904762e-06, | |
| "loss": 4.5319, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 3.2653, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2.8089, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.61904761904762e-06, | |
| "loss": 2.5048, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 2.3898, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 2.1694, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 1.816, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 1.8469, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2380952380952383e-05, | |
| "loss": 1.7793, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.576, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 1.5517, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.523809523809524e-05, | |
| "loss": 1.614, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 1.3789, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 1.401, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.8095238095238097e-05, | |
| "loss": 1.2179, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.904761904761905e-05, | |
| "loss": 1.309, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2e-05, | |
| "loss": 1.248, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.99998923307636e-05, | |
| "loss": 1.3434, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9999569325372924e-05, | |
| "loss": 1.2565, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.999903099078353e-05, | |
| "loss": 1.4607, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9998277338587826e-05, | |
| "loss": 1.2576, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9997308385014843e-05, | |
| "loss": 1.2126, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9996124150929886e-05, | |
| "loss": 1.1541, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9994724661834065e-05, | |
| "loss": 1.0114, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9993109947863768e-05, | |
| "loss": 1.1159, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9991280043789992e-05, | |
| "loss": 0.9749, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9989234989017622e-05, | |
| "loss": 1.0364, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.998697482758455e-05, | |
| "loss": 1.0704, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9984499608160744e-05, | |
| "loss": 0.9538, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.998180938404721e-05, | |
| "loss": 1.1589, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9978904213174812e-05, | |
| "loss": 1.1763, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9975784158103062e-05, | |
| "loss": 1.1016, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.997244928601875e-05, | |
| "loss": 1.0959, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9968899668734503e-05, | |
| "loss": 1.058, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9965135382687235e-05, | |
| "loss": 0.9515, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9961156508936505e-05, | |
| "loss": 1.1494, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9956963133162776e-05, | |
| "loss": 1.1608, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9952555345665563e-05, | |
| "loss": 1.0207, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9947933241361482e-05, | |
| "loss": 0.8301, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9943096919782227e-05, | |
| "loss": 1.0359, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.993804648507241e-05, | |
| "loss": 0.9566, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9932782045987317e-05, | |
| "loss": 1.0352, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9927303715890573e-05, | |
| "loss": 1.0771, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9921611612751707e-05, | |
| "loss": 1.0692, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9915705859143597e-05, | |
| "loss": 1.0156, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9909586582239835e-05, | |
| "loss": 0.8363, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9903253913812003e-05, | |
| "loss": 0.9625, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.989670799022681e-05, | |
| "loss": 0.9653, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9889948952443174e-05, | |
| "loss": 1.0257, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9882976946009188e-05, | |
| "loss": 0.9201, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.987579212105897e-05, | |
| "loss": 1.0039, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9868394632309443e-05, | |
| "loss": 0.9089, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9860784639057e-05, | |
| "loss": 0.9722, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.985296230517408e-05, | |
| "loss": 0.8276, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9844927799105615e-05, | |
| "loss": 0.9444, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9836681293865437e-05, | |
| "loss": 0.9135, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9828222967032533e-05, | |
| "loss": 0.8918, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.981955300074722e-05, | |
| "loss": 0.9801, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9810671581707223e-05, | |
| "loss": 0.9516, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9801578901163672e-05, | |
| "loss": 1.041, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.979227515491695e-05, | |
| "loss": 0.8907, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9782760543312516e-05, | |
| "loss": 0.8245, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9773035271236566e-05, | |
| "loss": 1.0137, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9763099548111616e-05, | |
| "loss": 0.9072, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9752953587892013e-05, | |
| "loss": 0.8409, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9742597609059317e-05, | |
| "loss": 0.9885, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.973203183461759e-05, | |
| "loss": 0.8947, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9721256492088612e-05, | |
| "loss": 0.8884, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9710271813506954e-05, | |
| "loss": 0.9261, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9699078035415016e-05, | |
| "loss": 0.9486, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.96876753988579e-05, | |
| "loss": 0.9148, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9676064149378246e-05, | |
| "loss": 1.0073, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9664244537010924e-05, | |
| "loss": 0.9977, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9652216816277657e-05, | |
| "loss": 0.8075, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9639981246181557e-05, | |
| "loss": 0.8004, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.962753809020151e-05, | |
| "loss": 0.9598, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9614887616286544e-05, | |
| "loss": 0.8931, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.960203009685003e-05, | |
| "loss": 0.781, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.958896580876383e-05, | |
| "loss": 0.8981, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9575695033352325e-05, | |
| "loss": 0.8702, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9562218056386366e-05, | |
| "loss": 0.8195, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9548535168077124e-05, | |
| "loss": 0.9101, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9534646663069816e-05, | |
| "loss": 0.9772, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9520552840437396e-05, | |
| "loss": 0.9102, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9506254003674084e-05, | |
| "loss": 0.8156, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9491750460688845e-05, | |
| "loss": 0.7806, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9477042523798762e-05, | |
| "loss": 0.8244, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9462130509722307e-05, | |
| "loss": 0.7807, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9447014739572503e-05, | |
| "loss": 0.8917, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.943169553885004e-05, | |
| "loss": 0.8531, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9416173237436252e-05, | |
| "loss": 0.9514, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9400448169586004e-05, | |
| "loss": 0.8841, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9384520673920502e-05, | |
| "loss": 0.8131, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9368391093420004e-05, | |
| "loss": 0.8181, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9352059775416442e-05, | |
| "loss": 0.8195, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9335527071585918e-05, | |
| "loss": 0.8074, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.931879333794115e-05, | |
| "loss": 0.8649, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.930185893482381e-05, | |
| "loss": 0.8792, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.928472422689674e-05, | |
| "loss": 0.8648, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9267389583136124e-05, | |
| "loss": 0.8068, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9249855376823542e-05, | |
| "loss": 0.8233, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9232121985537907e-05, | |
| "loss": 0.8313, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9214189791147363e-05, | |
| "loss": 0.7466, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9196059179801038e-05, | |
| "loss": 0.8399, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.917773054192076e-05, | |
| "loss": 0.9008, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.915920427219261e-05, | |
| "loss": 0.7411, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9140480769558448e-05, | |
| "loss": 0.7961, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.912156043720733e-05, | |
| "loss": 0.8987, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9102443682566792e-05, | |
| "loss": 0.7881, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.908313091729412e-05, | |
| "loss": 0.8272, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9063622557267443e-05, | |
| "loss": 0.7862, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9043919022576817e-05, | |
| "loss": 0.8541, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9024020737515135e-05, | |
| "loss": 0.8827, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.900392813056904e-05, | |
| "loss": 0.8477, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.8983641634409657e-05, | |
| "loss": 0.8459, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.8963161685883294e-05, | |
| "loss": 0.8905, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.894248872600204e-05, | |
| "loss": 0.7484, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.8921623199934255e-05, | |
| "loss": 0.8252, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8900565556994986e-05, | |
| "loss": 0.8301, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8879316250636305e-05, | |
| "loss": 0.8509, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8857875738437526e-05, | |
| "loss": 0.8303, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8836244482095366e-05, | |
| "loss": 0.869, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8814422947414e-05, | |
| "loss": 0.7672, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.8792411604295016e-05, | |
| "loss": 0.8633, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.877021092672732e-05, | |
| "loss": 0.7947, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.874782139277691e-05, | |
| "loss": 0.8519, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.872524348457659e-05, | |
| "loss": 0.7401, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.870247768831559e-05, | |
| "loss": 0.8887, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.867952449422909e-05, | |
| "loss": 0.8262, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.8656384396587663e-05, | |
| "loss": 0.7345, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.863305789368664e-05, | |
| "loss": 0.8879, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.860954548783537e-05, | |
| "loss": 0.7528, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.8585847685346415e-05, | |
| "loss": 0.8162, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.8561964996524628e-05, | |
| "loss": 0.8139, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.853789793565618e-05, | |
| "loss": 0.7945, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.85136470209975e-05, | |
| "loss": 0.8346, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.8489212774764064e-05, | |
| "loss": 0.785, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.8464595723119206e-05, | |
| "loss": 0.7792, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.8439796396162756e-05, | |
| "loss": 0.7224, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.8414815327919633e-05, | |
| "loss": 0.827, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.8389653056328344e-05, | |
| "loss": 0.7432, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.8364310123229406e-05, | |
| "loss": 0.8724, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.833878707435367e-05, | |
| "loss": 0.8208, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.8313084459310567e-05, | |
| "loss": 0.8187, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.8287202831576292e-05, | |
| "loss": 0.7933, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.826114274848186e-05, | |
| "loss": 0.8285, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8234904771201115e-05, | |
| "loss": 0.8506, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8208489464738664e-05, | |
| "loss": 0.8432, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8181897397917672e-05, | |
| "loss": 0.7251, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8155129143367653e-05, | |
| "loss": 0.7721, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8128185277512106e-05, | |
| "loss": 0.7181, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8101066380556127e-05, | |
| "loss": 0.7582, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.80737730364739e-05, | |
| "loss": 0.6928, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.8046305832996128e-05, | |
| "loss": 0.6841, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.8018665361597378e-05, | |
| "loss": 0.8184, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.799085221748334e-05, | |
| "loss": 0.6881, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7962866999578005e-05, | |
| "loss": 0.823, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.793471031051079e-05, | |
| "loss": 0.8332, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7906382756603536e-05, | |
| "loss": 0.7349, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7877884947857457e-05, | |
| "loss": 0.8319, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.784921749794002e-05, | |
| "loss": 0.734, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7820381024171713e-05, | |
| "loss": 0.8613, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7791376147512754e-05, | |
| "loss": 0.8122, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.776220349254973e-05, | |
| "loss": 0.7485, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.773286368748214e-05, | |
| "loss": 0.7714, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7703357364108862e-05, | |
| "loss": 0.8018, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7673685157814556e-05, | |
| "loss": 0.7894, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.764384770755599e-05, | |
| "loss": 0.7003, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.761384565584825e-05, | |
| "loss": 0.7932, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7583679648750945e-05, | |
| "loss": 0.7752, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7553350335854253e-05, | |
| "loss": 0.7647, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7522858370264976e-05, | |
| "loss": 0.8296, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7492204408592447e-05, | |
| "loss": 0.7772, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7461389110934382e-05, | |
| "loss": 0.7059, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7430413140862705e-05, | |
| "loss": 0.7803, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7399277165409222e-05, | |
| "loss": 0.743, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7367981855051275e-05, | |
| "loss": 0.7164, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7336527883697293e-05, | |
| "loss": 0.7179, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.73049159286723e-05, | |
| "loss": 0.8069, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7273146670703298e-05, | |
| "loss": 0.749, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7241220793904644e-05, | |
| "loss": 0.7535, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7209138985763288e-05, | |
| "loss": 0.6617, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.717690193712399e-05, | |
| "loss": 0.8043, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.714451034217443e-05, | |
| "loss": 0.7241, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.711196489843027e-05, | |
| "loss": 0.7573, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7079266306720125e-05, | |
| "loss": 0.8234, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.704641527117047e-05, | |
| "loss": 0.7087, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.7013412499190494e-05, | |
| "loss": 0.6466, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6980258701456843e-05, | |
| "loss": 0.6502, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.694695459189834e-05, | |
| "loss": 0.6613, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6913500887680588e-05, | |
| "loss": 0.7887, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.687989830919055e-05, | |
| "loss": 0.7655, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6846147580021016e-05, | |
| "loss": 0.7749, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6812249426955033e-05, | |
| "loss": 0.6766, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6778204579950258e-05, | |
| "loss": 0.6377, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.674401377212322e-05, | |
| "loss": 0.7007, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6709677739733555e-05, | |
| "loss": 0.7397, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6675197222168144e-05, | |
| "loss": 0.7753, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6640572961925182e-05, | |
| "loss": 0.8122, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6605805704598208e-05, | |
| "loss": 0.6951, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.657089619886002e-05, | |
| "loss": 0.7008, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6535845196446593e-05, | |
| "loss": 0.7901, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.650065345214086e-05, | |
| "loss": 0.7753, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6465321723756464e-05, | |
| "loss": 0.8366, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6429850772121448e-05, | |
| "loss": 0.6737, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6394241361061873e-05, | |
| "loss": 0.6977, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.635849425738535e-05, | |
| "loss": 0.8065, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.632261023086456e-05, | |
| "loss": 0.7527, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6286590054220643e-05, | |
| "loss": 0.6199, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.625043450310658e-05, | |
| "loss": 0.6572, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6214144356090494e-05, | |
| "loss": 0.679, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6177720394638865e-05, | |
| "loss": 0.7324, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6141163403099716e-05, | |
| "loss": 0.7634, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6104474168685724e-05, | |
| "loss": 0.7243, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6067653481457254e-05, | |
| "loss": 0.7528, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.603070213430536e-05, | |
| "loss": 0.7936, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5993620922934716e-05, | |
| "loss": 0.782, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.595641064584646e-05, | |
| "loss": 0.6669, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.591907210432102e-05, | |
| "loss": 0.7199, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.588160610240084e-05, | |
| "loss": 0.7384, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5844013446873087e-05, | |
| "loss": 0.6975, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5806294947252264e-05, | |
| "loss": 0.7367, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5768451415762784e-05, | |
| "loss": 0.6712, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.573048366732147e-05, | |
| "loss": 0.7853, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5692392519520022e-05, | |
| "loss": 0.702, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.56541787926074e-05, | |
| "loss": 0.7576, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5615843309472162e-05, | |
| "loss": 0.6459, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5577386895624743e-05, | |
| "loss": 0.6806, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5538810379179694e-05, | |
| "loss": 0.7799, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5500114590837823e-05, | |
| "loss": 0.6903, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5461300363868326e-05, | |
| "loss": 0.6198, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5422368534090844e-05, | |
| "loss": 0.6099, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.538331993985745e-05, | |
| "loss": 0.7392, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5344155422034608e-05, | |
| "loss": 0.6374, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5304875823985067e-05, | |
| "loss": 0.7201, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.526548199154969e-05, | |
| "loss": 0.651, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5225974773029246e-05, | |
| "loss": 0.7595, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5186355019166153e-05, | |
| "loss": 0.6956, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5146623583126134e-05, | |
| "loss": 0.6793, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5106781320479864e-05, | |
| "loss": 0.7099, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5066829089184545e-05, | |
| "loss": 0.7128, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5026767749565423e-05, | |
| "loss": 0.6939, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.498659816429727e-05, | |
| "loss": 0.7006, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4946321198385796e-05, | |
| "loss": 0.639, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4905937719149038e-05, | |
| "loss": 0.6143, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4865448596198666e-05, | |
| "loss": 0.7363, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4824854701421277e-05, | |
| "loss": 0.6142, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4784156908959593e-05, | |
| "loss": 0.7525, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4743356095193665e-05, | |
| "loss": 0.5949, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4702453138721993e-05, | |
| "loss": 0.5521, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4661448920342585e-05, | |
| "loss": 0.6692, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4620344323034016e-05, | |
| "loss": 0.6782, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4579140231936415e-05, | |
| "loss": 0.723, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4537837534332386e-05, | |
| "loss": 0.6467, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4496437119627907e-05, | |
| "loss": 0.6532, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.445493987933319e-05, | |
| "loss": 0.8364, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4413346707043467e-05, | |
| "loss": 0.6203, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4371658498419758e-05, | |
| "loss": 0.6979, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4329876151169581e-05, | |
| "loss": 0.6578, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4288000565027625e-05, | |
| "loss": 0.6033, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4246032641736362e-05, | |
| "loss": 0.7435, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4203973285026642e-05, | |
| "loss": 0.6783, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4161823400598234e-05, | |
| "loss": 0.6228, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4119583896100309e-05, | |
| "loss": 0.6166, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.4077255681111905e-05, | |
| "loss": 0.6846, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.403483966712234e-05, | |
| "loss": 0.7459, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3992336767511585e-05, | |
| "loss": 0.7032, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3949747897530583e-05, | |
| "loss": 0.6372, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3907073974281562e-05, | |
| "loss": 0.609, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3864315916698266e-05, | |
| "loss": 0.6418, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3821474645526174e-05, | |
| "loss": 0.657, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3778551083302683e-05, | |
| "loss": 0.527, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3735546154337218e-05, | |
| "loss": 0.6964, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3692460784691357e-05, | |
| "loss": 0.746, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3649295902158874e-05, | |
| "loss": 0.6308, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.360605243624575e-05, | |
| "loss": 0.699, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3562731318150177e-05, | |
| "loss": 0.596, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3519333480742502e-05, | |
| "loss": 0.7016, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3475859858545121e-05, | |
| "loss": 0.5943, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3432311387712378e-05, | |
| "loss": 0.6255, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3388689006010394e-05, | |
| "loss": 0.6161, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3344993652796872e-05, | |
| "loss": 0.659, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.330122626900088e-05, | |
| "loss": 0.7248, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.325738779710257e-05, | |
| "loss": 0.6769, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3213479181112906e-05, | |
| "loss": 0.656, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3169501366553314e-05, | |
| "loss": 0.7008, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3125455300435343e-05, | |
| "loss": 0.6463, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3081341931240248e-05, | |
| "loss": 0.5619, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.303716220889859e-05, | |
| "loss": 0.5967, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.2992917084769757e-05, | |
| "loss": 0.6877, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.2948607511621498e-05, | |
| "loss": 0.5901, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.2904234443609395e-05, | |
| "loss": 0.7197, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2859798836256316e-05, | |
| "loss": 0.7116, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2815301646431846e-05, | |
| "loss": 0.668, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.277074383233167e-05, | |
| "loss": 0.7152, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2726126353456955e-05, | |
| "loss": 0.5837, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2681450170593683e-05, | |
| "loss": 0.6508, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2636716245791945e-05, | |
| "loss": 0.622, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2591925542345244e-05, | |
| "loss": 0.5665, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2547079024769757e-05, | |
| "loss": 0.6986, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2502177658783538e-05, | |
| "loss": 0.5567, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2457222411285745e-05, | |
| "loss": 0.637, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2412214250335815e-05, | |
| "loss": 0.601, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2367154145132609e-05, | |
| "loss": 0.7463, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2322043065993556e-05, | |
| "loss": 0.6863, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2276881984333738e-05, | |
| "loss": 0.6335, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2231671872644995e-05, | |
| "loss": 0.6756, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2186413704474964e-05, | |
| "loss": 0.6561, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.214110845440613e-05, | |
| "loss": 0.5206, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.209575709803483e-05, | |
| "loss": 0.5541, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2050360611950245e-05, | |
| "loss": 0.7716, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.200491997371337e-05, | |
| "loss": 0.565, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.1959436161835971e-05, | |
| "loss": 0.6736, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.191391015575951e-05, | |
| "loss": 0.5984, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1868342935834043e-05, | |
| "loss": 0.5351, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.182273548329713e-05, | |
| "loss": 0.5762, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1777088780252688e-05, | |
| "loss": 0.5658, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1731403809649847e-05, | |
| "loss": 0.668, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1685681555261788e-05, | |
| "loss": 0.6045, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1639923001664557e-05, | |
| "loss": 0.6554, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1594129134215852e-05, | |
| "loss": 0.5964, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.154830093903383e-05, | |
| "loss": 0.6268, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1502439402975842e-05, | |
| "loss": 0.643, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1456545513617199e-05, | |
| "loss": 0.674, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.141062025922991e-05, | |
| "loss": 0.6062, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1364664628761391e-05, | |
| "loss": 0.716, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1318679611813166e-05, | |
| "loss": 0.6475, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1272666198619567e-05, | |
| "loss": 0.669, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1226625380026407e-05, | |
| "loss": 0.586, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1180558147469645e-05, | |
| "loss": 0.5703, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1134465492954028e-05, | |
| "loss": 0.6057, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1088348409031744e-05, | |
| "loss": 0.6938, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1042207888781031e-05, | |
| "loss": 0.6842, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.0996044925784805e-05, | |
| "loss": 0.5665, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0949860514109265e-05, | |
| "loss": 0.5285, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0903655648282476e-05, | |
| "loss": 0.5631, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.085743132327296e-05, | |
| "loss": 0.6012, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0811188534468275e-05, | |
| "loss": 0.5105, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0764928277653577e-05, | |
| "loss": 0.6936, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0718651548990165e-05, | |
| "loss": 0.5982, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.067235934499405e-05, | |
| "loss": 0.5279, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0626052662514484e-05, | |
| "loss": 0.6138, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.057973249871249e-05, | |
| "loss": 0.7268, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.053339985103941e-05, | |
| "loss": 0.6102, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0487055717215394e-05, | |
| "loss": 0.5865, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0440701095207948e-05, | |
| "loss": 0.5869, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.039433698321042e-05, | |
| "loss": 0.6136, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.034796437962052e-05, | |
| "loss": 0.6175, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0301584283018813e-05, | |
| "loss": 0.5578, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.025519769214721e-05, | |
| "loss": 0.7355, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.020880560588748e-05, | |
| "loss": 0.6317, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0162409023239718e-05, | |
| "loss": 0.6411, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0116008943300852e-05, | |
| "loss": 0.6395, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0069606365243123e-05, | |
| "loss": 0.6126, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0023202288292552e-05, | |
| "loss": 0.5297, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.97679771170745e-06, | |
| "loss": 0.5626, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.930393634756877e-06, | |
| "loss": 0.5696, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.883991056699146e-06, | |
| "loss": 0.6071, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.837590976760283e-06, | |
| "loss": 0.6008, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.791194394112525e-06, | |
| "loss": 0.525, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.744802307852794e-06, | |
| "loss": 0.4788, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.69841571698119e-06, | |
| "loss": 0.62, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.652035620379481e-06, | |
| "loss": 0.5697, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.605663016789583e-06, | |
| "loss": 0.6032, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.559298904792054e-06, | |
| "loss": 0.4847, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.51294428278461e-06, | |
| "loss": 0.6121, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.466600148960597e-06, | |
| "loss": 0.5883, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.420267501287512e-06, | |
| "loss": 0.5999, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.373947337485521e-06, | |
| "loss": 0.6836, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.327640655005951e-06, | |
| "loss": 0.5001, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.281348451009837e-06, | |
| "loss": 0.5704, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.235071722346424e-06, | |
| "loss": 0.5429, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.188811465531725e-06, | |
| "loss": 0.5853, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.142568676727043e-06, | |
| "loss": 0.4976, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.096344351717528e-06, | |
| "loss": 0.5841, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.050139485890738e-06, | |
| "loss": 0.5662, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.003955074215198e-06, | |
| "loss": 0.5292, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.95779211121897e-06, | |
| "loss": 0.5485, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.911651590968259e-06, | |
| "loss": 0.6976, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.865534507045974e-06, | |
| "loss": 0.6143, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.819441852530358e-06, | |
| "loss": 0.6539, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.773374619973598e-06, | |
| "loss": 0.5753, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.72733380138044e-06, | |
| "loss": 0.5288, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.68132038818684e-06, | |
| "loss": 0.5158, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.63533537123861e-06, | |
| "loss": 0.4977, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.589379740770091e-06, | |
| "loss": 0.6709, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.543454486382803e-06, | |
| "loss": 0.5111, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.497560597024161e-06, | |
| "loss": 0.6411, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.451699060966174e-06, | |
| "loss": 0.651, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.405870865784151e-06, | |
| "loss": 0.6071, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.360076998335447e-06, | |
| "loss": 0.593, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.314318444738215e-06, | |
| "loss": 0.5674, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.268596190350158e-06, | |
| "loss": 0.5944, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.222911219747317e-06, | |
| "loss": 0.6004, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.177264516702875e-06, | |
| "loss": 0.5214, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.131657064165962e-06, | |
| "loss": 0.613, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.086089844240495e-06, | |
| "loss": 0.6016, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.040563838164034e-06, | |
| "loss": 0.4822, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.995080026286632e-06, | |
| "loss": 0.5671, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.949639388049758e-06, | |
| "loss": 0.6044, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.904242901965171e-06, | |
| "loss": 0.6211, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.858891545593871e-06, | |
| "loss": 0.5418, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.81358629552504e-06, | |
| "loss": 0.576, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.768328127355008e-06, | |
| "loss": 0.5502, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.723118015666266e-06, | |
| "loss": 0.5358, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.677956934006447e-06, | |
| "loss": 0.6183, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.632845854867393e-06, | |
| "loss": 0.591, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.5877857496641885e-06, | |
| "loss": 0.5402, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.542777588714256e-06, | |
| "loss": 0.5547, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.497822341216465e-06, | |
| "loss": 0.508, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.452920975230247e-06, | |
| "loss": 0.599, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.408074457654757e-06, | |
| "loss": 0.47, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.363283754208061e-06, | |
| "loss": 0.566, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.318549829406318e-06, | |
| "loss": 0.6632, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.273873646543044e-06, | |
| "loss": 0.5991, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.2292561676683305e-06, | |
| "loss": 0.5955, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.184698353568157e-06, | |
| "loss": 0.5297, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.140201163743686e-06, | |
| "loss": 0.5336, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.095765556390606e-06, | |
| "loss": 0.531, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.051392488378503e-06, | |
| "loss": 0.5654, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.007082915230247e-06, | |
| "loss": 0.485, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.962837791101414e-06, | |
| "loss": 0.5386, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.918658068759754e-06, | |
| "loss": 0.5497, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.874544699564662e-06, | |
| "loss": 0.5798, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.8304986334466884e-06, | |
| "loss": 0.5768, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.786520818887099e-06, | |
| "loss": 0.5271, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.742612202897436e-06, | |
| "loss": 0.5497, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.698773730999124e-06, | |
| "loss": 0.5568, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.655006347203128e-06, | |
| "loss": 0.5791, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.611310993989608e-06, | |
| "loss": 0.5516, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.567688612287625e-06, | |
| "loss": 0.4615, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.524140141454881e-06, | |
| "loss": 0.644, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.480666519257501e-06, | |
| "loss": 0.5063, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.437268681849824e-06, | |
| "loss": 0.6262, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.393947563754253e-06, | |
| "loss": 0.6606, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.350704097841129e-06, | |
| "loss": 0.5263, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.307539215308644e-06, | |
| "loss": 0.5027, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.264453845662785e-06, | |
| "loss": 0.5184, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.2214489166973235e-06, | |
| "loss": 0.6031, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.17852535447383e-06, | |
| "loss": 0.4828, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.135684083301738e-06, | |
| "loss": 0.5925, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.092926025718438e-06, | |
| "loss": 0.5344, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.050252102469417e-06, | |
| "loss": 0.6122, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.007663232488418e-06, | |
| "loss": 0.5284, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5.9651603328776606e-06, | |
| "loss": 0.5838, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5.922744318888098e-06, | |
| "loss": 0.6277, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.880416103899696e-06, | |
| "loss": 0.5112, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.838176599401768e-06, | |
| "loss": 0.6094, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.796026714973359e-06, | |
| "loss": 0.5793, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.753967358263643e-06, | |
| "loss": 0.5859, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.711999434972378e-06, | |
| "loss": 0.5951, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.670123848830419e-06, | |
| "loss": 0.4739, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.628341501580246e-06, | |
| "loss": 0.5585, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.586653292956536e-06, | |
| "loss": 0.6364, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.545060120666812e-06, | |
| "loss": 0.4729, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.5035628803720975e-06, | |
| "loss": 0.5906, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.462162465667614e-06, | |
| "loss": 0.5259, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.4208597680635866e-06, | |
| "loss": 0.5606, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.379655676965984e-06, | |
| "loss": 0.5555, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.338551079657419e-06, | |
| "loss": 0.4976, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.297546861278013e-06, | |
| "loss": 0.5368, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.256643904806335e-06, | |
| "loss": 0.5036, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.215843091040409e-06, | |
| "loss": 0.5338, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.17514529857873e-06, | |
| "loss": 0.4907, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.134551403801336e-06, | |
| "loss": 0.5693, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.0940622808509645e-06, | |
| "loss": 0.6064, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.053678801614205e-06, | |
| "loss": 0.5129, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.013401835702733e-06, | |
| "loss": 0.6519, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.973232250434579e-06, | |
| "loss": 0.5084, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.933170910815457e-06, | |
| "loss": 0.5823, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.893218679520137e-06, | |
| "loss": 0.4856, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.853376416873867e-06, | |
| "loss": 0.5387, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.813644980833851e-06, | |
| "loss": 0.5285, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.774025226970754e-06, | |
| "loss": 0.538, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.734518008450312e-06, | |
| "loss": 0.5506, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.695124176014938e-06, | |
| "loss": 0.4618, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.6558445779653946e-06, | |
| "loss": 0.5938, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.616680060142552e-06, | |
| "loss": 0.5627, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.57763146590916e-06, | |
| "loss": 0.4912, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.538699636131676e-06, | |
| "loss": 0.5197, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.49988540916218e-06, | |
| "loss": 0.4991, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.461189620820312e-06, | |
| "loss": 0.5795, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.422613104375259e-06, | |
| "loss": 0.4703, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.384156690527842e-06, | |
| "loss": 0.5486, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.345821207392605e-06, | |
| "loss": 0.499, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.307607480479977e-06, | |
| "loss": 0.5496, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.269516332678529e-06, | |
| "loss": 0.5846, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.231548584237219e-06, | |
| "loss": 0.5554, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.193705052747737e-06, | |
| "loss": 0.5126, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.155986553126914e-06, | |
| "loss": 0.523, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.1183938975991644e-06, | |
| "loss": 0.4926, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.080927895678984e-06, | |
| "loss": 0.4799, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.043589354153541e-06, | |
| "loss": 0.502, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.006379077065288e-06, | |
| "loss": 0.5447, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.969297865694641e-06, | |
| "loss": 0.4702, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.93234651854275e-06, | |
| "loss": 0.5145, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.895525831314282e-06, | |
| "loss": 0.4896, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.858836596900286e-06, | |
| "loss": 0.5074, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.822279605361138e-06, | |
| "loss": 0.5589, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.7858556439095073e-06, | |
| "loss": 0.5544, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.74956549689342e-06, | |
| "loss": 0.483, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.7134099457793625e-06, | |
| "loss": 0.5127, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.677389769135444e-06, | |
| "loss": 0.6738, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.6415057426146504e-06, | |
| "loss": 0.506, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.6057586389381326e-06, | |
| "loss": 0.5462, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5701492278785543e-06, | |
| "loss": 0.5466, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5346782762435383e-06, | |
| "loss": 0.6327, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.4993465478591447e-06, | |
| "loss": 0.4681, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.464154803553408e-06, | |
| "loss": 0.5137, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.429103801139981e-06, | |
| "loss": 0.5108, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.394194295401796e-06, | |
| "loss": 0.5533, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.3594270380748205e-06, | |
| "loss": 0.4782, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.3248027778318593e-06, | |
| "loss": 0.5367, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.2903222602664464e-06, | |
| "loss": 0.53, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.255986227876782e-06, | |
| "loss": 0.5079, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.221795420049744e-06, | |
| "loss": 0.4688, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.1877505730449677e-06, | |
| "loss": 0.5587, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.1538524199789853e-06, | |
| "loss": 0.5302, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.1201016908094518e-06, | |
| "loss": 0.4613, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.086499112319414e-06, | |
| "loss": 0.6063, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.0530454081016637e-06, | |
| "loss": 0.5349, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.0197412985431584e-06, | |
| "loss": 0.4467, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.9865875008095114e-06, | |
| "loss": 0.5005, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.953584728829533e-06, | |
| "loss": 0.4915, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.920733693279879e-06, | |
| "loss": 0.4643, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.8880351015697337e-06, | |
| "loss": 0.573, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.855489657825573e-06, | |
| "loss": 0.462, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.823098062876013e-06, | |
| "loss": 0.5971, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.7908610142367144e-06, | |
| "loss": 0.5416, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.758779206095359e-06, | |
| "loss": 0.4902, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.7268533292967026e-06, | |
| "loss": 0.5425, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.6950840713277037e-06, | |
| "loss": 0.5577, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.6634721163027076e-06, | |
| "loss": 0.5675, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.632018144948727e-06, | |
| "loss": 0.5889, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.600722834590781e-06, | |
| "loss": 0.5252, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.5695868591372975e-06, | |
| "loss": 0.5297, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.538610889065619e-06, | |
| "loss": 0.5192, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.507795591407559e-06, | |
| "loss": 0.499, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.477141629735025e-06, | |
| "loss": 0.4614, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4466496641457483e-06, | |
| "loss": 0.5016, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.416320351249062e-06, | |
| "loss": 0.4386, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.386154344151752e-06, | |
| "loss": 0.5361, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.3561522924440127e-06, | |
| "loss": 0.5185, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.326314842185443e-06, | |
| "loss": 0.5295, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2966426358911387e-06, | |
| "loss": 0.5863, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2671363125178635e-06, | |
| "loss": 0.5446, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.237796507450272e-06, | |
| "loss": 0.5217, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.208623852487248e-06, | |
| "loss": 0.4801, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.1796189758282917e-06, | |
| "loss": 0.5402, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.1507825020599827e-06, | |
| "loss": 0.419, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.122115052142545e-06, | |
| "loss": 0.5494, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0936172433964696e-06, | |
| "loss": 0.4936, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.065289689489213e-06, | |
| "loss": 0.5906, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.037133000421997e-06, | |
| "loss": 0.4602, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0091477825166637e-06, | |
| "loss": 0.4716, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9813346384026266e-06, | |
| "loss": 0.5926, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9536941670038745e-06, | |
| "loss": 0.46, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.926226963526103e-06, | |
| "loss": 0.4761, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8989336194438756e-06, | |
| "loss": 0.4689, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8718147224878957e-06, | |
| "loss": 0.5164, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8448708566323504e-06, | |
| "loss": 0.5523, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.818102602082329e-06, | |
| "loss": 0.487, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.7915105352613382e-06, | |
| "loss": 0.4778, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.7650952287988864e-06, | |
| "loss": 0.5652, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.7388572515181445e-06, | |
| "loss": 0.5723, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.7127971684237098e-06, | |
| "loss": 0.4921, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6869155406894344e-06, | |
| "loss": 0.4978, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6612129256463338e-06, | |
| "loss": 0.532, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6356898767705954e-06, | |
| "loss": 0.4466, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.610346943671659e-06, | |
| "loss": 0.5068, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.585184672080371e-06, | |
| "loss": 0.5097, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.5602036038372448e-06, | |
| "loss": 0.4844, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.5354042768807976e-06, | |
| "loss": 0.5384, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.510787225235939e-06, | |
| "loss": 0.5508, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4863529790025033e-06, | |
| "loss": 0.5572, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.46210206434382e-06, | |
| "loss": 0.5417, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4380350034753766e-06, | |
| "loss": 0.5127, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4141523146535886e-06, | |
| "loss": 0.5064, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3904545121646319e-06, | |
| "loss": 0.5853, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3669421063133626e-06, | |
| "loss": 0.5566, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3436156034123383e-06, | |
| "loss": 0.5032, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.320475505770913e-06, | |
| "loss": 0.5101, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.2975223116844115e-06, | |
| "loss": 0.556, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.274756515423411e-06, | |
| "loss": 0.5616, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.2521786072230935e-06, | |
| "loss": 0.4534, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2297890732726814e-06, | |
| "loss": 0.5217, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2075883957049862e-06, | |
| "loss": 0.4903, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1855770525860033e-06, | |
| "loss": 0.5061, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1637555179046344e-06, | |
| "loss": 0.5087, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1421242615624772e-06, | |
| "loss": 0.5611, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1206837493636992e-06, | |
| "loss": 0.5706, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.0994344430050163e-06, | |
| "loss": 0.4671, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.078376800065749e-06, | |
| "loss": 0.4745, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.057511273997962e-06, | |
| "loss": 0.5482, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.036838314116706e-06, | |
| "loss": 0.4953, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0163583655903464e-06, | |
| "loss": 0.4589, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.960718694309623e-07, | |
| "loss": 0.4736, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.759792624848662e-07, | |
| "loss": 0.53, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.560809774231872e-07, | |
| "loss": 0.5382, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.363774427325578e-07, | |
| "loss": 0.5388, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.168690827058813e-07, | |
| "loss": 0.4865, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.975563174332091e-07, | |
| "loss": 0.4705, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.784395627926734e-07, | |
| "loss": 0.5287, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.595192304415534e-07, | |
| "loss": 0.5257, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.407957278073952e-07, | |
| "loss": 0.5747, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.222694580792434e-07, | |
| "loss": 0.51, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.039408201989618e-07, | |
| "loss": 0.5299, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.85810208852642e-07, | |
| "loss": 0.5341, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.678780144620956e-07, | |
| "loss": 0.5411, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.501446231764609e-07, | |
| "loss": 0.4272, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.32610416863877e-07, | |
| "loss": 0.5023, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.152757731032645e-07, | |
| "loss": 0.4607, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.981410651761933e-07, | |
| "loss": 0.5416, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.81206662058852e-07, | |
| "loss": 0.5301, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.644729284140828e-07, | |
| "loss": 0.4581, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.479402245835587e-07, | |
| "loss": 0.606, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.316089065799958e-07, | |
| "loss": 0.5818, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.154793260795011e-07, | |
| "loss": 0.6078, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.995518304139991e-07, | |
| "loss": 0.4643, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.838267625637495e-07, | |
| "loss": 0.3653, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.6830446114996e-07, | |
| "loss": 0.4498, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.529852604274987e-07, | |
| "loss": 0.5397, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.37869490277697e-07, | |
| "loss": 0.4457, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.229574762012379e-07, | |
| "loss": 0.5155, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.082495393111564e-07, | |
| "loss": 0.3884, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.937459963259206e-07, | |
| "loss": 0.5399, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.794471595626071e-07, | |
| "loss": 0.5594, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.653533369301855e-07, | |
| "loss": 0.5078, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.514648319228798e-07, | |
| "loss": 0.5837, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.3778194361363323e-07, | |
| "loss": 0.5007, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.243049666476784e-07, | |
| "loss": 0.4823, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.110341912361726e-07, | |
| "loss": 0.4576, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.9796990314997176e-07, | |
| "loss": 0.4334, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.851123837134585e-07, | |
| "loss": 0.5832, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.7246190979849164e-07, | |
| "loss": 0.4585, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.600187538184463e-07, | |
| "loss": 0.5534, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.477831837223433e-07, | |
| "loss": 0.5029, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.3575546298907914e-07, | |
| "loss": 0.5229, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.239358506217549e-07, | |
| "loss": 0.5767, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.1232460114209994e-07, | |
| "loss": 0.3828, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.009219645849859e-07, | |
| "loss": 0.5912, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.897281864930468e-07, | |
| "loss": 0.4827, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.7874350791139203e-07, | |
| "loss": 0.4718, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.6796816538241065e-07, | |
| "loss": 0.5658, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.574023909406853e-07, | |
| "loss": 0.5658, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.4704641210798853e-07, | |
| "loss": 0.5866, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.369004518883855e-07, | |
| "loss": 0.472, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.2696472876343467e-07, | |
| "loss": 0.5015, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.1723945668748248e-07, | |
| "loss": 0.4904, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.0772484508304937e-07, | |
| "loss": 0.5254, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.984210988363311e-07, | |
| "loss": 0.3971, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8932841829277794e-07, | |
| "loss": 0.5335, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8044699925278242e-07, | |
| "loss": 0.4604, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7177703296746838e-07, | |
| "loss": 0.5667, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.6331870613456423e-07, | |
| "loss": 0.4999, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5507220089438724e-07, | |
| "loss": 0.484, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4703769482592335e-07, | |
| "loss": 0.479, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.3921536094299914e-07, | |
| "loss": 0.4738, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.3160536769055708e-07, | |
| "loss": 0.5374, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.2420787894103058e-07, | |
| "loss": 0.5197, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.1702305399081349e-07, | |
| "loss": 0.5258, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.1005104755682617e-07, | |
| "loss": 0.5347, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.0329200977319265e-07, | |
| "loss": 0.4883, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.674608618799986e-08, | |
| "loss": 0.5169, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.041341776016565e-08, | |
| "loss": 0.5368, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 8.429414085640574e-08, | |
| "loss": 0.5595, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.83883872482949e-08, | |
| "loss": 0.4733, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.269628410942808e-08, | |
| "loss": 0.3585, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.721795401268493e-08, | |
| "loss": 0.3875, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 6.195351492759183e-08, | |
| "loss": 0.5666, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.69030802177728e-08, | |
| "loss": 0.5308, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.206675863851818e-08, | |
| "loss": 0.4619, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.744465433443979e-08, | |
| "loss": 0.5458, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.303686683722497e-08, | |
| "loss": 0.4714, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.884349106349716e-08, | |
| "loss": 0.4355, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.486461731276869e-08, | |
| "loss": 0.4941, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.110033126549894e-08, | |
| "loss": 0.5258, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.755071398125031e-08, | |
| "loss": 0.5551, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.4215841896938486e-08, | |
| "loss": 0.575, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.1095786825190423e-08, | |
| "loss": 0.489, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.8190615952794477e-08, | |
| "loss": 0.4864, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.5500391839256002e-08, | |
| "loss": 0.5031, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.3025172415451758e-08, | |
| "loss": 0.5158, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.0765010982378698e-08, | |
| "loss": 0.5214, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.719956210007096e-09, | |
| "loss": 0.5405, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.890052136234726e-09, | |
| "loss": 0.4096, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.275338165935395e-09, | |
| "loss": 0.5636, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.875849070115179e-09, | |
| "loss": 0.5095, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.6916149851563542e-09, | |
| "loss": 0.4996, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.7226614121756968e-09, | |
| "loss": 0.5245, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 9.690092164715835e-10, | |
| "loss": 0.4947, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.3067462707546693e-10, | |
| "loss": 0.5609, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.0766923640215254e-10, | |
| "loss": 0.5161, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.5087, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 698, | |
| "total_flos": 407653124669440.0, | |
| "train_loss": 0.714859038890262, | |
| "train_runtime": 15566.5988, | |
| "train_samples_per_second": 1.435, | |
| "train_steps_per_second": 0.045 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 698, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 407653124669440.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |