| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 1329, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 2.7921, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.7341, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.5e-06, |
| "loss": 2.8151, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.7216, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.5e-06, |
| "loss": 2.5898, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3e-06, |
| "loss": 2.7029, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.5e-06, |
| "loss": 2.6816, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 2.6626, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.5e-06, |
| "loss": 2.5352, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5e-06, |
| "loss": 2.5139, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 2.6674, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6e-06, |
| "loss": 2.5652, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 2.5165, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7e-06, |
| "loss": 2.4227, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 2.4806, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.5979, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 8.5e-06, |
| "loss": 2.4325, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9e-06, |
| "loss": 2.2942, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.5e-06, |
| "loss": 2.4361, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1e-05, |
| "loss": 2.4365, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.0500000000000001e-05, |
| "loss": 2.2536, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 2.2818, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.15e-05, |
| "loss": 2.304, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.2e-05, |
| "loss": 2.1969, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.25e-05, |
| "loss": 2.1164, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 2.2132, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 2.065, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.4e-05, |
| "loss": 2.0703, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.45e-05, |
| "loss": 1.9844, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 2.0265, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.55e-05, |
| "loss": 1.8093, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.9518, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.65e-05, |
| "loss": 1.8611, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.7e-05, |
| "loss": 1.6355, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 1.7262, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.8e-05, |
| "loss": 1.6861, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.8500000000000002e-05, |
| "loss": 1.5701, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9e-05, |
| "loss": 1.5863, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.95e-05, |
| "loss": 1.349, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2e-05, |
| "loss": 1.3674, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9999970299504145e-05, |
| "loss": 1.3706, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9999881198192997e-05, |
| "loss": 1.3871, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9999732696595825e-05, |
| "loss": 1.2545, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.999952479559475e-05, |
| "loss": 1.3386, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.999925749642472e-05, |
| "loss": 1.3576, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.999893080067352e-05, |
| "loss": 1.2272, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9998544710281757e-05, |
| "loss": 1.2747, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9998099227542843e-05, |
| "loss": 1.2944, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9997594355102988e-05, |
| "loss": 1.243, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.999703009596119e-05, |
| "loss": 1.1542, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.99964064534692e-05, |
| "loss": 1.1436, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9995723431331517e-05, |
| "loss": 1.2183, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9994981033605364e-05, |
| "loss": 1.1039, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.999417926470065e-05, |
| "loss": 1.1347, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.999331812937997e-05, |
| "loss": 1.1239, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9992397632758545e-05, |
| "loss": 1.0616, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.999141778030422e-05, |
| "loss": 1.1196, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.999037857783742e-05, |
| "loss": 1.0849, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9989280031531103e-05, |
| "loss": 1.0734, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.998812214791075e-05, |
| "loss": 1.0206, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.99869049338543e-05, |
| "loss": 0.9276, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9985628396592122e-05, |
| "loss": 1.0207, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9984292543706982e-05, |
| "loss": 1.0138, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9982897383133978e-05, |
| "loss": 0.9639, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9981442923160494e-05, |
| "loss": 0.9498, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9979929172426175e-05, |
| "loss": 1.0486, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9978356139922844e-05, |
| "loss": 0.9061, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9976723834994475e-05, |
| "loss": 1.0376, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9975032267337122e-05, |
| "loss": 0.9994, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.997328144699886e-05, |
| "loss": 0.9606, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9971471384379737e-05, |
| "loss": 0.942, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9969602090231704e-05, |
| "loss": 0.9768, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9967673575658554e-05, |
| "loss": 0.8961, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.996568585211586e-05, |
| "loss": 0.8958, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9963638931410887e-05, |
| "loss": 0.9206, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9961532825702553e-05, |
| "loss": 1.0313, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9959367547501335e-05, |
| "loss": 0.9968, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.99571431096692e-05, |
| "loss": 0.8786, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.995485952541953e-05, |
| "loss": 0.9039, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9952516808317036e-05, |
| "loss": 0.9654, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9950114972277698e-05, |
| "loss": 0.8409, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9947654031568657e-05, |
| "loss": 0.9234, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9945134000808143e-05, |
| "loss": 0.8606, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9942554894965392e-05, |
| "loss": 0.8819, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9939916729360544e-05, |
| "loss": 0.9028, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9937219519664567e-05, |
| "loss": 0.9007, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9934463281899157e-05, |
| "loss": 0.8164, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9931648032436634e-05, |
| "loss": 0.9644, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.992877378799986e-05, |
| "loss": 0.9359, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.992584056566214e-05, |
| "loss": 0.8641, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9922848382847094e-05, |
| "loss": 0.9085, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9919797257328596e-05, |
| "loss": 0.869, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9916687207230622e-05, |
| "loss": 0.8349, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9913518251027187e-05, |
| "loss": 0.8558, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9910290407542202e-05, |
| "loss": 0.8784, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9907003695949377e-05, |
| "loss": 0.8321, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9903658135772106e-05, |
| "loss": 0.8658, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9900253746883347e-05, |
| "loss": 0.8395, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9896790549505508e-05, |
| "loss": 0.8121, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9893268564210327e-05, |
| "loss": 0.8644, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9889687811918744e-05, |
| "loss": 0.823, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.988604831390078e-05, |
| "loss": 0.9383, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.988235009177542e-05, |
| "loss": 0.7767, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9878593167510466e-05, |
| "loss": 0.8295, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.9874777563422425e-05, |
| "loss": 0.8656, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.987090330217636e-05, |
| "loss": 0.8626, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.9866970406785763e-05, |
| "loss": 0.7942, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.9862978900612432e-05, |
| "loss": 0.8658, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.9858928807366303e-05, |
| "loss": 0.8264, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.985482015110533e-05, |
| "loss": 0.8445, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.9850652956235347e-05, |
| "loss": 0.8318, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.98464272475099e-05, |
| "loss": 0.7747, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9842143050030115e-05, |
| "loss": 0.8545, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9837800389244553e-05, |
| "loss": 0.8254, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.983339929094905e-05, |
| "loss": 0.7988, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9828939781286564e-05, |
| "loss": 0.873, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.982442188674703e-05, |
| "loss": 0.7796, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.981984563416718e-05, |
| "loss": 0.8491, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.981521105073042e-05, |
| "loss": 0.7819, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.9810518163966627e-05, |
| "loss": 0.8041, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.9805767001752016e-05, |
| "loss": 0.8117, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.980095759230896e-05, |
| "loss": 0.8403, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9796089964205832e-05, |
| "loss": 0.8192, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9791164146356823e-05, |
| "loss": 0.806, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.978618016802178e-05, |
| "loss": 0.7675, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.978113805880603e-05, |
| "loss": 0.7825, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9776037848660202e-05, |
| "loss": 0.7986, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9770879567880046e-05, |
| "loss": 0.8481, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9765663247106265e-05, |
| "loss": 0.7997, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9760388917324317e-05, |
| "loss": 0.8009, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.975505660986425e-05, |
| "loss": 0.8215, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.97496663564005e-05, |
| "loss": 0.8177, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.9744218188951698e-05, |
| "loss": 0.8077, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.973871213988051e-05, |
| "loss": 0.6987, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.9733148241893403e-05, |
| "loss": 0.8333, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.972752652804049e-05, |
| "loss": 0.7606, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.972184703171531e-05, |
| "loss": 0.7712, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.9716109786654627e-05, |
| "loss": 0.789, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.9710314826938254e-05, |
| "loss": 0.7854, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.970446218698882e-05, |
| "loss": 0.7522, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.969855190157159e-05, |
| "loss": 0.8335, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.9692584005794245e-05, |
| "loss": 0.8291, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.9686558535106675e-05, |
| "loss": 0.7212, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9680475525300778e-05, |
| "loss": 0.7672, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.967433501251023e-05, |
| "loss": 0.7229, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9668137033210292e-05, |
| "loss": 0.7872, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9661881624217573e-05, |
| "loss": 0.8126, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9655568822689825e-05, |
| "loss": 0.8099, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.964919866612571e-05, |
| "loss": 0.7901, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.9642771192364593e-05, |
| "loss": 0.764, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.9636286439586303e-05, |
| "loss": 0.7654, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.962974444631092e-05, |
| "loss": 0.7325, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9623145251398527e-05, |
| "loss": 0.7639, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9616488894049e-05, |
| "loss": 0.7469, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9609775413801763e-05, |
| "loss": 0.7931, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9603004850535547e-05, |
| "loss": 0.7947, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9596177244468177e-05, |
| "loss": 0.7884, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.9589292636156306e-05, |
| "loss": 0.7227, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.9582351066495193e-05, |
| "loss": 0.7837, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.957535257671845e-05, |
| "loss": 0.7729, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.95682972083978e-05, |
| "loss": 0.7594, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9561185003442827e-05, |
| "loss": 0.7461, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9554016004100734e-05, |
| "loss": 0.7548, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9546790252956093e-05, |
| "loss": 0.795, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9539507792930582e-05, |
| "loss": 0.7066, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9532168667282732e-05, |
| "loss": 0.7259, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.952477291960768e-05, |
| "loss": 0.8147, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9517320593836895e-05, |
| "loss": 0.7231, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9509811734237938e-05, |
| "loss": 0.7954, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9502246385414177e-05, |
| "loss": 0.7019, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.9494624592304536e-05, |
| "loss": 0.7428, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.948694640018322e-05, |
| "loss": 0.7203, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.947921185465945e-05, |
| "loss": 0.7101, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.94714210016772e-05, |
| "loss": 0.7192, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9463573887514902e-05, |
| "loss": 0.7315, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9455670558785195e-05, |
| "loss": 0.7302, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9447711062434633e-05, |
| "loss": 0.7454, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.943969544574342e-05, |
| "loss": 0.7678, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9431623756325112e-05, |
| "loss": 0.7147, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.942349604212634e-05, |
| "loss": 0.728, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.9415312351426533e-05, |
| "loss": 0.7975, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.940707273283763e-05, |
| "loss": 0.7226, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.9398777235303783e-05, |
| "loss": 0.7323, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9390425908101063e-05, |
| "loss": 0.7481, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.938201880083719e-05, |
| "loss": 0.7388, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9373555963451213e-05, |
| "loss": 0.7761, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9365037446213216e-05, |
| "loss": 0.7655, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9356463299724047e-05, |
| "loss": 0.7207, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.9347833574914985e-05, |
| "loss": 0.7057, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.9339148323047447e-05, |
| "loss": 0.7628, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.933040759571269e-05, |
| "loss": 0.7443, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.932161144483151e-05, |
| "loss": 0.759, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9312759922653908e-05, |
| "loss": 0.7464, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9303853081758803e-05, |
| "loss": 0.7177, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9294890975053713e-05, |
| "loss": 0.7215, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.9285873655774447e-05, |
| "loss": 0.7209, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.927680117748477e-05, |
| "loss": 0.7786, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9267673594076103e-05, |
| "loss": 0.7363, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.92584909597672e-05, |
| "loss": 0.7411, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9249253329103817e-05, |
| "loss": 0.7043, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.92399607569584e-05, |
| "loss": 0.6569, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.923061329852974e-05, |
| "loss": 0.8199, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.9221211009342677e-05, |
| "loss": 0.7481, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.921175394524773e-05, |
| "loss": 0.8048, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.920224216242081e-05, |
| "loss": 0.7414, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9192675717362847e-05, |
| "loss": 0.7315, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.918305466689947e-05, |
| "loss": 0.7091, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.917337906818067e-05, |
| "loss": 0.7026, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.916364897868047e-05, |
| "loss": 0.7258, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.9153864456196565e-05, |
| "loss": 0.7535, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.9144025558849987e-05, |
| "loss": 0.7199, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.913413234508476e-05, |
| "loss": 0.746, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.912418487366756e-05, |
| "loss": 0.7275, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.9114183203687352e-05, |
| "loss": 0.7451, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9104127394555044e-05, |
| "loss": 0.7031, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9094017506003144e-05, |
| "loss": 0.6917, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.908385359808539e-05, |
| "loss": 0.6952, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9073635731176406e-05, |
| "loss": 0.6967, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.906336396597133e-05, |
| "loss": 0.724, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.905303836348547e-05, |
| "loss": 0.7074, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.904265898505393e-05, |
| "loss": 0.7079, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.9032225892331238e-05, |
| "loss": 0.7189, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.902173914729101e-05, |
| "loss": 0.7507, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.9011198812225548e-05, |
| "loss": 0.7258, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.9000604949745484e-05, |
| "loss": 0.7251, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.898995762277942e-05, |
| "loss": 0.7108, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.8979256894573525e-05, |
| "loss": 0.7447, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.896850282869119e-05, |
| "loss": 0.6982, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8957695489012635e-05, |
| "loss": 0.7926, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8946834939734526e-05, |
| "loss": 0.7088, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8935921245369606e-05, |
| "loss": 0.6911, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8924954470746296e-05, |
| "loss": 0.6635, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8913934681008328e-05, |
| "loss": 0.7868, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.890286194161435e-05, |
| "loss": 0.7063, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8891736318337525e-05, |
| "loss": 0.6989, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8880557877265165e-05, |
| "loss": 0.6803, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.8869326684798315e-05, |
| "loss": 0.6633, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.885804280765137e-05, |
| "loss": 0.7379, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8846706312851687e-05, |
| "loss": 0.7677, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8835317267739158e-05, |
| "loss": 0.8168, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.882387573996585e-05, |
| "loss": 0.7371, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.881238179749557e-05, |
| "loss": 0.6967, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8800835508603478e-05, |
| "loss": 0.7367, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.878923694187567e-05, |
| "loss": 0.6795, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8777586166208786e-05, |
| "loss": 0.741, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8765883250809586e-05, |
| "loss": 0.6769, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8754128265194554e-05, |
| "loss": 0.7383, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8742321279189465e-05, |
| "loss": 0.7208, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.873046236292899e-05, |
| "loss": 0.7576, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.871855158685626e-05, |
| "loss": 0.6826, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.870658902172248e-05, |
| "loss": 0.7316, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.869457473858646e-05, |
| "loss": 0.7603, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.868250880881424e-05, |
| "loss": 0.6663, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.867039130407864e-05, |
| "loss": 0.7768, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8658222296358834e-05, |
| "loss": 0.6399, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.864600185793994e-05, |
| "loss": 0.6573, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8633730061412575e-05, |
| "loss": 0.7238, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8621406979672422e-05, |
| "loss": 0.6909, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8609032685919815e-05, |
| "loss": 0.7015, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.8596607253659283e-05, |
| "loss": 0.7304, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.8584130756699122e-05, |
| "loss": 0.7332, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.857160326915097e-05, |
| "loss": 0.7717, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.8559024865429336e-05, |
| "loss": 0.6973, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.854639562025119e-05, |
| "loss": 0.6558, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.85337156086355e-05, |
| "loss": 0.7278, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8520984905902798e-05, |
| "loss": 0.6709, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8508203587674713e-05, |
| "loss": 0.6762, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8495371729873545e-05, |
| "loss": 0.6915, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8482489408721804e-05, |
| "loss": 0.6745, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8469556700741755e-05, |
| "loss": 0.7015, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.845657368275496e-05, |
| "loss": 0.6914, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8443540431881842e-05, |
| "loss": 0.7471, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8430457025541203e-05, |
| "loss": 0.7049, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.841732354144977e-05, |
| "loss": 0.7247, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8404140057621735e-05, |
| "loss": 0.7045, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8390906652368313e-05, |
| "loss": 0.7241, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8377623404297236e-05, |
| "loss": 0.7352, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8364290392312318e-05, |
| "loss": 0.7272, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8350907695612963e-05, |
| "loss": 0.734, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.833747539369373e-05, |
| "loss": 0.6458, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8323993566343817e-05, |
| "loss": 0.7287, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8310462293646617e-05, |
| "loss": 0.7266, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.829688165597923e-05, |
| "loss": 0.6643, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8283251734011994e-05, |
| "loss": 0.6928, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8269572608707995e-05, |
| "loss": 0.667, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8255844361322594e-05, |
| "loss": 0.7104, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8242067073402943e-05, |
| "loss": 0.7399, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8228240826787497e-05, |
| "loss": 0.6967, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.821436570360553e-05, |
| "loss": 0.6748, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8200441786276655e-05, |
| "loss": 0.6997, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.818646915751032e-05, |
| "loss": 0.7266, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8172447900305327e-05, |
| "loss": 0.7751, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8158378097949327e-05, |
| "loss": 0.6918, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.814425983401835e-05, |
| "loss": 0.6871, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.813009319237628e-05, |
| "loss": 0.718, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8115878257174372e-05, |
| "loss": 0.6978, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8101615112850752e-05, |
| "loss": 0.714, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8087303844129915e-05, |
| "loss": 0.7054, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8072944536022213e-05, |
| "loss": 0.7226, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.805853727382336e-05, |
| "loss": 0.6674, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8044082143113924e-05, |
| "loss": 0.6867, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8029579229758812e-05, |
| "loss": 0.7246, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8015028619906774e-05, |
| "loss": 0.7068, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.8000430399989866e-05, |
| "loss": 0.7425, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.798578465672297e-05, |
| "loss": 0.7239, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.797109147710325e-05, |
| "loss": 0.7028, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.7956350948409655e-05, |
| "loss": 0.7167, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7941563158202376e-05, |
| "loss": 0.6964, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7926728194322364e-05, |
| "loss": 0.7107, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7911846144890772e-05, |
| "loss": 0.7054, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7896917098308448e-05, |
| "loss": 0.7115, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.7881941143255414e-05, |
| "loss": 0.7805, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.7866918368690324e-05, |
| "loss": 0.6673, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.7851848863849948e-05, |
| "loss": 0.7111, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.7836732718248644e-05, |
| "loss": 0.7082, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.782157002167781e-05, |
| "loss": 0.7505, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.780636086420537e-05, |
| "loss": 0.6944, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.779110533617523e-05, |
| "loss": 0.6788, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7775803528206736e-05, |
| "loss": 0.7136, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.776045553119415e-05, |
| "loss": 0.6588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.774506143630609e-05, |
| "loss": 0.6321, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7729621334985005e-05, |
| "loss": 0.7047, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7714135318946637e-05, |
| "loss": 0.705, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.769860348017945e-05, |
| "loss": 0.6682, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.768302591094411e-05, |
| "loss": 0.677, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.766740270377292e-05, |
| "loss": 0.7102, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7651733951469283e-05, |
| "loss": 0.7166, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.763601974710714e-05, |
| "loss": 0.7296, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7620260184030422e-05, |
| "loss": 0.6376, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7604455355852498e-05, |
| "loss": 0.7293, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7588605356455618e-05, |
| "loss": 0.7007, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7572710279990345e-05, |
| "loss": 0.7327, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7556770220875014e-05, |
| "loss": 0.7004, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7540785273795152e-05, |
| "loss": 0.6465, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.7524755533702933e-05, |
| "loss": 0.7309, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.7508681095816603e-05, |
| "loss": 0.6871, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.7492562055619916e-05, |
| "loss": 0.6667, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.747639850886157e-05, |
| "loss": 0.6829, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7460190551554633e-05, |
| "loss": 0.7077, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7443938279975988e-05, |
| "loss": 0.7125, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7427641790665728e-05, |
| "loss": 0.6544, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.741130118042662e-05, |
| "loss": 0.6815, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7394916546323514e-05, |
| "loss": 0.7034, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7378487985682758e-05, |
| "loss": 0.7322, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.736201559609163e-05, |
| "loss": 0.706, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7345499475397756e-05, |
| "loss": 0.6743, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.732893972170854e-05, |
| "loss": 0.7012, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7312336433390552e-05, |
| "loss": 0.7028, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7295689709068974e-05, |
| "loss": 0.6783, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7278999647626998e-05, |
| "loss": 0.7084, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7262266348205246e-05, |
| "loss": 0.6949, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7245489910201177e-05, |
| "loss": 0.7032, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.7228670433268494e-05, |
| "loss": 0.6914, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.721180801731656e-05, |
| "loss": 0.6878, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.71949027625098e-05, |
| "loss": 0.6693, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.7177954769267098e-05, |
| "loss": 0.6652, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7160964138261217e-05, |
| "loss": 0.6961, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7143930970418196e-05, |
| "loss": 0.6733, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.712685536691673e-05, |
| "loss": 0.6693, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7109737429187604e-05, |
| "loss": 0.7208, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.709257725891307e-05, |
| "loss": 0.691, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7075374958026235e-05, |
| "loss": 0.6782, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7058130628710473e-05, |
| "loss": 0.6652, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.704084437339881e-05, |
| "loss": 0.7174, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7023516294773318e-05, |
| "loss": 0.6752, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.7006146495764503e-05, |
| "loss": 0.6904, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.698873507955069e-05, |
| "loss": 0.7341, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.6971282149557428e-05, |
| "loss": 0.7794, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.695378780945684e-05, |
| "loss": 0.7369, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6936252163167048e-05, |
| "loss": 0.68, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6918675314851524e-05, |
| "loss": 0.6891, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6901057368918497e-05, |
| "loss": 0.6794, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6883398430020314e-05, |
| "loss": 0.6611, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6865698603052813e-05, |
| "loss": 0.7198, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6847957993154734e-05, |
| "loss": 0.6707, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.683017670570705e-05, |
| "loss": 0.6614, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6812354846332376e-05, |
| "loss": 0.7048, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6794492520894324e-05, |
| "loss": 0.6672, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6776589835496878e-05, |
| "loss": 0.6745, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6758646896483762e-05, |
| "loss": 0.7399, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.674066381043782e-05, |
| "loss": 0.6527, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6722640684180354e-05, |
| "loss": 0.7013, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6704577624770536e-05, |
| "loss": 0.6649, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.6686474739504723e-05, |
| "loss": 0.6718, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.666833213591585e-05, |
| "loss": 0.708, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.665014992177278e-05, |
| "loss": 0.6771, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.663192820507967e-05, |
| "loss": 0.6808, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6613667094075324e-05, |
| "loss": 0.6663, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.659536669723255e-05, |
| "loss": 0.6487, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6577027123257522e-05, |
| "loss": 0.6897, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.655864848108913e-05, |
| "loss": 0.6907, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6540230879898327e-05, |
| "loss": 0.7152, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6521774429087495e-05, |
| "loss": 0.6911, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6503279238289776e-05, |
| "loss": 0.6503, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6484745417368446e-05, |
| "loss": 0.6214, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.646617307641623e-05, |
| "loss": 0.7051, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6447562325754683e-05, |
| "loss": 0.6916, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.642891327593351e-05, |
| "loss": 0.6816, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.641022603772991e-05, |
| "loss": 0.6798, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.639150072214793e-05, |
| "loss": 0.6688, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.637273744041781e-05, |
| "loss": 0.6512, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.63539363039953e-05, |
| "loss": 0.6907, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6335097424561015e-05, |
| "loss": 0.635, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6316220914019765e-05, |
| "loss": 0.699, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6297306884499898e-05, |
| "loss": 0.7085, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.627835544835262e-05, |
| "loss": 0.6628, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.625936671815135e-05, |
| "loss": 0.7145, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.624034080669102e-05, |
| "loss": 0.7012, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6221277826987435e-05, |
| "loss": 0.7415, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.6202177892276588e-05, |
| "loss": 0.6851, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.6183041116013976e-05, |
| "loss": 0.6706, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.6163867611873954e-05, |
| "loss": 0.6832, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.614465749374904e-05, |
| "loss": 0.7037, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.612541087574924e-05, |
| "loss": 0.6591, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6106127872201364e-05, |
| "loss": 0.6466, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6086808597648377e-05, |
| "loss": 0.718, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6067453166848682e-05, |
| "loss": 0.7338, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6048061694775458e-05, |
| "loss": 0.6408, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.6028634296615973e-05, |
| "loss": 0.6511, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.6009171087770895e-05, |
| "loss": 0.6559, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.598967218385362e-05, |
| "loss": 0.7159, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5970137700689567e-05, |
| "loss": 0.7021, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5950567754315504e-05, |
| "loss": 0.6726, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.593096246097885e-05, |
| "loss": 0.6247, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5911321937136997e-05, |
| "loss": 0.6916, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5891646299456607e-05, |
| "loss": 0.6227, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.5871935664812913e-05, |
| "loss": 0.7073, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.585219015028904e-05, |
| "loss": 0.6513, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.58324098731753e-05, |
| "loss": 0.6828, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.581259495096851e-05, |
| "loss": 0.6997, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.5792745501371265e-05, |
| "loss": 0.6471, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5772861642291266e-05, |
| "loss": 0.6449, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5752943491840608e-05, |
| "loss": 0.646, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5732991168335085e-05, |
| "loss": 0.6599, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.571300479029347e-05, |
| "loss": 0.7271, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.569298447643683e-05, |
| "loss": 0.6926, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.567293034568782e-05, |
| "loss": 0.6529, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.5652842517169968e-05, |
| "loss": 0.6807, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.563272111020696e-05, |
| "loss": 0.6908, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.5612566244321948e-05, |
| "loss": 0.6927, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.5592378039236843e-05, |
| "loss": 0.7129, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.5572156614871577e-05, |
| "loss": 0.6616, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.555190209134342e-05, |
| "loss": 0.6865, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.553161458896625e-05, |
| "loss": 0.4986, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.5511294228249845e-05, |
| "loss": 0.5378, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.549094112989916e-05, |
| "loss": 0.523, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.547055541481362e-05, |
| "loss": 0.5141, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.545013720408639e-05, |
| "loss": 0.4931, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.5429686619003672e-05, |
| "loss": 0.4841, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5409203781043964e-05, |
| "loss": 0.5636, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5388688811877357e-05, |
| "loss": 0.5198, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5368141833364805e-05, |
| "loss": 0.4757, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5347562967557395e-05, |
| "loss": 0.4768, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5326952336695637e-05, |
| "loss": 0.5287, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5306310063208712e-05, |
| "loss": 0.4694, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5285636269713776e-05, |
| "loss": 0.5035, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5264931079015216e-05, |
| "loss": 0.4959, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5244194614103914e-05, |
| "loss": 0.5042, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.522342699815653e-05, |
| "loss": 0.4582, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.5202628354534762e-05, |
| "loss": 0.5145, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.5181798806784614e-05, |
| "loss": 0.4757, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.5160938478635667e-05, |
| "loss": 0.5436, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5140047494000341e-05, |
| "loss": 0.4993, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5119125976973152e-05, |
| "loss": 0.4807, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.509817405182999e-05, |
| "loss": 0.5208, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5077191843027366e-05, |
| "loss": 0.4628, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5056179475201683e-05, |
| "loss": 0.4791, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.5035137073168487e-05, |
| "loss": 0.4812, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.5014064761921736e-05, |
| "loss": 0.4742, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.4992962666633044e-05, |
| "loss": 0.4995, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.4971830912650953e-05, |
| "loss": 0.4637, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.4950669625500178e-05, |
| "loss": 0.5083, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.4929478930880862e-05, |
| "loss": 0.5213, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.4908258954667832e-05, |
| "loss": 0.4594, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.4887009822909853e-05, |
| "loss": 0.4824, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.486573166182887e-05, |
| "loss": 0.4934, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4844424597819276e-05, |
| "loss": 0.4764, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4823088757447144e-05, |
| "loss": 0.4725, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4801724267449477e-05, |
| "loss": 0.5164, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.478033125473347e-05, |
| "loss": 0.5344, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4758909846375736e-05, |
| "loss": 0.4889, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4737460169621564e-05, |
| "loss": 0.4687, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4715982351884166e-05, |
| "loss": 0.4993, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4694476520743908e-05, |
| "loss": 0.4685, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4672942803947556e-05, |
| "loss": 0.4834, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.4651381329407527e-05, |
| "loss": 0.4811, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.4629792225201115e-05, |
| "loss": 0.4497, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.460817561956974e-05, |
| "loss": 0.5004, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.458653164091819e-05, |
| "loss": 0.5071, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.4564860417813837e-05, |
| "loss": 0.5306, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.4543162078985898e-05, |
| "loss": 0.4839, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.4521436753324659e-05, |
| "loss": 0.4976, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.4499684569880705e-05, |
| "loss": 0.5096, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4477905657864169e-05, |
| "loss": 0.47, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4456100146643941e-05, |
| "loss": 0.4731, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4434268165746925e-05, |
| "loss": 0.5398, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.441240984485725e-05, |
| "loss": 0.4744, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4390525313815516e-05, |
| "loss": 0.4721, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.4368614702617997e-05, |
| "loss": 0.4635, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.4346678141415905e-05, |
| "loss": 0.4543, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.4324715760514588e-05, |
| "loss": 0.4941, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.4302727690372764e-05, |
| "loss": 0.4827, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.428071406160175e-05, |
| "loss": 0.4838, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.4258675004964687e-05, |
| "loss": 0.5128, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.4236610651375752e-05, |
| "loss": 0.5191, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.42145211318994e-05, |
| "loss": 0.4921, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.4192406577749562e-05, |
| "loss": 0.4979, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4170267120288885e-05, |
| "loss": 0.5136, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4148102891027943e-05, |
| "loss": 0.5089, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4125914021624454e-05, |
| "loss": 0.504, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4103700643882503e-05, |
| "loss": 0.4671, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.4081462889751756e-05, |
| "loss": 0.4766, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.4059200891326683e-05, |
| "loss": 0.4884, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.4036914780845757e-05, |
| "loss": 0.5225, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.4014604690690683e-05, |
| "loss": 0.4511, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.3992270753385614e-05, |
| "loss": 0.5009, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.3969913101596351e-05, |
| "loss": 0.4734, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.394753186812956e-05, |
| "loss": 0.4765, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.3925127185931993e-05, |
| "loss": 0.5218, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.3902699188089679e-05, |
| "loss": 0.4594, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.3880248007827151e-05, |
| "loss": 0.4988, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.3857773778506643e-05, |
| "loss": 0.5155, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.3835276633627313e-05, |
| "loss": 0.4867, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.3812756706824428e-05, |
| "loss": 0.5115, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3790214131868588e-05, |
| "loss": 0.5179, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3767649042664925e-05, |
| "loss": 0.4606, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3745061573252305e-05, |
| "loss": 0.4803, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3722451857802535e-05, |
| "loss": 0.4904, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3699820030619569e-05, |
| "loss": 0.4961, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.3677166226138705e-05, |
| "loss": 0.5076, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.3654490578925788e-05, |
| "loss": 0.5081, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.3631793223676408e-05, |
| "loss": 0.4853, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.3609074295215113e-05, |
| "loss": 0.4963, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3586333928494582e-05, |
| "loss": 0.4867, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3563572258594854e-05, |
| "loss": 0.4955, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3540789420722509e-05, |
| "loss": 0.4929, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3517985550209859e-05, |
| "loss": 0.4945, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3495160782514154e-05, |
| "loss": 0.5025, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.3472315253216782e-05, |
| "loss": 0.4861, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.3449449098022452e-05, |
| "loss": 0.4869, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.3426562452758391e-05, |
| "loss": 0.4765, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.3403655453373545e-05, |
| "loss": 0.4695, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.3380728235937758e-05, |
| "loss": 0.5029, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.3357780936640981e-05, |
| "loss": 0.4633, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.333481369179244e-05, |
| "loss": 0.5174, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.3311826637819856e-05, |
| "loss": 0.4974, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.32888199112686e-05, |
| "loss": 0.5198, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.3265793648800915e-05, |
| "loss": 0.4722, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.3242747987195084e-05, |
| "loss": 0.4951, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.3219683063344619e-05, |
| "loss": 0.4641, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.3196599014257459e-05, |
| "loss": 0.488, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.3173495977055142e-05, |
| "loss": 0.4812, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.3150374088972e-05, |
| "loss": 0.4926, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.3127233487354342e-05, |
| "loss": 0.4609, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.3104074309659637e-05, |
| "loss": 0.486, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.3080896693455699e-05, |
| "loss": 0.5201, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.305770077641986e-05, |
| "loss": 0.4515, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.3034486696338173e-05, |
| "loss": 0.531, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.3011254591104578e-05, |
| "loss": 0.5102, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.2988004598720083e-05, |
| "loss": 0.5289, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.2964736857291944e-05, |
| "loss": 0.4543, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.2941451505032857e-05, |
| "loss": 0.536, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.291814868026012e-05, |
| "loss": 0.5164, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.2894828521394824e-05, |
| "loss": 0.4949, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.2871491166961028e-05, |
| "loss": 0.5053, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.284813675558493e-05, |
| "loss": 0.4922, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.2824765425994047e-05, |
| "loss": 0.4764, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.2801377317016402e-05, |
| "loss": 0.4604, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.2777972567579673e-05, |
| "loss": 0.4804, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.2754551316710397e-05, |
| "loss": 0.4913, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.273111370353313e-05, |
| "loss": 0.4722, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.2707659867269613e-05, |
| "loss": 0.4883, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.2684189947237964e-05, |
| "loss": 0.5032, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2660704082851831e-05, |
| "loss": 0.4544, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.263720241361958e-05, |
| "loss": 0.4866, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2613685079143458e-05, |
| "loss": 0.4717, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2590152219118762e-05, |
| "loss": 0.4628, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2566603973333016e-05, |
| "loss": 0.5074, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2543040481665134e-05, |
| "loss": 0.5163, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2519461884084592e-05, |
| "loss": 0.4608, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.24958683206506e-05, |
| "loss": 0.4555, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2472259931511265e-05, |
| "loss": 0.4877, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.244863685690276e-05, |
| "loss": 0.5046, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.242499923714849e-05, |
| "loss": 0.5057, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.240134721265826e-05, |
| "loss": 0.4941, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.237768092392744e-05, |
| "loss": 0.5013, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.2354000511536135e-05, |
| "loss": 0.4802, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.2330306116148344e-05, |
| "loss": 0.4923, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.230659787851112e-05, |
| "loss": 0.5112, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.228287593945375e-05, |
| "loss": 0.4823, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.22591404398869e-05, |
| "loss": 0.512, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.2235391520801801e-05, |
| "loss": 0.5106, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.2211629323269377e-05, |
| "loss": 0.4944, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.2187853988439442e-05, |
| "loss": 0.4953, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.2164065657539846e-05, |
| "loss": 0.4902, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.2140264471875627e-05, |
| "loss": 0.482, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.2116450572828194e-05, |
| "loss": 0.5075, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.2092624101854466e-05, |
| "loss": 0.5016, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.2068785200486044e-05, |
| "loss": 0.483, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.204493401032837e-05, |
| "loss": 0.4576, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.202107067305987e-05, |
| "loss": 0.4608, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.1997195330431141e-05, |
| "loss": 0.4803, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.1973308124264087e-05, |
| "loss": 0.4515, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.1949409196451073e-05, |
| "loss": 0.4808, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1925498688954111e-05, |
| "loss": 0.4788, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1901576743803984e-05, |
| "loss": 0.5234, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1877643503099414e-05, |
| "loss": 0.4877, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1853699109006227e-05, |
| "loss": 0.495, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1829743703756498e-05, |
| "loss": 0.4853, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.1805777429647712e-05, |
| "loss": 0.5241, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.178180042904191e-05, |
| "loss": 0.4862, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.1757812844364855e-05, |
| "loss": 0.5476, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.173381481810518e-05, |
| "loss": 0.4532, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.1709806492813542e-05, |
| "loss": 0.4767, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.168578801110177e-05, |
| "loss": 0.4671, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.166175951564203e-05, |
| "loss": 0.5454, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.1637721149165971e-05, |
| "loss": 0.4776, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.161367305446387e-05, |
| "loss": 0.4916, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.1589615374383793e-05, |
| "loss": 0.4836, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.156554825183075e-05, |
| "loss": 0.46, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.1541471829765832e-05, |
| "loss": 0.5275, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.1517386251205375e-05, |
| "loss": 0.4816, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.1493291659220104e-05, |
| "loss": 0.5156, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.1469188196934289e-05, |
| "loss": 0.4918, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.1445076007524877e-05, |
| "loss": 0.4725, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.1420955234220675e-05, |
| "loss": 0.4952, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1396826020301457e-05, |
| "loss": 0.4983, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1372688509097158e-05, |
| "loss": 0.5061, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1348542843986983e-05, |
| "loss": 0.4609, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1324389168398576e-05, |
| "loss": 0.5053, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1300227625807167e-05, |
| "loss": 0.4776, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.1276058359734719e-05, |
| "loss": 0.477, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.1251881513749062e-05, |
| "loss": 0.5003, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.1227697231463062e-05, |
| "loss": 0.5353, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.1203505656533756e-05, |
| "loss": 0.4741, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1179306932661496e-05, |
| "loss": 0.464, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1155101203589102e-05, |
| "loss": 0.4933, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1130888613101007e-05, |
| "loss": 0.4678, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1106669305022397e-05, |
| "loss": 0.5098, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1082443423218366e-05, |
| "loss": 0.4637, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.1058211111593054e-05, |
| "loss": 0.4909, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.1033972514088793e-05, |
| "loss": 0.4746, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.1009727774685257e-05, |
| "loss": 0.5037, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.0985477037398606e-05, |
| "loss": 0.5255, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.096122044628062e-05, |
| "loss": 0.487, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0936958145417858e-05, |
| "loss": 0.4664, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0912690278930791e-05, |
| "loss": 0.518, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0888416990972957e-05, |
| "loss": 0.4671, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0864138425730088e-05, |
| "loss": 0.5023, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0839854727419273e-05, |
| "loss": 0.4709, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0815566040288088e-05, |
| "loss": 0.4959, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0791272508613742e-05, |
| "loss": 0.4765, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0766974276702227e-05, |
| "loss": 0.5163, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0742671488887444e-05, |
| "loss": 0.503, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0718364289530363e-05, |
| "loss": 0.5361, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0694052823018164e-05, |
| "loss": 0.4615, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0669737233763363e-05, |
| "loss": 0.5281, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0645417666202978e-05, |
| "loss": 0.5149, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0621094264797647e-05, |
| "loss": 0.4832, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0596767174030786e-05, |
| "loss": 0.4707, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0572436538407734e-05, |
| "loss": 0.4661, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.054810250245487e-05, |
| "loss": 0.4945, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0523765210718783e-05, |
| "loss": 0.4644, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0499424807765408e-05, |
| "loss": 0.4929, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0475081438179143e-05, |
| "loss": 0.5048, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.045073524656202e-05, |
| "loss": 0.5006, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.0426386377532836e-05, |
| "loss": 0.4952, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.040203497572628e-05, |
| "loss": 0.486, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.0377681185792102e-05, |
| "loss": 0.5019, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.0353325152394222e-05, |
| "loss": 0.4907, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.03289670202099e-05, |
| "loss": 0.4565, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.030460693392885e-05, |
| "loss": 0.4783, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.0280245038252403e-05, |
| "loss": 0.4972, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.0255881477892639e-05, |
| "loss": 0.4406, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.0231516397571521e-05, |
| "loss": 0.4833, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.020714994202004e-05, |
| "loss": 0.5206, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.018278225597736e-05, |
| "loss": 0.4994, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.0158413484189955e-05, |
| "loss": 0.4812, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.0134043771410744e-05, |
| "loss": 0.4756, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.0109673262398234e-05, |
| "loss": 0.4962, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.0085302101915672e-05, |
| "loss": 0.5003, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.0060930434730162e-05, |
| "loss": 0.4872, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.0036558405611832e-05, |
| "loss": 0.4974, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.0012186159332944e-05, |
| "loss": 0.468, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.98781384066706e-06, |
| "loss": 0.5092, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.963441594388172e-06, |
| "loss": 0.4494, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.939069565269841e-06, |
| "loss": 0.4768, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.914697898084331e-06, |
| "loss": 0.4732, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.89032673760177e-06, |
| "loss": 0.4869, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.865956228589259e-06, |
| "loss": 0.5096, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.841586515810045e-06, |
| "loss": 0.4989, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.817217744022641e-06, |
| "loss": 0.4863, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.79285005797996e-06, |
| "loss": 0.4784, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.768483602428482e-06, |
| "loss": 0.4789, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.744118522107361e-06, |
| "loss": 0.5008, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.719754961747599e-06, |
| "loss": 0.4747, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.695393066071153e-06, |
| "loss": 0.5056, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.671032979790105e-06, |
| "loss": 0.4459, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.64667484760578e-06, |
| "loss": 0.4722, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.622318814207903e-06, |
| "loss": 0.4742, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.597965024273723e-06, |
| "loss": 0.4861, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.573613622467166e-06, |
| "loss": 0.4648, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.549264753437982e-06, |
| "loss": 0.447, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.524918561820857e-06, |
| "loss": 0.482, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.500575192234595e-06, |
| "loss": 0.4709, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.476234789281215e-06, |
| "loss": 0.4699, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.451897497545136e-06, |
| "loss": 0.485, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.427563461592271e-06, |
| "loss": 0.4359, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.403232825969217e-06, |
| "loss": 0.4802, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.378905735202356e-06, |
| "loss": 0.4492, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.354582333797027e-06, |
| "loss": 0.4567, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.330262766236638e-06, |
| "loss": 0.4409, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.305947176981843e-06, |
| "loss": 0.4808, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.281635710469639e-06, |
| "loss": 0.5058, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.25732851111256e-06, |
| "loss": 0.4882, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.233025723297776e-06, |
| "loss": 0.4833, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.208727491386258e-06, |
| "loss": 0.4576, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.184433959711916e-06, |
| "loss": 0.4642, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.160145272580729e-06, |
| "loss": 0.5047, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.135861574269917e-06, |
| "loss": 0.4651, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.111583009027048e-06, |
| "loss": 0.4636, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.087309721069214e-06, |
| "loss": 0.4919, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.063041854582145e-06, |
| "loss": 0.5028, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.038779553719386e-06, |
| "loss": 0.4978, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.014522962601398e-06, |
| "loss": 0.5048, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 8.990272225314743e-06, |
| "loss": 0.4566, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 8.96602748591121e-06, |
| "loss": 0.4825, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 8.941788888406948e-06, |
| "loss": 0.4986, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.917556576781638e-06, |
| "loss": 0.4832, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.893330694977606e-06, |
| "loss": 0.5103, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.869111386898997e-06, |
| "loss": 0.5006, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.844898796410901e-06, |
| "loss": 0.4608, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.820693067338507e-06, |
| "loss": 0.4743, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.796494343466247e-06, |
| "loss": 0.4819, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.772302768536943e-06, |
| "loss": 0.4596, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.748118486250942e-06, |
| "loss": 0.4981, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.723941640265283e-06, |
| "loss": 0.4984, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.699772374192835e-06, |
| "loss": 0.4786, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.675610831601424e-06, |
| "loss": 0.4938, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.65145715601302e-06, |
| "loss": 0.4579, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.627311490902843e-06, |
| "loss": 0.5083, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.603173979698544e-06, |
| "loss": 0.449, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.579044765779329e-06, |
| "loss": 0.5281, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.554923992475126e-06, |
| "loss": 0.5002, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.530811803065715e-06, |
| "loss": 0.4912, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.5067083407799e-06, |
| "loss": 0.4653, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.482613748794628e-06, |
| "loss": 0.5034, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.458528170234171e-06, |
| "loss": 0.4859, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.434451748169255e-06, |
| "loss": 0.4548, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.410384625616208e-06, |
| "loss": 0.4651, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.386326945536134e-06, |
| "loss": 0.5084, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.36227885083403e-06, |
| "loss": 0.4796, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.338240484357971e-06, |
| "loss": 0.489, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.31421198889823e-06, |
| "loss": 0.5035, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.290193507186464e-06, |
| "loss": 0.5213, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.266185181894821e-06, |
| "loss": 0.4585, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.24218715563515e-06, |
| "loss": 0.5318, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.218199570958094e-06, |
| "loss": 0.4694, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.194222570352295e-06, |
| "loss": 0.4996, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.170256296243505e-06, |
| "loss": 0.4922, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.146300890993776e-06, |
| "loss": 0.4919, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.12235649690059e-06, |
| "loss": 0.4518, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.098423256196018e-06, |
| "loss": 0.4359, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.074501311045892e-06, |
| "loss": 0.505, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.050590803548927e-06, |
| "loss": 0.5173, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.026691875735918e-06, |
| "loss": 0.4621, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.00280466956886e-06, |
| "loss": 0.4769, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 7.978929326940135e-06, |
| "loss": 0.4793, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.955065989671636e-06, |
| "loss": 0.5219, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.93121479951396e-06, |
| "loss": 0.4625, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.907375898145538e-06, |
| "loss": 0.4755, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.883549427171806e-06, |
| "loss": 0.5136, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.859735528124375e-06, |
| "loss": 0.5005, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.835934342460156e-06, |
| "loss": 0.4902, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.81214601156056e-06, |
| "loss": 0.4651, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.788370676730625e-06, |
| "loss": 0.4668, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.764608479198204e-06, |
| "loss": 0.4744, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.740859560113101e-06, |
| "loss": 0.4451, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.717124060546254e-06, |
| "loss": 0.4383, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.693402121488884e-06, |
| "loss": 0.4764, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.669693883851663e-06, |
| "loss": 0.4724, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.645999488463867e-06, |
| "loss": 0.4989, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.622319076072564e-06, |
| "loss": 0.5171, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.598652787341744e-06, |
| "loss": 0.4631, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.575000762851511e-06, |
| "loss": 0.4749, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.551363143097244e-06, |
| "loss": 0.5007, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.527740068488735e-06, |
| "loss": 0.5064, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.504131679349402e-06, |
| "loss": 0.4649, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.48053811591541e-06, |
| "loss": 0.4585, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.456959518334871e-06, |
| "loss": 0.4771, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.4333960266669855e-06, |
| "loss": 0.5022, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.409847780881241e-06, |
| "loss": 0.4676, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.386314920856546e-06, |
| "loss": 0.4744, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.362797586380423e-06, |
| "loss": 0.4726, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.339295917148173e-06, |
| "loss": 0.4457, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.315810052762038e-06, |
| "loss": 0.4901, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.292340132730389e-06, |
| "loss": 0.48, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.268886296466871e-06, |
| "loss": 0.5181, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.245448683289605e-06, |
| "loss": 0.4704, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.222027432420329e-06, |
| "loss": 0.4764, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.198622682983603e-06, |
| "loss": 0.4423, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.1752345740059536e-06, |
| "loss": 0.5054, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.151863244415076e-06, |
| "loss": 0.4999, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.128508833038976e-06, |
| "loss": 0.4793, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.105171478605182e-06, |
| "loss": 0.4865, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.081851319739884e-06, |
| "loss": 0.497, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.0585484949671475e-06, |
| "loss": 0.5078, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.035263142708058e-06, |
| "loss": 0.4804, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.0119954012799195e-06, |
| "loss": 0.4825, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 6.988745408895424e-06, |
| "loss": 0.44, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 6.965513303661826e-06, |
| "loss": 0.4784, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 6.942299223580144e-06, |
| "loss": 0.4694, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.9191033065443045e-06, |
| "loss": 0.4627, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.895925690340367e-06, |
| "loss": 0.4647, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.872766512645661e-06, |
| "loss": 0.5004, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.849625911028005e-06, |
| "loss": 0.4797, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.826504022944862e-06, |
| "loss": 0.4732, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.803400985742545e-06, |
| "loss": 0.4839, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.780316936655382e-06, |
| "loss": 0.4778, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.7572520128049164e-06, |
| "loss": 0.4618, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.734206351199086e-06, |
| "loss": 0.4444, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.7111800887314e-06, |
| "loss": 0.4701, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.688173362180148e-06, |
| "loss": 0.4493, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.665186308207562e-06, |
| "loss": 0.4444, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.642219063359023e-06, |
| "loss": 0.4765, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.619271764062244e-06, |
| "loss": 0.4797, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.596344546626461e-06, |
| "loss": 0.4617, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.5734375472416115e-06, |
| "loss": 0.4976, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.550550901977552e-06, |
| "loss": 0.4637, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.527684746783221e-06, |
| "loss": 0.4432, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.5048392174858465e-06, |
| "loss": 0.4758, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.482014449790145e-06, |
| "loss": 0.4736, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.459210579277492e-06, |
| "loss": 0.4679, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.4364277414051465e-06, |
| "loss": 0.4523, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.41366607150542e-06, |
| "loss": 0.4605, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.390925704784894e-06, |
| "loss": 0.4754, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.368206776323593e-06, |
| "loss": 0.4926, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.345509421074218e-06, |
| "loss": 0.4587, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.322833773861296e-06, |
| "loss": 0.4375, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.300179969380435e-06, |
| "loss": 0.4688, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.277548142197468e-06, |
| "loss": 0.4623, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.254938426747697e-06, |
| "loss": 0.4777, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.232350957335078e-06, |
| "loss": 0.4935, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.2097858681314115e-06, |
| "loss": 0.4596, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.187243293175573e-06, |
| "loss": 0.4481, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.164723366372688e-06, |
| "loss": 0.4843, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.142226221493359e-06, |
| "loss": 0.4704, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.119751992172853e-06, |
| "loss": 0.4896, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.097300811910327e-06, |
| "loss": 0.4831, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.07487281406801e-06, |
| "loss": 0.4724, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.052468131870444e-06, |
| "loss": 0.483, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.030086898403652e-06, |
| "loss": 0.4791, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 6.007729246614387e-06, |
| "loss": 0.4689, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.985395309309319e-06, |
| "loss": 0.4905, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.963085219154247e-06, |
| "loss": 0.4658, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.94079910867332e-06, |
| "loss": 0.4497, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.918537110248244e-06, |
| "loss": 0.4585, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.896299356117501e-06, |
| "loss": 0.4468, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.874085978375548e-06, |
| "loss": 0.4902, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.8518971089720626e-06, |
| "loss": 0.4613, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.829732879711116e-06, |
| "loss": 0.4942, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.807593422250441e-06, |
| "loss": 0.4831, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.785478868100604e-06, |
| "loss": 0.4371, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.763389348624251e-06, |
| "loss": 0.4408, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.741324995035318e-06, |
| "loss": 0.4882, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.719285938398254e-06, |
| "loss": 0.5094, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.69727230962724e-06, |
| "loss": 0.4716, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.675284239485415e-06, |
| "loss": 0.446, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.653321858584095e-06, |
| "loss": 0.4746, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.631385297382004e-06, |
| "loss": 0.4702, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.609474686184488e-06, |
| "loss": 0.4573, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.58759015514275e-06, |
| "loss": 0.4525, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.565731834253077e-06, |
| "loss": 0.47, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.543899853356062e-06, |
| "loss": 0.4843, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.522094342135835e-06, |
| "loss": 0.4608, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.500315430119298e-06, |
| "loss": 0.461, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.478563246675345e-06, |
| "loss": 0.4347, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.456837921014105e-06, |
| "loss": 0.4541, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.4351395821861665e-06, |
| "loss": 0.4586, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.413468359081814e-06, |
| "loss": 0.4841, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.391824380430262e-06, |
| "loss": 0.4803, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.3702077747988904e-06, |
| "loss": 0.4923, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.3486186705924785e-06, |
| "loss": 0.4814, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.327057196052449e-06, |
| "loss": 0.4729, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.305523479256096e-06, |
| "loss": 0.4713, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.284017648115837e-06, |
| "loss": 0.5153, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.262539830378438e-06, |
| "loss": 0.3328, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.241090153624264e-06, |
| "loss": 0.336, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.219668745266533e-06, |
| "loss": 0.2995, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.198275732550522e-06, |
| "loss": 0.3322, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.17691124255286e-06, |
| "loss": 0.3147, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.155575402180721e-06, |
| "loss": 0.3186, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.134268338171133e-06, |
| "loss": 0.3011, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.1129901770901525e-06, |
| "loss": 0.2903, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.091741045332173e-06, |
| "loss": 0.3047, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.070521069119143e-06, |
| "loss": 0.3322, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.049330374499826e-06, |
| "loss": 0.3158, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 5.028169087349051e-06, |
| "loss": 0.2897, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 5.0070373333669595e-06, |
| "loss": 0.2875, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 4.98593523807827e-06, |
| "loss": 0.3225, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 4.964862926831513e-06, |
| "loss": 0.2805, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.94382052479832e-06, |
| "loss": 0.3066, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.922808156972633e-06, |
| "loss": 0.2938, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.901825948170013e-06, |
| "loss": 0.3164, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.880874023026847e-06, |
| "loss": 0.3285, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.859952505999663e-06, |
| "loss": 0.2846, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.839061521364332e-06, |
| "loss": 0.2867, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.81820119321539e-06, |
| "loss": 0.306, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.79737164546524e-06, |
| "loss": 0.2945, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.776573001843475e-06, |
| "loss": 0.3122, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.75580538589609e-06, |
| "loss": 0.2916, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.735068920984786e-06, |
| "loss": 0.3027, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.714363730286227e-06, |
| "loss": 0.289, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.69368993679129e-06, |
| "loss": 0.3046, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.67304766330437e-06, |
| "loss": 0.2687, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.652437032442604e-06, |
| "loss": 0.2909, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.631858166635198e-06, |
| "loss": 0.2926, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.6113111881226425e-06, |
| "loss": 0.289, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.590796218956041e-06, |
| "loss": 0.3294, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.570313380996331e-06, |
| "loss": 0.2951, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.549862795913614e-06, |
| "loss": 0.3024, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.5294445851863824e-06, |
| "loss": 0.2878, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.50905887010084e-06, |
| "loss": 0.2739, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.488705771750155e-06, |
| "loss": 0.3131, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.468385411033749e-06, |
| "loss": 0.2929, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.44809790865658e-06, |
| "loss": 0.3022, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.427843385128424e-06, |
| "loss": 0.2868, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.407621960763163e-06, |
| "loss": 0.2939, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.3874337556780535e-06, |
| "loss": 0.2782, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.367278889793049e-06, |
| "loss": 0.3002, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.347157482830036e-06, |
| "loss": 0.3155, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.327069654312184e-06, |
| "loss": 0.2727, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.30701552356317e-06, |
| "loss": 0.2982, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.286995209706537e-06, |
| "loss": 0.3042, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.267008831664919e-06, |
| "loss": 0.2837, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.247056508159392e-06, |
| "loss": 0.2738, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.227138357708735e-06, |
| "loss": 0.3027, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.207254498628737e-06, |
| "loss": 0.2969, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.187405049031492e-06, |
| "loss": 0.2917, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.167590126824701e-06, |
| "loss": 0.2939, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.147809849710964e-06, |
| "loss": 0.2954, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.128064335187091e-06, |
| "loss": 0.3066, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.108353700543396e-06, |
| "loss": 0.2808, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.088678062863003e-06, |
| "loss": 0.2897, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.069037539021155e-06, |
| "loss": 0.3047, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 4.0494322456845006e-06, |
| "loss": 0.3304, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 4.029862299310437e-06, |
| "loss": 0.2965, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 4.010327816146382e-06, |
| "loss": 0.3029, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 3.990828912229105e-06, |
| "loss": 0.293, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 3.971365703384028e-06, |
| "loss": 0.2993, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.951938305224542e-06, |
| "loss": 0.296, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.932546833151318e-06, |
| "loss": 0.275, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.913191402351624e-06, |
| "loss": 0.3096, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.893872127798638e-06, |
| "loss": 0.2754, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.874589124250766e-06, |
| "loss": 0.3007, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.855342506250963e-06, |
| "loss": 0.284, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.836132388126048e-06, |
| "loss": 0.2944, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.816958883986027e-06, |
| "loss": 0.2914, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.7978221077234167e-06, |
| "loss": 0.2966, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.7787221730125668e-06, |
| "loss": 0.3016, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.759659193308981e-06, |
| "loss": 0.2825, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.740633281848652e-06, |
| "loss": 0.2755, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.7216445516473797e-06, |
| "loss": 0.2866, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.7026931155001055e-06, |
| "loss": 0.3103, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.6837790859802382e-06, |
| "loss": 0.3009, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.664902575438988e-06, |
| "loss": 0.2982, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.6460636960047024e-06, |
| "loss": 0.2862, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.627262559582191e-06, |
| "loss": 0.2887, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.60849927785207e-06, |
| "loss": 0.306, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.5897739622700944e-06, |
| "loss": 0.2876, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.571086724066494e-06, |
| "loss": 0.2593, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.552437674245317e-06, |
| "loss": 0.2839, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.5338269235837695e-06, |
| "loss": 0.3004, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.5152545826315578e-06, |
| "loss": 0.3222, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.4967207617102263e-06, |
| "loss": 0.3011, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.478225570912509e-06, |
| "loss": 0.2815, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.459769120101676e-06, |
| "loss": 0.2953, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.441351518910875e-06, |
| "loss": 0.282, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.4229728767424807e-06, |
| "loss": 0.2986, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.4046333027674536e-06, |
| "loss": 0.2897, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.386332905924681e-06, |
| "loss": 0.3066, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.36807179492033e-06, |
| "loss": 0.274, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.3498500782272224e-06, |
| "loss": 0.3053, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.3316678640841503e-06, |
| "loss": 0.2783, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.3135252604952795e-06, |
| "loss": 0.2766, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.2954223752294657e-06, |
| "loss": 0.3057, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.277359315819647e-06, |
| "loss": 0.2977, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.2593361895621865e-06, |
| "loss": 0.2833, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.2413531035162414e-06, |
| "loss": 0.2802, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.223410164503127e-06, |
| "loss": 0.3085, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.2055074791056807e-06, |
| "loss": 0.301, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.187645153667628e-06, |
| "loss": 0.3194, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.16982329429295e-06, |
| "loss": 0.2815, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.1520420068452705e-06, |
| "loss": 0.2839, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.134301396947186e-06, |
| "loss": 0.284, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.1166015699796915e-06, |
| "loss": 0.2983, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.0989426310815018e-06, |
| "loss": 0.3001, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.081324685148479e-06, |
| "loss": 0.2919, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.0637478368329543e-06, |
| "loss": 0.2864, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.046212190543165e-06, |
| "loss": 0.298, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.028717850442575e-06, |
| "loss": 0.3068, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.0112649204493117e-06, |
| "loss": 0.2924, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 2.993853504235501e-06, |
| "loss": 0.2911, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.976483705226683e-06, |
| "loss": 0.2964, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.9591556266011945e-06, |
| "loss": 0.302, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.9418693712895295e-06, |
| "loss": 0.3098, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.92462504197377e-06, |
| "loss": 0.2977, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.9074227410869315e-06, |
| "loss": 0.3239, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.890262570812398e-06, |
| "loss": 0.3508, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8731446330832715e-06, |
| "loss": 0.2954, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8560690295818115e-06, |
| "loss": 0.298, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8390358617387836e-06, |
| "loss": 0.2863, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.8220452307329073e-06, |
| "loss": 0.2827, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.805097237490203e-06, |
| "loss": 0.3203, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.7881919826834435e-06, |
| "loss": 0.2962, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.7713295667315065e-06, |
| "loss": 0.2871, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.754510089798824e-06, |
| "loss": 0.3196, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.737733651794755e-06, |
| "loss": 0.2895, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.7210003523730044e-06, |
| "loss": 0.3046, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.7043102909310327e-06, |
| "loss": 0.2843, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.687663566609452e-06, |
| "loss": 0.2951, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.6710602782914664e-06, |
| "loss": 0.2735, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.6545005246022438e-06, |
| "loss": 0.2864, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.6379844039083758e-06, |
| "loss": 0.2665, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.6215120143172447e-06, |
| "loss": 0.2963, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.6050834536764903e-06, |
| "loss": 0.2977, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.58869881957338e-06, |
| "loss": 0.2931, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.5723582093342736e-06, |
| "loss": 0.2795, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.5560617200240155e-06, |
| "loss": 0.3144, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.5398094484453663e-06, |
| "loss": 0.3015, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.523601491138432e-06, |
| "loss": 0.2728, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.507437944380087e-06, |
| "loss": 0.2666, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.4913189041833997e-06, |
| "loss": 0.3027, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.47524446629707e-06, |
| "loss": 0.301, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.4592147262048506e-06, |
| "loss": 0.3144, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.4432297791249893e-06, |
| "loss": 0.2776, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.42728972000966e-06, |
| "loss": 0.3019, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.4113946435443847e-06, |
| "loss": 0.2819, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.3955446441475027e-06, |
| "loss": 0.2836, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.3797398159695795e-06, |
| "loss": 0.2949, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.363980252892862e-06, |
| "loss": 0.3065, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.3482660485307196e-06, |
| "loss": 0.284, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.3325972962270813e-06, |
| "loss": 0.2691, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.3169740890558922e-06, |
| "loss": 0.2859, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.301396519820551e-06, |
| "loss": 0.2506, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.285864681053365e-06, |
| "loss": 0.2671, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.270378665014995e-06, |
| "loss": 0.3213, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.2549385636939136e-06, |
| "loss": 0.3155, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.239544468805853e-06, |
| "loss": 0.2639, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.2241964717932652e-06, |
| "loss": 0.3051, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.208894663824772e-06, |
| "loss": 0.2849, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.1936391357946307e-06, |
| "loss": 0.2892, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.178429978322193e-06, |
| "loss": 0.3092, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.16326728175136e-06, |
| "loss": 0.2901, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.148151136150054e-06, |
| "loss": 0.288, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.133081631309679e-06, |
| "loss": 0.3126, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.118058856744588e-06, |
| "loss": 0.3036, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.103082901691552e-06, |
| "loss": 0.3148, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.0881538551092306e-06, |
| "loss": 0.3272, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.073271805677638e-06, |
| "loss": 0.3151, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2.0584368417976266e-06, |
| "loss": 0.2915, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2.0436490515903506e-06, |
| "loss": 0.255, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2.028908522896752e-06, |
| "loss": 0.292, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 2.014215343277032e-06, |
| "loss": 0.2902, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.999569600010136e-06, |
| "loss": 0.2952, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.9849713800932304e-06, |
| "loss": 0.2664, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.9704207702411892e-06, |
| "loss": 0.2772, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.9559178568860792e-06, |
| "loss": 0.3004, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.941462726176643e-06, |
| "loss": 0.2597, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.9270554639777903e-06, |
| "loss": 0.3086, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.9126961558700875e-06, |
| "loss": 0.3049, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.8983848871492494e-06, |
| "loss": 0.2838, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.884121742825631e-06, |
| "loss": 0.2994, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.8699068076237215e-06, |
| "loss": 0.2949, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8557401659816531e-06, |
| "loss": 0.2738, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8416219020506732e-06, |
| "loss": 0.285, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8275520996946783e-06, |
| "loss": 0.2871, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8135308424896792e-06, |
| "loss": 0.2817, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.799558213723347e-06, |
| "loss": 0.2867, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.7856342963944717e-06, |
| "loss": 0.2913, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.7717591732125072e-06, |
| "loss": 0.2978, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.7579329265970612e-06, |
| "loss": 0.3074, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.7441556386774095e-06, |
| "loss": 0.2722, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.7304273912920088e-06, |
| "loss": 0.2807, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.7167482659880098e-06, |
| "loss": 0.2717, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.7031183440207732e-06, |
| "loss": 0.3116, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.6895377063533848e-06, |
| "loss": 0.3018, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.6760064336561876e-06, |
| "loss": 0.2963, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.6625246063062717e-06, |
| "loss": 0.2793, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.64909230438704e-06, |
| "loss": 0.2721, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.6357096076876867e-06, |
| "loss": 0.2796, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.6223765957027682e-06, |
| "loss": 0.3006, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.6090933476316882e-06, |
| "loss": 0.299, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.595859942378266e-06, |
| "loss": 0.28, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.5826764585502341e-06, |
| "loss": 0.2888, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.569542974458801e-06, |
| "loss": 0.2887, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5564595681181593e-06, |
| "loss": 0.2959, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5434263172450381e-06, |
| "loss": 0.2829, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5304432992582485e-06, |
| "loss": 0.2834, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5175105912781962e-06, |
| "loss": 0.2604, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.504628270126457e-06, |
| "loss": 0.3021, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.4917964123252881e-06, |
| "loss": 0.3144, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.479015094097206e-06, |
| "loss": 0.2887, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.4662843913644987e-06, |
| "loss": 0.2895, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.4536043797488132e-06, |
| "loss": 0.2825, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.440975134570667e-06, |
| "loss": 0.2911, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.4283967308490366e-06, |
| "loss": 0.3063, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.4158692433008792e-06, |
| "loss": 0.2862, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.4033927463407204e-06, |
| "loss": 0.3062, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.390967314080186e-06, |
| "loss": 0.2845, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.3785930203275776e-06, |
| "loss": 0.2704, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.3662699385874268e-06, |
| "loss": 0.2848, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.353998142060061e-06, |
| "loss": 0.2747, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.3417777036411693e-06, |
| "loss": 0.2922, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.329608695921364e-06, |
| "loss": 0.3015, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.3174911911857647e-06, |
| "loss": 0.2798, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.3054252614135432e-06, |
| "loss": 0.3118, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.293410978277526e-06, |
| "loss": 0.3089, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.281448413143741e-06, |
| "loss": 0.3061, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.2695376370710143e-06, |
| "loss": 0.2844, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.2576787208105378e-06, |
| "loss": 0.2929, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.2458717348054483e-06, |
| "loss": 0.314, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.234116749190415e-06, |
| "loss": 0.2893, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.222413833791216e-06, |
| "loss": 0.287, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.2107630581243323e-06, |
| "loss": 0.2711, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.199164491396525e-06, |
| "loss": 0.2992, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1876182025044302e-06, |
| "loss": 0.2997, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1761242600341504e-06, |
| "loss": 0.2851, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1646827322608422e-06, |
| "loss": 0.2938, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1532936871483169e-06, |
| "loss": 0.2775, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1419571923486339e-06, |
| "loss": 0.2771, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.130673315201689e-06, |
| "loss": 0.2808, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.1194421227348385e-06, |
| "loss": 0.3052, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.108263681662477e-06, |
| "loss": 0.2956, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.097138058385654e-06, |
| "loss": 0.2809, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.0860653189916736e-06, |
| "loss": 0.2831, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.0750455292537077e-06, |
| "loss": 0.2603, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.0640787546303987e-06, |
| "loss": 0.2675, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.0531650602654752e-06, |
| "loss": 0.3147, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.0423045109873664e-06, |
| "loss": 0.3, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 1.0314971713088096e-06, |
| "loss": 0.2749, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 1.020743105426476e-06, |
| "loss": 0.3171, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 1.0100423772205826e-06, |
| "loss": 0.2793, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 9.993950502545158e-07, |
| "loss": 0.3008, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 9.88801187774454e-07, |
| "loss": 0.3087, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.78260852708991e-07, |
| "loss": 0.3245, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.67774107668763e-07, |
| "loss": 0.2676, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.573410149460749e-07, |
| "loss": 0.2714, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.469616365145318e-07, |
| "loss": 0.2592, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 9.366360340286718e-07, |
| "loss": 0.294, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 9.263642688235963e-07, |
| "loss": 0.2719, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 9.161464019146115e-07, |
| "loss": 0.2777, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 9.059824939968575e-07, |
| "loss": 0.3025, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 8.958726054449573e-07, |
| "loss": 0.273, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.858167963126508e-07, |
| "loss": 0.3158, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.75815126332441e-07, |
| "loss": 0.278, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.658676549152411e-07, |
| "loss": 0.2943, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.55974441150016e-07, |
| "loss": 0.2945, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.46135543803438e-07, |
| "loss": 0.3105, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.363510213195314e-07, |
| "loss": 0.2912, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.266209318193319e-07, |
| "loss": 0.2675, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.169453331005351e-07, |
| "loss": 0.2966, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 8.073242826371564e-07, |
| "loss": 0.2678, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.977578375791906e-07, |
| "loss": 0.3018, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.882460547522708e-07, |
| "loss": 0.2756, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.787889906573287e-07, |
| "loss": 0.288, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.693867014702638e-07, |
| "loss": 0.2992, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.600392430416037e-07, |
| "loss": 0.3164, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.507466708961853e-07, |
| "loss": 0.2846, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.415090402327996e-07, |
| "loss": 0.3011, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.323264059238977e-07, |
| "loss": 0.2812, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 7.23198822515232e-07, |
| "loss": 0.2828, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 7.141263442255553e-07, |
| "loss": 0.3217, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 7.051090249462878e-07, |
| "loss": 0.2825, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.961469182411996e-07, |
| "loss": 0.2763, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.872400773460952e-07, |
| "loss": 0.2807, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.783885551684921e-07, |
| "loss": 0.2865, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.695924042873092e-07, |
| "loss": 0.2787, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.608516769525531e-07, |
| "loss": 0.2759, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.521664250850179e-07, |
| "loss": 0.2884, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.43536700275953e-07, |
| "loss": 0.2888, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.349625537867854e-07, |
| "loss": 0.3039, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.264440365487912e-07, |
| "loss": 0.2804, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.179811991628115e-07, |
| "loss": 0.2713, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.095740918989357e-07, |
| "loss": 0.3016, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 6.012227646962198e-07, |
| "loss": 0.2728, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.929272671623687e-07, |
| "loss": 0.2692, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.846876485734687e-07, |
| "loss": 0.3005, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.765039578736631e-07, |
| "loss": 0.3051, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.683762436748919e-07, |
| "loss": 0.2813, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.603045542565821e-07, |
| "loss": 0.3022, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.522889375653673e-07, |
| "loss": 0.2969, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.443294412148092e-07, |
| "loss": 0.2682, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.364261124851011e-07, |
| "loss": 0.3063, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.28578998322804e-07, |
| "loss": 0.3068, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.207881453405494e-07, |
| "loss": 0.3001, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.130535998167829e-07, |
| "loss": 0.2691, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.053754076954653e-07, |
| "loss": 0.2969, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.977536145858242e-07, |
| "loss": 0.3002, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.901882657620627e-07, |
| "loss": 0.2921, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.826794061631068e-07, |
| "loss": 0.2805, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.752270803923231e-07, |
| "loss": 0.3007, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.678313327172701e-07, |
| "loss": 0.2911, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.6049220706941957e-07, |
| "loss": 0.2973, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.5320974704390675e-07, |
| "loss": 0.3319, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.459839958992662e-07, |
| "loss": 0.2788, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.388149965571753e-07, |
| "loss": 0.3064, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.317027916022043e-07, |
| "loss": 0.2997, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.2464742328155116e-07, |
| "loss": 0.2933, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.176489335048084e-07, |
| "loss": 0.3118, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.1070736384369423e-07, |
| "loss": 0.2973, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.0382275553182527e-07, |
| "loss": 0.2891, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.9699514946445416e-07, |
| "loss": 0.2939, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.902245861982412e-07, |
| "loss": 0.2576, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.835111059510022e-07, |
| "loss": 0.2974, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.768547486014751e-07, |
| "loss": 0.3159, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.7025555368908285e-07, |
| "loss": 0.3023, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.6371356041369874e-07, |
| "loss": 0.2894, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.5722880763541134e-07, |
| "loss": 0.308, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.508013338742944e-07, |
| "loss": 0.3221, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.444311773101794e-07, |
| "loss": 0.2932, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.38118375782428e-07, |
| "loss": 0.2885, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.3186296678970885e-07, |
| "loss": 0.2764, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.25664987489771e-07, |
| "loss": 0.2738, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.1952447469922545e-07, |
| "loss": 0.2959, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.1344146489332705e-07, |
| "loss": 0.2984, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.074159942057586e-07, |
| "loss": 0.3055, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.0144809842841293e-07, |
| "loss": 0.2668, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.955378130111819e-07, |
| "loss": 0.3003, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.896851730617489e-07, |
| "loss": 0.3046, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.8389021334537357e-07, |
| "loss": 0.2622, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.7815296828469286e-07, |
| "loss": 0.3285, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.7247347195951013e-07, |
| "loss": 0.2749, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.668517581065977e-07, |
| "loss": 0.2995, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.612878601194935e-07, |
| "loss": 0.2698, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.5578181104830347e-07, |
| "loss": 0.2906, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.5033364359950406e-07, |
| "loss": 0.2808, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.449433901357512e-07, |
| "loss": 0.2884, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.3961108267568365e-07, |
| "loss": 0.3031, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.343367528937379e-07, |
| "loss": 0.2653, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.2912043211995583e-07, |
| "loss": 0.2826, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.2396215133980047e-07, |
| "loss": 0.2747, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.1886194119396963e-07, |
| "loss": 0.2681, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.138198319782192e-07, |
| "loss": 0.2716, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.088358536431767e-07, |
| "loss": 0.2752, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.0391003579416814e-07, |
| "loss": 0.2873, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.9904240769104022e-07, |
| "loss": 0.2742, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.9423299824798624e-07, |
| "loss": 0.2888, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.89481836033375e-07, |
| "loss": 0.3036, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.8478894926958203e-07, |
| "loss": 0.2999, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.8015436583281975e-07, |
| "loss": 0.3175, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.7557811325297324e-07, |
| "loss": 0.2797, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.7106021871343803e-07, |
| "loss": 0.2724, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.666007090509525e-07, |
| "loss": 0.3123, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.621996107554491e-07, |
| "loss": 0.2837, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.5785694996988789e-07, |
| "loss": 0.2711, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.5357275249010427e-07, |
| "loss": 0.2684, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.493470437646549e-07, |
| "loss": 0.299, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.4517984889466985e-07, |
| "loss": 0.2904, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.410711926336994e-07, |
| "loss": 0.2768, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.3702109938757092e-07, |
| "loss": 0.2866, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.330295932142378e-07, |
| "loss": 0.2758, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.2909669782364409e-07, |
| "loss": 0.3144, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.252224365775767e-07, |
| "loss": 0.2735, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.2140683248953345e-07, |
| "loss": 0.303, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.1764990822458078e-07, |
| "loss": 0.2758, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.1395168609921959e-07, |
| "loss": 0.2757, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.1031218808125854e-07, |
| "loss": 0.2792, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.0673143578967427e-07, |
| "loss": 0.2787, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1.0320945049449249e-07, |
| "loss": 0.282, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 9.974625311665375e-08, |
| "loss": 0.2922, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 9.634186422789571e-08, |
| "loss": 0.3034, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 9.299630405062433e-08, |
| "loss": 0.263, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.970959245780064e-08, |
| "loss": 0.2739, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.648174897281425e-08, |
| "loss": 0.3171, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.331279276937887e-08, |
| "loss": 0.2929, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.020274267140694e-08, |
| "loss": 0.2954, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 7.71516171529052e-08, |
| "loss": 0.2647, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 7.415943433786043e-08, |
| "loss": 0.2829, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 7.122621200013835e-08, |
| "loss": 0.2776, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 6.835196756336704e-08, |
| "loss": 0.2831, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 6.553671810084483e-08, |
| "loss": 0.2935, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 6.278048033543371e-08, |
| "loss": 0.2917, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 6.008327063945718e-08, |
| "loss": 0.3114, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 5.744510503461143e-08, |
| "loss": 0.2911, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 5.486599919185875e-08, |
| "loss": 0.2815, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 5.234596843134543e-08, |
| "loss": 0.2972, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.988502772230286e-08, |
| "loss": 0.2855, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.7483191682964333e-08, |
| "loss": 0.2756, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.514047458047288e-08, |
| "loss": 0.3157, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.2856890330801315e-08, |
| "loss": 0.2999, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 4.063245249866454e-08, |
| "loss": 0.3104, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.84671742974474e-08, |
| "loss": 0.2905, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.63610685891147e-08, |
| "loss": 0.2708, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.4314147884143554e-08, |
| "loss": 0.3025, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 3.2326424341445616e-08, |
| "loss": 0.2597, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 3.039790976829715e-08, |
| "loss": 0.2829, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.8528615620265766e-08, |
| "loss": 0.2823, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.6718553001142676e-08, |
| "loss": 0.2978, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.496773266288055e-08, |
| "loss": 0.2678, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 2.3276165005524652e-08, |
| "loss": 0.3141, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 2.164386007715624e-08, |
| "loss": 0.3082, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 2.0070827573827055e-08, |
| "loss": 0.2702, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.855707683950714e-08, |
| "loss": 0.2945, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.710261686602488e-08, |
| "loss": 0.2835, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.5707456293018177e-08, |
| "loss": 0.2745, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.4371603407878909e-08, |
| "loss": 0.2926, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.3095066145704105e-08, |
| "loss": 0.2938, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.1877852089253739e-08, |
| "loss": 0.2948, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1.0719968468898556e-08, |
| "loss": 0.2903, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 9.621422162583437e-09, |
| "loss": 0.2763, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 8.58221969578077e-09, |
| "loss": 0.2563, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.602367241458241e-09, |
| "loss": 0.2879, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 6.681870620034448e-09, |
| "loss": 0.2682, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.820735299352231e-09, |
| "loss": 0.2914, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.018966394639835e-09, |
| "loss": 0.2929, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.276568668485359e-09, |
| "loss": 0.2858, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.59354653080346e-09, |
| "loss": 0.2855, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.9699040388131427e-09, |
| "loss": 0.2827, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.4056448970144474e-09, |
| "loss": 0.2632, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 1.9007724571606935e-09, |
| "loss": 0.2817, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 1.4552897182462667e-09, |
| "loss": 0.3004, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.069199326481085e-09, |
| "loss": 0.2623, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 7.425035752817167e-10, |
| "loss": 0.3205, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 4.752044052513949e-10, |
| "loss": 0.2791, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2.673034041755784e-10, |
| "loss": 0.294, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1.1880180700640787e-10, |
| "loss": 0.2973, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 2.970049585715451e-11, |
| "loss": 0.2872, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0, |
| "loss": 0.2736, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1329, |
| "total_flos": 1.2791024001451295e+18, |
| "train_loss": 0.5584695404458531, |
| "train_runtime": 25818.8582, |
| "train_samples_per_second": 6.584, |
| "train_steps_per_second": 0.051 |
| } |
| ], |
| "max_steps": 1329, |
| "num_train_epochs": 3, |
| "total_flos": 1.2791024001451295e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |