| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 1630, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.0816326530612243e-07, | |
| "loss": 11.0156, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.163265306122449e-07, | |
| "loss": 10.5312, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2244897959183673e-06, | |
| "loss": 10.9531, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6326530612244897e-06, | |
| "loss": 10.4062, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.0408163265306125e-06, | |
| "loss": 10.5156, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.4489795918367347e-06, | |
| "loss": 10.5156, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 9.8281, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.2653061224489794e-06, | |
| "loss": 8.9531, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.6734693877551024e-06, | |
| "loss": 9.0156, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.081632653061225e-06, | |
| "loss": 8.9062, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.489795918367348e-06, | |
| "loss": 8.0781, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.897959183673469e-06, | |
| "loss": 7.625, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5.306122448979593e-06, | |
| "loss": 7.7188, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 7.0391, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 6.122448979591837e-06, | |
| "loss": 6.6875, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 6.530612244897959e-06, | |
| "loss": 6.4922, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 6.938775510204082e-06, | |
| "loss": 6.1953, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 7.346938775510205e-06, | |
| "loss": 6.2578, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.755102040816327e-06, | |
| "loss": 5.8906, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.16326530612245e-06, | |
| "loss": 5.7656, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 5.4844, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.979591836734695e-06, | |
| "loss": 5.4922, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.387755102040818e-06, | |
| "loss": 5.3594, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.795918367346939e-06, | |
| "loss": 5.0625, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.0204081632653063e-05, | |
| "loss": 4.9219, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.0612244897959186e-05, | |
| "loss": 4.9609, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.1020408163265306e-05, | |
| "loss": 4.6172, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 4.7578, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.1836734693877552e-05, | |
| "loss": 4.4219, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.2244897959183674e-05, | |
| "loss": 4.4688, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.2653061224489798e-05, | |
| "loss": 4.8281, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3061224489795918e-05, | |
| "loss": 4.6484, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3469387755102042e-05, | |
| "loss": 4.6172, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.3877551020408165e-05, | |
| "loss": 4.5938, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 4.5156, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.469387755102041e-05, | |
| "loss": 4.5938, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.510204081632653e-05, | |
| "loss": 4.2109, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.5510204081632655e-05, | |
| "loss": 4.1094, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.5918367346938776e-05, | |
| "loss": 4.0742, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.63265306122449e-05, | |
| "loss": 4.1406, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.673469387755102e-05, | |
| "loss": 3.9453, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 3.7773, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7551020408163266e-05, | |
| "loss": 4.0781, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.795918367346939e-05, | |
| "loss": 4.3906, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.836734693877551e-05, | |
| "loss": 4.2031, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.8775510204081636e-05, | |
| "loss": 4.0469, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9183673469387756e-05, | |
| "loss": 4.3359, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9591836734693877e-05, | |
| "loss": 3.9688, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 2e-05, | |
| "loss": 4.1289, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.9999980257330883e-05, | |
| "loss": 4.2734, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.9999921029401478e-05, | |
| "loss": 4.2344, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.9999822316445652e-05, | |
| "loss": 4.1797, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9999684118853177e-05, | |
| "loss": 4.0859, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9999506437169723e-05, | |
| "loss": 4.082, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.9999289272096886e-05, | |
| "loss": 4.1875, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.9999032624492144e-05, | |
| "loss": 3.9062, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.999873649536887e-05, | |
| "loss": 4.0664, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.9998400885896355e-05, | |
| "loss": 3.9922, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.9998025797399753e-05, | |
| "loss": 4.0312, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9997611231360117e-05, | |
| "loss": 3.9297, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9997157189414373e-05, | |
| "loss": 4.0781, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.9996663673355326e-05, | |
| "loss": 3.8125, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.9996130685131637e-05, | |
| "loss": 3.9375, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.999555822684783e-05, | |
| "loss": 4.1602, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.9994946300764276e-05, | |
| "loss": 3.9336, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.999429490929718e-05, | |
| "loss": 4.207, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.999360405501859e-05, | |
| "loss": 3.7422, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.9992873740656372e-05, | |
| "loss": 3.9414, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.9992103969094182e-05, | |
| "loss": 3.8711, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.99912947433715e-05, | |
| "loss": 4.25, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.999044606668358e-05, | |
| "loss": 3.8203, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.998955794238145e-05, | |
| "loss": 3.9688, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.9988630373971896e-05, | |
| "loss": 3.9414, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.9987663365117456e-05, | |
| "loss": 3.5312, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.99866569196364e-05, | |
| "loss": 3.8789, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.9985611041502704e-05, | |
| "loss": 3.9062, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.9984525734846056e-05, | |
| "loss": 3.75, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.998340100395183e-05, | |
| "loss": 3.8984, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.9982236853261067e-05, | |
| "loss": 4.0781, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.9981033287370443e-05, | |
| "loss": 3.8672, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.9979790311032288e-05, | |
| "loss": 3.7461, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.9978507929154534e-05, | |
| "loss": 3.6602, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.9977186146800707e-05, | |
| "loss": 3.8555, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.9975824969189913e-05, | |
| "loss": 3.8086, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.997442440169681e-05, | |
| "loss": 3.9727, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.997298444985158e-05, | |
| "loss": 3.6172, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.9971505119339923e-05, | |
| "loss": 3.8359, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.9969986416003026e-05, | |
| "loss": 3.8594, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.9968428345837542e-05, | |
| "loss": 3.7227, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.996683091499556e-05, | |
| "loss": 4.125, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.9965194129784597e-05, | |
| "loss": 3.832, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.9963517996667548e-05, | |
| "loss": 4.0, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.9961802522262685e-05, | |
| "loss": 4.0703, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.996004771334361e-05, | |
| "loss": 3.7461, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.9958253576839256e-05, | |
| "loss": 3.9727, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.9956420119833826e-05, | |
| "loss": 4.0664, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9954547349566783e-05, | |
| "loss": 4.2539, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9952635273432835e-05, | |
| "loss": 4.0156, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9950683898981866e-05, | |
| "loss": 4.1406, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.994869323391895e-05, | |
| "loss": 4.1523, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9946663286104303e-05, | |
| "loss": 3.9023, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.994459406355323e-05, | |
| "loss": 3.8086, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.994248557443613e-05, | |
| "loss": 4.0391, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.9940337827078448e-05, | |
| "loss": 3.9453, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.9938150829960634e-05, | |
| "loss": 4.0039, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.993592459171812e-05, | |
| "loss": 3.9883, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.9933659121141283e-05, | |
| "loss": 3.6758, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.993135442717541e-05, | |
| "loss": 3.793, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.9929010518920667e-05, | |
| "loss": 3.7383, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.9926627405632048e-05, | |
| "loss": 3.7227, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.992420509671936e-05, | |
| "loss": 3.9023, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.992174360174717e-05, | |
| "loss": 4.0078, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.991924293043478e-05, | |
| "loss": 4.2109, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.991670309265617e-05, | |
| "loss": 3.7461, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.9914124098439976e-05, | |
| "loss": 4.0039, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.9911505957969443e-05, | |
| "loss": 3.8867, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.990884868158239e-05, | |
| "loss": 3.9883, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.9906152279771162e-05, | |
| "loss": 3.8359, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.990341676318259e-05, | |
| "loss": 3.6719, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.9900642142617958e-05, | |
| "loss": 3.5898, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.9897828429032946e-05, | |
| "loss": 3.9922, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.98949756335376e-05, | |
| "loss": 3.8711, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.9892083767396274e-05, | |
| "loss": 3.6797, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.9889152842027607e-05, | |
| "loss": 4.0078, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.9886182869004447e-05, | |
| "loss": 3.8164, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.9883173860053845e-05, | |
| "loss": 3.6953, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.9880125827056967e-05, | |
| "loss": 3.7344, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9877038782049074e-05, | |
| "loss": 3.6562, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9873912737219468e-05, | |
| "loss": 3.5625, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.987074770491145e-05, | |
| "loss": 4.0859, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9867543697622248e-05, | |
| "loss": 3.7344, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.9864300728002997e-05, | |
| "loss": 3.9453, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.986101880885867e-05, | |
| "loss": 4.1211, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.985769795314804e-05, | |
| "loss": 3.7344, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.9854338173983615e-05, | |
| "loss": 3.6875, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.9850939484631598e-05, | |
| "loss": 3.8125, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.9847501898511824e-05, | |
| "loss": 3.707, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.9844025429197727e-05, | |
| "loss": 4.0781, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.984051009041626e-05, | |
| "loss": 3.8281, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.983695589604785e-05, | |
| "loss": 4.0391, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.9833362860126364e-05, | |
| "loss": 3.6719, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.982973099683902e-05, | |
| "loss": 4.0898, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.9826060320526355e-05, | |
| "loss": 3.8281, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.982235084568216e-05, | |
| "loss": 3.9219, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.9818602586953414e-05, | |
| "loss": 3.9961, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.9814815559140258e-05, | |
| "loss": 3.8125, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.9810989777195884e-05, | |
| "loss": 3.8164, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.9807125256226532e-05, | |
| "loss": 4.1094, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.9803222011491385e-05, | |
| "loss": 3.4805, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.979928005840255e-05, | |
| "loss": 3.7305, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.9795299412524948e-05, | |
| "loss": 3.793, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.9791280089576302e-05, | |
| "loss": 4.0312, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.978722210542704e-05, | |
| "loss": 3.6953, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9783125476100254e-05, | |
| "loss": 3.7891, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.977899021777162e-05, | |
| "loss": 3.6523, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.977481634676935e-05, | |
| "loss": 3.9414, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.9770603879574108e-05, | |
| "loss": 3.9609, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9766352832818972e-05, | |
| "loss": 3.4336, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.9762063223289334e-05, | |
| "loss": 3.6484, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.975773506792287e-05, | |
| "loss": 3.8281, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.9753368383809445e-05, | |
| "loss": 3.7578, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.974896318819106e-05, | |
| "loss": 3.8555, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.974451949846177e-05, | |
| "loss": 3.7617, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 1.974003733216765e-05, | |
| "loss": 3.5039, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 1.9735516707006676e-05, | |
| "loss": 3.7344, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.973095764082869e-05, | |
| "loss": 3.6172, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.972636015163532e-05, | |
| "loss": 3.7734, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.9721724257579907e-05, | |
| "loss": 3.543, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.9717049976967437e-05, | |
| "loss": 3.7031, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.971233732825446e-05, | |
| "loss": 3.543, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 1.9707586330049037e-05, | |
| "loss": 3.6836, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.9702797001110642e-05, | |
| "loss": 3.2969, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.9697969360350098e-05, | |
| "loss": 3.3789, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.969310342682951e-05, | |
| "loss": 3.5625, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.9688199219762183e-05, | |
| "loss": 3.9297, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.9683256758512544e-05, | |
| "loss": 3.6094, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.967827606259607e-05, | |
| "loss": 3.5547, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.96732571516792e-05, | |
| "loss": 3.5742, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 1.9668200045579283e-05, | |
| "loss": 3.3047, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 1.9663104764264468e-05, | |
| "loss": 3.5117, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.9657971327853644e-05, | |
| "loss": 3.4805, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.9652799756616364e-05, | |
| "loss": 3.4453, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.964759007097275e-05, | |
| "loss": 3.5195, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.964234229149342e-05, | |
| "loss": 3.375, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.963705643889941e-05, | |
| "loss": 3.4648, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.9631732534062088e-05, | |
| "loss": 3.6719, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.962637059800307e-05, | |
| "loss": 3.582, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.9620970651894146e-05, | |
| "loss": 3.8086, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.9615532717057185e-05, | |
| "loss": 3.5234, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.9610056814964053e-05, | |
| "loss": 3.6016, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.9604542967236535e-05, | |
| "loss": 3.6172, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.9598991195646252e-05, | |
| "loss": 3.3477, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.959340152211455e-05, | |
| "loss": 3.3125, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.9587773968712458e-05, | |
| "loss": 3.7891, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.958210855766055e-05, | |
| "loss": 3.8008, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.95764053113289e-05, | |
| "loss": 3.5156, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.9570664252236966e-05, | |
| "loss": 3.9531, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.956488540305351e-05, | |
| "loss": 3.3164, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.9559068786596526e-05, | |
| "loss": 3.6797, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.9553214425833108e-05, | |
| "loss": 3.4844, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.9547322343879397e-05, | |
| "loss": 3.6641, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.954139256400049e-05, | |
| "loss": 3.5195, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.9535425109610317e-05, | |
| "loss": 3.7773, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.9529420004271568e-05, | |
| "loss": 3.3711, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.952337727169561e-05, | |
| "loss": 3.8828, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.951729693574238e-05, | |
| "loss": 3.5781, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.9511179020420284e-05, | |
| "loss": 3.457, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.950502354988612e-05, | |
| "loss": 3.5312, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.9498830548444972e-05, | |
| "loss": 3.6367, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.9492600040550114e-05, | |
| "loss": 3.5625, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.948633205080292e-05, | |
| "loss": 3.5703, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.948002660395276e-05, | |
| "loss": 3.7461, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.9473683724896898e-05, | |
| "loss": 3.7148, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.9467303438680414e-05, | |
| "loss": 3.5039, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.946088577049608e-05, | |
| "loss": 3.5273, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.9454430745684276e-05, | |
| "loss": 3.7188, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.944793838973289e-05, | |
| "loss": 3.5586, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.94414087282772e-05, | |
| "loss": 3.6602, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.9434841787099804e-05, | |
| "loss": 3.3633, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.9428237592130487e-05, | |
| "loss": 3.2969, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.9421596169446135e-05, | |
| "loss": 3.7031, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.941491754527064e-05, | |
| "loss": 3.4375, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.940820174597476e-05, | |
| "loss": 3.6016, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.9401448798076064e-05, | |
| "loss": 3.6406, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.9394658728238797e-05, | |
| "loss": 3.5273, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.9387831563273775e-05, | |
| "loss": 3.4336, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.938096733013829e-05, | |
| "loss": 3.4141, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.9374066055936004e-05, | |
| "loss": 3.6797, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.9367127767916828e-05, | |
| "loss": 3.6953, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.9360152493476828e-05, | |
| "loss": 3.6797, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.9353140260158108e-05, | |
| "loss": 3.5938, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.9346091095648712e-05, | |
| "loss": 3.9492, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.93390050277825e-05, | |
| "loss": 3.6172, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.9331882084539056e-05, | |
| "loss": 3.5977, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.932472229404356e-05, | |
| "loss": 3.5703, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.9317525684566686e-05, | |
| "loss": 3.4336, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.931029228452449e-05, | |
| "loss": 3.5508, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.9303022122478303e-05, | |
| "loss": 3.7188, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.9295715227134595e-05, | |
| "loss": 3.4766, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.9288371627344894e-05, | |
| "loss": 3.6484, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.9280991352105656e-05, | |
| "loss": 3.5703, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.9273574430558143e-05, | |
| "loss": 3.4336, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.9266120891988326e-05, | |
| "loss": 3.5469, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.925863076582674e-05, | |
| "loss": 3.2812, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.9251104081648423e-05, | |
| "loss": 3.4102, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.9243540869172724e-05, | |
| "loss": 3.332, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.9235941158263253e-05, | |
| "loss": 3.5039, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.922830497892772e-05, | |
| "loss": 3.4883, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.9220632361317843e-05, | |
| "loss": 3.5664, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.9212923335729206e-05, | |
| "loss": 3.5195, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.920517793260116e-05, | |
| "loss": 3.4531, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.9197396182516694e-05, | |
| "loss": 3.7734, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.918957811620231e-05, | |
| "loss": 3.6953, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.9181723764527902e-05, | |
| "loss": 3.6133, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.917383315850665e-05, | |
| "loss": 3.5391, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.9165906329294875e-05, | |
| "loss": 3.5898, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.9157943308191934e-05, | |
| "loss": 3.7188, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.914994412664008e-05, | |
| "loss": 3.8125, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.9141908816224356e-05, | |
| "loss": 3.875, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.9133837408672456e-05, | |
| "loss": 3.4102, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.9125729935854606e-05, | |
| "loss": 3.2344, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.9117586429783433e-05, | |
| "loss": 3.7656, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.910940692261385e-05, | |
| "loss": 3.6992, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.9101191446642917e-05, | |
| "loss": 3.4766, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.909294003430972e-05, | |
| "loss": 3.1211, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.9084652718195237e-05, | |
| "loss": 3.4102, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 1.907632953102222e-05, | |
| "loss": 3.6602, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 1.906797050565505e-05, | |
| "loss": 3.6836, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.9059575675099622e-05, | |
| "loss": 3.582, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 1.9051145072503216e-05, | |
| "loss": 3.6172, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 1.9042678731154337e-05, | |
| "loss": 3.457, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.9034176684482638e-05, | |
| "loss": 3.3398, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.9025638966058722e-05, | |
| "loss": 3.4883, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 1.901706560959407e-05, | |
| "loss": 3.6602, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.900845664894086e-05, | |
| "loss": 3.6797, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.8999812118091877e-05, | |
| "loss": 3.4766, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.8991132051180332e-05, | |
| "loss": 3.3945, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.898241648247977e-05, | |
| "loss": 3.2461, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.8973665446403902e-05, | |
| "loss": 3.4023, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.8964878977506496e-05, | |
| "loss": 3.4492, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.895605711048122e-05, | |
| "loss": 3.5, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 1.8947199880161515e-05, | |
| "loss": 3.4531, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.8938307321520453e-05, | |
| "loss": 3.6523, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.89293794696706e-05, | |
| "loss": 3.6445, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.8920416359863885e-05, | |
| "loss": 3.3711, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.8911418027491453e-05, | |
| "loss": 3.4414, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.8902384508083518e-05, | |
| "loss": 3.2656, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.8893315837309235e-05, | |
| "loss": 3.6289, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.8884212050976568e-05, | |
| "loss": 3.4023, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 1.8875073185032116e-05, | |
| "loss": 3.6914, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.8865899275561003e-05, | |
| "loss": 3.3281, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.885669035878672e-05, | |
| "loss": 3.7227, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.8847446471070985e-05, | |
| "loss": 3.2891, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.8838167648913606e-05, | |
| "loss": 3.4844, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.882885392895232e-05, | |
| "loss": 3.7617, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.881950534796267e-05, | |
| "loss": 3.3945, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.8810121942857848e-05, | |
| "loss": 3.5547, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.8800703750688536e-05, | |
| "loss": 3.6484, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.8791250808642792e-05, | |
| "loss": 3.668, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.8781763154045873e-05, | |
| "loss": 3.5664, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.877224082436011e-05, | |
| "loss": 3.2695, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.8762683857184738e-05, | |
| "loss": 3.5781, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8753092290255765e-05, | |
| "loss": 3.8359, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8743466161445823e-05, | |
| "loss": 3.3242, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8733805508764e-05, | |
| "loss": 3.3086, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.872411037035572e-05, | |
| "loss": 3.4531, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8714380784502553e-05, | |
| "loss": 3.5586, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.870461678962211e-05, | |
| "loss": 3.6797, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.869481842426784e-05, | |
| "loss": 3.4609, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8684985727128936e-05, | |
| "loss": 3.6289, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8675118737030123e-05, | |
| "loss": 3.4844, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.866521749293155e-05, | |
| "loss": 3.7461, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8655282033928618e-05, | |
| "loss": 3.2852, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8645312399251818e-05, | |
| "loss": 3.6875, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8635308628266586e-05, | |
| "loss": 3.2266, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8625270760473164e-05, | |
| "loss": 3.5977, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8615198835506393e-05, | |
| "loss": 3.6133, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.8605092893135626e-05, | |
| "loss": 3.6172, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8594952973264512e-05, | |
| "loss": 3.4766, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8584779115930866e-05, | |
| "loss": 3.4766, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.857457136130651e-05, | |
| "loss": 3.6875, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.856432974969711e-05, | |
| "loss": 3.3359, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.855405432154201e-05, | |
| "loss": 3.5, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8543745117414094e-05, | |
| "loss": 3.5547, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8533402178019596e-05, | |
| "loss": 3.1367, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8523025544197964e-05, | |
| "loss": 3.4141, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8512615256921692e-05, | |
| "loss": 3.0078, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8502171357296144e-05, | |
| "loss": 3.0586, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.8491693886559413e-05, | |
| "loss": 3.1953, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.848118288608215e-05, | |
| "loss": 3.0625, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.8470638397367397e-05, | |
| "loss": 3.25, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.846006046205042e-05, | |
| "loss": 3.2422, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.8449449121898552e-05, | |
| "loss": 2.9258, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.8438804418811038e-05, | |
| "loss": 2.9883, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.842812639481884e-05, | |
| "loss": 3.3203, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.84174150920845e-05, | |
| "loss": 3.0195, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.8406670552901958e-05, | |
| "loss": 2.9375, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.839589281969639e-05, | |
| "loss": 3.2578, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.8385081935024044e-05, | |
| "loss": 3.0469, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.837423794157206e-05, | |
| "loss": 3.1367, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.836336088215831e-05, | |
| "loss": 3.0234, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.835245079973124e-05, | |
| "loss": 2.8242, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.834150773736967e-05, | |
| "loss": 2.9414, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.8330531738282656e-05, | |
| "loss": 3.0742, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.8319522845809306e-05, | |
| "loss": 3.0625, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.8308481103418597e-05, | |
| "loss": 2.8828, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.8297406554709228e-05, | |
| "loss": 3.1836, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.8286299243409424e-05, | |
| "loss": 2.8086, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.8275159213376783e-05, | |
| "loss": 2.9258, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.826398650859809e-05, | |
| "loss": 3.0977, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.8252781173189148e-05, | |
| "loss": 3.3086, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.82415432513946e-05, | |
| "loss": 3.0117, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.823027278758776e-05, | |
| "loss": 2.957, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.821896982627044e-05, | |
| "loss": 3.2617, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.8207634412072765e-05, | |
| "loss": 3.1172, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.8196266589753e-05, | |
| "loss": 2.8867, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.818486640419737e-05, | |
| "loss": 3.2539, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.81734339004199e-05, | |
| "loss": 2.8633, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.816196912356222e-05, | |
| "loss": 3.1016, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.8150472118893382e-05, | |
| "loss": 3.0898, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.8138942931809702e-05, | |
| "loss": 2.9453, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.8127381607834563e-05, | |
| "loss": 3.2383, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.8115788192618247e-05, | |
| "loss": 3.0703, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.8104162731937746e-05, | |
| "loss": 3.0977, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.8092505271696582e-05, | |
| "loss": 3.2344, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.808081585792463e-05, | |
| "loss": 2.7617, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.8069094536777938e-05, | |
| "loss": 3.0898, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.805734135453854e-05, | |
| "loss": 3.0781, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.8045556357614273e-05, | |
| "loss": 3.4922, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.8033739592538598e-05, | |
| "loss": 3.1211, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.8021891105970405e-05, | |
| "loss": 2.9453, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.8010010944693846e-05, | |
| "loss": 3.1016, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.7998099155618147e-05, | |
| "loss": 3.0117, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.7986155785777402e-05, | |
| "loss": 3.1523, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.7974180882330413e-05, | |
| "loss": 3.0352, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.7962174492560492e-05, | |
| "loss": 2.8711, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.7950136663875274e-05, | |
| "loss": 3.1953, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.7938067443806538e-05, | |
| "loss": 3.2188, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.7925966880009998e-05, | |
| "loss": 2.8203, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.791383502026515e-05, | |
| "loss": 3.1172, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.790167191247504e-05, | |
| "loss": 2.9414, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.7889477604666124e-05, | |
| "loss": 2.8398, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.787725214498803e-05, | |
| "loss": 3.1836, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.78649955817134e-05, | |
| "loss": 3.0625, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.785270796323769e-05, | |
| "loss": 2.8945, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.784038933807898e-05, | |
| "loss": 2.9688, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.7828039754877778e-05, | |
| "loss": 3.0352, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.7815659262396825e-05, | |
| "loss": 3.0977, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.780324790952092e-05, | |
| "loss": 3.1445, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.7790805745256703e-05, | |
| "loss": 2.9766, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.7778332818732492e-05, | |
| "loss": 3.0547, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.7765829179198048e-05, | |
| "loss": 3.1758, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.7753294876024417e-05, | |
| "loss": 3.0625, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.7740729958703725e-05, | |
| "loss": 2.9297, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.7728134476848965e-05, | |
| "loss": 3.0586, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.7715508480193832e-05, | |
| "loss": 3.0039, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.7702852018592493e-05, | |
| "loss": 2.8086, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.769016514201942e-05, | |
| "loss": 2.9336, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.7677447900569166e-05, | |
| "loss": 3.4219, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.7664700344456198e-05, | |
| "loss": 3.0625, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.765192252401467e-05, | |
| "loss": 3.2617, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.7639114489698238e-05, | |
| "loss": 3.0977, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.762627629207986e-05, | |
| "loss": 3.0703, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.7613407981851586e-05, | |
| "loss": 3.0938, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.760050960982439e-05, | |
| "loss": 3.3047, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 1.758758122692791e-05, | |
| "loss": 2.8867, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.757462288421032e-05, | |
| "loss": 3.2148, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.7561634632838062e-05, | |
| "loss": 3.1172, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 1.7548616524095697e-05, | |
| "loss": 2.9141, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.753556860938566e-05, | |
| "loss": 3.0938, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.7522490940228086e-05, | |
| "loss": 2.8672, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.7509383568260597e-05, | |
| "loss": 3.1641, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.749624654523809e-05, | |
| "loss": 2.9883, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.7483079923032543e-05, | |
| "loss": 3.0898, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.7469883753632817e-05, | |
| "loss": 3.0391, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.745665808914443e-05, | |
| "loss": 3.1055, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.744340298178936e-05, | |
| "loss": 3.0664, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.743011848390585e-05, | |
| "loss": 2.8672, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 1.7416804647948194e-05, | |
| "loss": 3.2891, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 1.740346152648652e-05, | |
| "loss": 2.9805, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 1.7390089172206594e-05, | |
| "loss": 2.7305, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 1.7376687637909607e-05, | |
| "loss": 3.0547, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 1.7363256976511972e-05, | |
| "loss": 2.7773, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 1.7349797241045115e-05, | |
| "loss": 3.2188, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 1.733630848465525e-05, | |
| "loss": 3.0156, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 1.732279076060319e-05, | |
| "loss": 3.1328, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 1.730924412226413e-05, | |
| "loss": 3.0664, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 1.729566862312742e-05, | |
| "loss": 2.9102, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 1.7282064316796387e-05, | |
| "loss": 3.0508, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 1.726843125698809e-05, | |
| "loss": 2.8711, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 1.7254769497533128e-05, | |
| "loss": 2.75, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 1.724107909237542e-05, | |
| "loss": 2.8438, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.7227360095571992e-05, | |
| "loss": 2.9883, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 1.721361256129277e-05, | |
| "loss": 3.2461, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.719983654382036e-05, | |
| "loss": 3.0781, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.7186032097549822e-05, | |
| "loss": 3.1523, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.717219927698849e-05, | |
| "loss": 2.832, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.7158338136755724e-05, | |
| "loss": 3.2617, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 1.7144448731582698e-05, | |
| "loss": 3.0781, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 1.7130531116312202e-05, | |
| "loss": 3.1641, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.7116585345898413e-05, | |
| "loss": 3.1484, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 1.7102611475406676e-05, | |
| "loss": 3.2656, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.7088609560013284e-05, | |
| "loss": 3.0938, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.7074579655005282e-05, | |
| "loss": 2.9648, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 1.7060521815780225e-05, | |
| "loss": 3.1328, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.704643609784596e-05, | |
| "loss": 3.1211, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 1.7032322556820428e-05, | |
| "loss": 3.1719, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.7018181248431416e-05, | |
| "loss": 2.9883, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 1.700401222851636e-05, | |
| "loss": 3.1172, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 1.698981555302212e-05, | |
| "loss": 2.9531, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 1.6975591278004747e-05, | |
| "loss": 2.9375, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 1.696133945962927e-05, | |
| "loss": 3.1875, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 1.6947060154169473e-05, | |
| "loss": 3.0742, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.6932753418007683e-05, | |
| "loss": 3.0977, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.691841930763453e-05, | |
| "loss": 2.9531, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 1.690405787964873e-05, | |
| "loss": 2.9609, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 1.688966919075687e-05, | |
| "loss": 2.7578, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.687525329777317e-05, | |
| "loss": 2.9961, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.686081025761928e-05, | |
| "loss": 3.3203, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.684634012732403e-05, | |
| "loss": 2.9258, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.6831842964023212e-05, | |
| "loss": 3.1445, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.6817318824959375e-05, | |
| "loss": 3.2617, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 1.680276776748157e-05, | |
| "loss": 2.9883, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.6788189849045135e-05, | |
| "loss": 2.9219, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.6773585127211478e-05, | |
| "loss": 2.8281, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 1.6758953659647838e-05, | |
| "loss": 3.0312, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 1.6744295504127055e-05, | |
| "loss": 3.2461, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 1.6729610718527357e-05, | |
| "loss": 3.1562, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.6714899360832118e-05, | |
| "loss": 2.9023, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.6700161489129624e-05, | |
| "loss": 3.0898, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.668539716161287e-05, | |
| "loss": 2.9414, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.667060643657929e-05, | |
| "loss": 2.9844, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6655789372430572e-05, | |
| "loss": 3.0859, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.6640946027672395e-05, | |
| "loss": 3.1758, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.66260764609142e-05, | |
| "loss": 3.1719, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.6611180730868975e-05, | |
| "loss": 3.0508, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.6596258896353027e-05, | |
| "loss": 3.1406, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.658131101628571e-05, | |
| "loss": 3.1836, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.656633714968924e-05, | |
| "loss": 3.0352, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.6551337355688437e-05, | |
| "loss": 2.8789, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.653631169351049e-05, | |
| "loss": 3.1094, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.6521260222484738e-05, | |
| "loss": 3.4102, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 1.650618300204242e-05, | |
| "loss": 3.293, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 1.6491080091716457e-05, | |
| "loss": 2.9922, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.64759515511412e-05, | |
| "loss": 3.082, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.6460797440052195e-05, | |
| "loss": 2.9297, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.6445617818285974e-05, | |
| "loss": 2.8906, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.643041274577978e-05, | |
| "loss": 3.0625, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.6415182282571356e-05, | |
| "loss": 3.1562, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.6399926488798702e-05, | |
| "loss": 2.6367, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.6384645424699835e-05, | |
| "loss": 2.207, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 1.6369339150612557e-05, | |
| "loss": 2.4844, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 1.6354007726974205e-05, | |
| "loss": 2.4219, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.6338651214321426e-05, | |
| "loss": 2.4531, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.632326967328993e-05, | |
| "loss": 2.4961, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.630786316461425e-05, | |
| "loss": 2.4219, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 1.6292431749127507e-05, | |
| "loss": 2.5273, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.627697548776117e-05, | |
| "loss": 2.4492, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.6261494441544805e-05, | |
| "loss": 2.4922, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 1.624598867160585e-05, | |
| "loss": 2.375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 1.623045823916936e-05, | |
| "loss": 2.6914, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.6214903205557774e-05, | |
| "loss": 2.4141, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 1.619932363219067e-05, | |
| "loss": 2.5742, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 1.6183719580584515e-05, | |
| "loss": 2.332, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.6168091112352443e-05, | |
| "loss": 2.4727, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.6152438289203982e-05, | |
| "loss": 2.5352, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.6136761172944837e-05, | |
| "loss": 2.4375, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 1.612105982547663e-05, | |
| "loss": 2.543, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 1.6105334308796665e-05, | |
| "loss": 2.3945, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.6089584684997674e-05, | |
| "loss": 2.4531, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.607381101626758e-05, | |
| "loss": 2.5781, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 1.6058013364889247e-05, | |
| "loss": 2.2852, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.6042191793240242e-05, | |
| "loss": 2.293, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.6026346363792565e-05, | |
| "loss": 2.5156, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 1.6010477139112438e-05, | |
| "loss": 2.3711, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.5994584181860028e-05, | |
| "loss": 2.2891, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.5978667554789216e-05, | |
| "loss": 2.3867, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.596272732074734e-05, | |
| "loss": 2.457, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.5946763542674958e-05, | |
| "loss": 2.293, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 1.5930776283605585e-05, | |
| "loss": 2.4492, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.5914765606665454e-05, | |
| "loss": 2.2383, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.5898731575073262e-05, | |
| "loss": 2.3281, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.5882674252139928e-05, | |
| "loss": 2.4688, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.5866593701268334e-05, | |
| "loss": 2.3125, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.5850489985953076e-05, | |
| "loss": 2.3672, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.5834363169780227e-05, | |
| "loss": 2.4688, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.5818213316427056e-05, | |
| "loss": 2.375, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 1.5802040489661817e-05, | |
| "loss": 2.418, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.578584475334345e-05, | |
| "loss": 2.3867, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.5769626171421376e-05, | |
| "loss": 2.2852, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 1.5753384807935214e-05, | |
| "loss": 2.5234, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 1.5737120727014535e-05, | |
| "loss": 2.3828, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.572083399287861e-05, | |
| "loss": 2.4023, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 1.570452466983617e-05, | |
| "loss": 2.4961, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 1.5688192822285116e-05, | |
| "loss": 2.5234, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.567183851471231e-05, | |
| "loss": 2.418, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.565546181169328e-05, | |
| "loss": 2.3555, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 1.5639062777892e-05, | |
| "loss": 2.4883, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.5622641478060602e-05, | |
| "loss": 2.5586, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.5606197977039154e-05, | |
| "loss": 2.3359, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 1.5589732339755362e-05, | |
| "loss": 2.3398, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.5573244631224364e-05, | |
| "loss": 2.2969, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.5556734916548432e-05, | |
| "loss": 2.375, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.5540203260916728e-05, | |
| "loss": 2.3398, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.552364972960506e-05, | |
| "loss": 2.3516, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 1.5507074387975603e-05, | |
| "loss": 2.4805, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.5490477301476648e-05, | |
| "loss": 2.4766, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.5473858535642365e-05, | |
| "loss": 2.4062, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.5457218156092503e-05, | |
| "loss": 2.4727, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.5440556228532168e-05, | |
| "loss": 2.3672, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.5423872818751544e-05, | |
| "loss": 2.5195, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 1.5407167992625636e-05, | |
| "loss": 2.418, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 1.5390441816114022e-05, | |
| "loss": 2.3828, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.5373694355260565e-05, | |
| "loss": 2.4336, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.5356925676193192e-05, | |
| "loss": 2.3086, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 1.534013584512359e-05, | |
| "loss": 2.25, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 1.5323324928346984e-05, | |
| "loss": 2.3242, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 1.5306492992241836e-05, | |
| "loss": 2.4023, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.5289640103269626e-05, | |
| "loss": 2.4531, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 1.527276632797455e-05, | |
| "loss": 2.3945, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 1.5255871732983284e-05, | |
| "loss": 2.4258, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.5238956385004703e-05, | |
| "loss": 2.4766, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.5222020350829636e-05, | |
| "loss": 2.4141, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.5205063697330582e-05, | |
| "loss": 2.3359, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 1.5188086491461467e-05, | |
| "loss": 2.3047, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 1.5171088800257354e-05, | |
| "loss": 2.5508, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.5154070690834211e-05, | |
| "loss": 2.0957, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.5137032230388613e-05, | |
| "loss": 2.4102, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 1.5119973486197497e-05, | |
| "loss": 2.5352, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.5102894525617892e-05, | |
| "loss": 2.25, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.5085795416086655e-05, | |
| "loss": 2.3047, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 1.5068676225120196e-05, | |
| "loss": 2.3359, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.5051537020314218e-05, | |
| "loss": 2.5508, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.5034377869343453e-05, | |
| "loss": 2.6211, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 1.5017198839961388e-05, | |
| "loss": 2.5625, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 2.293, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.4982781417369496e-05, | |
| "loss": 2.5078, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 1.4965543160058028e-05, | |
| "loss": 2.3594, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 1.4948285296131435e-05, | |
| "loss": 2.4531, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.4931007893732981e-05, | |
| "loss": 2.4961, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.4913711021083071e-05, | |
| "loss": 2.3672, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 1.4896394746478995e-05, | |
| "loss": 2.5469, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 1.4879059138294647e-05, | |
| "loss": 2.5703, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 1.4861704264980264e-05, | |
| "loss": 2.5859, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.4844330195062145e-05, | |
| "loss": 2.4648, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 1.4826936997142399e-05, | |
| "loss": 2.4883, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 1.4809524739898651e-05, | |
| "loss": 2.2656, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 1.4792093492083792e-05, | |
| "loss": 2.2734, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 1.4774643322525691e-05, | |
| "loss": 2.5156, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.4757174300126935e-05, | |
| "loss": 2.6797, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.473968649386455e-05, | |
| "loss": 2.3398, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.4722179972789725e-05, | |
| "loss": 2.2539, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.4704654806027558e-05, | |
| "loss": 2.5781, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.4687111062776758e-05, | |
| "loss": 2.5352, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 1.466954881230939e-05, | |
| "loss": 2.5195, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 1.4651968123970592e-05, | |
| "loss": 2.3945, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 1.4634369067178312e-05, | |
| "loss": 2.4922, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.4616751711423016e-05, | |
| "loss": 2.4922, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.4599116126267431e-05, | |
| "loss": 2.4961, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 1.4581462381346261e-05, | |
| "loss": 2.4922, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.4563790546365914e-05, | |
| "loss": 2.5, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.454610069110423e-05, | |
| "loss": 2.4219, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 1.45283928854102e-05, | |
| "loss": 2.418, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.4510667199203697e-05, | |
| "loss": 2.5488, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.4492923702475183e-05, | |
| "loss": 2.5312, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 1.4475162465285463e-05, | |
| "loss": 2.5273, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 1.4457383557765385e-05, | |
| "loss": 2.4141, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 1.443958705011556e-05, | |
| "loss": 2.4453, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 1.4421773012606104e-05, | |
| "loss": 2.293, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 1.4403941515576344e-05, | |
| "loss": 2.4258, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.4386092629434551e-05, | |
| "loss": 2.4648, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.4368226424657661e-05, | |
| "loss": 2.3438, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.4350342971790979e-05, | |
| "loss": 2.2168, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 1.4332442341447926e-05, | |
| "loss": 2.3828, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 1.4314524604309748e-05, | |
| "loss": 2.5117, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 1.4296589831125234e-05, | |
| "loss": 2.4961, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.4278638092710446e-05, | |
| "loss": 2.5391, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.4260669459948429e-05, | |
| "loss": 2.3828, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 1.4242684003788934e-05, | |
| "loss": 2.4102, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 1.4224681795248149e-05, | |
| "loss": 2.457, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 1.42066629054084e-05, | |
| "loss": 2.5, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 1.418862740541788e-05, | |
| "loss": 2.4102, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 1.4170575366490376e-05, | |
| "loss": 2.1758, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 1.415250685990497e-05, | |
| "loss": 2.6445, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 1.4134421957005775e-05, | |
| "loss": 2.043, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.4116320729201642e-05, | |
| "loss": 2.457, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 1.4098203247965876e-05, | |
| "loss": 2.1992, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 1.4080069584835971e-05, | |
| "loss": 2.2891, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 1.4061919811413305e-05, | |
| "loss": 2.2227, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.4043753999362872e-05, | |
| "loss": 2.2305, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.4025572220412998e-05, | |
| "loss": 2.625, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.400737454635505e-05, | |
| "loss": 2.4219, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.398916104904316e-05, | |
| "loss": 2.6133, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 1.3970931800393943e-05, | |
| "loss": 2.5625, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 1.3952686872386195e-05, | |
| "loss": 2.4531, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 1.3934426337060638e-05, | |
| "loss": 2.6016, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 1.391615026651961e-05, | |
| "loss": 2.3789, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 1.3897858732926794e-05, | |
| "loss": 2.3281, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.3879551808506932e-05, | |
| "loss": 2.2031, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 1.3861229565545532e-05, | |
| "loss": 2.5352, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 1.384289207638859e-05, | |
| "loss": 2.3008, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 1.3824539413442304e-05, | |
| "loss": 2.5352, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 1.3806171649172782e-05, | |
| "loss": 2.4922, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 1.3787788856105762e-05, | |
| "loss": 2.3945, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.3769391106826326e-05, | |
| "loss": 2.6016, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.3750978473978611e-05, | |
| "loss": 2.4375, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 1.3732551030265514e-05, | |
| "loss": 2.5195, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.371410884844843e-05, | |
| "loss": 2.5391, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.3695652001346928e-05, | |
| "loss": 2.4102, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 1.3677180561838501e-05, | |
| "loss": 2.4727, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 1.3658694602858247e-05, | |
| "loss": 2.6055, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.36401941973986e-05, | |
| "loss": 2.2852, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 1.362167941850904e-05, | |
| "loss": 1.9121, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 1.3603150339295797e-05, | |
| "loss": 2.0977, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 1.3584607032921566e-05, | |
| "loss": 1.9668, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 1.3566049572605222e-05, | |
| "loss": 1.8398, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.3547478031621517e-05, | |
| "loss": 1.7559, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 1.3528892483300821e-05, | |
| "loss": 2.0586, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 1.3510293001028792e-05, | |
| "loss": 1.8984, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 1.3491679658246114e-05, | |
| "loss": 1.6895, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 1.3473052528448203e-05, | |
| "loss": 1.7812, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 1.3454411685184913e-05, | |
| "loss": 1.7539, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 1.3435757202060242e-05, | |
| "loss": 1.9492, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 1.3417089152732049e-05, | |
| "loss": 1.7031, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.3398407610911752e-05, | |
| "loss": 1.791, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 1.3379712650364061e-05, | |
| "loss": 1.8066, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 1.3361004344906652e-05, | |
| "loss": 1.6992, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 1.3342282768409904e-05, | |
| "loss": 1.8965, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 1.3323547994796597e-05, | |
| "loss": 1.7832, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 1.330480009804162e-05, | |
| "loss": 1.8633, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 1.3286039152171667e-05, | |
| "loss": 1.6055, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 1.3267265231264982e-05, | |
| "loss": 1.8164, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 1.3248478409451017e-05, | |
| "loss": 1.9805, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 1.3229678760910174e-05, | |
| "loss": 1.666, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 1.3210866359873506e-05, | |
| "loss": 1.8867, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 1.3192041280622409e-05, | |
| "loss": 1.9473, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 1.3173203597488348e-05, | |
| "loss": 1.9375, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 1.3154353384852559e-05, | |
| "loss": 1.8145, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.3135490717145726e-05, | |
| "loss": 1.7539, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.3116615668847749e-05, | |
| "loss": 1.7734, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 1.3097728314487385e-05, | |
| "loss": 1.7656, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 1.3078828728641994e-05, | |
| "loss": 1.8672, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 1.305991698593723e-05, | |
| "loss": 1.7656, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.3040993161046749e-05, | |
| "loss": 1.8789, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.3022057328691915e-05, | |
| "loss": 1.627, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 1.3003109563641499e-05, | |
| "loss": 1.7695, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 1.298414994071139e-05, | |
| "loss": 1.709, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 1.2965178534764311e-05, | |
| "loss": 1.7383, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 1.294619542070949e-05, | |
| "loss": 1.6523, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 1.2927200673502399e-05, | |
| "loss": 1.8145, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 1.2908194368144437e-05, | |
| "loss": 1.7949, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 1.288917657968265e-05, | |
| "loss": 1.7422, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 1.287014738320941e-05, | |
| "loss": 1.9102, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.285110685386215e-05, | |
| "loss": 1.6523, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.283205506682304e-05, | |
| "loss": 1.5938, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 1.2812992097318711e-05, | |
| "loss": 1.6797, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 1.2793918020619937e-05, | |
| "loss": 1.8164, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 1.2774832912041356e-05, | |
| "loss": 1.6328, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.2755736846941167e-05, | |
| "loss": 1.9219, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.2736629900720832e-05, | |
| "loss": 1.8496, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 1.2717512148824764e-05, | |
| "loss": 1.7031, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.2698383666740064e-05, | |
| "loss": 1.7266, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.2679244529996182e-05, | |
| "loss": 1.9102, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.2660094814164653e-05, | |
| "loss": 1.6855, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.2640934594858773e-05, | |
| "loss": 1.6641, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 1.262176394773332e-05, | |
| "loss": 1.8672, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.2602582948484243e-05, | |
| "loss": 1.7383, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 1.2583391672848361e-05, | |
| "loss": 2.0586, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 1.256419019660308e-05, | |
| "loss": 1.8281, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.2544978595566078e-05, | |
| "loss": 1.7207, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.2525756945595006e-05, | |
| "loss": 1.6328, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 1.2506525322587207e-05, | |
| "loss": 1.8379, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 1.2487283802479389e-05, | |
| "loss": 1.8828, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.246803246124735e-05, | |
| "loss": 1.916, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.2448771374905655e-05, | |
| "loss": 1.7852, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.2429500619507362e-05, | |
| "loss": 2.0391, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.2410220271143693e-05, | |
| "loss": 1.7422, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.2390930405943766e-05, | |
| "loss": 1.8672, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 1.237163110007426e-05, | |
| "loss": 1.8457, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.2352322429739134e-05, | |
| "loss": 1.7402, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.233300447117933e-05, | |
| "loss": 1.6465, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.2313677300672463e-05, | |
| "loss": 1.6777, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.2294340994532511e-05, | |
| "loss": 1.7656, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.2274995629109545e-05, | |
| "loss": 1.8066, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 1.2255641280789385e-05, | |
| "loss": 1.8809, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 1.2236278025993334e-05, | |
| "loss": 1.8223, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 1.2216905941177854e-05, | |
| "loss": 1.7656, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.2197525102834284e-05, | |
| "loss": 1.8066, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 1.2178135587488515e-05, | |
| "loss": 1.7207, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 1.215873747170071e-05, | |
| "loss": 1.8535, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 1.2139330832064975e-05, | |
| "loss": 1.7949, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.2119915745209092e-05, | |
| "loss": 1.8926, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 1.2100492287794186e-05, | |
| "loss": 1.6777, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 1.2081060536514432e-05, | |
| "loss": 1.7773, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 1.206162056809676e-05, | |
| "loss": 1.6699, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 1.2042172459300546e-05, | |
| "loss": 1.709, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 1.2022716286917298e-05, | |
| "loss": 1.8887, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 1.2003252127770378e-05, | |
| "loss": 1.9219, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 1.198378005871467e-05, | |
| "loss": 1.8535, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 1.1964300156636304e-05, | |
| "loss": 1.7051, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 1.1944812498452329e-05, | |
| "loss": 1.7578, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 1.192531716111042e-05, | |
| "loss": 1.8203, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 1.1905814221588581e-05, | |
| "loss": 1.6016, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 1.1886303756894828e-05, | |
| "loss": 1.543, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 1.1866785844066884e-05, | |
| "loss": 1.8145, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 1.1847260560171895e-05, | |
| "loss": 1.6719, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 1.18277279823061e-05, | |
| "loss": 1.6953, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 1.1808188187594549e-05, | |
| "loss": 1.6406, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 1.1788641253190779e-05, | |
| "loss": 1.7246, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 1.176908725627652e-05, | |
| "loss": 1.6992, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 1.1749526274061394e-05, | |
| "loss": 1.916, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 1.1729958383782598e-05, | |
| "loss": 1.6543, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 1.1710383662704608e-05, | |
| "loss": 1.707, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 1.1690802188118878e-05, | |
| "loss": 1.6953, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 1.1671214037343515e-05, | |
| "loss": 1.6875, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 1.1651619287723e-05, | |
| "loss": 1.7969, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 1.1632018016627859e-05, | |
| "loss": 1.7461, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 1.1612410301454384e-05, | |
| "loss": 1.8887, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 1.1592796219624292e-05, | |
| "loss": 1.9414, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 1.1573175848584455e-05, | |
| "loss": 1.8711, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 1.1553549265806567e-05, | |
| "loss": 1.7246, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 1.1533916548786856e-05, | |
| "loss": 1.8496, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 1.1514277775045768e-05, | |
| "loss": 1.918, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 1.1494633022127669e-05, | |
| "loss": 1.8574, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 1.1474982367600524e-05, | |
| "loss": 1.668, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 1.1455325889055616e-05, | |
| "loss": 1.7031, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 1.1435663664107204e-05, | |
| "loss": 1.7754, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 1.141599577039226e-05, | |
| "loss": 1.7129, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 1.1396322285570119e-05, | |
| "loss": 1.6582, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 1.1376643287322202e-05, | |
| "loss": 1.8672, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 1.1356958853351705e-05, | |
| "loss": 1.8867, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 1.1337269061383278e-05, | |
| "loss": 1.8359, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 1.1317573989162727e-05, | |
| "loss": 1.8535, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 1.129787371445672e-05, | |
| "loss": 1.7793, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 1.1278168315052445e-05, | |
| "loss": 1.834, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 1.1258457868757352e-05, | |
| "loss": 1.8906, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 1.1238742453398794e-05, | |
| "loss": 1.9512, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 1.1219022146823762e-05, | |
| "loss": 1.8047, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 1.1199297026898547e-05, | |
| "loss": 1.627, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.1179567171508463e-05, | |
| "loss": 1.8242, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 1.1159832658557498e-05, | |
| "loss": 1.7129, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.1140093565968055e-05, | |
| "loss": 1.7012, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.1120349971680605e-05, | |
| "loss": 1.8145, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.1100601953653393e-05, | |
| "loss": 1.6426, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.1080849589862142e-05, | |
| "loss": 1.8574, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.1061092958299727e-05, | |
| "loss": 1.752, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.1041332136975874e-05, | |
| "loss": 1.9531, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.1021567203916861e-05, | |
| "loss": 1.7676, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.1001798237165185e-05, | |
| "loss": 1.7656, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.0982025314779287e-05, | |
| "loss": 1.9512, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.0962248514833218e-05, | |
| "loss": 1.791, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 1.0942467915416342e-05, | |
| "loss": 1.8398, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.092268359463302e-05, | |
| "loss": 1.6797, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.090289563060232e-05, | |
| "loss": 1.7871, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 1.088310410145768e-05, | |
| "loss": 1.6738, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 1.086330908534663e-05, | |
| "loss": 1.8711, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 1.0843510660430447e-05, | |
| "loss": 1.752, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 1.0823708904883898e-05, | |
| "loss": 1.9297, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 1.0803903896894877e-05, | |
| "loss": 1.9141, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 1.0784095714664124e-05, | |
| "loss": 1.7188, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 1.0764284436404924e-05, | |
| "loss": 1.7441, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 1.0744470140342775e-05, | |
| "loss": 1.7266, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 1.0724652904715091e-05, | |
| "loss": 1.832, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 1.0704832807770909e-05, | |
| "loss": 1.6152, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.0685009927770542e-05, | |
| "loss": 1.8281, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.0665184342985306e-05, | |
| "loss": 1.7812, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 1.064535613169719e-05, | |
| "loss": 1.875, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.0625525372198564e-05, | |
| "loss": 1.748, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.0605692142791846e-05, | |
| "loss": 1.7148, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 1.0585856521789215e-05, | |
| "loss": 1.7715, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 1.056601858751229e-05, | |
| "loss": 1.7676, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 1.0546178418291833e-05, | |
| "loss": 1.7852, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.0526336092467414e-05, | |
| "loss": 1.9141, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.0506491688387128e-05, | |
| "loss": 1.6602, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.0486645284407282e-05, | |
| "loss": 1.75, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 1.0466796958892071e-05, | |
| "loss": 1.5469, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 1.0446946790213275e-05, | |
| "loss": 1.2852, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 1.0427094856749966e-05, | |
| "loss": 1.3926, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 1.0407241236888164e-05, | |
| "loss": 1.293, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 1.0387386009020569e-05, | |
| "loss": 1.2559, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.0367529251546208e-05, | |
| "loss": 1.3379, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.034767104287017e-05, | |
| "loss": 1.3047, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 1.032781146140326e-05, | |
| "loss": 1.3105, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 1.0307950585561705e-05, | |
| "loss": 1.3203, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 1.0288088493766846e-05, | |
| "loss": 1.2461, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 1.0268225264444829e-05, | |
| "loss": 1.3281, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 1.0248360976026279e-05, | |
| "loss": 1.1758, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 1.0228495706946015e-05, | |
| "loss": 1.1465, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 1.0208629535642726e-05, | |
| "loss": 1.1836, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 1.0188762540558657e-05, | |
| "loss": 1.1504, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.0168894800139311e-05, | |
| "loss": 1.1641, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.0149026392833137e-05, | |
| "loss": 1.1504, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 1.0129157397091208e-05, | |
| "loss": 1.2832, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.010928789136693e-05, | |
| "loss": 1.25, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.0089417954115715e-05, | |
| "loss": 1.2207, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 1.0069547663794682e-05, | |
| "loss": 1.1855, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 1.0049677098862347e-05, | |
| "loss": 1.1289, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 1.002980633777831e-05, | |
| "loss": 1.1562, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 1.0009935459002935e-05, | |
| "loss": 1.3242, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 9.990064540997066e-06, | |
| "loss": 1.3105, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 9.970193662221694e-06, | |
| "loss": 1.3145, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 9.950322901137655e-06, | |
| "loss": 1.2441, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 9.93045233620532e-06, | |
| "loss": 1.3262, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 9.910582045884292e-06, | |
| "loss": 1.2656, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 9.890712108633076e-06, | |
| "loss": 1.3633, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 9.870842602908794e-06, | |
| "loss": 1.2734, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 9.850973607166865e-06, | |
| "loss": 1.2656, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 9.83110519986069e-06, | |
| "loss": 1.2949, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 9.811237459441346e-06, | |
| "loss": 1.2227, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 9.791370464357279e-06, | |
| "loss": 1.2793, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 9.771504293053985e-06, | |
| "loss": 1.3633, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 9.751639023973724e-06, | |
| "loss": 1.207, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 9.731774735555174e-06, | |
| "loss": 1.252, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 9.711911506233157e-06, | |
| "loss": 1.1992, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 9.692049414438298e-06, | |
| "loss": 1.3516, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 9.672188538596746e-06, | |
| "loss": 1.3574, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 9.652328957129831e-06, | |
| "loss": 1.4062, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 9.632470748453794e-06, | |
| "loss": 1.3223, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 9.612613990979436e-06, | |
| "loss": 1.2207, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 9.59275876311184e-06, | |
| "loss": 1.2441, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 9.572905143250039e-06, | |
| "loss": 1.0586, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 9.553053209786725e-06, | |
| "loss": 1.2148, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 9.53320304110793e-06, | |
| "loss": 1.2402, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 9.513354715592721e-06, | |
| "loss": 1.1338, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 9.493508311612874e-06, | |
| "loss": 1.332, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 9.473663907532593e-06, | |
| "loss": 1.2715, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 9.453821581708174e-06, | |
| "loss": 1.2793, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 9.433981412487711e-06, | |
| "loss": 1.2969, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 9.414143478210786e-06, | |
| "loss": 1.1074, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 9.394307857208158e-06, | |
| "loss": 1.1924, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 9.374474627801439e-06, | |
| "loss": 1.2188, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 9.354643868302813e-06, | |
| "loss": 1.2246, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 9.334815657014696e-06, | |
| "loss": 1.2109, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 9.314990072229461e-06, | |
| "loss": 1.2832, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 9.295167192229093e-06, | |
| "loss": 1.2666, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 9.27534709528491e-06, | |
| "loss": 1.3066, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 9.25552985965723e-06, | |
| "loss": 1.5352, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 9.235715563595082e-06, | |
| "loss": 1.2305, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 9.215904285335876e-06, | |
| "loss": 1.1113, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 9.196096103105127e-06, | |
| "loss": 1.2285, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 9.176291095116104e-06, | |
| "loss": 1.2871, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 9.156489339569555e-06, | |
| "loss": 1.2539, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 9.136690914653377e-06, | |
| "loss": 1.2666, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 9.11689589854232e-06, | |
| "loss": 1.2539, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 9.097104369397681e-06, | |
| "loss": 1.1562, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 9.07731640536698e-06, | |
| "loss": 1.2148, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 9.057532084583662e-06, | |
| "loss": 1.3848, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 9.037751485166785e-06, | |
| "loss": 1.2832, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 9.017974685220716e-06, | |
| "loss": 1.2832, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 8.998201762834815e-06, | |
| "loss": 1.3906, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 8.97843279608314e-06, | |
| "loss": 1.2539, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 8.958667863024127e-06, | |
| "loss": 1.168, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 8.938907041700275e-06, | |
| "loss": 1.3086, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 8.919150410137862e-06, | |
| "loss": 1.2656, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 8.899398046346608e-06, | |
| "loss": 1.209, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 8.8796500283194e-06, | |
| "loss": 1.2852, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 8.859906434031947e-06, | |
| "loss": 1.1504, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 8.840167341442505e-06, | |
| "loss": 1.0957, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 8.820432828491542e-06, | |
| "loss": 1.2148, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 8.800702973101454e-06, | |
| "loss": 1.2832, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 8.78097785317624e-06, | |
| "loss": 1.252, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 8.761257546601209e-06, | |
| "loss": 1.3633, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 8.741542131242652e-06, | |
| "loss": 1.2246, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 8.721831684947557e-06, | |
| "loss": 1.2148, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 8.702126285543286e-06, | |
| "loss": 1.127, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 8.682426010837274e-06, | |
| "loss": 1.25, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 8.662730938616724e-06, | |
| "loss": 1.2031, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 8.643041146648299e-06, | |
| "loss": 1.2246, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 8.6233567126778e-06, | |
| "loss": 1.3438, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 8.603677714429888e-06, | |
| "loss": 1.2852, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 8.584004229607747e-06, | |
| "loss": 1.418, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 8.564336335892798e-06, | |
| "loss": 1.3105, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 8.54467411094439e-06, | |
| "loss": 1.2422, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 8.52501763239948e-06, | |
| "loss": 1.2373, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 8.505366977872336e-06, | |
| "loss": 1.2637, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 8.485722224954237e-06, | |
| "loss": 1.3906, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 8.466083451213145e-06, | |
| "loss": 1.1748, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 8.446450734193437e-06, | |
| "loss": 1.2949, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 8.426824151415548e-06, | |
| "loss": 1.125, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 8.407203780375711e-06, | |
| "loss": 1.2539, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 8.38758969854562e-06, | |
| "loss": 1.2305, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 8.367981983372143e-06, | |
| "loss": 1.1523, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 8.348380712277002e-06, | |
| "loss": 1.2285, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 8.32878596265649e-06, | |
| "loss": 1.3281, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 8.309197811881128e-06, | |
| "loss": 1.3379, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 8.289616337295396e-06, | |
| "loss": 1.2891, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 8.270041616217407e-06, | |
| "loss": 1.2441, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 8.250473725938608e-06, | |
| "loss": 1.3652, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 8.23091274372348e-06, | |
| "loss": 1.1523, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 8.211358746809225e-06, | |
| "loss": 1.2637, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 8.191811812405453e-06, | |
| "loss": 1.3184, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 8.172272017693903e-06, | |
| "loss": 1.2676, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 8.15273943982811e-06, | |
| "loss": 1.1836, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 8.133214155933118e-06, | |
| "loss": 1.1533, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 8.113696243105175e-06, | |
| "loss": 1.1562, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 8.09418577841142e-06, | |
| "loss": 1.3008, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 8.074682838889581e-06, | |
| "loss": 1.3379, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 8.055187501547674e-06, | |
| "loss": 1.2012, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 8.035699843363696e-06, | |
| "loss": 1.1484, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 8.01621994128533e-06, | |
| "loss": 1.293, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 7.996747872229624e-06, | |
| "loss": 1.3223, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 7.977283713082706e-06, | |
| "loss": 1.3105, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 7.95782754069946e-06, | |
| "loss": 1.207, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 7.938379431903243e-06, | |
| "loss": 1.1992, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 7.91893946348557e-06, | |
| "loss": 1.1582, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 7.899507712205818e-06, | |
| "loss": 1.168, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 7.880084254790911e-06, | |
| "loss": 1.3105, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 7.860669167935028e-06, | |
| "loss": 1.2988, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 7.841262528299296e-06, | |
| "loss": 1.1211, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 7.821864412511485e-06, | |
| "loss": 1.2832, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 7.802474897165716e-06, | |
| "loss": 1.0977, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 7.783094058822147e-06, | |
| "loss": 1.0918, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 7.76372197400667e-06, | |
| "loss": 1.2617, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 7.74435871921062e-06, | |
| "loss": 1.2793, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 7.72500437089046e-06, | |
| "loss": 1.2402, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 7.705659005467489e-06, | |
| "loss": 1.2344, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 7.68632269932754e-06, | |
| "loss": 1.2832, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 7.666995528820673e-06, | |
| "loss": 1.2402, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 7.647677570260868e-06, | |
| "loss": 1.3262, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 7.628368899925744e-06, | |
| "loss": 1.2695, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 7.609069594056234e-06, | |
| "loss": 1.2031, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 7.589779728856307e-06, | |
| "loss": 1.1484, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 7.570499380492641e-06, | |
| "loss": 1.3203, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 7.551228625094349e-06, | |
| "loss": 1.2754, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 7.5319675387526555e-06, | |
| "loss": 1.2559, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 7.512716197520614e-06, | |
| "loss": 1.209, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 7.493474677412795e-06, | |
| "loss": 1.1875, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 7.4742430544049945e-06, | |
| "loss": 1.2168, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 7.4550214044339256e-06, | |
| "loss": 1.209, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 7.435809803396923e-06, | |
| "loss": 1.25, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 7.416608327151642e-06, | |
| "loss": 1.1211, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 7.397417051515758e-06, | |
| "loss": 1.1113, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 7.37823605226668e-06, | |
| "loss": 1.2422, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 7.359065405141228e-06, | |
| "loss": 1.2363, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 7.33990518583535e-06, | |
| "loss": 1.1338, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 7.320755470003822e-06, | |
| "loss": 1.0918, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 7.301616333259942e-06, | |
| "loss": 1.3027, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 7.282487851175237e-06, | |
| "loss": 1.0625, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 7.263370099279173e-06, | |
| "loss": 0.792, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 7.244263153058835e-06, | |
| "loss": 0.9102, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 7.225167087958647e-06, | |
| "loss": 0.832, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 7.2060819793800665e-06, | |
| "loss": 0.8662, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 7.187007902681289e-06, | |
| "loss": 0.8164, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 7.16794493317696e-06, | |
| "loss": 0.8496, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 7.148893146137852e-06, | |
| "loss": 0.9854, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 7.129852616790594e-06, | |
| "loss": 0.8486, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 7.110823420317356e-06, | |
| "loss": 0.8359, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 7.091805631855566e-06, | |
| "loss": 0.7695, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 7.072799326497603e-06, | |
| "loss": 0.8828, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 7.053804579290513e-06, | |
| "loss": 0.9307, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 7.034821465235693e-06, | |
| "loss": 0.7568, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 7.0158500592886115e-06, | |
| "loss": 0.8779, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 6.996890436358505e-06, | |
| "loss": 0.9648, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 6.977942671308087e-06, | |
| "loss": 0.7734, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 6.95900683895325e-06, | |
| "loss": 0.8066, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 6.9400830140627705e-06, | |
| "loss": 0.9189, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 6.921171271358007e-06, | |
| "loss": 0.8271, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 6.902271685512616e-06, | |
| "loss": 0.9258, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 6.883384331152254e-06, | |
| "loss": 0.9004, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 6.864509282854272e-06, | |
| "loss": 0.8652, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 6.845646615147445e-06, | |
| "loss": 0.8779, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 6.826796402511653e-06, | |
| "loss": 0.8105, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 6.8079587193775935e-06, | |
| "loss": 0.9023, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 6.789133640126498e-06, | |
| "loss": 0.8877, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 6.770321239089825e-06, | |
| "loss": 0.9209, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 6.751521590548986e-06, | |
| "loss": 0.8389, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 6.732734768735021e-06, | |
| "loss": 0.8125, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 6.713960847828335e-06, | |
| "loss": 0.8408, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 6.695199901958386e-06, | |
| "loss": 0.9258, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 6.6764520052034054e-06, | |
| "loss": 0.8213, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 6.657717231590095e-06, | |
| "loss": 0.8838, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 6.638995655093351e-06, | |
| "loss": 0.667, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 6.620287349635942e-06, | |
| "loss": 0.9072, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 6.601592389088251e-06, | |
| "loss": 0.8184, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 6.582910847267957e-06, | |
| "loss": 0.9688, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 6.564242797939759e-06, | |
| "loss": 0.7861, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 6.545588314815088e-06, | |
| "loss": 0.9268, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 6.526947471551799e-06, | |
| "loss": 0.7949, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 6.508320341753889e-06, | |
| "loss": 0.8994, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 6.489706998971212e-06, | |
| "loss": 0.8193, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 6.471107516699183e-06, | |
| "loss": 0.877, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 6.452521968378482e-06, | |
| "loss": 0.8525, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 6.4339504273947805e-06, | |
| "loss": 0.8115, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 6.415392967078438e-06, | |
| "loss": 0.8262, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 6.396849660704205e-06, | |
| "loss": 0.9258, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 6.378320581490962e-06, | |
| "loss": 0.873, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 6.3598058026013995e-06, | |
| "loss": 0.9082, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 6.3413053971417575e-06, | |
| "loss": 0.9756, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 6.322819438161502e-06, | |
| "loss": 0.7363, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 6.304347998653074e-06, | |
| "loss": 0.835, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 6.285891151551573e-06, | |
| "loss": 0.8457, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 6.267448969734486e-06, | |
| "loss": 0.833, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 6.24902152602139e-06, | |
| "loss": 0.7949, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 6.2306088931736766e-06, | |
| "loss": 0.9092, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 6.21221114389424e-06, | |
| "loss": 0.8643, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 6.193828350827222e-06, | |
| "loss": 0.8809, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 6.175460586557701e-06, | |
| "loss": 0.8662, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 6.157107923611412e-06, | |
| "loss": 0.8682, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 6.1387704344544684e-06, | |
| "loss": 0.8701, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 6.120448191493071e-06, | |
| "loss": 0.791, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 6.102141267073207e-06, | |
| "loss": 0.8857, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 6.083849733480394e-06, | |
| "loss": 0.8623, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 6.065573662939367e-06, | |
| "loss": 0.8105, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 6.047313127613808e-06, | |
| "loss": 0.9443, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 6.0290681996060605e-06, | |
| "loss": 0.7783, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 6.010838950956841e-06, | |
| "loss": 0.8701, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 5.992625453644953e-06, | |
| "loss": 0.8672, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 5.974427779587004e-06, | |
| "loss": 0.8262, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 5.9562460006371295e-06, | |
| "loss": 0.8818, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 5.938080188586699e-06, | |
| "loss": 0.7998, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 5.919930415164033e-06, | |
| "loss": 0.7217, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 5.901796752034128e-06, | |
| "loss": 0.8486, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 5.883679270798363e-06, | |
| "loss": 0.7949, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 5.865578042994227e-06, | |
| "loss": 0.9209, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 5.84749314009503e-06, | |
| "loss": 0.8779, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 5.829424633509627e-06, | |
| "loss": 0.9678, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 5.8113725945821245e-06, | |
| "loss": 0.7764, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 5.7933370945916036e-06, | |
| "loss": 0.8252, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 5.775318204751854e-06, | |
| "loss": 0.8438, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 5.757315996211066e-06, | |
| "loss": 0.7744, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 5.7393305400515755e-06, | |
| "loss": 0.8027, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5.721361907289556e-06, | |
| "loss": 0.834, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5.703410168874768e-06, | |
| "loss": 0.8496, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 5.685475395690259e-06, | |
| "loss": 1.0342, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 5.667557658552078e-06, | |
| "loss": 0.8789, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 5.649657028209024e-06, | |
| "loss": 0.7568, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 5.631773575342343e-06, | |
| "loss": 0.791, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 5.61390737056545e-06, | |
| "loss": 0.9238, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 5.5960584844236565e-06, | |
| "loss": 0.7002, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 5.5782269873939e-06, | |
| "loss": 0.8096, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 5.560412949884442e-06, | |
| "loss": 0.8545, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 5.542616442234618e-06, | |
| "loss": 0.8203, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 5.52483753471454e-06, | |
| "loss": 0.8271, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 5.507076297524818e-06, | |
| "loss": 0.8428, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 5.48933280079631e-06, | |
| "loss": 0.8076, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 5.471607114589806e-06, | |
| "loss": 0.8057, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 5.453899308895774e-06, | |
| "loss": 0.7715, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 5.436209453634087e-06, | |
| "loss": 0.7207, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 5.418537618653743e-06, | |
| "loss": 0.7812, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 5.400883873732574e-06, | |
| "loss": 0.8213, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 5.3832482885769855e-06, | |
| "loss": 0.7451, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 5.365630932821688e-06, | |
| "loss": 0.835, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 5.3480318760294084e-06, | |
| "loss": 0.8604, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 5.330451187690614e-06, | |
| "loss": 0.9072, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 5.3128889372232436e-06, | |
| "loss": 0.8721, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 5.295345193972445e-06, | |
| "loss": 0.8779, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 5.277820027210279e-06, | |
| "loss": 0.8916, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 5.260313506135452e-06, | |
| "loss": 0.8721, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 5.242825699873068e-06, | |
| "loss": 0.8613, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 5.225356677474309e-06, | |
| "loss": 0.8379, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 5.2079065079162115e-06, | |
| "loss": 0.708, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 5.190475260101353e-06, | |
| "loss": 0.873, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 5.1730630028576055e-06, | |
| "loss": 0.7119, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 5.155669804937855e-06, | |
| "loss": 0.8848, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 5.138295735019741e-06, | |
| "loss": 0.8633, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 5.120940861705357e-06, | |
| "loss": 0.8203, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 5.103605253521007e-06, | |
| "loss": 0.8398, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 5.086288978916931e-06, | |
| "loss": 0.9297, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 5.068992106267021e-06, | |
| "loss": 0.71, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 5.051714703868569e-06, | |
| "loss": 0.7275, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 5.034456839941979e-06, | |
| "loss": 0.8164, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 5.017218582630507e-06, | |
| "loss": 0.7363, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 5.000000000000003e-06, | |
| "loss": 0.9561, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 4.982801160038614e-06, | |
| "loss": 0.834, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 4.965622130656551e-06, | |
| "loss": 0.8418, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 4.948462979685783e-06, | |
| "loss": 0.8418, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 4.931323774879807e-06, | |
| "loss": 0.8584, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 4.914204583913349e-06, | |
| "loss": 0.8105, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 4.897105474382109e-06, | |
| "loss": 0.9131, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 4.880026513802504e-06, | |
| "loss": 0.791, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 4.862967769611389e-06, | |
| "loss": 0.8828, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 4.845929309165793e-06, | |
| "loss": 0.8291, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 4.828911199742646e-06, | |
| "loss": 0.8252, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 4.8119135085385375e-06, | |
| "loss": 0.7529, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 4.794936302669417e-06, | |
| "loss": 0.8613, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 4.777979649170367e-06, | |
| "loss": 0.7803, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 4.7610436149953e-06, | |
| "loss": 0.9141, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 4.744128267016719e-06, | |
| "loss": 0.8291, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 4.727233672025453e-06, | |
| "loss": 0.7451, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 4.710359896730379e-06, | |
| "loss": 0.8457, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 4.693507007758165e-06, | |
| "loss": 0.7646, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 4.676675071653019e-06, | |
| "loss": 0.8506, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 4.659864154876411e-06, | |
| "loss": 0.7246, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 4.643074323806813e-06, | |
| "loss": 0.8555, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 4.626305644739435e-06, | |
| "loss": 0.8125, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 4.609558183885979e-06, | |
| "loss": 0.8418, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 4.592832007374364e-06, | |
| "loss": 0.8271, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 4.576127181248459e-06, | |
| "loss": 0.7979, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 4.559443771467833e-06, | |
| "loss": 0.8438, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 4.542781843907499e-06, | |
| "loss": 0.7432, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 4.5261414643576396e-06, | |
| "loss": 0.7852, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 4.509522698523352e-06, | |
| "loss": 0.8125, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 4.492925612024402e-06, | |
| "loss": 0.7588, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 4.476350270394942e-06, | |
| "loss": 0.751, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 4.4597967390832745e-06, | |
| "loss": 0.9287, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 4.4432650834515735e-06, | |
| "loss": 0.7432, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 4.426755368775637e-06, | |
| "loss": 0.7783, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 4.4102676602446375e-06, | |
| "loss": 0.8613, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 4.3938020229608506e-06, | |
| "loss": 0.8584, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 4.377358521939401e-06, | |
| "loss": 0.8105, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 4.360937222108002e-06, | |
| "loss": 0.7871, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 4.344538188306723e-06, | |
| "loss": 0.5469, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 4.328161485287693e-06, | |
| "loss": 0.6025, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 4.3118071777148865e-06, | |
| "loss": 0.5752, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 4.295475330163832e-06, | |
| "loss": 0.6367, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 4.279166007121389e-06, | |
| "loss": 0.5527, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 4.262879272985468e-06, | |
| "loss": 0.5439, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 4.246615192064787e-06, | |
| "loss": 0.5586, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 4.230373828578626e-06, | |
| "loss": 0.6318, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 4.21415524665655e-06, | |
| "loss": 0.6299, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 4.197959510338187e-06, | |
| "loss": 0.583, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 4.181786683572946e-06, | |
| "loss": 0.626, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 4.165636830219776e-06, | |
| "loss": 0.5845, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 4.149510014046922e-06, | |
| "loss": 0.5723, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 4.1334062987316695e-06, | |
| "loss": 0.5391, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 4.117325747860077e-06, | |
| "loss": 0.5967, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 4.101268424926741e-06, | |
| "loss": 0.6357, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 4.085234393334551e-06, | |
| "loss": 0.5654, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 4.069223716394419e-06, | |
| "loss": 0.5889, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 4.053236457325043e-06, | |
| "loss": 0.5615, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 4.0372726792526614e-06, | |
| "loss": 0.5459, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 4.021332445210785e-06, | |
| "loss": 0.6182, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 4.005415818139975e-06, | |
| "loss": 0.6357, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 3.989522860887567e-06, | |
| "loss": 0.5, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 3.973653636207437e-06, | |
| "loss": 0.5625, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 3.95780820675976e-06, | |
| "loss": 0.6074, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 3.941986635110754e-06, | |
| "loss": 0.6416, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 3.9261889837324245e-06, | |
| "loss": 0.5239, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 3.910415315002328e-06, | |
| "loss": 0.5127, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 3.89466569120334e-06, | |
| "loss": 0.5771, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 3.878940174523371e-06, | |
| "loss": 0.6367, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 3.8632388270551665e-06, | |
| "loss": 0.6191, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 3.847561710796019e-06, | |
| "loss": 0.5928, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 3.8319088876475595e-06, | |
| "loss": 0.5742, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 3.816280419415487e-06, | |
| "loss": 0.6201, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 3.8006763678093326e-06, | |
| "loss": 0.6885, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 3.785096794442229e-06, | |
| "loss": 0.5742, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 3.7695417608306415e-06, | |
| "loss": 0.5352, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 3.7540113283941536e-06, | |
| "loss": 0.6123, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 3.7385055584552e-06, | |
| "loss": 0.5605, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 3.723024512238833e-06, | |
| "loss": 0.541, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 3.707568250872493e-06, | |
| "loss": 0.6328, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 3.6921368353857524e-06, | |
| "loss": 0.5498, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 3.676730326710074e-06, | |
| "loss": 0.5938, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 3.6613487856785744e-06, | |
| "loss": 0.5742, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 3.645992273025797e-06, | |
| "loss": 0.5493, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 3.630660849387444e-06, | |
| "loss": 0.5947, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 3.6153545753001663e-06, | |
| "loss": 0.5522, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 3.6000735112012984e-06, | |
| "loss": 0.5967, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 3.584817717428647e-06, | |
| "loss": 0.6006, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 3.569587254220225e-06, | |
| "loss": 0.5664, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 3.5543821817140313e-06, | |
| "loss": 0.5898, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 3.5392025599478053e-06, | |
| "loss": 0.4985, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 3.5240484488588012e-06, | |
| "loss": 0.5273, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 3.5089199082835436e-06, | |
| "loss": 0.627, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 3.493816997957582e-06, | |
| "loss": 0.5479, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 3.478739777515264e-06, | |
| "loss": 0.5625, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 3.463688306489511e-06, | |
| "loss": 0.5649, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 3.448662644311567e-06, | |
| "loss": 0.6064, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 3.433662850310763e-06, | |
| "loss": 0.6211, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 3.418688983714291e-06, | |
| "loss": 0.5337, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 3.403741103646977e-06, | |
| "loss": 0.6035, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 3.3888192691310262e-06, | |
| "loss": 0.5508, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 3.373923539085805e-06, | |
| "loss": 0.5215, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 3.3590539723276083e-06, | |
| "loss": 0.5239, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 3.3442106275694295e-06, | |
| "loss": 0.5444, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 3.329393563420713e-06, | |
| "loss": 0.6401, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 3.3146028383871363e-06, | |
| "loss": 0.5825, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 3.2998385108703766e-06, | |
| "loss": 0.5347, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 3.285100639167883e-06, | |
| "loss": 0.5645, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 3.2703892814726436e-06, | |
| "loss": 0.5459, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 3.2557044958729466e-06, | |
| "loss": 0.582, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 3.2410463403521653e-06, | |
| "loss": 0.6035, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 3.2264148727885257e-06, | |
| "loss": 0.6094, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 3.211810150954867e-06, | |
| "loss": 0.5801, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 3.1972322325184347e-06, | |
| "loss": 0.6016, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 3.182681175040625e-06, | |
| "loss": 0.5352, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 3.1681570359767875e-06, | |
| "loss": 0.5757, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 3.1536598726759747e-06, | |
| "loss": 0.5894, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 3.1391897423807204e-06, | |
| "loss": 0.4736, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 3.1247467022268284e-06, | |
| "loss": 0.4985, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 3.110330809243134e-06, | |
| "loss": 0.5459, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 3.095942120351276e-06, | |
| "loss": 0.4756, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 3.081580692365478e-06, | |
| "loss": 0.5908, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 3.0672465819923215e-06, | |
| "loss": 0.583, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 3.052939845830528e-06, | |
| "loss": 0.5034, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 3.0386605403707347e-06, | |
| "loss": 0.4697, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 3.0244087219952565e-06, | |
| "loss": 0.5146, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 3.0101844469778797e-06, | |
| "loss": 0.5674, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 2.9959877714836406e-06, | |
| "loss": 0.542, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 2.981818751568586e-06, | |
| "loss": 0.5669, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 2.9676774431795752e-06, | |
| "loss": 0.5244, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 2.95356390215404e-06, | |
| "loss": 0.5679, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 2.939478184219777e-06, | |
| "loss": 0.4868, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 2.9254203449947196e-06, | |
| "loss": 0.5498, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 2.9113904399867188e-06, | |
| "loss": 0.6143, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 2.8973885245933287e-06, | |
| "loss": 0.6279, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 2.8834146541015874e-06, | |
| "loss": 0.5552, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 2.869468883687798e-06, | |
| "loss": 0.5186, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 2.855551268417305e-06, | |
| "loss": 0.5244, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 2.8416618632442785e-06, | |
| "loss": 0.5884, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 2.827800723011508e-06, | |
| "loss": 0.6289, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 2.813967902450179e-06, | |
| "loss": 0.5732, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 2.8001634561796463e-06, | |
| "loss": 0.5527, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 2.786387438707231e-06, | |
| "loss": 0.5835, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 2.7726399044280107e-06, | |
| "loss": 0.5557, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 2.758920907624585e-06, | |
| "loss": 0.5322, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 2.7452305024668747e-06, | |
| "loss": 0.54, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 2.7315687430119097e-06, | |
| "loss": 0.6719, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 2.7179356832036142e-06, | |
| "loss": 0.6846, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 2.704331376872581e-06, | |
| "loss": 0.5723, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 2.6907558777358756e-06, | |
| "loss": 0.5562, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 2.677209239396811e-06, | |
| "loss": 0.5967, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 2.6636915153447494e-06, | |
| "loss": 0.4829, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 2.650202758954886e-06, | |
| "loss": 0.6201, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 2.6367430234880286e-06, | |
| "loss": 0.4766, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 2.6233123620903946e-06, | |
| "loss": 0.583, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 2.6099108277934105e-06, | |
| "loss": 0.5054, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 2.5965384735134825e-06, | |
| "loss": 0.5459, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 2.583195352051808e-06, | |
| "loss": 0.5312, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 2.5698815160941494e-06, | |
| "loss": 0.584, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 2.5565970182106425e-06, | |
| "loss": 0.5928, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 2.5433419108555758e-06, | |
| "loss": 0.5205, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 2.5301162463671845e-06, | |
| "loss": 0.5303, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 2.516920076967455e-06, | |
| "loss": 0.5615, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 2.5037534547619125e-06, | |
| "loss": 0.6182, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 2.4906164317394067e-06, | |
| "loss": 0.5088, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 2.4775090597719163e-06, | |
| "loss": 0.5264, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 2.4644313906143414e-06, | |
| "loss": 0.5195, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 2.451383475904304e-06, | |
| "loss": 0.5332, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 2.438365367161939e-06, | |
| "loss": 0.5718, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 2.4253771157896856e-06, | |
| "loss": 0.5269, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 2.4124187730720916e-06, | |
| "loss": 0.563, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 2.3994903901756163e-06, | |
| "loss": 0.5156, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 2.3865920181484127e-06, | |
| "loss": 0.478, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 2.3737237079201437e-06, | |
| "loss": 0.5879, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 2.3608855103017613e-06, | |
| "loss": 0.5972, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 2.3480774759853307e-06, | |
| "loss": 0.5254, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 2.3352996555438036e-06, | |
| "loss": 0.5645, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 2.3225520994308382e-06, | |
| "loss": 0.5957, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 2.309834857980583e-06, | |
| "loss": 0.5371, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 2.297147981407509e-06, | |
| "loss": 0.5508, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 2.2844915198061714e-06, | |
| "loss": 0.4985, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 2.2718655231510368e-06, | |
| "loss": 0.5928, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 2.2592700412962775e-06, | |
| "loss": 0.5928, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 2.246705123975582e-06, | |
| "loss": 0.6377, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 2.234170820801954e-06, | |
| "loss": 0.5674, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 2.2216671812675118e-06, | |
| "loss": 0.4785, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 2.209194254743295e-06, | |
| "loss": 0.5767, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 2.196752090479083e-06, | |
| "loss": 0.5601, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 2.184340737603178e-06, | |
| "loss": 0.4595, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 2.1719602451222245e-06, | |
| "loss": 0.5625, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 2.159610661921018e-06, | |
| "loss": 0.5679, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 2.1472920367623094e-06, | |
| "loss": 0.6499, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 2.1350044182866025e-06, | |
| "loss": 0.4966, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 2.1227478550119763e-06, | |
| "loss": 0.5933, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 2.1105223953338805e-06, | |
| "loss": 0.4814, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 2.09832808752496e-06, | |
| "loss": 0.5088, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 2.086164979734856e-06, | |
| "loss": 0.5586, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 2.0740331199900053e-06, | |
| "loss": 0.5396, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 2.0619325561934658e-06, | |
| "loss": 0.6182, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 2.0498633361247278e-06, | |
| "loss": 0.5537, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 2.0378255074395094e-06, | |
| "loss": 0.5107, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2.0258191176695896e-06, | |
| "loss": 0.5176, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 2.0138442142226e-06, | |
| "loss": 0.4658, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 2.001900844381857e-06, | |
| "loss": 0.3608, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 1.9899890553061565e-06, | |
| "loss": 0.4785, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 1.978108894029598e-06, | |
| "loss": 0.4692, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 1.9662604074614044e-06, | |
| "loss": 0.4463, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 1.954443642385727e-06, | |
| "loss": 0.4473, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 1.9426586454614617e-06, | |
| "loss": 0.3853, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 1.9309054632220645e-06, | |
| "loss": 0.4043, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 1.919184142075372e-06, | |
| "loss": 0.3589, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 1.9074947283034206e-06, | |
| "loss": 0.3608, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 1.895837268062256e-06, | |
| "loss": 0.499, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 1.884211807381755e-06, | |
| "loss": 0.4058, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 1.8726183921654373e-06, | |
| "loss": 0.5142, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 1.8610570681903018e-06, | |
| "loss": 0.3506, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 1.8495278811066197e-06, | |
| "loss": 0.4849, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 1.8380308764377841e-06, | |
| "loss": 0.3979, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 1.8265660995801004e-06, | |
| "loss": 0.375, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 1.8151335958026317e-06, | |
| "loss": 0.4575, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 1.803733410247006e-06, | |
| "loss": 0.3691, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 1.7923655879272395e-06, | |
| "loss": 0.4448, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 1.7810301737295588e-06, | |
| "loss": 0.4111, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 1.76972721241224e-06, | |
| "loss": 0.3872, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 1.7584567486054039e-06, | |
| "loss": 0.4336, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 1.7472188268108569e-06, | |
| "loss": 0.3569, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 1.7360134914019122e-06, | |
| "loss": 0.4526, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 1.7248407866232175e-06, | |
| "loss": 0.4351, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 1.7137007565905772e-06, | |
| "loss": 0.3394, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 1.7025934452907755e-06, | |
| "loss": 0.439, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 1.6915188965814034e-06, | |
| "loss": 0.437, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 1.6804771541906972e-06, | |
| "loss": 0.3999, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 1.6694682617173452e-06, | |
| "loss": 0.3999, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 1.6584922626303325e-06, | |
| "loss": 0.4165, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 1.6475492002687632e-06, | |
| "loss": 0.4141, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 1.6366391178416918e-06, | |
| "loss": 0.397, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 1.6257620584279454e-06, | |
| "loss": 0.3926, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 1.6149180649759622e-06, | |
| "loss": 0.3926, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 1.60410718030361e-06, | |
| "loss": 0.436, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 1.5933294470980443e-06, | |
| "loss": 0.4141, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 1.5825849079155032e-06, | |
| "loss": 0.4165, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 1.5718736051811634e-06, | |
| "loss": 0.4912, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 1.5611955811889645e-06, | |
| "loss": 0.397, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 1.5505508781014489e-06, | |
| "loss": 0.4297, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 1.539939537949583e-06, | |
| "loss": 0.4883, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 1.5293616026326053e-06, | |
| "loss": 0.3496, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 1.5188171139178486e-06, | |
| "loss": 0.4014, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 1.5083061134405874e-06, | |
| "loss": 0.3706, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 1.4978286427038602e-06, | |
| "loss": 0.4463, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 1.4873847430783118e-06, | |
| "loss": 0.4316, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 1.476974455802036e-06, | |
| "loss": 0.4258, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 1.4665978219804056e-06, | |
| "loss": 0.3833, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 1.4562548825859092e-06, | |
| "loss": 0.3687, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 1.4459456784579917e-06, | |
| "loss": 0.4141, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 1.435670250302892e-06, | |
| "loss": 0.4692, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 1.425428638693489e-06, | |
| "loss": 0.3999, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 1.415220884069135e-06, | |
| "loss": 0.4443, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 1.405047026735491e-06, | |
| "loss": 0.3403, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 1.394907106864375e-06, | |
| "loss": 0.4438, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 1.3848011644936077e-06, | |
| "loss": 0.3643, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 1.3747292395268407e-06, | |
| "loss": 0.4121, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 1.3646913717334142e-06, | |
| "loss": 0.394, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 1.3546876007481847e-06, | |
| "loss": 0.4102, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 1.344717966071385e-06, | |
| "loss": 0.3857, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 1.3347825070684518e-06, | |
| "loss": 0.3726, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 1.3248812629698815e-06, | |
| "loss": 0.4077, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 1.3150142728710669e-06, | |
| "loss": 0.4009, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 1.3051815757321607e-06, | |
| "loss": 0.3789, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 1.295383210377895e-06, | |
| "loss": 0.3452, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 1.2856192154974488e-06, | |
| "loss": 0.4043, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 1.2758896296442834e-06, | |
| "loss": 0.4385, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 1.266194491235998e-06, | |
| "loss": 0.4263, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 1.2565338385541792e-06, | |
| "loss": 0.416, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 1.2469077097442372e-06, | |
| "loss": 0.4087, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 1.2373161428152647e-06, | |
| "loss": 0.4033, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 1.2277591756398933e-06, | |
| "loss": 0.3394, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 1.2182368459541294e-06, | |
| "loss": 0.4214, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 1.2087491913572103e-06, | |
| "loss": 0.4229, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 1.1992962493114645e-06, | |
| "loss": 0.3779, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 1.1898780571421554e-06, | |
| "loss": 0.4639, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 1.1804946520373307e-06, | |
| "loss": 0.4116, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 1.171146071047683e-06, | |
| "loss": 0.3823, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 1.161832351086396e-06, | |
| "loss": 0.4209, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 1.1525535289290168e-06, | |
| "loss": 0.3936, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 1.1433096412132838e-06, | |
| "loss": 0.3999, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 1.1341007244390023e-06, | |
| "loss": 0.437, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 1.124926814967887e-06, | |
| "loss": 0.3521, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 1.1157879490234346e-06, | |
| "loss": 0.4141, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 1.1066841626907633e-06, | |
| "loss": 0.418, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 1.097615491916485e-06, | |
| "loss": 0.4189, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 1.088581972508549e-06, | |
| "loss": 0.4517, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 1.0795836401361148e-06, | |
| "loss": 0.4067, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 1.0706205303294025e-06, | |
| "loss": 0.375, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 1.0616926784795511e-06, | |
| "loss": 0.3359, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 1.0528001198384862e-06, | |
| "loss": 0.4092, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 1.043942889518782e-06, | |
| "loss": 0.3726, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 1.035121022493506e-06, | |
| "loss": 0.4136, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 1.026334553596101e-06, | |
| "loss": 0.3877, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 1.0175835175202341e-06, | |
| "loss": 0.4268, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 1.0088679488196695e-06, | |
| "loss": 0.4053, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 1.0001878819081268e-06, | |
| "loss": 0.3955, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 9.91543351059141e-07, | |
| "loss": 0.3677, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 9.829343904059342e-07, | |
| "loss": 0.3691, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 9.743610339412801e-07, | |
| "loss": 0.4097, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "learning_rate": 9.658233155173657e-07, | |
| "loss": 0.4043, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.573212688456635e-07, | |
| "loss": 0.4346, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 9.488549274967873e-07, | |
| "loss": 0.3755, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 9.404243249003786e-07, | |
| "loss": 0.373, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 9.320294943449537e-07, | |
| "loss": 0.4517, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 9.236704689777842e-07, | |
| "loss": 0.4087, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 9.153472818047627e-07, | |
| "loss": 0.4146, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 9.070599656902801e-07, | |
| "loss": 0.3848, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 8.988085533570833e-07, | |
| "loss": 0.3652, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 8.905930773861527e-07, | |
| "loss": 0.3765, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 8.824135702165693e-07, | |
| "loss": 0.395, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 8.74270064145396e-07, | |
| "loss": 0.3818, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 8.661625913275463e-07, | |
| "loss": 0.375, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 8.580911837756467e-07, | |
| "loss": 0.3896, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 8.500558733599206e-07, | |
| "loss": 0.3535, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 8.420566918080686e-07, | |
| "loss": 0.4189, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 8.340936707051273e-07, | |
| "loss": 0.4199, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 8.261668414933521e-07, | |
| "loss": 0.4771, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 8.182762354720985e-07, | |
| "loss": 0.3779, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 8.10421883797694e-07, | |
| "loss": 0.3979, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 8.026038174833085e-07, | |
| "loss": 0.4072, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 7.948220673988427e-07, | |
| "loss": 0.4141, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 7.87076664270795e-07, | |
| "loss": 0.3457, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 7.793676386821602e-07, | |
| "loss": 0.395, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 7.716950210722818e-07, | |
| "loss": 0.4409, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 7.6405884173675e-07, | |
| "loss": 0.4697, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 7.564591308272773e-07, | |
| "loss": 0.3926, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 7.488959183515809e-07, | |
| "loss": 0.3809, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 7.413692341732582e-07, | |
| "loss": 0.3564, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 7.338791080116792e-07, | |
| "loss": 0.3618, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 7.264255694418576e-07, | |
| "loss": 0.4092, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 7.190086478943459e-07, | |
| "loss": 0.4375, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 7.116283726551077e-07, | |
| "loss": 0.3667, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 7.042847728654078e-07, | |
| "loss": 0.3511, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 6.969778775217007e-07, | |
| "loss": 0.3926, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 6.897077154755094e-07, | |
| "loss": 0.4565, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 6.824743154333157e-07, | |
| "loss": 0.3608, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 6.752777059564431e-07, | |
| "loss": 0.4204, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "learning_rate": 6.681179154609463e-07, | |
| "loss": 0.4058, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "learning_rate": 6.609949722175013e-07, | |
| "loss": 0.3936, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 6.539089043512914e-07, | |
| "loss": 0.4004, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 6.468597398418952e-07, | |
| "loss": 0.3545, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 6.398475065231746e-07, | |
| "loss": 0.3264, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 6.328722320831737e-07, | |
| "loss": 0.3521, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 6.259339440639966e-07, | |
| "loss": 0.3779, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 6.1903266986171e-07, | |
| "loss": 0.397, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 6.121684367262271e-07, | |
| "loss": 0.4111, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 6.053412717612061e-07, | |
| "loss": 0.373, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5.985512019239392e-07, | |
| "loss": 0.4199, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5.917982540252442e-07, | |
| "loss": 0.3833, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5.850824547293655e-07, | |
| "loss": 0.3838, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5.784038305538653e-07, | |
| "loss": 0.4448, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 5.71762407869515e-07, | |
| "loss": 0.4224, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5.651582129001987e-07, | |
| "loss": 0.3784, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5.585912717228015e-07, | |
| "loss": 0.3955, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 5.520616102671128e-07, | |
| "loss": 0.4287, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.455692543157243e-07, | |
| "loss": 0.4048, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.391142295039209e-07, | |
| "loss": 0.4062, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 5.326965613195867e-07, | |
| "loss": 0.4785, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 5.263162751031025e-07, | |
| "loss": 0.4512, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5.199733960472431e-07, | |
| "loss": 0.416, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 5.136679491970809e-07, | |
| "loss": 0.3584, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 5.073999594498869e-07, | |
| "loss": 0.3274, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 5.011694515550303e-07, | |
| "loss": 0.3901, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.949764501138832e-07, | |
| "loss": 0.3359, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 4.888209795797205e-07, | |
| "loss": 0.3325, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 4.827030642576236e-07, | |
| "loss": 0.3188, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 4.766227283043912e-07, | |
| "loss": 0.3936, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "learning_rate": 4.7057999572843516e-07, | |
| "loss": 0.3057, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 4.645748903896885e-07, | |
| "loss": 0.3564, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 4.5860743599951186e-07, | |
| "loss": 0.3252, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 4.5267765612060253e-07, | |
| "loss": 0.355, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 4.4678557416689586e-07, | |
| "loss": 0.332, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 4.4093121340347824e-07, | |
| "loss": 0.3267, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 4.3511459694648873e-07, | |
| "loss": 0.3574, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 4.2933574776303664e-07, | |
| "loss": 0.3354, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.235946886711018e-07, | |
| "loss": 0.3193, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.1789144233945087e-07, | |
| "loss": 0.3301, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 4.122260312875437e-07, | |
| "loss": 0.3311, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.0659847788544926e-07, | |
| "loss": 0.3257, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.010088043537519e-07, | |
| "loss": 0.3389, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 3.954570327634677e-07, | |
| "loss": 0.3252, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 3.899431850359503e-07, | |
| "loss": 0.3359, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "learning_rate": 3.8446728294281865e-07, | |
| "loss": 0.3408, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 3.7902934810585603e-07, | |
| "loss": 0.3555, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 3.736294019969311e-07, | |
| "loss": 0.3066, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 3.682674659379137e-07, | |
| "loss": 0.3354, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 3.629435611005916e-07, | |
| "loss": 0.3721, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "learning_rate": 3.5765770850658244e-07, | |
| "loss": 0.3271, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 3.5240992902725204e-07, | |
| "loss": 0.2993, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 3.4720024338363633e-07, | |
| "loss": 0.3398, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 3.420286721463562e-07, | |
| "loss": 0.3213, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 3.3689523573553597e-07, | |
| "loss": 0.3203, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 3.3179995442071956e-07, | |
| "loss": 0.3105, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 3.2674284832080127e-07, | |
| "loss": 0.3369, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 3.217239374039338e-07, | |
| "loss": 0.3384, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 3.1674324148745827e-07, | |
| "loss": 0.2983, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 3.118007802378198e-07, | |
| "loss": 0.374, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 3.0689657317049205e-07, | |
| "loss": 0.3257, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 3.020306396499062e-07, | |
| "loss": 0.3735, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 2.972029988893621e-07, | |
| "loss": 0.3589, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 2.9241366995096387e-07, | |
| "loss": 0.2961, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 2.8766267174553884e-07, | |
| "loss": 0.2913, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 2.8295002303256546e-07, | |
| "loss": 0.3169, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 2.7827574242009434e-07, | |
| "loss": 0.355, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 2.736398483646807e-07, | |
| "loss": 0.3374, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 2.6904235917131094e-07, | |
| "loss": 0.334, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 2.64483292993325e-07, | |
| "loss": 0.3369, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 2.599626678323508e-07, | |
| "loss": 0.3076, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 2.554805015382289e-07, | |
| "loss": 0.3066, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 2.5103681180894566e-07, | |
| "loss": 0.3735, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 2.4663161619055797e-07, | |
| "loss": 0.3203, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 2.422649320771331e-07, | |
| "loss": 0.2974, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 2.3793677671066882e-07, | |
| "loss": 0.2905, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 2.3364716718103143e-07, | |
| "loss": 0.3438, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 2.293961204258932e-07, | |
| "loss": 0.3091, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 2.2518365323065284e-07, | |
| "loss": 0.3037, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 2.2100978222838186e-07, | |
| "loss": 0.4043, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 2.1687452389974829e-07, | |
| "loss": 0.3203, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 2.1277789457296306e-07, | |
| "loss": 0.4023, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 2.0871991042370255e-07, | |
| "loss": 0.3345, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 2.0470058747505516e-07, | |
| "loss": 0.3618, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "learning_rate": 2.0071994159745367e-07, | |
| "loss": 0.333, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 1.9677798850861517e-07, | |
| "loss": 0.3579, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 1.9287474377347238e-07, | |
| "loss": 0.3389, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 1.8901022280411906e-07, | |
| "loss": 0.292, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 1.8518444085974697e-07, | |
| "loss": 0.3896, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 1.8139741304658566e-07, | |
| "loss": 0.3501, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 1.776491543178438e-07, | |
| "loss": 0.3237, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 1.739396794736481e-07, | |
| "loss": 0.334, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 1.7026900316098217e-07, | |
| "loss": 0.332, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 1.6663713987363882e-07, | |
| "loss": 0.3452, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 1.6304410395215243e-07, | |
| "loss": 0.3301, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 1.5948990958374543e-07, | |
| "loss": 0.3374, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 1.559745708022753e-07, | |
| "loss": 0.2935, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 1.5249810148817658e-07, | |
| "loss": 0.3643, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 1.490605153684066e-07, | |
| "loss": 0.3765, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 1.4566182601638779e-07, | |
| "loss": 0.335, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 1.4230204685196202e-07, | |
| "loss": 0.3569, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 1.3898119114133192e-07, | |
| "loss": 0.356, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 1.3569927199700628e-07, | |
| "loss": 0.3247, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 1.3245630237775585e-07, | |
| "loss": 0.3125, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 1.292522950885533e-07, | |
| "loss": 0.3115, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "learning_rate": 1.2608726278053208e-07, | |
| "loss": 0.3647, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 1.2296121795092874e-07, | |
| "loss": 0.3447, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 1.1987417294303748e-07, | |
| "loss": 0.3105, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 1.1682613994615788e-07, | |
| "loss": 0.3765, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 1.1381713099555381e-07, | |
| "loss": 0.3472, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 9.54, | |
| "learning_rate": 1.1084715797239798e-07, | |
| "loss": 0.2969, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 1.0791623260372863e-07, | |
| "loss": 0.3467, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 1.0502436646240399e-07, | |
| "loss": 0.3164, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 1.0217157096705676e-07, | |
| "loss": 0.3633, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 9.935785738204417e-08, | |
| "loss": 0.3267, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 9.658323681741133e-08, | |
| "loss": 0.3037, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 9.384772022884015e-08, | |
| "loss": 0.3833, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 9.11513184176116e-08, | |
| "loss": 0.3452, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 8.8494042030558e-08, | |
| "loss": 0.3096, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 8.587590156002635e-08, | |
| "loss": 0.3167, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 8.329690734383278e-08, | |
| "loss": 0.3413, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 8.075706956522156e-08, | |
| "loss": 0.3936, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 7.825639825282949e-08, | |
| "loss": 0.3364, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 7.579490328064265e-08, | |
| "loss": 0.3911, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 7.33725943679553e-08, | |
| "loss": 0.2969, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 7.098948107933656e-08, | |
| "loss": 0.3291, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 6.864557282459162e-08, | |
| "loss": 0.3184, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 6.634087885871832e-08, | |
| "loss": 0.335, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "learning_rate": 6.407540828188175e-08, | |
| "loss": 0.3523, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 6.184917003936752e-08, | |
| "loss": 0.2961, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 5.966217292155296e-08, | |
| "loss": 0.3701, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 5.7514425563870436e-08, | |
| "loss": 0.3662, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 5.540593644677295e-08, | |
| "loss": 0.3115, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 5.333671389569972e-08, | |
| "loss": 0.3164, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 5.1306766081048456e-08, | |
| "loss": 0.3003, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 4.931610101813533e-08, | |
| "loss": 0.3164, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "learning_rate": 4.73647265671684e-08, | |
| "loss": 0.3521, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 4.545265043321645e-08, | |
| "loss": 0.2876, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 4.357988016617687e-08, | |
| "loss": 0.2947, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 4.174642316074562e-08, | |
| "loss": 0.3423, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 3.9952286656389506e-08, | |
| "loss": 0.3438, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 3.819747773731841e-08, | |
| "loss": 0.3872, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "learning_rate": 3.648200333245422e-08, | |
| "loss": 0.3247, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "learning_rate": 3.480587021540527e-08, | |
| "loss": 0.3091, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 3.316908500443972e-08, | |
| "loss": 0.3633, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 3.1571654162461107e-08, | |
| "loss": 0.3281, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 3.001358399697618e-08, | |
| "loss": 0.3545, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 2.8494880660080437e-08, | |
| "loss": 0.3472, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 2.7015550148423718e-08, | |
| "loss": 0.3682, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 2.557559830319245e-08, | |
| "loss": 0.3105, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 2.417503081008632e-08, | |
| "loss": 0.3003, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 2.2813853199292745e-08, | |
| "loss": 0.3608, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "learning_rate": 2.1492070845468005e-08, | |
| "loss": 0.2871, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "learning_rate": 2.0209688967713914e-08, | |
| "loss": 0.3169, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 1.896671262955896e-08, | |
| "loss": 0.3218, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 1.7763146738938307e-08, | |
| "loss": 0.332, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 1.659899604816939e-08, | |
| "loss": 0.3013, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 1.5474265153944124e-08, | |
| "loss": 0.3262, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 1.4388958497300043e-08, | |
| "loss": 0.2925, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "learning_rate": 1.3343080363604766e-08, | |
| "loss": 0.314, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 1.2336634882544885e-08, | |
| "loss": 0.3696, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 1.1369626028104874e-08, | |
| "loss": 0.3647, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 1.0442057618551549e-08, | |
| "loss": 0.3306, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 9.553933316420739e-09, | |
| "loss": 0.3916, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 8.705256628499525e-09, | |
| "loss": 0.3525, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 7.896030905818474e-09, | |
| "loss": 0.3662, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 7.126259343631648e-09, | |
| "loss": 0.3042, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 6.39594498140883e-09, | |
| "loss": 0.3257, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 5.705090702819993e-09, | |
| "loss": 0.3237, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 5.053699235726406e-09, | |
| "loss": 0.29, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 4.4417731521717576e-09, | |
| "loss": 0.3081, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 3.869314868363283e-09, | |
| "loss": 0.2944, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 3.3363266446750918e-09, | |
| "loss": 0.2676, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 2.842810585627076e-09, | |
| "loss": 0.3086, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 2.388768639886019e-09, | |
| "loss": 0.3047, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 1.9742026002500526e-09, | |
| "loss": 0.3242, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 1.5991141036475478e-09, | |
| "loss": 0.3086, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 1.263504631129342e-09, | |
| "loss": 0.3174, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 9.673755078598578e-10, | |
| "loss": 0.354, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 7.107279031148828e-10, | |
| "loss": 0.3208, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 4.935628302760175e-10, | |
| "loss": 0.3721, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 3.158811468273459e-10, | |
| "loss": 0.3354, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 1.776835543509936e-10, | |
| "loss": 0.3215, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 7.897059852490785e-11, | |
| "loss": 0.2866, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 1.9742669119526824e-11, | |
| "loss": 0.3057, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.3101, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 1630, | |
| "total_flos": 41842376695808.0, | |
| "train_loss": 1.8768900678201688, | |
| "train_runtime": 1493.3467, | |
| "train_samples_per_second": 69.736, | |
| "train_steps_per_second": 1.092 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1630, | |
| "num_train_epochs": 10, | |
| "save_steps": 10000, | |
| "total_flos": 41842376695808.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |