| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.991549295774648, |
| "global_step": 531, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.25e-06, |
| "loss": 1.1094, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.5e-06, |
| "loss": 1.1226, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.995, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5e-06, |
| "loss": 0.9446, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6.25e-06, |
| "loss": 0.9452, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.9096, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 0.931, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1e-05, |
| "loss": 0.8846, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.125e-05, |
| "loss": 0.8401, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.25e-05, |
| "loss": 0.8941, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.375e-05, |
| "loss": 0.8645, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.8188, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.6250000000000002e-05, |
| "loss": 0.8554, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 0.8108, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 0.8178, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2e-05, |
| "loss": 0.82, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9999813939602312e-05, |
| "loss": 0.7755, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9999255765332947e-05, |
| "loss": 0.813, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9998325497962724e-05, |
| "loss": 0.8127, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.999702317210883e-05, |
| "loss": 0.8011, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9995348836233517e-05, |
| "loss": 0.8315, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9993302552642306e-05, |
| "loss": 0.8075, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9990884397481664e-05, |
| "loss": 0.7879, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9988094460736175e-05, |
| "loss": 0.8185, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.998493284622518e-05, |
| "loss": 0.818, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.998139967159894e-05, |
| "loss": 0.7796, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9977495068334223e-05, |
| "loss": 0.7653, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.997321918172944e-05, |
| "loss": 0.7758, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.996857217089922e-05, |
| "loss": 0.7504, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9963554208768502e-05, |
| "loss": 0.7599, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9958165482066094e-05, |
| "loss": 0.7667, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9952406191317718e-05, |
| "loss": 0.7626, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.994627655083856e-05, |
| "loss": 0.7985, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.9939776788725296e-05, |
| "loss": 0.7512, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.993290714684758e-05, |
| "loss": 0.7604, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.992566788083908e-05, |
| "loss": 0.7225, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9918059260087937e-05, |
| "loss": 0.7572, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.9910081567726746e-05, |
| "loss": 0.7767, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.9901735100622038e-05, |
| "loss": 0.7256, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9893020169363203e-05, |
| "loss": 0.7386, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9883937098250962e-05, |
| "loss": 0.7766, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.9874486225285278e-05, |
| "loss": 0.7665, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.986466790215279e-05, |
| "loss": 0.7294, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.985448249421371e-05, |
| "loss": 0.7798, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.9843930380488257e-05, |
| "loss": 0.7859, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9833011953642525e-05, |
| "loss": 0.7533, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9821727619973884e-05, |
| "loss": 0.7672, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.9810077799395847e-05, |
| "loss": 0.7952, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9798062925422474e-05, |
| "loss": 0.7117, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.9785683445152205e-05, |
| "loss": 0.7319, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9772939819251247e-05, |
| "loss": 0.7491, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9759832521936424e-05, |
| "loss": 0.7559, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.974636204095752e-05, |
| "loss": 0.714, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.9732528877579145e-05, |
| "loss": 0.7416, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.971833354656208e-05, |
| "loss": 0.7311, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.9703776576144106e-05, |
| "loss": 0.7392, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.968885850802037e-05, |
| "loss": 0.7321, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9673579897323204e-05, |
| "loss": 0.7592, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9657941312601486e-05, |
| "loss": 0.7302, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.964194333579948e-05, |
| "loss": 0.7368, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.962558656223516e-05, |
| "loss": 0.7504, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9608871600578095e-05, |
| "loss": 0.7226, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9591799072826766e-05, |
| "loss": 0.6994, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.9574369614285426e-05, |
| "loss": 0.7191, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9556583873540483e-05, |
| "loss": 0.7758, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.953844251243633e-05, |
| "loss": 0.7308, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9519946206050737e-05, |
| "loss": 0.7293, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.9501095642669737e-05, |
| "loss": 0.7096, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.9481891523761985e-05, |
| "loss": 0.7234, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.946233456395269e-05, |
| "loss": 0.7762, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9442425490996987e-05, |
| "loss": 0.7034, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.942216504575289e-05, |
| "loss": 0.7576, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.940155398215369e-05, |
| "loss": 0.7312, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9380593067179934e-05, |
| "loss": 0.6968, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9359283080830856e-05, |
| "loss": 0.678, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.933762481609536e-05, |
| "loss": 0.7154, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.9315619078922512e-05, |
| "loss": 0.7206, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.929326668819156e-05, |
| "loss": 0.7176, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9270568475681442e-05, |
| "loss": 0.7361, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.9247525286039855e-05, |
| "loss": 0.7342, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.9224137976751797e-05, |
| "loss": 0.7231, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.920040741810768e-05, |
| "loss": 0.7328, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.917633449317095e-05, |
| "loss": 0.6792, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.915192009774519e-05, |
| "loss": 0.7461, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.9127165140340837e-05, |
| "loss": 0.7143, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.910207054214133e-05, |
| "loss": 0.7507, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.9076637236968852e-05, |
| "loss": 0.6742, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.905086617124958e-05, |
| "loss": 0.736, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.9024758303978457e-05, |
| "loss": 0.757, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.8998314606683522e-05, |
| "loss": 0.7392, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.8971536063389745e-05, |
| "loss": 0.7028, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.89444236705824e-05, |
| "loss": 0.7357, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.8916978437170006e-05, |
| "loss": 0.7076, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.888920138444678e-05, |
| "loss": 0.7689, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8861093546054605e-05, |
| "loss": 0.7018, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8832655967944607e-05, |
| "loss": 0.6787, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.8803889708338205e-05, |
| "loss": 0.7327, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.877479583768774e-05, |
| "loss": 0.7318, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.8745375438636632e-05, |
| "loss": 0.7109, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.871562960597912e-05, |
| "loss": 0.7287, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.868555944661949e-05, |
| "loss": 0.7207, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.8655166079530906e-05, |
| "loss": 0.6855, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.862445063571376e-05, |
| "loss": 0.6921, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.8593414258153588e-05, |
| "loss": 0.7363, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.856205810177855e-05, |
| "loss": 0.7143, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.853038333341642e-05, |
| "loss": 0.7234, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.84983911317512e-05, |
| "loss": 0.6807, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8466082687279247e-05, |
| "loss": 0.7104, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.8433459202264963e-05, |
| "loss": 0.7297, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.8400521890696068e-05, |
| "loss": 0.7574, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8367271978238422e-05, |
| "loss": 0.666, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.833371070219041e-05, |
| "loss": 0.7313, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8299839311436905e-05, |
| "loss": 0.7253, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.8265659066402794e-05, |
| "loss": 0.7251, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8231171239006077e-05, |
| "loss": 0.714, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.8196377112610524e-05, |
| "loss": 0.7023, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8161277981977942e-05, |
| "loss": 0.6867, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.8125875153219966e-05, |
| "loss": 0.7594, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 1.8090169943749477e-05, |
| "loss": 0.6866, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8054163682231567e-05, |
| "loss": 0.7443, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8017857708534107e-05, |
| "loss": 0.7269, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.798125337367788e-05, |
| "loss": 0.7142, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.79443520397863e-05, |
| "loss": 0.6999, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.790715508003474e-05, |
| "loss": 0.7005, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.786966387859943e-05, |
| "loss": 0.6898, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.783187983060594e-05, |
| "loss": 0.7002, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7793804342077258e-05, |
| "loss": 0.6982, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.7755438829881503e-05, |
| "loss": 0.6991, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.771678472167916e-05, |
| "loss": 0.7005, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7677843455869984e-05, |
| "loss": 0.6961, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.763861648153945e-05, |
| "loss": 0.7546, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.759910525840485e-05, |
| "loss": 0.7155, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7559311256760958e-05, |
| "loss": 0.7057, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.7519235957425334e-05, |
| "loss": 0.6821, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.74788808516832e-05, |
| "loss": 0.6797, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.743824744123196e-05, |
| "loss": 0.745, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.739733723812532e-05, |
| "loss": 0.703, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7356151764717012e-05, |
| "loss": 0.6906, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7314692553604143e-05, |
| "loss": 0.6708, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7272961147570177e-05, |
| "loss": 0.6731, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7230959099527512e-05, |
| "loss": 0.7075, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.7188687972459707e-05, |
| "loss": 0.7039, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.71461493393633e-05, |
| "loss": 0.7294, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7103344783189292e-05, |
| "loss": 0.73, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7060275896784225e-05, |
| "loss": 0.7184, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7016944282830935e-05, |
| "loss": 0.6974, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.697335155378888e-05, |
| "loss": 0.6929, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.692949933183416e-05, |
| "loss": 0.7045, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.6885389248799153e-05, |
| "loss": 0.6938, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6841022946111774e-05, |
| "loss": 0.7276, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6796402074734404e-05, |
| "loss": 0.6981, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.675152829510246e-05, |
| "loss": 0.7262, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.67064032770626e-05, |
| "loss": 0.6724, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.666102869981059e-05, |
| "loss": 0.7055, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.6615406251828794e-05, |
| "loss": 0.6914, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.6569537630823385e-05, |
| "loss": 0.667, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.652342454366113e-05, |
| "loss": 0.736, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.64770687063059e-05, |
| "loss": 0.7076, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6430471843754806e-05, |
| "loss": 0.6791, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6383635689973997e-05, |
| "loss": 0.6903, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6336561987834155e-05, |
| "loss": 0.6791, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.6289252489045625e-05, |
| "loss": 0.6617, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6241708954093242e-05, |
| "loss": 0.6653, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6193933152170812e-05, |
| "loss": 0.6855, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.614592686111527e-05, |
| "loss": 0.7106, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6097691867340547e-05, |
| "loss": 0.6551, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6049229965771054e-05, |
| "loss": 0.7079, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.600054295977494e-05, |
| "loss": 0.7083, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5951632661096932e-05, |
| "loss": 0.6969, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.590250088979097e-05, |
| "loss": 0.6657, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.585314947415242e-05, |
| "loss": 0.6789, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.5803580250650098e-05, |
| "loss": 0.6983, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.5753795063857886e-05, |
| "loss": 0.651, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5703795766386114e-05, |
| "loss": 0.6485, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.565358421881262e-05, |
| "loss": 0.742, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.5603162289613503e-05, |
| "loss": 0.6903, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.55525318550936e-05, |
| "loss": 0.6678, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.5501694799316672e-05, |
| "loss": 0.5696, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.5450653014035288e-05, |
| "loss": 0.5072, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.539940839862041e-05, |
| "loss": 0.5298, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.5347962859990744e-05, |
| "loss": 0.4914, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5296318312541768e-05, |
| "loss": 0.4611, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 1.5244476678074496e-05, |
| "loss": 0.4769, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.5192439885723942e-05, |
| "loss": 0.4666, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.514020987188737e-05, |
| "loss": 0.4796, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5087788580152207e-05, |
| "loss": 0.4725, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 1.5035177961223727e-05, |
| "loss": 0.4876, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.4982379972852471e-05, |
| "loss": 0.5065, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.4929396579761378e-05, |
| "loss": 0.4662, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.4876229753572688e-05, |
| "loss": 0.4547, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4822881472734563e-05, |
| "loss": 0.4817, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4769353722447477e-05, |
| "loss": 0.4428, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4715648494590327e-05, |
| "loss": 0.4835, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.4661767787646328e-05, |
| "loss": 0.4737, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.4607713606628627e-05, |
| "loss": 0.4999, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.4553487963005712e-05, |
| "loss": 0.5113, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.4499092874626546e-05, |
| "loss": 0.4856, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.444453036564548e-05, |
| "loss": 0.4806, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4389802466446942e-05, |
| "loss": 0.4808, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.4334911213569872e-05, |
| "loss": 0.4667, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1.427985864963193e-05, |
| "loss": 0.4798, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.4224646823253512e-05, |
| "loss": 0.4477, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.4169277788981489e-05, |
| "loss": 0.4907, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4113753607212768e-05, |
| "loss": 0.4584, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.4058076344117617e-05, |
| "loss": 0.4593, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.400224807156278e-05, |
| "loss": 0.4871, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.3946270867034377e-05, |
| "loss": 0.4938, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1.3890146813560592e-05, |
| "loss": 0.4844, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.383387799963417e-05, |
| "loss": 0.4567, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.3777466519134686e-05, |
| "loss": 0.4662, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3720914471250644e-05, |
| "loss": 0.4884, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.3664223960401345e-05, |
| "loss": 0.4705, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 1.3607397096158588e-05, |
| "loss": 0.4653, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3550435993168164e-05, |
| "loss": 0.4546, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.3493342771071171e-05, |
| "loss": 0.5141, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.3436119554425133e-05, |
| "loss": 0.4794, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.337876847262493e-05, |
| "loss": 0.4834, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.3321291659823588e-05, |
| "loss": 0.4697, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.3263691254852836e-05, |
| "loss": 0.5022, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.3205969401143517e-05, |
| "loss": 0.4845, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.314812824664585e-05, |
| "loss": 0.4907, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1.3090169943749475e-05, |
| "loss": 0.4705, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.303209664920337e-05, |
| "loss": 0.4651, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.2973910524035588e-05, |
| "loss": 0.4918, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.2915613733472849e-05, |
| "loss": 0.4668, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.285720844685996e-05, |
| "loss": 0.4932, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.279869683757909e-05, |
| "loss": 0.4721, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.27400810829689e-05, |
| "loss": 0.4725, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 1.2681363364243511e-05, |
| "loss": 0.4366, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2622545866411345e-05, |
| "loss": 0.4701, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.2563630778193805e-05, |
| "loss": 0.4725, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2504620291943841e-05, |
| "loss": 0.4966, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2445516603564365e-05, |
| "loss": 0.4431, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.2386321912426524e-05, |
| "loss": 0.4566, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.2327038421287879e-05, |
| "loss": 0.437, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.2267668336210411e-05, |
| "loss": 0.448, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.2208213866478454e-05, |
| "loss": 0.4411, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.214867722451646e-05, |
| "loss": 0.4848, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.2089060625806686e-05, |
| "loss": 0.479, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.202936628880675e-05, |
| "loss": 0.4511, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.1969596434867063e-05, |
| "loss": 0.4733, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.1909753288148183e-05, |
| "loss": 0.464, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.184983907553805e-05, |
| "loss": 0.4579, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 1.1789856026569103e-05, |
| "loss": 0.4772, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.1729806373335337e-05, |
| "loss": 0.4372, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.1669692350409223e-05, |
| "loss": 0.4962, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.1609516194758562e-05, |
| "loss": 0.4626, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.1549280145663245e-05, |
| "loss": 0.4809, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.148898644463192e-05, |
| "loss": 0.4721, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.142863733531859e-05, |
| "loss": 0.4763, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.1368235063439103e-05, |
| "loss": 0.4414, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.1307781876687611e-05, |
| "loss": 0.4621, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.124728002465291e-05, |
| "loss": 0.4525, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.1186731758734722e-05, |
| "loss": 0.4496, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.112613933205994e-05, |
| "loss": 0.4551, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.1065504999398762e-05, |
| "loss": 0.4506, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.1004831017080802e-05, |
| "loss": 0.4775, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.0944119642911108e-05, |
| "loss": 0.4732, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0883373136086173e-05, |
| "loss": 0.4736, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0822593757109835e-05, |
| "loss": 0.4692, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.0761783767709182e-05, |
| "loss": 0.4532, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0700945430750373e-05, |
| "loss": 0.4855, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 1.0640081010154444e-05, |
| "loss": 0.4737, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0579192770813053e-05, |
| "loss": 0.4691, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.0518282978504209e-05, |
| "loss": 0.4843, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0457353899807947e-05, |
| "loss": 0.4376, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0396407802021986e-05, |
| "loss": 0.4597, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.0335446953077366e-05, |
| "loss": 0.4639, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.027447362145405e-05, |
| "loss": 0.4622, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.0213490076096502e-05, |
| "loss": 0.483, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.015249858632926e-05, |
| "loss": 0.47, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.0091501421772496e-05, |
| "loss": 0.4806, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.0030500852257545e-05, |
| "loss": 0.489, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.969499147742455e-06, |
| "loss": 0.4736, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.908498578227505e-06, |
| "loss": 0.4608, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.847501413670742e-06, |
| "loss": 0.4895, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.786509923903503e-06, |
| "loss": 0.4773, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.725526378545953e-06, |
| "loss": 0.4446, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.664553046922634e-06, |
| "loss": 0.4815, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.603592197978017e-06, |
| "loss": 0.4996, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.542646100192056e-06, |
| "loss": 0.4772, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.481717021495795e-06, |
| "loss": 0.4468, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.42080722918695e-06, |
| "loss": 0.4736, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.359918989845558e-06, |
| "loss": 0.482, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.299054569249629e-06, |
| "loss": 0.518, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.238216232290821e-06, |
| "loss": 0.4673, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.177406242890168e-06, |
| "loss": 0.4968, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.116626863913827e-06, |
| "loss": 0.477, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.055880357088892e-06, |
| "loss": 0.4549, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 8.995168982919203e-06, |
| "loss": 0.4776, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 8.934495000601241e-06, |
| "loss": 0.4776, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.873860667940066e-06, |
| "loss": 0.4818, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.81326824126528e-06, |
| "loss": 0.4708, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.752719975347094e-06, |
| "loss": 0.4835, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.692218123312392e-06, |
| "loss": 0.4828, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.6317649365609e-06, |
| "loss": 0.4881, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.571362664681416e-06, |
| "loss": 0.4538, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.511013555368081e-06, |
| "loss": 0.4855, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.450719854336759e-06, |
| "loss": 0.4503, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.390483805241442e-06, |
| "loss": 0.447, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.330307649590782e-06, |
| "loss": 0.4663, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.270193626664666e-06, |
| "loss": 0.4758, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.210143973430897e-06, |
| "loss": 0.4567, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.150160924461954e-06, |
| "loss": 0.445, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.09024671185182e-06, |
| "loss": 0.4801, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.030403565132942e-06, |
| "loss": 0.4593, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 7.970633711193253e-06, |
| "loss": 0.4467, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.910939374193314e-06, |
| "loss": 0.4602, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 7.851322775483543e-06, |
| "loss": 0.4715, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.791786133521548e-06, |
| "loss": 0.44, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.732331663789592e-06, |
| "loss": 0.4562, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.672961578712126e-06, |
| "loss": 0.4518, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.613678087573476e-06, |
| "loss": 0.4855, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.554483396435638e-06, |
| "loss": 0.4739, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.495379708056162e-06, |
| "loss": 0.476, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 7.436369221806201e-06, |
| "loss": 0.4569, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.377454133588657e-06, |
| "loss": 0.4674, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.318636635756491e-06, |
| "loss": 0.4715, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.259918917031103e-06, |
| "loss": 0.4461, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.201303162420914e-06, |
| "loss": 0.4682, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.142791553140045e-06, |
| "loss": 0.4432, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.084386266527152e-06, |
| "loss": 0.4648, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.026089475964415e-06, |
| "loss": 0.473, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 6.967903350796632e-06, |
| "loss": 0.4472, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 6.909830056250527e-06, |
| "loss": 0.4828, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.851871753354154e-06, |
| "loss": 0.473, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 6.794030598856484e-06, |
| "loss": 0.4938, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.736308745147169e-06, |
| "loss": 0.4475, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.678708340176414e-06, |
| "loss": 0.4679, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.621231527375071e-06, |
| "loss": 0.4997, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.563880445574873e-06, |
| "loss": 0.4632, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.506657228928828e-06, |
| "loss": 0.4436, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.4495640068318365e-06, |
| "loss": 0.449, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.392602903841416e-06, |
| "loss": 0.4882, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.33577603959866e-06, |
| "loss": 0.4583, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.2790855287493605e-06, |
| "loss": 0.4686, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.222533480865316e-06, |
| "loss": 0.4691, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.166122000365835e-06, |
| "loss": 0.4757, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 6.109853186439411e-06, |
| "loss": 0.4659, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.053729132965626e-06, |
| "loss": 0.4869, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 5.99775192843722e-06, |
| "loss": 0.4737, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.9419236558823845e-06, |
| "loss": 0.4778, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.886246392787235e-06, |
| "loss": 0.4526, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.830722211018517e-06, |
| "loss": 0.4575, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.7753531767464895e-06, |
| "loss": 0.4463, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.720141350368072e-06, |
| "loss": 0.4316, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.665088786430129e-06, |
| "loss": 0.4701, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.610197533553058e-06, |
| "loss": 0.4637, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.555469634354521e-06, |
| "loss": 0.4754, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.500907125373458e-06, |
| "loss": 0.4665, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.446512036994287e-06, |
| "loss": 0.4801, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.392286393371373e-06, |
| "loss": 0.4374, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.338232212353676e-06, |
| "loss": 0.4634, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.284351505409675e-06, |
| "loss": 0.4495, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.230646277552528e-06, |
| "loss": 0.4081, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.177118527265438e-06, |
| "loss": 0.2993, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.123770246427315e-06, |
| "loss": 0.3176, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.070603420238625e-06, |
| "loss": 0.2931, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.017620027147534e-06, |
| "loss": 0.2917, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 4.964822038776277e-06, |
| "loss": 0.3262, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 4.912211419847795e-06, |
| "loss": 0.2681, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.859790128112631e-06, |
| "loss": 0.2766, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.807560114276059e-06, |
| "loss": 0.2923, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.755523321925508e-06, |
| "loss": 0.2703, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.703681687458231e-06, |
| "loss": 0.264, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.652037140009259e-06, |
| "loss": 0.2852, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.600591601379597e-06, |
| "loss": 0.2717, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.549346985964719e-06, |
| "loss": 0.2791, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.49830520068333e-06, |
| "loss": 0.2714, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.447468144906401e-06, |
| "loss": 0.2834, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 4.396837710386503e-06, |
| "loss": 0.2984, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.346415781187386e-06, |
| "loss": 0.2778, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.296204233613888e-06, |
| "loss": 0.2775, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.246204936142116e-06, |
| "loss": 0.2705, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.196419749349905e-06, |
| "loss": 0.2828, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.1468505258475785e-06, |
| "loss": 0.2695, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.097499110209032e-06, |
| "loss": 0.2637, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.0483673389030675e-06, |
| "loss": 0.2809, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 3.9994570402250656e-06, |
| "loss": 0.2638, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 3.950770034228946e-06, |
| "loss": 0.2842, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.902308132659457e-06, |
| "loss": 0.2625, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.854073138884731e-06, |
| "loss": 0.2624, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.806066847829192e-06, |
| "loss": 0.2998, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.758291045906761e-06, |
| "loss": 0.2672, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.7107475109543767e-06, |
| "loss": 0.2729, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.6634380121658484e-06, |
| "loss": 0.2758, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.616364310026006e-06, |
| "loss": 0.2573, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.5695281562451965e-06, |
| "loss": 0.2593, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.5229312936941017e-06, |
| "loss": 0.2614, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.476575456338871e-06, |
| "loss": 0.2877, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.4304623691766193e-06, |
| "loss": 0.28, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.38459374817121e-06, |
| "loss": 0.2853, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.3389713001894163e-06, |
| "loss": 0.28, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.293596722937399e-06, |
| "loss": 0.2792, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.24847170489754e-06, |
| "loss": 0.2887, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.203597925265598e-06, |
| "loss": 0.2718, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.1589770538882303e-06, |
| "loss": 0.2661, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.1146107512008505e-06, |
| "loss": 0.3056, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.07050066816584e-06, |
| "loss": 0.2823, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.0266484462111244e-06, |
| "loss": 0.2702, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 2.98305571716907e-06, |
| "loss": 0.2929, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 2.9397241032157764e-06, |
| "loss": 0.2758, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.8966552168107133e-06, |
| "loss": 0.2792, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8538506606367033e-06, |
| "loss": 0.2856, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8113120275402937e-06, |
| "loss": 0.2858, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.7690409004724883e-06, |
| "loss": 0.2864, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.7270388524298262e-06, |
| "loss": 0.2815, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.6853074463958618e-06, |
| "loss": 0.2969, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.6438482352829896e-06, |
| "loss": 0.2545, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.6026627618746793e-06, |
| "loss": 0.2652, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.56175255876804e-06, |
| "loss": 0.2715, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.521119148316803e-06, |
| "loss": 0.2738, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.4807640425746693e-06, |
| "loss": 0.2736, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.4406887432390426e-06, |
| "loss": 0.2651, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.400894741595152e-06, |
| "loss": 0.2757, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.3613835184605527e-06, |
| "loss": 0.2973, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.32215654413002e-06, |
| "loss": 0.2528, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.2832152783208393e-06, |
| "loss": 0.2672, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.2445611701185e-06, |
| "loss": 0.2783, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.206195657922745e-06, |
| "loss": 0.2881, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.1681201693940667e-06, |
| "loss": 0.2982, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.1303361214005723e-06, |
| "loss": 0.2579, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.0928449199652602e-06, |
| "loss": 0.2859, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.0556479602137036e-06, |
| "loss": 0.2796, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.0187466263221243e-06, |
| "loss": 0.2626, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.982142291465896e-06, |
| "loss": 0.2751, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.9458363177684368e-06, |
| "loss": 0.2801, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.9098300562505266e-06, |
| "loss": 0.2709, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.8741248467800366e-06, |
| "loss": 0.2621, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.838722018022061e-06, |
| "loss": 0.2532, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.8036228873894745e-06, |
| "loss": 0.2769, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.7688287609939248e-06, |
| "loss": 0.25, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.7343409335972071e-06, |
| "loss": 0.2861, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.700160688563095e-06, |
| "loss": 0.268, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.6662892978095912e-06, |
| "loss": 0.2649, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.6327280217615793e-06, |
| "loss": 0.294, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.5994781093039336e-06, |
| "loss": 0.2808, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.566540797735039e-06, |
| "loss": 0.2561, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.5339173127207564e-06, |
| "loss": 0.2523, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5016088682488027e-06, |
| "loss": 0.2657, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.4696166665835853e-06, |
| "loss": 0.2931, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.4379418982214544e-06, |
| "loss": 0.2747, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.4065857418464123e-06, |
| "loss": 0.2759, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.375549364286244e-06, |
| "loss": 0.2907, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.3448339204690975e-06, |
| "loss": 0.2907, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.3144405533805138e-06, |
| "loss": 0.2615, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.2843703940208818e-06, |
| "loss": 0.2708, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 1.254624561363369e-06, |
| "loss": 0.2554, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.2252041623122646e-06, |
| "loss": 0.2733, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1961102916617962e-06, |
| "loss": 0.2721, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1673440320553941e-06, |
| "loss": 0.2719, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1389064539453953e-06, |
| "loss": 0.2629, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1.1107986155532247e-06, |
| "loss": 0.2506, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.0830215628299956e-06, |
| "loss": 0.2671, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.0555763294176047e-06, |
| "loss": 0.2729, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.02846393661026e-06, |
| "loss": 0.2729, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1.0016853933164773e-06, |
| "loss": 0.2661, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 9.75241696021544e-07, |
| "loss": 0.2659, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.491338287504249e-07, |
| "loss": 0.2772, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 9.233627630311503e-07, |
| "loss": 0.2874, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 8.979294578586739e-07, |
| "loss": 0.2815, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 8.728348596591641e-07, |
| "loss": 0.2656, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.480799022548114e-07, |
| "loss": 0.2672, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.236655068290556e-07, |
| "loss": 0.2705, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 7.995925818923222e-07, |
| "loss": 0.2587, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.758620232482083e-07, |
| "loss": 0.2844, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 7.524747139601474e-07, |
| "loss": 0.2846, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.294315243185579e-07, |
| "loss": 0.2802, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 7.067333118084429e-07, |
| "loss": 0.2652, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.843809210774921e-07, |
| "loss": 0.2939, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.623751839046455e-07, |
| "loss": 0.2772, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.407169191691465e-07, |
| "loss": 0.2659, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.194069328200669e-07, |
| "loss": 0.2629, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.984460178463103e-07, |
| "loss": 0.2837, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.77834954247114e-07, |
| "loss": 0.2851, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.575745090030138e-07, |
| "loss": 0.2751, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.376654360473121e-07, |
| "loss": 0.2587, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.181084762380151e-07, |
| "loss": 0.2821, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.989043573302655e-07, |
| "loss": 0.2923, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.800537939492645e-07, |
| "loss": 0.2614, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.61557487563673e-07, |
| "loss": 0.2755, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.4341612645952047e-07, |
| "loss": 0.2675, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.2563038571457605e-07, |
| "loss": 0.2566, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.08200927173239e-07, |
| "loss": 0.2642, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 3.9112839942190727e-07, |
| "loss": 0.2584, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.7441343776484116e-07, |
| "loss": 0.2634, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.5805666420052456e-07, |
| "loss": 0.2768, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.420586873985132e-07, |
| "loss": 0.2945, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.264201026767977e-07, |
| "loss": 0.2661, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.1114149197963185e-07, |
| "loss": 0.2597, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.9622342385589256e-07, |
| "loss": 0.2616, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.8166645343792096e-07, |
| "loss": 0.2608, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.674711224208548e-07, |
| "loss": 0.2586, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 2.536379590424809e-07, |
| "loss": 0.266, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.4016747806357657e-07, |
| "loss": 0.2741, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 2.2706018074875046e-07, |
| "loss": 0.2456, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.1431655484779435e-07, |
| "loss": 0.2601, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 2.019370745775273e-07, |
| "loss": 0.2556, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.8992220060415346e-07, |
| "loss": 0.2482, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.782723800261199e-07, |
| "loss": 0.2855, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.669880463574758e-07, |
| "loss": 0.2928, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.5606961951174394e-07, |
| "loss": 0.2831, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1.4551750578629232e-07, |
| "loss": 0.2842, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.3533209784721502e-07, |
| "loss": 0.2862, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.2551377471472282e-07, |
| "loss": 0.2873, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.160629017490389e-07, |
| "loss": 0.2757, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 1.069798306367975e-07, |
| "loss": 0.2611, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 9.826489937796557e-08, |
| "loss": 0.2871, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 8.991843227325492e-08, |
| "loss": 0.2887, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 8.194073991206641e-08, |
| "loss": 0.2654, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 7.433211916092143e-08, |
| "loss": 0.2701, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 6.709285315242064e-08, |
| "loss": 0.2744, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 6.022321127470698e-08, |
| "loss": 0.2575, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 5.3723449161439124e-08, |
| "loss": 0.2832, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 4.759380868228247e-08, |
| "loss": 0.2522, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 4.183451793390747e-08, |
| "loss": 0.2872, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 3.6445791231497496e-08, |
| "loss": 0.2871, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.1427829100779686e-08, |
| "loss": 0.2628, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 2.6780818270562002e-08, |
| "loss": 0.2716, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.250493166577772e-08, |
| "loss": 0.2707, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.860032840106163e-08, |
| "loss": 0.273, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.5067153774820375e-08, |
| "loss": 0.2606, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 1.190553926382898e-08, |
| "loss": 0.2783, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 9.115602518338096e-09, |
| "loss": 0.2792, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 6.697447357695286e-09, |
| "loss": 0.2715, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 4.651163766484779e-09, |
| "loss": 0.2901, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 2.9768278911723737e-09, |
| "loss": 0.2608, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 1.6745020372777033e-09, |
| "loss": 0.2788, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 7.442346670549771e-10, |
| "loss": 0.2611, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.8606039768775952e-10, |
| "loss": 0.2693, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.0, |
| "loss": 0.2671, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.99, |
| "step": 531, |
| "total_flos": 5.02302897078272e+17, |
| "train_loss": 0.49581964204540363, |
| "train_runtime": 4921.9709, |
| "train_samples_per_second": 13.814, |
| "train_steps_per_second": 0.108 |
| } |
| ], |
| "max_steps": 531, |
| "num_train_epochs": 3, |
| "total_flos": 5.02302897078272e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|