{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5006802721088436,
  "eval_steps": 500,
  "global_step": 368,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.3980952380952381e-05,
      "loss": 1.9251,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.3961904761904762e-05,
      "loss": 2.237,
      "step": 2
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.3942857142857142e-05,
      "loss": 2.3403,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3923809523809523e-05,
      "loss": 2.004,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3904761904761905e-05,
      "loss": 2.0853,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3885714285714286e-05,
      "loss": 1.946,
      "step": 6
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3866666666666667e-05,
      "loss": 1.941,
      "step": 7
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3847619047619048e-05,
      "loss": 1.7827,
      "step": 8
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3828571428571428e-05,
      "loss": 2.0452,
      "step": 9
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.380952380952381e-05,
      "loss": 1.7634,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.379047619047619e-05,
      "loss": 1.9573,
      "step": 11
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3771428571428572e-05,
      "loss": 1.9773,
      "step": 12
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3752380952380953e-05,
      "loss": 1.9236,
      "step": 13
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3733333333333333e-05,
      "loss": 1.8623,
      "step": 14
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3714285714285714e-05,
      "loss": 1.8837,
      "step": 15
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3695238095238095e-05,
      "loss": 2.029,
      "step": 16
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3676190476190477e-05,
      "loss": 2.0832,
      "step": 17
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3657142857142858e-05,
      "loss": 1.5689,
      "step": 18
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3638095238095238e-05,
      "loss": 1.8034,
      "step": 19
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3619047619047619e-05,
      "loss": 1.8121,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.36e-05,
      "loss": 1.8425,
      "step": 21
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3580952380952382e-05,
      "loss": 1.8582,
      "step": 22
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3561904761904763e-05,
      "loss": 1.8883,
      "step": 23
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3542857142857144e-05,
      "loss": 1.7498,
      "step": 24
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3523809523809524e-05,
      "loss": 1.804,
      "step": 25
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.3504761904761905e-05,
      "loss": 1.893,
      "step": 26
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.3485714285714286e-05,
      "loss": 1.8625,
      "step": 27
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.3466666666666668e-05,
      "loss": 1.6711,
      "step": 28
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.3447619047619047e-05,
      "loss": 1.7258,
      "step": 29
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.3428571428571429e-05,
      "loss": 1.4927,
      "step": 30
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.340952380952381e-05,
      "loss": 1.8962,
      "step": 31
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.339047619047619e-05,
      "loss": 1.5401,
      "step": 32
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.337142857142857e-05,
      "loss": 1.7157,
      "step": 33
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3352380952380952e-05,
      "loss": 1.8139,
      "step": 34
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3333333333333332e-05,
      "loss": 1.6017,
      "step": 35
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3314285714285713e-05,
      "loss": 1.812,
      "step": 36
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3295238095238094e-05,
      "loss": 1.8849,
      "step": 37
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3276190476190475e-05,
      "loss": 1.7949,
      "step": 38
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3257142857142857e-05,
      "loss": 1.68,
      "step": 39
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.3238095238095238e-05,
      "loss": 1.6812,
      "step": 40
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3219047619047618e-05,
      "loss": 1.8958,
      "step": 41
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3199999999999999e-05,
      "loss": 1.6719,
      "step": 42
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.318095238095238e-05,
      "loss": 1.7752,
      "step": 43
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3161904761904762e-05,
      "loss": 1.7632,
      "step": 44
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3142857142857143e-05,
      "loss": 1.5865,
      "step": 45
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3123809523809522e-05,
      "loss": 1.9588,
      "step": 46
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.3104761904761904e-05,
      "loss": 1.767,
      "step": 47
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3085714285714285e-05,
      "loss": 1.8413,
      "step": 48
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3066666666666666e-05,
      "loss": 1.7801,
      "step": 49
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3047619047619048e-05,
      "loss": 1.4807,
      "step": 50
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3028571428571427e-05,
      "loss": 1.718,
      "step": 51
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3009523809523809e-05,
      "loss": 1.6581,
      "step": 52
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.299047619047619e-05,
      "loss": 1.611,
      "step": 53
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.2971428571428571e-05,
      "loss": 1.7558,
      "step": 54
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.2952380952380952e-05,
      "loss": 1.8626,
      "step": 55
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2933333333333334e-05,
      "loss": 1.781,
      "step": 56
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2914285714285713e-05,
      "loss": 1.705,
      "step": 57
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2895238095238095e-05,
      "loss": 1.7269,
      "step": 58
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2876190476190476e-05,
      "loss": 1.7764,
      "step": 59
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2857142857142857e-05,
      "loss": 1.7602,
      "step": 60
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2838095238095239e-05,
      "loss": 1.8244,
      "step": 61
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.2819047619047618e-05,
      "loss": 1.7506,
      "step": 62
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.28e-05,
      "loss": 1.9183,
      "step": 63
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.278095238095238e-05,
      "loss": 1.8333,
      "step": 64
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.2761904761904762e-05,
      "loss": 1.5797,
      "step": 65
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.2742857142857143e-05,
      "loss": 1.5009,
      "step": 66
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.2723809523809525e-05,
      "loss": 1.4625,
      "step": 67
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.2704761904761904e-05,
      "loss": 1.9483,
      "step": 68
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.2685714285714286e-05,
      "loss": 1.3773,
      "step": 69
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 1.4853,
      "step": 70
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2647619047619048e-05,
      "loss": 1.5423,
      "step": 71
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.262857142857143e-05,
      "loss": 1.2449,
      "step": 72
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2609523809523809e-05,
      "loss": 1.9732,
      "step": 73
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.259047619047619e-05,
      "loss": 1.5281,
      "step": 74
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2571428571428572e-05,
      "loss": 1.8673,
      "step": 75
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2552380952380953e-05,
      "loss": 1.5986,
      "step": 76
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2533333333333334e-05,
      "loss": 1.7682,
      "step": 77
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2514285714285714e-05,
      "loss": 1.893,
      "step": 78
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2495238095238095e-05,
      "loss": 1.5008,
      "step": 79
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2476190476190476e-05,
      "loss": 1.7897,
      "step": 80
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2457142857142858e-05,
      "loss": 1.8012,
      "step": 81
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2438095238095239e-05,
      "loss": 1.7117,
      "step": 82
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.241904761904762e-05,
      "loss": 1.7093,
      "step": 83
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.24e-05,
      "loss": 1.5255,
      "step": 84
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2380952380952381e-05,
      "loss": 1.4508,
      "step": 85
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2361904761904762e-05,
      "loss": 1.6286,
      "step": 86
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2342857142857144e-05,
      "loss": 1.6495,
      "step": 87
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2323809523809525e-05,
      "loss": 1.4856,
      "step": 88
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2304761904761905e-05,
      "loss": 1.6728,
      "step": 89
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2285714285714286e-05,
      "loss": 1.4032,
      "step": 90
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.2266666666666667e-05,
      "loss": 1.6083,
      "step": 91
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2247619047619047e-05,
      "loss": 1.5496,
      "step": 92
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2228571428571428e-05,
      "loss": 2.0419,
      "step": 93
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.220952380952381e-05,
      "loss": 1.5236,
      "step": 94
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2190476190476189e-05,
      "loss": 1.3895,
      "step": 95
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.217142857142857e-05,
      "loss": 1.6532,
      "step": 96
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2152380952380952e-05,
      "loss": 1.6868,
      "step": 97
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2133333333333333e-05,
      "loss": 1.46,
      "step": 98
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.2114285714285714e-05,
      "loss": 1.6373,
      "step": 99
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.2095238095238094e-05,
      "loss": 1.4497,
      "step": 100
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.2076190476190475e-05,
      "loss": 1.6194,
      "step": 101
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.2057142857142856e-05,
      "loss": 1.7623,
      "step": 102
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.2038095238095238e-05,
      "loss": 1.6722,
      "step": 103
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.2019047619047619e-05,
      "loss": 1.765,
      "step": 104
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.1999999999999999e-05,
      "loss": 1.9492,
      "step": 105
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.198095238095238e-05,
      "loss": 1.2779,
      "step": 106
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1961904761904761e-05,
      "loss": 1.5185,
      "step": 107
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1942857142857142e-05,
      "loss": 1.685,
      "step": 108
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1923809523809524e-05,
      "loss": 1.7304,
      "step": 109
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1904761904761903e-05,
      "loss": 1.6961,
      "step": 110
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1885714285714285e-05,
      "loss": 1.3989,
      "step": 111
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1866666666666666e-05,
      "loss": 1.6383,
      "step": 112
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.1847619047619047e-05,
      "loss": 1.6153,
      "step": 113
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1828571428571429e-05,
      "loss": 1.7371,
      "step": 114
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.180952380952381e-05,
      "loss": 1.7609,
      "step": 115
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.179047619047619e-05,
      "loss": 1.4269,
      "step": 116
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.177142857142857e-05,
      "loss": 1.7976,
      "step": 117
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1752380952380952e-05,
      "loss": 1.538,
      "step": 118
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1733333333333333e-05,
      "loss": 1.6206,
      "step": 119
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1714285714285715e-05,
      "loss": 1.7221,
      "step": 120
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1695238095238094e-05,
      "loss": 1.2271,
      "step": 121
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.1676190476190476e-05,
      "loss": 1.8847,
      "step": 122
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.1657142857142857e-05,
      "loss": 2.1748,
      "step": 123
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.1638095238095238e-05,
      "loss": 1.6654,
      "step": 124
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.161904761904762e-05,
      "loss": 1.3367,
      "step": 125
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.16e-05,
      "loss": 1.5722,
      "step": 126
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.158095238095238e-05,
      "loss": 1.5672,
      "step": 127
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.1561904761904762e-05,
      "loss": 1.7614,
      "step": 128
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1542857142857143e-05,
      "loss": 1.6648,
      "step": 129
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1523809523809524e-05,
      "loss": 1.8454,
      "step": 130
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1504761904761906e-05,
      "loss": 1.2734,
      "step": 131
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1485714285714285e-05,
      "loss": 1.3313,
      "step": 132
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1466666666666666e-05,
      "loss": 1.1914,
      "step": 133
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1447619047619048e-05,
      "loss": 1.6754,
      "step": 134
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 1.6821,
      "step": 135
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.140952380952381e-05,
      "loss": 1.9894,
      "step": 136
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.139047619047619e-05,
      "loss": 1.7721,
      "step": 137
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1371428571428571e-05,
      "loss": 1.644,
      "step": 138
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1352380952380953e-05,
      "loss": 0.942,
      "step": 139
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1333333333333334e-05,
      "loss": 1.3515,
      "step": 140
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1314285714285715e-05,
      "loss": 1.5084,
      "step": 141
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1295238095238096e-05,
      "loss": 1.9172,
      "step": 142
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.1276190476190476e-05,
      "loss": 1.6386,
      "step": 143
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.1257142857142857e-05,
      "loss": 1.5719,
      "step": 144
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.1238095238095239e-05,
      "loss": 1.557,
      "step": 145
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.121904761904762e-05,
      "loss": 1.6815,
      "step": 146
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 1.6925,
      "step": 147
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.118095238095238e-05,
      "loss": 1.7414,
      "step": 148
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.1161904761904762e-05,
      "loss": 1.5588,
      "step": 149
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.1142857142857143e-05,
      "loss": 1.5205,
      "step": 150
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1123809523809525e-05,
      "loss": 1.6307,
      "step": 151
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1104761904761904e-05,
      "loss": 1.2504,
      "step": 152
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1085714285714286e-05,
      "loss": 1.8199,
      "step": 153
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1066666666666667e-05,
      "loss": 1.4118,
      "step": 154
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1047619047619046e-05,
      "loss": 1.4379,
      "step": 155
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1028571428571428e-05,
      "loss": 1.1513,
      "step": 156
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.1009523809523809e-05,
      "loss": 1.4012,
      "step": 157
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.099047619047619e-05,
      "loss": 1.3337,
      "step": 158
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.097142857142857e-05,
      "loss": 1.2861,
      "step": 159
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0952380952380951e-05,
      "loss": 1.1884,
      "step": 160
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0933333333333333e-05,
      "loss": 1.5143,
      "step": 161
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0914285714285714e-05,
      "loss": 1.6417,
      "step": 162
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0895238095238095e-05,
      "loss": 1.6076,
      "step": 163
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0876190476190475e-05,
      "loss": 1.3847,
      "step": 164
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.0857142857142856e-05,
      "loss": 1.5988,
      "step": 165
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.0838095238095237e-05,
      "loss": 1.5277,
      "step": 166
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.0819047619047619e-05,
      "loss": 1.5894,
      "step": 167
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.08e-05,
      "loss": 1.4294,
      "step": 168
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.078095238095238e-05,
      "loss": 1.5011,
      "step": 169
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.076190476190476e-05,
      "loss": 1.3573,
      "step": 170
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.0742857142857142e-05,
      "loss": 1.6822,
      "step": 171
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.0723809523809523e-05,
      "loss": 1.5523,
      "step": 172
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.0704761904761905e-05,
      "loss": 1.6135,
      "step": 173
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.0685714285714286e-05,
      "loss": 1.981,
      "step": 174
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.0666666666666666e-05,
      "loss": 1.5314,
      "step": 175
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.0647619047619047e-05,
      "loss": 1.1368,
      "step": 176
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.0628571428571428e-05,
      "loss": 1.763,
      "step": 177
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.060952380952381e-05,
      "loss": 1.1155,
      "step": 178
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.059047619047619e-05,
      "loss": 1.5571,
      "step": 179
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.057142857142857e-05,
      "loss": 1.6422,
      "step": 180
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0552380952380952e-05,
      "loss": 1.5116,
      "step": 181
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0533333333333333e-05,
      "loss": 1.3852,
      "step": 182
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0514285714285714e-05,
      "loss": 1.5127,
      "step": 183
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0495238095238096e-05,
      "loss": 1.4374,
      "step": 184
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0476190476190477e-05,
      "loss": 1.4862,
      "step": 185
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0457142857142856e-05,
      "loss": 1.271,
      "step": 186
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.0438095238095238e-05,
      "loss": 1.424,
      "step": 187
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0419047619047619e-05,
      "loss": 1.3103,
      "step": 188
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.04e-05,
      "loss": 1.5764,
      "step": 189
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0380952380952382e-05,
      "loss": 1.2966,
      "step": 190
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0361904761904761e-05,
      "loss": 1.4407,
      "step": 191
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0342857142857143e-05,
      "loss": 1.4769,
      "step": 192
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0323809523809524e-05,
      "loss": 1.8485,
      "step": 193
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.0304761904761905e-05,
      "loss": 1.3984,
      "step": 194
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0285714285714286e-05,
      "loss": 1.6489,
      "step": 195
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0266666666666666e-05,
      "loss": 1.6047,
      "step": 196
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0247619047619047e-05,
      "loss": 1.3263,
      "step": 197
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0228571428571429e-05,
      "loss": 1.1799,
      "step": 198
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.020952380952381e-05,
      "loss": 1.7125,
      "step": 199
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0190476190476191e-05,
      "loss": 1.3331,
      "step": 200
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0171428571428573e-05,
      "loss": 1.474,
      "step": 201
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.0152380952380952e-05,
      "loss": 1.3566,
      "step": 202
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0133333333333333e-05,
      "loss": 1.2508,
      "step": 203
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0114285714285715e-05,
      "loss": 1.5821,
      "step": 204
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0095238095238096e-05,
      "loss": 1.7047,
      "step": 205
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0076190476190477e-05,
      "loss": 1.5179,
      "step": 206
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0057142857142857e-05,
      "loss": 1.5492,
      "step": 207
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.0038095238095238e-05,
      "loss": 1.4274,
      "step": 208
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.001904761904762e-05,
      "loss": 1.4075,
      "step": 209
    },
    {
      "epoch": 0.29,
      "learning_rate": 1e-05,
      "loss": 1.4878,
      "step": 210
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.980952380952382e-06,
      "loss": 1.4027,
      "step": 211
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.961904761904762e-06,
      "loss": 1.4672,
      "step": 212
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.942857142857143e-06,
      "loss": 1.3631,
      "step": 213
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.923809523809524e-06,
      "loss": 1.4177,
      "step": 214
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.904761904761904e-06,
      "loss": 1.4329,
      "step": 215
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.885714285714285e-06,
      "loss": 1.5616,
      "step": 216
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.866666666666667e-06,
      "loss": 1.5772,
      "step": 217
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.847619047619046e-06,
      "loss": 1.6318,
      "step": 218
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.828571428571427e-06,
      "loss": 1.368,
      "step": 219
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.809523809523809e-06,
      "loss": 1.2774,
      "step": 220
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.79047619047619e-06,
      "loss": 1.3747,
      "step": 221
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.771428571428571e-06,
      "loss": 1.3974,
      "step": 222
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.752380952380951e-06,
      "loss": 1.5687,
      "step": 223
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.733333333333332e-06,
      "loss": 1.5798,
      "step": 224
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.714285714285713e-06,
      "loss": 1.7257,
      "step": 225
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.695238095238095e-06,
      "loss": 0.996,
      "step": 226
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.676190476190476e-06,
      "loss": 2.1266,
      "step": 227
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.657142857142857e-06,
      "loss": 1.7335,
      "step": 228
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.638095238095237e-06,
      "loss": 1.4647,
      "step": 229
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.619047619047618e-06,
      "loss": 1.0974,
      "step": 230
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.6e-06,
      "loss": 1.1022,
      "step": 231
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.580952380952381e-06,
      "loss": 1.2352,
      "step": 232
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.561904761904762e-06,
      "loss": 1.5827,
      "step": 233
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.542857142857142e-06,
      "loss": 1.7707,
      "step": 234
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.523809523809523e-06,
      "loss": 1.6125,
      "step": 235
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.504761904761904e-06,
      "loss": 1.8655,
      "step": 236
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.485714285714286e-06,
      "loss": 1.4079,
      "step": 237
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.466666666666667e-06,
      "loss": 1.4635,
      "step": 238
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.447619047619047e-06,
      "loss": 1.4203,
      "step": 239
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.428571428571428e-06,
      "loss": 1.5604,
      "step": 240
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.409523809523809e-06,
      "loss": 1.4702,
      "step": 241
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.39047619047619e-06,
      "loss": 1.5907,
      "step": 242
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.371428571428572e-06,
      "loss": 1.6619,
      "step": 243
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.352380952380953e-06,
      "loss": 1.5003,
      "step": 244
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.333333333333333e-06,
      "loss": 1.7973,
      "step": 245
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.314285714285714e-06,
      "loss": 1.6517,
      "step": 246
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.295238095238095e-06,
      "loss": 1.666,
      "step": 247
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.276190476190477e-06,
      "loss": 1.3698,
      "step": 248
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.257142857142858e-06,
      "loss": 1.623,
      "step": 249
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.238095238095237e-06,
      "loss": 1.1884,
      "step": 250
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.219047619047619e-06,
      "loss": 1.2523,
      "step": 251
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.2e-06,
      "loss": 1.5694,
      "step": 252
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.180952380952381e-06,
      "loss": 1.4466,
      "step": 253
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.161904761904763e-06,
      "loss": 1.6821,
      "step": 254
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.142857142857142e-06,
      "loss": 1.398,
      "step": 255
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.123809523809523e-06,
      "loss": 1.6264,
      "step": 256
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.104761904761905e-06,
      "loss": 1.5653,
      "step": 257
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.085714285714286e-06,
      "loss": 1.6115,
      "step": 258
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.066666666666667e-06,
      "loss": 1.455,
      "step": 259
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.047619047619049e-06,
      "loss": 1.4232,
      "step": 260
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.028571428571428e-06,
      "loss": 1.5055,
      "step": 261
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.00952380952381e-06,
      "loss": 1.7067,
      "step": 262
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.990476190476191e-06,
      "loss": 1.7505,
      "step": 263
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.971428571428572e-06,
      "loss": 1.1468,
      "step": 264
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.952380952380953e-06,
      "loss": 1.5362,
      "step": 265
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.933333333333333e-06,
      "loss": 1.7492,
      "step": 266
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.914285714285714e-06,
      "loss": 1.2201,
      "step": 267
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.895238095238096e-06,
      "loss": 1.3609,
      "step": 268
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.876190476190477e-06,
      "loss": 1.4403,
      "step": 269
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.857142857142858e-06,
      "loss": 1.5651,
      "step": 270
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.838095238095238e-06,
      "loss": 1.5318,
      "step": 271
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.819047619047619e-06,
      "loss": 1.1585,
      "step": 272
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.8e-06,
      "loss": 1.3241,
      "step": 273
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.780952380952382e-06,
      "loss": 1.4923,
      "step": 274
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.761904761904761e-06,
      "loss": 1.7976,
      "step": 275
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.742857142857143e-06,
      "loss": 1.372,
      "step": 276
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.723809523809524e-06,
      "loss": 1.22,
      "step": 277
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.704761904761904e-06,
      "loss": 1.4038,
      "step": 278
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.685714285714285e-06,
      "loss": 1.5937,
      "step": 279
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.666666666666666e-06,
      "loss": 1.3512,
      "step": 280
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.647619047619047e-06,
      "loss": 1.0502,
      "step": 281
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.628571428571427e-06,
      "loss": 1.2045,
      "step": 282
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.609523809523808e-06,
      "loss": 1.369,
      "step": 283
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.59047619047619e-06,
      "loss": 1.5638,
      "step": 284
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.6589,
      "step": 285
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.552380952380952e-06,
      "loss": 1.1079,
      "step": 286
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.533333333333334e-06,
      "loss": 1.4133,
      "step": 287
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.514285714285713e-06,
      "loss": 1.6174,
      "step": 288
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.495238095238094e-06,
      "loss": 1.6332,
      "step": 289
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.476190476190476e-06,
      "loss": 1.5578,
      "step": 290
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.457142857142857e-06,
      "loss": 1.3662,
      "step": 291
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.438095238095238e-06,
      "loss": 1.7247,
      "step": 292
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.419047619047618e-06,
      "loss": 1.6704,
      "step": 293
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.4e-06,
      "loss": 1.2688,
      "step": 294
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.38095238095238e-06,
      "loss": 1.2547,
      "step": 295
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.361904761904762e-06,
      "loss": 1.4751,
      "step": 296
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.342857142857143e-06,
      "loss": 1.425,
      "step": 297
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.323809523809523e-06,
      "loss": 1.6818,
      "step": 298
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.304761904761904e-06,
      "loss": 1.5462,
      "step": 299
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.285714285714285e-06,
      "loss": 1.1402,
      "step": 300
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.266666666666667e-06,
      "loss": 1.3696,
      "step": 301
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.247619047619048e-06,
      "loss": 1.404,
      "step": 302
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.22857142857143e-06,
      "loss": 1.3068,
      "step": 303
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.209523809523809e-06,
      "loss": 1.4081,
      "step": 304
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.19047619047619e-06,
      "loss": 1.1629,
      "step": 305
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.171428571428571e-06,
      "loss": 1.6461,
      "step": 306
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.152380952380953e-06,
      "loss": 1.2648,
      "step": 307
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.133333333333334e-06,
      "loss": 1.2813,
      "step": 308
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.114285714285714e-06,
      "loss": 1.438,
      "step": 309
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.095238095238095e-06,
      "loss": 1.4388,
      "step": 310
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.076190476190476e-06,
      "loss": 1.3703,
      "step": 311
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.057142857142857e-06,
      "loss": 1.3313,
      "step": 312
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.038095238095239e-06,
      "loss": 1.2477,
      "step": 313
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.019047619047618e-06,
      "loss": 1.3834,
      "step": 314
    },
    {
      "epoch": 0.43,
      "learning_rate": 8e-06,
      "loss": 1.2783,
      "step": 315
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.980952380952381e-06,
      "loss": 1.4858,
      "step": 316
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.961904761904762e-06,
      "loss": 1.342,
      "step": 317
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.942857142857144e-06,
      "loss": 1.3243,
      "step": 318
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.923809523809525e-06,
      "loss": 1.0396,
      "step": 319
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.904761904761904e-06,
      "loss": 1.288,
      "step": 320
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.885714285714286e-06,
      "loss": 1.3156,
      "step": 321
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.866666666666667e-06,
      "loss": 1.5605,
      "step": 322
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.847619047619048e-06,
      "loss": 1.5266,
      "step": 323
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.82857142857143e-06,
      "loss": 1.0976,
      "step": 324
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.80952380952381e-06,
      "loss": 1.2879,
      "step": 325
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.79047619047619e-06,
      "loss": 1.3833,
      "step": 326
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.771428571428572e-06,
      "loss": 1.4636,
      "step": 327
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.752380952380953e-06,
      "loss": 1.1288,
      "step": 328
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.733333333333334e-06,
      "loss": 1.0436,
      "step": 329
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.714285714285714e-06,
      "loss": 0.9859,
      "step": 330
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.695238095238095e-06,
      "loss": 1.3709,
      "step": 331
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.676190476190477e-06,
      "loss": 1.3816,
      "step": 332
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.657142857142858e-06,
      "loss": 1.1793,
      "step": 333
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.63809523809524e-06,
      "loss": 1.3626,
      "step": 334
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.61904761904762e-06,
      "loss": 1.3073,
      "step": 335
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.599999999999999e-06,
      "loss": 1.0394,
      "step": 336
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.5809523809523805e-06,
      "loss": 1.414,
      "step": 337
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.561904761904762e-06,
      "loss": 1.3723,
      "step": 338
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.542857142857143e-06,
      "loss": 1.5456,
      "step": 339
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.523809523809524e-06,
      "loss": 1.4463,
      "step": 340
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.504761904761904e-06,
      "loss": 1.4268,
      "step": 341
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.485714285714285e-06,
      "loss": 1.3708,
      "step": 342
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.466666666666667e-06,
      "loss": 1.2043,
      "step": 343
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.447619047619048e-06,
      "loss": 1.3896,
      "step": 344
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.428571428571429e-06,
      "loss": 0.9627,
      "step": 345
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.4095238095238105e-06,
      "loss": 1.2312,
      "step": 346
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.39047619047619e-06,
      "loss": 1.0419,
      "step": 347
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.371428571428571e-06,
      "loss": 1.1359,
      "step": 348
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.352380952380953e-06,
      "loss": 1.7454,
      "step": 349
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.333333333333334e-06,
      "loss": 1.2688,
      "step": 350
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.3142857142857144e-06,
      "loss": 1.5696,
      "step": 351
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.295238095238095e-06,
      "loss": 1.2027,
      "step": 352
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.276190476190476e-06,
      "loss": 1.4311,
      "step": 353
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.257142857142857e-06,
      "loss": 1.559,
      "step": 354
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.238095238095238e-06,
      "loss": 1.2771,
      "step": 355
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.219047619047619e-06,
      "loss": 1.3038,
      "step": 356
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.199999999999999e-06,
      "loss": 1.2849,
      "step": 357
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.18095238095238e-06,
      "loss": 1.2649,
      "step": 358
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.161904761904761e-06,
      "loss": 1.3374,
      "step": 359
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.142857142857143e-06,
      "loss": 1.4303,
      "step": 360
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.123809523809524e-06,
      "loss": 1.1511,
      "step": 361
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.104761904761905e-06,
      "loss": 1.3124,
      "step": 362
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.085714285714285e-06,
      "loss": 1.2916,
      "step": 363
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.066666666666666e-06,
      "loss": 1.2252,
      "step": 364
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.0476190476190475e-06,
      "loss": 1.361,
      "step": 365
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.028571428571429e-06,
      "loss": 1.7486,
      "step": 366
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.00952380952381e-06,
      "loss": 1.044,
      "step": 367
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.9904761904761905e-06,
      "loss": 1.2656,
      "step": 368
    }
  ],
  "logging_steps": 1,
  "max_steps": 735,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 368,
  "total_flos": 6.430858236710093e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}