| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.111111111111111, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001985714285714286, |
| "loss": 2.5072, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019714285714285716, |
| "loss": 2.194, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019571428571428572, |
| "loss": 1.9685, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001942857142857143, |
| "loss": 1.7577, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00019285714285714286, |
| "loss": 1.6095, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00019142857142857145, |
| "loss": 1.5448, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019, |
| "loss": 1.453, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00018857142857142857, |
| "loss": 1.41, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00018714285714285716, |
| "loss": 1.3054, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00018571428571428572, |
| "loss": 1.2634, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00018428571428571428, |
| "loss": 1.2269, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00018285714285714286, |
| "loss": 1.2405, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00018142857142857142, |
| "loss": 1.2436, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00018, |
| "loss": 1.2063, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0001785714285714286, |
| "loss": 1.1789, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00017714285714285713, |
| "loss": 1.2007, |
| "step": 16 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00017571428571428572, |
| "loss": 1.1616, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0001742857142857143, |
| "loss": 1.157, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00017285714285714287, |
| "loss": 1.1555, |
| "step": 19 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.1559, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00017, |
| "loss": 1.1487, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.00016857142857142857, |
| "loss": 1.1729, |
| "step": 22 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.00016714285714285716, |
| "loss": 1.1251, |
| "step": 23 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.00016571428571428575, |
| "loss": 1.1181, |
| "step": 24 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.00016428571428571428, |
| "loss": 1.1144, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.00016285714285714287, |
| "loss": 1.1416, |
| "step": 26 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.00016142857142857145, |
| "loss": 1.0965, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.00016, |
| "loss": 1.0936, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.00015857142857142857, |
| "loss": 1.0839, |
| "step": 29 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.00015714285714285716, |
| "loss": 1.127, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 0.00015571428571428572, |
| "loss": 1.0886, |
| "step": 31 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 0.0001542857142857143, |
| "loss": 1.0447, |
| "step": 32 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.00015285714285714287, |
| "loss": 1.0513, |
| "step": 33 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.00015142857142857143, |
| "loss": 1.098, |
| "step": 34 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.0628, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 0.00014857142857142857, |
| "loss": 1.0814, |
| "step": 36 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 0.00014714285714285716, |
| "loss": 1.0638, |
| "step": 37 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 0.00014571428571428572, |
| "loss": 1.0652, |
| "step": 38 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.00014428571428571428, |
| "loss": 1.0463, |
| "step": 39 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.0349, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.00014142857142857145, |
| "loss": 1.0165, |
| "step": 41 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.00014, |
| "loss": 1.0905, |
| "step": 42 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 0.00013857142857142857, |
| "loss": 1.0297, |
| "step": 43 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 0.00013714285714285716, |
| "loss": 1.0061, |
| "step": 44 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 0.00013571428571428572, |
| "loss": 1.0019, |
| "step": 45 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 0.00013428571428571428, |
| "loss": 0.9555, |
| "step": 46 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 0.00013285714285714287, |
| "loss": 1.038, |
| "step": 47 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 0.00013142857142857143, |
| "loss": 0.9932, |
| "step": 48 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.0451, |
| "step": 49 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 0.00012857142857142858, |
| "loss": 1.008, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 0.00012714285714285714, |
| "loss": 1.0362, |
| "step": 51 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 0.00012571428571428572, |
| "loss": 1.0007, |
| "step": 52 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 0.00012428571428571428, |
| "loss": 1.0038, |
| "step": 53 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 0.00012285714285714287, |
| "loss": 1.0057, |
| "step": 54 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 0.00012142857142857143, |
| "loss": 1.0172, |
| "step": 55 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 0.00012, |
| "loss": 0.982, |
| "step": 56 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 0.00011857142857142858, |
| "loss": 0.9838, |
| "step": 57 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 0.00011714285714285715, |
| "loss": 0.9677, |
| "step": 58 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 0.00011571428571428574, |
| "loss": 0.9815, |
| "step": 59 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 0.9711, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 0.00011285714285714286, |
| "loss": 1.0086, |
| "step": 61 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 0.00011142857142857144, |
| "loss": 0.9485, |
| "step": 62 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 0.9342, |
| "step": 63 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 0.00010857142857142856, |
| "loss": 0.9887, |
| "step": 64 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 0.00010714285714285715, |
| "loss": 0.9614, |
| "step": 65 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 0.00010571428571428572, |
| "loss": 0.9644, |
| "step": 66 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 0.0001042857142857143, |
| "loss": 0.9267, |
| "step": 67 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 0.00010285714285714286, |
| "loss": 0.954, |
| "step": 68 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 0.00010142857142857143, |
| "loss": 0.919, |
| "step": 69 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 0.0001, |
| "loss": 0.9478, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 9.857142857142858e-05, |
| "loss": 0.9559, |
| "step": 71 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 9.714285714285715e-05, |
| "loss": 0.9596, |
| "step": 72 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 9.571428571428573e-05, |
| "loss": 0.9151, |
| "step": 73 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 9.428571428571429e-05, |
| "loss": 0.9059, |
| "step": 74 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 9.285714285714286e-05, |
| "loss": 0.8717, |
| "step": 75 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 9.142857142857143e-05, |
| "loss": 0.8912, |
| "step": 76 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 9e-05, |
| "loss": 0.9166, |
| "step": 77 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 8.857142857142857e-05, |
| "loss": 0.9362, |
| "step": 78 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 8.714285714285715e-05, |
| "loss": 0.8969, |
| "step": 79 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 0.898, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 8.428571428571429e-05, |
| "loss": 0.8626, |
| "step": 81 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 8.285714285714287e-05, |
| "loss": 0.9353, |
| "step": 82 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 8.142857142857143e-05, |
| "loss": 0.9353, |
| "step": 83 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 8e-05, |
| "loss": 0.9277, |
| "step": 84 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 7.857142857142858e-05, |
| "loss": 0.8856, |
| "step": 85 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 7.714285714285715e-05, |
| "loss": 0.8771, |
| "step": 86 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 7.571428571428571e-05, |
| "loss": 0.8634, |
| "step": 87 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 7.428571428571429e-05, |
| "loss": 0.8655, |
| "step": 88 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.285714285714286e-05, |
| "loss": 0.856, |
| "step": 89 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.8929, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 7e-05, |
| "loss": 0.8844, |
| "step": 91 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 6.857142857142858e-05, |
| "loss": 0.8951, |
| "step": 92 |
| }, |
| { |
| "epoch": 6.61, |
| "learning_rate": 6.714285714285714e-05, |
| "loss": 0.8385, |
| "step": 93 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 6.571428571428571e-05, |
| "loss": 0.873, |
| "step": 94 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 6.428571428571429e-05, |
| "loss": 0.9033, |
| "step": 95 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 6.285714285714286e-05, |
| "loss": 0.8643, |
| "step": 96 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 6.142857142857143e-05, |
| "loss": 0.8894, |
| "step": 97 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 6e-05, |
| "loss": 0.8436, |
| "step": 98 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 5.8571428571428575e-05, |
| "loss": 0.8362, |
| "step": 99 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.8162, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 140, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "total_flos": 1.837898937498624e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|