| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 116, |
| "global_step": 464, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9994, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "eval_loss": 1.0349713563919067, |
| "eval_runtime": 2.438, |
| "eval_samples_per_second": 41.018, |
| "eval_steps_per_second": 41.018, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.554, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-06, |
| "loss": 0.9687, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.0517, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1e-05, |
| "loss": 0.8594, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.2e-05, |
| "loss": 0.9004, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.917, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.7736, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.8e-05, |
| "loss": 0.8723, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2e-05, |
| "loss": 0.9795, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 1.094, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.4e-05, |
| "loss": 0.8869, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.6991, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.9742, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 3e-05, |
| "loss": 0.8167, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.0581, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.988, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3.6e-05, |
| "loss": 0.9799, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 3.8e-05, |
| "loss": 1.138, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4e-05, |
| "loss": 0.9535, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.2e-05, |
| "loss": 0.958, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.8337, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 1.0907, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.8e-05, |
| "loss": 0.7637, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 5e-05, |
| "loss": 0.9891, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 1.0592, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.9176, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.8598, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 5.8e-05, |
| "loss": 0.9502, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 6e-05, |
| "loss": 0.806, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 6.2e-05, |
| "loss": 0.9362, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 1.0527, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 6.6e-05, |
| "loss": 1.0127, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.9728, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7e-05, |
| "loss": 1.0957, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.2e-05, |
| "loss": 1.2512, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.4e-05, |
| "loss": 1.0358, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.6e-05, |
| "loss": 1.6103, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.982, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8e-05, |
| "loss": 1.5293, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8.2e-05, |
| "loss": 0.9774, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8.4e-05, |
| "loss": 1.1694, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8.6e-05, |
| "loss": 1.2794, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.66, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9e-05, |
| "loss": 1.4438, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.8313, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.4e-05, |
| "loss": 1.4338, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.6e-05, |
| "loss": 1.0759, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.8e-05, |
| "loss": 1.1663, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001, |
| "loss": 1.359, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00010200000000000001, |
| "loss": 1.5855, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00010400000000000001, |
| "loss": 1.4167, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00010600000000000002, |
| "loss": 1.1239, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00010800000000000001, |
| "loss": 0.8807, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 1.319, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00011200000000000001, |
| "loss": 1.3731, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00011399999999999999, |
| "loss": 1.1958, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.000116, |
| "loss": 1.5947, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000118, |
| "loss": 1.3972, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00012, |
| "loss": 0.9218, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000122, |
| "loss": 1.3189, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000124, |
| "loss": 1.3368, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.000126, |
| "loss": 1.636, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00012800000000000002, |
| "loss": 1.6146, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.8519, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.000132, |
| "loss": 1.2888, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.000134, |
| "loss": 1.4018, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00013600000000000003, |
| "loss": 1.4954, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.000138, |
| "loss": 1.6397, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00014, |
| "loss": 1.3248, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.000142, |
| "loss": 1.2972, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000144, |
| "loss": 1.2796, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000146, |
| "loss": 1.4494, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000148, |
| "loss": 1.5123, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.3374, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000152, |
| "loss": 1.1158, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.000154, |
| "loss": 1.8782, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00015600000000000002, |
| "loss": 1.5696, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00015800000000000002, |
| "loss": 1.6433, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00016, |
| "loss": 1.9167, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.000162, |
| "loss": 1.8044, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.000164, |
| "loss": 1.7542, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.000166, |
| "loss": 1.5823, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.000168, |
| "loss": 2.0487, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00017, |
| "loss": 2.034, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.000172, |
| "loss": 2.2205, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.000174, |
| "loss": 1.8803, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00017600000000000002, |
| "loss": 1.327, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00017800000000000002, |
| "loss": 1.5811, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018, |
| "loss": 2.165, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.000182, |
| "loss": 1.5937, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018400000000000003, |
| "loss": 1.42, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018600000000000002, |
| "loss": 1.7707, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.000188, |
| "loss": 2.1475, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00019, |
| "loss": 2.4986, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.000192, |
| "loss": 1.8784, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.000194, |
| "loss": 2.3775, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.000196, |
| "loss": 1.9046, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019800000000000002, |
| "loss": 1.6854, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0002, |
| "loss": 1.9577, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019999627553166294, |
| "loss": 2.2022, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019998510240408496, |
| "loss": 2.1938, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0001999664814495453, |
| "loss": 1.7292, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00019994041405510705, |
| "loss": 2.0613, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019990690216251396, |
| "loss": 1.702, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001998659482680456, |
| "loss": 1.7804, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019981755542233177, |
| "loss": 2.0285, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001997617272301248, |
| "loss": 2.1402, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00019969846785003134, |
| "loss": 2.3233, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019962778199420265, |
| "loss": 2.7725, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019954967492798333, |
| "loss": 2.5352, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0001994641524695193, |
| "loss": 2.2977, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019937122098932428, |
| "loss": 2.1913, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0001992708874098054, |
| "loss": 2.0498, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0001991631592047475, |
| "loss": 2.4744, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019904804439875633, |
| "loss": 2.065, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_loss": 5.236201763153076, |
| "eval_runtime": 2.4368, |
| "eval_samples_per_second": 41.038, |
| "eval_steps_per_second": 41.038, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019892555156666089, |
| "loss": 5.4596, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00019879568983287467, |
| "loss": 2.2682, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019865846887071596, |
| "loss": 1.874, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001985138989016874, |
| "loss": 1.4378, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019836199069471437, |
| "loss": 2.3948, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019820275556534304, |
| "loss": 2.074, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019803620537489736, |
| "loss": 1.9271, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019786235252959553, |
| "loss": 2.1771, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019768120997962592, |
| "loss": 2.2844, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019749279121818235, |
| "loss": 2.3891, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00019729711028045909, |
| "loss": 2.841, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001970941817426052, |
| "loss": 2.4675, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019688402072063903, |
| "loss": 1.746, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019666664286932198, |
| "loss": 2.1284, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001964420643809925, |
| "loss": 2.5688, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00019621030198436006, |
| "loss": 2.6393, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019597137294325877, |
| "loss": 2.2703, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0001957252950553616, |
| "loss": 1.7504, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019547208665085457, |
| "loss": 2.0839, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019521176659107142, |
| "loss": 1.939, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019494435426708855, |
| "loss": 2.526, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001946698695982806, |
| "loss": 2.5513, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019438833303083678, |
| "loss": 2.201, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019409976553623766, |
| "loss": 2.4733, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019380418860969322, |
| "loss": 2.1536, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0001935016242685415, |
| "loss": 3.1198, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0001931920950506087, |
| "loss": 2.7858, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019287562401253022, |
| "loss": 2.6012, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019255223472803334, |
| "loss": 2.8911, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019222195128618106, |
| "loss": 2.4815, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019188479828957772, |
| "loss": 2.3879, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019154080085253666, |
| "loss": 2.3726, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00019118998459920902, |
| "loss": 2.4801, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0001908323756616754, |
| "loss": 1.6349, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001904680006779991, |
| "loss": 2.5929, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001900968867902419, |
| "loss": 2.154, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00018971906164244232, |
| "loss": 2.3055, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00018933455337855632, |
| "loss": 2.0643, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.000188943390640361, |
| "loss": 2.3043, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.000188545602565321, |
| "loss": 2.3552, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00018814121878441814, |
| "loss": 2.3875, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0001877302694199442, |
| "loss": 1.8648, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00018731278508325708, |
| "loss": 2.8517, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00018688879687250067, |
| "loss": 2.4357, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00018645833637028825, |
| "loss": 2.7364, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001860214356413501, |
| "loss": 2.1984, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00018557812723014476, |
| "loss": 2.6134, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00018512844415843514, |
| "loss": 1.8059, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00018467241992282843, |
| "loss": 1.9953, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00018421008849228118, |
| "loss": 2.1807, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0001837414843055689, |
| "loss": 2.3809, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00018326664226872065, |
| "loss": 2.8582, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0001827855977524191, |
| "loss": 2.3851, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00018229838658936564, |
| "loss": 2.2002, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0001818050450716113, |
| "loss": 2.1547, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00018130560994785325, |
| "loss": 2.2487, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00018080011842069765, |
| "loss": 2.1702, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00018028860814388827, |
| "loss": 2.2144, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017977111721950164, |
| "loss": 2.1576, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017924768419510904, |
| "loss": 2.2208, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017871834806090501, |
| "loss": 2.3436, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.000178183148246803, |
| "loss": 2.2956, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001776421246194982, |
| "loss": 2.2207, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00017709531747949796, |
| "loss": 2.2767, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00017654276755811997, |
| "loss": 2.963, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001759845160144579, |
| "loss": 2.7563, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00017542060443231572, |
| "loss": 2.4224, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017485107481711012, |
| "loss": 2.6196, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017427596959274143, |
| "loss": 2.4883, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017369533159843369, |
| "loss": 2.7563, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017310920408554332, |
| "loss": 2.5504, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017251763071433765, |
| "loss": 2.0197, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017192065555074245, |
| "loss": 2.5622, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017131832306305965, |
| "loss": 2.4104, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017071067811865476, |
| "loss": 2.1092, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017009776598061495, |
| "loss": 2.8504, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00016947963230437725, |
| "loss": 2.4122, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0001688563231343277, |
| "loss": 2.352, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00016822788490037177, |
| "loss": 2.2958, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00016759436441447545, |
| "loss": 2.3624, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00016695580886717858, |
| "loss": 2.615, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00016631226582407952, |
| "loss": 2.2249, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00016566378322229204, |
| "loss": 2.3185, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00016501040936687443, |
| "loss": 2.1878, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00016435219292723147, |
| "loss": 2.3747, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00016368918293348892, |
| "loss": 2.6988, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00016302142877284138, |
| "loss": 2.6106, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00016234898018587337, |
| "loss": 2.5234, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00016167188726285434, |
| "loss": 2.3486, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00016099020044000727, |
| "loss": 2.404, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00016030397049575203, |
| "loss": 2.3012, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00015961324854692254, |
| "loss": 2.016, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00015891808604495938, |
| "loss": 2.6546, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00015821853477207708, |
| "loss": 3.0068, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00015751464683740697, |
| "loss": 2.4275, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00015680647467311557, |
| "loss": 3.7802, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00015609407103049896, |
| "loss": 2.6161, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001553774889760533, |
| "loss": 2.2672, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001546567818875221, |
| "loss": 2.6335, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00015393200344991995, |
| "loss": 2.0922, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00015320320765153367, |
| "loss": 3.0913, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0001524704487799008, |
| "loss": 1.8932, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00015173378141776568, |
| "loss": 2.257, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0001509932604390136, |
| "loss": 2.6868, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001502489410045833, |
| "loss": 2.2564, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00014950087855835815, |
| "loss": 2.7258, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.000148749128823036, |
| "loss": 2.5184, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00014799374779597867, |
| "loss": 2.2444, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00014723479174504037, |
| "loss": 2.4531, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00014647231720437686, |
| "loss": 2.5242, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001457063809702338, |
| "loss": 2.9933, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00014493704009671613, |
| "loss": 2.5476, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00014416435189153846, |
| "loss": 2.3409, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00014338837391175582, |
| "loss": 2.515, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00014260916395947656, |
| "loss": 2.0276, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0001418267800775565, |
| "loss": 1.9585, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 2.3423757553100586, |
| "eval_runtime": 2.443, |
| "eval_samples_per_second": 40.932, |
| "eval_steps_per_second": 40.932, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0001410412805452757, |
| "loss": 2.1835, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00014025272387399674, |
| "loss": 1.7615, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00013946116880280681, |
| "loss": 2.7107, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0001386666742941419, |
| "loss": 2.3294, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00013786929952939477, |
| "loss": 3.013, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00013706910390450677, |
| "loss": 2.7057, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0001362661470255432, |
| "loss": 2.6102, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00013546048870425356, |
| "loss": 2.3016, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.000134652188953616, |
| "loss": 2.3193, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00013384130798336705, |
| "loss": 2.0571, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00013302790619551674, |
| "loss": 2.1643, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00013221204417984908, |
| "loss": 3.0413, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.000131393782709409, |
| "loss": 2.7372, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001305731827359753, |
| "loss": 2.2497, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00012975030538552032, |
| "loss": 2.1391, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00012892521195365678, |
| "loss": 2.4507, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00012809796390107195, |
| "loss": 1.9877, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00012726862284894938, |
| "loss": 2.1831, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0001264372505743789, |
| "loss": 2.6859, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0001256039090057547, |
| "loss": 2.2376, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0001247686602181626, |
| "loss": 2.0557, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0001239315664287558, |
| "loss": 2.6424, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0001230926899921206, |
| "loss": 2.5532, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00012225209339563145, |
| "loss": 2.4243, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00012140983925479662, |
| "loss": 2.5765, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00012056599030859366, |
| "loss": 2.0706, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00011972060941479621, |
| "loss": 2.6067, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00011887375954529168, |
| "loss": 1.7002, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001180255037813906, |
| "loss": 2.1888, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00011717590530912763, |
| "loss": 2.3502, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00011632502741455496, |
| "loss": 2.1983, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00011547293347902812, |
| "loss": 1.9767, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00011461968697448485, |
| "loss": 2.238, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00011376535145871684, |
| "loss": 2.5835, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00011290999057063569, |
| "loss": 2.2147, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001120536680255323, |
| "loss": 2.3054, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00011119644761033078, |
| "loss": 2.1802, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00011033839317883701, |
| "loss": 2.4802, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00010947956864698223, |
| "loss": 2.4535, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00010862003798806196, |
| "loss": 2.704, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00010775986522797063, |
| "loss": 2.544, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00010689911444043248, |
| "loss": 1.9008, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00010603784974222861, |
| "loss": 2.5609, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00010517613528842097, |
| "loss": 1.9635, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00010431403526757347, |
| "loss": 2.2181, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00010345161389697082, |
| "loss": 2.3383, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00010258893541783476, |
| "loss": 2.3929, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00010172606409053886, |
| "loss": 2.1808, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0001008630641898219, |
| "loss": 2.2238, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0001, |
| "loss": 1.9188, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.913693581017812e-05, |
| "loss": 2.5552, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.827393590946116e-05, |
| "loss": 1.9871, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.741106458216528e-05, |
| "loss": 1.8843, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.654838610302923e-05, |
| "loss": 1.8005, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.568596473242654e-05, |
| "loss": 1.938, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.482386471157904e-05, |
| "loss": 2.3937, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.396215025777139e-05, |
| "loss": 2.6076, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.31008855595675e-05, |
| "loss": 1.8442, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.224013477202939e-05, |
| "loss": 2.2442, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.137996201193805e-05, |
| "loss": 2.2218, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.052043135301779e-05, |
| "loss": 2.1, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 8.9661606821163e-05, |
| "loss": 2.8692, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 8.880355238966923e-05, |
| "loss": 2.1588, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 8.79463319744677e-05, |
| "loss": 2.5515, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 8.709000942936433e-05, |
| "loss": 1.9598, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 8.62346485412832e-05, |
| "loss": 1.6887, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 8.538031302551522e-05, |
| "loss": 1.9965, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 8.452706652097186e-05, |
| "loss": 2.6144, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 8.367497258544507e-05, |
| "loss": 2.2058, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 8.282409469087239e-05, |
| "loss": 1.9281, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 8.197449621860943e-05, |
| "loss": 2.0314, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 8.112624045470835e-05, |
| "loss": 1.982, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 8.027939058520381e-05, |
| "loss": 2.1922, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 7.943400969140635e-05, |
| "loss": 2.0533, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 7.85901607452034e-05, |
| "loss": 2.3375, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 7.774790660436858e-05, |
| "loss": 1.6834, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.690731000787948e-05, |
| "loss": 1.9363, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.606843357124426e-05, |
| "loss": 1.8109, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.52313397818374e-05, |
| "loss": 1.9592, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.43960909942453e-05, |
| "loss": 1.9761, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.356274942562111e-05, |
| "loss": 1.996, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 7.273137715105063e-05, |
| "loss": 1.5629, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 7.190203609892808e-05, |
| "loss": 1.7531, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 7.107478804634325e-05, |
| "loss": 2.1283, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 7.024969461447972e-05, |
| "loss": 2.1364, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.942681726402473e-05, |
| "loss": 2.129, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.8606217290591e-05, |
| "loss": 1.9605, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.778795582015097e-05, |
| "loss": 1.8853, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.697209380448333e-05, |
| "loss": 2.1903, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.615869201663296e-05, |
| "loss": 1.8106, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 6.534781104638399e-05, |
| "loss": 2.1333, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 6.453951129574644e-05, |
| "loss": 1.7522, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 6.37338529744568e-05, |
| "loss": 1.6425, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 6.293089609549325e-05, |
| "loss": 1.865, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 6.213070047060524e-05, |
| "loss": 2.1398, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.133332570585812e-05, |
| "loss": 2.7922, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.05388311971932e-05, |
| "loss": 2.1964, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.9747276126003257e-05, |
| "loss": 2.1245, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.8958719454724346e-05, |
| "loss": 2.0207, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.817321992244351e-05, |
| "loss": 2.0353, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.739083604052351e-05, |
| "loss": 1.7121, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.6611626088244194e-05, |
| "loss": 1.8025, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.583564810846157e-05, |
| "loss": 1.8596, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.506295990328385e-05, |
| "loss": 1.5929, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.429361902976624e-05, |
| "loss": 1.9157, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.3527682795623146e-05, |
| "loss": 2.1214, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.276520825495963e-05, |
| "loss": 2.3418, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.200625220402139e-05, |
| "loss": 2.0864, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 5.1250871176964036e-05, |
| "loss": 2.2181, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 5.0499121441641864e-05, |
| "loss": 1.8434, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.975105899541671e-05, |
| "loss": 2.2302, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.900673956098644e-05, |
| "loss": 1.5765, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.826621858223431e-05, |
| "loss": 2.2774, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.75295512200992e-05, |
| "loss": 1.9499, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.6796792348466356e-05, |
| "loss": 1.6914, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.606799655008009e-05, |
| "loss": 2.7503, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_loss": 1.8830461502075195, |
| "eval_runtime": 2.4367, |
| "eval_samples_per_second": 41.04, |
| "eval_steps_per_second": 41.04, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.5343218112477904e-05, |
| "loss": 1.9796, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.462251102394669e-05, |
| "loss": 2.0564, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.3905928969501056e-05, |
| "loss": 2.0165, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.3193525326884435e-05, |
| "loss": 1.9066, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.248535316259305e-05, |
| "loss": 2.1227, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.1781465227922957e-05, |
| "loss": 1.9861, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.108191395504064e-05, |
| "loss": 2.4979, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.038675145307747e-05, |
| "loss": 1.8026, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.9696029504247956e-05, |
| "loss": 2.1905, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.900979955999271e-05, |
| "loss": 1.7852, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.832811273714569e-05, |
| "loss": 1.5334, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.7651019814126654e-05, |
| "loss": 1.6633, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.697857122715865e-05, |
| "loss": 1.8311, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.6310817066511105e-05, |
| "loss": 2.157, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.5647807072768526e-05, |
| "loss": 1.6636, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.498959063312558e-05, |
| "loss": 1.1346, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.4336216777708e-05, |
| "loss": 2.638, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.36877341759205e-05, |
| "loss": 1.883, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.3044191132821454e-05, |
| "loss": 1.3731, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.2405635585524565e-05, |
| "loss": 2.1026, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.177211509962826e-05, |
| "loss": 2.2405, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.114367686567228e-05, |
| "loss": 1.7347, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.052036769562276e-05, |
| "loss": 1.8167, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.9902234019385057e-05, |
| "loss": 1.9998, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.9289321881345254e-05, |
| "loss": 1.8613, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.8681676936940393e-05, |
| "loss": 1.9718, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.8079344449257572e-05, |
| "loss": 1.7202, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.7482369285662378e-05, |
| "loss": 2.5565, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.6890795914456678e-05, |
| "loss": 1.874, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.6304668401566335e-05, |
| "loss": 3.4799, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.572403040725855e-05, |
| "loss": 1.754, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.514892518288988e-05, |
| "loss": 2.2392, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.4579395567684283e-05, |
| "loss": 2.0043, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.401548398554213e-05, |
| "loss": 2.0013, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.345723244188006e-05, |
| "loss": 1.556, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.290468252050204e-05, |
| "loss": 1.6086, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.2357875380501836e-05, |
| "loss": 1.7659, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.181685175319702e-05, |
| "loss": 2.4276, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.1281651939094992e-05, |
| "loss": 1.8121, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.0752315804890977e-05, |
| "loss": 1.5507, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.0228882780498404e-05, |
| "loss": 1.7273, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.971139185611176e-05, |
| "loss": 1.8045, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.919988157930236e-05, |
| "loss": 2.6438, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.8694390052146737e-05, |
| "loss": 1.9753, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.819495492838872e-05, |
| "loss": 2.0211, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.7701613410634365e-05, |
| "loss": 2.0122, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.7214402247580918e-05, |
| "loss": 1.8367, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6733357731279377e-05, |
| "loss": 1.7804, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6258515694431144e-05, |
| "loss": 2.4845, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.5789911507718826e-05, |
| "loss": 1.6378, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.5327580077171587e-05, |
| "loss": 1.878, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.4871555841564887e-05, |
| "loss": 1.8673, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.442187276985526e-05, |
| "loss": 1.765, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.3978564358649927e-05, |
| "loss": 1.4879, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.3541663629711766e-05, |
| "loss": 1.6485, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.311120312749935e-05, |
| "loss": 1.9323, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.2687214916742918e-05, |
| "loss": 1.8893, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.2269730580055805e-05, |
| "loss": 1.9161, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.185878121558186e-05, |
| "loss": 1.6298, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.1454397434679021e-05, |
| "loss": 1.8041, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.1056609359639025e-05, |
| "loss": 1.6835, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.0665446621443708e-05, |
| "loss": 2.0931, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.028093835755769e-05, |
| "loss": 1.5514, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.903113209758096e-06, |
| "loss": 1.8661, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.531999322000885e-06, |
| "loss": 1.67, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.1676243383246e-06, |
| "loss": 1.7691, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.810015400790994e-06, |
| "loss": 1.6751, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.45919914746337e-06, |
| "loss": 1.5257, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.115201710422282e-06, |
| "loss": 1.8501, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 7.778048713818975e-06, |
| "loss": 1.7915, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 7.447765271966656e-06, |
| "loss": 1.5242, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.124375987469767e-06, |
| "loss": 2.3553, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.80790494939132e-06, |
| "loss": 2.0156, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.498375731458528e-06, |
| "loss": 1.6318, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.195811390306816e-06, |
| "loss": 1.7414, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 5.900234463762366e-06, |
| "loss": 1.7884, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 5.611666969163243e-06, |
| "loss": 1.6805, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 5.3301304017194135e-06, |
| "loss": 1.8931, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 5.055645732911462e-06, |
| "loss": 1.785, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.788233408928589e-06, |
| "loss": 1.707, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.527913349145441e-06, |
| "loss": 1.7147, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 4.27470494463843e-06, |
| "loss": 1.6978, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 4.028627056741252e-06, |
| "loss": 1.6563, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.789698015639953e-06, |
| "loss": 1.9431, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.5579356190074907e-06, |
| "loss": 1.5852, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.3333571306780497e-06, |
| "loss": 1.7847, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.115979279360992e-06, |
| "loss": 1.539, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.905818257394799e-06, |
| "loss": 1.3433, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.702889719540924e-06, |
| "loss": 1.7076, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.5072087818176382e-06, |
| "loss": 1.8148, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.3187900203740844e-06, |
| "loss": 1.9275, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.137647470404469e-06, |
| "loss": 1.8731, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.963794625102655e-06, |
| "loss": 1.6147, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.797244434656975e-06, |
| "loss": 1.6936, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.6380093052856483e-06, |
| "loss": 1.59, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.48610109831262e-06, |
| "loss": 1.8261, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.341531129284046e-06, |
| "loss": 1.6663, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.2043101671253554e-06, |
| "loss": 1.3988, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.0744484333391368e-06, |
| "loss": 1.8786, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.519556012436815e-07, |
| "loss": 2.0669, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 8.368407952525026e-07, |
| "loss": 1.6939, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 7.291125901946027e-07, |
| "loss": 1.8047, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.287790106757396e-07, |
| "loss": 1.7426, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.358475304807375e-07, |
| "loss": 1.9933, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 4.503250720166774e-07, |
| "loss": 1.54, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.7221800579735346e-07, |
| "loss": 1.6834, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.0153214996866406e-07, |
| "loss": 1.5283, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.382727698752474e-07, |
| "loss": 1.5974, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.824445776682504e-07, |
| "loss": 1.8299, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.340517319543877e-07, |
| "loss": 1.9635, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.309783748606693e-08, |
| "loss": 1.7428, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.958594489295921e-08, |
| "loss": 1.8075, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.351855045471419e-08, |
| "loss": 1.7204, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.4897595915053242e-08, |
| "loss": 1.8357, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.724468337085174e-09, |
| "loss": 1.4306, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0, |
| "loss": 1.5434, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.7537641525268555, |
| "eval_runtime": 2.432, |
| "eval_samples_per_second": 41.119, |
| "eval_steps_per_second": 41.119, |
| "step": 464 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 464, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "total_flos": 1.8836320897990656e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |