Training in progress, step 8000, checkpoint
Browse files- last-checkpoint/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +293 -3
last-checkpoint/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0aa42ddad4af96c74b65d7385afeb6f0d5fb2d664599cd656cd82fbd70ad62da
|
| 3 |
+
size 5117197489
|
last-checkpoint/global_step8000/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db5c9356593ff2b606f0316fc19dda6e153f3fa9391f9b150d5a17343e4eeeeb
|
| 3 |
+
size 859127933
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step8000
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 962205216
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0823a45cd5f5f262d4113d9c6af3e480a93b1328d895090e11d3841575e98029
|
| 3 |
size 962205216
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14709
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f2132517ec1780cf0e43d2d85e0457c9953dabc448540c499dce25e57e2b052
|
| 3 |
size 14709
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cba89e97c806c2994342d3ee7fc823d23ef358301180bf2dcf6ac57f1ab3869c
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 84.13012729844414,
|
| 3 |
"best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-7000",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 1000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2037,6 +2037,296 @@
|
|
| 2037 |
"eval_steps_per_second": 1.957,
|
| 2038 |
"eval_wer": 84.13012729844414,
|
| 2039 |
"step": 7000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2040 |
}
|
| 2041 |
],
|
| 2042 |
"logging_steps": 25,
|
|
@@ -2056,7 +2346,7 @@
|
|
| 2056 |
"attributes": {}
|
| 2057 |
}
|
| 2058 |
},
|
| 2059 |
-
"total_flos": 1.
|
| 2060 |
"train_batch_size": 4,
|
| 2061 |
"trial_name": null,
|
| 2062 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 84.13012729844414,
|
| 3 |
"best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-7000",
|
| 4 |
+
"epoch": 8.810572687224669,
|
| 5 |
"eval_steps": 1000,
|
| 6 |
+
"global_step": 8000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2037 |
"eval_steps_per_second": 1.957,
|
| 2038 |
"eval_wer": 84.13012729844414,
|
| 2039 |
"step": 7000
|
| 2040 |
+
},
|
| 2041 |
+
{
|
| 2042 |
+
"epoch": 7.736784140969163,
|
| 2043 |
+
"grad_norm": 0.5741052627563477,
|
| 2044 |
+
"learning_rate": 1.3307692307692309e-05,
|
| 2045 |
+
"loss": 0.0909,
|
| 2046 |
+
"step": 7025
|
| 2047 |
+
},
|
| 2048 |
+
{
|
| 2049 |
+
"epoch": 7.76431718061674,
|
| 2050 |
+
"grad_norm": 0.7617988586425781,
|
| 2051 |
+
"learning_rate": 1.3282051282051282e-05,
|
| 2052 |
+
"loss": 0.0753,
|
| 2053 |
+
"step": 7050
|
| 2054 |
+
},
|
| 2055 |
+
{
|
| 2056 |
+
"epoch": 7.791850220264317,
|
| 2057 |
+
"grad_norm": 0.5416741967201233,
|
| 2058 |
+
"learning_rate": 1.3256410256410258e-05,
|
| 2059 |
+
"loss": 0.0731,
|
| 2060 |
+
"step": 7075
|
| 2061 |
+
},
|
| 2062 |
+
{
|
| 2063 |
+
"epoch": 7.819383259911894,
|
| 2064 |
+
"grad_norm": 0.6349952816963196,
|
| 2065 |
+
"learning_rate": 1.3230769230769231e-05,
|
| 2066 |
+
"loss": 0.0703,
|
| 2067 |
+
"step": 7100
|
| 2068 |
+
},
|
| 2069 |
+
{
|
| 2070 |
+
"epoch": 7.846916299559472,
|
| 2071 |
+
"grad_norm": 0.33196786046028137,
|
| 2072 |
+
"learning_rate": 1.3205128205128207e-05,
|
| 2073 |
+
"loss": 0.0656,
|
| 2074 |
+
"step": 7125
|
| 2075 |
+
},
|
| 2076 |
+
{
|
| 2077 |
+
"epoch": 7.8744493392070485,
|
| 2078 |
+
"grad_norm": 0.379213809967041,
|
| 2079 |
+
"learning_rate": 1.317948717948718e-05,
|
| 2080 |
+
"loss": 0.0917,
|
| 2081 |
+
"step": 7150
|
| 2082 |
+
},
|
| 2083 |
+
{
|
| 2084 |
+
"epoch": 7.901982378854625,
|
| 2085 |
+
"grad_norm": 0.34401291608810425,
|
| 2086 |
+
"learning_rate": 1.3153846153846156e-05,
|
| 2087 |
+
"loss": 0.0911,
|
| 2088 |
+
"step": 7175
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"epoch": 7.929515418502203,
|
| 2092 |
+
"grad_norm": 0.4732189178466797,
|
| 2093 |
+
"learning_rate": 1.312820512820513e-05,
|
| 2094 |
+
"loss": 0.101,
|
| 2095 |
+
"step": 7200
|
| 2096 |
+
},
|
| 2097 |
+
{
|
| 2098 |
+
"epoch": 7.95704845814978,
|
| 2099 |
+
"grad_norm": 0.5580617785453796,
|
| 2100 |
+
"learning_rate": 1.3102564102564103e-05,
|
| 2101 |
+
"loss": 0.0898,
|
| 2102 |
+
"step": 7225
|
| 2103 |
+
},
|
| 2104 |
+
{
|
| 2105 |
+
"epoch": 7.984581497797357,
|
| 2106 |
+
"grad_norm": 0.42180871963500977,
|
| 2107 |
+
"learning_rate": 1.3076923076923078e-05,
|
| 2108 |
+
"loss": 0.086,
|
| 2109 |
+
"step": 7250
|
| 2110 |
+
},
|
| 2111 |
+
{
|
| 2112 |
+
"epoch": 8.012114537444933,
|
| 2113 |
+
"grad_norm": 0.2615683376789093,
|
| 2114 |
+
"learning_rate": 1.3051282051282052e-05,
|
| 2115 |
+
"loss": 0.0898,
|
| 2116 |
+
"step": 7275
|
| 2117 |
+
},
|
| 2118 |
+
{
|
| 2119 |
+
"epoch": 8.039647577092511,
|
| 2120 |
+
"grad_norm": 0.44722801446914673,
|
| 2121 |
+
"learning_rate": 1.3025641025641027e-05,
|
| 2122 |
+
"loss": 0.0602,
|
| 2123 |
+
"step": 7300
|
| 2124 |
+
},
|
| 2125 |
+
{
|
| 2126 |
+
"epoch": 8.067180616740089,
|
| 2127 |
+
"grad_norm": 0.3499121367931366,
|
| 2128 |
+
"learning_rate": 1.3000000000000001e-05,
|
| 2129 |
+
"loss": 0.0549,
|
| 2130 |
+
"step": 7325
|
| 2131 |
+
},
|
| 2132 |
+
{
|
| 2133 |
+
"epoch": 8.094713656387665,
|
| 2134 |
+
"grad_norm": 0.37767261266708374,
|
| 2135 |
+
"learning_rate": 1.2974358974358976e-05,
|
| 2136 |
+
"loss": 0.0573,
|
| 2137 |
+
"step": 7350
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 8.122246696035242,
|
| 2141 |
+
"grad_norm": 0.4645783007144928,
|
| 2142 |
+
"learning_rate": 1.294871794871795e-05,
|
| 2143 |
+
"loss": 0.0566,
|
| 2144 |
+
"step": 7375
|
| 2145 |
+
},
|
| 2146 |
+
{
|
| 2147 |
+
"epoch": 8.14977973568282,
|
| 2148 |
+
"grad_norm": 0.6134966015815735,
|
| 2149 |
+
"learning_rate": 1.2923076923076925e-05,
|
| 2150 |
+
"loss": 0.0507,
|
| 2151 |
+
"step": 7400
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"epoch": 8.177312775330396,
|
| 2155 |
+
"grad_norm": 0.28678062558174133,
|
| 2156 |
+
"learning_rate": 1.2897435897435899e-05,
|
| 2157 |
+
"loss": 0.051,
|
| 2158 |
+
"step": 7425
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"epoch": 8.204845814977974,
|
| 2162 |
+
"grad_norm": 0.299078106880188,
|
| 2163 |
+
"learning_rate": 1.2871794871794874e-05,
|
| 2164 |
+
"loss": 0.045,
|
| 2165 |
+
"step": 7450
|
| 2166 |
+
},
|
| 2167 |
+
{
|
| 2168 |
+
"epoch": 8.232378854625551,
|
| 2169 |
+
"grad_norm": 0.6386272311210632,
|
| 2170 |
+
"learning_rate": 1.2846153846153848e-05,
|
| 2171 |
+
"loss": 0.0665,
|
| 2172 |
+
"step": 7475
|
| 2173 |
+
},
|
| 2174 |
+
{
|
| 2175 |
+
"epoch": 8.259911894273127,
|
| 2176 |
+
"grad_norm": 0.1515616923570633,
|
| 2177 |
+
"learning_rate": 1.2820512820512823e-05,
|
| 2178 |
+
"loss": 0.052,
|
| 2179 |
+
"step": 7500
|
| 2180 |
+
},
|
| 2181 |
+
{
|
| 2182 |
+
"epoch": 8.287444933920705,
|
| 2183 |
+
"grad_norm": 0.61899733543396,
|
| 2184 |
+
"learning_rate": 1.2794871794871795e-05,
|
| 2185 |
+
"loss": 0.0462,
|
| 2186 |
+
"step": 7525
|
| 2187 |
+
},
|
| 2188 |
+
{
|
| 2189 |
+
"epoch": 8.314977973568283,
|
| 2190 |
+
"grad_norm": 0.6535860300064087,
|
| 2191 |
+
"learning_rate": 1.2769230769230769e-05,
|
| 2192 |
+
"loss": 0.0518,
|
| 2193 |
+
"step": 7550
|
| 2194 |
+
},
|
| 2195 |
+
{
|
| 2196 |
+
"epoch": 8.342511013215859,
|
| 2197 |
+
"grad_norm": 0.4084964692592621,
|
| 2198 |
+
"learning_rate": 1.2743589743589744e-05,
|
| 2199 |
+
"loss": 0.0574,
|
| 2200 |
+
"step": 7575
|
| 2201 |
+
},
|
| 2202 |
+
{
|
| 2203 |
+
"epoch": 8.370044052863436,
|
| 2204 |
+
"grad_norm": 0.4185622036457062,
|
| 2205 |
+
"learning_rate": 1.2717948717948718e-05,
|
| 2206 |
+
"loss": 0.0466,
|
| 2207 |
+
"step": 7600
|
| 2208 |
+
},
|
| 2209 |
+
{
|
| 2210 |
+
"epoch": 8.397577092511014,
|
| 2211 |
+
"grad_norm": 0.5417298078536987,
|
| 2212 |
+
"learning_rate": 1.2692307692307693e-05,
|
| 2213 |
+
"loss": 0.0595,
|
| 2214 |
+
"step": 7625
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"epoch": 8.42511013215859,
|
| 2218 |
+
"grad_norm": 0.0882943645119667,
|
| 2219 |
+
"learning_rate": 1.2666666666666667e-05,
|
| 2220 |
+
"loss": 0.0441,
|
| 2221 |
+
"step": 7650
|
| 2222 |
+
},
|
| 2223 |
+
{
|
| 2224 |
+
"epoch": 8.452643171806168,
|
| 2225 |
+
"grad_norm": 0.5028131008148193,
|
| 2226 |
+
"learning_rate": 1.2641025641025642e-05,
|
| 2227 |
+
"loss": 0.0584,
|
| 2228 |
+
"step": 7675
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 8.480176211453745,
|
| 2232 |
+
"grad_norm": 0.32492244243621826,
|
| 2233 |
+
"learning_rate": 1.2615384615384616e-05,
|
| 2234 |
+
"loss": 0.0519,
|
| 2235 |
+
"step": 7700
|
| 2236 |
+
},
|
| 2237 |
+
{
|
| 2238 |
+
"epoch": 8.507709251101321,
|
| 2239 |
+
"grad_norm": 0.199100524187088,
|
| 2240 |
+
"learning_rate": 1.2589743589743591e-05,
|
| 2241 |
+
"loss": 0.0519,
|
| 2242 |
+
"step": 7725
|
| 2243 |
+
},
|
| 2244 |
+
{
|
| 2245 |
+
"epoch": 8.535242290748899,
|
| 2246 |
+
"grad_norm": 0.560196578502655,
|
| 2247 |
+
"learning_rate": 1.2564102564102565e-05,
|
| 2248 |
+
"loss": 0.0601,
|
| 2249 |
+
"step": 7750
|
| 2250 |
+
},
|
| 2251 |
+
{
|
| 2252 |
+
"epoch": 8.562775330396477,
|
| 2253 |
+
"grad_norm": 0.3848872780799866,
|
| 2254 |
+
"learning_rate": 1.253846153846154e-05,
|
| 2255 |
+
"loss": 0.0561,
|
| 2256 |
+
"step": 7775
|
| 2257 |
+
},
|
| 2258 |
+
{
|
| 2259 |
+
"epoch": 8.590308370044053,
|
| 2260 |
+
"grad_norm": 0.6430539488792419,
|
| 2261 |
+
"learning_rate": 1.2512820512820514e-05,
|
| 2262 |
+
"loss": 0.0573,
|
| 2263 |
+
"step": 7800
|
| 2264 |
+
},
|
| 2265 |
+
{
|
| 2266 |
+
"epoch": 8.61784140969163,
|
| 2267 |
+
"grad_norm": 0.1402147263288498,
|
| 2268 |
+
"learning_rate": 1.2487179487179487e-05,
|
| 2269 |
+
"loss": 0.0613,
|
| 2270 |
+
"step": 7825
|
| 2271 |
+
},
|
| 2272 |
+
{
|
| 2273 |
+
"epoch": 8.645374449339208,
|
| 2274 |
+
"grad_norm": 0.3411605656147003,
|
| 2275 |
+
"learning_rate": 1.2461538461538463e-05,
|
| 2276 |
+
"loss": 0.0401,
|
| 2277 |
+
"step": 7850
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"epoch": 8.672907488986784,
|
| 2281 |
+
"grad_norm": 0.4999459981918335,
|
| 2282 |
+
"learning_rate": 1.2435897435897436e-05,
|
| 2283 |
+
"loss": 0.0393,
|
| 2284 |
+
"step": 7875
|
| 2285 |
+
},
|
| 2286 |
+
{
|
| 2287 |
+
"epoch": 8.700440528634362,
|
| 2288 |
+
"grad_norm": 0.6794586777687073,
|
| 2289 |
+
"learning_rate": 1.2410256410256412e-05,
|
| 2290 |
+
"loss": 0.061,
|
| 2291 |
+
"step": 7900
|
| 2292 |
+
},
|
| 2293 |
+
{
|
| 2294 |
+
"epoch": 8.72797356828194,
|
| 2295 |
+
"grad_norm": 0.30914783477783203,
|
| 2296 |
+
"learning_rate": 1.2384615384615385e-05,
|
| 2297 |
+
"loss": 0.0552,
|
| 2298 |
+
"step": 7925
|
| 2299 |
+
},
|
| 2300 |
+
{
|
| 2301 |
+
"epoch": 8.755506607929515,
|
| 2302 |
+
"grad_norm": 0.311613529920578,
|
| 2303 |
+
"learning_rate": 1.235897435897436e-05,
|
| 2304 |
+
"loss": 0.0556,
|
| 2305 |
+
"step": 7950
|
| 2306 |
+
},
|
| 2307 |
+
{
|
| 2308 |
+
"epoch": 8.783039647577093,
|
| 2309 |
+
"grad_norm": 0.48470553755760193,
|
| 2310 |
+
"learning_rate": 1.2333333333333334e-05,
|
| 2311 |
+
"loss": 0.0553,
|
| 2312 |
+
"step": 7975
|
| 2313 |
+
},
|
| 2314 |
+
{
|
| 2315 |
+
"epoch": 8.810572687224669,
|
| 2316 |
+
"grad_norm": 0.5605005621910095,
|
| 2317 |
+
"learning_rate": 1.230769230769231e-05,
|
| 2318 |
+
"loss": 0.0673,
|
| 2319 |
+
"step": 8000
|
| 2320 |
+
},
|
| 2321 |
+
{
|
| 2322 |
+
"epoch": 8.810572687224669,
|
| 2323 |
+
"eval_cer": 25.173386218518225,
|
| 2324 |
+
"eval_loss": 0.8125333189964294,
|
| 2325 |
+
"eval_runtime": 1304.9459,
|
| 2326 |
+
"eval_samples_per_second": 8.108,
|
| 2327 |
+
"eval_steps_per_second": 2.028,
|
| 2328 |
+
"eval_wer": 85.53512494106553,
|
| 2329 |
+
"step": 8000
|
| 2330 |
}
|
| 2331 |
],
|
| 2332 |
"logging_steps": 25,
|
|
|
|
| 2346 |
"attributes": {}
|
| 2347 |
}
|
| 2348 |
},
|
| 2349 |
+
"total_flos": 1.3745145491920781e+20,
|
| 2350 |
"train_batch_size": 4,
|
| 2351 |
"trial_name": null,
|
| 2352 |
"trial_params": null
|