Training in progress, step 300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 335604696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7af36e198ceb1d302cb562bbb3dc9a7375d049e35455f558af1c5f3575bf1d7
|
| 3 |
size 335604696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 170920532
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe6d668006fb535878145ee06b5142e4d3416d48a3a22d19887946d60b1ed74
|
| 3 |
size 170920532
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5510a5ba9c2306cb1cc1948c25f245584ae6bb11cae37474b32d1e5b9d035c3
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d93420319c4318ff13366855f16b6ec61d99b866bdf2a20293a1621b040b36f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2087,6 +2087,154 @@
|
|
| 2087 |
"eval_samples_per_second": 3.965,
|
| 2088 |
"eval_steps_per_second": 3.965,
|
| 2089 |
"step": 280
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2090 |
}
|
| 2091 |
],
|
| 2092 |
"logging_steps": 1,
|
|
@@ -2101,12 +2249,12 @@
|
|
| 2101 |
"should_evaluate": false,
|
| 2102 |
"should_log": false,
|
| 2103 |
"should_save": true,
|
| 2104 |
-
"should_training_stop":
|
| 2105 |
},
|
| 2106 |
"attributes": {}
|
| 2107 |
}
|
| 2108 |
},
|
| 2109 |
-
"total_flos": 1.
|
| 2110 |
"train_batch_size": 1,
|
| 2111 |
"trial_name": null,
|
| 2112 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.28795968564400987,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2087 |
"eval_samples_per_second": 3.965,
|
| 2088 |
"eval_steps_per_second": 3.965,
|
| 2089 |
"step": 280
|
| 2090 |
+
},
|
| 2091 |
+
{
|
| 2092 |
+
"epoch": 0.2697222388865559,
|
| 2093 |
+
"grad_norm": 1.7310993671417236,
|
| 2094 |
+
"learning_rate": 3.166200546578718e-06,
|
| 2095 |
+
"loss": 2.5755,
|
| 2096 |
+
"step": 281
|
| 2097 |
+
},
|
| 2098 |
+
{
|
| 2099 |
+
"epoch": 0.2706821045053692,
|
| 2100 |
+
"grad_norm": 1.7038969993591309,
|
| 2101 |
+
"learning_rate": 2.8427160470641253e-06,
|
| 2102 |
+
"loss": 2.5515,
|
| 2103 |
+
"step": 282
|
| 2104 |
+
},
|
| 2105 |
+
{
|
| 2106 |
+
"epoch": 0.2716419701241826,
|
| 2107 |
+
"grad_norm": 1.7379988431930542,
|
| 2108 |
+
"learning_rate": 2.5365011072835117e-06,
|
| 2109 |
+
"loss": 2.624,
|
| 2110 |
+
"step": 283
|
| 2111 |
+
},
|
| 2112 |
+
{
|
| 2113 |
+
"epoch": 0.27260183574299596,
|
| 2114 |
+
"grad_norm": 1.8372347354888916,
|
| 2115 |
+
"learning_rate": 2.2475916629177415e-06,
|
| 2116 |
+
"loss": 2.6323,
|
| 2117 |
+
"step": 284
|
| 2118 |
+
},
|
| 2119 |
+
{
|
| 2120 |
+
"epoch": 0.27356170136180935,
|
| 2121 |
+
"grad_norm": 1.8558735847473145,
|
| 2122 |
+
"learning_rate": 1.9760216187710787e-06,
|
| 2123 |
+
"loss": 2.5233,
|
| 2124 |
+
"step": 285
|
| 2125 |
+
},
|
| 2126 |
+
{
|
| 2127 |
+
"epoch": 0.2745215669806227,
|
| 2128 |
+
"grad_norm": 1.8913716077804565,
|
| 2129 |
+
"learning_rate": 1.7218228447922867e-06,
|
| 2130 |
+
"loss": 2.4645,
|
| 2131 |
+
"step": 286
|
| 2132 |
+
},
|
| 2133 |
+
{
|
| 2134 |
+
"epoch": 0.2754814325994361,
|
| 2135 |
+
"grad_norm": 2.0636091232299805,
|
| 2136 |
+
"learning_rate": 1.4850251723345196e-06,
|
| 2137 |
+
"loss": 2.6357,
|
| 2138 |
+
"step": 287
|
| 2139 |
+
},
|
| 2140 |
+
{
|
| 2141 |
+
"epoch": 0.2764412982182494,
|
| 2142 |
+
"grad_norm": 1.8926513195037842,
|
| 2143 |
+
"learning_rate": 1.2656563906545902e-06,
|
| 2144 |
+
"loss": 2.4288,
|
| 2145 |
+
"step": 288
|
| 2146 |
+
},
|
| 2147 |
+
{
|
| 2148 |
+
"epoch": 0.2774011638370628,
|
| 2149 |
+
"grad_norm": 2.2932565212249756,
|
| 2150 |
+
"learning_rate": 1.0637422436516274e-06,
|
| 2151 |
+
"loss": 2.5419,
|
| 2152 |
+
"step": 289
|
| 2153 |
+
},
|
| 2154 |
+
{
|
| 2155 |
+
"epoch": 0.27836102945587615,
|
| 2156 |
+
"grad_norm": 2.0671229362487793,
|
| 2157 |
+
"learning_rate": 8.793064268460604e-07,
|
| 2158 |
+
"loss": 2.6396,
|
| 2159 |
+
"step": 290
|
| 2160 |
+
},
|
| 2161 |
+
{
|
| 2162 |
+
"epoch": 0.27932089507468955,
|
| 2163 |
+
"grad_norm": 2.107043743133545,
|
| 2164 |
+
"learning_rate": 7.123705845987093e-07,
|
| 2165 |
+
"loss": 2.6949,
|
| 2166 |
+
"step": 291
|
| 2167 |
+
},
|
| 2168 |
+
{
|
| 2169 |
+
"epoch": 0.2802807606935029,
|
| 2170 |
+
"grad_norm": 2.4163978099823,
|
| 2171 |
+
"learning_rate": 5.629543075708176e-07,
|
| 2172 |
+
"loss": 2.6468,
|
| 2173 |
+
"step": 292
|
| 2174 |
+
},
|
| 2175 |
+
{
|
| 2176 |
+
"epoch": 0.2812406263123163,
|
| 2177 |
+
"grad_norm": 2.3940374851226807,
|
| 2178 |
+
"learning_rate": 4.310751304249738e-07,
|
| 2179 |
+
"loss": 2.8079,
|
| 2180 |
+
"step": 293
|
| 2181 |
+
},
|
| 2182 |
+
{
|
| 2183 |
+
"epoch": 0.2822004919311296,
|
| 2184 |
+
"grad_norm": 2.288954973220825,
|
| 2185 |
+
"learning_rate": 3.167485297673411e-07,
|
| 2186 |
+
"loss": 2.5619,
|
| 2187 |
+
"step": 294
|
| 2188 |
+
},
|
| 2189 |
+
{
|
| 2190 |
+
"epoch": 0.283160357549943,
|
| 2191 |
+
"grad_norm": 2.4529731273651123,
|
| 2192 |
+
"learning_rate": 2.1998792233142714e-07,
|
| 2193 |
+
"loss": 2.539,
|
| 2194 |
+
"step": 295
|
| 2195 |
+
},
|
| 2196 |
+
{
|
| 2197 |
+
"epoch": 0.28412022316875635,
|
| 2198 |
+
"grad_norm": 2.7025234699249268,
|
| 2199 |
+
"learning_rate": 1.4080466340349316e-07,
|
| 2200 |
+
"loss": 2.7581,
|
| 2201 |
+
"step": 296
|
| 2202 |
+
},
|
| 2203 |
+
{
|
| 2204 |
+
"epoch": 0.28508008878756974,
|
| 2205 |
+
"grad_norm": 2.71325421333313,
|
| 2206 |
+
"learning_rate": 7.92080454900701e-08,
|
| 2207 |
+
"loss": 2.4698,
|
| 2208 |
+
"step": 297
|
| 2209 |
+
},
|
| 2210 |
+
{
|
| 2211 |
+
"epoch": 0.2860399544063831,
|
| 2212 |
+
"grad_norm": 3.07059907913208,
|
| 2213 |
+
"learning_rate": 3.5205297227380855e-08,
|
| 2214 |
+
"loss": 2.6203,
|
| 2215 |
+
"step": 298
|
| 2216 |
+
},
|
| 2217 |
+
{
|
| 2218 |
+
"epoch": 0.2869998200251965,
|
| 2219 |
+
"grad_norm": 3.6069223880767822,
|
| 2220 |
+
"learning_rate": 8.801582533035644e-09,
|
| 2221 |
+
"loss": 2.6736,
|
| 2222 |
+
"step": 299
|
| 2223 |
+
},
|
| 2224 |
+
{
|
| 2225 |
+
"epoch": 0.28795968564400987,
|
| 2226 |
+
"grad_norm": 4.671467304229736,
|
| 2227 |
+
"learning_rate": 0.0,
|
| 2228 |
+
"loss": 2.4599,
|
| 2229 |
+
"step": 300
|
| 2230 |
+
},
|
| 2231 |
+
{
|
| 2232 |
+
"epoch": 0.28795968564400987,
|
| 2233 |
+
"eval_loss": 2.5693867206573486,
|
| 2234 |
+
"eval_runtime": 86.0535,
|
| 2235 |
+
"eval_samples_per_second": 3.963,
|
| 2236 |
+
"eval_steps_per_second": 3.963,
|
| 2237 |
+
"step": 300
|
| 2238 |
}
|
| 2239 |
],
|
| 2240 |
"logging_steps": 1,
|
|
|
|
| 2249 |
"should_evaluate": false,
|
| 2250 |
"should_log": false,
|
| 2251 |
"should_save": true,
|
| 2252 |
+
"should_training_stop": true
|
| 2253 |
},
|
| 2254 |
"attributes": {}
|
| 2255 |
}
|
| 2256 |
},
|
| 2257 |
+
"total_flos": 1.205066715365376e+17,
|
| 2258 |
"train_batch_size": 1,
|
| 2259 |
"trial_name": null,
|
| 2260 |
"trial_params": null
|