Training in progress, step 3160, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +235 -3
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 778096664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c78c8e751c48b040746199c31e75d4feef561d8684dffb5fcab27976fb39c540
|
| 3 |
size 778096664
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 395561780
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c99fe2c15f38fcdc70c5b58aeb7a136875edf29121b459165ec343854635f422
|
| 3 |
size 395561780
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3928ccf2665d8a8f3c5df0c8d51a1eb19cd2344599c97853e6f5d312bbb405f
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7592e1462fd4ef7cdf5cfb2001145c35bf845f4d92dab45be159a6b9ee3ebede
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08890ef963c10a9a7237efd117c8a94e5bd2fcf389061bead891541b89553427
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3913c6f063ebbf405426bd1b09789ed429fc38cb3d4f60751f4a61b49df23be0
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51411024d8c787642cc454881d33eeea730ff1d5a51ba77cdf08351edff87c13
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 320,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2067,6 +2067,238 @@
|
|
| 2067 |
"learning_rate": 3.420445597436056e-06,
|
| 2068 |
"loss": 1.4729,
|
| 2069 |
"step": 2840
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2070 |
}
|
| 2071 |
],
|
| 2072 |
"logging_steps": 10,
|
|
@@ -2086,7 +2318,7 @@
|
|
| 2086 |
"attributes": {}
|
| 2087 |
}
|
| 2088 |
},
|
| 2089 |
-
"total_flos": 1.
|
| 2090 |
"train_batch_size": 3,
|
| 2091 |
"trial_name": null,
|
| 2092 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.26158940397350994,
|
| 5 |
"eval_steps": 320,
|
| 6 |
+
"global_step": 3160,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2067 |
"learning_rate": 3.420445597436056e-06,
|
| 2068 |
"loss": 1.4729,
|
| 2069 |
"step": 2840
|
| 2070 |
+
},
|
| 2071 |
+
{
|
| 2072 |
+
"epoch": 0.2359271523178808,
|
| 2073 |
+
"grad_norm": 0.680798351764679,
|
| 2074 |
+
"learning_rate": 3.2351082041623125e-06,
|
| 2075 |
+
"loss": 1.5296,
|
| 2076 |
+
"step": 2850
|
| 2077 |
+
},
|
| 2078 |
+
{
|
| 2079 |
+
"epoch": 0.2367549668874172,
|
| 2080 |
+
"grad_norm": 0.7416069507598877,
|
| 2081 |
+
"learning_rate": 3.054765042128521e-06,
|
| 2082 |
+
"loss": 1.4074,
|
| 2083 |
+
"step": 2860
|
| 2084 |
+
},
|
| 2085 |
+
{
|
| 2086 |
+
"epoch": 0.23758278145695363,
|
| 2087 |
+
"grad_norm": 1.013679027557373,
|
| 2088 |
+
"learning_rate": 2.8794353709848178e-06,
|
| 2089 |
+
"loss": 1.5031,
|
| 2090 |
+
"step": 2870
|
| 2091 |
+
},
|
| 2092 |
+
{
|
| 2093 |
+
"epoch": 0.23841059602649006,
|
| 2094 |
+
"grad_norm": 0.684421956539154,
|
| 2095 |
+
"learning_rate": 2.7091379149682685e-06,
|
| 2096 |
+
"loss": 1.4008,
|
| 2097 |
+
"step": 2880
|
| 2098 |
+
},
|
| 2099 |
+
{
|
| 2100 |
+
"epoch": 0.23841059602649006,
|
| 2101 |
+
"eval_loss": 1.778387188911438,
|
| 2102 |
+
"eval_runtime": 379.3466,
|
| 2103 |
+
"eval_samples_per_second": 45.721,
|
| 2104 |
+
"eval_steps_per_second": 3.812,
|
| 2105 |
+
"step": 2880
|
| 2106 |
+
},
|
| 2107 |
+
{
|
| 2108 |
+
"epoch": 0.2392384105960265,
|
| 2109 |
+
"grad_norm": 0.6681047081947327,
|
| 2110 |
+
"learning_rate": 2.5438908609032698e-06,
|
| 2111 |
+
"loss": 1.37,
|
| 2112 |
+
"step": 2890
|
| 2113 |
+
},
|
| 2114 |
+
{
|
| 2115 |
+
"epoch": 0.24006622516556292,
|
| 2116 |
+
"grad_norm": 0.5961506366729736,
|
| 2117 |
+
"learning_rate": 2.3837118562592797e-06,
|
| 2118 |
+
"loss": 1.4046,
|
| 2119 |
+
"step": 2900
|
| 2120 |
+
},
|
| 2121 |
+
{
|
| 2122 |
+
"epoch": 0.24089403973509935,
|
| 2123 |
+
"grad_norm": 0.5972995758056641,
|
| 2124 |
+
"learning_rate": 2.22861800726617e-06,
|
| 2125 |
+
"loss": 1.5898,
|
| 2126 |
+
"step": 2910
|
| 2127 |
+
},
|
| 2128 |
+
{
|
| 2129 |
+
"epoch": 0.24172185430463577,
|
| 2130 |
+
"grad_norm": 0.7544158697128296,
|
| 2131 |
+
"learning_rate": 2.0786258770873647e-06,
|
| 2132 |
+
"loss": 1.4093,
|
| 2133 |
+
"step": 2920
|
| 2134 |
+
},
|
| 2135 |
+
{
|
| 2136 |
+
"epoch": 0.24254966887417218,
|
| 2137 |
+
"grad_norm": 0.5568986535072327,
|
| 2138 |
+
"learning_rate": 1.933751484051027e-06,
|
| 2139 |
+
"loss": 1.5618,
|
| 2140 |
+
"step": 2930
|
| 2141 |
+
},
|
| 2142 |
+
{
|
| 2143 |
+
"epoch": 0.2433774834437086,
|
| 2144 |
+
"grad_norm": 0.6362126469612122,
|
| 2145 |
+
"learning_rate": 1.7940102999393194e-06,
|
| 2146 |
+
"loss": 1.4395,
|
| 2147 |
+
"step": 2940
|
| 2148 |
+
},
|
| 2149 |
+
{
|
| 2150 |
+
"epoch": 0.24420529801324503,
|
| 2151 |
+
"grad_norm": 0.6483564376831055,
|
| 2152 |
+
"learning_rate": 1.6594172483361758e-06,
|
| 2153 |
+
"loss": 1.3861,
|
| 2154 |
+
"step": 2950
|
| 2155 |
+
},
|
| 2156 |
+
{
|
| 2157 |
+
"epoch": 0.24503311258278146,
|
| 2158 |
+
"grad_norm": 0.6191815137863159,
|
| 2159 |
+
"learning_rate": 1.5299867030334814e-06,
|
| 2160 |
+
"loss": 1.4288,
|
| 2161 |
+
"step": 2960
|
| 2162 |
+
},
|
| 2163 |
+
{
|
| 2164 |
+
"epoch": 0.2458609271523179,
|
| 2165 |
+
"grad_norm": 0.7167540788650513,
|
| 2166 |
+
"learning_rate": 1.4057324864960975e-06,
|
| 2167 |
+
"loss": 1.4678,
|
| 2168 |
+
"step": 2970
|
| 2169 |
+
},
|
| 2170 |
+
{
|
| 2171 |
+
"epoch": 0.24668874172185432,
|
| 2172 |
+
"grad_norm": 0.7405688762664795,
|
| 2173 |
+
"learning_rate": 1.286667868385627e-06,
|
| 2174 |
+
"loss": 1.3775,
|
| 2175 |
+
"step": 2980
|
| 2176 |
+
},
|
| 2177 |
+
{
|
| 2178 |
+
"epoch": 0.24751655629139072,
|
| 2179 |
+
"grad_norm": 0.7301118969917297,
|
| 2180 |
+
"learning_rate": 1.172805564143359e-06,
|
| 2181 |
+
"loss": 1.5456,
|
| 2182 |
+
"step": 2990
|
| 2183 |
+
},
|
| 2184 |
+
{
|
| 2185 |
+
"epoch": 0.24834437086092714,
|
| 2186 |
+
"grad_norm": 0.8688226938247681,
|
| 2187 |
+
"learning_rate": 1.064157733632276e-06,
|
| 2188 |
+
"loss": 1.4586,
|
| 2189 |
+
"step": 3000
|
| 2190 |
+
},
|
| 2191 |
+
{
|
| 2192 |
+
"epoch": 0.24917218543046357,
|
| 2193 |
+
"grad_norm": 0.7074631452560425,
|
| 2194 |
+
"learning_rate": 9.607359798384785e-07,
|
| 2195 |
+
"loss": 1.4873,
|
| 2196 |
+
"step": 3010
|
| 2197 |
+
},
|
| 2198 |
+
{
|
| 2199 |
+
"epoch": 0.25,
|
| 2200 |
+
"grad_norm": 0.7328817844390869,
|
| 2201 |
+
"learning_rate": 8.62551347632029e-07,
|
| 2202 |
+
"loss": 1.4613,
|
| 2203 |
+
"step": 3020
|
| 2204 |
+
},
|
| 2205 |
+
{
|
| 2206 |
+
"epoch": 0.2508278145695364,
|
| 2207 |
+
"grad_norm": 0.7635940909385681,
|
| 2208 |
+
"learning_rate": 7.696143225874475e-07,
|
| 2209 |
+
"loss": 1.4208,
|
| 2210 |
+
"step": 3030
|
| 2211 |
+
},
|
| 2212 |
+
{
|
| 2213 |
+
"epoch": 0.25165562913907286,
|
| 2214 |
+
"grad_norm": 0.6220166683197021,
|
| 2215 |
+
"learning_rate": 6.819348298638839e-07,
|
| 2216 |
+
"loss": 1.4609,
|
| 2217 |
+
"step": 3040
|
| 2218 |
+
},
|
| 2219 |
+
{
|
| 2220 |
+
"epoch": 0.25248344370860926,
|
| 2221 |
+
"grad_norm": 0.7133238911628723,
|
| 2222 |
+
"learning_rate": 5.995222331451722e-07,
|
| 2223 |
+
"loss": 1.4993,
|
| 2224 |
+
"step": 3050
|
| 2225 |
+
},
|
| 2226 |
+
{
|
| 2227 |
+
"epoch": 0.2533112582781457,
|
| 2228 |
+
"grad_norm": 0.697382390499115,
|
| 2229 |
+
"learning_rate": 5.223853336398632e-07,
|
| 2230 |
+
"loss": 1.4244,
|
| 2231 |
+
"step": 3060
|
| 2232 |
+
},
|
| 2233 |
+
{
|
| 2234 |
+
"epoch": 0.2541390728476821,
|
| 2235 |
+
"grad_norm": 0.7835958003997803,
|
| 2236 |
+
"learning_rate": 4.505323691412711e-07,
|
| 2237 |
+
"loss": 1.5133,
|
| 2238 |
+
"step": 3070
|
| 2239 |
+
},
|
| 2240 |
+
{
|
| 2241 |
+
"epoch": 0.25496688741721857,
|
| 2242 |
+
"grad_norm": 0.6556974053382874,
|
| 2243 |
+
"learning_rate": 3.839710131477492e-07,
|
| 2244 |
+
"loss": 1.4107,
|
| 2245 |
+
"step": 3080
|
| 2246 |
+
},
|
| 2247 |
+
{
|
| 2248 |
+
"epoch": 0.25579470198675497,
|
| 2249 |
+
"grad_norm": 0.8099976181983948,
|
| 2250 |
+
"learning_rate": 3.2270837404318464e-07,
|
| 2251 |
+
"loss": 1.4708,
|
| 2252 |
+
"step": 3090
|
| 2253 |
+
},
|
| 2254 |
+
{
|
| 2255 |
+
"epoch": 0.25662251655629137,
|
| 2256 |
+
"grad_norm": 0.7530120015144348,
|
| 2257 |
+
"learning_rate": 2.667509943378721e-07,
|
| 2258 |
+
"loss": 1.4437,
|
| 2259 |
+
"step": 3100
|
| 2260 |
+
},
|
| 2261 |
+
{
|
| 2262 |
+
"epoch": 0.2574503311258278,
|
| 2263 |
+
"grad_norm": 0.6962845921516418,
|
| 2264 |
+
"learning_rate": 2.161048499698115e-07,
|
| 2265 |
+
"loss": 1.504,
|
| 2266 |
+
"step": 3110
|
| 2267 |
+
},
|
| 2268 |
+
{
|
| 2269 |
+
"epoch": 0.2582781456953642,
|
| 2270 |
+
"grad_norm": 0.6753128170967102,
|
| 2271 |
+
"learning_rate": 1.7077534966650766e-07,
|
| 2272 |
+
"loss": 1.4186,
|
| 2273 |
+
"step": 3120
|
| 2274 |
+
},
|
| 2275 |
+
{
|
| 2276 |
+
"epoch": 0.2591059602649007,
|
| 2277 |
+
"grad_norm": 0.6573652625083923,
|
| 2278 |
+
"learning_rate": 1.3076733436734322e-07,
|
| 2279 |
+
"loss": 1.4241,
|
| 2280 |
+
"step": 3130
|
| 2281 |
+
},
|
| 2282 |
+
{
|
| 2283 |
+
"epoch": 0.2599337748344371,
|
| 2284 |
+
"grad_norm": 0.6068715453147888,
|
| 2285 |
+
"learning_rate": 9.60850767065924e-08,
|
| 2286 |
+
"loss": 1.3682,
|
| 2287 |
+
"step": 3140
|
| 2288 |
+
},
|
| 2289 |
+
{
|
| 2290 |
+
"epoch": 0.26076158940397354,
|
| 2291 |
+
"grad_norm": 0.7726837396621704,
|
| 2292 |
+
"learning_rate": 6.673228055715241e-08,
|
| 2293 |
+
"loss": 1.5453,
|
| 2294 |
+
"step": 3150
|
| 2295 |
+
},
|
| 2296 |
+
{
|
| 2297 |
+
"epoch": 0.26158940397350994,
|
| 2298 |
+
"grad_norm": 0.6201027035713196,
|
| 2299 |
+
"learning_rate": 4.2712080634949024e-08,
|
| 2300 |
+
"loss": 1.4545,
|
| 2301 |
+
"step": 3160
|
| 2302 |
}
|
| 2303 |
],
|
| 2304 |
"logging_steps": 10,
|
|
|
|
| 2318 |
"attributes": {}
|
| 2319 |
}
|
| 2320 |
},
|
| 2321 |
+
"total_flos": 1.404060712841511e+18,
|
| 2322 |
"train_batch_size": 3,
|
| 2323 |
"trial_name": null,
|
| 2324 |
"trial_params": null
|