irishprancer commited on
Commit
1959008
·
verified ·
1 Parent(s): eef0427

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95dc31204144af8854a2bd166b700ffe00c60e93c0807719622f465a8d64b4a6
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b7314d8b5f35930722e1c3d90a6061192742967a1457d59859792a0878fce57
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a5eb56f2c77d18a4cabe29b95f8b8d20c91a4f69adfa24c425612b6f1851b59
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72e4902bbfa6e4e56f6e17ab398c622f809713c649baa3c6fd399c0f05448a5f
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d81f454917d58ee0468d722bb9dc06ecc2918f8b10aaeacf5f5f0694536198a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684fee310519fed9cc8ee66dc9698ba16f4e9489a577756f84e7b21aa51e01d4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71e101b8bc114bf6ab419ec6879047895e309c841d9a023f2c8c16366e9111d1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3530fffdeb6293174ae0b25b4bed0ccc682e606b6e29d3d50fec77e3192eef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 97.82608695652173,
5
  "eval_steps": 150,
6
- "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2182,6 +2182,151 @@
2182
  "EMA_steps_per_second": 25.124,
2183
  "epoch": 97.82608695652173,
2184
  "step": 2250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2185
  }
2186
  ],
2187
  "logging_steps": 10,
@@ -2201,7 +2346,7 @@
2201
  "attributes": {}
2202
  }
2203
  },
2204
- "total_flos": 5.800933883385446e+16,
2205
  "train_batch_size": 4,
2206
  "trial_name": null,
2207
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 104.34782608695652,
5
  "eval_steps": 150,
6
+ "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2182
  "EMA_steps_per_second": 25.124,
2183
  "epoch": 97.82608695652173,
2184
  "step": 2250
2185
+ },
2186
+ {
2187
+ "epoch": 98.26086956521739,
2188
+ "grad_norm": 1.9422506093978882,
2189
+ "learning_rate": 7.487548003627922e-06,
2190
+ "loss": 0.2414,
2191
+ "step": 2260
2192
+ },
2193
+ {
2194
+ "epoch": 98.69565217391305,
2195
+ "grad_norm": 1.745564341545105,
2196
+ "learning_rate": 7.487516099090849e-06,
2197
+ "loss": 0.278,
2198
+ "step": 2270
2199
+ },
2200
+ {
2201
+ "epoch": 99.1304347826087,
2202
+ "grad_norm": 2.0466256141662598,
2203
+ "learning_rate": 7.48748271071887e-06,
2204
+ "loss": 0.2487,
2205
+ "step": 2280
2206
+ },
2207
+ {
2208
+ "epoch": 99.56521739130434,
2209
+ "grad_norm": 2.3589112758636475,
2210
+ "learning_rate": 7.48744783852522e-06,
2211
+ "loss": 0.2882,
2212
+ "step": 2290
2213
+ },
2214
+ {
2215
+ "epoch": 100.0,
2216
+ "grad_norm": 2.6583240032196045,
2217
+ "learning_rate": 7.487411482523721e-06,
2218
+ "loss": 0.2324,
2219
+ "step": 2300
2220
+ },
2221
+ {
2222
+ "epoch": 100.43478260869566,
2223
+ "grad_norm": 2.685478448867798,
2224
+ "learning_rate": 7.4873736427287825e-06,
2225
+ "loss": 0.2368,
2226
+ "step": 2310
2227
+ },
2228
+ {
2229
+ "epoch": 100.8695652173913,
2230
+ "grad_norm": 1.7692900896072388,
2231
+ "learning_rate": 7.487334319155404e-06,
2232
+ "loss": 0.2694,
2233
+ "step": 2320
2234
+ },
2235
+ {
2236
+ "epoch": 101.30434782608695,
2237
+ "grad_norm": 2.5517287254333496,
2238
+ "learning_rate": 7.487293511819172e-06,
2239
+ "loss": 0.2417,
2240
+ "step": 2330
2241
+ },
2242
+ {
2243
+ "epoch": 101.73913043478261,
2244
+ "grad_norm": 1.7970623970031738,
2245
+ "learning_rate": 7.4872512207362605e-06,
2246
+ "loss": 0.2446,
2247
+ "step": 2340
2248
+ },
2249
+ {
2250
+ "epoch": 102.17391304347827,
2251
+ "grad_norm": 1.792651653289795,
2252
+ "learning_rate": 7.487207445923432e-06,
2253
+ "loss": 0.2934,
2254
+ "step": 2350
2255
+ },
2256
+ {
2257
+ "epoch": 102.6086956521739,
2258
+ "grad_norm": 2.1051220893859863,
2259
+ "learning_rate": 7.487162187398039e-06,
2260
+ "loss": 0.2844,
2261
+ "step": 2360
2262
+ },
2263
+ {
2264
+ "epoch": 103.04347826086956,
2265
+ "grad_norm": 1.9311975240707397,
2266
+ "learning_rate": 7.487115445178019e-06,
2267
+ "loss": 0.2162,
2268
+ "step": 2370
2269
+ },
2270
+ {
2271
+ "epoch": 103.47826086956522,
2272
+ "grad_norm": 2.12684965133667,
2273
+ "learning_rate": 7.487067219281901e-06,
2274
+ "loss": 0.2911,
2275
+ "step": 2380
2276
+ },
2277
+ {
2278
+ "epoch": 103.91304347826087,
2279
+ "grad_norm": 2.0107476711273193,
2280
+ "learning_rate": 7.4870175097287985e-06,
2281
+ "loss": 0.2413,
2282
+ "step": 2390
2283
+ },
2284
+ {
2285
+ "epoch": 104.34782608695652,
2286
+ "grad_norm": 1.9675108194351196,
2287
+ "learning_rate": 7.486966316538416e-06,
2288
+ "loss": 0.2557,
2289
+ "step": 2400
2290
+ },
2291
+ {
2292
+ "epoch": 104.34782608695652,
2293
+ "eval_loss": 0.9136893153190613,
2294
+ "eval_runtime": 0.4193,
2295
+ "eval_samples_per_second": 23.849,
2296
+ "eval_steps_per_second": 23.849,
2297
+ "step": 2400
2298
+ },
2299
+ {
2300
+ "Start_State_loss": 0.861186683177948,
2301
+ "Start_State_runtime": 0.4057,
2302
+ "Start_State_samples_per_second": 24.646,
2303
+ "Start_State_steps_per_second": 24.646,
2304
+ "epoch": 104.34782608695652,
2305
+ "step": 2400
2306
+ },
2307
+ {
2308
+ "Raw_Model_loss": 0.9136893153190613,
2309
+ "Raw_Model_runtime": 0.4082,
2310
+ "Raw_Model_samples_per_second": 24.497,
2311
+ "Raw_Model_steps_per_second": 24.497,
2312
+ "epoch": 104.34782608695652,
2313
+ "step": 2400
2314
+ },
2315
+ {
2316
+ "SWA_loss": 0.7567933797836304,
2317
+ "SWA_runtime": 0.4029,
2318
+ "SWA_samples_per_second": 24.818,
2319
+ "SWA_steps_per_second": 24.818,
2320
+ "epoch": 104.34782608695652,
2321
+ "step": 2400
2322
+ },
2323
+ {
2324
+ "EMA_loss": 0.8605263829231262,
2325
+ "EMA_runtime": 0.4051,
2326
+ "EMA_samples_per_second": 24.683,
2327
+ "EMA_steps_per_second": 24.683,
2328
+ "epoch": 104.34782608695652,
2329
+ "step": 2400
2330
  }
2331
  ],
2332
  "logging_steps": 10,
 
2346
  "attributes": {}
2347
  }
2348
  },
2349
+ "total_flos": 6.183025621814477e+16,
2350
  "train_batch_size": 4,
2351
  "trial_name": null,
2352
  "trial_params": null