irishprancer commited on
Commit
6c718d3
·
verified ·
1 Parent(s): 97c7dc1

Training in progress, step 2850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a43112abe39c06a0b5ca17135fa11e0bfe258e79ff269b917a6b1d5a4e62477c
3
  size 1369734456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409aaaeded0bcf83a1ad2f8a8aa2059e25e2d11c459671d0ad07902951d56ccb
3
  size 1369734456
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2110ad331782e7a158b2f226a8442097be138b75c8d14f4126f67501be717f6
3
  size 2739607354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6fa660e08860b38ae51215c9910f76eac4a3b8c3ca3626a5f9a00ef37913e5
3
  size 2739607354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18b3e8f27fcfe401550ad8e68339098586ba2c4a315de25f2c3e273105526b21
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34535a0f5b3e2caaabc8b8b4e44f0c5669dbce6fa3579b2dfd70851f33133c4d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1538c5f25725d45bc5a00763d606d44f6c2d4924289d5e4bc3ac345f59d988a
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fefd4a4db1014a9c64900d77fe95c22931ab60a40f8d1f6d23a762afabfc9cde
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9992671012878418,
3
- "best_model_checkpoint": "./output/checkpoint-2700",
4
- "epoch": 0.7714285714285715,
5
  "eval_steps": 150,
6
- "global_step": 2700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2249,6 +2249,143 @@
2249
  "EMA_steps_per_second": 4.945,
2250
  "epoch": 0.7714285714285715,
2251
  "step": 2700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2252
  }
2253
  ],
2254
  "logging_steps": 10,
@@ -2268,7 +2405,7 @@
2268
  "attributes": {}
2269
  }
2270
  },
2271
- "total_flos": 1.0274909037272678e+18,
2272
  "train_batch_size": 4,
2273
  "trial_name": null,
2274
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9964351058006287,
3
+ "best_model_checkpoint": "./output/checkpoint-2850",
4
+ "epoch": 0.8142857142857143,
5
  "eval_steps": 150,
6
+ "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2249
  "EMA_steps_per_second": 4.945,
2250
  "epoch": 0.7714285714285715,
2251
  "step": 2700
2252
+ },
2253
+ {
2254
+ "epoch": 0.7742857142857142,
2255
+ "grad_norm": 2.9369349479675293,
2256
+ "learning_rate": 3.440770640467657e-05,
2257
+ "loss": 1.0366,
2258
+ "step": 2710
2259
+ },
2260
+ {
2261
+ "epoch": 0.7771428571428571,
2262
+ "grad_norm": 2.5735695362091064,
2263
+ "learning_rate": 3.440591922877432e-05,
2264
+ "loss": 0.9736,
2265
+ "step": 2720
2266
+ },
2267
+ {
2268
+ "epoch": 0.78,
2269
+ "grad_norm": 3.095432996749878,
2270
+ "learning_rate": 3.440412528076098e-05,
2271
+ "loss": 0.987,
2272
+ "step": 2730
2273
+ },
2274
+ {
2275
+ "epoch": 0.7828571428571428,
2276
+ "grad_norm": 2.998671293258667,
2277
+ "learning_rate": 3.44023245613476e-05,
2278
+ "loss": 0.9876,
2279
+ "step": 2740
2280
+ },
2281
+ {
2282
+ "epoch": 0.7857142857142857,
2283
+ "grad_norm": 3.2118659019470215,
2284
+ "learning_rate": 3.4400517071247936e-05,
2285
+ "loss": 0.9523,
2286
+ "step": 2750
2287
+ },
2288
+ {
2289
+ "epoch": 0.7885714285714286,
2290
+ "grad_norm": 2.917165756225586,
2291
+ "learning_rate": 3.4398702811178414e-05,
2292
+ "loss": 0.9638,
2293
+ "step": 2760
2294
+ },
2295
+ {
2296
+ "epoch": 0.7914285714285715,
2297
+ "grad_norm": 3.0773892402648926,
2298
+ "learning_rate": 3.439688178185815e-05,
2299
+ "loss": 0.9997,
2300
+ "step": 2770
2301
+ },
2302
+ {
2303
+ "epoch": 0.7942857142857143,
2304
+ "grad_norm": 3.3943119049072266,
2305
+ "learning_rate": 3.4395053984008946e-05,
2306
+ "loss": 1.0008,
2307
+ "step": 2780
2308
+ },
2309
+ {
2310
+ "epoch": 0.7971428571428572,
2311
+ "grad_norm": 2.786585807800293,
2312
+ "learning_rate": 3.439321941835528e-05,
2313
+ "loss": 0.8922,
2314
+ "step": 2790
2315
+ },
2316
+ {
2317
+ "epoch": 0.8,
2318
+ "grad_norm": 3.2204439640045166,
2319
+ "learning_rate": 3.4391378085624315e-05,
2320
+ "loss": 1.0093,
2321
+ "step": 2800
2322
+ },
2323
+ {
2324
+ "epoch": 0.8028571428571428,
2325
+ "grad_norm": 3.294858694076538,
2326
+ "learning_rate": 3.43895299865459e-05,
2327
+ "loss": 1.024,
2328
+ "step": 2810
2329
+ },
2330
+ {
2331
+ "epoch": 0.8057142857142857,
2332
+ "grad_norm": 3.8853487968444824,
2333
+ "learning_rate": 3.4387675121852555e-05,
2334
+ "loss": 0.9855,
2335
+ "step": 2820
2336
+ },
2337
+ {
2338
+ "epoch": 0.8085714285714286,
2339
+ "grad_norm": 2.766892194747925,
2340
+ "learning_rate": 3.43858134922795e-05,
2341
+ "loss": 1.0017,
2342
+ "step": 2830
2343
+ },
2344
+ {
2345
+ "epoch": 0.8114285714285714,
2346
+ "grad_norm": 3.4095218181610107,
2347
+ "learning_rate": 3.4383945098564614e-05,
2348
+ "loss": 0.9873,
2349
+ "step": 2840
2350
+ },
2351
+ {
2352
+ "epoch": 0.8142857142857143,
2353
+ "grad_norm": 3.2278494834899902,
2354
+ "learning_rate": 3.438206994144847e-05,
2355
+ "loss": 1.0113,
2356
+ "step": 2850
2357
+ },
2358
+ {
2359
+ "epoch": 0.8142857142857143,
2360
+ "eval_loss": 0.9964351058006287,
2361
+ "eval_runtime": 101.0632,
2362
+ "eval_samples_per_second": 4.947,
2363
+ "eval_steps_per_second": 4.947,
2364
+ "step": 2850
2365
+ },
2366
+ {
2367
+ "Raw_Model_loss": 0.9964351058006287,
2368
+ "Raw_Model_runtime": 101.1423,
2369
+ "Raw_Model_samples_per_second": 4.944,
2370
+ "Raw_Model_steps_per_second": 4.944,
2371
+ "epoch": 0.8142857142857143,
2372
+ "step": 2850
2373
+ },
2374
+ {
2375
+ "SWA_loss": 0.995207667350769,
2376
+ "SWA_runtime": 101.0346,
2377
+ "SWA_samples_per_second": 4.949,
2378
+ "SWA_steps_per_second": 4.949,
2379
+ "epoch": 0.8142857142857143,
2380
+ "step": 2850
2381
+ },
2382
+ {
2383
+ "EMA_loss": 1.0361887216567993,
2384
+ "EMA_runtime": 101.0635,
2385
+ "EMA_samples_per_second": 4.947,
2386
+ "EMA_steps_per_second": 4.947,
2387
+ "epoch": 0.8142857142857143,
2388
+ "step": 2850
2389
  }
2390
  ],
2391
  "logging_steps": 10,
 
2405
  "attributes": {}
2406
  }
2407
  },
2408
+ "total_flos": 1.0842856817574298e+18,
2409
  "train_batch_size": 4,
2410
  "trial_name": null,
2411
  "trial_params": null