irishprancer commited on
Commit
d992cb9
·
verified ·
1 Parent(s): f6123c5

Training in progress, step 2550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b7314d8b5f35930722e1c3d90a6061192742967a1457d59859792a0878fce57
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a48e299ffea4a1305480644e0252c4aab5e02d9191b6e951f282025c021c36
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72e4902bbfa6e4e56f6e17ab398c622f809713c649baa3c6fd399c0f05448a5f
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb7656d0058945a544e068c4c181cbcbae03cf7066df76778cb4579b9066242
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684fee310519fed9cc8ee66dc9698ba16f4e9489a577756f84e7b21aa51e01d4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33335d8e454fa636f80bfcf35b73daf17e2d9682ef2741d0ad1097e25ee4742d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba3530fffdeb6293174ae0b25b4bed0ccc682e606b6e29d3d50fec77e3192eef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f98b757d648be3e63607e2156858bad579d6a12d490a3cb2c8748d9ae2cce45
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 104.34782608695652,
5
  "eval_steps": 150,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2327,6 +2327,151 @@
2327
  "EMA_steps_per_second": 24.683,
2328
  "epoch": 104.34782608695652,
2329
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2330
  }
2331
  ],
2332
  "logging_steps": 10,
@@ -2346,7 +2491,7 @@
2346
  "attributes": {}
2347
  }
2348
  },
2349
- "total_flos": 6.183025621814477e+16,
2350
  "train_batch_size": 4,
2351
  "trial_name": null,
2352
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 110.8695652173913,
5
  "eval_steps": 150,
6
+ "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2327
  "EMA_steps_per_second": 24.683,
2328
  "epoch": 104.34782608695652,
2329
  "step": 2400
2330
+ },
2331
+ {
2332
+ "epoch": 104.78260869565217,
2333
+ "grad_norm": 2.10827898979187,
2334
+ "learning_rate": 7.486913639731043e-06,
2335
+ "loss": 0.2495,
2336
+ "step": 2410
2337
+ },
2338
+ {
2339
+ "epoch": 105.21739130434783,
2340
+ "grad_norm": 2.025355815887451,
2341
+ "learning_rate": 7.48685947932756e-06,
2342
+ "loss": 0.2637,
2343
+ "step": 2420
2344
+ },
2345
+ {
2346
+ "epoch": 105.65217391304348,
2347
+ "grad_norm": 1.9276680946350098,
2348
+ "learning_rate": 7.4868038353494355e-06,
2349
+ "loss": 0.2603,
2350
+ "step": 2430
2351
+ },
2352
+ {
2353
+ "epoch": 106.08695652173913,
2354
+ "grad_norm": 2.324167490005493,
2355
+ "learning_rate": 7.486746707818724e-06,
2356
+ "loss": 0.2141,
2357
+ "step": 2440
2358
+ },
2359
+ {
2360
+ "epoch": 106.52173913043478,
2361
+ "grad_norm": 1.4006412029266357,
2362
+ "learning_rate": 7.486688096758069e-06,
2363
+ "loss": 0.2816,
2364
+ "step": 2450
2365
+ },
2366
+ {
2367
+ "epoch": 106.95652173913044,
2368
+ "grad_norm": 1.8922216892242432,
2369
+ "learning_rate": 7.486628002190702e-06,
2370
+ "loss": 0.2444,
2371
+ "step": 2460
2372
+ },
2373
+ {
2374
+ "epoch": 107.3913043478261,
2375
+ "grad_norm": 2.3611834049224854,
2376
+ "learning_rate": 7.486566424140442e-06,
2377
+ "loss": 0.3039,
2378
+ "step": 2470
2379
+ },
2380
+ {
2381
+ "epoch": 107.82608695652173,
2382
+ "grad_norm": 2.2470717430114746,
2383
+ "learning_rate": 7.486503362631699e-06,
2384
+ "loss": 0.2188,
2385
+ "step": 2480
2386
+ },
2387
+ {
2388
+ "epoch": 108.26086956521739,
2389
+ "grad_norm": 2.0604355335235596,
2390
+ "learning_rate": 7.486438817689465e-06,
2391
+ "loss": 0.2706,
2392
+ "step": 2490
2393
+ },
2394
+ {
2395
+ "epoch": 108.69565217391305,
2396
+ "grad_norm": 1.6355359554290771,
2397
+ "learning_rate": 7.486372789339326e-06,
2398
+ "loss": 0.2454,
2399
+ "step": 2500
2400
+ },
2401
+ {
2402
+ "epoch": 109.1304347826087,
2403
+ "grad_norm": 1.6156138181686401,
2404
+ "learning_rate": 7.486305277607452e-06,
2405
+ "loss": 0.2437,
2406
+ "step": 2510
2407
+ },
2408
+ {
2409
+ "epoch": 109.56521739130434,
2410
+ "grad_norm": 1.3432440757751465,
2411
+ "learning_rate": 7.486236282520606e-06,
2412
+ "loss": 0.2309,
2413
+ "step": 2520
2414
+ },
2415
+ {
2416
+ "epoch": 110.0,
2417
+ "grad_norm": 3.2272891998291016,
2418
+ "learning_rate": 7.48616580410613e-06,
2419
+ "loss": 0.2874,
2420
+ "step": 2530
2421
+ },
2422
+ {
2423
+ "epoch": 110.43478260869566,
2424
+ "grad_norm": 1.7123788595199585,
2425
+ "learning_rate": 7.486093842391963e-06,
2426
+ "loss": 0.2452,
2427
+ "step": 2540
2428
+ },
2429
+ {
2430
+ "epoch": 110.8695652173913,
2431
+ "grad_norm": 1.8407248258590698,
2432
+ "learning_rate": 7.486020397406629e-06,
2433
+ "loss": 0.2698,
2434
+ "step": 2550
2435
+ },
2436
+ {
2437
+ "epoch": 110.8695652173913,
2438
+ "eval_loss": 0.9264782071113586,
2439
+ "eval_runtime": 0.5916,
2440
+ "eval_samples_per_second": 16.903,
2441
+ "eval_steps_per_second": 16.903,
2442
+ "step": 2550
2443
+ },
2444
+ {
2445
+ "Start_State_loss": 0.861186683177948,
2446
+ "Start_State_runtime": 0.4032,
2447
+ "Start_State_samples_per_second": 24.805,
2448
+ "Start_State_steps_per_second": 24.805,
2449
+ "epoch": 110.8695652173913,
2450
+ "step": 2550
2451
+ },
2452
+ {
2453
+ "Raw_Model_loss": 0.9264782071113586,
2454
+ "Raw_Model_runtime": 0.4014,
2455
+ "Raw_Model_samples_per_second": 24.913,
2456
+ "Raw_Model_steps_per_second": 24.913,
2457
+ "epoch": 110.8695652173913,
2458
+ "step": 2550
2459
+ },
2460
+ {
2461
+ "SWA_loss": 0.759516716003418,
2462
+ "SWA_runtime": 0.403,
2463
+ "SWA_samples_per_second": 24.814,
2464
+ "SWA_steps_per_second": 24.814,
2465
+ "epoch": 110.8695652173913,
2466
+ "step": 2550
2467
+ },
2468
+ {
2469
+ "EMA_loss": 0.8597530126571655,
2470
+ "EMA_runtime": 0.415,
2471
+ "EMA_samples_per_second": 24.098,
2472
+ "EMA_steps_per_second": 24.098,
2473
+ "epoch": 110.8695652173913,
2474
+ "step": 2550
2475
  }
2476
  ],
2477
  "logging_steps": 10,
 
2491
  "attributes": {}
2492
  }
2493
  },
2494
+ "total_flos": 6.568246037407334e+16,
2495
  "train_batch_size": 4,
2496
  "trial_name": null,
2497
  "trial_params": null