rootxhacker commited on
Commit
a245454
·
verified ·
1 Parent(s): a6fc3f9

Training in progress, step 20500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff1d47cdb13e83e8fe6a9d20b6c173dfae48b029aecf5c92bf458772133f5b2
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fe29335718233e84f1e65e1e50047d21ed0639ab8d8950d51643ac36288179
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c26f8a7499e23aa16477b380bf134293878cb8936b98f8cad1d8a606fc561e1a
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c45065f737f44883104a4351c674ace9478a208428702e003f4cf5e07e96452
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b205d8106465470d82a9f668e113b1a4c937f3fe768b385e78f85eb171e49cc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dbffb5b9d940a96eb51dbe7a35860718ecfeca99437ad8b1b71dbafaefdd573
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:344e181f9677a4d18097c7085524ad711f8dfb151d42735faf9658fba6102bed
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007715702cfbd38b6fa4b8b523a7e58a1cfb0651b5c1c69d07c436463e26a8bf
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc471d6cffa6d4b3db986d1e54c37de8f3b7f607849783a6e6b32bce97519eb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:780bb1353dd0d4e52605608cc24ae3410777df1acea35ed02a82bdb21fa7ba07
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 18750,
3
- "best_metric": 1.4680087566375732,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.5383432043688947,
6
  "eval_steps": 250,
7
- "global_step": 20000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3448,6 +3448,92 @@
3448
  "eval_samples_per_second": 55.233,
3449
  "eval_steps_per_second": 13.808,
3450
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3451
  }
3452
  ],
3453
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 20250,
3
+ "best_metric": 1.4672300815582275,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
+ "epoch": 1.576801784478117,
6
  "eval_steps": 250,
7
+ "global_step": 20500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3448
  "eval_samples_per_second": 55.233,
3449
  "eval_steps_per_second": 13.808,
3450
  "step": 20000
3451
+ },
3452
+ {
3453
+ "epoch": 1.542189062379817,
3454
+ "grad_norm": 1.4523636102676392,
3455
+ "learning_rate": 3.7376567037976546e-05,
3456
+ "loss": 1.4538,
3457
+ "step": 20050
3458
+ },
3459
+ {
3460
+ "epoch": 1.5460349203907393,
3461
+ "grad_norm": 1.854066252708435,
3462
+ "learning_rate": 3.7119223557957426e-05,
3463
+ "loss": 1.4532,
3464
+ "step": 20100
3465
+ },
3466
+ {
3467
+ "epoch": 1.5498807784016613,
3468
+ "grad_norm": 1.8892920017242432,
3469
+ "learning_rate": 3.6861880077938306e-05,
3470
+ "loss": 1.4301,
3471
+ "step": 20150
3472
+ },
3473
+ {
3474
+ "epoch": 1.5537266364125837,
3475
+ "grad_norm": 1.2957504987716675,
3476
+ "learning_rate": 3.6609683467519574e-05,
3477
+ "loss": 1.4613,
3478
+ "step": 20200
3479
+ },
3480
+ {
3481
+ "epoch": 1.5575724944235059,
3482
+ "grad_norm": 1.9040348529815674,
3483
+ "learning_rate": 3.635233998750046e-05,
3484
+ "loss": 1.3847,
3485
+ "step": 20250
3486
+ },
3487
+ {
3488
+ "epoch": 1.5575724944235059,
3489
+ "eval_loss": 1.4672300815582275,
3490
+ "eval_runtime": 17.9888,
3491
+ "eval_samples_per_second": 55.59,
3492
+ "eval_steps_per_second": 13.898,
3493
+ "step": 20250
3494
+ },
3495
+ {
3496
+ "epoch": 1.561418352434428,
3497
+ "grad_norm": 1.4990596771240234,
3498
+ "learning_rate": 3.609499650748134e-05,
3499
+ "loss": 1.4243,
3500
+ "step": 20300
3501
+ },
3502
+ {
3503
+ "epoch": 1.5652642104453505,
3504
+ "grad_norm": 2.344515562057495,
3505
+ "learning_rate": 3.583765302746222e-05,
3506
+ "loss": 1.4971,
3507
+ "step": 20350
3508
+ },
3509
+ {
3510
+ "epoch": 1.5691100684562724,
3511
+ "grad_norm": 2.2836570739746094,
3512
+ "learning_rate": 3.5580309547443106e-05,
3513
+ "loss": 1.4641,
3514
+ "step": 20400
3515
+ },
3516
+ {
3517
+ "epoch": 1.5729559264671948,
3518
+ "grad_norm": 1.0165778398513794,
3519
+ "learning_rate": 3.5322966067423986e-05,
3520
+ "loss": 1.4268,
3521
+ "step": 20450
3522
+ },
3523
+ {
3524
+ "epoch": 1.576801784478117,
3525
+ "grad_norm": 0.5663600564002991,
3526
+ "learning_rate": 3.506562258740487e-05,
3527
+ "loss": 1.3487,
3528
+ "step": 20500
3529
+ },
3530
+ {
3531
+ "epoch": 1.576801784478117,
3532
+ "eval_loss": 1.4733059406280518,
3533
+ "eval_runtime": 18.0399,
3534
+ "eval_samples_per_second": 55.433,
3535
+ "eval_steps_per_second": 13.858,
3536
+ "step": 20500
3537
  }
3538
  ],
3539
  "logging_steps": 50,