rootxhacker commited on
Commit
adb3208
·
verified ·
1 Parent(s): 153ae45

Training in progress, step 14500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2519845d79c6f7a60a8e8bd9f60470c958586532b21af582e2a347c3d21c6c1a
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:048b14e8250392932daba4d22d09a460365951dbb6b68b5a41a1a0e5ce624541
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88aaf6b4c11b0e46f6cdd9f050476f7085d7e9069b7435c1b07136131ebac754
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b29852e5d0ff827b5e17125c299364e88ca1c70914de6e2a1ee93e58f54b613
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5de168b313b50f7920582863eb6c48735221da70f052aa0c3517b7e8965981bd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ea42701e6bd5369fc7af2447cde260122e2711abf45d46209ed918523c1a2c
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:523d1d97459ff6f5c0b6df3436b48935f386618f5677e95bde450bb198a23ba7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbaf929d345a2a40a03fb2b8f0c27dc126d0b7d5e2424d21837b1af94ede355e
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f36f6a58b58358dbb293c95ffa30cf143673db61f43b28cc950d65ff91360f28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3969f570a31885b978deed67646d88ac34c614ef8cb8214e9d16328676e8af9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 13750,
3
  "best_metric": 1.5006794929504395,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
- "epoch": 1.0768402430582262,
6
  "eval_steps": 250,
7
- "global_step": 14000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2416,6 +2416,92 @@
2416
  "eval_samples_per_second": 55.107,
2417
  "eval_steps_per_second": 13.777,
2418
  "step": 14000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2419
  }
2420
  ],
2421
  "logging_steps": 50,
 
2
  "best_global_step": 13750,
3
  "best_metric": 1.5006794929504395,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
+ "epoch": 1.1152988231674485,
6
  "eval_steps": 250,
7
+ "global_step": 14500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2416
  "eval_samples_per_second": 55.107,
2417
  "eval_steps_per_second": 13.777,
2418
  "step": 14000
2419
+ },
2420
+ {
2421
+ "epoch": 1.0806861010691486,
2422
+ "grad_norm": 1.7406409978866577,
2423
+ "learning_rate": 6.825778464027057e-05,
2424
+ "loss": 1.3945,
2425
+ "step": 14050
2426
+ },
2427
+ {
2428
+ "epoch": 1.0845319590800708,
2429
+ "grad_norm": 1.1657389402389526,
2430
+ "learning_rate": 6.800044116025146e-05,
2431
+ "loss": 1.4528,
2432
+ "step": 14100
2433
+ },
2434
+ {
2435
+ "epoch": 1.088377817090993,
2436
+ "grad_norm": 1.380635380744934,
2437
+ "learning_rate": 6.774309768023234e-05,
2438
+ "loss": 1.442,
2439
+ "step": 14150
2440
+ },
2441
+ {
2442
+ "epoch": 1.0922236751019152,
2443
+ "grad_norm": 1.7555848360061646,
2444
+ "learning_rate": 6.748575420021322e-05,
2445
+ "loss": 1.5061,
2446
+ "step": 14200
2447
+ },
2448
+ {
2449
+ "epoch": 1.0960695331128374,
2450
+ "grad_norm": 1.6465975046157837,
2451
+ "learning_rate": 6.72284107201941e-05,
2452
+ "loss": 1.5004,
2453
+ "step": 14250
2454
+ },
2455
+ {
2456
+ "epoch": 1.0960695331128374,
2457
+ "eval_loss": 1.5090863704681396,
2458
+ "eval_runtime": 18.0174,
2459
+ "eval_samples_per_second": 55.502,
2460
+ "eval_steps_per_second": 13.876,
2461
+ "step": 14250
2462
+ },
2463
+ {
2464
+ "epoch": 1.0999153911237598,
2465
+ "grad_norm": 2.0214383602142334,
2466
+ "learning_rate": 6.697106724017498e-05,
2467
+ "loss": 1.5436,
2468
+ "step": 14300
2469
+ },
2470
+ {
2471
+ "epoch": 1.103761249134682,
2472
+ "grad_norm": 1.399170160293579,
2473
+ "learning_rate": 6.671372376015588e-05,
2474
+ "loss": 1.5242,
2475
+ "step": 14350
2476
+ },
2477
+ {
2478
+ "epoch": 1.1076071071456042,
2479
+ "grad_norm": 2.1806626319885254,
2480
+ "learning_rate": 6.645638028013676e-05,
2481
+ "loss": 1.4609,
2482
+ "step": 14400
2483
+ },
2484
+ {
2485
+ "epoch": 1.1114529651565264,
2486
+ "grad_norm": 1.1671562194824219,
2487
+ "learning_rate": 6.619903680011763e-05,
2488
+ "loss": 1.3789,
2489
+ "step": 14450
2490
+ },
2491
+ {
2492
+ "epoch": 1.1152988231674485,
2493
+ "grad_norm": 1.0041520595550537,
2494
+ "learning_rate": 6.594169332009851e-05,
2495
+ "loss": 1.4909,
2496
+ "step": 14500
2497
+ },
2498
+ {
2499
+ "epoch": 1.1152988231674485,
2500
+ "eval_loss": 1.509366750717163,
2501
+ "eval_runtime": 18.0148,
2502
+ "eval_samples_per_second": 55.51,
2503
+ "eval_steps_per_second": 13.877,
2504
+ "step": 14500
2505
  }
2506
  ],
2507
  "logging_steps": 50,