rootxhacker commited on
Commit
6c4f084
·
verified ·
1 Parent(s): 942117f

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0fec3c5b1e7e374b9bebda92218a1bdc7fadf77f9f2fd358d7738c92849ab15
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2519845d79c6f7a60a8e8bd9f60470c958586532b21af582e2a347c3d21c6c1a
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:961d4f3f88c2859e92add0976bc93ab8db0649cc3da6695526e297d735998366
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88aaf6b4c11b0e46f6cdd9f050476f7085d7e9069b7435c1b07136131ebac754
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81ec269cbd23a3955804ddaef963a9e0d68f2087a109e239da5baed50032493e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de168b313b50f7920582863eb6c48735221da70f052aa0c3517b7e8965981bd
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:861918ef9bae87f3c0647e76fd5a519763415c2dba1ede7121af8dc80bfb456a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523d1d97459ff6f5c0b6df3436b48935f386618f5677e95bde450bb198a23ba7
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:483cd8caa2794b8eb45fe09567d024ef71ee8ff6f6ba358a1febd749509b4c7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f36f6a58b58358dbb293c95ffa30cf143673db61f43b28cc950d65ff91360f28
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 13000,
3
- "best_metric": 1.5033278465270996,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
- "epoch": 1.038381662949004,
6
  "eval_steps": 250,
7
- "global_step": 13500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2330,6 +2330,92 @@
2330
  "eval_samples_per_second": 55.13,
2331
  "eval_steps_per_second": 13.782,
2332
  "step": 13500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2333
  }
2334
  ],
2335
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 13750,
3
+ "best_metric": 1.5006794929504395,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
+ "epoch": 1.0768402430582262,
6
  "eval_steps": 250,
7
+ "global_step": 14000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2330
  "eval_samples_per_second": 55.13,
2331
  "eval_steps_per_second": 13.782,
2332
  "step": 13500
2333
+ },
2334
+ {
2335
+ "epoch": 1.042227520959926,
2336
+ "grad_norm": 2.8053858280181885,
2337
+ "learning_rate": 7.083121944046175e-05,
2338
+ "loss": 1.5338,
2339
+ "step": 13550
2340
+ },
2341
+ {
2342
+ "epoch": 1.0460733789708483,
2343
+ "grad_norm": 1.2761131525039673,
2344
+ "learning_rate": 7.057387596044263e-05,
2345
+ "loss": 1.4137,
2346
+ "step": 13600
2347
+ },
2348
+ {
2349
+ "epoch": 1.0499192369817707,
2350
+ "grad_norm": 1.614910364151001,
2351
+ "learning_rate": 7.03165324804235e-05,
2352
+ "loss": 1.4634,
2353
+ "step": 13650
2354
+ },
2355
+ {
2356
+ "epoch": 1.0537650949926929,
2357
+ "grad_norm": 1.8560376167297363,
2358
+ "learning_rate": 7.00591890004044e-05,
2359
+ "loss": 1.5173,
2360
+ "step": 13700
2361
+ },
2362
+ {
2363
+ "epoch": 1.057610953003615,
2364
+ "grad_norm": 1.3471609354019165,
2365
+ "learning_rate": 6.980184552038528e-05,
2366
+ "loss": 1.4887,
2367
+ "step": 13750
2368
+ },
2369
+ {
2370
+ "epoch": 1.057610953003615,
2371
+ "eval_loss": 1.5006794929504395,
2372
+ "eval_runtime": 18.2151,
2373
+ "eval_samples_per_second": 54.9,
2374
+ "eval_steps_per_second": 13.725,
2375
+ "step": 13750
2376
+ },
2377
+ {
2378
+ "epoch": 1.0614568110145373,
2379
+ "grad_norm": 1.661996841430664,
2380
+ "learning_rate": 6.954450204036616e-05,
2381
+ "loss": 1.4428,
2382
+ "step": 13800
2383
+ },
2384
+ {
2385
+ "epoch": 1.0653026690254597,
2386
+ "grad_norm": 1.2982336282730103,
2387
+ "learning_rate": 6.928715856034704e-05,
2388
+ "loss": 1.4565,
2389
+ "step": 13850
2390
+ },
2391
+ {
2392
+ "epoch": 1.0691485270363819,
2393
+ "grad_norm": 0.9250918626785278,
2394
+ "learning_rate": 6.902981508032792e-05,
2395
+ "loss": 1.5353,
2396
+ "step": 13900
2397
+ },
2398
+ {
2399
+ "epoch": 1.072994385047304,
2400
+ "grad_norm": 1.8084945678710938,
2401
+ "learning_rate": 6.877247160030881e-05,
2402
+ "loss": 1.5047,
2403
+ "step": 13950
2404
+ },
2405
+ {
2406
+ "epoch": 1.0768402430582262,
2407
+ "grad_norm": 1.1049927473068237,
2408
+ "learning_rate": 6.851512812028969e-05,
2409
+ "loss": 1.5058,
2410
+ "step": 14000
2411
+ },
2412
+ {
2413
+ "epoch": 1.0768402430582262,
2414
+ "eval_loss": 1.5043680667877197,
2415
+ "eval_runtime": 18.1464,
2416
+ "eval_samples_per_second": 55.107,
2417
+ "eval_steps_per_second": 13.777,
2418
+ "step": 14000
2419
  }
2420
  ],
2421
  "logging_steps": 50,