rootxhacker commited on
Commit
47d7524
·
verified ·
1 Parent(s): 13ed286

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8feea685a107f8ccd353fa0c6a7247dc35bf0b9ab63a5ddc15a9bb4d1290d199
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620de4edf53add8da5dbe95866a8bac876c40a3dc9c90cedc8474a386ec15455
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66b6610111b8d0f3342d2760bc32d4c23e23f0a918f9dd58106e340817cbdd89
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8995b41577d3d005c4fed5c2507e98c14db04d3eb6c0cbf4dcde21e469d9590
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d6bc30a519d7cae432bede0d2778a805102f7cbeae9a244275827f72067499b
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a993eb83d217882b8d4270d606bf3996e74fd8745c32e817e4ed4a0c36ae01a8
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfe40af48d21f2cca8886bf415fbb1d77078d013ee962c12ffd6bac84ee28801
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929f0b36206c13ecf045b54d1e7d9597c22d5c9d16a63b3190a152cdd52e97ae
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d34cb07567bc31e1dbbaa78fe1d8a500cecd9a370ce5d35295b9afb817d2e6c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7dbaa01ecd0154e55c0c938d1611846e23b19dc766ea41b92962589d9d1b91a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 7250,
3
- "best_metric": 4.4039154052734375,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
- "epoch": 0.6153372817475579,
6
  "eval_steps": 250,
7
- "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1384,6 +1384,92 @@
1384
  "eval_samples_per_second": 52.962,
1385
  "eval_steps_per_second": 13.241,
1386
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1387
  }
1388
  ],
1389
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 8500,
3
+ "best_metric": 4.392988204956055,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
5
+ "epoch": 0.6537958618567803,
6
  "eval_steps": 250,
7
+ "global_step": 8500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1384
  "eval_samples_per_second": 52.962,
1385
  "eval_steps_per_second": 13.241,
1386
  "step": 8000
1387
+ },
1388
+ {
1389
+ "epoch": 0.6191831397584802,
1390
+ "grad_norm": 4.333257675170898,
1391
+ "learning_rate": 9.912870850336384e-05,
1392
+ "loss": 4.3148,
1393
+ "step": 8050
1394
+ },
1395
+ {
1396
+ "epoch": 0.6230289977694023,
1397
+ "grad_norm": 5.497674465179443,
1398
+ "learning_rate": 9.887136502334472e-05,
1399
+ "loss": 4.5952,
1400
+ "step": 8100
1401
+ },
1402
+ {
1403
+ "epoch": 0.6268748557803245,
1404
+ "grad_norm": 4.110482215881348,
1405
+ "learning_rate": 9.861402154332561e-05,
1406
+ "loss": 4.5036,
1407
+ "step": 8150
1408
+ },
1409
+ {
1410
+ "epoch": 0.6307207137912468,
1411
+ "grad_norm": 3.9359841346740723,
1412
+ "learning_rate": 9.835667806330649e-05,
1413
+ "loss": 4.409,
1414
+ "step": 8200
1415
+ },
1416
+ {
1417
+ "epoch": 0.634566571802169,
1418
+ "grad_norm": 4.095981597900391,
1419
+ "learning_rate": 9.809933458328738e-05,
1420
+ "loss": 4.3515,
1421
+ "step": 8250
1422
+ },
1423
+ {
1424
+ "epoch": 0.634566571802169,
1425
+ "eval_loss": 4.438499927520752,
1426
+ "eval_runtime": 18.9189,
1427
+ "eval_samples_per_second": 52.857,
1428
+ "eval_steps_per_second": 13.214,
1429
+ "step": 8250
1430
+ },
1431
+ {
1432
+ "epoch": 0.6384124298130913,
1433
+ "grad_norm": 4.357822895050049,
1434
+ "learning_rate": 9.784199110326825e-05,
1435
+ "loss": 4.3767,
1436
+ "step": 8300
1437
+ },
1438
+ {
1439
+ "epoch": 0.6422582878240135,
1440
+ "grad_norm": 3.039700508117676,
1441
+ "learning_rate": 9.758979449284952e-05,
1442
+ "loss": 4.4542,
1443
+ "step": 8350
1444
+ },
1445
+ {
1446
+ "epoch": 0.6461041458349358,
1447
+ "grad_norm": 6.7661919593811035,
1448
+ "learning_rate": 9.73324510128304e-05,
1449
+ "loss": 4.4073,
1450
+ "step": 8400
1451
+ },
1452
+ {
1453
+ "epoch": 0.649950003845858,
1454
+ "grad_norm": 4.223692893981934,
1455
+ "learning_rate": 9.70751075328113e-05,
1456
+ "loss": 4.4904,
1457
+ "step": 8450
1458
+ },
1459
+ {
1460
+ "epoch": 0.6537958618567803,
1461
+ "grad_norm": 4.621217250823975,
1462
+ "learning_rate": 9.681776405279216e-05,
1463
+ "loss": 4.7717,
1464
+ "step": 8500
1465
+ },
1466
+ {
1467
+ "epoch": 0.6537958618567803,
1468
+ "eval_loss": 4.392988204956055,
1469
+ "eval_runtime": 18.8399,
1470
+ "eval_samples_per_second": 53.079,
1471
+ "eval_steps_per_second": 13.27,
1472
+ "step": 8500
1473
  }
1474
  ],
1475
  "logging_steps": 50,