rootxhacker commited on
Commit
66cc8f0
·
verified ·
1 Parent(s): 49d45a5

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:620de4edf53add8da5dbe95866a8bac876c40a3dc9c90cedc8474a386ec15455
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecb66b5b687b2480b18fa49a3a7093840558d372a33d29f18cc9ddc9d5973f6
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8995b41577d3d005c4fed5c2507e98c14db04d3eb6c0cbf4dcde21e469d9590
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:246b4fcf33df56c5b498c44dfc6d12184de263d7f241a380037b6387910c9a75
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a993eb83d217882b8d4270d606bf3996e74fd8745c32e817e4ed4a0c36ae01a8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68f148982346537acf196edf0aa44542990dee8efc3893aa00dae2ca2e993b5
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:929f0b36206c13ecf045b54d1e7d9597c22d5c9d16a63b3190a152cdd52e97ae
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c5d85e32786b1e35a549f6e7bfc25b63f0617678a2a77d09f7e94475702a2a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7dbaa01ecd0154e55c0c938d1611846e23b19dc766ea41b92962589d9d1b91a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9af065edeaca88f16d8fcf52e4c33f05f28955d2f4f45d569975711d2168cd7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 8500,
3
  "best_metric": 4.392988204956055,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
5
- "epoch": 0.6537958618567803,
6
  "eval_steps": 250,
7
- "global_step": 8500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1470,6 +1470,92 @@
1470
  "eval_samples_per_second": 53.079,
1471
  "eval_steps_per_second": 13.27,
1472
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1473
  }
1474
  ],
1475
  "logging_steps": 50,
 
2
  "best_global_step": 8500,
3
  "best_metric": 4.392988204956055,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-8500",
5
+ "epoch": 0.6922544419660026,
6
  "eval_steps": 250,
7
+ "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1470
  "eval_samples_per_second": 53.079,
1471
  "eval_steps_per_second": 13.27,
1472
  "step": 8500
1473
+ },
1474
+ {
1475
+ "epoch": 0.6576417198677025,
1476
+ "grad_norm": 2.6913883686065674,
1477
+ "learning_rate": 9.656042057277304e-05,
1478
+ "loss": 4.4409,
1479
+ "step": 8550
1480
+ },
1481
+ {
1482
+ "epoch": 0.6614875778786247,
1483
+ "grad_norm": 3.749894618988037,
1484
+ "learning_rate": 9.630307709275394e-05,
1485
+ "loss": 4.5101,
1486
+ "step": 8600
1487
+ },
1488
+ {
1489
+ "epoch": 0.665333435889547,
1490
+ "grad_norm": 4.93977165222168,
1491
+ "learning_rate": 9.604573361273482e-05,
1492
+ "loss": 4.4504,
1493
+ "step": 8650
1494
+ },
1495
+ {
1496
+ "epoch": 0.6691792939004692,
1497
+ "grad_norm": 4.311313152313232,
1498
+ "learning_rate": 9.578839013271571e-05,
1499
+ "loss": 4.4857,
1500
+ "step": 8700
1501
+ },
1502
+ {
1503
+ "epoch": 0.6730251519113915,
1504
+ "grad_norm": 3.646656036376953,
1505
+ "learning_rate": 9.553104665269659e-05,
1506
+ "loss": 4.387,
1507
+ "step": 8750
1508
+ },
1509
+ {
1510
+ "epoch": 0.6730251519113915,
1511
+ "eval_loss": 4.401506423950195,
1512
+ "eval_runtime": 18.7931,
1513
+ "eval_samples_per_second": 53.211,
1514
+ "eval_steps_per_second": 13.303,
1515
+ "step": 8750
1516
+ },
1517
+ {
1518
+ "epoch": 0.6768710099223136,
1519
+ "grad_norm": 4.352843284606934,
1520
+ "learning_rate": 9.527370317267746e-05,
1521
+ "loss": 4.5279,
1522
+ "step": 8800
1523
+ },
1524
+ {
1525
+ "epoch": 0.6807168679332359,
1526
+ "grad_norm": 3.890216827392578,
1527
+ "learning_rate": 9.501635969265835e-05,
1528
+ "loss": 4.4485,
1529
+ "step": 8850
1530
+ },
1531
+ {
1532
+ "epoch": 0.6845627259441581,
1533
+ "grad_norm": 3.4119713306427,
1534
+ "learning_rate": 9.475901621263923e-05,
1535
+ "loss": 4.4428,
1536
+ "step": 8900
1537
+ },
1538
+ {
1539
+ "epoch": 0.6884085839550804,
1540
+ "grad_norm": 7.813595294952393,
1541
+ "learning_rate": 9.450167273262012e-05,
1542
+ "loss": 4.3308,
1543
+ "step": 8950
1544
+ },
1545
+ {
1546
+ "epoch": 0.6922544419660026,
1547
+ "grad_norm": 3.079829692840576,
1548
+ "learning_rate": 9.4244329252601e-05,
1549
+ "loss": 4.368,
1550
+ "step": 9000
1551
+ },
1552
+ {
1553
+ "epoch": 0.6922544419660026,
1554
+ "eval_loss": 4.393312931060791,
1555
+ "eval_runtime": 18.7727,
1556
+ "eval_samples_per_second": 53.269,
1557
+ "eval_steps_per_second": 13.317,
1558
+ "step": 9000
1559
  }
1560
  ],
1561
  "logging_steps": 50,