rootxhacker commited on
Commit
e584df0
·
verified ·
1 Parent(s): f831362

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbb6880d1881c67658cfa63c684e343fb06877abae1e3d1bb7df536e89b3486
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8feea685a107f8ccd353fa0c6a7247dc35bf0b9ab63a5ddc15a9bb4d1290d199
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b194834b162d096162d9becaaa7df444d7c084623419d3e2f2bdc489c1f6464b
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b6610111b8d0f3342d2760bc32d4c23e23f0a918f9dd58106e340817cbdd89
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d18ab5183ebf11753d526532b8569bec6f06614d4e88fe8e90cb6ae5b6d98c0
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6bc30a519d7cae432bede0d2778a805102f7cbeae9a244275827f72067499b
3
+ size 14180
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74bf8247b38694054da5df7ade6fe4af76b98c8b2246b36ed484d9dbaae0ad6d
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe40af48d21f2cca8886bf415fbb1d77078d013ee962c12ffd6bac84ee28801
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fef16a0ad411b4ae4cfd3838e745cbacbb07d632abeb0359b1bec5b6523b89c0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d34cb07567bc31e1dbbaa78fe1d8a500cecd9a370ce5d35295b9afb817d2e6c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 7250,
3
  "best_metric": 4.4039154052734375,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
- "epoch": 0.5768787016383355,
6
  "eval_steps": 250,
7
- "global_step": 7500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1298,6 +1298,92 @@
1298
  "eval_samples_per_second": 53.19,
1299
  "eval_steps_per_second": 13.298,
1300
  "step": 7500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1301
  }
1302
  ],
1303
  "logging_steps": 50,
 
2
  "best_global_step": 7250,
3
  "best_metric": 4.4039154052734375,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
+ "epoch": 0.6153372817475579,
6
  "eval_steps": 250,
7
+ "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1298
  "eval_samples_per_second": 53.19,
1299
  "eval_steps_per_second": 13.298,
1300
  "step": 7500
1301
+ },
1302
+ {
1303
+ "epoch": 0.5807245596492577,
1304
+ "grad_norm": 3.4961936473846436,
1305
+ "learning_rate": 0.00010170214330355501,
1306
+ "loss": 4.5194,
1307
+ "step": 7550
1308
+ },
1309
+ {
1310
+ "epoch": 0.58457041766018,
1311
+ "grad_norm": 2.529500961303711,
1312
+ "learning_rate": 0.00010144479982353589,
1313
+ "loss": 4.3337,
1314
+ "step": 7600
1315
+ },
1316
+ {
1317
+ "epoch": 0.5884162756711022,
1318
+ "grad_norm": 3.346160888671875,
1319
+ "learning_rate": 0.00010118745634351679,
1320
+ "loss": 4.5422,
1321
+ "step": 7650
1322
+ },
1323
+ {
1324
+ "epoch": 0.5922621336820244,
1325
+ "grad_norm": 3.8311049938201904,
1326
+ "learning_rate": 0.00010093011286349765,
1327
+ "loss": 4.4191,
1328
+ "step": 7700
1329
+ },
1330
+ {
1331
+ "epoch": 0.5961079916929467,
1332
+ "grad_norm": 4.324901580810547,
1333
+ "learning_rate": 0.00010067276938347853,
1334
+ "loss": 4.4613,
1335
+ "step": 7750
1336
+ },
1337
+ {
1338
+ "epoch": 0.5961079916929467,
1339
+ "eval_loss": 4.4118547439575195,
1340
+ "eval_runtime": 18.9517,
1341
+ "eval_samples_per_second": 52.766,
1342
+ "eval_steps_per_second": 13.191,
1343
+ "step": 7750
1344
+ },
1345
+ {
1346
+ "epoch": 0.5999538497038689,
1347
+ "grad_norm": 3.888192653656006,
1348
+ "learning_rate": 0.00010041542590345943,
1349
+ "loss": 4.5492,
1350
+ "step": 7800
1351
+ },
1352
+ {
1353
+ "epoch": 0.6037997077147912,
1354
+ "grad_norm": 2.718320608139038,
1355
+ "learning_rate": 0.0001001580824234403,
1356
+ "loss": 4.5371,
1357
+ "step": 7850
1358
+ },
1359
+ {
1360
+ "epoch": 0.6076455657257134,
1361
+ "grad_norm": 3.5970869064331055,
1362
+ "learning_rate": 9.99007389434212e-05,
1363
+ "loss": 4.4835,
1364
+ "step": 7900
1365
+ },
1366
+ {
1367
+ "epoch": 0.6114914237366357,
1368
+ "grad_norm": 4.563399314880371,
1369
+ "learning_rate": 9.964339546340208e-05,
1370
+ "loss": 4.4494,
1371
+ "step": 7950
1372
+ },
1373
+ {
1374
+ "epoch": 0.6153372817475579,
1375
+ "grad_norm": 5.080177307128906,
1376
+ "learning_rate": 9.938605198338294e-05,
1377
+ "loss": 4.6072,
1378
+ "step": 8000
1379
+ },
1380
+ {
1381
+ "epoch": 0.6153372817475579,
1382
+ "eval_loss": 4.428142547607422,
1383
+ "eval_runtime": 18.8815,
1384
+ "eval_samples_per_second": 52.962,
1385
+ "eval_steps_per_second": 13.241,
1386
+ "step": 8000
1387
  }
1388
  ],
1389
  "logging_steps": 50,