rootxhacker commited on
Commit
215a987
·
verified ·
1 Parent(s): 8729ef7

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3903c059a7b663d5bfd566efc9974ae82cc96ac9d5b539705078e742f56c337d
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbb6880d1881c67658cfa63c684e343fb06877abae1e3d1bb7df536e89b3486
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c871c606d134fb7ce2c803972d6df79093d0f6a67161c6b1b4060e6eb55be6
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b194834b162d096162d9becaaa7df444d7c084623419d3e2f2bdc489c1f6464b
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f4acfd5fa4964d8f20ab734945eda1f469ecfbbe259a3fd74f731ec4baabed1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d18ab5183ebf11753d526532b8569bec6f06614d4e88fe8e90cb6ae5b6d98c0
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6777a8c32c870836be295ac0aa7f4cc3d40129a0ebc96e1b375fa98ce8275f00
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74bf8247b38694054da5df7ade6fe4af76b98c8b2246b36ed484d9dbaae0ad6d
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61aefcf58c021497ade44ee93d84da1168bb6593c3269faeaeb7e7cace70ca34
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fef16a0ad411b4ae4cfd3838e745cbacbb07d632abeb0359b1bec5b6523b89c0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 7000,
3
- "best_metric": 4.411437034606934,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
- "epoch": 0.5384201215291131,
6
  "eval_steps": 250,
7
- "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1212,6 +1212,92 @@
1212
  "eval_samples_per_second": 52.997,
1213
  "eval_steps_per_second": 13.249,
1214
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1215
  }
1216
  ],
1217
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 7250,
3
+ "best_metric": 4.4039154052734375,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
+ "epoch": 0.5768787016383355,
6
  "eval_steps": 250,
7
+ "global_step": 7500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1212
  "eval_samples_per_second": 52.997,
1213
  "eval_steps_per_second": 13.249,
1214
  "step": 7000
1215
+ },
1216
+ {
1217
+ "epoch": 0.5422659795400354,
1218
+ "grad_norm": 5.943952560424805,
1219
+ "learning_rate": 0.00010427557810374618,
1220
+ "loss": 4.3737,
1221
+ "step": 7050
1222
+ },
1223
+ {
1224
+ "epoch": 0.5461118375509576,
1225
+ "grad_norm": 4.010414123535156,
1226
+ "learning_rate": 0.00010401823462372706,
1227
+ "loss": 4.5472,
1228
+ "step": 7100
1229
+ },
1230
+ {
1231
+ "epoch": 0.5499576955618799,
1232
+ "grad_norm": 3.5218944549560547,
1233
+ "learning_rate": 0.00010376089114370794,
1234
+ "loss": 4.5854,
1235
+ "step": 7150
1236
+ },
1237
+ {
1238
+ "epoch": 0.5538035535728021,
1239
+ "grad_norm": 9.44631290435791,
1240
+ "learning_rate": 0.00010350354766368883,
1241
+ "loss": 4.3883,
1242
+ "step": 7200
1243
+ },
1244
+ {
1245
+ "epoch": 0.5576494115837243,
1246
+ "grad_norm": 4.5443434715271,
1247
+ "learning_rate": 0.00010324620418366971,
1248
+ "loss": 4.6685,
1249
+ "step": 7250
1250
+ },
1251
+ {
1252
+ "epoch": 0.5576494115837243,
1253
+ "eval_loss": 4.4039154052734375,
1254
+ "eval_runtime": 18.856,
1255
+ "eval_samples_per_second": 53.034,
1256
+ "eval_steps_per_second": 13.258,
1257
+ "step": 7250
1258
+ },
1259
+ {
1260
+ "epoch": 0.5614952695946466,
1261
+ "grad_norm": 3.646768569946289,
1262
+ "learning_rate": 0.0001029888607036506,
1263
+ "loss": 4.5259,
1264
+ "step": 7300
1265
+ },
1266
+ {
1267
+ "epoch": 0.5653411276055688,
1268
+ "grad_norm": 3.510744571685791,
1269
+ "learning_rate": 0.00010273151722363148,
1270
+ "loss": 4.4461,
1271
+ "step": 7350
1272
+ },
1273
+ {
1274
+ "epoch": 0.5691869856164911,
1275
+ "grad_norm": 3.874558687210083,
1276
+ "learning_rate": 0.00010247417374361235,
1277
+ "loss": 4.3743,
1278
+ "step": 7400
1279
+ },
1280
+ {
1281
+ "epoch": 0.5730328436274132,
1282
+ "grad_norm": 2.755722761154175,
1283
+ "learning_rate": 0.00010221683026359324,
1284
+ "loss": 4.4979,
1285
+ "step": 7450
1286
+ },
1287
+ {
1288
+ "epoch": 0.5768787016383355,
1289
+ "grad_norm": 3.5653252601623535,
1290
+ "learning_rate": 0.00010195948678357412,
1291
+ "loss": 4.5442,
1292
+ "step": 7500
1293
+ },
1294
+ {
1295
+ "epoch": 0.5768787016383355,
1296
+ "eval_loss": 4.44308614730835,
1297
+ "eval_runtime": 18.8004,
1298
+ "eval_samples_per_second": 53.19,
1299
+ "eval_steps_per_second": 13.298,
1300
+ "step": 7500
1301
  }
1302
  ],
1303
  "logging_steps": 50,