rootxhacker commited on
Commit
292a37e
·
verified ·
1 Parent(s): 783b6e2

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed248223fb076f4557238ee5eec9add808ee83dacf213c5b42e548a3180d84ba
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1615a27846e2fc7e29e90ffc163230511731ab45b760a836e71cf204236e624d
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca187211ec5ca22e22494af8fae9ec9214346dc466486151b4b1df2b8f4958fe
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6df96e6756fb0b0f050ff10208655f563c96fccd6a701602a5e90ccbd1e235e0
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2512ba1bd332fd1303d75224b66504a865cf77c5c151bdcdfcda52c23dbe2db6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74818f2f3daacf3c0fca3dde38a211872e713dacdefaa18d7435559b30dc197d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fdbdc59fd9e02ea3e69d676110893ccd87d22af41e77bc0d26c87371fa56871
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc76fea5e634e306eb2d4aa830b8059981049eb83bd4681631ccd02732a47898
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e942a77a12aac95a1f43f3b73ecd979987695991d41d93e1b6557c90b404597e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8c94e41b1ac1e8ac4ea58294a516d2e00679d859d84c40c165bc9d685fc309
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 2000,
3
- "best_metric": 4.620576858520508,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-2000",
5
- "epoch": 0.15383432043688947,
6
  "eval_steps": 250,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -352,6 +352,92 @@
352
  "eval_samples_per_second": 53.279,
353
  "eval_steps_per_second": 13.32,
354
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  ],
357
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 2500,
3
+ "best_metric": 4.558788776397705,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-2500",
5
+ "epoch": 0.19229290054611184,
6
  "eval_steps": 250,
7
+ "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
352
  "eval_samples_per_second": 53.279,
353
  "eval_steps_per_second": 13.32,
354
  "step": 2000
355
+ },
356
+ {
357
+ "epoch": 0.1576801784478117,
358
+ "grad_norm": 3.5292210578918457,
359
+ "learning_rate": 0.00012999448549685672,
360
+ "loss": 4.7815,
361
+ "step": 2050
362
+ },
363
+ {
364
+ "epoch": 0.16152603645873395,
365
+ "grad_norm": 4.665738105773926,
366
+ "learning_rate": 0.0001297371420168376,
367
+ "loss": 4.6789,
368
+ "step": 2100
369
+ },
370
+ {
371
+ "epoch": 0.16537189446965617,
372
+ "grad_norm": 4.332949161529541,
373
+ "learning_rate": 0.00012947979853681848,
374
+ "loss": 4.5991,
375
+ "step": 2150
376
+ },
377
+ {
378
+ "epoch": 0.1692177524805784,
379
+ "grad_norm": 3.8279120922088623,
380
+ "learning_rate": 0.00012922245505679936,
381
+ "loss": 4.5791,
382
+ "step": 2200
383
+ },
384
+ {
385
+ "epoch": 0.17306361049150065,
386
+ "grad_norm": 1.9522042274475098,
387
+ "learning_rate": 0.00012896511157678027,
388
+ "loss": 4.5643,
389
+ "step": 2250
390
+ },
391
+ {
392
+ "epoch": 0.17306361049150065,
393
+ "eval_loss": 4.609655857086182,
394
+ "eval_runtime": 18.946,
395
+ "eval_samples_per_second": 52.782,
396
+ "eval_steps_per_second": 13.195,
397
+ "step": 2250
398
+ },
399
+ {
400
+ "epoch": 0.1769094685024229,
401
+ "grad_norm": 4.264033794403076,
402
+ "learning_rate": 0.00012870776809676115,
403
+ "loss": 4.6666,
404
+ "step": 2300
405
+ },
406
+ {
407
+ "epoch": 0.18075532651334514,
408
+ "grad_norm": 4.572433948516846,
409
+ "learning_rate": 0.000128450424616742,
410
+ "loss": 4.6096,
411
+ "step": 2350
412
+ },
413
+ {
414
+ "epoch": 0.18460118452426735,
415
+ "grad_norm": 3.8559391498565674,
416
+ "learning_rate": 0.0001281930811367229,
417
+ "loss": 4.6425,
418
+ "step": 2400
419
+ },
420
+ {
421
+ "epoch": 0.1884470425351896,
422
+ "grad_norm": 2.9414010047912598,
423
+ "learning_rate": 0.0001279357376567038,
424
+ "loss": 4.6336,
425
+ "step": 2450
426
+ },
427
+ {
428
+ "epoch": 0.19229290054611184,
429
+ "grad_norm": 4.745160102844238,
430
+ "learning_rate": 0.00012767839417668467,
431
+ "loss": 4.6792,
432
+ "step": 2500
433
+ },
434
+ {
435
+ "epoch": 0.19229290054611184,
436
+ "eval_loss": 4.558788776397705,
437
+ "eval_runtime": 18.9882,
438
+ "eval_samples_per_second": 52.664,
439
+ "eval_steps_per_second": 13.166,
440
+ "step": 2500
441
  }
442
  ],
443
  "logging_steps": 50,