rootxhacker committed on
Commit
f116a14
·
verified ·
1 Parent(s): 21eaf8c

Training in progress, step 23500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ec5a5e069d7a9559066b015901f6cd9d1e5e11a494d63a1a5a07d7cc56ce83
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a6df0bbfc398611c32e7b9c5fbb763744e8ae009392a4507e80b8d8789121b
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5655b365a65633fda63eb12cb6b41fa6fff3c01576ce3df81bb8d871bd860169
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0956d4f6e738505bc85b225ecdc67ba6447932a06854f4fbb04754e75f0ec5f7
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f21304ce44e93f8f86da1b431eb1e188b0a7d5ce22c8a8c84f5d679245daaffa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb49390cd54c8aca2233c64b9e33d7396d51837a032de9f6d8f9419982ba2ef
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21cd7c7ff5865cbb23888889c7201c6bfb6882d80652a0ade03d69cb9ad78253
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237814c22cb1dd740221cf140f477bd2c79d67cf7a39877240384dff408cb23a
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fdd8c08bac54a46207946491aba470a601d04858deabd8ec1d66f5286fc9a46
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165f78a7ed0419fbaa8390d2bc8415e98ea8d31ac33ee06224d036bb3edf4415
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 22250,
3
  "best_metric": 1.457463264465332,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-22000",
5
- "epoch": 1.7690946850242288,
6
  "eval_steps": 250,
7
- "global_step": 23000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3964,6 +3964,92 @@
3964
  "eval_samples_per_second": 55.817,
3965
  "eval_steps_per_second": 13.954,
3966
  "step": 23000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3967
  }
3968
  ],
3969
  "logging_steps": 50,
 
2
  "best_global_step": 22250,
3
  "best_metric": 1.457463264465332,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-22000",
5
+ "epoch": 1.8075532651334512,
6
  "eval_steps": 250,
7
+ "global_step": 23500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3964
  "eval_samples_per_second": 55.817,
3965
  "eval_steps_per_second": 13.954,
3966
  "step": 23000
3967
+ },
3968
+ {
3969
+ "epoch": 1.7729405430351513,
3970
+ "grad_norm": 0.7428656220436096,
3971
+ "learning_rate": 2.194110510642991e-05,
3972
+ "loss": 1.4475,
3973
+ "step": 23050
3974
+ },
3975
+ {
3976
+ "epoch": 1.7767864010460732,
3977
+ "grad_norm": 1.4552706480026245,
3978
+ "learning_rate": 2.1683761626410793e-05,
3979
+ "loss": 1.517,
3980
+ "step": 23100
3981
+ },
3982
+ {
3983
+ "epoch": 1.7806322590569956,
3984
+ "grad_norm": 1.1563323736190796,
3985
+ "learning_rate": 2.1426418146391676e-05,
3986
+ "loss": 1.4806,
3987
+ "step": 23150
3988
+ },
3989
+ {
3990
+ "epoch": 1.7844781170679178,
3991
+ "grad_norm": 1.7244662046432495,
3992
+ "learning_rate": 2.116907466637256e-05,
3993
+ "loss": 1.4492,
3994
+ "step": 23200
3995
+ },
3996
+ {
3997
+ "epoch": 1.78832397507884,
3998
+ "grad_norm": 1.642321228981018,
3999
+ "learning_rate": 2.0911731186353442e-05,
4000
+ "loss": 1.4196,
4001
+ "step": 23250
4002
+ },
4003
+ {
4004
+ "epoch": 1.78832397507884,
4005
+ "eval_loss": 1.4725000858306885,
4006
+ "eval_runtime": 18.1814,
4007
+ "eval_samples_per_second": 55.001,
4008
+ "eval_steps_per_second": 13.75,
4009
+ "step": 23250
4010
+ },
4011
+ {
4012
+ "epoch": 1.7921698330897624,
4013
+ "grad_norm": 1.1381646394729614,
4014
+ "learning_rate": 2.0654387706334325e-05,
4015
+ "loss": 1.4653,
4016
+ "step": 23300
4017
+ },
4018
+ {
4019
+ "epoch": 1.7960156911006846,
4020
+ "grad_norm": 1.2550010681152344,
4021
+ "learning_rate": 2.0397044226315208e-05,
4022
+ "loss": 1.4836,
4023
+ "step": 23350
4024
+ },
4025
+ {
4026
+ "epoch": 1.7998615491116068,
4027
+ "grad_norm": 1.4335628747940063,
4028
+ "learning_rate": 2.013970074629609e-05,
4029
+ "loss": 1.4403,
4030
+ "step": 23400
4031
+ },
4032
+ {
4033
+ "epoch": 1.8037074071225292,
4034
+ "grad_norm": 1.8901276588439941,
4035
+ "learning_rate": 1.9882357266276974e-05,
4036
+ "loss": 1.4562,
4037
+ "step": 23450
4038
+ },
4039
+ {
4040
+ "epoch": 1.8075532651334512,
4041
+ "grad_norm": 1.2078189849853516,
4042
+ "learning_rate": 1.9625013786257857e-05,
4043
+ "loss": 1.4221,
4044
+ "step": 23500
4045
+ },
4046
+ {
4047
+ "epoch": 1.8075532651334512,
4048
+ "eval_loss": 1.4660383462905884,
4049
+ "eval_runtime": 18.0656,
4050
+ "eval_samples_per_second": 55.354,
4051
+ "eval_steps_per_second": 13.838,
4052
+ "step": 23500
4053
  }
4054
  ],
4055
  "logging_steps": 50,