Training in progress, step 29500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79c28d4b4840c8a771d27b1f27c6ac5c00a9f2991d7394f3a6733892a09080ab
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c30265894652137003ce0d462067739a1e1f87e1fef05727cce7c9bc27d9969d
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35ee565d832de94c600e576ee117f0978437f0823600766c050ee216a94c8988
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:734e3f7dc3063a42733f6397bd808730560212803e7502d677f32c1adc811a33
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4379a058e70e2b699ffe8db7ee723ac96ffd7adc9d62248596ac9aebf5e54f3
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f444f08db434c1664e05b94a04b805dac6e17ed393cf9fcf28a82949e586c98
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 29000,
|
| 3 |
"best_metric": 0.9999537467956543,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-29000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4996,6 +4996,92 @@
|
|
| 4996 |
"eval_samples_per_second": 57.999,
|
| 4997 |
"eval_steps_per_second": 14.5,
|
| 4998 |
"step": 29000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4999 |
}
|
| 5000 |
],
|
| 5001 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 29000,
|
| 3 |
"best_metric": 0.9999537467956543,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-29000",
|
| 5 |
+
"epoch": 2.26905622644412,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 29500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4996 |
"eval_samples_per_second": 57.999,
|
| 4997 |
"eval_steps_per_second": 14.5,
|
| 4998 |
"step": 29000
|
| 4999 |
+
},
|
| 5000 |
+
{
|
| 5001 |
+
"epoch": 2.2344435043458195,
|
| 5002 |
+
"grad_norm": 0.5559306740760803,
|
| 5003 |
+
"learning_rate": 5.1772589148897485e-05,
|
| 5004 |
+
"loss": 0.9749,
|
| 5005 |
+
"step": 29050
|
| 5006 |
+
},
|
| 5007 |
+
{
|
| 5008 |
+
"epoch": 2.238289362356742,
|
| 5009 |
+
"grad_norm": 0.6720598936080933,
|
| 5010 |
+
"learning_rate": 5.151286912708101e-05,
|
| 5011 |
+
"loss": 0.9662,
|
| 5012 |
+
"step": 29100
|
| 5013 |
+
},
|
| 5014 |
+
{
|
| 5015 |
+
"epoch": 2.242135220367664,
|
| 5016 |
+
"grad_norm": 1.117200493812561,
|
| 5017 |
+
"learning_rate": 5.125314910526453e-05,
|
| 5018 |
+
"loss": 0.9552,
|
| 5019 |
+
"step": 29150
|
| 5020 |
+
},
|
| 5021 |
+
{
|
| 5022 |
+
"epoch": 2.2459810783785863,
|
| 5023 |
+
"grad_norm": 0.6818645000457764,
|
| 5024 |
+
"learning_rate": 5.0993429083448044e-05,
|
| 5025 |
+
"loss": 0.9032,
|
| 5026 |
+
"step": 29200
|
| 5027 |
+
},
|
| 5028 |
+
{
|
| 5029 |
+
"epoch": 2.2498269363895087,
|
| 5030 |
+
"grad_norm": 0.9796412587165833,
|
| 5031 |
+
"learning_rate": 5.0733709061631564e-05,
|
| 5032 |
+
"loss": 0.9429,
|
| 5033 |
+
"step": 29250
|
| 5034 |
+
},
|
| 5035 |
+
{
|
| 5036 |
+
"epoch": 2.2498269363895087,
|
| 5037 |
+
"eval_loss": 1.0121312141418457,
|
| 5038 |
+
"eval_runtime": 17.3211,
|
| 5039 |
+
"eval_samples_per_second": 57.733,
|
| 5040 |
+
"eval_steps_per_second": 14.433,
|
| 5041 |
+
"step": 29250
|
| 5042 |
+
},
|
| 5043 |
+
{
|
| 5044 |
+
"epoch": 2.2536727944004307,
|
| 5045 |
+
"grad_norm": 1.021713137626648,
|
| 5046 |
+
"learning_rate": 5.047398903981508e-05,
|
| 5047 |
+
"loss": 0.979,
|
| 5048 |
+
"step": 29300
|
| 5049 |
+
},
|
| 5050 |
+
{
|
| 5051 |
+
"epoch": 2.257518652411353,
|
| 5052 |
+
"grad_norm": 1.1321250200271606,
|
| 5053 |
+
"learning_rate": 5.02142690179986e-05,
|
| 5054 |
+
"loss": 1.0327,
|
| 5055 |
+
"step": 29350
|
| 5056 |
+
},
|
| 5057 |
+
{
|
| 5058 |
+
"epoch": 2.261364510422275,
|
| 5059 |
+
"grad_norm": 0.7670277953147888,
|
| 5060 |
+
"learning_rate": 4.9954548996182116e-05,
|
| 5061 |
+
"loss": 0.9668,
|
| 5062 |
+
"step": 29400
|
| 5063 |
+
},
|
| 5064 |
+
{
|
| 5065 |
+
"epoch": 2.2652103684331975,
|
| 5066 |
+
"grad_norm": 1.447698712348938,
|
| 5067 |
+
"learning_rate": 4.9694828974365635e-05,
|
| 5068 |
+
"loss": 0.9288,
|
| 5069 |
+
"step": 29450
|
| 5070 |
+
},
|
| 5071 |
+
{
|
| 5072 |
+
"epoch": 2.26905622644412,
|
| 5073 |
+
"grad_norm": 1.0438776016235352,
|
| 5074 |
+
"learning_rate": 4.9435108952549155e-05,
|
| 5075 |
+
"loss": 1.0154,
|
| 5076 |
+
"step": 29500
|
| 5077 |
+
},
|
| 5078 |
+
{
|
| 5079 |
+
"epoch": 2.26905622644412,
|
| 5080 |
+
"eval_loss": 1.0100510120391846,
|
| 5081 |
+
"eval_runtime": 17.4293,
|
| 5082 |
+
"eval_samples_per_second": 57.375,
|
| 5083 |
+
"eval_steps_per_second": 14.344,
|
| 5084 |
+
"step": 29500
|
| 5085 |
}
|
| 5086 |
],
|
| 5087 |
"logging_steps": 50,
|