Training in progress, step 29000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f63d64404a064684fdbda3be6790c82213e5012889870b2d9e4cf77a54d9d94
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4ef189210e24227c270ab8ae1c43df29bb9a4de77cf6f53f77a67953cd009cb
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b8b0ae065786b82411fe6cf483993355053626c4697eb99e68382a645ddf49d
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:993642485acda165f546ca7e2c94b3614d2a294dacdb0f3665a7c4444f2d0fae
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c0f1b8b8c05dac4caf0e0e8f3e8fa0d1dd356db027075fed7b90fc2d0a97d25
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34d6605c96e2830680aa8a7a9e3362d332648b178b366947137c49386617a03
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4910,6 +4910,92 @@
|
|
| 4910 |
"eval_samples_per_second": 22.493,
|
| 4911 |
"eval_steps_per_second": 5.623,
|
| 4912 |
"step": 28500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4913 |
}
|
| 4914 |
],
|
| 4915 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 29000,
|
| 3 |
+
"best_metric": 0.6262807250022888,
|
| 4 |
+
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29000",
|
| 5 |
+
"epoch": 2.230597646334897,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 29000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4910 |
"eval_samples_per_second": 22.493,
|
| 4911 |
"eval_steps_per_second": 5.623,
|
| 4912 |
"step": 28500
|
| 4913 |
+
},
|
| 4914 |
+
{
|
| 4915 |
+
"epoch": 2.195984924236597,
|
| 4916 |
+
"grad_norm": 0.8482813835144043,
|
| 4917 |
+
"learning_rate": 5.4364594966625985e-05,
|
| 4918 |
+
"loss": 0.6012,
|
| 4919 |
+
"step": 28550
|
| 4920 |
+
},
|
| 4921 |
+
{
|
| 4922 |
+
"epoch": 2.1998307822475196,
|
| 4923 |
+
"grad_norm": 0.7037524580955505,
|
| 4924 |
+
"learning_rate": 5.4104874944809504e-05,
|
| 4925 |
+
"loss": 0.6288,
|
| 4926 |
+
"step": 28600
|
| 4927 |
+
},
|
| 4928 |
+
{
|
| 4929 |
+
"epoch": 2.2036766402584416,
|
| 4930 |
+
"grad_norm": 1.0364506244659424,
|
| 4931 |
+
"learning_rate": 5.384515492299301e-05,
|
| 4932 |
+
"loss": 0.6607,
|
| 4933 |
+
"step": 28650
|
| 4934 |
+
},
|
| 4935 |
+
{
|
| 4936 |
+
"epoch": 2.207522498269364,
|
| 4937 |
+
"grad_norm": 1.1424225568771362,
|
| 4938 |
+
"learning_rate": 5.358543490117654e-05,
|
| 4939 |
+
"loss": 0.625,
|
| 4940 |
+
"step": 28700
|
| 4941 |
+
},
|
| 4942 |
+
{
|
| 4943 |
+
"epoch": 2.211368356280286,
|
| 4944 |
+
"grad_norm": 0.5791661143302917,
|
| 4945 |
+
"learning_rate": 5.3325714879360056e-05,
|
| 4946 |
+
"loss": 0.645,
|
| 4947 |
+
"step": 28750
|
| 4948 |
+
},
|
| 4949 |
+
{
|
| 4950 |
+
"epoch": 2.211368356280286,
|
| 4951 |
+
"eval_loss": 0.6294763088226318,
|
| 4952 |
+
"eval_runtime": 21.4089,
|
| 4953 |
+
"eval_samples_per_second": 23.355,
|
| 4954 |
+
"eval_steps_per_second": 5.839,
|
| 4955 |
+
"step": 28750
|
| 4956 |
+
},
|
| 4957 |
+
{
|
| 4958 |
+
"epoch": 2.2152142142912084,
|
| 4959 |
+
"grad_norm": 0.843608021736145,
|
| 4960 |
+
"learning_rate": 5.306599485754357e-05,
|
| 4961 |
+
"loss": 0.6421,
|
| 4962 |
+
"step": 28800
|
| 4963 |
+
},
|
| 4964 |
+
{
|
| 4965 |
+
"epoch": 2.2190600723021308,
|
| 4966 |
+
"grad_norm": 0.5737313628196716,
|
| 4967 |
+
"learning_rate": 5.280627483572709e-05,
|
| 4968 |
+
"loss": 0.6488,
|
| 4969 |
+
"step": 28850
|
| 4970 |
+
},
|
| 4971 |
+
{
|
| 4972 |
+
"epoch": 2.2229059303130527,
|
| 4973 |
+
"grad_norm": 1.0083036422729492,
|
| 4974 |
+
"learning_rate": 5.254655481391061e-05,
|
| 4975 |
+
"loss": 0.6355,
|
| 4976 |
+
"step": 28900
|
| 4977 |
+
},
|
| 4978 |
+
{
|
| 4979 |
+
"epoch": 2.226751788323975,
|
| 4980 |
+
"grad_norm": 0.8519378900527954,
|
| 4981 |
+
"learning_rate": 5.228683479209413e-05,
|
| 4982 |
+
"loss": 0.6291,
|
| 4983 |
+
"step": 28950
|
| 4984 |
+
},
|
| 4985 |
+
{
|
| 4986 |
+
"epoch": 2.230597646334897,
|
| 4987 |
+
"grad_norm": 0.8886232972145081,
|
| 4988 |
+
"learning_rate": 5.202711477027764e-05,
|
| 4989 |
+
"loss": 0.6481,
|
| 4990 |
+
"step": 29000
|
| 4991 |
+
},
|
| 4992 |
+
{
|
| 4993 |
+
"epoch": 2.230597646334897,
|
| 4994 |
+
"eval_loss": 0.6262807250022888,
|
| 4995 |
+
"eval_runtime": 22.2422,
|
| 4996 |
+
"eval_samples_per_second": 22.48,
|
| 4997 |
+
"eval_steps_per_second": 5.62,
|
| 4998 |
+
"step": 29000
|
| 4999 |
}
|
| 5000 |
],
|
| 5001 |
"logging_steps": 50,
|