Training in progress, step 35000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:187b0895f6db7770ad3826278e4168eb73d6e6ed9c71ec20c7a7e8579ed764be
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11add07e0fdec640b0675ab2d16165508e317746c1a605059e3fcf527ae10315
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77932285a3df82a40bad75f9043807d3f0399a9e710f3ce34c7c5543f06a1d00
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7dd827a2711c80977afcf1e29041cd37bfb2c6103a14b6effcc63506ecf6d4c7
|
| 3 |
+
size 14180
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88c8e4d90bec7e1ab80725ca72e32f87387270f70019247cb1ad002169d3fdc6
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc40176b44fff927c20f1bac9b67d6ff31ea020f24aee448725b6d82a306f911
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5942,6 +5942,92 @@
|
|
| 5942 |
"eval_samples_per_second": 22.407,
|
| 5943 |
"eval_steps_per_second": 5.602,
|
| 5944 |
"step": 34500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5945 |
}
|
| 5946 |
],
|
| 5947 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
+
"epoch": 2.692100607645566,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 35000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5942 |
"eval_samples_per_second": 22.407,
|
| 5943 |
"eval_steps_per_second": 5.602,
|
| 5944 |
"step": 34500
|
| 5945 |
+
},
|
| 5946 |
+
{
|
| 5947 |
+
"epoch": 2.6574878855472654,
|
| 5948 |
+
"grad_norm": 1.246099591255188,
|
| 5949 |
+
"learning_rate": 2.3208581149520815e-05,
|
| 5950 |
+
"loss": 0.6193,
|
| 5951 |
+
"step": 34550
|
| 5952 |
+
},
|
| 5953 |
+
{
|
| 5954 |
+
"epoch": 2.661333743558188,
|
| 5955 |
+
"grad_norm": 0.6598573923110962,
|
| 5956 |
+
"learning_rate": 2.2948861127704335e-05,
|
| 5957 |
+
"loss": 0.5973,
|
| 5958 |
+
"step": 34600
|
| 5959 |
+
},
|
| 5960 |
+
{
|
| 5961 |
+
"epoch": 2.6651796015691103,
|
| 5962 |
+
"grad_norm": 0.588585615158081,
|
| 5963 |
+
"learning_rate": 2.2689141105887855e-05,
|
| 5964 |
+
"loss": 0.6195,
|
| 5965 |
+
"step": 34650
|
| 5966 |
+
},
|
| 5967 |
+
{
|
| 5968 |
+
"epoch": 2.6690254595800322,
|
| 5969 |
+
"grad_norm": 0.8450884819030762,
|
| 5970 |
+
"learning_rate": 2.2429421084071374e-05,
|
| 5971 |
+
"loss": 0.6124,
|
| 5972 |
+
"step": 34700
|
| 5973 |
+
},
|
| 5974 |
+
{
|
| 5975 |
+
"epoch": 2.6728713175909546,
|
| 5976 |
+
"grad_norm": 0.9003159999847412,
|
| 5977 |
+
"learning_rate": 2.216970106225489e-05,
|
| 5978 |
+
"loss": 0.5948,
|
| 5979 |
+
"step": 34750
|
| 5980 |
+
},
|
| 5981 |
+
{
|
| 5982 |
+
"epoch": 2.6728713175909546,
|
| 5983 |
+
"eval_loss": 0.6135697960853577,
|
| 5984 |
+
"eval_runtime": 21.4775,
|
| 5985 |
+
"eval_samples_per_second": 23.28,
|
| 5986 |
+
"eval_steps_per_second": 5.82,
|
| 5987 |
+
"step": 34750
|
| 5988 |
+
},
|
| 5989 |
+
{
|
| 5990 |
+
"epoch": 2.676717175601877,
|
| 5991 |
+
"grad_norm": 1.0328209400177002,
|
| 5992 |
+
"learning_rate": 2.190998104043841e-05,
|
| 5993 |
+
"loss": 0.6453,
|
| 5994 |
+
"step": 34800
|
| 5995 |
+
},
|
| 5996 |
+
{
|
| 5997 |
+
"epoch": 2.680563033612799,
|
| 5998 |
+
"grad_norm": 0.8492136001586914,
|
| 5999 |
+
"learning_rate": 2.1650261018621926e-05,
|
| 6000 |
+
"loss": 0.5774,
|
| 6001 |
+
"step": 34850
|
| 6002 |
+
},
|
| 6003 |
+
{
|
| 6004 |
+
"epoch": 2.684408891623721,
|
| 6005 |
+
"grad_norm": 1.1003891229629517,
|
| 6006 |
+
"learning_rate": 2.1390540996805446e-05,
|
| 6007 |
+
"loss": 0.6325,
|
| 6008 |
+
"step": 34900
|
| 6009 |
+
},
|
| 6010 |
+
{
|
| 6011 |
+
"epoch": 2.6882547496346434,
|
| 6012 |
+
"grad_norm": 1.361631989479065,
|
| 6013 |
+
"learning_rate": 2.1130820974988962e-05,
|
| 6014 |
+
"loss": 0.6089,
|
| 6015 |
+
"step": 34950
|
| 6016 |
+
},
|
| 6017 |
+
{
|
| 6018 |
+
"epoch": 2.692100607645566,
|
| 6019 |
+
"grad_norm": 1.0950570106506348,
|
| 6020 |
+
"learning_rate": 2.087110095317248e-05,
|
| 6021 |
+
"loss": 0.5846,
|
| 6022 |
+
"step": 35000
|
| 6023 |
+
},
|
| 6024 |
+
{
|
| 6025 |
+
"epoch": 2.692100607645566,
|
| 6026 |
+
"eval_loss": 0.6102504134178162,
|
| 6027 |
+
"eval_runtime": 22.2494,
|
| 6028 |
+
"eval_samples_per_second": 22.473,
|
| 6029 |
+
"eval_steps_per_second": 5.618,
|
| 6030 |
+
"step": 35000
|
| 6031 |
}
|
| 6032 |
],
|
| 6033 |
"logging_steps": 50,
|