Training in progress, step 29500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3d2b56b1909f10ec091b339e19d3deed68b4ef6036485c876749907ab70feae
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7f39b7fecf41dc806808cbc1fcb7ebfc548308cac03fe5ff57a6ed111230c19
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d18bda7cc478b78a2baff9b2ff268d792c1bfcb109692f6f43cd01c1334af6e4
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b564041a05d5dec52405f82824f25abbc3402c3fee815ee33c0e6e880970bde
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d10fe23746f9663211426e22e1f688a86e95134ebca1ba9cc0e90e060038ab25
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:424a6fa8dcc89ac1a4c9d7aceae072f365d903e39787d3dc1c38f7e0a9e82f96
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4996,6 +4996,92 @@
|
|
| 4996 |
"eval_samples_per_second": 22.48,
|
| 4997 |
"eval_steps_per_second": 5.62,
|
| 4998 |
"step": 29000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4999 |
}
|
| 5000 |
],
|
| 5001 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 29500,
|
| 3 |
+
"best_metric": 0.6208207607269287,
|
| 4 |
+
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-29500",
|
| 5 |
+
"epoch": 2.26905622644412,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 29500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4996 |
"eval_samples_per_second": 22.48,
|
| 4997 |
"eval_steps_per_second": 5.62,
|
| 4998 |
"step": 29000
|
| 4999 |
+
},
|
| 5000 |
+
{
|
| 5001 |
+
"epoch": 2.2344435043458195,
|
| 5002 |
+
"grad_norm": 0.5085247755050659,
|
| 5003 |
+
"learning_rate": 5.176739474846116e-05,
|
| 5004 |
+
"loss": 0.6245,
|
| 5005 |
+
"step": 29050
|
| 5006 |
+
},
|
| 5007 |
+
{
|
| 5008 |
+
"epoch": 2.238289362356742,
|
| 5009 |
+
"grad_norm": 0.43597960472106934,
|
| 5010 |
+
"learning_rate": 5.150767472664469e-05,
|
| 5011 |
+
"loss": 0.6155,
|
| 5012 |
+
"step": 29100
|
| 5013 |
+
},
|
| 5014 |
+
{
|
| 5015 |
+
"epoch": 2.242135220367664,
|
| 5016 |
+
"grad_norm": 0.8605113625526428,
|
| 5017 |
+
"learning_rate": 5.124795470482819e-05,
|
| 5018 |
+
"loss": 0.6298,
|
| 5019 |
+
"step": 29150
|
| 5020 |
+
},
|
| 5021 |
+
{
|
| 5022 |
+
"epoch": 2.2459810783785863,
|
| 5023 |
+
"grad_norm": 0.5974554419517517,
|
| 5024 |
+
"learning_rate": 5.098823468301171e-05,
|
| 5025 |
+
"loss": 0.5789,
|
| 5026 |
+
"step": 29200
|
| 5027 |
+
},
|
| 5028 |
+
{
|
| 5029 |
+
"epoch": 2.2498269363895087,
|
| 5030 |
+
"grad_norm": 0.9765536785125732,
|
| 5031 |
+
"learning_rate": 5.072851466119524e-05,
|
| 5032 |
+
"loss": 0.6074,
|
| 5033 |
+
"step": 29250
|
| 5034 |
+
},
|
| 5035 |
+
{
|
| 5036 |
+
"epoch": 2.2498269363895087,
|
| 5037 |
+
"eval_loss": 0.6241350769996643,
|
| 5038 |
+
"eval_runtime": 21.3208,
|
| 5039 |
+
"eval_samples_per_second": 23.451,
|
| 5040 |
+
"eval_steps_per_second": 5.863,
|
| 5041 |
+
"step": 29250
|
| 5042 |
+
},
|
| 5043 |
+
{
|
| 5044 |
+
"epoch": 2.2536727944004307,
|
| 5045 |
+
"grad_norm": 0.8351141214370728,
|
| 5046 |
+
"learning_rate": 5.0468794639378745e-05,
|
| 5047 |
+
"loss": 0.6125,
|
| 5048 |
+
"step": 29300
|
| 5049 |
+
},
|
| 5050 |
+
{
|
| 5051 |
+
"epoch": 2.257518652411353,
|
| 5052 |
+
"grad_norm": 0.8782539367675781,
|
| 5053 |
+
"learning_rate": 5.020907461756227e-05,
|
| 5054 |
+
"loss": 0.6395,
|
| 5055 |
+
"step": 29350
|
| 5056 |
+
},
|
| 5057 |
+
{
|
| 5058 |
+
"epoch": 2.261364510422275,
|
| 5059 |
+
"grad_norm": 0.5191802978515625,
|
| 5060 |
+
"learning_rate": 4.994935459574579e-05,
|
| 5061 |
+
"loss": 0.6223,
|
| 5062 |
+
"step": 29400
|
| 5063 |
+
},
|
| 5064 |
+
{
|
| 5065 |
+
"epoch": 2.2652103684331975,
|
| 5066 |
+
"grad_norm": 1.0012739896774292,
|
| 5067 |
+
"learning_rate": 4.9689634573929304e-05,
|
| 5068 |
+
"loss": 0.5918,
|
| 5069 |
+
"step": 29450
|
| 5070 |
+
},
|
| 5071 |
+
{
|
| 5072 |
+
"epoch": 2.26905622644412,
|
| 5073 |
+
"grad_norm": 0.9906120300292969,
|
| 5074 |
+
"learning_rate": 4.942991455211283e-05,
|
| 5075 |
+
"loss": 0.6604,
|
| 5076 |
+
"step": 29500
|
| 5077 |
+
},
|
| 5078 |
+
{
|
| 5079 |
+
"epoch": 2.26905622644412,
|
| 5080 |
+
"eval_loss": 0.6208207607269287,
|
| 5081 |
+
"eval_runtime": 22.3662,
|
| 5082 |
+
"eval_samples_per_second": 22.355,
|
| 5083 |
+
"eval_steps_per_second": 5.589,
|
| 5084 |
+
"step": 29500
|
| 5085 |
}
|
| 5086 |
],
|
| 5087 |
"logging_steps": 50,
|