Training in progress, step 34500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 132187888
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:581c31679972bdcfdd93694ec7c812b17d40019a808688f579a943d85463c904
|
| 3 |
size 132187888
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5748fa82d57453d9a0fe9a462a4395564f717f50912fdd1523a9305b512fa60
|
| 3 |
size 1800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 264665786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf55f9271c729abd81f424026c2d810f5ae513f2dba23347c474641256e114eb
|
| 3 |
size 264665786
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9ce460b1138284b49eaf780f57970d8521e6ca2b85cc1fe914b4f8e171c90fa
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d90106738b2858e018cd3ec3d64f24c362a10f5e044b671422e6245917f6420f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b632fa657d89cf9d024b864a979b41e9d4484feec7658b4550443a85b8f54ac3
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5856,6 +5856,92 @@
|
|
| 5856 |
"eval_samples_per_second": 22.562,
|
| 5857 |
"eval_steps_per_second": 5.64,
|
| 5858 |
"step": 34000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5859 |
}
|
| 5860 |
],
|
| 5861 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 31000,
|
| 3 |
"best_metric": 0.6043956279754639,
|
| 4 |
"best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-31000",
|
| 5 |
+
"epoch": 2.6536420275363435,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 34500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5856 |
"eval_samples_per_second": 22.562,
|
| 5857 |
"eval_steps_per_second": 5.64,
|
| 5858 |
"step": 34000
|
| 5859 |
+
},
|
| 5860 |
+
{
|
| 5861 |
+
"epoch": 2.619029305438043,
|
| 5862 |
+
"grad_norm": 0.581876814365387,
|
| 5863 |
+
"learning_rate": 2.580578136768564e-05,
|
| 5864 |
+
"loss": 0.6315,
|
| 5865 |
+
"step": 34050
|
| 5866 |
+
},
|
| 5867 |
+
{
|
| 5868 |
+
"epoch": 2.6228751634489655,
|
| 5869 |
+
"grad_norm": 0.8881607055664062,
|
| 5870 |
+
"learning_rate": 2.5546061345869156e-05,
|
| 5871 |
+
"loss": 0.603,
|
| 5872 |
+
"step": 34100
|
| 5873 |
+
},
|
| 5874 |
+
{
|
| 5875 |
+
"epoch": 2.626721021459888,
|
| 5876 |
+
"grad_norm": 0.4935351610183716,
|
| 5877 |
+
"learning_rate": 2.5286341324052672e-05,
|
| 5878 |
+
"loss": 0.6311,
|
| 5879 |
+
"step": 34150
|
| 5880 |
+
},
|
| 5881 |
+
{
|
| 5882 |
+
"epoch": 2.63056687947081,
|
| 5883 |
+
"grad_norm": 0.8172516226768494,
|
| 5884 |
+
"learning_rate": 2.502662130223619e-05,
|
| 5885 |
+
"loss": 0.6029,
|
| 5886 |
+
"step": 34200
|
| 5887 |
+
},
|
| 5888 |
+
{
|
| 5889 |
+
"epoch": 2.6344127374817323,
|
| 5890 |
+
"grad_norm": 0.6940003037452698,
|
| 5891 |
+
"learning_rate": 2.4766901280419708e-05,
|
| 5892 |
+
"loss": 0.6334,
|
| 5893 |
+
"step": 34250
|
| 5894 |
+
},
|
| 5895 |
+
{
|
| 5896 |
+
"epoch": 2.6344127374817323,
|
| 5897 |
+
"eval_loss": 0.6158848404884338,
|
| 5898 |
+
"eval_runtime": 21.2951,
|
| 5899 |
+
"eval_samples_per_second": 23.48,
|
| 5900 |
+
"eval_steps_per_second": 5.87,
|
| 5901 |
+
"step": 34250
|
| 5902 |
+
},
|
| 5903 |
+
{
|
| 5904 |
+
"epoch": 2.6382585954926543,
|
| 5905 |
+
"grad_norm": 0.9116266965866089,
|
| 5906 |
+
"learning_rate": 2.4507181258603227e-05,
|
| 5907 |
+
"loss": 0.6716,
|
| 5908 |
+
"step": 34300
|
| 5909 |
+
},
|
| 5910 |
+
{
|
| 5911 |
+
"epoch": 2.6421044535035767,
|
| 5912 |
+
"grad_norm": 0.8472510576248169,
|
| 5913 |
+
"learning_rate": 2.4247461236786744e-05,
|
| 5914 |
+
"loss": 0.5982,
|
| 5915 |
+
"step": 34350
|
| 5916 |
+
},
|
| 5917 |
+
{
|
| 5918 |
+
"epoch": 2.6459503115144987,
|
| 5919 |
+
"grad_norm": 0.9200981855392456,
|
| 5920 |
+
"learning_rate": 2.3987741214970263e-05,
|
| 5921 |
+
"loss": 0.6111,
|
| 5922 |
+
"step": 34400
|
| 5923 |
+
},
|
| 5924 |
+
{
|
| 5925 |
+
"epoch": 2.649796169525421,
|
| 5926 |
+
"grad_norm": 0.7209369540214539,
|
| 5927 |
+
"learning_rate": 2.3728021193153783e-05,
|
| 5928 |
+
"loss": 0.6498,
|
| 5929 |
+
"step": 34450
|
| 5930 |
+
},
|
| 5931 |
+
{
|
| 5932 |
+
"epoch": 2.6536420275363435,
|
| 5933 |
+
"grad_norm": 1.0441888570785522,
|
| 5934 |
+
"learning_rate": 2.3468301171337302e-05,
|
| 5935 |
+
"loss": 0.5979,
|
| 5936 |
+
"step": 34500
|
| 5937 |
+
},
|
| 5938 |
+
{
|
| 5939 |
+
"epoch": 2.6536420275363435,
|
| 5940 |
+
"eval_loss": 0.6152419447898865,
|
| 5941 |
+
"eval_runtime": 22.3142,
|
| 5942 |
+
"eval_samples_per_second": 22.407,
|
| 5943 |
+
"eval_steps_per_second": 5.602,
|
| 5944 |
+
"step": 34500
|
| 5945 |
}
|
| 5946 |
],
|
| 5947 |
"logging_steps": 50,
|