Training in progress, step 31000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaeede45da47f0d75d62e988c6ec4e75278f17721a6f05fc8fadaeb434482f59
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63e60a966b76339936843c940f84b9ea07a91ec1e4d63f98283ff1b088f9fde6
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:187283a9cec55184e3ccd0a3e0d5f383455ef16110c948463ba075754a4a4a69
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1377909bbeca185f1cd667205e5ee426651b530652d98a241a6435759b20841
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0805ae3d6f83adad04a95ac3342264a30fddb6eac2cd341698788c5b29bb3024
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52fbed6fec9b9318154947715651eba157eaae1cb8891751ee3a2257f6530107
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 30000,
|
| 3 |
"best_metric": 0.9945911169052124,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-30000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5254,6 +5254,92 @@
|
|
| 5254 |
"eval_samples_per_second": 57.682,
|
| 5255 |
"eval_steps_per_second": 14.421,
|
| 5256 |
"step": 30500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5257 |
}
|
| 5258 |
],
|
| 5259 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 30000,
|
| 3 |
"best_metric": 0.9945911169052124,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-30000",
|
| 5 |
+
"epoch": 2.384431966771787,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 31000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5254 |
"eval_samples_per_second": 57.682,
|
| 5255 |
"eval_steps_per_second": 14.421,
|
| 5256 |
"step": 30500
|
| 5257 |
+
},
|
| 5258 |
+
{
|
| 5259 |
+
"epoch": 2.3498192446734865,
|
| 5260 |
+
"grad_norm": 0.9951680302619934,
|
| 5261 |
+
"learning_rate": 4.398098849440303e-05,
|
| 5262 |
+
"loss": 1.0062,
|
| 5263 |
+
"step": 30550
|
| 5264 |
+
},
|
| 5265 |
+
{
|
| 5266 |
+
"epoch": 2.353665102684409,
|
| 5267 |
+
"grad_norm": 0.5752933025360107,
|
| 5268 |
+
"learning_rate": 4.372126847258655e-05,
|
| 5269 |
+
"loss": 0.9649,
|
| 5270 |
+
"step": 30600
|
| 5271 |
+
},
|
| 5272 |
+
{
|
| 5273 |
+
"epoch": 2.3575109606953313,
|
| 5274 |
+
"grad_norm": 1.0051320791244507,
|
| 5275 |
+
"learning_rate": 4.346154845077007e-05,
|
| 5276 |
+
"loss": 0.9958,
|
| 5277 |
+
"step": 30650
|
| 5278 |
+
},
|
| 5279 |
+
{
|
| 5280 |
+
"epoch": 2.3613568187062532,
|
| 5281 |
+
"grad_norm": 0.7760717868804932,
|
| 5282 |
+
"learning_rate": 4.320182842895359e-05,
|
| 5283 |
+
"loss": 1.0125,
|
| 5284 |
+
"step": 30700
|
| 5285 |
+
},
|
| 5286 |
+
{
|
| 5287 |
+
"epoch": 2.3652026767171757,
|
| 5288 |
+
"grad_norm": 0.852301836013794,
|
| 5289 |
+
"learning_rate": 4.294210840713711e-05,
|
| 5290 |
+
"loss": 0.9523,
|
| 5291 |
+
"step": 30750
|
| 5292 |
+
},
|
| 5293 |
+
{
|
| 5294 |
+
"epoch": 2.3652026767171757,
|
| 5295 |
+
"eval_loss": 1.003655195236206,
|
| 5296 |
+
"eval_runtime": 17.3241,
|
| 5297 |
+
"eval_samples_per_second": 57.723,
|
| 5298 |
+
"eval_steps_per_second": 14.431,
|
| 5299 |
+
"step": 30750
|
| 5300 |
+
},
|
| 5301 |
+
{
|
| 5302 |
+
"epoch": 2.3690485347280976,
|
| 5303 |
+
"grad_norm": 0.9062100648880005,
|
| 5304 |
+
"learning_rate": 4.2682388385320624e-05,
|
| 5305 |
+
"loss": 0.9735,
|
| 5306 |
+
"step": 30800
|
| 5307 |
+
},
|
| 5308 |
+
{
|
| 5309 |
+
"epoch": 2.37289439273902,
|
| 5310 |
+
"grad_norm": 1.309615969657898,
|
| 5311 |
+
"learning_rate": 4.2427862763940476e-05,
|
| 5312 |
+
"loss": 0.9668,
|
| 5313 |
+
"step": 30850
|
| 5314 |
+
},
|
| 5315 |
+
{
|
| 5316 |
+
"epoch": 2.3767402507499424,
|
| 5317 |
+
"grad_norm": 1.0907591581344604,
|
| 5318 |
+
"learning_rate": 4.2168142742123995e-05,
|
| 5319 |
+
"loss": 0.9502,
|
| 5320 |
+
"step": 30900
|
| 5321 |
+
},
|
| 5322 |
+
{
|
| 5323 |
+
"epoch": 2.3805861087608644,
|
| 5324 |
+
"grad_norm": 1.0946288108825684,
|
| 5325 |
+
"learning_rate": 4.190842272030751e-05,
|
| 5326 |
+
"loss": 0.9545,
|
| 5327 |
+
"step": 30950
|
| 5328 |
+
},
|
| 5329 |
+
{
|
| 5330 |
+
"epoch": 2.384431966771787,
|
| 5331 |
+
"grad_norm": 1.225540280342102,
|
| 5332 |
+
"learning_rate": 4.164870269849103e-05,
|
| 5333 |
+
"loss": 0.9635,
|
| 5334 |
+
"step": 31000
|
| 5335 |
+
},
|
| 5336 |
+
{
|
| 5337 |
+
"epoch": 2.384431966771787,
|
| 5338 |
+
"eval_loss": 1.0031681060791016,
|
| 5339 |
+
"eval_runtime": 17.2062,
|
| 5340 |
+
"eval_samples_per_second": 58.119,
|
| 5341 |
+
"eval_steps_per_second": 14.53,
|
| 5342 |
+
"step": 31000
|
| 5343 |
}
|
| 5344 |
],
|
| 5345 |
"logging_steps": 50,
|