Training in progress, step 35500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4d51eeeda51bc19f5c6d7222c3569a8b30c8252006cab154f166725a5d6a0a8
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc1b1084a32b0cff2b333748a7e838a8590ff81e228b7c3e840ae1cbbc113c15
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86e904a91e2cb70946038bcc4ef0f399dffd5e68a67a3db4e06d21064557dbb3
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c53689ea6f99d2293000a099d231d87ec2b98e3ae16c084822ed187b69c19695
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0b69fcd3d6df327b4f7a7f693a8457e7a615ee5a38d8957008f6b6d0162dd3f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f72dabb66926d5f103787feb6d06e607565f49ca5105e75822f116bfd973c31b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 34000,
|
| 3 |
"best_metric": 0.987713634967804,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6028,6 +6028,92 @@
|
|
| 6028 |
"eval_samples_per_second": 57.95,
|
| 6029 |
"eval_steps_per_second": 14.488,
|
| 6030 |
"step": 35000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6031 |
}
|
| 6032 |
],
|
| 6033 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": 34000,
|
| 3 |
"best_metric": 0.987713634967804,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
| 5 |
+
"epoch": 2.730559187754788,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 35500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6028 |
"eval_samples_per_second": 57.95,
|
| 6029 |
"eval_steps_per_second": 14.488,
|
| 6030 |
"step": 35000
|
| 6031 |
+
},
|
| 6032 |
+
{
|
| 6033 |
+
"epoch": 2.6959464656564878,
|
| 6034 |
+
"grad_norm": 1.3044216632843018,
|
| 6035 |
+
"learning_rate": 2.0611380931356e-05,
|
| 6036 |
+
"loss": 0.9787,
|
| 6037 |
+
"step": 35050
|
| 6038 |
+
},
|
| 6039 |
+
{
|
| 6040 |
+
"epoch": 2.69979232366741,
|
| 6041 |
+
"grad_norm": 1.4273097515106201,
|
| 6042 |
+
"learning_rate": 2.0351660909539518e-05,
|
| 6043 |
+
"loss": 0.9775,
|
| 6044 |
+
"step": 35100
|
| 6045 |
+
},
|
| 6046 |
+
{
|
| 6047 |
+
"epoch": 2.7036381816783326,
|
| 6048 |
+
"grad_norm": 1.6097638607025146,
|
| 6049 |
+
"learning_rate": 2.0091940887723034e-05,
|
| 6050 |
+
"loss": 0.9926,
|
| 6051 |
+
"step": 35150
|
| 6052 |
+
},
|
| 6053 |
+
{
|
| 6054 |
+
"epoch": 2.7074840396892546,
|
| 6055 |
+
"grad_norm": 1.4893895387649536,
|
| 6056 |
+
"learning_rate": 1.9832220865906553e-05,
|
| 6057 |
+
"loss": 1.0435,
|
| 6058 |
+
"step": 35200
|
| 6059 |
+
},
|
| 6060 |
+
{
|
| 6061 |
+
"epoch": 2.711329897700177,
|
| 6062 |
+
"grad_norm": 1.1620233058929443,
|
| 6063 |
+
"learning_rate": 1.9572500844090073e-05,
|
| 6064 |
+
"loss": 0.9525,
|
| 6065 |
+
"step": 35250
|
| 6066 |
+
},
|
| 6067 |
+
{
|
| 6068 |
+
"epoch": 2.711329897700177,
|
| 6069 |
+
"eval_loss": 0.9926208257675171,
|
| 6070 |
+
"eval_runtime": 17.3977,
|
| 6071 |
+
"eval_samples_per_second": 57.479,
|
| 6072 |
+
"eval_steps_per_second": 14.37,
|
| 6073 |
+
"step": 35250
|
| 6074 |
+
},
|
| 6075 |
+
{
|
| 6076 |
+
"epoch": 2.7151757557110994,
|
| 6077 |
+
"grad_norm": 1.2087517976760864,
|
| 6078 |
+
"learning_rate": 1.931278082227359e-05,
|
| 6079 |
+
"loss": 1.0005,
|
| 6080 |
+
"step": 35300
|
| 6081 |
+
},
|
| 6082 |
+
{
|
| 6083 |
+
"epoch": 2.7190216137220213,
|
| 6084 |
+
"grad_norm": 0.8885460495948792,
|
| 6085 |
+
"learning_rate": 1.9058255200893437e-05,
|
| 6086 |
+
"loss": 0.9911,
|
| 6087 |
+
"step": 35350
|
| 6088 |
+
},
|
| 6089 |
+
{
|
| 6090 |
+
"epoch": 2.7228674717329437,
|
| 6091 |
+
"grad_norm": 0.9543077945709229,
|
| 6092 |
+
"learning_rate": 1.8798535179076957e-05,
|
| 6093 |
+
"loss": 1.0437,
|
| 6094 |
+
"step": 35400
|
| 6095 |
+
},
|
| 6096 |
+
{
|
| 6097 |
+
"epoch": 2.7267133297438657,
|
| 6098 |
+
"grad_norm": 1.2362306118011475,
|
| 6099 |
+
"learning_rate": 1.8538815157260476e-05,
|
| 6100 |
+
"loss": 0.9766,
|
| 6101 |
+
"step": 35450
|
| 6102 |
+
},
|
| 6103 |
+
{
|
| 6104 |
+
"epoch": 2.730559187754788,
|
| 6105 |
+
"grad_norm": 1.1946227550506592,
|
| 6106 |
+
"learning_rate": 1.8279095135443993e-05,
|
| 6107 |
+
"loss": 1.0333,
|
| 6108 |
+
"step": 35500
|
| 6109 |
+
},
|
| 6110 |
+
{
|
| 6111 |
+
"epoch": 2.730559187754788,
|
| 6112 |
+
"eval_loss": 0.9953948855400085,
|
| 6113 |
+
"eval_runtime": 17.1888,
|
| 6114 |
+
"eval_samples_per_second": 58.178,
|
| 6115 |
+
"eval_steps_per_second": 14.544,
|
| 6116 |
+
"step": 35500
|
| 6117 |
}
|
| 6118 |
],
|
| 6119 |
"logging_steps": 50,
|