Training in progress, step 1011, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1321024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e41bdb0793d89736e61aa90347d62039b1cc5b91d002ff8baba99be9a418e29d
|
| 3 |
size 1321024
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2687115
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:602b1835241dd5c7abd50b47b76431b7081683679692152e8315862e2c995483
|
| 3 |
size 2687115
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14455
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:279402b83f987e0900d353b3d8f08d84cf30415c38cb898df2f810aaabbad630
|
| 3 |
size 14455
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa023108973b8b5d0e85230f52d73593ff1c13cfe94cfa4694449750a3f0f6c6
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1000,
|
| 3 |
"best_metric": 3.7591054439544678,
|
| 4 |
"best_model_checkpoint": "/kaggle/working/drive/MyDrive/llama_lora_finetuned/checkpoint-1000",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7168,6 +7168,83 @@
|
|
| 7168 |
"eval_samples_per_second": 1.679,
|
| 7169 |
"eval_steps_per_second": 0.213,
|
| 7170 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7171 |
}
|
| 7172 |
],
|
| 7173 |
"logging_steps": 1,
|
|
@@ -7182,12 +7259,12 @@
|
|
| 7182 |
"should_evaluate": false,
|
| 7183 |
"should_log": false,
|
| 7184 |
"should_save": true,
|
| 7185 |
-
"should_training_stop":
|
| 7186 |
},
|
| 7187 |
"attributes": {}
|
| 7188 |
}
|
| 7189 |
},
|
| 7190 |
-
"total_flos":
|
| 7191 |
"train_batch_size": 4,
|
| 7192 |
"trial_name": null,
|
| 7193 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1000,
|
| 3 |
"best_metric": 3.7591054439544678,
|
| 4 |
"best_model_checkpoint": "/kaggle/working/drive/MyDrive/llama_lora_finetuned/checkpoint-1000",
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1011,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7168 |
"eval_samples_per_second": 1.679,
|
| 7169 |
"eval_steps_per_second": 0.213,
|
| 7170 |
"step": 1000
|
| 7171 |
+
},
|
| 7172 |
+
{
|
| 7173 |
+
"epoch": 2.970326409495549,
|
| 7174 |
+
"grad_norm": 0.8667409420013428,
|
| 7175 |
+
"learning_rate": 3.604874476173525e-08,
|
| 7176 |
+
"loss": 3.3783,
|
| 7177 |
+
"step": 1001
|
| 7178 |
+
},
|
| 7179 |
+
{
|
| 7180 |
+
"epoch": 2.973293768545994,
|
| 7181 |
+
"grad_norm": 1.0062025785446167,
|
| 7182 |
+
"learning_rate": 2.9792972446479605e-08,
|
| 7183 |
+
"loss": 3.5963,
|
| 7184 |
+
"step": 1002
|
| 7185 |
+
},
|
| 7186 |
+
{
|
| 7187 |
+
"epoch": 2.9762611275964392,
|
| 7188 |
+
"grad_norm": 0.7768653035163879,
|
| 7189 |
+
"learning_rate": 2.4132763089035338e-08,
|
| 7190 |
+
"loss": 3.8658,
|
| 7191 |
+
"step": 1003
|
| 7192 |
+
},
|
| 7193 |
+
{
|
| 7194 |
+
"epoch": 2.979228486646884,
|
| 7195 |
+
"grad_norm": 1.0506861209869385,
|
| 7196 |
+
"learning_rate": 1.9068184149834134e-08,
|
| 7197 |
+
"loss": 3.8484,
|
| 7198 |
+
"step": 1004
|
| 7199 |
+
},
|
| 7200 |
+
{
|
| 7201 |
+
"epoch": 2.9821958456973294,
|
| 7202 |
+
"grad_norm": 0.7125198245048523,
|
| 7203 |
+
"learning_rate": 1.4599295990352924e-08,
|
| 7204 |
+
"loss": 3.7196,
|
| 7205 |
+
"step": 1005
|
| 7206 |
+
},
|
| 7207 |
+
{
|
| 7208 |
+
"epoch": 2.9851632047477743,
|
| 7209 |
+
"grad_norm": 0.7124053835868835,
|
| 7210 |
+
"learning_rate": 1.0726151872408884e-08,
|
| 7211 |
+
"loss": 4.2165,
|
| 7212 |
+
"step": 1006
|
| 7213 |
+
},
|
| 7214 |
+
{
|
| 7215 |
+
"epoch": 2.9881305637982196,
|
| 7216 |
+
"grad_norm": 1.1544585227966309,
|
| 7217 |
+
"learning_rate": 7.448797957526621e-09,
|
| 7218 |
+
"loss": 3.6743,
|
| 7219 |
+
"step": 1007
|
| 7220 |
+
},
|
| 7221 |
+
{
|
| 7222 |
+
"epoch": 2.991097922848665,
|
| 7223 |
+
"grad_norm": 0.9369633197784424,
|
| 7224 |
+
"learning_rate": 4.767273306405251e-09,
|
| 7225 |
+
"loss": 3.6329,
|
| 7226 |
+
"step": 1008
|
| 7227 |
+
},
|
| 7228 |
+
{
|
| 7229 |
+
"epoch": 2.99406528189911,
|
| 7230 |
+
"grad_norm": 0.8136985301971436,
|
| 7231 |
+
"learning_rate": 2.6816098784188117e-09,
|
| 7232 |
+
"loss": 3.6183,
|
| 7233 |
+
"step": 1009
|
| 7234 |
+
},
|
| 7235 |
+
{
|
| 7236 |
+
"epoch": 2.9970326409495547,
|
| 7237 |
+
"grad_norm": 0.9446889162063599,
|
| 7238 |
+
"learning_rate": 1.1918325312498812e-09,
|
| 7239 |
+
"loss": 4.1876,
|
| 7240 |
+
"step": 1010
|
| 7241 |
+
},
|
| 7242 |
+
{
|
| 7243 |
+
"epoch": 3.0,
|
| 7244 |
+
"grad_norm": 1.2427432537078857,
|
| 7245 |
+
"learning_rate": 2.979590206064753e-10,
|
| 7246 |
+
"loss": 3.8464,
|
| 7247 |
+
"step": 1011
|
| 7248 |
}
|
| 7249 |
],
|
| 7250 |
"logging_steps": 1,
|
|
|
|
| 7259 |
"should_evaluate": false,
|
| 7260 |
"should_log": false,
|
| 7261 |
"should_save": true,
|
| 7262 |
+
"should_training_stop": true
|
| 7263 |
},
|
| 7264 |
"attributes": {}
|
| 7265 |
}
|
| 7266 |
},
|
| 7267 |
+
"total_flos": 590731906547712.0,
|
| 7268 |
"train_batch_size": 4,
|
| 7269 |
"trial_name": null,
|
| 7270 |
"trial_params": null
|