Training in progress, step 1470, checkpoint
Browse files
last-checkpoint/adapter_config.json
CHANGED
|
@@ -20,11 +20,11 @@
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
-
"lm_head",
|
| 24 |
"query_key_value",
|
|
|
|
|
|
|
| 25 |
"dense",
|
| 26 |
-
"dense_h_to_4h"
|
| 27 |
-
"dense_4h_to_h"
|
| 28 |
],
|
| 29 |
"task_type": "CAUSAL_LM",
|
| 30 |
"use_dora": false,
|
|
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
|
|
|
| 23 |
"query_key_value",
|
| 24 |
+
"lm_head",
|
| 25 |
+
"dense_4h_to_h",
|
| 26 |
"dense",
|
| 27 |
+
"dense_h_to_4h"
|
|
|
|
| 28 |
],
|
| 29 |
"task_type": "CAUSAL_LM",
|
| 30 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1316913776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3aa442a7b0a850e10f76551b4fc32c47a42a8fc8ae52fe78c48295baf3448481
|
| 3 |
size 1316913776
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8908124
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:912608e1f46c072dd62554a214c457e0bb9fbc29265936c8921001ea187536d0
|
| 3 |
size 8908124
|
last-checkpoint/rng_state.pth
CHANGED
|
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
|
|
|
last-checkpoint/scheduler.pt
CHANGED
|
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
|
|
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1015,14 +1015,35 @@
|
|
| 1015 |
"learning_rate": 0.00034542722198368397,
|
| 1016 |
"loss": 0.7839,
|
| 1017 |
"step": 1440
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1018 |
}
|
| 1019 |
],
|
| 1020 |
"logging_steps": 10,
|
| 1021 |
"max_steps": 4658,
|
| 1022 |
"num_input_tokens_seen": 0,
|
| 1023 |
"num_train_epochs": 1,
|
| 1024 |
-
"save_steps":
|
| 1025 |
-
"total_flos": 1.
|
| 1026 |
"train_batch_size": 3,
|
| 1027 |
"trial_name": null,
|
| 1028 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.31558608844997854,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1470,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1015 |
"learning_rate": 0.00034542722198368397,
|
| 1016 |
"loss": 0.7839,
|
| 1017 |
"step": 1440
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 0.31129240017174753,
|
| 1021 |
+
"grad_norm": 1.4786028861999512,
|
| 1022 |
+
"learning_rate": 0.00034435379991412624,
|
| 1023 |
+
"loss": 0.7995,
|
| 1024 |
+
"step": 1450
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 0.313439244310863,
|
| 1028 |
+
"grad_norm": 1.392654538154602,
|
| 1029 |
+
"learning_rate": 0.0003432803778445685,
|
| 1030 |
+
"loss": 0.8046,
|
| 1031 |
+
"step": 1460
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 0.31558608844997854,
|
| 1035 |
+
"grad_norm": 1.730966567993164,
|
| 1036 |
+
"learning_rate": 0.00034220695577501074,
|
| 1037 |
+
"loss": 0.7909,
|
| 1038 |
+
"step": 1470
|
| 1039 |
}
|
| 1040 |
],
|
| 1041 |
"logging_steps": 10,
|
| 1042 |
"max_steps": 4658,
|
| 1043 |
"num_input_tokens_seen": 0,
|
| 1044 |
"num_train_epochs": 1,
|
| 1045 |
+
"save_steps": 30,
|
| 1046 |
+
"total_flos": 1.1131927713309773e+17,
|
| 1047 |
"train_batch_size": 3,
|
| 1048 |
"trial_name": null,
|
| 1049 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
Binary files a/last-checkpoint/training_args.bin and b/last-checkpoint/training_args.bin differ
|
|
|