Training in progress, epoch 1
Browse files- adapter_config.json +7 -2
- adapter_model.safetensors +1 -1
- special_tokens_map.json +0 -7
- tokenizer_config.json +1 -2
- trainer_log.jsonl +20 -20
- training_args.bin +2 -2
adapter_config.json
CHANGED
|
@@ -3,6 +3,9 @@
|
|
| 3 |
"auto_mapping": null,
|
| 4 |
"base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
|
| 5 |
"bias": "none",
|
|
|
|
|
|
|
|
|
|
| 6 |
"fan_in_fan_out": false,
|
| 7 |
"inference_mode": true,
|
| 8 |
"init_lora_weights": true,
|
|
@@ -11,6 +14,7 @@
|
|
| 11 |
"layers_to_transform": null,
|
| 12 |
"loftq_config": {},
|
| 13 |
"lora_alpha": 16,
|
|
|
|
| 14 |
"lora_dropout": 0.0,
|
| 15 |
"megatron_config": null,
|
| 16 |
"megatron_core": "megatron.core",
|
|
@@ -21,14 +25,15 @@
|
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
"o_proj",
|
|
|
|
|
|
|
| 24 |
"q_proj",
|
| 25 |
"v_proj",
|
| 26 |
"up_proj",
|
| 27 |
-
"k_proj",
|
| 28 |
-
"down_proj",
|
| 29 |
"gate_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 32 |
"use_dora": false,
|
| 33 |
"use_rslora": false
|
| 34 |
}
|
|
|
|
| 3 |
"auto_mapping": null,
|
| 4 |
"base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
|
| 5 |
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
"fan_in_fan_out": false,
|
| 10 |
"inference_mode": true,
|
| 11 |
"init_lora_weights": true,
|
|
|
|
| 14 |
"layers_to_transform": null,
|
| 15 |
"loftq_config": {},
|
| 16 |
"lora_alpha": 16,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
"lora_dropout": 0.0,
|
| 19 |
"megatron_config": null,
|
| 20 |
"megatron_core": "megatron.core",
|
|
|
|
| 25 |
"revision": null,
|
| 26 |
"target_modules": [
|
| 27 |
"o_proj",
|
| 28 |
+
"k_proj",
|
| 29 |
+
"down_proj",
|
| 30 |
"q_proj",
|
| 31 |
"v_proj",
|
| 32 |
"up_proj",
|
|
|
|
|
|
|
| 33 |
"gate_proj"
|
| 34 |
],
|
| 35 |
"task_type": "CAUSAL_LM",
|
| 36 |
+
"trainable_token_indices": null,
|
| 37 |
"use_dora": false,
|
| 38 |
"use_rslora": false
|
| 39 |
}
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 83945296
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a5ca7e0c2d095fba26a0c5aefdd0f450ff6ad65b96ee6c315614134c5bc446c
|
| 3 |
size 83945296
|
special_tokens_map.json
CHANGED
|
@@ -1,12 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
-
{
|
| 4 |
-
"content": "<|eot_id|>",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false
|
| 9 |
-
},
|
| 10 |
{
|
| 11 |
"content": "<|eom_id|>",
|
| 12 |
"lstrip": false,
|
|
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
{
|
| 4 |
"content": "<|eom_id|>",
|
| 5 |
"lstrip": false,
|
tokenizer_config.json
CHANGED
|
@@ -2050,7 +2050,6 @@
|
|
| 2050 |
}
|
| 2051 |
},
|
| 2052 |
"additional_special_tokens": [
|
| 2053 |
-
"<|eot_id|>",
|
| 2054 |
"<|eom_id|>"
|
| 2055 |
],
|
| 2056 |
"bos_token": "<|begin_of_text|>",
|
|
@@ -2062,7 +2061,7 @@
|
|
| 2062 |
"input_ids",
|
| 2063 |
"attention_mask"
|
| 2064 |
],
|
| 2065 |
-
"model_max_length":
|
| 2066 |
"pad_token": "<|eot_id|>",
|
| 2067 |
"padding_side": "right",
|
| 2068 |
"split_special_tokens": false,
|
|
|
|
| 2050 |
}
|
| 2051 |
},
|
| 2052 |
"additional_special_tokens": [
|
|
|
|
| 2053 |
"<|eom_id|>"
|
| 2054 |
],
|
| 2055 |
"bos_token": "<|begin_of_text|>",
|
|
|
|
| 2061 |
"input_ids",
|
| 2062 |
"attention_mask"
|
| 2063 |
],
|
| 2064 |
+
"model_max_length": 131072,
|
| 2065 |
"pad_token": "<|eot_id|>",
|
| 2066 |
"padding_side": "right",
|
| 2067 |
"split_special_tokens": false,
|
trainer_log.jsonl
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
-
{"current_steps": 177, "total_steps": 3525, "loss": 0.
|
| 2 |
-
{"current_steps": 354, "total_steps": 3525, "loss": 0.
|
| 3 |
-
{"current_steps": 531, "total_steps": 3525, "loss": 0.
|
| 4 |
-
{"current_steps": 708, "total_steps": 3525, "loss": 0.
|
| 5 |
-
{"current_steps": 885, "total_steps": 3525, "loss": 0.
|
| 6 |
-
{"current_steps": 1062, "total_steps": 3525, "loss": 0.
|
| 7 |
-
{"current_steps": 1239, "total_steps": 3525, "loss": 0.
|
| 8 |
-
{"current_steps": 1416, "total_steps": 3525, "loss": 0.
|
| 9 |
-
{"current_steps": 1593, "total_steps": 3525, "loss": 0.
|
| 10 |
-
{"current_steps": 1770, "total_steps": 3525, "loss": 0.
|
| 11 |
-
{"current_steps": 1947, "total_steps": 3525, "loss": 0.
|
| 12 |
-
{"current_steps": 2124, "total_steps": 3525, "loss": 0.
|
| 13 |
-
{"current_steps": 2301, "total_steps": 3525, "loss": 0.
|
| 14 |
-
{"current_steps": 2478, "total_steps": 3525, "loss": 0.
|
| 15 |
-
{"current_steps": 2655, "total_steps": 3525, "loss": 0.
|
| 16 |
-
{"current_steps": 2832, "total_steps": 3525, "loss": 0.
|
| 17 |
-
{"current_steps": 3009, "total_steps": 3525, "loss": 0.
|
| 18 |
-
{"current_steps": 3186, "total_steps": 3525, "loss": 0.
|
| 19 |
-
{"current_steps": 3363, "total_steps": 3525, "loss": 0.
|
| 20 |
-
{"current_steps": 3525, "total_steps": 3525, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "2:
|
|
|
|
| 1 |
+
{"current_steps": 177, "total_steps": 3525, "loss": 0.4997, "lr": 5.014164305949008e-08, "epoch": 0.05021276595744681, "percentage": 5.02, "elapsed_time": "0:08:49", "remaining_time": "2:47:01"}
|
| 2 |
+
{"current_steps": 354, "total_steps": 3525, "loss": 0.4996, "lr": 9.996847414880202e-08, "epoch": 0.10042553191489362, "percentage": 10.04, "elapsed_time": "0:17:38", "remaining_time": "2:37:58"}
|
| 3 |
+
{"current_steps": 531, "total_steps": 3525, "loss": 0.4988, "lr": 9.438839848675913e-08, "epoch": 0.15063829787234043, "percentage": 15.06, "elapsed_time": "0:26:25", "remaining_time": "2:28:58"}
|
| 4 |
+
{"current_steps": 708, "total_steps": 3525, "loss": 0.4975, "lr": 8.880832282471626e-08, "epoch": 0.20085106382978724, "percentage": 20.09, "elapsed_time": "0:35:12", "remaining_time": "2:20:06"}
|
| 5 |
+
{"current_steps": 885, "total_steps": 3525, "loss": 0.4945, "lr": 8.32282471626734e-08, "epoch": 0.251063829787234, "percentage": 25.11, "elapsed_time": "0:44:01", "remaining_time": "2:11:19"}
|
| 6 |
+
{"current_steps": 1062, "total_steps": 3525, "loss": 0.4906, "lr": 7.76481715006305e-08, "epoch": 0.30127659574468085, "percentage": 30.13, "elapsed_time": "0:52:50", "remaining_time": "2:02:33"}
|
| 7 |
+
{"current_steps": 1239, "total_steps": 3525, "loss": 0.4848, "lr": 7.206809583858764e-08, "epoch": 0.35148936170212763, "percentage": 35.15, "elapsed_time": "1:01:37", "remaining_time": "1:53:41"}
|
| 8 |
+
{"current_steps": 1416, "total_steps": 3525, "loss": 0.4752, "lr": 6.648802017654477e-08, "epoch": 0.40170212765957447, "percentage": 40.17, "elapsed_time": "1:10:23", "remaining_time": "1:44:50"}
|
| 9 |
+
{"current_steps": 1593, "total_steps": 3525, "loss": 0.4649, "lr": 6.090794451450188e-08, "epoch": 0.45191489361702125, "percentage": 45.19, "elapsed_time": "1:19:11", "remaining_time": "1:36:02"}
|
| 10 |
+
{"current_steps": 1770, "total_steps": 3525, "loss": 0.4485, "lr": 5.5327868852459016e-08, "epoch": 0.502127659574468, "percentage": 50.21, "elapsed_time": "1:27:57", "remaining_time": "1:27:13"}
|
| 11 |
+
{"current_steps": 1947, "total_steps": 3525, "loss": 0.4349, "lr": 4.9747793190416137e-08, "epoch": 0.5523404255319149, "percentage": 55.23, "elapsed_time": "1:36:44", "remaining_time": "1:18:24"}
|
| 12 |
+
{"current_steps": 2124, "total_steps": 3525, "loss": 0.4076, "lr": 4.4167717528373264e-08, "epoch": 0.6025531914893617, "percentage": 60.26, "elapsed_time": "1:45:32", "remaining_time": "1:09:37"}
|
| 13 |
+
{"current_steps": 2301, "total_steps": 3525, "loss": 0.3791, "lr": 3.858764186633039e-08, "epoch": 0.6527659574468085, "percentage": 65.28, "elapsed_time": "1:54:19", "remaining_time": "1:00:49"}
|
| 14 |
+
{"current_steps": 2478, "total_steps": 3525, "loss": 0.357, "lr": 3.300756620428751e-08, "epoch": 0.7029787234042553, "percentage": 70.3, "elapsed_time": "2:03:08", "remaining_time": "0:52:01"}
|
| 15 |
+
{"current_steps": 2655, "total_steps": 3525, "loss": 0.3366, "lr": 2.742749054224464e-08, "epoch": 0.7531914893617021, "percentage": 75.32, "elapsed_time": "2:11:55", "remaining_time": "0:43:13"}
|
| 16 |
+
{"current_steps": 2832, "total_steps": 3525, "loss": 0.3189, "lr": 2.1847414880201765e-08, "epoch": 0.8034042553191489, "percentage": 80.34, "elapsed_time": "2:20:45", "remaining_time": "0:34:26"}
|
| 17 |
+
{"current_steps": 3009, "total_steps": 3525, "loss": 0.3081, "lr": 1.626733921815889e-08, "epoch": 0.8536170212765958, "percentage": 85.36, "elapsed_time": "2:29:33", "remaining_time": "0:25:38"}
|
| 18 |
+
{"current_steps": 3186, "total_steps": 3525, "loss": 0.2956, "lr": 1.0687263556116015e-08, "epoch": 0.9038297872340425, "percentage": 90.38, "elapsed_time": "2:38:20", "remaining_time": "0:16:50"}
|
| 19 |
+
{"current_steps": 3363, "total_steps": 3525, "loss": 0.2823, "lr": 5.1071878940731394e-09, "epoch": 0.9540425531914893, "percentage": 95.4, "elapsed_time": "2:47:07", "remaining_time": "0:08:03"}
|
| 20 |
+
{"current_steps": 3525, "total_steps": 3525, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "2:55:12", "remaining_time": "0:00:00"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98ba55a25d43e50ee44552f8c3585ba8c0ca546103e9e7076e45f3c6d0d38a37
|
| 3 |
+
size 5752
|