Incomple committed on
Commit
b5fa1bd
·
verified ·
1 Parent(s): ebb10c5

Training in progress, epoch 1

Browse files
adapter_config.json CHANGED
@@ -3,6 +3,9 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
  "bias": "none",
 
 
 
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
@@ -11,6 +14,7 @@
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
  "lora_alpha": 16,
 
14
  "lora_dropout": 0.0,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -21,14 +25,15 @@
21
  "revision": null,
22
  "target_modules": [
23
  "o_proj",
 
 
24
  "q_proj",
25
  "v_proj",
26
  "up_proj",
27
- "k_proj",
28
- "down_proj",
29
  "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
32
  "use_dora": false,
33
  "use_rslora": false
34
  }
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
  "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
  "fan_in_fan_out": false,
10
  "inference_mode": true,
11
  "init_lora_weights": true,
 
14
  "layers_to_transform": null,
15
  "loftq_config": {},
16
  "lora_alpha": 16,
17
+ "lora_bias": false,
18
  "lora_dropout": 0.0,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
 
25
  "revision": null,
26
  "target_modules": [
27
  "o_proj",
28
+ "k_proj",
29
+ "down_proj",
30
  "q_proj",
31
  "v_proj",
32
  "up_proj",
 
 
33
  "gate_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
  "use_dora": false,
38
  "use_rslora": false
39
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:135aad8240dfdf7429691fbc90991bf2f30d38b16bcf2b02b1ecb493a17cb854
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5ca7e0c2d095fba26a0c5aefdd0f450ff6ad65b96ee6c315614134c5bc446c
3
  size 83945296
special_tokens_map.json CHANGED
@@ -1,12 +1,5 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<|eot_id|>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
  {
11
  "content": "<|eom_id|>",
12
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
 
 
3
  {
4
  "content": "<|eom_id|>",
5
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -2050,7 +2050,6 @@
2050
  }
2051
  },
2052
  "additional_special_tokens": [
2053
- "<|eot_id|>",
2054
  "<|eom_id|>"
2055
  ],
2056
  "bos_token": "<|begin_of_text|>",
@@ -2062,7 +2061,7 @@
2062
  "input_ids",
2063
  "attention_mask"
2064
  ],
2065
- "model_max_length": 2048,
2066
  "pad_token": "<|eot_id|>",
2067
  "padding_side": "right",
2068
  "split_special_tokens": false,
 
2050
  }
2051
  },
2052
  "additional_special_tokens": [
 
2053
  "<|eom_id|>"
2054
  ],
2055
  "bos_token": "<|begin_of_text|>",
 
2061
  "input_ids",
2062
  "attention_mask"
2063
  ],
2064
+ "model_max_length": 131072,
2065
  "pad_token": "<|eot_id|>",
2066
  "padding_side": "right",
2067
  "split_special_tokens": false,
trainer_log.jsonl CHANGED
@@ -1,20 +1,20 @@
1
- {"current_steps": 177, "total_steps": 3525, "loss": 0.4996, "lr": 2.5070821529745037e-07, "epoch": 0.05021276595744681, "percentage": 5.02, "elapsed_time": "0:08:54", "remaining_time": "2:48:32"}
2
- {"current_steps": 354, "total_steps": 3525, "loss": 0.4952, "lr": 4.9984237074401e-07, "epoch": 0.10042553191489362, "percentage": 10.04, "elapsed_time": "0:17:47", "remaining_time": "2:39:26"}
3
- {"current_steps": 531, "total_steps": 3525, "loss": 0.3627, "lr": 4.719419924337957e-07, "epoch": 0.15063829787234043, "percentage": 15.06, "elapsed_time": "0:26:40", "remaining_time": "2:30:24"}
4
- {"current_steps": 708, "total_steps": 3525, "loss": 0.2129, "lr": 4.4404161412358134e-07, "epoch": 0.20085106382978724, "percentage": 20.09, "elapsed_time": "0:35:32", "remaining_time": "2:21:26"}
5
- {"current_steps": 885, "total_steps": 3525, "loss": 0.1991, "lr": 4.16141235813367e-07, "epoch": 0.251063829787234, "percentage": 25.11, "elapsed_time": "0:44:24", "remaining_time": "2:12:29"}
6
- {"current_steps": 1062, "total_steps": 3525, "loss": 0.2012, "lr": 3.882408575031526e-07, "epoch": 0.30127659574468085, "percentage": 30.13, "elapsed_time": "0:53:16", "remaining_time": "2:03:34"}
7
- {"current_steps": 1239, "total_steps": 3525, "loss": 0.1751, "lr": 3.6034047919293817e-07, "epoch": 0.35148936170212763, "percentage": 35.15, "elapsed_time": "1:02:08", "remaining_time": "1:54:39"}
8
- {"current_steps": 1416, "total_steps": 3525, "loss": 0.1891, "lr": 3.324401008827238e-07, "epoch": 0.40170212765957447, "percentage": 40.17, "elapsed_time": "1:11:00", "remaining_time": "1:45:45"}
9
- {"current_steps": 1593, "total_steps": 3525, "loss": 0.1931, "lr": 3.045397225725094e-07, "epoch": 0.45191489361702125, "percentage": 45.19, "elapsed_time": "1:19:52", "remaining_time": "1:36:52"}
10
- {"current_steps": 1770, "total_steps": 3525, "loss": 0.1944, "lr": 2.7663934426229505e-07, "epoch": 0.502127659574468, "percentage": 50.21, "elapsed_time": "1:28:46", "remaining_time": "1:28:00"}
11
- {"current_steps": 1947, "total_steps": 3525, "loss": 0.1996, "lr": 2.487389659520807e-07, "epoch": 0.5523404255319149, "percentage": 55.23, "elapsed_time": "1:37:39", "remaining_time": "1:19:09"}
12
- {"current_steps": 2124, "total_steps": 3525, "loss": 0.1886, "lr": 2.2083858764186634e-07, "epoch": 0.6025531914893617, "percentage": 60.26, "elapsed_time": "1:46:34", "remaining_time": "1:10:17"}
13
- {"current_steps": 2301, "total_steps": 3525, "loss": 0.1861, "lr": 1.9293820933165196e-07, "epoch": 0.6527659574468085, "percentage": 65.28, "elapsed_time": "1:55:28", "remaining_time": "1:01:25"}
14
- {"current_steps": 2478, "total_steps": 3525, "loss": 0.1853, "lr": 1.6503783102143755e-07, "epoch": 0.7029787234042553, "percentage": 70.3, "elapsed_time": "2:04:23", "remaining_time": "0:52:33"}
15
- {"current_steps": 2655, "total_steps": 3525, "loss": 0.1913, "lr": 1.371374527112232e-07, "epoch": 0.7531914893617021, "percentage": 75.32, "elapsed_time": "2:13:17", "remaining_time": "0:43:40"}
16
- {"current_steps": 2832, "total_steps": 3525, "loss": 0.1892, "lr": 1.0923707440100883e-07, "epoch": 0.8034042553191489, "percentage": 80.34, "elapsed_time": "2:22:14", "remaining_time": "0:34:48"}
17
- {"current_steps": 3009, "total_steps": 3525, "loss": 0.1951, "lr": 8.133669609079445e-08, "epoch": 0.8536170212765958, "percentage": 85.36, "elapsed_time": "2:31:07", "remaining_time": "0:25:55"}
18
- {"current_steps": 3186, "total_steps": 3525, "loss": 0.1877, "lr": 5.343631778058008e-08, "epoch": 0.9038297872340425, "percentage": 90.38, "elapsed_time": "2:40:01", "remaining_time": "0:17:01"}
19
- {"current_steps": 3363, "total_steps": 3525, "loss": 0.1786, "lr": 2.55359394703657e-08, "epoch": 0.9540425531914893, "percentage": 95.4, "elapsed_time": "2:48:54", "remaining_time": "0:08:08"}
20
- {"current_steps": 3525, "total_steps": 3525, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "2:57:05", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 177, "total_steps": 3525, "loss": 0.4997, "lr": 5.014164305949008e-08, "epoch": 0.05021276595744681, "percentage": 5.02, "elapsed_time": "0:08:49", "remaining_time": "2:47:01"}
2
+ {"current_steps": 354, "total_steps": 3525, "loss": 0.4996, "lr": 9.996847414880202e-08, "epoch": 0.10042553191489362, "percentage": 10.04, "elapsed_time": "0:17:38", "remaining_time": "2:37:58"}
3
+ {"current_steps": 531, "total_steps": 3525, "loss": 0.4988, "lr": 9.438839848675913e-08, "epoch": 0.15063829787234043, "percentage": 15.06, "elapsed_time": "0:26:25", "remaining_time": "2:28:58"}
4
+ {"current_steps": 708, "total_steps": 3525, "loss": 0.4975, "lr": 8.880832282471626e-08, "epoch": 0.20085106382978724, "percentage": 20.09, "elapsed_time": "0:35:12", "remaining_time": "2:20:06"}
5
+ {"current_steps": 885, "total_steps": 3525, "loss": 0.4945, "lr": 8.32282471626734e-08, "epoch": 0.251063829787234, "percentage": 25.11, "elapsed_time": "0:44:01", "remaining_time": "2:11:19"}
6
+ {"current_steps": 1062, "total_steps": 3525, "loss": 0.4906, "lr": 7.76481715006305e-08, "epoch": 0.30127659574468085, "percentage": 30.13, "elapsed_time": "0:52:50", "remaining_time": "2:02:33"}
7
+ {"current_steps": 1239, "total_steps": 3525, "loss": 0.4848, "lr": 7.206809583858764e-08, "epoch": 0.35148936170212763, "percentage": 35.15, "elapsed_time": "1:01:37", "remaining_time": "1:53:41"}
8
+ {"current_steps": 1416, "total_steps": 3525, "loss": 0.4752, "lr": 6.648802017654477e-08, "epoch": 0.40170212765957447, "percentage": 40.17, "elapsed_time": "1:10:23", "remaining_time": "1:44:50"}
9
+ {"current_steps": 1593, "total_steps": 3525, "loss": 0.4649, "lr": 6.090794451450188e-08, "epoch": 0.45191489361702125, "percentage": 45.19, "elapsed_time": "1:19:11", "remaining_time": "1:36:02"}
10
+ {"current_steps": 1770, "total_steps": 3525, "loss": 0.4485, "lr": 5.5327868852459016e-08, "epoch": 0.502127659574468, "percentage": 50.21, "elapsed_time": "1:27:57", "remaining_time": "1:27:13"}
11
+ {"current_steps": 1947, "total_steps": 3525, "loss": 0.4349, "lr": 4.9747793190416137e-08, "epoch": 0.5523404255319149, "percentage": 55.23, "elapsed_time": "1:36:44", "remaining_time": "1:18:24"}
12
+ {"current_steps": 2124, "total_steps": 3525, "loss": 0.4076, "lr": 4.4167717528373264e-08, "epoch": 0.6025531914893617, "percentage": 60.26, "elapsed_time": "1:45:32", "remaining_time": "1:09:37"}
13
+ {"current_steps": 2301, "total_steps": 3525, "loss": 0.3791, "lr": 3.858764186633039e-08, "epoch": 0.6527659574468085, "percentage": 65.28, "elapsed_time": "1:54:19", "remaining_time": "1:00:49"}
14
+ {"current_steps": 2478, "total_steps": 3525, "loss": 0.357, "lr": 3.300756620428751e-08, "epoch": 0.7029787234042553, "percentage": 70.3, "elapsed_time": "2:03:08", "remaining_time": "0:52:01"}
15
+ {"current_steps": 2655, "total_steps": 3525, "loss": 0.3366, "lr": 2.742749054224464e-08, "epoch": 0.7531914893617021, "percentage": 75.32, "elapsed_time": "2:11:55", "remaining_time": "0:43:13"}
16
+ {"current_steps": 2832, "total_steps": 3525, "loss": 0.3189, "lr": 2.1847414880201765e-08, "epoch": 0.8034042553191489, "percentage": 80.34, "elapsed_time": "2:20:45", "remaining_time": "0:34:26"}
17
+ {"current_steps": 3009, "total_steps": 3525, "loss": 0.3081, "lr": 1.626733921815889e-08, "epoch": 0.8536170212765958, "percentage": 85.36, "elapsed_time": "2:29:33", "remaining_time": "0:25:38"}
18
+ {"current_steps": 3186, "total_steps": 3525, "loss": 0.2956, "lr": 1.0687263556116015e-08, "epoch": 0.9038297872340425, "percentage": 90.38, "elapsed_time": "2:38:20", "remaining_time": "0:16:50"}
19
+ {"current_steps": 3363, "total_steps": 3525, "loss": 0.2823, "lr": 5.1071878940731394e-09, "epoch": 0.9540425531914893, "percentage": 95.4, "elapsed_time": "2:47:07", "remaining_time": "0:08:03"}
20
+ {"current_steps": 3525, "total_steps": 3525, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "2:55:12", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2884b21165cb8859eda87f481892f546b5e29a4437306e735c9f33fc3d9238b
3
- size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98ba55a25d43e50ee44552f8c3585ba8c0ca546103e9e7076e45f3c6d0d38a37
3
+ size 5752