Add TaskMind LoRA adapter — trained on WhatsApp task extraction dataset
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- README.md +4 -3
- adapter_config.json +3 -3
- adapter_model.safetensors +2 -2
- checkpoint-17/README.md +1 -0
- checkpoint-17/adapter_config.json +3 -3
- checkpoint-17/adapter_model.safetensors +2 -2
- checkpoint-17/optimizer.pt +2 -2
- checkpoint-17/rng_state.pth +2 -2
- checkpoint-17/scheduler.pt +2 -2
- checkpoint-17/special_tokens_map.json +24 -0
- checkpoint-17/tokenizer.model +3 -0
- checkpoint-17/tokenizer_config.json +31 -3
- checkpoint-17/trainer_state.json +11 -11
- checkpoint-17/training_args.bin +2 -2
- checkpoint-34/README.md +1 -0
- checkpoint-34/adapter_config.json +3 -3
- checkpoint-34/adapter_model.safetensors +2 -2
- checkpoint-34/optimizer.pt +2 -2
- checkpoint-34/rng_state.pth +2 -2
- checkpoint-34/scheduler.pt +2 -2
- checkpoint-34/special_tokens_map.json +24 -0
- checkpoint-34/tokenizer.model +3 -0
- checkpoint-34/tokenizer_config.json +31 -3
- checkpoint-34/trainer_state.json +25 -25
- checkpoint-34/training_args.bin +2 -2
- checkpoint-51/README.md +1 -0
- checkpoint-51/adapter_config.json +3 -3
- checkpoint-51/adapter_model.safetensors +2 -2
- checkpoint-51/optimizer.pt +2 -2
- checkpoint-51/rng_state.pth +2 -2
- checkpoint-51/scheduler.pt +2 -2
- checkpoint-51/special_tokens_map.json +24 -0
- checkpoint-51/tokenizer.model +3 -0
- checkpoint-51/tokenizer_config.json +31 -3
- checkpoint-51/trainer_state.json +39 -39
- checkpoint-51/training_args.bin +2 -2
- checkpoint-68/README.md +1 -0
- checkpoint-68/adapter_config.json +3 -3
- checkpoint-68/adapter_model.safetensors +2 -2
- checkpoint-68/optimizer.pt +2 -2
- checkpoint-68/rng_state.pth +2 -2
- checkpoint-68/scheduler.pt +2 -2
- checkpoint-68/special_tokens_map.json +24 -0
- checkpoint-68/tokenizer.model +3 -0
- checkpoint-68/tokenizer_config.json +31 -3
- checkpoint-68/trainer_state.json +49 -49
- checkpoint-68/training_args.bin +2 -2
- checkpoint-85/README.md +1 -0
- checkpoint-85/adapter_config.json +3 -3
- checkpoint-85/adapter_model.safetensors +2 -2
README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
licence: license
|
| 11 |
pipeline_tag: text-generation
|
|
@@ -39,10 +40,10 @@ This model was trained with SFT.
|
|
| 39 |
|
| 40 |
- PEFT 0.18.1
|
| 41 |
- TRL: 1.1.0
|
| 42 |
-
- Transformers:
|
| 43 |
-
- Pytorch: 2.
|
| 44 |
- Datasets: 4.8.4
|
| 45 |
-
- Tokenizers: 0.22.
|
| 46 |
|
| 47 |
## Citations
|
| 48 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
licence: license
|
| 12 |
pipeline_tag: text-generation
|
|
|
|
| 40 |
|
| 41 |
- PEFT 0.18.1
|
| 42 |
- TRL: 1.1.0
|
| 43 |
+
- Transformers: 4.57.0
|
| 44 |
+
- Pytorch: 2.2.2
|
| 45 |
- Datasets: 4.8.4
|
| 46 |
+
- Tokenizers: 0.22.1
|
| 47 |
|
| 48 |
## Citations
|
| 49 |
|
adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0759e7617267665196662e1c86613a8737244bbac6bdc596e21dbfd3e571b57
|
| 3 |
+
size 9022864
|
checkpoint-17/README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
---
|
| 12 |
|
checkpoint-17/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
checkpoint-17/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e264d2d1fc670dd47e9a96c318800447ed4f3c513553f217d6ea5088e7c52412
|
| 3 |
+
size 9022864
|
checkpoint-17/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13ad6eabf38d51382f680faef0d19bdc3b1eaebd712345ddb7514abe95504279
|
| 3 |
+
size 18094138
|
checkpoint-17/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b182573f61d8bcf5eaefcbf8f98d8734b6db51b44ad36aed3a305c431539fa1
|
| 3 |
+
size 13990
|
checkpoint-17/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:478edd44e20812aa45a79d35d53340fce54e97bd298a641bc41c78cfd3152b0c
|
| 3 |
+
size 1064
|
checkpoint-17/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-17/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
checkpoint-17/tokenizer_config.json
CHANGED
|
@@ -1,15 +1,43 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"bos_token": "<s>",
|
| 4 |
"clean_up_tokenization_spaces": false,
|
| 5 |
"eos_token": "</s>",
|
| 6 |
-
"
|
| 7 |
"legacy": false,
|
| 8 |
"model_max_length": 2048,
|
| 9 |
"pad_token": "</s>",
|
| 10 |
"padding_side": "right",
|
| 11 |
"sp_model_kwargs": {},
|
| 12 |
-
"tokenizer_class": "
|
| 13 |
"unk_token": "<unk>",
|
| 14 |
"use_default_system_prompt": false
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
"bos_token": "<s>",
|
| 32 |
"clean_up_tokenization_spaces": false,
|
| 33 |
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
"legacy": false,
|
| 36 |
"model_max_length": 2048,
|
| 37 |
"pad_token": "</s>",
|
| 38 |
"padding_side": "right",
|
| 39 |
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
"unk_token": "<unk>",
|
| 42 |
"use_default_system_prompt": false
|
| 43 |
}
|
checkpoint-17/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 17,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-17",
|
| 5 |
"epoch": 1.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -10,24 +10,24 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"entropy": 1.
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
-
"grad_norm": 1.
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
-
"loss": 2.
|
| 18 |
-
"mean_token_accuracy": 0.
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
-
"eval_entropy": 1.
|
| 25 |
-
"eval_loss": 1.
|
| 26 |
-
"eval_mean_token_accuracy": 0.
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
-
"eval_runtime":
|
| 29 |
-
"eval_samples_per_second":
|
| 30 |
-
"eval_steps_per_second":
|
| 31 |
"step": 17
|
| 32 |
}
|
| 33 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 17,
|
| 3 |
+
"best_metric": 1.5357812643051147,
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-17",
|
| 5 |
"epoch": 1.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"entropy": 1.772234356403351,
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
+
"grad_norm": 1.3780473470687866,
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
+
"loss": 2.2849,
|
| 18 |
+
"mean_token_accuracy": 0.5951868265867233,
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
+
"eval_entropy": 1.685779293378194,
|
| 25 |
+
"eval_loss": 1.5357812643051147,
|
| 26 |
+
"eval_mean_token_accuracy": 0.6872214078903198,
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
+
"eval_runtime": 2.3332,
|
| 29 |
+
"eval_samples_per_second": 10.286,
|
| 30 |
+
"eval_steps_per_second": 1.286,
|
| 31 |
"step": 17
|
| 32 |
}
|
| 33 |
],
|
checkpoint-17/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
|
| 3 |
+
size 5880
|
checkpoint-34/README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
---
|
| 12 |
|
checkpoint-34/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
checkpoint-34/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f93596021de1ef5682449b7f6b167c47a3b7777ee8b2dcb74bc7f97e66e8443
|
| 3 |
+
size 9022864
|
checkpoint-34/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e39d2a401c09c34aa876e14267aac03214eefa27a867e7333d9cee2d965bf911
|
| 3 |
+
size 18094138
|
checkpoint-34/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06e55bfc8723f269a626afca0be6f7def5753f3bb265436b94c5580b703cfcc7
|
| 3 |
+
size 13990
|
checkpoint-34/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85f978c32b22bc6a3ab73d3c7eaeaf097b1efaf63503b3ceb2ba5357b465036f
|
| 3 |
+
size 1064
|
checkpoint-34/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-34/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
checkpoint-34/tokenizer_config.json
CHANGED
|
@@ -1,15 +1,43 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"bos_token": "<s>",
|
| 4 |
"clean_up_tokenization_spaces": false,
|
| 5 |
"eos_token": "</s>",
|
| 6 |
-
"
|
| 7 |
"legacy": false,
|
| 8 |
"model_max_length": 2048,
|
| 9 |
"pad_token": "</s>",
|
| 10 |
"padding_side": "right",
|
| 11 |
"sp_model_kwargs": {},
|
| 12 |
-
"tokenizer_class": "
|
| 13 |
"unk_token": "<unk>",
|
| 14 |
"use_default_system_prompt": false
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
"bos_token": "<s>",
|
| 32 |
"clean_up_tokenization_spaces": false,
|
| 33 |
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
"legacy": false,
|
| 36 |
"model_max_length": 2048,
|
| 37 |
"pad_token": "</s>",
|
| 38 |
"padding_side": "right",
|
| 39 |
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
"unk_token": "<unk>",
|
| 42 |
"use_default_system_prompt": false
|
| 43 |
}
|
checkpoint-34/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 34,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-34",
|
| 5 |
"epoch": 2.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -10,55 +10,55 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"entropy": 1.
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
-
"grad_norm": 1.
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
-
"loss": 2.
|
| 18 |
-
"mean_token_accuracy": 0.
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
-
"eval_entropy": 1.
|
| 25 |
-
"eval_loss": 1.
|
| 26 |
-
"eval_mean_token_accuracy": 0.
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
-
"eval_runtime":
|
| 29 |
-
"eval_samples_per_second":
|
| 30 |
-
"eval_steps_per_second":
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
-
"entropy": 1.
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
-
"grad_norm": 1.
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
-
"loss": 1.
|
| 39 |
-
"mean_token_accuracy": 0.
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
-
"entropy": 1.
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
-
"grad_norm": 2.
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
-
"loss": 0.
|
| 49 |
-
"mean_token_accuracy": 0.
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
-
"eval_entropy": 0.
|
| 56 |
-
"eval_loss": 0.
|
| 57 |
-
"eval_mean_token_accuracy": 0.
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
-
"eval_runtime":
|
| 60 |
-
"eval_samples_per_second":
|
| 61 |
-
"eval_steps_per_second":
|
| 62 |
"step": 34
|
| 63 |
}
|
| 64 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 34,
|
| 3 |
+
"best_metric": 0.557042121887207,
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-34",
|
| 5 |
"epoch": 2.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"entropy": 1.772234356403351,
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
+
"grad_norm": 1.3780473470687866,
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
+
"loss": 2.2849,
|
| 18 |
+
"mean_token_accuracy": 0.5951868265867233,
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
+
"eval_entropy": 1.685779293378194,
|
| 25 |
+
"eval_loss": 1.5357812643051147,
|
| 26 |
+
"eval_mean_token_accuracy": 0.6872214078903198,
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
+
"eval_runtime": 2.3332,
|
| 29 |
+
"eval_samples_per_second": 10.286,
|
| 30 |
+
"eval_steps_per_second": 1.286,
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
+
"entropy": 1.6779554203936928,
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
+
"grad_norm": 1.9584565162658691,
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
+
"loss": 1.6949,
|
| 39 |
+
"mean_token_accuracy": 0.6669363661816246,
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
+
"entropy": 1.0465361922979355,
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
+
"grad_norm": 2.725850820541382,
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
+
"loss": 0.9223,
|
| 49 |
+
"mean_token_accuracy": 0.8229832291603089,
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
+
"eval_entropy": 0.5895721216996511,
|
| 56 |
+
"eval_loss": 0.557042121887207,
|
| 57 |
+
"eval_mean_token_accuracy": 0.9006072084108988,
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
+
"eval_runtime": 1.4033,
|
| 60 |
+
"eval_samples_per_second": 17.102,
|
| 61 |
+
"eval_steps_per_second": 2.138,
|
| 62 |
"step": 34
|
| 63 |
}
|
| 64 |
],
|
checkpoint-34/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
|
| 3 |
+
size 5880
|
checkpoint-51/README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
---
|
| 12 |
|
checkpoint-51/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
checkpoint-51/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac034d5ecfdb8dcaafe60b742fd12fe259f77573283b91bbbe032b8c8077425
|
| 3 |
+
size 9022864
|
checkpoint-51/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13ae0fc37cbddf906480c6b6070792291f6f84a53831a7f1982bf5adee686ddf
|
| 3 |
+
size 18094138
|
checkpoint-51/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32d32051101ec51c2b04c4ee6a6d2c7f40562e56836cbb02d6e6e3126490484d
|
| 3 |
+
size 13990
|
checkpoint-51/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:923e4c8fc2fa2baa19e1b2632e746f9b643a6a2ed7982aae130c58462f1007a1
|
| 3 |
+
size 1064
|
checkpoint-51/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-51/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
checkpoint-51/tokenizer_config.json
CHANGED
|
@@ -1,15 +1,43 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"bos_token": "<s>",
|
| 4 |
"clean_up_tokenization_spaces": false,
|
| 5 |
"eos_token": "</s>",
|
| 6 |
-
"
|
| 7 |
"legacy": false,
|
| 8 |
"model_max_length": 2048,
|
| 9 |
"pad_token": "</s>",
|
| 10 |
"padding_side": "right",
|
| 11 |
"sp_model_kwargs": {},
|
| 12 |
-
"tokenizer_class": "
|
| 13 |
"unk_token": "<unk>",
|
| 14 |
"use_default_system_prompt": false
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
"bos_token": "<s>",
|
| 32 |
"clean_up_tokenization_spaces": false,
|
| 33 |
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
"legacy": false,
|
| 36 |
"model_max_length": 2048,
|
| 37 |
"pad_token": "</s>",
|
| 38 |
"padding_side": "right",
|
| 39 |
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
"unk_token": "<unk>",
|
| 42 |
"use_default_system_prompt": false
|
| 43 |
}
|
checkpoint-51/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 51,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-51",
|
| 5 |
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -10,86 +10,86 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"entropy": 1.
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
-
"grad_norm": 1.
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
-
"loss": 2.
|
| 18 |
-
"mean_token_accuracy": 0.
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
-
"eval_entropy": 1.
|
| 25 |
-
"eval_loss": 1.
|
| 26 |
-
"eval_mean_token_accuracy": 0.
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
-
"eval_runtime":
|
| 29 |
-
"eval_samples_per_second":
|
| 30 |
-
"eval_steps_per_second":
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
-
"entropy": 1.
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
-
"grad_norm": 1.
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
-
"loss": 1.
|
| 39 |
-
"mean_token_accuracy": 0.
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
-
"entropy": 1.
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
-
"grad_norm": 2.
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
-
"loss": 0.
|
| 49 |
-
"mean_token_accuracy": 0.
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
-
"eval_entropy": 0.
|
| 56 |
-
"eval_loss": 0.
|
| 57 |
-
"eval_mean_token_accuracy": 0.
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
-
"eval_runtime":
|
| 60 |
-
"eval_samples_per_second":
|
| 61 |
-
"eval_steps_per_second":
|
| 62 |
"step": 34
|
| 63 |
},
|
| 64 |
{
|
| 65 |
-
"entropy": 0.
|
| 66 |
"epoch": 2.3636363636363638,
|
| 67 |
-
"grad_norm": 0.
|
| 68 |
"learning_rate": 0.00012266666666666668,
|
| 69 |
-
"loss": 0.
|
| 70 |
-
"mean_token_accuracy": 0.
|
| 71 |
"num_tokens": 54283.0,
|
| 72 |
"step": 40
|
| 73 |
},
|
| 74 |
{
|
| 75 |
-
"entropy": 0.
|
| 76 |
"epoch": 2.9696969696969697,
|
| 77 |
-
"grad_norm": 0.
|
| 78 |
"learning_rate": 9.6e-05,
|
| 79 |
-
"loss": 0.
|
| 80 |
-
"mean_token_accuracy": 0.
|
| 81 |
"num_tokens": 68175.0,
|
| 82 |
"step": 50
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"epoch": 3.0,
|
| 86 |
-
"eval_entropy": 0.
|
| 87 |
-
"eval_loss": 0.
|
| 88 |
-
"eval_mean_token_accuracy": 0.
|
| 89 |
"eval_num_tokens": 68703.0,
|
| 90 |
-
"eval_runtime":
|
| 91 |
-
"eval_samples_per_second":
|
| 92 |
-
"eval_steps_per_second":
|
| 93 |
"step": 51
|
| 94 |
}
|
| 95 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 51,
|
| 3 |
+
"best_metric": 0.4862091839313507,
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-51",
|
| 5 |
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"entropy": 1.772234356403351,
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
+
"grad_norm": 1.3780473470687866,
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
+
"loss": 2.2849,
|
| 18 |
+
"mean_token_accuracy": 0.5951868265867233,
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
+
"eval_entropy": 1.685779293378194,
|
| 25 |
+
"eval_loss": 1.5357812643051147,
|
| 26 |
+
"eval_mean_token_accuracy": 0.6872214078903198,
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
+
"eval_runtime": 2.3332,
|
| 29 |
+
"eval_samples_per_second": 10.286,
|
| 30 |
+
"eval_steps_per_second": 1.286,
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
+
"entropy": 1.6779554203936928,
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
+
"grad_norm": 1.9584565162658691,
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
+
"loss": 1.6949,
|
| 39 |
+
"mean_token_accuracy": 0.6669363661816246,
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
+
"entropy": 1.0465361922979355,
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
+
"grad_norm": 2.725850820541382,
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
+
"loss": 0.9223,
|
| 49 |
+
"mean_token_accuracy": 0.8229832291603089,
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
+
"eval_entropy": 0.5895721216996511,
|
| 56 |
+
"eval_loss": 0.557042121887207,
|
| 57 |
+
"eval_mean_token_accuracy": 0.9006072084108988,
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
+
"eval_runtime": 1.4033,
|
| 60 |
+
"eval_samples_per_second": 17.102,
|
| 61 |
+
"eval_steps_per_second": 2.138,
|
| 62 |
"step": 34
|
| 63 |
},
|
| 64 |
{
|
| 65 |
+
"entropy": 0.5621798069853532,
|
| 66 |
"epoch": 2.3636363636363638,
|
| 67 |
+
"grad_norm": 0.853744626045227,
|
| 68 |
"learning_rate": 0.00012266666666666668,
|
| 69 |
+
"loss": 0.5175,
|
| 70 |
+
"mean_token_accuracy": 0.902722396348652,
|
| 71 |
"num_tokens": 54283.0,
|
| 72 |
"step": 40
|
| 73 |
},
|
| 74 |
{
|
| 75 |
+
"entropy": 0.46219056397676467,
|
| 76 |
"epoch": 2.9696969696969697,
|
| 77 |
+
"grad_norm": 0.8320155739784241,
|
| 78 |
"learning_rate": 9.6e-05,
|
| 79 |
+
"loss": 0.4347,
|
| 80 |
+
"mean_token_accuracy": 0.9223286896944046,
|
| 81 |
"num_tokens": 68175.0,
|
| 82 |
"step": 50
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"epoch": 3.0,
|
| 86 |
+
"eval_entropy": 0.4948213994503021,
|
| 87 |
+
"eval_loss": 0.4862091839313507,
|
| 88 |
+
"eval_mean_token_accuracy": 0.9130085905392965,
|
| 89 |
"eval_num_tokens": 68703.0,
|
| 90 |
+
"eval_runtime": 1.594,
|
| 91 |
+
"eval_samples_per_second": 15.056,
|
| 92 |
+
"eval_steps_per_second": 1.882,
|
| 93 |
"step": 51
|
| 94 |
}
|
| 95 |
],
|
checkpoint-51/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
|
| 3 |
+
size 5880
|
checkpoint-68/README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
---
|
| 12 |
|
checkpoint-68/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
checkpoint-68/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7529ae8b88bdc50cbbcf3ca825f59a3c8f13d84ddeb69a1c58657217d654f6cf
|
| 3 |
+
size 9022864
|
checkpoint-68/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3500cc18261ac4bc8c870bec4c983b932c2093620b48f022fff41154f7cbdf8
|
| 3 |
+
size 18094138
|
checkpoint-68/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fed77b14fe062f6db72d68cedd6fd95bae3305b7a735eef3c85da43fd15d476
|
| 3 |
+
size 13990
|
checkpoint-68/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db1cb9f8e0421507ae638faa350a64a3bc91c45ea59cad1a259e64ddce2114de
|
| 3 |
+
size 1064
|
checkpoint-68/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
checkpoint-68/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
checkpoint-68/tokenizer_config.json
CHANGED
|
@@ -1,15 +1,43 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"bos_token": "<s>",
|
| 4 |
"clean_up_tokenization_spaces": false,
|
| 5 |
"eos_token": "</s>",
|
| 6 |
-
"
|
| 7 |
"legacy": false,
|
| 8 |
"model_max_length": 2048,
|
| 9 |
"pad_token": "</s>",
|
| 10 |
"padding_side": "right",
|
| 11 |
"sp_model_kwargs": {},
|
| 12 |
-
"tokenizer_class": "
|
| 13 |
"unk_token": "<unk>",
|
| 14 |
"use_default_system_prompt": false
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
"bos_token": "<s>",
|
| 32 |
"clean_up_tokenization_spaces": false,
|
| 33 |
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
"legacy": false,
|
| 36 |
"model_max_length": 2048,
|
| 37 |
"pad_token": "</s>",
|
| 38 |
"padding_side": "right",
|
| 39 |
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
"unk_token": "<unk>",
|
| 42 |
"use_default_system_prompt": false
|
| 43 |
}
|
checkpoint-68/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 68,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-68",
|
| 5 |
"epoch": 4.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -10,107 +10,107 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"entropy": 1.
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
-
"grad_norm": 1.
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
-
"loss": 2.
|
| 18 |
-
"mean_token_accuracy": 0.
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
-
"eval_entropy": 1.
|
| 25 |
-
"eval_loss": 1.
|
| 26 |
-
"eval_mean_token_accuracy": 0.
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
-
"eval_runtime":
|
| 29 |
-
"eval_samples_per_second":
|
| 30 |
-
"eval_steps_per_second":
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
-
"entropy": 1.
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
-
"grad_norm": 1.
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
-
"loss": 1.
|
| 39 |
-
"mean_token_accuracy": 0.
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
-
"entropy": 1.
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
-
"grad_norm": 2.
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
-
"loss": 0.
|
| 49 |
-
"mean_token_accuracy": 0.
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
-
"eval_entropy": 0.
|
| 56 |
-
"eval_loss": 0.
|
| 57 |
-
"eval_mean_token_accuracy": 0.
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
-
"eval_runtime":
|
| 60 |
-
"eval_samples_per_second":
|
| 61 |
-
"eval_steps_per_second":
|
| 62 |
"step": 34
|
| 63 |
},
|
| 64 |
{
|
| 65 |
-
"entropy": 0.
|
| 66 |
"epoch": 2.3636363636363638,
|
| 67 |
-
"grad_norm": 0.
|
| 68 |
"learning_rate": 0.00012266666666666668,
|
| 69 |
-
"loss": 0.
|
| 70 |
-
"mean_token_accuracy": 0.
|
| 71 |
"num_tokens": 54283.0,
|
| 72 |
"step": 40
|
| 73 |
},
|
| 74 |
{
|
| 75 |
-
"entropy": 0.
|
| 76 |
"epoch": 2.9696969696969697,
|
| 77 |
-
"grad_norm": 0.
|
| 78 |
"learning_rate": 9.6e-05,
|
| 79 |
-
"loss": 0.
|
| 80 |
-
"mean_token_accuracy": 0.
|
| 81 |
"num_tokens": 68175.0,
|
| 82 |
"step": 50
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"epoch": 3.0,
|
| 86 |
-
"eval_entropy": 0.
|
| 87 |
-
"eval_loss": 0.
|
| 88 |
-
"eval_mean_token_accuracy": 0.
|
| 89 |
"eval_num_tokens": 68703.0,
|
| 90 |
-
"eval_runtime":
|
| 91 |
-
"eval_samples_per_second":
|
| 92 |
-
"eval_steps_per_second":
|
| 93 |
"step": 51
|
| 94 |
},
|
| 95 |
{
|
| 96 |
-
"entropy": 0.
|
| 97 |
"epoch": 3.5454545454545454,
|
| 98 |
-
"grad_norm": 0.
|
| 99 |
"learning_rate": 6.933333333333334e-05,
|
| 100 |
-
"loss": 0.
|
| 101 |
-
"mean_token_accuracy": 0.
|
| 102 |
"num_tokens": 81270.0,
|
| 103 |
"step": 60
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"epoch": 4.0,
|
| 107 |
-
"eval_entropy": 0.
|
| 108 |
-
"eval_loss": 0.
|
| 109 |
-
"eval_mean_token_accuracy": 0.
|
| 110 |
"eval_num_tokens": 91604.0,
|
| 111 |
-
"eval_runtime":
|
| 112 |
-
"eval_samples_per_second":
|
| 113 |
-
"eval_steps_per_second":
|
| 114 |
"step": 68
|
| 115 |
}
|
| 116 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 68,
|
| 3 |
+
"best_metric": 0.46859970688819885,
|
| 4 |
"best_model_checkpoint": "out/taskmind_lora_peft/checkpoint-68",
|
| 5 |
"epoch": 4.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"entropy": 1.772234356403351,
|
| 14 |
"epoch": 0.6060606060606061,
|
| 15 |
+
"grad_norm": 1.3780473470687866,
|
| 16 |
"learning_rate": 0.00018,
|
| 17 |
+
"loss": 2.2849,
|
| 18 |
+
"mean_token_accuracy": 0.5951868265867233,
|
| 19 |
"num_tokens": 14004.0,
|
| 20 |
"step": 10
|
| 21 |
},
|
| 22 |
{
|
| 23 |
"epoch": 1.0,
|
| 24 |
+
"eval_entropy": 1.685779293378194,
|
| 25 |
+
"eval_loss": 1.5357812643051147,
|
| 26 |
+
"eval_mean_token_accuracy": 0.6872214078903198,
|
| 27 |
"eval_num_tokens": 22901.0,
|
| 28 |
+
"eval_runtime": 2.3332,
|
| 29 |
+
"eval_samples_per_second": 10.286,
|
| 30 |
+
"eval_steps_per_second": 1.286,
|
| 31 |
"step": 17
|
| 32 |
},
|
| 33 |
{
|
| 34 |
+
"entropy": 1.6779554203936928,
|
| 35 |
"epoch": 1.1818181818181819,
|
| 36 |
+
"grad_norm": 1.9584565162658691,
|
| 37 |
"learning_rate": 0.00017600000000000002,
|
| 38 |
+
"loss": 1.6949,
|
| 39 |
+
"mean_token_accuracy": 0.6669363661816246,
|
| 40 |
"num_tokens": 27108.0,
|
| 41 |
"step": 20
|
| 42 |
},
|
| 43 |
{
|
| 44 |
+
"entropy": 1.0465361922979355,
|
| 45 |
"epoch": 1.7878787878787878,
|
| 46 |
+
"grad_norm": 2.725850820541382,
|
| 47 |
"learning_rate": 0.00014933333333333335,
|
| 48 |
+
"loss": 0.9223,
|
| 49 |
+
"mean_token_accuracy": 0.8229832291603089,
|
| 50 |
"num_tokens": 41182.0,
|
| 51 |
"step": 30
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 2.0,
|
| 55 |
+
"eval_entropy": 0.5895721216996511,
|
| 56 |
+
"eval_loss": 0.557042121887207,
|
| 57 |
+
"eval_mean_token_accuracy": 0.9006072084108988,
|
| 58 |
"eval_num_tokens": 45802.0,
|
| 59 |
+
"eval_runtime": 1.4033,
|
| 60 |
+
"eval_samples_per_second": 17.102,
|
| 61 |
+
"eval_steps_per_second": 2.138,
|
| 62 |
"step": 34
|
| 63 |
},
|
| 64 |
{
|
| 65 |
+
"entropy": 0.5621798069853532,
|
| 66 |
"epoch": 2.3636363636363638,
|
| 67 |
+
"grad_norm": 0.853744626045227,
|
| 68 |
"learning_rate": 0.00012266666666666668,
|
| 69 |
+
"loss": 0.5175,
|
| 70 |
+
"mean_token_accuracy": 0.902722396348652,
|
| 71 |
"num_tokens": 54283.0,
|
| 72 |
"step": 40
|
| 73 |
},
|
| 74 |
{
|
| 75 |
+
"entropy": 0.46219056397676467,
|
| 76 |
"epoch": 2.9696969696969697,
|
| 77 |
+
"grad_norm": 0.8320155739784241,
|
| 78 |
"learning_rate": 9.6e-05,
|
| 79 |
+
"loss": 0.4347,
|
| 80 |
+
"mean_token_accuracy": 0.9223286896944046,
|
| 81 |
"num_tokens": 68175.0,
|
| 82 |
"step": 50
|
| 83 |
},
|
| 84 |
{
|
| 85 |
"epoch": 3.0,
|
| 86 |
+
"eval_entropy": 0.4948213994503021,
|
| 87 |
+
"eval_loss": 0.4862091839313507,
|
| 88 |
+
"eval_mean_token_accuracy": 0.9130085905392965,
|
| 89 |
"eval_num_tokens": 68703.0,
|
| 90 |
+
"eval_runtime": 1.594,
|
| 91 |
+
"eval_samples_per_second": 15.056,
|
| 92 |
+
"eval_steps_per_second": 1.882,
|
| 93 |
"step": 51
|
| 94 |
},
|
| 95 |
{
|
| 96 |
+
"entropy": 0.4372325147453107,
|
| 97 |
"epoch": 3.5454545454545454,
|
| 98 |
+
"grad_norm": 0.6874417662620544,
|
| 99 |
"learning_rate": 6.933333333333334e-05,
|
| 100 |
+
"loss": 0.4267,
|
| 101 |
+
"mean_token_accuracy": 0.920473597551647,
|
| 102 |
"num_tokens": 81270.0,
|
| 103 |
"step": 60
|
| 104 |
},
|
| 105 |
{
|
| 106 |
"epoch": 4.0,
|
| 107 |
+
"eval_entropy": 0.4760642449061076,
|
| 108 |
+
"eval_loss": 0.46859970688819885,
|
| 109 |
+
"eval_mean_token_accuracy": 0.9156773686408997,
|
| 110 |
"eval_num_tokens": 91604.0,
|
| 111 |
+
"eval_runtime": 1.7122,
|
| 112 |
+
"eval_samples_per_second": 14.017,
|
| 113 |
+
"eval_steps_per_second": 1.752,
|
| 114 |
"step": 68
|
| 115 |
}
|
| 116 |
],
|
checkpoint-68/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdfc63bd124cf8b58acfe531d95973daba5a5e131127762949c75765ea5acb7f
|
| 3 |
+
size 5880
|
checkpoint-85/README.md
CHANGED
|
@@ -6,6 +6,7 @@ tags:
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
|
|
|
| 9 |
- trl
|
| 10 |
---
|
| 11 |
|
|
|
|
| 6 |
- base_model:adapter:TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
| 7 |
- lora
|
| 8 |
- sft
|
| 9 |
+
- transformers
|
| 10 |
- trl
|
| 11 |
---
|
| 12 |
|
checkpoint-85/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path":
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 29 |
"rank_pattern": {},
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj"
|
| 34 |
],
|
| 35 |
"target_parameters": null,
|
| 36 |
"task_type": "CAUSAL_LM",
|
checkpoint-85/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0759e7617267665196662e1c86613a8737244bbac6bdc596e21dbfd3e571b57
|
| 3 |
+
size 9022864
|