Upload folder using huggingface_hub
Browse files- README.md +4 -4
- adapter_config.json +5 -5
- adapter_model.safetensors +2 -2
- checkpoint-15/README.md +2 -2
- checkpoint-15/adapter_config.json +5 -5
- checkpoint-15/adapter_model.safetensors +2 -2
- checkpoint-15/optimizer.pt +2 -2
- checkpoint-15/ref/adapter_config.json +5 -5
- checkpoint-15/ref/adapter_model.safetensors +2 -2
- checkpoint-15/tokenizer.json +2 -2
- checkpoint-15/tokenizer_config.json +3 -3
- checkpoint-15/trainer_state.json +14 -14
- checkpoint-15/training_args.bin +1 -1
- ref/adapter_config.json +5 -5
- ref/adapter_model.safetensors +2 -2
- tokenizer.json +2 -2
- tokenizer_config.json +3 -3
README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
---
|
| 2 |
-
base_model:
|
| 3 |
library_name: peft
|
| 4 |
model_name: base_dpo
|
| 5 |
tags:
|
| 6 |
-
- base_model:adapter:
|
| 7 |
- dpo
|
| 8 |
- lora
|
| 9 |
- transformers
|
|
@@ -14,7 +14,7 @@ pipeline_tag: text-generation
|
|
| 14 |
|
| 15 |
# Model Card for base_dpo
|
| 16 |
|
| 17 |
-
This model is a fine-tuned version of [
|
| 18 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 19 |
|
| 20 |
## Quick start
|
|
@@ -42,7 +42,7 @@ This model was trained with DPO, a method introduced in [Direct Preference Optim
|
|
| 42 |
- TRL: 0.29.0
|
| 43 |
- Transformers: 5.3.0
|
| 44 |
- Pytorch: 2.10.0
|
| 45 |
-
- Datasets: 4.
|
| 46 |
- Tokenizers: 0.22.2
|
| 47 |
|
| 48 |
## Citations
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model: CohereLabs/tiny-aya-base
|
| 3 |
library_name: peft
|
| 4 |
model_name: base_dpo
|
| 5 |
tags:
|
| 6 |
+
- base_model:adapter:CohereLabs/tiny-aya-base
|
| 7 |
- dpo
|
| 8 |
- lora
|
| 9 |
- transformers
|
|
|
|
| 14 |
|
| 15 |
# Model Card for base_dpo
|
| 16 |
|
| 17 |
+
This model is a fine-tuned version of [CohereLabs/tiny-aya-base](https://huggingface.co/CohereLabs/tiny-aya-base).
|
| 18 |
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 19 |
|
| 20 |
## Quick start
|
|
|
|
| 42 |
- TRL: 0.29.0
|
| 43 |
- Transformers: 5.3.0
|
| 44 |
- Pytorch: 2.10.0
|
| 45 |
+
- Datasets: 4.8.3
|
| 46 |
- Tokenizers: 0.22.2
|
| 47 |
|
| 48 |
## Citations
|
adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path": "
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -30,12 +30,12 @@
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
-
"
|
| 34 |
-
"gate_proj",
|
| 35 |
"o_proj",
|
|
|
|
|
|
|
| 36 |
"down_proj",
|
| 37 |
-
"
|
| 38 |
-
"up_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "CohereLabs/tiny-aya-base",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
+
"k_proj",
|
|
|
|
| 34 |
"o_proj",
|
| 35 |
+
"up_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
"down_proj",
|
| 38 |
+
"gate_proj"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a6eeecf40c6db9e5588467babd1e4390df88a826da87e404890b843ed89bead
|
| 3 |
+
size 120981200
|
checkpoint-15/README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
---
|
| 2 |
-
base_model:
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
| 6 |
-
- base_model:adapter:
|
| 7 |
- dpo
|
| 8 |
- lora
|
| 9 |
- transformers
|
|
|
|
| 1 |
---
|
| 2 |
+
base_model: CohereLabs/tiny-aya-base
|
| 3 |
library_name: peft
|
| 4 |
pipeline_tag: text-generation
|
| 5 |
tags:
|
| 6 |
+
- base_model:adapter:CohereLabs/tiny-aya-base
|
| 7 |
- dpo
|
| 8 |
- lora
|
| 9 |
- transformers
|
checkpoint-15/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path": "
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -30,12 +30,12 @@
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
-
"
|
| 34 |
-
"gate_proj",
|
| 35 |
"o_proj",
|
|
|
|
|
|
|
| 36 |
"down_proj",
|
| 37 |
-
"
|
| 38 |
-
"up_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "CohereLabs/tiny-aya-base",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
+
"k_proj",
|
|
|
|
| 34 |
"o_proj",
|
| 35 |
+
"up_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
"down_proj",
|
| 38 |
+
"gate_proj"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
checkpoint-15/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a6eeecf40c6db9e5588467babd1e4390df88a826da87e404890b843ed89bead
|
| 3 |
+
size 120981200
|
checkpoint-15/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1dcf2ecb817ca23bf8fc4ebb941e743a381688d80d510c1a0cf8306b8c2ee23
|
| 3 |
+
size 242259659
|
checkpoint-15/ref/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path": "
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -30,12 +30,12 @@
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
-
"
|
| 34 |
-
"gate_proj",
|
| 35 |
"o_proj",
|
|
|
|
|
|
|
| 36 |
"down_proj",
|
| 37 |
-
"
|
| 38 |
-
"up_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "CohereLabs/tiny-aya-base",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
+
"k_proj",
|
|
|
|
| 34 |
"o_proj",
|
| 35 |
+
"up_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
"down_proj",
|
| 38 |
+
"gate_proj"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
checkpoint-15/ref/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51cf1f4b51f98d4bb2ab367ccd886c36cc66d553f81d2e83059837903ffe1778
|
| 3 |
+
size 60524472
|
checkpoint-15/tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd77e5246a42d44b52e94cb02bfe1ff1693e4315d8bf5bd264681710c03c6af
|
| 3 |
+
size 21374786
|
checkpoint-15/tokenizer_config.json
CHANGED
|
@@ -4,9 +4,9 @@
|
|
| 4 |
"bos_token": "<BOS_TOKEN>",
|
| 5 |
"clean_up_tokenization_spaces": false,
|
| 6 |
"cls_token": "<CLS>",
|
| 7 |
-
"eos_token": "<
|
| 8 |
"errors": "replace",
|
| 9 |
-
"is_local":
|
| 10 |
"legacy": true,
|
| 11 |
"mask_token": "<MASK_TOKEN>",
|
| 12 |
"model_max_length": 1000000000000000019884624838656,
|
|
@@ -15,6 +15,6 @@
|
|
| 15 |
"sp_model_kwargs": {},
|
| 16 |
"spaces_between_special_tokens": false,
|
| 17 |
"tokenizer_class": "CohereTokenizer",
|
| 18 |
-
"unk_token":
|
| 19 |
"use_default_system_prompt": false
|
| 20 |
}
|
|
|
|
| 4 |
"bos_token": "<BOS_TOKEN>",
|
| 5 |
"clean_up_tokenization_spaces": false,
|
| 6 |
"cls_token": "<CLS>",
|
| 7 |
+
"eos_token": "<EOS_TOKEN>",
|
| 8 |
"errors": "replace",
|
| 9 |
+
"is_local": true,
|
| 10 |
"legacy": true,
|
| 11 |
"mask_token": "<MASK_TOKEN>",
|
| 12 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
|
| 15 |
"sp_model_kwargs": {},
|
| 16 |
"spaces_between_special_tokens": false,
|
| 17 |
"tokenizer_class": "CohereTokenizer",
|
| 18 |
+
"unk_token": "<UNK>",
|
| 19 |
"use_default_system_prompt": false
|
| 20 |
}
|
checkpoint-15/trainer_state.json
CHANGED
|
@@ -10,21 +10,21 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"entropy":
|
| 14 |
"epoch": 2.0,
|
| 15 |
-
"grad_norm": 0.
|
| 16 |
"learning_rate": 6.909830056250527e-05,
|
| 17 |
-
"logits/chosen":
|
| 18 |
-
"logits/rejected":
|
| 19 |
-
"logps/chosen": -
|
| 20 |
-
"logps/rejected": -
|
| 21 |
-
"loss": 0.
|
| 22 |
-
"mean_token_accuracy": 0.
|
| 23 |
-
"num_tokens":
|
| 24 |
-
"rewards/accuracies": 0.
|
| 25 |
-
"rewards/chosen": 0.
|
| 26 |
-
"rewards/margins":
|
| 27 |
-
"rewards/rejected": -
|
| 28 |
"step": 10
|
| 29 |
}
|
| 30 |
],
|
|
@@ -45,7 +45,7 @@
|
|
| 45 |
"attributes": {}
|
| 46 |
}
|
| 47 |
},
|
| 48 |
-
"total_flos":
|
| 49 |
"train_batch_size": 4,
|
| 50 |
"trial_name": null,
|
| 51 |
"trial_params": null
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"entropy": 2.091383949706429,
|
| 14 |
"epoch": 2.0,
|
| 15 |
+
"grad_norm": 0.143918976187706,
|
| 16 |
"learning_rate": 6.909830056250527e-05,
|
| 17 |
+
"logits/chosen": -2.6255019325219764,
|
| 18 |
+
"logits/rejected": -2.659680873301937,
|
| 19 |
+
"logps/chosen": -208.22477260388825,
|
| 20 |
+
"logps/rejected": -238.7575555098684,
|
| 21 |
+
"loss": 0.45360164642333983,
|
| 22 |
+
"mean_token_accuracy": 0.47160694003105164,
|
| 23 |
+
"num_tokens": 34910.0,
|
| 24 |
+
"rewards/accuracies": 0.7302631578947368,
|
| 25 |
+
"rewards/chosen": 0.3593159549391681,
|
| 26 |
+
"rewards/margins": 0.8960761683747956,
|
| 27 |
+
"rewards/rejected": -0.5367602110399228,
|
| 28 |
"step": 10
|
| 29 |
}
|
| 30 |
],
|
|
|
|
| 45 |
"attributes": {}
|
| 46 |
}
|
| 47 |
},
|
| 48 |
+
"total_flos": 1447691195695104.0,
|
| 49 |
"train_batch_size": 4,
|
| 50 |
"trial_name": null,
|
| 51 |
"trial_params": null
|
checkpoint-15/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5841
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a760a647b55cf71029eb7fe81ef09d1df5449c3aca773d514abd1884660ada8
|
| 3 |
size 5841
|
ref/adapter_config.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
-
"base_model_name_or_path": "
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
@@ -30,12 +30,12 @@
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
-
"
|
| 34 |
-
"gate_proj",
|
| 35 |
"o_proj",
|
|
|
|
|
|
|
| 36 |
"down_proj",
|
| 37 |
-
"
|
| 38 |
-
"up_proj"
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
|
|
|
| 3 |
"alpha_pattern": {},
|
| 4 |
"arrow_config": null,
|
| 5 |
"auto_mapping": null,
|
| 6 |
+
"base_model_name_or_path": "CohereLabs/tiny-aya-base",
|
| 7 |
"bias": "none",
|
| 8 |
"corda_config": null,
|
| 9 |
"ensure_weight_tying": false,
|
|
|
|
| 30 |
"revision": null,
|
| 31 |
"target_modules": [
|
| 32 |
"q_proj",
|
| 33 |
+
"k_proj",
|
|
|
|
| 34 |
"o_proj",
|
| 35 |
+
"up_proj",
|
| 36 |
+
"v_proj",
|
| 37 |
"down_proj",
|
| 38 |
+
"gate_proj"
|
|
|
|
| 39 |
],
|
| 40 |
"target_parameters": null,
|
| 41 |
"task_type": "CAUSAL_LM",
|
ref/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51cf1f4b51f98d4bb2ab367ccd886c36cc66d553f81d2e83059837903ffe1778
|
| 3 |
+
size 60524472
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd77e5246a42d44b52e94cb02bfe1ff1693e4315d8bf5bd264681710c03c6af
|
| 3 |
+
size 21374786
|
tokenizer_config.json
CHANGED
|
@@ -4,9 +4,9 @@
|
|
| 4 |
"bos_token": "<BOS_TOKEN>",
|
| 5 |
"clean_up_tokenization_spaces": false,
|
| 6 |
"cls_token": "<CLS>",
|
| 7 |
-
"eos_token": "<
|
| 8 |
"errors": "replace",
|
| 9 |
-
"is_local":
|
| 10 |
"legacy": true,
|
| 11 |
"mask_token": "<MASK_TOKEN>",
|
| 12 |
"model_max_length": 1000000000000000019884624838656,
|
|
@@ -15,6 +15,6 @@
|
|
| 15 |
"sp_model_kwargs": {},
|
| 16 |
"spaces_between_special_tokens": false,
|
| 17 |
"tokenizer_class": "CohereTokenizer",
|
| 18 |
-
"unk_token":
|
| 19 |
"use_default_system_prompt": false
|
| 20 |
}
|
|
|
|
| 4 |
"bos_token": "<BOS_TOKEN>",
|
| 5 |
"clean_up_tokenization_spaces": false,
|
| 6 |
"cls_token": "<CLS>",
|
| 7 |
+
"eos_token": "<EOS_TOKEN>",
|
| 8 |
"errors": "replace",
|
| 9 |
+
"is_local": true,
|
| 10 |
"legacy": true,
|
| 11 |
"mask_token": "<MASK_TOKEN>",
|
| 12 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
|
| 15 |
"sp_model_kwargs": {},
|
| 16 |
"spaces_between_special_tokens": false,
|
| 17 |
"tokenizer_class": "CohereTokenizer",
|
| 18 |
+
"unk_token": "<UNK>",
|
| 19 |
"use_default_system_prompt": false
|
| 20 |
}
|