cationic committed on
Commit
512fbaf
·
verified ·
1 Parent(s): e3fbfc7

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- base_model: CohereForAI/aya-expanse-8b
3
  library_name: peft
4
  model_name: base_dpo
5
  tags:
6
- - base_model:adapter:CohereForAI/aya-expanse-8b
7
  - dpo
8
  - lora
9
  - transformers
@@ -14,7 +14,7 @@ pipeline_tag: text-generation
14
 
15
  # Model Card for base_dpo
16
 
17
- This model is a fine-tuned version of [CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b).
18
  It has been trained using [TRL](https://github.com/huggingface/trl).
19
 
20
  ## Quick start
@@ -42,7 +42,7 @@ This model was trained with DPO, a method introduced in [Direct Preference Optim
42
  - TRL: 0.29.0
43
  - Transformers: 5.3.0
44
  - Pytorch: 2.10.0
45
- - Datasets: 4.7.0
46
  - Tokenizers: 0.22.2
47
 
48
  ## Citations
 
1
  ---
2
+ base_model: CohereLabs/tiny-aya-base
3
  library_name: peft
4
  model_name: base_dpo
5
  tags:
6
+ - base_model:adapter:CohereLabs/tiny-aya-base
7
  - dpo
8
  - lora
9
  - transformers
 
14
 
15
  # Model Card for base_dpo
16
 
17
+ This model is a fine-tuned version of [CohereLabs/tiny-aya-base](https://huggingface.co/CohereLabs/tiny-aya-base).
18
  It has been trained using [TRL](https://github.com/huggingface/trl).
19
 
20
  ## Quick start
 
42
  - TRL: 0.29.0
43
  - Transformers: 5.3.0
44
  - Pytorch: 2.10.0
45
+ - Datasets: 4.8.3
46
  - Tokenizers: 0.22.2
47
 
48
  ## Citations
adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "CohereForAI/aya-expanse-8b",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -30,12 +30,12 @@
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
- "v_proj",
34
- "gate_proj",
35
  "o_proj",
 
 
36
  "down_proj",
37
- "k_proj",
38
- "up_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "CohereLabs/tiny-aya-base",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
+ "k_proj",
 
34
  "o_proj",
35
+ "up_proj",
36
+ "v_proj",
37
  "down_proj",
38
+ "gate_proj"
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db467a2e6ded99b842b17ff70e0cd30ab29af6e5edf8e25815306aab9ed2971d
3
- size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a6eeecf40c6db9e5588467babd1e4390df88a826da87e404890b843ed89bead
3
+ size 120981200
checkpoint-15/README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- base_model: CohereForAI/aya-expanse-8b
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
- - base_model:adapter:CohereForAI/aya-expanse-8b
7
  - dpo
8
  - lora
9
  - transformers
 
1
  ---
2
+ base_model: CohereLabs/tiny-aya-base
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
6
+ - base_model:adapter:CohereLabs/tiny-aya-base
7
  - dpo
8
  - lora
9
  - transformers
checkpoint-15/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "CohereForAI/aya-expanse-8b",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -30,12 +30,12 @@
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
- "v_proj",
34
- "gate_proj",
35
  "o_proj",
 
 
36
  "down_proj",
37
- "k_proj",
38
- "up_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "CohereLabs/tiny-aya-base",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
+ "k_proj",
 
34
  "o_proj",
35
+ "up_proj",
36
+ "v_proj",
37
  "down_proj",
38
+ "gate_proj"
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
checkpoint-15/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db467a2e6ded99b842b17ff70e0cd30ab29af6e5edf8e25815306aab9ed2971d
3
- size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a6eeecf40c6db9e5588467babd1e4390df88a826da87e404890b843ed89bead
3
+ size 120981200
checkpoint-15/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb8ec488fe6f7af27e41098c7ca30778e8135c0ae3a2030f0fa78668ecace32e
3
- size 335929123
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1dcf2ecb817ca23bf8fc4ebb941e743a381688d80d510c1a0cf8306b8c2ee23
3
+ size 242259659
checkpoint-15/ref/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "CohereForAI/aya-expanse-8b",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -30,12 +30,12 @@
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
- "v_proj",
34
- "gate_proj",
35
  "o_proj",
 
 
36
  "down_proj",
37
- "k_proj",
38
- "up_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "CohereLabs/tiny-aya-base",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
+ "k_proj",
 
34
  "o_proj",
35
+ "up_proj",
36
+ "v_proj",
37
  "down_proj",
38
+ "gate_proj"
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
checkpoint-15/ref/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a56e30a5cd3d700ff121b2810cc55ba52a35ab186942ea53898b508426a067
3
- size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cf1f4b51f98d4bb2ab367ccd886c36cc66d553f81d2e83059837903ffe1778
3
+ size 60524472
checkpoint-15/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d72cb57ce120d4797ec0a544cee3792f228c9dca4cb1863e11cb341f0ec3e576
3
- size 20124086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd77e5246a42d44b52e94cb02bfe1ff1693e4315d8bf5bd264681710c03c6af
3
+ size 21374786
checkpoint-15/tokenizer_config.json CHANGED
@@ -4,9 +4,9 @@
4
  "bos_token": "<BOS_TOKEN>",
5
  "clean_up_tokenization_spaces": false,
6
  "cls_token": "<CLS>",
7
- "eos_token": "<|END_OF_TURN_TOKEN|>",
8
  "errors": "replace",
9
- "is_local": false,
10
  "legacy": true,
11
  "mask_token": "<MASK_TOKEN>",
12
  "model_max_length": 1000000000000000019884624838656,
@@ -15,6 +15,6 @@
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "CohereTokenizer",
18
- "unk_token": null,
19
  "use_default_system_prompt": false
20
  }
 
4
  "bos_token": "<BOS_TOKEN>",
5
  "clean_up_tokenization_spaces": false,
6
  "cls_token": "<CLS>",
7
+ "eos_token": "<EOS_TOKEN>",
8
  "errors": "replace",
9
+ "is_local": true,
10
  "legacy": true,
11
  "mask_token": "<MASK_TOKEN>",
12
  "model_max_length": 1000000000000000019884624838656,
 
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "CohereTokenizer",
18
+ "unk_token": "<UNK>",
19
  "use_default_system_prompt": false
20
  }
checkpoint-15/trainer_state.json CHANGED
@@ -10,21 +10,21 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 1.6097802864877802,
14
  "epoch": 2.0,
15
- "grad_norm": 0.081545390188694,
16
  "learning_rate": 6.909830056250527e-05,
17
- "logits/chosen": 1.135637378023223,
18
- "logits/rejected": 1.1737397978498365,
19
- "logps/chosen": -201.05456803974351,
20
- "logps/rejected": -248.0248629921361,
21
- "loss": 0.37813677787780764,
22
- "mean_token_accuracy": 0.4706153202998011,
23
- "num_tokens": 34538.0,
24
- "rewards/accuracies": 0.756578947368421,
25
- "rewards/chosen": 0.6639514451748446,
26
- "rewards/margins": 1.841196086649832,
27
- "rewards/rejected": -1.1772446451267522,
28
  "step": 10
29
  }
30
  ],
@@ -45,7 +45,7 @@
45
  "attributes": {}
46
  }
47
  },
48
- "total_flos": 3521274310950912.0,
49
  "train_batch_size": 4,
50
  "trial_name": null,
51
  "trial_params": null
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.091383949706429,
14
  "epoch": 2.0,
15
+ "grad_norm": 0.143918976187706,
16
  "learning_rate": 6.909830056250527e-05,
17
+ "logits/chosen": -2.6255019325219764,
18
+ "logits/rejected": -2.659680873301937,
19
+ "logps/chosen": -208.22477260388825,
20
+ "logps/rejected": -238.7575555098684,
21
+ "loss": 0.45360164642333983,
22
+ "mean_token_accuracy": 0.47160694003105164,
23
+ "num_tokens": 34910.0,
24
+ "rewards/accuracies": 0.7302631578947368,
25
+ "rewards/chosen": 0.3593159549391681,
26
+ "rewards/margins": 0.8960761683747956,
27
+ "rewards/rejected": -0.5367602110399228,
28
  "step": 10
29
  }
30
  ],
 
45
  "attributes": {}
46
  }
47
  },
48
+ "total_flos": 1447691195695104.0,
49
  "train_batch_size": 4,
50
  "trial_name": null,
51
  "trial_params": null
checkpoint-15/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56f2535013460d339bc97fe84138a5fb2a89ca0f8f6d8d41d6edd9b792763a90
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a760a647b55cf71029eb7fe81ef09d1df5449c3aca773d514abd1884660ada8
3
  size 5841
ref/adapter_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
- "base_model_name_or_path": "CohereForAI/aya-expanse-8b",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
@@ -30,12 +30,12 @@
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
- "v_proj",
34
- "gate_proj",
35
  "o_proj",
 
 
36
  "down_proj",
37
- "k_proj",
38
- "up_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
 
3
  "alpha_pattern": {},
4
  "arrow_config": null,
5
  "auto_mapping": null,
6
+ "base_model_name_or_path": "CohereLabs/tiny-aya-base",
7
  "bias": "none",
8
  "corda_config": null,
9
  "ensure_weight_tying": false,
 
30
  "revision": null,
31
  "target_modules": [
32
  "q_proj",
33
+ "k_proj",
 
34
  "o_proj",
35
+ "up_proj",
36
+ "v_proj",
37
  "down_proj",
38
+ "gate_proj"
 
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
ref/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a56e30a5cd3d700ff121b2810cc55ba52a35ab186942ea53898b508426a067
3
- size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cf1f4b51f98d4bb2ab367ccd886c36cc66d553f81d2e83059837903ffe1778
3
+ size 60524472
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d72cb57ce120d4797ec0a544cee3792f228c9dca4cb1863e11cb341f0ec3e576
3
- size 20124086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd77e5246a42d44b52e94cb02bfe1ff1693e4315d8bf5bd264681710c03c6af
3
+ size 21374786
tokenizer_config.json CHANGED
@@ -4,9 +4,9 @@
4
  "bos_token": "<BOS_TOKEN>",
5
  "clean_up_tokenization_spaces": false,
6
  "cls_token": "<CLS>",
7
- "eos_token": "<|END_OF_TURN_TOKEN|>",
8
  "errors": "replace",
9
- "is_local": false,
10
  "legacy": true,
11
  "mask_token": "<MASK_TOKEN>",
12
  "model_max_length": 1000000000000000019884624838656,
@@ -15,6 +15,6 @@
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "CohereTokenizer",
18
- "unk_token": null,
19
  "use_default_system_prompt": false
20
  }
 
4
  "bos_token": "<BOS_TOKEN>",
5
  "clean_up_tokenization_spaces": false,
6
  "cls_token": "<CLS>",
7
+ "eos_token": "<EOS_TOKEN>",
8
  "errors": "replace",
9
+ "is_local": true,
10
  "legacy": true,
11
  "mask_token": "<MASK_TOKEN>",
12
  "model_max_length": 1000000000000000019884624838656,
 
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "CohereTokenizer",
18
+ "unk_token": "<UNK>",
19
  "use_default_system_prompt": false
20
  }