Training in progress, epoch 1

Files changed (4) hide show

README.md CHANGED Viewed

@@ -5,8 +5,8 @@ model_name: dpo_simplification
 tags:
 - generated_from_trainer
 - unsloth
-- trl
 - dpo
 licence: license
 ---
@@ -28,18 +28,18 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/dpo_smiplification/runs/65n7eb10)
 This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
 ### Framework versions
-- TRL: 0.18.2
-- Transformers: 4.52.4
-- Pytorch: 2.7.0
 - Datasets: 3.6.0
-- Tokenizers: 0.21.1
 ## Citations

 tags:
 - generated_from_trainer
 - unsloth
 - dpo
+- trl
 licence: license
 ---
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/dpo_smiplification/runs/ztfwit0b)
 This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
 ### Framework versions
+- TRL: 0.23.0
+- Transformers: 4.56.2
+- Pytorch: 2.8.0
 - Datasets: 3.6.0
+- Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -15,25 +15,28 @@
   "loftq_config": {},
   "lora_alpha": 16,
   "lora_bias": false,
-  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "o_proj",
-    "k_proj",
     "v_proj",
-    "gate_proj",
     "q_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

   "loftq_config": {},
   "lora_alpha": 16,
   "lora_bias": false,
+  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "q_proj",
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8740bb048d09a4e666a4e82da812bb66b59a0e71a2061aa5361e6422a5559b5
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:d80ae13629cab0841cd4e8459e419577334bc45dcd9dc251f799f64010cebcfe
 size 167832240

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ccea210fd36a63fa1ce0ac98957941ed2ada55738267bd23984319dd76e9ed6
-size 6673

 version https://git-lfs.github.com/spec/v1
+oid sha256:d11c0521246a68bdb6a8335ed968d814e898bd344f90101460a310eed56b3895
+size 6865