IoakeimE committed on
Commit
0e6dac4
·
verified ·
1 Parent(s): 6deaeb5

Training in progress, epoch 1

Browse files
README.md CHANGED
@@ -5,8 +5,8 @@ model_name: dpo_simplification
5
  tags:
6
  - generated_from_trainer
7
  - unsloth
8
- - trl
9
  - dpo
 
10
  licence: license
11
  ---
12
 
@@ -28,18 +28,18 @@ print(output["generated_text"])
28
 
29
  ## Training procedure
30
 
31
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/dpo_smiplification/runs/65n7eb10)
32
 
33
 
34
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
35
 
36
  ### Framework versions
37
 
38
- - TRL: 0.18.2
39
- - Transformers: 4.52.4
40
- - Pytorch: 2.7.0
41
  - Datasets: 3.6.0
42
- - Tokenizers: 0.21.1
43
 
44
  ## Citations
45
 
 
5
  tags:
6
  - generated_from_trainer
7
  - unsloth
 
8
  - dpo
9
+ - trl
10
  licence: license
11
  ---
12
 
 
28
 
29
  ## Training procedure
30
 
31
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/dpo_smiplification/runs/ztfwit0b)
32
 
33
 
34
  This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
35
 
36
  ### Framework versions
37
 
38
+ - TRL: 0.23.0
39
+ - Transformers: 4.56.2
40
+ - Pytorch: 2.8.0
41
  - Datasets: 3.6.0
42
+ - Tokenizers: 0.22.1
43
 
44
  ## Citations
45
 
adapter_config.json CHANGED
@@ -15,25 +15,28 @@
15
  "loftq_config": {},
16
  "lora_alpha": 16,
17
  "lora_bias": false,
18
- "lora_dropout": 0,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
23
  "r": 16,
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
- "down_proj",
28
- "o_proj",
29
- "k_proj",
30
  "v_proj",
31
- "gate_proj",
32
  "q_proj",
33
- "up_proj"
 
 
 
 
34
  ],
 
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
37
  "use_dora": false,
 
38
  "use_rslora": false
39
  }
 
15
  "loftq_config": {},
16
  "lora_alpha": 16,
17
  "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
  "megatron_config": null,
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
  "r": 16,
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
 
 
28
  "v_proj",
 
29
  "q_proj",
30
+ "up_proj",
31
+ "k_proj",
32
+ "gate_proj",
33
+ "down_proj",
34
+ "o_proj"
35
  ],
36
+ "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
38
  "trainable_token_indices": null,
39
  "use_dora": false,
40
+ "use_qalora": false,
41
  "use_rslora": false
42
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8740bb048d09a4e666a4e82da812bb66b59a0e71a2061aa5361e6422a5559b5
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80ae13629cab0841cd4e8459e419577334bc45dcd9dc251f799f64010cebcfe
3
  size 167832240
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ccea210fd36a63fa1ce0ac98957941ed2ada55738267bd23984319dd76e9ed6
3
- size 6673
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d11c0521246a68bdb6a8335ed968d814e898bd344f90101460a310eed56b3895
3
+ size 6865