dejori committed on
Commit
baf130b
·
verified ·
1 Parent(s): 688c96a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +11 -0
  2. gemma-2b-dpo/README.md +72 -0
  3. gemma-2b-dpo/checkpoint-100/README.md +209 -0
  4. gemma-2b-dpo/checkpoint-100/adapter_config.json +46 -0
  5. gemma-2b-dpo/checkpoint-100/adapter_model.safetensors +3 -0
  6. gemma-2b-dpo/checkpoint-100/chat_template.jinja +4 -0
  7. gemma-2b-dpo/checkpoint-100/optimizer.pt +3 -0
  8. gemma-2b-dpo/checkpoint-100/rng_state.pth +3 -0
  9. gemma-2b-dpo/checkpoint-100/scheduler.pt +3 -0
  10. gemma-2b-dpo/checkpoint-100/tokenizer.json +3 -0
  11. gemma-2b-dpo/checkpoint-100/tokenizer_config.json +19 -0
  12. gemma-2b-dpo/checkpoint-100/trainer_state.json +334 -0
  13. gemma-2b-dpo/checkpoint-100/training_args.bin +3 -0
  14. gemma-2b-dpo/checkpoint-150/README.md +209 -0
  15. gemma-2b-dpo/checkpoint-150/adapter_config.json +46 -0
  16. gemma-2b-dpo/checkpoint-150/adapter_model.safetensors +3 -0
  17. gemma-2b-dpo/checkpoint-150/chat_template.jinja +4 -0
  18. gemma-2b-dpo/checkpoint-150/optimizer.pt +3 -0
  19. gemma-2b-dpo/checkpoint-150/rng_state.pth +3 -0
  20. gemma-2b-dpo/checkpoint-150/scheduler.pt +3 -0
  21. gemma-2b-dpo/checkpoint-150/tokenizer.json +3 -0
  22. gemma-2b-dpo/checkpoint-150/tokenizer_config.json +19 -0
  23. gemma-2b-dpo/checkpoint-150/trainer_state.json +484 -0
  24. gemma-2b-dpo/checkpoint-150/training_args.bin +3 -0
  25. gemma-2b-dpo/checkpoint-200/README.md +209 -0
  26. gemma-2b-dpo/checkpoint-200/adapter_config.json +46 -0
  27. gemma-2b-dpo/checkpoint-200/adapter_model.safetensors +3 -0
  28. gemma-2b-dpo/checkpoint-200/chat_template.jinja +4 -0
  29. gemma-2b-dpo/checkpoint-200/optimizer.pt +3 -0
  30. gemma-2b-dpo/checkpoint-200/rng_state.pth +3 -0
  31. gemma-2b-dpo/checkpoint-200/scheduler.pt +3 -0
  32. gemma-2b-dpo/checkpoint-200/tokenizer.json +3 -0
  33. gemma-2b-dpo/checkpoint-200/tokenizer_config.json +19 -0
  34. gemma-2b-dpo/checkpoint-200/trainer_state.json +634 -0
  35. gemma-2b-dpo/checkpoint-200/training_args.bin +3 -0
  36. gemma-2b-dpo/checkpoint-250/README.md +209 -0
  37. gemma-2b-dpo/checkpoint-250/adapter_config.json +46 -0
  38. gemma-2b-dpo/checkpoint-250/adapter_model.safetensors +3 -0
  39. gemma-2b-dpo/checkpoint-250/chat_template.jinja +4 -0
  40. gemma-2b-dpo/checkpoint-250/optimizer.pt +3 -0
  41. gemma-2b-dpo/checkpoint-250/rng_state.pth +3 -0
  42. gemma-2b-dpo/checkpoint-250/scheduler.pt +3 -0
  43. gemma-2b-dpo/checkpoint-250/tokenizer.json +3 -0
  44. gemma-2b-dpo/checkpoint-250/tokenizer_config.json +19 -0
  45. gemma-2b-dpo/checkpoint-250/trainer_state.json +784 -0
  46. gemma-2b-dpo/checkpoint-250/training_args.bin +3 -0
  47. gemma-2b-dpo/checkpoint-300/README.md +209 -0
  48. gemma-2b-dpo/checkpoint-300/adapter_config.json +46 -0
  49. gemma-2b-dpo/checkpoint-300/adapter_model.safetensors +3 -0
  50. gemma-2b-dpo/checkpoint-300/chat_template.jinja +4 -0
.gitattributes CHANGED
@@ -38,3 +38,14 @@ gemma-2b-dpo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
  gemma-9b-dpo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  gguf/gemma-2b-dpo-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
40
  gguf/gemma-2b-distilled-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
38
  gemma-9b-dpo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  gguf/gemma-2b-dpo-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
40
  gguf/gemma-2b-distilled-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
41
+ gemma-2b-dpo/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ gemma-2b-dpo/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ gemma-2b-dpo/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
+ gemma-2b-dpo/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
45
+ gemma-2b-dpo/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ gemma-2b-dpo/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
+ gemma-2b-dpo/checkpoint-400/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
+ gemma-2b-dpo/checkpoint-450/tokenizer.json filter=lfs diff=lfs merge=lfs -text
49
+ gemma-2b-dpo/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
50
+ gemma-2b-dpo/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
51
+ gemma-2b-dpo/checkpoint-540/tokenizer.json filter=lfs diff=lfs merge=lfs -text
gemma-2b-dpo/README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ model_name: gemma-2b-dpo-600
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ license: gemma
12
+ pipeline_tag: text-generation
13
+ ---
14
+
15
+ # Model Card for gemma-2b-dpo-600
16
+
17
+ This model is a fine-tuned version of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it).
18
+ It has been trained using [TRL](https://github.com/huggingface/trl).
19
+
20
+ ## Quick start
21
+
22
+ ```python
23
+ from transformers import pipeline
24
+
25
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
26
+ generator = pipeline("text-generation", model="<hub-id-or-local-path-of-gemma-2b-dpo-600>", device="cuda")
27
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
28
+ print(output["generated_text"])
29
+ ```
30
+
31
+ ## Training procedure
32
+
33
+
34
+
35
+
36
+ This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
37
+
38
+ ### Framework versions
39
+
40
+ - PEFT: 0.18.1
41
+ - TRL: 0.28.0
42
+ - Transformers: 5.2.0
43
+ - Pytorch: 2.5.1+cu124
44
+ - Datasets: 4.5.0
45
+ - Tokenizers: 0.22.2
46
+
47
+ ## Citations
48
+
49
+ Cite DPO as:
50
+
51
+ ```bibtex
52
+ @inproceedings{rafailov2023direct,
53
+ title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
54
+ author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
55
+ year = 2023,
56
+ booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
57
+ url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
58
+ editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
59
+ }
60
+ ```
61
+
62
+ Cite TRL as:
63
+
64
+ ```bibtex
65
+ @software{vonwerra2020trl,
66
+ title = {{TRL: Transformers Reinforcement Learning}},
67
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
68
+ license = {Apache-2.0},
69
+ url = {https://github.com/huggingface/trl},
70
+ year = {2020}
71
+ }
72
+ ```
gemma-2b-dpo/checkpoint-100/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
gemma-2b-dpo/checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/gemma-2-2b-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "v_proj",
35
+ "gate_proj",
36
+ "k_proj",
37
+ "down_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
gemma-2b-dpo/checkpoint-100/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2ebd73f8cbaa2e4529e7145398cf6eab41cc20ab84f8803b6740f2b62d3cd9
3
+ size 83115256
gemma-2b-dpo/checkpoint-100/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
gemma-2b-dpo/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4121779bcb5b52c4e6bf9f35c31b434b1d3e5da9512b366d7ddf549ba0d6843
3
+ size 42616388
gemma-2b-dpo/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405
3
+ size 14244
gemma-2b-dpo/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ed8b62efe8bca262c392a5e54068f61391c5a2eaa96781fe40ad3af0958511
3
+ size 1064
gemma-2b-dpo/checkpoint-100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
3
+ size 34362748
gemma-2b-dpo/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<eos>",
6
+ "extra_special_tokens": [
7
+ "<start_of_turn>",
8
+ "<end_of_turn>"
9
+ ],
10
+ "is_local": false,
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<pad>",
14
+ "sp_model_kwargs": {},
15
+ "spaces_between_special_tokens": false,
16
+ "tokenizer_class": "GemmaTokenizer",
17
+ "unk_token": "<unk>",
18
+ "use_default_system_prompt": false
19
+ }
gemma-2b-dpo/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.5578800557880056,
6
+ "eval_steps": 500,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02789400278940028,
14
+ "grad_norm": 2.7052793502807617,
15
+ "learning_rate": 3.7037037037037036e-07,
16
+ "logits/chosen": -6.208017826080322,
17
+ "logits/rejected": -6.18649435043335,
18
+ "logps/chosen": -417.861328125,
19
+ "logps/rejected": -431.774169921875,
20
+ "loss": 0.6978574275970459,
21
+ "rewards/accuracies": 0.25,
22
+ "rewards/chosen": 0.0027643204666674137,
23
+ "rewards/margins": -0.00830078125,
24
+ "rewards/rejected": 0.011065103113651276,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.05578800557880056,
29
+ "grad_norm": 1.9632341861724854,
30
+ "learning_rate": 8.333333333333333e-07,
31
+ "logits/chosen": -6.051701545715332,
32
+ "logits/rejected": -6.098549842834473,
33
+ "logps/chosen": -442.61126708984375,
34
+ "logps/rejected": -419.4737243652344,
35
+ "loss": 0.6965099811553955,
36
+ "rewards/accuracies": 0.44999998807907104,
37
+ "rewards/chosen": -0.0022751614451408386,
38
+ "rewards/margins": -0.004312096629291773,
39
+ "rewards/rejected": 0.002036933321505785,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.08368200836820083,
44
+ "grad_norm": 1.9358311891555786,
45
+ "learning_rate": 1.2962962962962962e-06,
46
+ "logits/chosen": -6.1482648849487305,
47
+ "logits/rejected": -6.208896636962891,
48
+ "logps/chosen": -419.15155029296875,
49
+ "logps/rejected": -393.37322998046875,
50
+ "loss": 0.6971890449523925,
51
+ "rewards/accuracies": 0.4749999940395355,
52
+ "rewards/chosen": 0.004753150977194309,
53
+ "rewards/margins": -0.006633720360696316,
54
+ "rewards/rejected": 0.0113868722692132,
55
+ "step": 15
56
+ },
57
+ {
58
+ "epoch": 0.11157601115760112,
59
+ "grad_norm": 2.137960195541382,
60
+ "learning_rate": 1.7592592592592594e-06,
61
+ "logits/chosen": -6.1889142990112305,
62
+ "logits/rejected": -6.147027015686035,
63
+ "logps/chosen": -449.2413024902344,
64
+ "logps/rejected": -387.8244934082031,
65
+ "loss": 0.694630479812622,
66
+ "rewards/accuracies": 0.574999988079071,
67
+ "rewards/chosen": -0.01739494316279888,
68
+ "rewards/margins": -0.0010753620881587267,
69
+ "rewards/rejected": -0.016319578513503075,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.1394700139470014,
74
+ "grad_norm": 2.610708475112915,
75
+ "learning_rate": 2.222222222222222e-06,
76
+ "logits/chosen": -6.098985195159912,
77
+ "logits/rejected": -6.146561145782471,
78
+ "logps/chosen": -528.6546020507812,
79
+ "logps/rejected": -517.2868041992188,
80
+ "loss": 0.6923945903778076,
81
+ "rewards/accuracies": 0.5,
82
+ "rewards/chosen": 0.013269426301121712,
83
+ "rewards/margins": 0.004172402434051037,
84
+ "rewards/rejected": 0.009097023867070675,
85
+ "step": 25
86
+ },
87
+ {
88
+ "epoch": 0.16736401673640167,
89
+ "grad_norm": 3.0792224407196045,
90
+ "learning_rate": 2.6851851851851856e-06,
91
+ "logits/chosen": -6.156611442565918,
92
+ "logits/rejected": -6.146718502044678,
93
+ "logps/chosen": -427.1123962402344,
94
+ "logps/rejected": -413.99810791015625,
95
+ "loss": 0.6963389396667481,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.0016099174972623587,
98
+ "rewards/margins": -0.0036583715118467808,
99
+ "rewards/rejected": 0.0052682883106172085,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.19525801952580196,
104
+ "grad_norm": 2.40751051902771,
105
+ "learning_rate": 3.1481481481481483e-06,
106
+ "logits/chosen": -6.270221710205078,
107
+ "logits/rejected": -6.222764492034912,
108
+ "logps/chosen": -433.89312744140625,
109
+ "logps/rejected": -442.81378173828125,
110
+ "loss": 0.6875874042510987,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": -0.003747978014871478,
113
+ "rewards/margins": 0.013033255934715271,
114
+ "rewards/rejected": -0.016781235113739967,
115
+ "step": 35
116
+ },
117
+ {
118
+ "epoch": 0.22315202231520223,
119
+ "grad_norm": 2.409308671951294,
120
+ "learning_rate": 3.6111111111111115e-06,
121
+ "logits/chosen": -6.171980857849121,
122
+ "logits/rejected": -6.236737251281738,
123
+ "logps/chosen": -411.51092529296875,
124
+ "logps/rejected": -454.578857421875,
125
+ "loss": 0.6975872993469239,
126
+ "rewards/accuracies": 0.5,
127
+ "rewards/chosen": -0.0006048586219549179,
128
+ "rewards/margins": -0.006687240209430456,
129
+ "rewards/rejected": 0.0060823829844594,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.2510460251046025,
134
+ "grad_norm": 2.8261911869049072,
135
+ "learning_rate": 4.074074074074074e-06,
136
+ "logits/chosen": -6.1633710861206055,
137
+ "logits/rejected": -6.245741367340088,
138
+ "logps/chosen": -373.363525390625,
139
+ "logps/rejected": -356.736572265625,
140
+ "loss": 0.6881499290466309,
141
+ "rewards/accuracies": 0.550000011920929,
142
+ "rewards/chosen": 0.01782766357064247,
143
+ "rewards/margins": 0.015053692273795605,
144
+ "rewards/rejected": 0.002773971762508154,
145
+ "step": 45
146
+ },
147
+ {
148
+ "epoch": 0.2789400278940028,
149
+ "grad_norm": 2.457179546356201,
150
+ "learning_rate": 4.537037037037038e-06,
151
+ "logits/chosen": -6.270019054412842,
152
+ "logits/rejected": -6.3202104568481445,
153
+ "logps/chosen": -466.30609130859375,
154
+ "logps/rejected": -476.45550537109375,
155
+ "loss": 0.6831833839416503,
156
+ "rewards/accuracies": 0.6000000238418579,
157
+ "rewards/chosen": 0.0046669007278978825,
158
+ "rewards/margins": 0.023042945191264153,
159
+ "rewards/rejected": -0.018376046791672707,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.3068340306834031,
164
+ "grad_norm": 1.6770554780960083,
165
+ "learning_rate": 5e-06,
166
+ "logits/chosen": -6.253265380859375,
167
+ "logits/rejected": -6.15267276763916,
168
+ "logps/chosen": -352.24908447265625,
169
+ "logps/rejected": -447.11444091796875,
170
+ "loss": 0.6791603088378906,
171
+ "rewards/accuracies": 0.6000000238418579,
172
+ "rewards/chosen": -0.010960197076201439,
173
+ "rewards/margins": 0.031198084354400635,
174
+ "rewards/rejected": -0.042158275842666626,
175
+ "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.33472803347280333,
179
+ "grad_norm": 2.6027019023895264,
180
+ "learning_rate": 4.9485596707818935e-06,
181
+ "logits/chosen": -6.205387592315674,
182
+ "logits/rejected": -6.259293079376221,
183
+ "logps/chosen": -439.732421875,
184
+ "logps/rejected": -412.8099670410156,
185
+ "loss": 0.6736814975738525,
186
+ "rewards/accuracies": 0.550000011920929,
187
+ "rewards/chosen": -0.010224836878478527,
188
+ "rewards/margins": 0.04464374855160713,
189
+ "rewards/rejected": -0.054868586361408234,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.36262203626220363,
194
+ "grad_norm": 2.1717166900634766,
195
+ "learning_rate": 4.897119341563787e-06,
196
+ "logits/chosen": -6.1334547996521,
197
+ "logits/rejected": -6.148266792297363,
198
+ "logps/chosen": -390.00433349609375,
199
+ "logps/rejected": -376.676513671875,
200
+ "loss": 0.6825191974639893,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -0.04612758383154869,
203
+ "rewards/margins": 0.03951488807797432,
204
+ "rewards/rejected": -0.08564247190952301,
205
+ "step": 65
206
+ },
207
+ {
208
+ "epoch": 0.3905160390516039,
209
+ "grad_norm": 2.2574119567871094,
210
+ "learning_rate": 4.845679012345679e-06,
211
+ "logits/chosen": -6.236250877380371,
212
+ "logits/rejected": -6.165186882019043,
213
+ "logps/chosen": -411.1315002441406,
214
+ "logps/rejected": -447.22100830078125,
215
+ "loss": 0.6402256488800049,
216
+ "rewards/accuracies": 0.675000011920929,
217
+ "rewards/chosen": -0.017545931041240692,
218
+ "rewards/margins": 0.12250369787216187,
219
+ "rewards/rejected": -0.14004963636398315,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.41841004184100417,
224
+ "grad_norm": 2.3837037086486816,
225
+ "learning_rate": 4.794238683127572e-06,
226
+ "logits/chosen": -6.256176948547363,
227
+ "logits/rejected": -6.213258266448975,
228
+ "logps/chosen": -437.463623046875,
229
+ "logps/rejected": -404.8554992675781,
230
+ "loss": 0.6703986167907715,
231
+ "rewards/accuracies": 0.7250000238418579,
232
+ "rewards/chosen": -0.03428981825709343,
233
+ "rewards/margins": 0.05360151082277298,
234
+ "rewards/rejected": -0.08789133280515671,
235
+ "step": 75
236
+ },
237
+ {
238
+ "epoch": 0.44630404463040446,
239
+ "grad_norm": 3.304287910461426,
240
+ "learning_rate": 4.742798353909465e-06,
241
+ "logits/chosen": -6.2820305824279785,
242
+ "logits/rejected": -6.221312522888184,
243
+ "logps/chosen": -455.2318420410156,
244
+ "logps/rejected": -422.08837890625,
245
+ "loss": 0.7040590286254883,
246
+ "rewards/accuracies": 0.6000000238418579,
247
+ "rewards/chosen": -0.0881255492568016,
248
+ "rewards/margins": -0.006679975427687168,
249
+ "rewards/rejected": -0.08144557476043701,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.47419804741980476,
254
+ "grad_norm": 2.6312427520751953,
255
+ "learning_rate": 4.691358024691358e-06,
256
+ "logits/chosen": -6.1796159744262695,
257
+ "logits/rejected": -6.193436622619629,
258
+ "logps/chosen": -423.59930419921875,
259
+ "logps/rejected": -486.1690979003906,
260
+ "loss": 0.6397527694702149,
261
+ "rewards/accuracies": 0.6000000238418579,
262
+ "rewards/chosen": -0.043790053576231,
263
+ "rewards/margins": 0.12486596405506134,
264
+ "rewards/rejected": -0.16865602135658264,
265
+ "step": 85
266
+ },
267
+ {
268
+ "epoch": 0.502092050209205,
269
+ "grad_norm": 2.3493549823760986,
270
+ "learning_rate": 4.6399176954732515e-06,
271
+ "logits/chosen": -6.136630058288574,
272
+ "logits/rejected": -6.202858924865723,
273
+ "logps/chosen": -467.627685546875,
274
+ "logps/rejected": -441.41241455078125,
275
+ "loss": 0.5936434745788575,
276
+ "rewards/accuracies": 0.925000011920929,
277
+ "rewards/chosen": 0.06891433894634247,
278
+ "rewards/margins": 0.26186972856521606,
279
+ "rewards/rejected": -0.19295534491539001,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.5299860529986054,
284
+ "grad_norm": 2.4952447414398193,
285
+ "learning_rate": 4.588477366255145e-06,
286
+ "logits/chosen": -6.1503586769104,
287
+ "logits/rejected": -6.144400596618652,
288
+ "logps/chosen": -355.2735290527344,
289
+ "logps/rejected": -409.51702880859375,
290
+ "loss": 0.6157774925231934,
291
+ "rewards/accuracies": 0.7749999761581421,
292
+ "rewards/chosen": -0.0699276328086853,
293
+ "rewards/margins": 0.22694334387779236,
294
+ "rewards/rejected": -0.2968709468841553,
295
+ "step": 95
296
+ },
297
+ {
298
+ "epoch": 0.5578800557880056,
299
+ "grad_norm": 2.5470480918884277,
300
+ "learning_rate": 4.537037037037038e-06,
301
+ "logits/chosen": -6.14028263092041,
302
+ "logits/rejected": -6.104605197906494,
303
+ "logps/chosen": -429.1048889160156,
304
+ "logps/rejected": -454.7377014160156,
305
+ "loss": 0.6300024032592774,
306
+ "rewards/accuracies": 0.7250000238418579,
307
+ "rewards/chosen": -0.05769091844558716,
308
+ "rewards/margins": 0.14401891827583313,
309
+ "rewards/rejected": -0.2017098367214203,
310
+ "step": 100
311
+ }
312
+ ],
313
+ "logging_steps": 5,
314
+ "max_steps": 540,
315
+ "num_input_tokens_seen": 0,
316
+ "num_train_epochs": 3,
317
+ "save_steps": 50,
318
+ "stateful_callbacks": {
319
+ "TrainerControl": {
320
+ "args": {
321
+ "should_epoch_stop": false,
322
+ "should_evaluate": false,
323
+ "should_log": false,
324
+ "should_save": true,
325
+ "should_training_stop": false
326
+ },
327
+ "attributes": {}
328
+ }
329
+ },
330
+ "total_flos": 0.0,
331
+ "train_batch_size": 1,
332
+ "trial_name": null,
333
+ "trial_params": null
334
+ }
gemma-2b-dpo/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9
3
+ size 5688
gemma-2b-dpo/checkpoint-150/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
gemma-2b-dpo/checkpoint-150/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/gemma-2-2b-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "v_proj",
35
+ "gate_proj",
36
+ "k_proj",
37
+ "down_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
gemma-2b-dpo/checkpoint-150/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e7281ac038a3a06b5f6bdf4372abd75e6c056a4533e9e5e306ad3eed5008bb
3
+ size 83115256
gemma-2b-dpo/checkpoint-150/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
gemma-2b-dpo/checkpoint-150/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af476b3fe04731657a48b4630d7070fac2df55bd08f5de1eed63298e735a9b38
3
+ size 42616388
gemma-2b-dpo/checkpoint-150/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9cd6a0487226e5bd30d1846894c82af483733ab4381b75bae9c0745e05d405
3
+ size 14244
gemma-2b-dpo/checkpoint-150/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25dd7b08411f6698ef9ec14f5080c8865e627e9cdd16cc854ed216ed294c1f45
3
+ size 1064
gemma-2b-dpo/checkpoint-150/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
3
+ size 34362748
gemma-2b-dpo/checkpoint-150/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<eos>",
6
+ "extra_special_tokens": [
7
+ "<start_of_turn>",
8
+ "<end_of_turn>"
9
+ ],
10
+ "is_local": false,
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<pad>",
14
+ "sp_model_kwargs": {},
15
+ "spaces_between_special_tokens": false,
16
+ "tokenizer_class": "GemmaTokenizer",
17
+ "unk_token": "<unk>",
18
+ "use_default_system_prompt": false
19
+ }
gemma-2b-dpo/checkpoint-150/trainer_state.json ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.8368200836820083,
6
+ "eval_steps": 500,
7
+ "global_step": 150,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02789400278940028,
14
+ "grad_norm": 2.7052793502807617,
15
+ "learning_rate": 3.7037037037037036e-07,
16
+ "logits/chosen": -6.208017826080322,
17
+ "logits/rejected": -6.18649435043335,
18
+ "logps/chosen": -417.861328125,
19
+ "logps/rejected": -431.774169921875,
20
+ "loss": 0.6978574275970459,
21
+ "rewards/accuracies": 0.25,
22
+ "rewards/chosen": 0.0027643204666674137,
23
+ "rewards/margins": -0.00830078125,
24
+ "rewards/rejected": 0.011065103113651276,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.05578800557880056,
29
+ "grad_norm": 1.9632341861724854,
30
+ "learning_rate": 8.333333333333333e-07,
31
+ "logits/chosen": -6.051701545715332,
32
+ "logits/rejected": -6.098549842834473,
33
+ "logps/chosen": -442.61126708984375,
34
+ "logps/rejected": -419.4737243652344,
35
+ "loss": 0.6965099811553955,
36
+ "rewards/accuracies": 0.44999998807907104,
37
+ "rewards/chosen": -0.0022751614451408386,
38
+ "rewards/margins": -0.004312096629291773,
39
+ "rewards/rejected": 0.002036933321505785,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.08368200836820083,
44
+ "grad_norm": 1.9358311891555786,
45
+ "learning_rate": 1.2962962962962962e-06,
46
+ "logits/chosen": -6.1482648849487305,
47
+ "logits/rejected": -6.208896636962891,
48
+ "logps/chosen": -419.15155029296875,
49
+ "logps/rejected": -393.37322998046875,
50
+ "loss": 0.6971890449523925,
51
+ "rewards/accuracies": 0.4749999940395355,
52
+ "rewards/chosen": 0.004753150977194309,
53
+ "rewards/margins": -0.006633720360696316,
54
+ "rewards/rejected": 0.0113868722692132,
55
+ "step": 15
56
+ },
57
+ {
58
+ "epoch": 0.11157601115760112,
59
+ "grad_norm": 2.137960195541382,
60
+ "learning_rate": 1.7592592592592594e-06,
61
+ "logits/chosen": -6.1889142990112305,
62
+ "logits/rejected": -6.147027015686035,
63
+ "logps/chosen": -449.2413024902344,
64
+ "logps/rejected": -387.8244934082031,
65
+ "loss": 0.694630479812622,
66
+ "rewards/accuracies": 0.574999988079071,
67
+ "rewards/chosen": -0.01739494316279888,
68
+ "rewards/margins": -0.0010753620881587267,
69
+ "rewards/rejected": -0.016319578513503075,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.1394700139470014,
74
+ "grad_norm": 2.610708475112915,
75
+ "learning_rate": 2.222222222222222e-06,
76
+ "logits/chosen": -6.098985195159912,
77
+ "logits/rejected": -6.146561145782471,
78
+ "logps/chosen": -528.6546020507812,
79
+ "logps/rejected": -517.2868041992188,
80
+ "loss": 0.6923945903778076,
81
+ "rewards/accuracies": 0.5,
82
+ "rewards/chosen": 0.013269426301121712,
83
+ "rewards/margins": 0.004172402434051037,
84
+ "rewards/rejected": 0.009097023867070675,
85
+ "step": 25
86
+ },
87
+ {
88
+ "epoch": 0.16736401673640167,
89
+ "grad_norm": 3.0792224407196045,
90
+ "learning_rate": 2.6851851851851856e-06,
91
+ "logits/chosen": -6.156611442565918,
92
+ "logits/rejected": -6.146718502044678,
93
+ "logps/chosen": -427.1123962402344,
94
+ "logps/rejected": -413.99810791015625,
95
+ "loss": 0.6963389396667481,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.0016099174972623587,
98
+ "rewards/margins": -0.0036583715118467808,
99
+ "rewards/rejected": 0.0052682883106172085,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.19525801952580196,
104
+ "grad_norm": 2.40751051902771,
105
+ "learning_rate": 3.1481481481481483e-06,
106
+ "logits/chosen": -6.270221710205078,
107
+ "logits/rejected": -6.222764492034912,
108
+ "logps/chosen": -433.89312744140625,
109
+ "logps/rejected": -442.81378173828125,
110
+ "loss": 0.6875874042510987,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": -0.003747978014871478,
113
+ "rewards/margins": 0.013033255934715271,
114
+ "rewards/rejected": -0.016781235113739967,
115
+ "step": 35
116
+ },
117
+ {
118
+ "epoch": 0.22315202231520223,
119
+ "grad_norm": 2.409308671951294,
120
+ "learning_rate": 3.6111111111111115e-06,
121
+ "logits/chosen": -6.171980857849121,
122
+ "logits/rejected": -6.236737251281738,
123
+ "logps/chosen": -411.51092529296875,
124
+ "logps/rejected": -454.578857421875,
125
+ "loss": 0.6975872993469239,
126
+ "rewards/accuracies": 0.5,
127
+ "rewards/chosen": -0.0006048586219549179,
128
+ "rewards/margins": -0.006687240209430456,
129
+ "rewards/rejected": 0.0060823829844594,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.2510460251046025,
134
+ "grad_norm": 2.8261911869049072,
135
+ "learning_rate": 4.074074074074074e-06,
136
+ "logits/chosen": -6.1633710861206055,
137
+ "logits/rejected": -6.245741367340088,
138
+ "logps/chosen": -373.363525390625,
139
+ "logps/rejected": -356.736572265625,
140
+ "loss": 0.6881499290466309,
141
+ "rewards/accuracies": 0.550000011920929,
142
+ "rewards/chosen": 0.01782766357064247,
143
+ "rewards/margins": 0.015053692273795605,
144
+ "rewards/rejected": 0.002773971762508154,
145
+ "step": 45
146
+ },
147
+ {
148
+ "epoch": 0.2789400278940028,
149
+ "grad_norm": 2.457179546356201,
150
+ "learning_rate": 4.537037037037038e-06,
151
+ "logits/chosen": -6.270019054412842,
152
+ "logits/rejected": -6.3202104568481445,
153
+ "logps/chosen": -466.30609130859375,
154
+ "logps/rejected": -476.45550537109375,
155
+ "loss": 0.6831833839416503,
156
+ "rewards/accuracies": 0.6000000238418579,
157
+ "rewards/chosen": 0.0046669007278978825,
158
+ "rewards/margins": 0.023042945191264153,
159
+ "rewards/rejected": -0.018376046791672707,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.3068340306834031,
164
+ "grad_norm": 1.6770554780960083,
165
+ "learning_rate": 5e-06,
166
+ "logits/chosen": -6.253265380859375,
167
+ "logits/rejected": -6.15267276763916,
168
+ "logps/chosen": -352.24908447265625,
169
+ "logps/rejected": -447.11444091796875,
170
+ "loss": 0.6791603088378906,
171
+ "rewards/accuracies": 0.6000000238418579,
172
+ "rewards/chosen": -0.010960197076201439,
173
+ "rewards/margins": 0.031198084354400635,
174
+ "rewards/rejected": -0.042158275842666626,
175
+ "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.33472803347280333,
179
+ "grad_norm": 2.6027019023895264,
180
+ "learning_rate": 4.9485596707818935e-06,
181
+ "logits/chosen": -6.205387592315674,
182
+ "logits/rejected": -6.259293079376221,
183
+ "logps/chosen": -439.732421875,
184
+ "logps/rejected": -412.8099670410156,
185
+ "loss": 0.6736814975738525,
186
+ "rewards/accuracies": 0.550000011920929,
187
+ "rewards/chosen": -0.010224836878478527,
188
+ "rewards/margins": 0.04464374855160713,
189
+ "rewards/rejected": -0.054868586361408234,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.36262203626220363,
194
+ "grad_norm": 2.1717166900634766,
195
+ "learning_rate": 4.897119341563787e-06,
196
+ "logits/chosen": -6.1334547996521,
197
+ "logits/rejected": -6.148266792297363,
198
+ "logps/chosen": -390.00433349609375,
199
+ "logps/rejected": -376.676513671875,
200
+ "loss": 0.6825191974639893,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -0.04612758383154869,
203
+ "rewards/margins": 0.03951488807797432,
204
+ "rewards/rejected": -0.08564247190952301,
205
+ "step": 65
206
+ },
207
+ {
208
+ "epoch": 0.3905160390516039,
209
+ "grad_norm": 2.2574119567871094,
210
+ "learning_rate": 4.845679012345679e-06,
211
+ "logits/chosen": -6.236250877380371,
212
+ "logits/rejected": -6.165186882019043,
213
+ "logps/chosen": -411.1315002441406,
214
+ "logps/rejected": -447.22100830078125,
215
+ "loss": 0.6402256488800049,
216
+ "rewards/accuracies": 0.675000011920929,
217
+ "rewards/chosen": -0.017545931041240692,
218
+ "rewards/margins": 0.12250369787216187,
219
+ "rewards/rejected": -0.14004963636398315,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.41841004184100417,
224
+ "grad_norm": 2.3837037086486816,
225
+ "learning_rate": 4.794238683127572e-06,
226
+ "logits/chosen": -6.256176948547363,
227
+ "logits/rejected": -6.213258266448975,
228
+ "logps/chosen": -437.463623046875,
229
+ "logps/rejected": -404.8554992675781,
230
+ "loss": 0.6703986167907715,
231
+ "rewards/accuracies": 0.7250000238418579,
232
+ "rewards/chosen": -0.03428981825709343,
233
+ "rewards/margins": 0.05360151082277298,
234
+ "rewards/rejected": -0.08789133280515671,
235
+ "step": 75
236
+ },
237
+ {
238
+ "epoch": 0.44630404463040446,
239
+ "grad_norm": 3.304287910461426,
240
+ "learning_rate": 4.742798353909465e-06,
241
+ "logits/chosen": -6.2820305824279785,
242
+ "logits/rejected": -6.221312522888184,
243
+ "logps/chosen": -455.2318420410156,
244
+ "logps/rejected": -422.08837890625,
245
+ "loss": 0.7040590286254883,
246
+ "rewards/accuracies": 0.6000000238418579,
247
+ "rewards/chosen": -0.0881255492568016,
248
+ "rewards/margins": -0.006679975427687168,
249
+ "rewards/rejected": -0.08144557476043701,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.47419804741980476,
254
+ "grad_norm": 2.6312427520751953,
255
+ "learning_rate": 4.691358024691358e-06,
256
+ "logits/chosen": -6.1796159744262695,
257
+ "logits/rejected": -6.193436622619629,
258
+ "logps/chosen": -423.59930419921875,
259
+ "logps/rejected": -486.1690979003906,
260
+ "loss": 0.6397527694702149,
261
+ "rewards/accuracies": 0.6000000238418579,
262
+ "rewards/chosen": -0.043790053576231,
263
+ "rewards/margins": 0.12486596405506134,
264
+ "rewards/rejected": -0.16865602135658264,
265
+ "step": 85
266
+ },
267
+ {
268
+ "epoch": 0.502092050209205,
269
+ "grad_norm": 2.3493549823760986,
270
+ "learning_rate": 4.6399176954732515e-06,
271
+ "logits/chosen": -6.136630058288574,
272
+ "logits/rejected": -6.202858924865723,
273
+ "logps/chosen": -467.627685546875,
274
+ "logps/rejected": -441.41241455078125,
275
+ "loss": 0.5936434745788575,
276
+ "rewards/accuracies": 0.925000011920929,
277
+ "rewards/chosen": 0.06891433894634247,
278
+ "rewards/margins": 0.26186972856521606,
279
+ "rewards/rejected": -0.19295534491539001,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.5299860529986054,
284
+ "grad_norm": 2.4952447414398193,
285
+ "learning_rate": 4.588477366255145e-06,
286
+ "logits/chosen": -6.1503586769104,
287
+ "logits/rejected": -6.144400596618652,
288
+ "logps/chosen": -355.2735290527344,
289
+ "logps/rejected": -409.51702880859375,
290
+ "loss": 0.6157774925231934,
291
+ "rewards/accuracies": 0.7749999761581421,
292
+ "rewards/chosen": -0.0699276328086853,
293
+ "rewards/margins": 0.22694334387779236,
294
+ "rewards/rejected": -0.2968709468841553,
295
+ "step": 95
296
+ },
297
+ {
298
+ "epoch": 0.5578800557880056,
299
+ "grad_norm": 2.5470480918884277,
300
+ "learning_rate": 4.537037037037038e-06,
301
+ "logits/chosen": -6.14028263092041,
302
+ "logits/rejected": -6.104605197906494,
303
+ "logps/chosen": -429.1048889160156,
304
+ "logps/rejected": -454.7377014160156,
305
+ "loss": 0.6300024032592774,
306
+ "rewards/accuracies": 0.7250000238418579,
307
+ "rewards/chosen": -0.05769091844558716,
308
+ "rewards/margins": 0.14401891827583313,
309
+ "rewards/rejected": -0.2017098367214203,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.5857740585774058,
314
+ "grad_norm": 2.6023478507995605,
315
+ "learning_rate": 4.485596707818931e-06,
316
+ "logits/chosen": -6.196796894073486,
317
+ "logits/rejected": -6.226622104644775,
318
+ "logps/chosen": -442.52685546875,
319
+ "logps/rejected": -516.7334594726562,
320
+ "loss": 0.6245638847351074,
321
+ "rewards/accuracies": 0.699999988079071,
322
+ "rewards/chosen": -0.18519389629364014,
323
+ "rewards/margins": 0.24079546332359314,
324
+ "rewards/rejected": -0.42598938941955566,
325
+ "step": 105
326
+ },
327
+ {
328
+ "epoch": 0.6136680613668062,
329
+ "grad_norm": 2.0638511180877686,
330
+ "learning_rate": 4.434156378600823e-06,
331
+ "logits/chosen": -6.1991071701049805,
332
+ "logits/rejected": -6.119466781616211,
333
+ "logps/chosen": -410.86669921875,
334
+ "logps/rejected": -450.365478515625,
335
+ "loss": 0.6201879501342773,
336
+ "rewards/accuracies": 0.800000011920929,
337
+ "rewards/chosen": -0.0853937491774559,
338
+ "rewards/margins": 0.17646726965904236,
339
+ "rewards/rejected": -0.26186102628707886,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.6415620641562064,
344
+ "grad_norm": 2.3625364303588867,
345
+ "learning_rate": 4.382716049382716e-06,
346
+ "logits/chosen": -6.220386505126953,
347
+ "logits/rejected": -6.223449230194092,
348
+ "logps/chosen": -435.92626953125,
349
+ "logps/rejected": -495.6065368652344,
350
+ "loss": 0.6151515483856201,
351
+ "rewards/accuracies": 0.824999988079071,
352
+ "rewards/chosen": -0.2208656519651413,
353
+ "rewards/margins": 0.21172885596752167,
354
+ "rewards/rejected": -0.43259453773498535,
355
+ "step": 115
356
+ },
357
+ {
358
+ "epoch": 0.6694560669456067,
359
+ "grad_norm": 1.8082666397094727,
360
+ "learning_rate": 4.331275720164609e-06,
361
+ "logits/chosen": -6.262181282043457,
362
+ "logits/rejected": -6.250016212463379,
363
+ "logps/chosen": -354.21795654296875,
364
+ "logps/rejected": -389.14556884765625,
365
+ "loss": 0.6109379768371582,
366
+ "rewards/accuracies": 0.7749999761581421,
367
+ "rewards/chosen": -0.17473874986171722,
368
+ "rewards/margins": 0.19315743446350098,
369
+ "rewards/rejected": -0.367896169424057,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.697350069735007,
374
+ "grad_norm": 1.9556658267974854,
375
+ "learning_rate": 4.2798353909465025e-06,
376
+ "logits/chosen": -6.167700290679932,
377
+ "logits/rejected": -6.1421003341674805,
378
+ "logps/chosen": -379.1827392578125,
379
+ "logps/rejected": -426.69549560546875,
380
+ "loss": 0.6202447414398193,
381
+ "rewards/accuracies": 0.699999988079071,
382
+ "rewards/chosen": -0.17535170912742615,
383
+ "rewards/margins": 0.18863627314567566,
384
+ "rewards/rejected": -0.3639879822731018,
385
+ "step": 125
386
+ },
387
+ {
388
+ "epoch": 0.7252440725244073,
389
+ "grad_norm": 3.001298666000366,
390
+ "learning_rate": 4.228395061728396e-06,
391
+ "logits/chosen": -6.2535905838012695,
392
+ "logits/rejected": -6.232400894165039,
393
+ "logps/chosen": -424.8458557128906,
394
+ "logps/rejected": -494.52960205078125,
395
+ "loss": 0.5493914127349854,
396
+ "rewards/accuracies": 0.875,
397
+ "rewards/chosen": -0.22046081721782684,
398
+ "rewards/margins": 0.3785194754600525,
399
+ "rewards/rejected": -0.5989803075790405,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.7531380753138075,
404
+ "grad_norm": 2.5210413932800293,
405
+ "learning_rate": 4.176954732510288e-06,
406
+ "logits/chosen": -6.078260898590088,
407
+ "logits/rejected": -6.0126447677612305,
408
+ "logps/chosen": -414.69940185546875,
409
+ "logps/rejected": -432.3282165527344,
410
+ "loss": 0.579456901550293,
411
+ "rewards/accuracies": 0.875,
412
+ "rewards/chosen": -0.15084640681743622,
413
+ "rewards/margins": 0.2960701882839203,
414
+ "rewards/rejected": -0.4469165802001953,
415
+ "step": 135
416
+ },
417
+ {
418
+ "epoch": 0.7810320781032078,
419
+ "grad_norm": 2.6807265281677246,
420
+ "learning_rate": 4.125514403292181e-06,
421
+ "logits/chosen": -6.243051052093506,
422
+ "logits/rejected": -6.220357418060303,
423
+ "logps/chosen": -400.6156311035156,
424
+ "logps/rejected": -450.0393981933594,
425
+ "loss": 0.5514531135559082,
426
+ "rewards/accuracies": 0.8999999761581421,
427
+ "rewards/chosen": -0.25727975368499756,
428
+ "rewards/margins": 0.3991738259792328,
429
+ "rewards/rejected": -0.6564534902572632,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.8089260808926081,
434
+ "grad_norm": 2.4137353897094727,
435
+ "learning_rate": 4.074074074074074e-06,
436
+ "logits/chosen": -6.186558246612549,
437
+ "logits/rejected": -6.139374256134033,
438
+ "logps/chosen": -442.314453125,
439
+ "logps/rejected": -491.584716796875,
440
+ "loss": 0.5633067131042481,
441
+ "rewards/accuracies": 0.800000011920929,
442
+ "rewards/chosen": -0.35670942068099976,
443
+ "rewards/margins": 0.38038796186447144,
444
+ "rewards/rejected": -0.7370973825454712,
445
+ "step": 145
446
+ },
447
+ {
448
+ "epoch": 0.8368200836820083,
449
+ "grad_norm": 2.1043145656585693,
450
+ "learning_rate": 4.022633744855967e-06,
451
+ "logits/chosen": -6.177689552307129,
452
+ "logits/rejected": -6.167322635650635,
453
+ "logps/chosen": -435.2288513183594,
454
+ "logps/rejected": -469.41436767578125,
455
+ "loss": 0.5640112876892089,
456
+ "rewards/accuracies": 0.7749999761581421,
457
+ "rewards/chosen": -0.32313138246536255,
458
+ "rewards/margins": 0.45979684591293335,
459
+ "rewards/rejected": -0.7829282283782959,
460
+ "step": 150
461
+ }
462
+ ],
463
+ "logging_steps": 5,
464
+ "max_steps": 540,
465
+ "num_input_tokens_seen": 0,
466
+ "num_train_epochs": 3,
467
+ "save_steps": 50,
468
+ "stateful_callbacks": {
469
+ "TrainerControl": {
470
+ "args": {
471
+ "should_epoch_stop": false,
472
+ "should_evaluate": false,
473
+ "should_log": false,
474
+ "should_save": true,
475
+ "should_training_stop": false
476
+ },
477
+ "attributes": {}
478
+ }
479
+ },
480
+ "total_flos": 0.0,
481
+ "train_batch_size": 1,
482
+ "trial_name": null,
483
+ "trial_params": null
484
+ }
gemma-2b-dpo/checkpoint-150/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9
3
+ size 5688
gemma-2b-dpo/checkpoint-200/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
gemma-2b-dpo/checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/gemma-2-2b-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "v_proj",
35
+ "gate_proj",
36
+ "k_proj",
37
+ "down_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
gemma-2b-dpo/checkpoint-200/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a8b1c232692070243d2eea541206d224f59667647d540f141eb6ffc01921c7
3
+ size 83115256
gemma-2b-dpo/checkpoint-200/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
gemma-2b-dpo/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc2a947fbcec57f9eb4155feb8616e481574b23f6236819610628b8cc9fa67f
3
+ size 42616388
gemma-2b-dpo/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165
3
+ size 14244
gemma-2b-dpo/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b8fddcda8c1fe5171026971b2f6b7f66207f007bd5795edf244b841b0d1519
3
+ size 1064
gemma-2b-dpo/checkpoint-200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
3
+ size 34362748
gemma-2b-dpo/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<eos>",
6
+ "extra_special_tokens": [
7
+ "<start_of_turn>",
8
+ "<end_of_turn>"
9
+ ],
10
+ "is_local": false,
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<pad>",
14
+ "sp_model_kwargs": {},
15
+ "spaces_between_special_tokens": false,
16
+ "tokenizer_class": "GemmaTokenizer",
17
+ "unk_token": "<unk>",
18
+ "use_default_system_prompt": false
19
+ }
gemma-2b-dpo/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.1115760111576012,
6
+ "eval_steps": 500,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02789400278940028,
14
+ "grad_norm": 2.7052793502807617,
15
+ "learning_rate": 3.7037037037037036e-07,
16
+ "logits/chosen": -6.208017826080322,
17
+ "logits/rejected": -6.18649435043335,
18
+ "logps/chosen": -417.861328125,
19
+ "logps/rejected": -431.774169921875,
20
+ "loss": 0.6978574275970459,
21
+ "rewards/accuracies": 0.25,
22
+ "rewards/chosen": 0.0027643204666674137,
23
+ "rewards/margins": -0.00830078125,
24
+ "rewards/rejected": 0.011065103113651276,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.05578800557880056,
29
+ "grad_norm": 1.9632341861724854,
30
+ "learning_rate": 8.333333333333333e-07,
31
+ "logits/chosen": -6.051701545715332,
32
+ "logits/rejected": -6.098549842834473,
33
+ "logps/chosen": -442.61126708984375,
34
+ "logps/rejected": -419.4737243652344,
35
+ "loss": 0.6965099811553955,
36
+ "rewards/accuracies": 0.44999998807907104,
37
+ "rewards/chosen": -0.0022751614451408386,
38
+ "rewards/margins": -0.004312096629291773,
39
+ "rewards/rejected": 0.002036933321505785,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.08368200836820083,
44
+ "grad_norm": 1.9358311891555786,
45
+ "learning_rate": 1.2962962962962962e-06,
46
+ "logits/chosen": -6.1482648849487305,
47
+ "logits/rejected": -6.208896636962891,
48
+ "logps/chosen": -419.15155029296875,
49
+ "logps/rejected": -393.37322998046875,
50
+ "loss": 0.6971890449523925,
51
+ "rewards/accuracies": 0.4749999940395355,
52
+ "rewards/chosen": 0.004753150977194309,
53
+ "rewards/margins": -0.006633720360696316,
54
+ "rewards/rejected": 0.0113868722692132,
55
+ "step": 15
56
+ },
57
+ {
58
+ "epoch": 0.11157601115760112,
59
+ "grad_norm": 2.137960195541382,
60
+ "learning_rate": 1.7592592592592594e-06,
61
+ "logits/chosen": -6.1889142990112305,
62
+ "logits/rejected": -6.147027015686035,
63
+ "logps/chosen": -449.2413024902344,
64
+ "logps/rejected": -387.8244934082031,
65
+ "loss": 0.694630479812622,
66
+ "rewards/accuracies": 0.574999988079071,
67
+ "rewards/chosen": -0.01739494316279888,
68
+ "rewards/margins": -0.0010753620881587267,
69
+ "rewards/rejected": -0.016319578513503075,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.1394700139470014,
74
+ "grad_norm": 2.610708475112915,
75
+ "learning_rate": 2.222222222222222e-06,
76
+ "logits/chosen": -6.098985195159912,
77
+ "logits/rejected": -6.146561145782471,
78
+ "logps/chosen": -528.6546020507812,
79
+ "logps/rejected": -517.2868041992188,
80
+ "loss": 0.6923945903778076,
81
+ "rewards/accuracies": 0.5,
82
+ "rewards/chosen": 0.013269426301121712,
83
+ "rewards/margins": 0.004172402434051037,
84
+ "rewards/rejected": 0.009097023867070675,
85
+ "step": 25
86
+ },
87
+ {
88
+ "epoch": 0.16736401673640167,
89
+ "grad_norm": 3.0792224407196045,
90
+ "learning_rate": 2.6851851851851856e-06,
91
+ "logits/chosen": -6.156611442565918,
92
+ "logits/rejected": -6.146718502044678,
93
+ "logps/chosen": -427.1123962402344,
94
+ "logps/rejected": -413.99810791015625,
95
+ "loss": 0.6963389396667481,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.0016099174972623587,
98
+ "rewards/margins": -0.0036583715118467808,
99
+ "rewards/rejected": 0.0052682883106172085,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.19525801952580196,
104
+ "grad_norm": 2.40751051902771,
105
+ "learning_rate": 3.1481481481481483e-06,
106
+ "logits/chosen": -6.270221710205078,
107
+ "logits/rejected": -6.222764492034912,
108
+ "logps/chosen": -433.89312744140625,
109
+ "logps/rejected": -442.81378173828125,
110
+ "loss": 0.6875874042510987,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": -0.003747978014871478,
113
+ "rewards/margins": 0.013033255934715271,
114
+ "rewards/rejected": -0.016781235113739967,
115
+ "step": 35
116
+ },
117
+ {
118
+ "epoch": 0.22315202231520223,
119
+ "grad_norm": 2.409308671951294,
120
+ "learning_rate": 3.6111111111111115e-06,
121
+ "logits/chosen": -6.171980857849121,
122
+ "logits/rejected": -6.236737251281738,
123
+ "logps/chosen": -411.51092529296875,
124
+ "logps/rejected": -454.578857421875,
125
+ "loss": 0.6975872993469239,
126
+ "rewards/accuracies": 0.5,
127
+ "rewards/chosen": -0.0006048586219549179,
128
+ "rewards/margins": -0.006687240209430456,
129
+ "rewards/rejected": 0.0060823829844594,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.2510460251046025,
134
+ "grad_norm": 2.8261911869049072,
135
+ "learning_rate": 4.074074074074074e-06,
136
+ "logits/chosen": -6.1633710861206055,
137
+ "logits/rejected": -6.245741367340088,
138
+ "logps/chosen": -373.363525390625,
139
+ "logps/rejected": -356.736572265625,
140
+ "loss": 0.6881499290466309,
141
+ "rewards/accuracies": 0.550000011920929,
142
+ "rewards/chosen": 0.01782766357064247,
143
+ "rewards/margins": 0.015053692273795605,
144
+ "rewards/rejected": 0.002773971762508154,
145
+ "step": 45
146
+ },
147
+ {
148
+ "epoch": 0.2789400278940028,
149
+ "grad_norm": 2.457179546356201,
150
+ "learning_rate": 4.537037037037038e-06,
151
+ "logits/chosen": -6.270019054412842,
152
+ "logits/rejected": -6.3202104568481445,
153
+ "logps/chosen": -466.30609130859375,
154
+ "logps/rejected": -476.45550537109375,
155
+ "loss": 0.6831833839416503,
156
+ "rewards/accuracies": 0.6000000238418579,
157
+ "rewards/chosen": 0.0046669007278978825,
158
+ "rewards/margins": 0.023042945191264153,
159
+ "rewards/rejected": -0.018376046791672707,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.3068340306834031,
164
+ "grad_norm": 1.6770554780960083,
165
+ "learning_rate": 5e-06,
166
+ "logits/chosen": -6.253265380859375,
167
+ "logits/rejected": -6.15267276763916,
168
+ "logps/chosen": -352.24908447265625,
169
+ "logps/rejected": -447.11444091796875,
170
+ "loss": 0.6791603088378906,
171
+ "rewards/accuracies": 0.6000000238418579,
172
+ "rewards/chosen": -0.010960197076201439,
173
+ "rewards/margins": 0.031198084354400635,
174
+ "rewards/rejected": -0.042158275842666626,
175
+ "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.33472803347280333,
179
+ "grad_norm": 2.6027019023895264,
180
+ "learning_rate": 4.9485596707818935e-06,
181
+ "logits/chosen": -6.205387592315674,
182
+ "logits/rejected": -6.259293079376221,
183
+ "logps/chosen": -439.732421875,
184
+ "logps/rejected": -412.8099670410156,
185
+ "loss": 0.6736814975738525,
186
+ "rewards/accuracies": 0.550000011920929,
187
+ "rewards/chosen": -0.010224836878478527,
188
+ "rewards/margins": 0.04464374855160713,
189
+ "rewards/rejected": -0.054868586361408234,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.36262203626220363,
194
+ "grad_norm": 2.1717166900634766,
195
+ "learning_rate": 4.897119341563787e-06,
196
+ "logits/chosen": -6.1334547996521,
197
+ "logits/rejected": -6.148266792297363,
198
+ "logps/chosen": -390.00433349609375,
199
+ "logps/rejected": -376.676513671875,
200
+ "loss": 0.6825191974639893,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -0.04612758383154869,
203
+ "rewards/margins": 0.03951488807797432,
204
+ "rewards/rejected": -0.08564247190952301,
205
+ "step": 65
206
+ },
207
+ {
208
+ "epoch": 0.3905160390516039,
209
+ "grad_norm": 2.2574119567871094,
210
+ "learning_rate": 4.845679012345679e-06,
211
+ "logits/chosen": -6.236250877380371,
212
+ "logits/rejected": -6.165186882019043,
213
+ "logps/chosen": -411.1315002441406,
214
+ "logps/rejected": -447.22100830078125,
215
+ "loss": 0.6402256488800049,
216
+ "rewards/accuracies": 0.675000011920929,
217
+ "rewards/chosen": -0.017545931041240692,
218
+ "rewards/margins": 0.12250369787216187,
219
+ "rewards/rejected": -0.14004963636398315,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.41841004184100417,
224
+ "grad_norm": 2.3837037086486816,
225
+ "learning_rate": 4.794238683127572e-06,
226
+ "logits/chosen": -6.256176948547363,
227
+ "logits/rejected": -6.213258266448975,
228
+ "logps/chosen": -437.463623046875,
229
+ "logps/rejected": -404.8554992675781,
230
+ "loss": 0.6703986167907715,
231
+ "rewards/accuracies": 0.7250000238418579,
232
+ "rewards/chosen": -0.03428981825709343,
233
+ "rewards/margins": 0.05360151082277298,
234
+ "rewards/rejected": -0.08789133280515671,
235
+ "step": 75
236
+ },
237
+ {
238
+ "epoch": 0.44630404463040446,
239
+ "grad_norm": 3.304287910461426,
240
+ "learning_rate": 4.742798353909465e-06,
241
+ "logits/chosen": -6.2820305824279785,
242
+ "logits/rejected": -6.221312522888184,
243
+ "logps/chosen": -455.2318420410156,
244
+ "logps/rejected": -422.08837890625,
245
+ "loss": 0.7040590286254883,
246
+ "rewards/accuracies": 0.6000000238418579,
247
+ "rewards/chosen": -0.0881255492568016,
248
+ "rewards/margins": -0.006679975427687168,
249
+ "rewards/rejected": -0.08144557476043701,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.47419804741980476,
254
+ "grad_norm": 2.6312427520751953,
255
+ "learning_rate": 4.691358024691358e-06,
256
+ "logits/chosen": -6.1796159744262695,
257
+ "logits/rejected": -6.193436622619629,
258
+ "logps/chosen": -423.59930419921875,
259
+ "logps/rejected": -486.1690979003906,
260
+ "loss": 0.6397527694702149,
261
+ "rewards/accuracies": 0.6000000238418579,
262
+ "rewards/chosen": -0.043790053576231,
263
+ "rewards/margins": 0.12486596405506134,
264
+ "rewards/rejected": -0.16865602135658264,
265
+ "step": 85
266
+ },
267
+ {
268
+ "epoch": 0.502092050209205,
269
+ "grad_norm": 2.3493549823760986,
270
+ "learning_rate": 4.6399176954732515e-06,
271
+ "logits/chosen": -6.136630058288574,
272
+ "logits/rejected": -6.202858924865723,
273
+ "logps/chosen": -467.627685546875,
274
+ "logps/rejected": -441.41241455078125,
275
+ "loss": 0.5936434745788575,
276
+ "rewards/accuracies": 0.925000011920929,
277
+ "rewards/chosen": 0.06891433894634247,
278
+ "rewards/margins": 0.26186972856521606,
279
+ "rewards/rejected": -0.19295534491539001,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.5299860529986054,
284
+ "grad_norm": 2.4952447414398193,
285
+ "learning_rate": 4.588477366255145e-06,
286
+ "logits/chosen": -6.1503586769104,
287
+ "logits/rejected": -6.144400596618652,
288
+ "logps/chosen": -355.2735290527344,
289
+ "logps/rejected": -409.51702880859375,
290
+ "loss": 0.6157774925231934,
291
+ "rewards/accuracies": 0.7749999761581421,
292
+ "rewards/chosen": -0.0699276328086853,
293
+ "rewards/margins": 0.22694334387779236,
294
+ "rewards/rejected": -0.2968709468841553,
295
+ "step": 95
296
+ },
297
+ {
298
+ "epoch": 0.5578800557880056,
299
+ "grad_norm": 2.5470480918884277,
300
+ "learning_rate": 4.537037037037038e-06,
301
+ "logits/chosen": -6.14028263092041,
302
+ "logits/rejected": -6.104605197906494,
303
+ "logps/chosen": -429.1048889160156,
304
+ "logps/rejected": -454.7377014160156,
305
+ "loss": 0.6300024032592774,
306
+ "rewards/accuracies": 0.7250000238418579,
307
+ "rewards/chosen": -0.05769091844558716,
308
+ "rewards/margins": 0.14401891827583313,
309
+ "rewards/rejected": -0.2017098367214203,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.5857740585774058,
314
+ "grad_norm": 2.6023478507995605,
315
+ "learning_rate": 4.485596707818931e-06,
316
+ "logits/chosen": -6.196796894073486,
317
+ "logits/rejected": -6.226622104644775,
318
+ "logps/chosen": -442.52685546875,
319
+ "logps/rejected": -516.7334594726562,
320
+ "loss": 0.6245638847351074,
321
+ "rewards/accuracies": 0.699999988079071,
322
+ "rewards/chosen": -0.18519389629364014,
323
+ "rewards/margins": 0.24079546332359314,
324
+ "rewards/rejected": -0.42598938941955566,
325
+ "step": 105
326
+ },
327
+ {
328
+ "epoch": 0.6136680613668062,
329
+ "grad_norm": 2.0638511180877686,
330
+ "learning_rate": 4.434156378600823e-06,
331
+ "logits/chosen": -6.1991071701049805,
332
+ "logits/rejected": -6.119466781616211,
333
+ "logps/chosen": -410.86669921875,
334
+ "logps/rejected": -450.365478515625,
335
+ "loss": 0.6201879501342773,
336
+ "rewards/accuracies": 0.800000011920929,
337
+ "rewards/chosen": -0.0853937491774559,
338
+ "rewards/margins": 0.17646726965904236,
339
+ "rewards/rejected": -0.26186102628707886,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.6415620641562064,
344
+ "grad_norm": 2.3625364303588867,
345
+ "learning_rate": 4.382716049382716e-06,
346
+ "logits/chosen": -6.220386505126953,
347
+ "logits/rejected": -6.223449230194092,
348
+ "logps/chosen": -435.92626953125,
349
+ "logps/rejected": -495.6065368652344,
350
+ "loss": 0.6151515483856201,
351
+ "rewards/accuracies": 0.824999988079071,
352
+ "rewards/chosen": -0.2208656519651413,
353
+ "rewards/margins": 0.21172885596752167,
354
+ "rewards/rejected": -0.43259453773498535,
355
+ "step": 115
356
+ },
357
+ {
358
+ "epoch": 0.6694560669456067,
359
+ "grad_norm": 1.8082666397094727,
360
+ "learning_rate": 4.331275720164609e-06,
361
+ "logits/chosen": -6.262181282043457,
362
+ "logits/rejected": -6.250016212463379,
363
+ "logps/chosen": -354.21795654296875,
364
+ "logps/rejected": -389.14556884765625,
365
+ "loss": 0.6109379768371582,
366
+ "rewards/accuracies": 0.7749999761581421,
367
+ "rewards/chosen": -0.17473874986171722,
368
+ "rewards/margins": 0.19315743446350098,
369
+ "rewards/rejected": -0.367896169424057,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.697350069735007,
374
+ "grad_norm": 1.9556658267974854,
375
+ "learning_rate": 4.2798353909465025e-06,
376
+ "logits/chosen": -6.167700290679932,
377
+ "logits/rejected": -6.1421003341674805,
378
+ "logps/chosen": -379.1827392578125,
379
+ "logps/rejected": -426.69549560546875,
380
+ "loss": 0.6202447414398193,
381
+ "rewards/accuracies": 0.699999988079071,
382
+ "rewards/chosen": -0.17535170912742615,
383
+ "rewards/margins": 0.18863627314567566,
384
+ "rewards/rejected": -0.3639879822731018,
385
+ "step": 125
386
+ },
387
+ {
388
+ "epoch": 0.7252440725244073,
389
+ "grad_norm": 3.001298666000366,
390
+ "learning_rate": 4.228395061728396e-06,
391
+ "logits/chosen": -6.2535905838012695,
392
+ "logits/rejected": -6.232400894165039,
393
+ "logps/chosen": -424.8458557128906,
394
+ "logps/rejected": -494.52960205078125,
395
+ "loss": 0.5493914127349854,
396
+ "rewards/accuracies": 0.875,
397
+ "rewards/chosen": -0.22046081721782684,
398
+ "rewards/margins": 0.3785194754600525,
399
+ "rewards/rejected": -0.5989803075790405,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.7531380753138075,
404
+ "grad_norm": 2.5210413932800293,
405
+ "learning_rate": 4.176954732510288e-06,
406
+ "logits/chosen": -6.078260898590088,
407
+ "logits/rejected": -6.0126447677612305,
408
+ "logps/chosen": -414.69940185546875,
409
+ "logps/rejected": -432.3282165527344,
410
+ "loss": 0.579456901550293,
411
+ "rewards/accuracies": 0.875,
412
+ "rewards/chosen": -0.15084640681743622,
413
+ "rewards/margins": 0.2960701882839203,
414
+ "rewards/rejected": -0.4469165802001953,
415
+ "step": 135
416
+ },
417
+ {
418
+ "epoch": 0.7810320781032078,
419
+ "grad_norm": 2.6807265281677246,
420
+ "learning_rate": 4.125514403292181e-06,
421
+ "logits/chosen": -6.243051052093506,
422
+ "logits/rejected": -6.220357418060303,
423
+ "logps/chosen": -400.6156311035156,
424
+ "logps/rejected": -450.0393981933594,
425
+ "loss": 0.5514531135559082,
426
+ "rewards/accuracies": 0.8999999761581421,
427
+ "rewards/chosen": -0.25727975368499756,
428
+ "rewards/margins": 0.3991738259792328,
429
+ "rewards/rejected": -0.6564534902572632,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.8089260808926081,
434
+ "grad_norm": 2.4137353897094727,
435
+ "learning_rate": 4.074074074074074e-06,
436
+ "logits/chosen": -6.186558246612549,
437
+ "logits/rejected": -6.139374256134033,
438
+ "logps/chosen": -442.314453125,
439
+ "logps/rejected": -491.584716796875,
440
+ "loss": 0.5633067131042481,
441
+ "rewards/accuracies": 0.800000011920929,
442
+ "rewards/chosen": -0.35670942068099976,
443
+ "rewards/margins": 0.38038796186447144,
444
+ "rewards/rejected": -0.7370973825454712,
445
+ "step": 145
446
+ },
447
+ {
448
+ "epoch": 0.8368200836820083,
449
+ "grad_norm": 2.1043145656585693,
450
+ "learning_rate": 4.022633744855967e-06,
451
+ "logits/chosen": -6.177689552307129,
452
+ "logits/rejected": -6.167322635650635,
453
+ "logps/chosen": -435.2288513183594,
454
+ "logps/rejected": -469.41436767578125,
455
+ "loss": 0.5640112876892089,
456
+ "rewards/accuracies": 0.7749999761581421,
457
+ "rewards/chosen": -0.32313138246536255,
458
+ "rewards/margins": 0.45979684591293335,
459
+ "rewards/rejected": -0.7829282283782959,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.8647140864714087,
464
+ "grad_norm": 1.9623620510101318,
465
+ "learning_rate": 3.97119341563786e-06,
466
+ "logits/chosen": -6.0590739250183105,
467
+ "logits/rejected": -6.033650875091553,
468
+ "logps/chosen": -421.5000915527344,
469
+ "logps/rejected": -370.30902099609375,
470
+ "loss": 0.6319089412689209,
471
+ "rewards/accuracies": 0.699999988079071,
472
+ "rewards/chosen": -0.29380694031715393,
473
+ "rewards/margins": 0.1507532149553299,
474
+ "rewards/rejected": -0.44456014037132263,
475
+ "step": 155
476
+ },
477
+ {
478
+ "epoch": 0.8926080892608089,
479
+ "grad_norm": 1.9432786703109741,
480
+ "learning_rate": 3.9197530864197535e-06,
481
+ "logits/chosen": -6.267019271850586,
482
+ "logits/rejected": -6.214621067047119,
483
+ "logps/chosen": -417.11724853515625,
484
+ "logps/rejected": -431.72698974609375,
485
+ "loss": 0.5186795234680176,
486
+ "rewards/accuracies": 0.925000011920929,
487
+ "rewards/chosen": -0.18987610936164856,
488
+ "rewards/margins": 0.5616164803504944,
489
+ "rewards/rejected": -0.7514925599098206,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.9205020920502092,
494
+ "grad_norm": 1.8146827220916748,
495
+ "learning_rate": 3.868312757201647e-06,
496
+ "logits/chosen": -6.230213165283203,
497
+ "logits/rejected": -6.109362602233887,
498
+ "logps/chosen": -376.6744384765625,
499
+ "logps/rejected": -376.6526184082031,
500
+ "loss": 0.5346522808074952,
501
+ "rewards/accuracies": 0.8999999761581421,
502
+ "rewards/chosen": -0.19152329862117767,
503
+ "rewards/margins": 0.38060134649276733,
504
+ "rewards/rejected": -0.5721246004104614,
505
+ "step": 165
506
+ },
507
+ {
508
+ "epoch": 0.9483960948396095,
509
+ "grad_norm": 1.936680793762207,
510
+ "learning_rate": 3.81687242798354e-06,
511
+ "logits/chosen": -6.194340705871582,
512
+ "logits/rejected": -6.1444597244262695,
513
+ "logps/chosen": -389.017822265625,
514
+ "logps/rejected": -445.76544189453125,
515
+ "loss": 0.49420690536499023,
516
+ "rewards/accuracies": 0.925000011920929,
517
+ "rewards/chosen": -0.055042725056409836,
518
+ "rewards/margins": 0.5315954685211182,
519
+ "rewards/rejected": -0.5866381525993347,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.9762900976290098,
524
+ "grad_norm": 3.2391903400421143,
525
+ "learning_rate": 3.7654320987654325e-06,
526
+ "logits/chosen": -6.190616607666016,
527
+ "logits/rejected": -6.1138739585876465,
528
+ "logps/chosen": -421.6878356933594,
529
+ "logps/rejected": -460.1180114746094,
530
+ "loss": 0.5374621391296387,
531
+ "rewards/accuracies": 0.8500000238418579,
532
+ "rewards/chosen": -0.22659805417060852,
533
+ "rewards/margins": 0.43736472725868225,
534
+ "rewards/rejected": -0.6639627814292908,
535
+ "step": 175
536
+ },
537
+ {
538
+ "epoch": 1.0,
539
+ "grad_norm": 4.300363063812256,
540
+ "learning_rate": 3.7139917695473256e-06,
541
+ "logits/chosen": -6.227687358856201,
542
+ "logits/rejected": -6.129978179931641,
543
+ "logps/chosen": -449.4482116699219,
544
+ "logps/rejected": -430.434326171875,
545
+ "loss": 0.49641432762146,
546
+ "rewards/accuracies": 0.8529411554336548,
547
+ "rewards/chosen": -0.03447714447975159,
548
+ "rewards/margins": 0.5154433846473694,
549
+ "rewards/rejected": -0.5499205589294434,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 1.0278940027894004,
554
+ "grad_norm": 1.5967351198196411,
555
+ "learning_rate": 3.6625514403292183e-06,
556
+ "logits/chosen": -6.067181587219238,
557
+ "logits/rejected": -6.06889533996582,
558
+ "logps/chosen": -399.98968505859375,
559
+ "logps/rejected": -447.72039794921875,
560
+ "loss": 0.4079257011413574,
561
+ "rewards/accuracies": 0.9750000238418579,
562
+ "rewards/chosen": 0.03092392347753048,
563
+ "rewards/margins": 0.7688177824020386,
564
+ "rewards/rejected": -0.7378939390182495,
565
+ "step": 185
566
+ },
567
+ {
568
+ "epoch": 1.0557880055788005,
569
+ "grad_norm": 1.7787078619003296,
570
+ "learning_rate": 3.6111111111111115e-06,
571
+ "logits/chosen": -6.146653175354004,
572
+ "logits/rejected": -6.154219627380371,
573
+ "logps/chosen": -419.3291015625,
574
+ "logps/rejected": -463.1239318847656,
575
+ "loss": 0.42812933921813967,
576
+ "rewards/accuracies": 0.9750000238418579,
577
+ "rewards/chosen": -0.007557299919426441,
578
+ "rewards/margins": 0.7340337634086609,
579
+ "rewards/rejected": -0.7415911555290222,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 1.0836820083682008,
584
+ "grad_norm": 1.7694693803787231,
585
+ "learning_rate": 3.559670781893004e-06,
586
+ "logits/chosen": -6.160831928253174,
587
+ "logits/rejected": -6.151050567626953,
588
+ "logps/chosen": -379.6001281738281,
589
+ "logps/rejected": -394.00531005859375,
590
+ "loss": 0.41410012245178224,
591
+ "rewards/accuracies": 0.949999988079071,
592
+ "rewards/chosen": 0.03558122366666794,
593
+ "rewards/margins": 0.8024934530258179,
594
+ "rewards/rejected": -0.7669121623039246,
595
+ "step": 195
596
+ },
597
+ {
598
+ "epoch": 1.1115760111576012,
599
+ "grad_norm": 1.3202013969421387,
600
+ "learning_rate": 3.5082304526748973e-06,
601
+ "logits/chosen": -6.110814094543457,
602
+ "logits/rejected": -6.176726341247559,
603
+ "logps/chosen": -400.85052490234375,
604
+ "logps/rejected": -405.44195556640625,
605
+ "loss": 0.391094446182251,
606
+ "rewards/accuracies": 0.925000011920929,
607
+ "rewards/chosen": -0.02833392843604088,
608
+ "rewards/margins": 0.8581112623214722,
609
+ "rewards/rejected": -0.8864452242851257,
610
+ "step": 200
611
+ }
612
+ ],
613
+ "logging_steps": 5,
614
+ "max_steps": 540,
615
+ "num_input_tokens_seen": 0,
616
+ "num_train_epochs": 3,
617
+ "save_steps": 50,
618
+ "stateful_callbacks": {
619
+ "TrainerControl": {
620
+ "args": {
621
+ "should_epoch_stop": false,
622
+ "should_evaluate": false,
623
+ "should_log": false,
624
+ "should_save": true,
625
+ "should_training_stop": false
626
+ },
627
+ "attributes": {}
628
+ }
629
+ },
630
+ "total_flos": 0.0,
631
+ "train_batch_size": 1,
632
+ "trial_name": null,
633
+ "trial_params": null
634
+ }
gemma-2b-dpo/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9
3
+ size 5688
gemma-2b-dpo/checkpoint-250/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
gemma-2b-dpo/checkpoint-250/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/gemma-2-2b-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "v_proj",
35
+ "gate_proj",
36
+ "k_proj",
37
+ "down_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
gemma-2b-dpo/checkpoint-250/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521436babeb20da3f9706c576539c659fcd45d1a0c22acd84a44d3fc9d4fe370
3
+ size 83115256
gemma-2b-dpo/checkpoint-250/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}
gemma-2b-dpo/checkpoint-250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5783ae02fe015af6554b431db64dd006bf1b056f5f5ff304151d37bf7db93739
3
+ size 42616388
gemma-2b-dpo/checkpoint-250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165
3
+ size 14244
gemma-2b-dpo/checkpoint-250/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a04c3b44e05e12a36faabdecba5dc808f7cdeb9643fae2e7683908edc539b44
3
+ size 1064
gemma-2b-dpo/checkpoint-250/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e
3
+ size 34362748
gemma-2b-dpo/checkpoint-250/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<eos>",
6
+ "extra_special_tokens": [
7
+ "<start_of_turn>",
8
+ "<end_of_turn>"
9
+ ],
10
+ "is_local": false,
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<pad>",
14
+ "sp_model_kwargs": {},
15
+ "spaces_between_special_tokens": false,
16
+ "tokenizer_class": "GemmaTokenizer",
17
+ "unk_token": "<unk>",
18
+ "use_default_system_prompt": false
19
+ }
gemma-2b-dpo/checkpoint-250/trainer_state.json ADDED
@@ -0,0 +1,784 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.390516039051604,
6
+ "eval_steps": 500,
7
+ "global_step": 250,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.02789400278940028,
14
+ "grad_norm": 2.7052793502807617,
15
+ "learning_rate": 3.7037037037037036e-07,
16
+ "logits/chosen": -6.208017826080322,
17
+ "logits/rejected": -6.18649435043335,
18
+ "logps/chosen": -417.861328125,
19
+ "logps/rejected": -431.774169921875,
20
+ "loss": 0.6978574275970459,
21
+ "rewards/accuracies": 0.25,
22
+ "rewards/chosen": 0.0027643204666674137,
23
+ "rewards/margins": -0.00830078125,
24
+ "rewards/rejected": 0.011065103113651276,
25
+ "step": 5
26
+ },
27
+ {
28
+ "epoch": 0.05578800557880056,
29
+ "grad_norm": 1.9632341861724854,
30
+ "learning_rate": 8.333333333333333e-07,
31
+ "logits/chosen": -6.051701545715332,
32
+ "logits/rejected": -6.098549842834473,
33
+ "logps/chosen": -442.61126708984375,
34
+ "logps/rejected": -419.4737243652344,
35
+ "loss": 0.6965099811553955,
36
+ "rewards/accuracies": 0.44999998807907104,
37
+ "rewards/chosen": -0.0022751614451408386,
38
+ "rewards/margins": -0.004312096629291773,
39
+ "rewards/rejected": 0.002036933321505785,
40
+ "step": 10
41
+ },
42
+ {
43
+ "epoch": 0.08368200836820083,
44
+ "grad_norm": 1.9358311891555786,
45
+ "learning_rate": 1.2962962962962962e-06,
46
+ "logits/chosen": -6.1482648849487305,
47
+ "logits/rejected": -6.208896636962891,
48
+ "logps/chosen": -419.15155029296875,
49
+ "logps/rejected": -393.37322998046875,
50
+ "loss": 0.6971890449523925,
51
+ "rewards/accuracies": 0.4749999940395355,
52
+ "rewards/chosen": 0.004753150977194309,
53
+ "rewards/margins": -0.006633720360696316,
54
+ "rewards/rejected": 0.0113868722692132,
55
+ "step": 15
56
+ },
57
+ {
58
+ "epoch": 0.11157601115760112,
59
+ "grad_norm": 2.137960195541382,
60
+ "learning_rate": 1.7592592592592594e-06,
61
+ "logits/chosen": -6.1889142990112305,
62
+ "logits/rejected": -6.147027015686035,
63
+ "logps/chosen": -449.2413024902344,
64
+ "logps/rejected": -387.8244934082031,
65
+ "loss": 0.694630479812622,
66
+ "rewards/accuracies": 0.574999988079071,
67
+ "rewards/chosen": -0.01739494316279888,
68
+ "rewards/margins": -0.0010753620881587267,
69
+ "rewards/rejected": -0.016319578513503075,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 0.1394700139470014,
74
+ "grad_norm": 2.610708475112915,
75
+ "learning_rate": 2.222222222222222e-06,
76
+ "logits/chosen": -6.098985195159912,
77
+ "logits/rejected": -6.146561145782471,
78
+ "logps/chosen": -528.6546020507812,
79
+ "logps/rejected": -517.2868041992188,
80
+ "loss": 0.6923945903778076,
81
+ "rewards/accuracies": 0.5,
82
+ "rewards/chosen": 0.013269426301121712,
83
+ "rewards/margins": 0.004172402434051037,
84
+ "rewards/rejected": 0.009097023867070675,
85
+ "step": 25
86
+ },
87
+ {
88
+ "epoch": 0.16736401673640167,
89
+ "grad_norm": 3.0792224407196045,
90
+ "learning_rate": 2.6851851851851856e-06,
91
+ "logits/chosen": -6.156611442565918,
92
+ "logits/rejected": -6.146718502044678,
93
+ "logps/chosen": -427.1123962402344,
94
+ "logps/rejected": -413.99810791015625,
95
+ "loss": 0.6963389396667481,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.0016099174972623587,
98
+ "rewards/margins": -0.0036583715118467808,
99
+ "rewards/rejected": 0.0052682883106172085,
100
+ "step": 30
101
+ },
102
+ {
103
+ "epoch": 0.19525801952580196,
104
+ "grad_norm": 2.40751051902771,
105
+ "learning_rate": 3.1481481481481483e-06,
106
+ "logits/chosen": -6.270221710205078,
107
+ "logits/rejected": -6.222764492034912,
108
+ "logps/chosen": -433.89312744140625,
109
+ "logps/rejected": -442.81378173828125,
110
+ "loss": 0.6875874042510987,
111
+ "rewards/accuracies": 0.550000011920929,
112
+ "rewards/chosen": -0.003747978014871478,
113
+ "rewards/margins": 0.013033255934715271,
114
+ "rewards/rejected": -0.016781235113739967,
115
+ "step": 35
116
+ },
117
+ {
118
+ "epoch": 0.22315202231520223,
119
+ "grad_norm": 2.409308671951294,
120
+ "learning_rate": 3.6111111111111115e-06,
121
+ "logits/chosen": -6.171980857849121,
122
+ "logits/rejected": -6.236737251281738,
123
+ "logps/chosen": -411.51092529296875,
124
+ "logps/rejected": -454.578857421875,
125
+ "loss": 0.6975872993469239,
126
+ "rewards/accuracies": 0.5,
127
+ "rewards/chosen": -0.0006048586219549179,
128
+ "rewards/margins": -0.006687240209430456,
129
+ "rewards/rejected": 0.0060823829844594,
130
+ "step": 40
131
+ },
132
+ {
133
+ "epoch": 0.2510460251046025,
134
+ "grad_norm": 2.8261911869049072,
135
+ "learning_rate": 4.074074074074074e-06,
136
+ "logits/chosen": -6.1633710861206055,
137
+ "logits/rejected": -6.245741367340088,
138
+ "logps/chosen": -373.363525390625,
139
+ "logps/rejected": -356.736572265625,
140
+ "loss": 0.6881499290466309,
141
+ "rewards/accuracies": 0.550000011920929,
142
+ "rewards/chosen": 0.01782766357064247,
143
+ "rewards/margins": 0.015053692273795605,
144
+ "rewards/rejected": 0.002773971762508154,
145
+ "step": 45
146
+ },
147
+ {
148
+ "epoch": 0.2789400278940028,
149
+ "grad_norm": 2.457179546356201,
150
+ "learning_rate": 4.537037037037038e-06,
151
+ "logits/chosen": -6.270019054412842,
152
+ "logits/rejected": -6.3202104568481445,
153
+ "logps/chosen": -466.30609130859375,
154
+ "logps/rejected": -476.45550537109375,
155
+ "loss": 0.6831833839416503,
156
+ "rewards/accuracies": 0.6000000238418579,
157
+ "rewards/chosen": 0.0046669007278978825,
158
+ "rewards/margins": 0.023042945191264153,
159
+ "rewards/rejected": -0.018376046791672707,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 0.3068340306834031,
164
+ "grad_norm": 1.6770554780960083,
165
+ "learning_rate": 5e-06,
166
+ "logits/chosen": -6.253265380859375,
167
+ "logits/rejected": -6.15267276763916,
168
+ "logps/chosen": -352.24908447265625,
169
+ "logps/rejected": -447.11444091796875,
170
+ "loss": 0.6791603088378906,
171
+ "rewards/accuracies": 0.6000000238418579,
172
+ "rewards/chosen": -0.010960197076201439,
173
+ "rewards/margins": 0.031198084354400635,
174
+ "rewards/rejected": -0.042158275842666626,
175
+ "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.33472803347280333,
179
+ "grad_norm": 2.6027019023895264,
180
+ "learning_rate": 4.9485596707818935e-06,
181
+ "logits/chosen": -6.205387592315674,
182
+ "logits/rejected": -6.259293079376221,
183
+ "logps/chosen": -439.732421875,
184
+ "logps/rejected": -412.8099670410156,
185
+ "loss": 0.6736814975738525,
186
+ "rewards/accuracies": 0.550000011920929,
187
+ "rewards/chosen": -0.010224836878478527,
188
+ "rewards/margins": 0.04464374855160713,
189
+ "rewards/rejected": -0.054868586361408234,
190
+ "step": 60
191
+ },
192
+ {
193
+ "epoch": 0.36262203626220363,
194
+ "grad_norm": 2.1717166900634766,
195
+ "learning_rate": 4.897119341563787e-06,
196
+ "logits/chosen": -6.1334547996521,
197
+ "logits/rejected": -6.148266792297363,
198
+ "logps/chosen": -390.00433349609375,
199
+ "logps/rejected": -376.676513671875,
200
+ "loss": 0.6825191974639893,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -0.04612758383154869,
203
+ "rewards/margins": 0.03951488807797432,
204
+ "rewards/rejected": -0.08564247190952301,
205
+ "step": 65
206
+ },
207
+ {
208
+ "epoch": 0.3905160390516039,
209
+ "grad_norm": 2.2574119567871094,
210
+ "learning_rate": 4.845679012345679e-06,
211
+ "logits/chosen": -6.236250877380371,
212
+ "logits/rejected": -6.165186882019043,
213
+ "logps/chosen": -411.1315002441406,
214
+ "logps/rejected": -447.22100830078125,
215
+ "loss": 0.6402256488800049,
216
+ "rewards/accuracies": 0.675000011920929,
217
+ "rewards/chosen": -0.017545931041240692,
218
+ "rewards/margins": 0.12250369787216187,
219
+ "rewards/rejected": -0.14004963636398315,
220
+ "step": 70
221
+ },
222
+ {
223
+ "epoch": 0.41841004184100417,
224
+ "grad_norm": 2.3837037086486816,
225
+ "learning_rate": 4.794238683127572e-06,
226
+ "logits/chosen": -6.256176948547363,
227
+ "logits/rejected": -6.213258266448975,
228
+ "logps/chosen": -437.463623046875,
229
+ "logps/rejected": -404.8554992675781,
230
+ "loss": 0.6703986167907715,
231
+ "rewards/accuracies": 0.7250000238418579,
232
+ "rewards/chosen": -0.03428981825709343,
233
+ "rewards/margins": 0.05360151082277298,
234
+ "rewards/rejected": -0.08789133280515671,
235
+ "step": 75
236
+ },
237
+ {
238
+ "epoch": 0.44630404463040446,
239
+ "grad_norm": 3.304287910461426,
240
+ "learning_rate": 4.742798353909465e-06,
241
+ "logits/chosen": -6.2820305824279785,
242
+ "logits/rejected": -6.221312522888184,
243
+ "logps/chosen": -455.2318420410156,
244
+ "logps/rejected": -422.08837890625,
245
+ "loss": 0.7040590286254883,
246
+ "rewards/accuracies": 0.6000000238418579,
247
+ "rewards/chosen": -0.0881255492568016,
248
+ "rewards/margins": -0.006679975427687168,
249
+ "rewards/rejected": -0.08144557476043701,
250
+ "step": 80
251
+ },
252
+ {
253
+ "epoch": 0.47419804741980476,
254
+ "grad_norm": 2.6312427520751953,
255
+ "learning_rate": 4.691358024691358e-06,
256
+ "logits/chosen": -6.1796159744262695,
257
+ "logits/rejected": -6.193436622619629,
258
+ "logps/chosen": -423.59930419921875,
259
+ "logps/rejected": -486.1690979003906,
260
+ "loss": 0.6397527694702149,
261
+ "rewards/accuracies": 0.6000000238418579,
262
+ "rewards/chosen": -0.043790053576231,
263
+ "rewards/margins": 0.12486596405506134,
264
+ "rewards/rejected": -0.16865602135658264,
265
+ "step": 85
266
+ },
267
+ {
268
+ "epoch": 0.502092050209205,
269
+ "grad_norm": 2.3493549823760986,
270
+ "learning_rate": 4.6399176954732515e-06,
271
+ "logits/chosen": -6.136630058288574,
272
+ "logits/rejected": -6.202858924865723,
273
+ "logps/chosen": -467.627685546875,
274
+ "logps/rejected": -441.41241455078125,
275
+ "loss": 0.5936434745788575,
276
+ "rewards/accuracies": 0.925000011920929,
277
+ "rewards/chosen": 0.06891433894634247,
278
+ "rewards/margins": 0.26186972856521606,
279
+ "rewards/rejected": -0.19295534491539001,
280
+ "step": 90
281
+ },
282
+ {
283
+ "epoch": 0.5299860529986054,
284
+ "grad_norm": 2.4952447414398193,
285
+ "learning_rate": 4.588477366255145e-06,
286
+ "logits/chosen": -6.1503586769104,
287
+ "logits/rejected": -6.144400596618652,
288
+ "logps/chosen": -355.2735290527344,
289
+ "logps/rejected": -409.51702880859375,
290
+ "loss": 0.6157774925231934,
291
+ "rewards/accuracies": 0.7749999761581421,
292
+ "rewards/chosen": -0.0699276328086853,
293
+ "rewards/margins": 0.22694334387779236,
294
+ "rewards/rejected": -0.2968709468841553,
295
+ "step": 95
296
+ },
297
+ {
298
+ "epoch": 0.5578800557880056,
299
+ "grad_norm": 2.5470480918884277,
300
+ "learning_rate": 4.537037037037038e-06,
301
+ "logits/chosen": -6.14028263092041,
302
+ "logits/rejected": -6.104605197906494,
303
+ "logps/chosen": -429.1048889160156,
304
+ "logps/rejected": -454.7377014160156,
305
+ "loss": 0.6300024032592774,
306
+ "rewards/accuracies": 0.7250000238418579,
307
+ "rewards/chosen": -0.05769091844558716,
308
+ "rewards/margins": 0.14401891827583313,
309
+ "rewards/rejected": -0.2017098367214203,
310
+ "step": 100
311
+ },
312
+ {
313
+ "epoch": 0.5857740585774058,
314
+ "grad_norm": 2.6023478507995605,
315
+ "learning_rate": 4.485596707818931e-06,
316
+ "logits/chosen": -6.196796894073486,
317
+ "logits/rejected": -6.226622104644775,
318
+ "logps/chosen": -442.52685546875,
319
+ "logps/rejected": -516.7334594726562,
320
+ "loss": 0.6245638847351074,
321
+ "rewards/accuracies": 0.699999988079071,
322
+ "rewards/chosen": -0.18519389629364014,
323
+ "rewards/margins": 0.24079546332359314,
324
+ "rewards/rejected": -0.42598938941955566,
325
+ "step": 105
326
+ },
327
+ {
328
+ "epoch": 0.6136680613668062,
329
+ "grad_norm": 2.0638511180877686,
330
+ "learning_rate": 4.434156378600823e-06,
331
+ "logits/chosen": -6.1991071701049805,
332
+ "logits/rejected": -6.119466781616211,
333
+ "logps/chosen": -410.86669921875,
334
+ "logps/rejected": -450.365478515625,
335
+ "loss": 0.6201879501342773,
336
+ "rewards/accuracies": 0.800000011920929,
337
+ "rewards/chosen": -0.0853937491774559,
338
+ "rewards/margins": 0.17646726965904236,
339
+ "rewards/rejected": -0.26186102628707886,
340
+ "step": 110
341
+ },
342
+ {
343
+ "epoch": 0.6415620641562064,
344
+ "grad_norm": 2.3625364303588867,
345
+ "learning_rate": 4.382716049382716e-06,
346
+ "logits/chosen": -6.220386505126953,
347
+ "logits/rejected": -6.223449230194092,
348
+ "logps/chosen": -435.92626953125,
349
+ "logps/rejected": -495.6065368652344,
350
+ "loss": 0.6151515483856201,
351
+ "rewards/accuracies": 0.824999988079071,
352
+ "rewards/chosen": -0.2208656519651413,
353
+ "rewards/margins": 0.21172885596752167,
354
+ "rewards/rejected": -0.43259453773498535,
355
+ "step": 115
356
+ },
357
+ {
358
+ "epoch": 0.6694560669456067,
359
+ "grad_norm": 1.8082666397094727,
360
+ "learning_rate": 4.331275720164609e-06,
361
+ "logits/chosen": -6.262181282043457,
362
+ "logits/rejected": -6.250016212463379,
363
+ "logps/chosen": -354.21795654296875,
364
+ "logps/rejected": -389.14556884765625,
365
+ "loss": 0.6109379768371582,
366
+ "rewards/accuracies": 0.7749999761581421,
367
+ "rewards/chosen": -0.17473874986171722,
368
+ "rewards/margins": 0.19315743446350098,
369
+ "rewards/rejected": -0.367896169424057,
370
+ "step": 120
371
+ },
372
+ {
373
+ "epoch": 0.697350069735007,
374
+ "grad_norm": 1.9556658267974854,
375
+ "learning_rate": 4.2798353909465025e-06,
376
+ "logits/chosen": -6.167700290679932,
377
+ "logits/rejected": -6.1421003341674805,
378
+ "logps/chosen": -379.1827392578125,
379
+ "logps/rejected": -426.69549560546875,
380
+ "loss": 0.6202447414398193,
381
+ "rewards/accuracies": 0.699999988079071,
382
+ "rewards/chosen": -0.17535170912742615,
383
+ "rewards/margins": 0.18863627314567566,
384
+ "rewards/rejected": -0.3639879822731018,
385
+ "step": 125
386
+ },
387
+ {
388
+ "epoch": 0.7252440725244073,
389
+ "grad_norm": 3.001298666000366,
390
+ "learning_rate": 4.228395061728396e-06,
391
+ "logits/chosen": -6.2535905838012695,
392
+ "logits/rejected": -6.232400894165039,
393
+ "logps/chosen": -424.8458557128906,
394
+ "logps/rejected": -494.52960205078125,
395
+ "loss": 0.5493914127349854,
396
+ "rewards/accuracies": 0.875,
397
+ "rewards/chosen": -0.22046081721782684,
398
+ "rewards/margins": 0.3785194754600525,
399
+ "rewards/rejected": -0.5989803075790405,
400
+ "step": 130
401
+ },
402
+ {
403
+ "epoch": 0.7531380753138075,
404
+ "grad_norm": 2.5210413932800293,
405
+ "learning_rate": 4.176954732510288e-06,
406
+ "logits/chosen": -6.078260898590088,
407
+ "logits/rejected": -6.0126447677612305,
408
+ "logps/chosen": -414.69940185546875,
409
+ "logps/rejected": -432.3282165527344,
410
+ "loss": 0.579456901550293,
411
+ "rewards/accuracies": 0.875,
412
+ "rewards/chosen": -0.15084640681743622,
413
+ "rewards/margins": 0.2960701882839203,
414
+ "rewards/rejected": -0.4469165802001953,
415
+ "step": 135
416
+ },
417
+ {
418
+ "epoch": 0.7810320781032078,
419
+ "grad_norm": 2.6807265281677246,
420
+ "learning_rate": 4.125514403292181e-06,
421
+ "logits/chosen": -6.243051052093506,
422
+ "logits/rejected": -6.220357418060303,
423
+ "logps/chosen": -400.6156311035156,
424
+ "logps/rejected": -450.0393981933594,
425
+ "loss": 0.5514531135559082,
426
+ "rewards/accuracies": 0.8999999761581421,
427
+ "rewards/chosen": -0.25727975368499756,
428
+ "rewards/margins": 0.3991738259792328,
429
+ "rewards/rejected": -0.6564534902572632,
430
+ "step": 140
431
+ },
432
+ {
433
+ "epoch": 0.8089260808926081,
434
+ "grad_norm": 2.4137353897094727,
435
+ "learning_rate": 4.074074074074074e-06,
436
+ "logits/chosen": -6.186558246612549,
437
+ "logits/rejected": -6.139374256134033,
438
+ "logps/chosen": -442.314453125,
439
+ "logps/rejected": -491.584716796875,
440
+ "loss": 0.5633067131042481,
441
+ "rewards/accuracies": 0.800000011920929,
442
+ "rewards/chosen": -0.35670942068099976,
443
+ "rewards/margins": 0.38038796186447144,
444
+ "rewards/rejected": -0.7370973825454712,
445
+ "step": 145
446
+ },
447
+ {
448
+ "epoch": 0.8368200836820083,
449
+ "grad_norm": 2.1043145656585693,
450
+ "learning_rate": 4.022633744855967e-06,
451
+ "logits/chosen": -6.177689552307129,
452
+ "logits/rejected": -6.167322635650635,
453
+ "logps/chosen": -435.2288513183594,
454
+ "logps/rejected": -469.41436767578125,
455
+ "loss": 0.5640112876892089,
456
+ "rewards/accuracies": 0.7749999761581421,
457
+ "rewards/chosen": -0.32313138246536255,
458
+ "rewards/margins": 0.45979684591293335,
459
+ "rewards/rejected": -0.7829282283782959,
460
+ "step": 150
461
+ },
462
+ {
463
+ "epoch": 0.8647140864714087,
464
+ "grad_norm": 1.9623620510101318,
465
+ "learning_rate": 3.97119341563786e-06,
466
+ "logits/chosen": -6.0590739250183105,
467
+ "logits/rejected": -6.033650875091553,
468
+ "logps/chosen": -421.5000915527344,
469
+ "logps/rejected": -370.30902099609375,
470
+ "loss": 0.6319089412689209,
471
+ "rewards/accuracies": 0.699999988079071,
472
+ "rewards/chosen": -0.29380694031715393,
473
+ "rewards/margins": 0.1507532149553299,
474
+ "rewards/rejected": -0.44456014037132263,
475
+ "step": 155
476
+ },
477
+ {
478
+ "epoch": 0.8926080892608089,
479
+ "grad_norm": 1.9432786703109741,
480
+ "learning_rate": 3.9197530864197535e-06,
481
+ "logits/chosen": -6.267019271850586,
482
+ "logits/rejected": -6.214621067047119,
483
+ "logps/chosen": -417.11724853515625,
484
+ "logps/rejected": -431.72698974609375,
485
+ "loss": 0.5186795234680176,
486
+ "rewards/accuracies": 0.925000011920929,
487
+ "rewards/chosen": -0.18987610936164856,
488
+ "rewards/margins": 0.5616164803504944,
489
+ "rewards/rejected": -0.7514925599098206,
490
+ "step": 160
491
+ },
492
+ {
493
+ "epoch": 0.9205020920502092,
494
+ "grad_norm": 1.8146827220916748,
495
+ "learning_rate": 3.868312757201647e-06,
496
+ "logits/chosen": -6.230213165283203,
497
+ "logits/rejected": -6.109362602233887,
498
+ "logps/chosen": -376.6744384765625,
499
+ "logps/rejected": -376.6526184082031,
500
+ "loss": 0.5346522808074952,
501
+ "rewards/accuracies": 0.8999999761581421,
502
+ "rewards/chosen": -0.19152329862117767,
503
+ "rewards/margins": 0.38060134649276733,
504
+ "rewards/rejected": -0.5721246004104614,
505
+ "step": 165
506
+ },
507
+ {
508
+ "epoch": 0.9483960948396095,
509
+ "grad_norm": 1.936680793762207,
510
+ "learning_rate": 3.81687242798354e-06,
511
+ "logits/chosen": -6.194340705871582,
512
+ "logits/rejected": -6.1444597244262695,
513
+ "logps/chosen": -389.017822265625,
514
+ "logps/rejected": -445.76544189453125,
515
+ "loss": 0.49420690536499023,
516
+ "rewards/accuracies": 0.925000011920929,
517
+ "rewards/chosen": -0.055042725056409836,
518
+ "rewards/margins": 0.5315954685211182,
519
+ "rewards/rejected": -0.5866381525993347,
520
+ "step": 170
521
+ },
522
+ {
523
+ "epoch": 0.9762900976290098,
524
+ "grad_norm": 3.2391903400421143,
525
+ "learning_rate": 3.7654320987654325e-06,
526
+ "logits/chosen": -6.190616607666016,
527
+ "logits/rejected": -6.1138739585876465,
528
+ "logps/chosen": -421.6878356933594,
529
+ "logps/rejected": -460.1180114746094,
530
+ "loss": 0.5374621391296387,
531
+ "rewards/accuracies": 0.8500000238418579,
532
+ "rewards/chosen": -0.22659805417060852,
533
+ "rewards/margins": 0.43736472725868225,
534
+ "rewards/rejected": -0.6639627814292908,
535
+ "step": 175
536
+ },
537
+ {
538
+ "epoch": 1.0,
539
+ "grad_norm": 4.300363063812256,
540
+ "learning_rate": 3.7139917695473256e-06,
541
+ "logits/chosen": -6.227687358856201,
542
+ "logits/rejected": -6.129978179931641,
543
+ "logps/chosen": -449.4482116699219,
544
+ "logps/rejected": -430.434326171875,
545
+ "loss": 0.49641432762146,
546
+ "rewards/accuracies": 0.8529411554336548,
547
+ "rewards/chosen": -0.03447714447975159,
548
+ "rewards/margins": 0.5154433846473694,
549
+ "rewards/rejected": -0.5499205589294434,
550
+ "step": 180
551
+ },
552
+ {
553
+ "epoch": 1.0278940027894004,
554
+ "grad_norm": 1.5967351198196411,
555
+ "learning_rate": 3.6625514403292183e-06,
556
+ "logits/chosen": -6.067181587219238,
557
+ "logits/rejected": -6.06889533996582,
558
+ "logps/chosen": -399.98968505859375,
559
+ "logps/rejected": -447.72039794921875,
560
+ "loss": 0.4079257011413574,
561
+ "rewards/accuracies": 0.9750000238418579,
562
+ "rewards/chosen": 0.03092392347753048,
563
+ "rewards/margins": 0.7688177824020386,
564
+ "rewards/rejected": -0.7378939390182495,
565
+ "step": 185
566
+ },
567
+ {
568
+ "epoch": 1.0557880055788005,
569
+ "grad_norm": 1.7787078619003296,
570
+ "learning_rate": 3.6111111111111115e-06,
571
+ "logits/chosen": -6.146653175354004,
572
+ "logits/rejected": -6.154219627380371,
573
+ "logps/chosen": -419.3291015625,
574
+ "logps/rejected": -463.1239318847656,
575
+ "loss": 0.42812933921813967,
576
+ "rewards/accuracies": 0.9750000238418579,
577
+ "rewards/chosen": -0.007557299919426441,
578
+ "rewards/margins": 0.7340337634086609,
579
+ "rewards/rejected": -0.7415911555290222,
580
+ "step": 190
581
+ },
582
+ {
583
+ "epoch": 1.0836820083682008,
584
+ "grad_norm": 1.7694693803787231,
585
+ "learning_rate": 3.559670781893004e-06,
586
+ "logits/chosen": -6.160831928253174,
587
+ "logits/rejected": -6.151050567626953,
588
+ "logps/chosen": -379.6001281738281,
589
+ "logps/rejected": -394.00531005859375,
590
+ "loss": 0.41410012245178224,
591
+ "rewards/accuracies": 0.949999988079071,
592
+ "rewards/chosen": 0.03558122366666794,
593
+ "rewards/margins": 0.8024934530258179,
594
+ "rewards/rejected": -0.7669121623039246,
595
+ "step": 195
596
+ },
597
+ {
598
+ "epoch": 1.1115760111576012,
599
+ "grad_norm": 1.3202013969421387,
600
+ "learning_rate": 3.5082304526748973e-06,
601
+ "logits/chosen": -6.110814094543457,
602
+ "logits/rejected": -6.176726341247559,
603
+ "logps/chosen": -400.85052490234375,
604
+ "logps/rejected": -405.44195556640625,
605
+ "loss": 0.391094446182251,
606
+ "rewards/accuracies": 0.925000011920929,
607
+ "rewards/chosen": -0.02833392843604088,
608
+ "rewards/margins": 0.8581112623214722,
609
+ "rewards/rejected": -0.8864452242851257,
610
+ "step": 200
611
+ },
612
+ {
613
+ "epoch": 1.1394700139470013,
614
+ "grad_norm": 1.9367257356643677,
615
+ "learning_rate": 3.4567901234567904e-06,
616
+ "logits/chosen": -6.311105728149414,
617
+ "logits/rejected": -6.179243087768555,
618
+ "logps/chosen": -440.97625732421875,
619
+ "logps/rejected": -463.1673278808594,
620
+ "loss": 0.3870258331298828,
621
+ "rewards/accuracies": 0.949999988079071,
622
+ "rewards/chosen": -0.012000990100204945,
623
+ "rewards/margins": 0.9945917129516602,
624
+ "rewards/rejected": -1.0065927505493164,
625
+ "step": 205
626
+ },
627
+ {
628
+ "epoch": 1.1673640167364017,
629
+ "grad_norm": 2.4570703506469727,
630
+ "learning_rate": 3.405349794238683e-06,
631
+ "logits/chosen": -6.199883937835693,
632
+ "logits/rejected": -6.160645484924316,
633
+ "logps/chosen": -448.8758850097656,
634
+ "logps/rejected": -439.4584045410156,
635
+ "loss": 0.3908271551132202,
636
+ "rewards/accuracies": 0.9750000238418579,
637
+ "rewards/chosen": 0.04756501317024231,
638
+ "rewards/margins": 0.8816453218460083,
639
+ "rewards/rejected": -0.8340802192687988,
640
+ "step": 210
641
+ },
642
+ {
643
+ "epoch": 1.195258019525802,
644
+ "grad_norm": 1.5992087125778198,
645
+ "learning_rate": 3.3539094650205767e-06,
646
+ "logits/chosen": -6.163644313812256,
647
+ "logits/rejected": -6.093722343444824,
648
+ "logps/chosen": -449.8214416503906,
649
+ "logps/rejected": -481.3743591308594,
650
+ "loss": 0.3612337350845337,
651
+ "rewards/accuracies": 1.0,
652
+ "rewards/chosen": 0.09812992066144943,
653
+ "rewards/margins": 0.9562546014785767,
654
+ "rewards/rejected": -0.858124852180481,
655
+ "step": 215
656
+ },
657
+ {
658
+ "epoch": 1.2231520223152021,
659
+ "grad_norm": 1.4101840257644653,
660
+ "learning_rate": 3.30246913580247e-06,
661
+ "logits/chosen": -6.281071662902832,
662
+ "logits/rejected": -6.337766170501709,
663
+ "logps/chosen": -281.46795654296875,
664
+ "logps/rejected": -336.60845947265625,
665
+ "loss": 0.43022546768188474,
666
+ "rewards/accuracies": 0.9750000238418579,
667
+ "rewards/chosen": -0.1486438810825348,
668
+ "rewards/margins": 0.7064955234527588,
669
+ "rewards/rejected": -0.855139434337616,
670
+ "step": 220
671
+ },
672
+ {
673
+ "epoch": 1.2510460251046025,
674
+ "grad_norm": 1.5817450284957886,
675
+ "learning_rate": 3.2510288065843625e-06,
676
+ "logits/chosen": -6.1745758056640625,
677
+ "logits/rejected": -6.192706108093262,
678
+ "logps/chosen": -399.51190185546875,
679
+ "logps/rejected": -424.1844177246094,
680
+ "loss": 0.3992297887802124,
681
+ "rewards/accuracies": 0.949999988079071,
682
+ "rewards/chosen": -0.051351286470890045,
683
+ "rewards/margins": 0.827163815498352,
684
+ "rewards/rejected": -0.8785150647163391,
685
+ "step": 225
686
+ },
687
+ {
688
+ "epoch": 1.2789400278940029,
689
+ "grad_norm": 1.3157438039779663,
690
+ "learning_rate": 3.1995884773662556e-06,
691
+ "logits/chosen": -6.1543779373168945,
692
+ "logits/rejected": -6.1732258796691895,
693
+ "logps/chosen": -425.80755615234375,
694
+ "logps/rejected": -447.39453125,
695
+ "loss": 0.35113141536712644,
696
+ "rewards/accuracies": 0.949999988079071,
697
+ "rewards/chosen": 0.01585063710808754,
698
+ "rewards/margins": 1.0381678342819214,
699
+ "rewards/rejected": -1.0223171710968018,
700
+ "step": 230
701
+ },
702
+ {
703
+ "epoch": 1.3068340306834032,
704
+ "grad_norm": 1.4981003999710083,
705
+ "learning_rate": 3.1481481481481483e-06,
706
+ "logits/chosen": -6.220085620880127,
707
+ "logits/rejected": -6.215539455413818,
708
+ "logps/chosen": -393.77716064453125,
709
+ "logps/rejected": -474.4695739746094,
710
+ "loss": 0.35107009410858153,
711
+ "rewards/accuracies": 0.9750000238418579,
712
+ "rewards/chosen": 0.07319364696741104,
713
+ "rewards/margins": 1.0109622478485107,
714
+ "rewards/rejected": -0.9377686381340027,
715
+ "step": 235
716
+ },
717
+ {
718
+ "epoch": 1.3347280334728033,
719
+ "grad_norm": 1.6417901515960693,
720
+ "learning_rate": 3.0967078189300415e-06,
721
+ "logits/chosen": -6.223210334777832,
722
+ "logits/rejected": -6.187335968017578,
723
+ "logps/chosen": -454.0006408691406,
724
+ "logps/rejected": -439.3124084472656,
725
+ "loss": 0.3300657272338867,
726
+ "rewards/accuracies": 1.0,
727
+ "rewards/chosen": 0.09006929397583008,
728
+ "rewards/margins": 1.089814305305481,
729
+ "rewards/rejected": -0.9997450709342957,
730
+ "step": 240
731
+ },
732
+ {
733
+ "epoch": 1.3626220362622037,
734
+ "grad_norm": 1.3642381429672241,
735
+ "learning_rate": 3.0452674897119346e-06,
736
+ "logits/chosen": -6.211455821990967,
737
+ "logits/rejected": -6.1171441078186035,
738
+ "logps/chosen": -402.8586730957031,
739
+ "logps/rejected": -431.7958984375,
740
+ "loss": 0.3634498119354248,
741
+ "rewards/accuracies": 0.9750000238418579,
742
+ "rewards/chosen": -0.0397757962346077,
743
+ "rewards/margins": 1.0654242038726807,
744
+ "rewards/rejected": -1.1052000522613525,
745
+ "step": 245
746
+ },
747
+ {
748
+ "epoch": 1.390516039051604,
749
+ "grad_norm": 1.9008878469467163,
750
+ "learning_rate": 2.9938271604938273e-06,
751
+ "logits/chosen": -6.2566046714782715,
752
+ "logits/rejected": -6.222296714782715,
753
+ "logps/chosen": -433.3427734375,
754
+ "logps/rejected": -489.8169860839844,
755
+ "loss": 0.3385239839553833,
756
+ "rewards/accuracies": 0.9750000238418579,
757
+ "rewards/chosen": 0.04979880154132843,
758
+ "rewards/margins": 1.0801963806152344,
759
+ "rewards/rejected": -1.0303975343704224,
760
+ "step": 250
761
+ }
762
+ ],
763
+ "logging_steps": 5,
764
+ "max_steps": 540,
765
+ "num_input_tokens_seen": 0,
766
+ "num_train_epochs": 3,
767
+ "save_steps": 50,
768
+ "stateful_callbacks": {
769
+ "TrainerControl": {
770
+ "args": {
771
+ "should_epoch_stop": false,
772
+ "should_evaluate": false,
773
+ "should_log": false,
774
+ "should_save": true,
775
+ "should_training_stop": false
776
+ },
777
+ "attributes": {}
778
+ }
779
+ },
780
+ "total_flos": 0.0,
781
+ "train_batch_size": 1,
782
+ "trial_name": null,
783
+ "trial_params": null
784
+ }
gemma-2b-dpo/checkpoint-250/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be0616829fa305775b58136a03c46cbb233332c99572ecc66875666e4681dc9
3
+ size 5688
gemma-2b-dpo/checkpoint-300/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: google/gemma-2-2b-it
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:google/gemma-2-2b-it
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
gemma-2b-dpo/checkpoint-300/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "google/gemma-2-2b-it",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "v_proj",
35
+ "gate_proj",
36
+ "k_proj",
37
+ "down_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
gemma-2b-dpo/checkpoint-300/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3cb7b1f7ca8c60a99cef4f5748b8d2fdc829a7b08e5d5e51a8fc3eda5f5de86
3
+ size 83115256
gemma-2b-dpo/checkpoint-300/chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
2
+ ' + message['content'] | trim + '<end_of_turn>
3
+ ' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
4
+ '}}{% endif %}