eeoonn committed on
Commit
debee10
·
verified ·
1 Parent(s): 34ff405

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-363/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ model_name: simpo-xsum-low-mid-high-0220_0518
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.2-3B-Instruct
7
+ - cpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ license: llama3.2
12
+ pipeline_tag: text-generation
13
+ ---
14
+
15
+ # Model Card for simpo-xsum-low-mid-high-0220_0518
16
+
17
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct).
18
+ It has been trained using [TRL](https://github.com/huggingface/trl).
19
+
20
+ ## Quick start
21
+
22
+ ```python
23
+ from transformers import pipeline
24
+
25
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
26
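+ generator = pipeline("text-generation", model="<hub-namespace>/simpo-xsum-low-mid-high-0220_0518", device="cuda")  # replace <hub-namespace> with the owner of this repository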
27
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
28
+ print(output["generated_text"])
29
+ ```
30
+
31
+ ## Training procedure
32
+
33
+
34
+
35
+
36
+ This model was trained with CPO, a method introduced in [Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation](https://huggingface.co/papers/2401.08417).
37
+
38
+ ### Framework versions
39
+
40
+ - PEFT: 0.18.1
41
+ - TRL: 0.28.0
42
+ - Transformers: 5.2.0
43
+ - PyTorch: 2.10.0+cu126
44
+ - Datasets: 4.5.0
45
+ - Tokenizers: 0.22.2
46
+
47
+ ## Citations
48
+
49
+ Cite CPO as:
50
+
51
+ ```bibtex
52
+ @inproceedings{xu2024contrastive,
53
+ title = {{Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation}},
54
+ author = {Haoran Xu and Amr Sharaf and Yunmo Chen and Weiting Tan and Lingfeng Shen and Benjamin Van Durme and Kenton Murray and Young Jin Kim},
55
+ year = 2024,
56
+ booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
57
+ publisher = {OpenReview.net},
58
+ url = {https://openreview.net/forum?id=51iwkioZpn}
59
+ }
60
+ ```
61
+
62
+ Cite TRL as:
63
+
64
+ ```bibtex
65
+ @software{vonwerra2020trl,
66
+ title = {{TRL: Transformers Reinforcement Learning}},
67
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
68
+ license = {Apache-2.0},
69
+ url = {https://github.com/huggingface/trl},
70
+ year = {2020}
71
+ }
72
+ ```
adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "k_proj",
34
+ "up_proj",
35
+ "down_proj",
36
+ "q_proj",
37
+ "gate_proj",
38
+ "v_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b6a7a1799949e19405ea4a80e1bf4bc530d9447cdffc470d3860350906ed52
3
+ size 97307544
chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
checkpoint-200/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.2-3B-Instruct
7
+ - cpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "k_proj",
34
+ "up_proj",
35
+ "down_proj",
36
+ "q_proj",
37
+ "gate_proj",
38
+ "v_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-200/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b6a7a1799949e19405ea4a80e1bf4bc530d9447cdffc470d3860350906ed52
3
+ size 97307544
checkpoint-200/chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c10040a2dc536ed94a855ffed6c35ae910add4c1f2f3c77d4a8c977d212f9e
3
+ size 194846331
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a9f217e852f439efa6bd32fde98d6867f11aa6ea13ddc021ba10af6a0b0934
3
+ size 14645
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aac6971ca7b4125148bea59a564076d6e29fe2e213fc111d8f6e073bcc84d3f
3
+ size 1465
checkpoint-200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.055398128926754,
4
+ "best_model_checkpoint": "/workspace/ckpts/simpo-xsum-low-mid-high-0220_0518/checkpoint-200",
5
+ "epoch": 0.5509641873278237,
6
+ "eval_steps": 100,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.027548209366391185,
14
+ "grad_norm": 0.700347900390625,
15
+ "learning_rate": 9e-06,
16
+ "logits/chosen": 0.42375749349594116,
17
+ "logits/rejected": 0.15861473977565765,
18
+ "logps/chosen": -3.2552437782287598,
19
+ "logps/rejected": -2.853081464767456,
20
+ "loss": 1.711391067504883,
21
+ "nll_loss": 0.0,
22
+ "rewards/accuracies": 0.3125,
23
+ "rewards/chosen": -6.5104875564575195,
24
+ "rewards/margins": -0.8043240308761597,
25
+ "rewards/rejected": -5.706162929534912,
26
+ "step": 10
27
+ },
28
+ {
29
+ "epoch": 0.05509641873278237,
30
+ "grad_norm": 0.6906276345252991,
31
+ "learning_rate": 1.9e-05,
32
+ "logits/chosen": 0.4050915241241455,
33
+ "logits/rejected": 0.14284154772758484,
34
+ "logps/chosen": -2.8556487560272217,
35
+ "logps/rejected": -2.5371289253234863,
36
+ "loss": 1.5473010063171386,
37
+ "nll_loss": 0.0,
38
+ "rewards/accuracies": 0.25,
39
+ "rewards/chosen": -5.711297512054443,
40
+ "rewards/margins": -0.6370400190353394,
41
+ "rewards/rejected": -5.074257850646973,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.08264462809917356,
46
+ "grad_norm": 0.5033866167068481,
47
+ "learning_rate": 2.9e-05,
48
+ "logits/chosen": 0.4237564206123352,
49
+ "logits/rejected": 0.19409556686878204,
50
+ "logps/chosen": -2.314438581466675,
51
+ "logps/rejected": -2.211418390274048,
52
+ "loss": 1.250732707977295,
53
+ "nll_loss": 0.0,
54
+ "rewards/accuracies": 0.4625000059604645,
55
+ "rewards/chosen": -4.62887716293335,
56
+ "rewards/margins": -0.20604053139686584,
57
+ "rewards/rejected": -4.422836780548096,
58
+ "step": 30
59
+ },
60
+ {
61
+ "epoch": 0.11019283746556474,
62
+ "grad_norm": 0.5612661838531494,
63
+ "learning_rate": 3.9000000000000006e-05,
64
+ "logits/chosen": 0.1684650182723999,
65
+ "logits/rejected": -0.03625474497675896,
66
+ "logps/chosen": -1.9959722757339478,
67
+ "logps/rejected": -2.0295603275299072,
68
+ "loss": 1.053350067138672,
69
+ "nll_loss": 0.0,
70
+ "rewards/accuracies": 0.48750001192092896,
71
+ "rewards/chosen": -3.9919445514678955,
72
+ "rewards/margins": 0.06717614084482193,
73
+ "rewards/rejected": -4.0591206550598145,
74
+ "step": 40
75
+ },
76
+ {
77
+ "epoch": 0.13774104683195593,
78
+ "grad_norm": 0.6303589940071106,
79
+ "learning_rate": 4.9e-05,
80
+ "logits/chosen": 0.23949162662029266,
81
+ "logits/rejected": 0.07211676985025406,
82
+ "logps/chosen": -1.8928591012954712,
83
+ "logps/rejected": -2.2719273567199707,
84
+ "loss": 0.6624780654907226,
85
+ "nll_loss": 0.0,
86
+ "rewards/accuracies": 0.887499988079071,
87
+ "rewards/chosen": -3.7857182025909424,
88
+ "rewards/margins": 0.7581357955932617,
89
+ "rewards/rejected": -4.543854713439941,
90
+ "step": 50
91
+ },
92
+ {
93
+ "epoch": 0.1652892561983471,
94
+ "grad_norm": 0.35993871092796326,
95
+ "learning_rate": 4.856230031948882e-05,
96
+ "logits/chosen": 0.5467869639396667,
97
+ "logits/rejected": 0.279127836227417,
98
+ "logps/chosen": -4.613501071929932,
99
+ "logps/rejected": -6.5180158615112305,
100
+ "loss": 0.24324963092803956,
101
+ "nll_loss": 0.0,
102
+ "rewards/accuracies": 0.9624999761581421,
103
+ "rewards/chosen": -9.227002143859863,
104
+ "rewards/margins": 3.8090293407440186,
105
+ "rewards/rejected": -13.036031723022461,
106
+ "step": 60
107
+ },
108
+ {
109
+ "epoch": 0.1928374655647383,
110
+ "grad_norm": 1.6727081537246704,
111
+ "learning_rate": 4.696485623003195e-05,
112
+ "logits/chosen": 1.2077550888061523,
113
+ "logits/rejected": 1.0736474990844727,
114
+ "logps/chosen": -6.114351272583008,
115
+ "logps/rejected": -10.010709762573242,
116
+ "loss": 0.2190621852874756,
117
+ "nll_loss": 0.0,
118
+ "rewards/accuracies": 0.9125000238418579,
119
+ "rewards/chosen": -12.228702545166016,
120
+ "rewards/margins": 7.792716979980469,
121
+ "rewards/rejected": -20.021419525146484,
122
+ "step": 70
123
+ },
124
+ {
125
+ "epoch": 0.22038567493112948,
126
+ "grad_norm": 0.6117808818817139,
127
+ "learning_rate": 4.536741214057508e-05,
128
+ "logits/chosen": 2.431880235671997,
129
+ "logits/rejected": 2.384572744369507,
130
+ "logps/chosen": -12.877838134765625,
131
+ "logps/rejected": -18.886180877685547,
132
+ "loss": 0.09992958307266235,
133
+ "nll_loss": 0.0,
134
+ "rewards/accuracies": 0.9750000238418579,
135
+ "rewards/chosen": -25.75567626953125,
136
+ "rewards/margins": 12.016683578491211,
137
+ "rewards/rejected": -37.772361755371094,
138
+ "step": 80
139
+ },
140
+ {
141
+ "epoch": 0.24793388429752067,
142
+ "grad_norm": 0.7680425047874451,
143
+ "learning_rate": 4.376996805111822e-05,
144
+ "logits/chosen": 2.9793498516082764,
145
+ "logits/rejected": 2.9862256050109863,
146
+ "logps/chosen": -12.75178337097168,
147
+ "logps/rejected": -19.746002197265625,
148
+ "loss": 0.03834239840507507,
149
+ "nll_loss": 0.0,
150
+ "rewards/accuracies": 1.0,
151
+ "rewards/chosen": -25.50356674194336,
152
+ "rewards/margins": 13.988436698913574,
153
+ "rewards/rejected": -39.49200439453125,
154
+ "step": 90
155
+ },
156
+ {
157
+ "epoch": 0.27548209366391185,
158
+ "grad_norm": 0.06667827814817429,
159
+ "learning_rate": 4.217252396166134e-05,
160
+ "logits/chosen": 3.7260806560516357,
161
+ "logits/rejected": 3.7755050659179688,
162
+ "logps/chosen": -11.765376091003418,
163
+ "logps/rejected": -19.428905487060547,
164
+ "loss": 0.02194259166717529,
165
+ "nll_loss": 0.0,
166
+ "rewards/accuracies": 1.0,
167
+ "rewards/chosen": -23.530752182006836,
168
+ "rewards/margins": 15.327054023742676,
169
+ "rewards/rejected": -38.857810974121094,
170
+ "step": 100
171
+ },
172
+ {
173
+ "epoch": 0.27548209366391185,
174
+ "eval_logits/chosen": 5.057583808898926,
175
+ "eval_logits/rejected": 5.168581485748291,
176
+ "eval_logps/chosen": -17.074237823486328,
177
+ "eval_logps/rejected": -24.14946937561035,
178
+ "eval_loss": 0.0956403836607933,
179
+ "eval_nll_loss": 0.0,
180
+ "eval_rewards/accuracies": 0.9660714268684387,
181
+ "eval_rewards/chosen": -34.148475646972656,
182
+ "eval_rewards/margins": 14.150466918945312,
183
+ "eval_rewards/rejected": -48.2989387512207,
184
+ "eval_runtime": 40.0379,
185
+ "eval_samples_per_second": 13.912,
186
+ "eval_steps_per_second": 1.748,
187
+ "step": 100
188
+ },
189
+ {
190
+ "epoch": 0.30303030303030304,
191
+ "grad_norm": 0.7173494696617126,
192
+ "learning_rate": 4.0575079872204476e-05,
193
+ "logits/chosen": 3.5066864490509033,
194
+ "logits/rejected": 3.5185413360595703,
195
+ "logps/chosen": -9.856405258178711,
196
+ "logps/rejected": -16.73386001586914,
197
+ "loss": 0.12056845426559448,
198
+ "nll_loss": 0.0,
199
+ "rewards/accuracies": 0.9750000238418579,
200
+ "rewards/chosen": -19.712810516357422,
201
+ "rewards/margins": 13.754910469055176,
202
+ "rewards/rejected": -33.46772003173828,
203
+ "step": 110
204
+ },
205
+ {
206
+ "epoch": 0.3305785123966942,
207
+ "grad_norm": 1.3427457809448242,
208
+ "learning_rate": 3.8977635782747605e-05,
209
+ "logits/chosen": 2.0150222778320312,
210
+ "logits/rejected": 2.052603244781494,
211
+ "logps/chosen": -6.702250003814697,
212
+ "logps/rejected": -12.878804206848145,
213
+ "loss": 0.05156264305114746,
214
+ "nll_loss": 0.0,
215
+ "rewards/accuracies": 1.0,
216
+ "rewards/chosen": -13.404500007629395,
217
+ "rewards/margins": 12.353109359741211,
218
+ "rewards/rejected": -25.75760841369629,
219
+ "step": 120
220
+ },
221
+ {
222
+ "epoch": 0.3581267217630854,
223
+ "grad_norm": 3.5135276317596436,
224
+ "learning_rate": 3.738019169329074e-05,
225
+ "logits/chosen": 2.870805263519287,
226
+ "logits/rejected": 3.019796371459961,
227
+ "logps/chosen": -10.168031692504883,
228
+ "logps/rejected": -17.399675369262695,
229
+ "loss": 0.05100045800209045,
230
+ "nll_loss": 0.0,
231
+ "rewards/accuracies": 0.987500011920929,
232
+ "rewards/chosen": -20.336063385009766,
233
+ "rewards/margins": 14.463289260864258,
234
+ "rewards/rejected": -34.79935073852539,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.3856749311294766,
239
+ "grad_norm": 0.5818735957145691,
240
+ "learning_rate": 3.5782747603833865e-05,
241
+ "logits/chosen": 3.0271811485290527,
242
+ "logits/rejected": 3.1355438232421875,
243
+ "logps/chosen": -10.393911361694336,
244
+ "logps/rejected": -18.173566818237305,
245
+ "loss": 0.06783108711242676,
246
+ "nll_loss": 0.0,
247
+ "rewards/accuracies": 0.987500011920929,
248
+ "rewards/chosen": -20.787822723388672,
249
+ "rewards/margins": 15.55931282043457,
250
+ "rewards/rejected": -36.34713363647461,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.4132231404958678,
255
+ "grad_norm": 7.5330939292907715,
256
+ "learning_rate": 3.4185303514377e-05,
257
+ "logits/chosen": 3.923037052154541,
258
+ "logits/rejected": 4.009919166564941,
259
+ "logps/chosen": -15.482580184936523,
260
+ "logps/rejected": -23.399499893188477,
261
+ "loss": 0.12233117818832398,
262
+ "nll_loss": 0.0,
263
+ "rewards/accuracies": 0.9750000238418579,
264
+ "rewards/chosen": -30.965160369873047,
265
+ "rewards/margins": 15.833839416503906,
266
+ "rewards/rejected": -46.79899978637695,
267
+ "step": 150
268
+ },
269
+ {
270
+ "epoch": 0.44077134986225897,
271
+ "grad_norm": 0.0513874888420105,
272
+ "learning_rate": 3.258785942492013e-05,
273
+ "logits/chosen": 4.252791404724121,
274
+ "logits/rejected": 4.307219505310059,
275
+ "logps/chosen": -13.000323295593262,
276
+ "logps/rejected": -21.655231475830078,
277
+ "loss": 0.03463771939277649,
278
+ "nll_loss": 0.0,
279
+ "rewards/accuracies": 0.987500011920929,
280
+ "rewards/chosen": -26.000646591186523,
281
+ "rewards/margins": 17.309818267822266,
282
+ "rewards/rejected": -43.310462951660156,
283
+ "step": 160
284
+ },
285
+ {
286
+ "epoch": 0.46831955922865015,
287
+ "grad_norm": 9.692298772279173e-05,
288
+ "learning_rate": 3.099041533546326e-05,
289
+ "logits/chosen": 4.098459243774414,
290
+ "logits/rejected": 4.160243511199951,
291
+ "logps/chosen": -11.710687637329102,
292
+ "logps/rejected": -21.676971435546875,
293
+ "loss": 0.024555636942386626,
294
+ "nll_loss": 0.0,
295
+ "rewards/accuracies": 0.987500011920929,
296
+ "rewards/chosen": -23.421375274658203,
297
+ "rewards/margins": 19.932567596435547,
298
+ "rewards/rejected": -43.35394287109375,
299
+ "step": 170
300
+ },
301
+ {
302
+ "epoch": 0.49586776859504134,
303
+ "grad_norm": 1.611127495765686,
304
+ "learning_rate": 2.939297124600639e-05,
305
+ "logits/chosen": 4.176002025604248,
306
+ "logits/rejected": 4.312728404998779,
307
+ "logps/chosen": -13.934991836547852,
308
+ "logps/rejected": -24.690296173095703,
309
+ "loss": 0.02139030247926712,
310
+ "nll_loss": 0.0,
311
+ "rewards/accuracies": 1.0,
312
+ "rewards/chosen": -27.869983673095703,
313
+ "rewards/margins": 21.51060676574707,
314
+ "rewards/rejected": -49.380592346191406,
315
+ "step": 180
316
+ },
317
+ {
318
+ "epoch": 0.5234159779614325,
319
+ "grad_norm": 1.21311616897583,
320
+ "learning_rate": 2.7795527156549523e-05,
321
+ "logits/chosen": 3.4004039764404297,
322
+ "logits/rejected": 3.5335049629211426,
323
+ "logps/chosen": -9.917387008666992,
324
+ "logps/rejected": -21.205930709838867,
325
+ "loss": 0.028738826513290405,
326
+ "nll_loss": 0.0,
327
+ "rewards/accuracies": 0.987500011920929,
328
+ "rewards/chosen": -19.834774017333984,
329
+ "rewards/margins": 22.577091217041016,
330
+ "rewards/rejected": -42.411861419677734,
331
+ "step": 190
332
+ },
333
+ {
334
+ "epoch": 0.5509641873278237,
335
+ "grad_norm": 8.95163631439209,
336
+ "learning_rate": 2.6198083067092656e-05,
337
+ "logits/chosen": 3.074721336364746,
338
+ "logits/rejected": 3.1740307807922363,
339
+ "logps/chosen": -10.301236152648926,
340
+ "logps/rejected": -22.369190216064453,
341
+ "loss": 0.11196700334548951,
342
+ "nll_loss": 0.0,
343
+ "rewards/accuracies": 0.9750000238418579,
344
+ "rewards/chosen": -20.60247230529785,
345
+ "rewards/margins": 24.135910034179688,
346
+ "rewards/rejected": -44.738380432128906,
347
+ "step": 200
348
+ },
349
+ {
350
+ "epoch": 0.5509641873278237,
351
+ "eval_logits/chosen": 3.9634761810302734,
352
+ "eval_logits/rejected": 4.095207691192627,
353
+ "eval_logps/chosen": -10.509795188903809,
354
+ "eval_logps/rejected": -22.200040817260742,
355
+ "eval_loss": 0.055398128926754,
356
+ "eval_nll_loss": 0.0,
357
+ "eval_rewards/accuracies": 0.9857142567634583,
358
+ "eval_rewards/chosen": -21.019590377807617,
359
+ "eval_rewards/margins": 23.380483627319336,
360
+ "eval_rewards/rejected": -44.400081634521484,
361
+ "eval_runtime": 40.0513,
362
+ "eval_samples_per_second": 13.907,
363
+ "eval_steps_per_second": 1.748,
364
+ "step": 200
365
+ }
366
+ ],
367
+ "logging_steps": 10,
368
+ "max_steps": 363,
369
+ "num_input_tokens_seen": 0,
370
+ "num_train_epochs": 1,
371
+ "save_steps": 200,
372
+ "stateful_callbacks": {
373
+ "TrainerControl": {
374
+ "args": {
375
+ "should_epoch_stop": false,
376
+ "should_evaluate": false,
377
+ "should_log": false,
378
+ "should_save": true,
379
+ "should_training_stop": false
380
+ },
381
+ "attributes": {}
382
+ }
383
+ },
384
+ "total_flos": 0.0,
385
+ "train_batch_size": 2,
386
+ "trial_name": null,
387
+ "trial_params": null
388
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3bae6079ee85f272ee5c1148388807254b89f462d2cec3c58b92c2a1cb918d
3
+ size 5585
checkpoint-363/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Llama-3.2-3B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:meta-llama/Llama-3.2-3B-Instruct
7
+ - cpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.1
checkpoint-363/adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "o_proj",
33
+ "k_proj",
34
+ "up_proj",
35
+ "down_proj",
36
+ "q_proj",
37
+ "gate_proj",
38
+ "v_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
checkpoint-363/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ba300c39aa9407a9d2664880c936d5433dd8f22fb86115d09205c8e8d96818d
3
+ size 97307544
checkpoint-363/chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined %}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if not tools_in_user_message is defined %}
6
+ {%- set tools_in_user_message = true %}
7
+ {%- endif %}
8
+ {%- if not date_string is defined %}
9
+ {%- if strftime_now is defined %}
10
+ {%- set date_string = strftime_now("%d %b %Y") %}
11
+ {%- else %}
12
+ {%- set date_string = "26 Jul 2024" %}
13
+ {%- endif %}
14
+ {%- endif %}
15
+ {%- if not tools is defined %}
16
+ {%- set tools = none %}
17
+ {%- endif %}
18
+
19
+ {#- This block extracts the system message, so we can slot it into the right place. #}
20
+ {%- if messages[0]['role'] == 'system' %}
21
+ {%- set system_message = messages[0]['content']|trim %}
22
+ {%- set messages = messages[1:] %}
23
+ {%- else %}
24
+ {%- set system_message = "" %}
25
+ {%- endif %}
26
+
27
+ {#- System message #}
28
+ {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
29
+ {%- if tools is not none %}
30
+ {{- "Environment: ipython\n" }}
31
+ {%- endif %}
32
+ {{- "Cutting Knowledge Date: December 2023\n" }}
33
+ {{- "Today Date: " + date_string + "\n\n" }}
34
+ {%- if tools is not none and not tools_in_user_message %}
35
+ {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
36
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
37
+ {{- "Do not use variables.\n\n" }}
38
+ {%- for t in tools %}
39
+ {{- t | tojson(indent=4) }}
40
+ {{- "\n\n" }}
41
+ {%- endfor %}
42
+ {%- endif %}
43
+ {{- system_message }}
44
+ {{- "<|eot_id|>" }}
45
+
46
+ {#- Custom tools are passed in a user message with some extra guidance #}
47
+ {%- if tools_in_user_message and not tools is none %}
48
+ {#- Extract the first user message so we can plug it in here #}
49
+ {%- if messages | length != 0 %}
50
+ {%- set first_user_message = messages[0]['content']|trim %}
51
+ {%- set messages = messages[1:] %}
52
+ {%- else %}
53
+ {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
54
+ {%- endif %}
55
+ {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
56
+ {{- "Given the following functions, please respond with a JSON for a function call " }}
57
+ {{- "with its proper arguments that best answers the given prompt.\n\n" }}
58
+ {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
59
+ {{- "Do not use variables.\n\n" }}
60
+ {%- for t in tools %}
61
+ {{- t | tojson(indent=4) }}
62
+ {{- "\n\n" }}
63
+ {%- endfor %}
64
+ {{- first_user_message + "<|eot_id|>"}}
65
+ {%- endif %}
66
+
67
+ {%- for message in messages %}
68
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
69
+ {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
70
+ {%- elif 'tool_calls' in message %}
71
+ {%- if not message.tool_calls|length == 1 %}
72
+ {{- raise_exception("This model only supports single tool-calls at once!") }}
73
+ {%- endif %}
74
+ {%- set tool_call = message.tool_calls[0].function %}
75
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
76
+ {{- '{"name": "' + tool_call.name + '", ' }}
77
+ {{- '"parameters": ' }}
78
+ {{- tool_call.arguments | tojson }}
79
+ {{- "}" }}
80
+ {{- "<|eot_id|>" }}
81
+ {%- elif message.role == "tool" or message.role == "ipython" %}
82
+ {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
83
+ {%- if message.content is mapping or message.content is iterable %}
84
+ {{- message.content | tojson }}
85
+ {%- else %}
86
+ {{- message.content }}
87
+ {%- endif %}
88
+ {{- "<|eot_id|>" }}
89
+ {%- endif %}
90
+ {%- endfor %}
91
+ {%- if add_generation_prompt %}
92
+ {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
93
+ {%- endif %}
checkpoint-363/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8a9e245ea537c892b222285fb04eb5f408b6e146bd058f237fae6b7cdac7036
3
+ size 194846331
checkpoint-363/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
3
+ size 14645
checkpoint-363/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c423b41cddc772a681bf0ccebe0fe1a1ad6cd26b18884ca76701b58d6f95ba
3
+ size 1465
checkpoint-363/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
checkpoint-363/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
checkpoint-363/trainer_state.json ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.055398128926754,
4
+ "best_model_checkpoint": "/workspace/ckpts/simpo-xsum-low-mid-high-0220_0518/checkpoint-200",
5
+ "epoch": 1.0,
6
+ "eval_steps": 100,
7
+ "global_step": 363,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.027548209366391185,
14
+ "grad_norm": 0.700347900390625,
15
+ "learning_rate": 9e-06,
16
+ "logits/chosen": 0.42375749349594116,
17
+ "logits/rejected": 0.15861473977565765,
18
+ "logps/chosen": -3.2552437782287598,
19
+ "logps/rejected": -2.853081464767456,
20
+ "loss": 1.711391067504883,
21
+ "nll_loss": 0.0,
22
+ "rewards/accuracies": 0.3125,
23
+ "rewards/chosen": -6.5104875564575195,
24
+ "rewards/margins": -0.8043240308761597,
25
+ "rewards/rejected": -5.706162929534912,
26
+ "step": 10
27
+ },
28
+ {
29
+ "epoch": 0.05509641873278237,
30
+ "grad_norm": 0.6906276345252991,
31
+ "learning_rate": 1.9e-05,
32
+ "logits/chosen": 0.4050915241241455,
33
+ "logits/rejected": 0.14284154772758484,
34
+ "logps/chosen": -2.8556487560272217,
35
+ "logps/rejected": -2.5371289253234863,
36
+ "loss": 1.5473010063171386,
37
+ "nll_loss": 0.0,
38
+ "rewards/accuracies": 0.25,
39
+ "rewards/chosen": -5.711297512054443,
40
+ "rewards/margins": -0.6370400190353394,
41
+ "rewards/rejected": -5.074257850646973,
42
+ "step": 20
43
+ },
44
+ {
45
+ "epoch": 0.08264462809917356,
46
+ "grad_norm": 0.5033866167068481,
47
+ "learning_rate": 2.9e-05,
48
+ "logits/chosen": 0.4237564206123352,
49
+ "logits/rejected": 0.19409556686878204,
50
+ "logps/chosen": -2.314438581466675,
51
+ "logps/rejected": -2.211418390274048,
52
+ "loss": 1.250732707977295,
53
+ "nll_loss": 0.0,
54
+ "rewards/accuracies": 0.4625000059604645,
55
+ "rewards/chosen": -4.62887716293335,
56
+ "rewards/margins": -0.20604053139686584,
57
+ "rewards/rejected": -4.422836780548096,
58
+ "step": 30
59
+ },
60
+ {
61
+ "epoch": 0.11019283746556474,
62
+ "grad_norm": 0.5612661838531494,
63
+ "learning_rate": 3.9000000000000006e-05,
64
+ "logits/chosen": 0.1684650182723999,
65
+ "logits/rejected": -0.03625474497675896,
66
+ "logps/chosen": -1.9959722757339478,
67
+ "logps/rejected": -2.0295603275299072,
68
+ "loss": 1.053350067138672,
69
+ "nll_loss": 0.0,
70
+ "rewards/accuracies": 0.48750001192092896,
71
+ "rewards/chosen": -3.9919445514678955,
72
+ "rewards/margins": 0.06717614084482193,
73
+ "rewards/rejected": -4.0591206550598145,
74
+ "step": 40
75
+ },
76
+ {
77
+ "epoch": 0.13774104683195593,
78
+ "grad_norm": 0.6303589940071106,
79
+ "learning_rate": 4.9e-05,
80
+ "logits/chosen": 0.23949162662029266,
81
+ "logits/rejected": 0.07211676985025406,
82
+ "logps/chosen": -1.8928591012954712,
83
+ "logps/rejected": -2.2719273567199707,
84
+ "loss": 0.6624780654907226,
85
+ "nll_loss": 0.0,
86
+ "rewards/accuracies": 0.887499988079071,
87
+ "rewards/chosen": -3.7857182025909424,
88
+ "rewards/margins": 0.7581357955932617,
89
+ "rewards/rejected": -4.543854713439941,
90
+ "step": 50
91
+ },
92
+ {
93
+ "epoch": 0.1652892561983471,
94
+ "grad_norm": 0.35993871092796326,
95
+ "learning_rate": 4.856230031948882e-05,
96
+ "logits/chosen": 0.5467869639396667,
97
+ "logits/rejected": 0.279127836227417,
98
+ "logps/chosen": -4.613501071929932,
99
+ "logps/rejected": -6.5180158615112305,
100
+ "loss": 0.24324963092803956,
101
+ "nll_loss": 0.0,
102
+ "rewards/accuracies": 0.9624999761581421,
103
+ "rewards/chosen": -9.227002143859863,
104
+ "rewards/margins": 3.8090293407440186,
105
+ "rewards/rejected": -13.036031723022461,
106
+ "step": 60
107
+ },
108
+ {
109
+ "epoch": 0.1928374655647383,
110
+ "grad_norm": 1.6727081537246704,
111
+ "learning_rate": 4.696485623003195e-05,
112
+ "logits/chosen": 1.2077550888061523,
113
+ "logits/rejected": 1.0736474990844727,
114
+ "logps/chosen": -6.114351272583008,
115
+ "logps/rejected": -10.010709762573242,
116
+ "loss": 0.2190621852874756,
117
+ "nll_loss": 0.0,
118
+ "rewards/accuracies": 0.9125000238418579,
119
+ "rewards/chosen": -12.228702545166016,
120
+ "rewards/margins": 7.792716979980469,
121
+ "rewards/rejected": -20.021419525146484,
122
+ "step": 70
123
+ },
124
+ {
125
+ "epoch": 0.22038567493112948,
126
+ "grad_norm": 0.6117808818817139,
127
+ "learning_rate": 4.536741214057508e-05,
128
+ "logits/chosen": 2.431880235671997,
129
+ "logits/rejected": 2.384572744369507,
130
+ "logps/chosen": -12.877838134765625,
131
+ "logps/rejected": -18.886180877685547,
132
+ "loss": 0.09992958307266235,
133
+ "nll_loss": 0.0,
134
+ "rewards/accuracies": 0.9750000238418579,
135
+ "rewards/chosen": -25.75567626953125,
136
+ "rewards/margins": 12.016683578491211,
137
+ "rewards/rejected": -37.772361755371094,
138
+ "step": 80
139
+ },
140
+ {
141
+ "epoch": 0.24793388429752067,
142
+ "grad_norm": 0.7680425047874451,
143
+ "learning_rate": 4.376996805111822e-05,
144
+ "logits/chosen": 2.9793498516082764,
145
+ "logits/rejected": 2.9862256050109863,
146
+ "logps/chosen": -12.75178337097168,
147
+ "logps/rejected": -19.746002197265625,
148
+ "loss": 0.03834239840507507,
149
+ "nll_loss": 0.0,
150
+ "rewards/accuracies": 1.0,
151
+ "rewards/chosen": -25.50356674194336,
152
+ "rewards/margins": 13.988436698913574,
153
+ "rewards/rejected": -39.49200439453125,
154
+ "step": 90
155
+ },
156
+ {
157
+ "epoch": 0.27548209366391185,
158
+ "grad_norm": 0.06667827814817429,
159
+ "learning_rate": 4.217252396166134e-05,
160
+ "logits/chosen": 3.7260806560516357,
161
+ "logits/rejected": 3.7755050659179688,
162
+ "logps/chosen": -11.765376091003418,
163
+ "logps/rejected": -19.428905487060547,
164
+ "loss": 0.02194259166717529,
165
+ "nll_loss": 0.0,
166
+ "rewards/accuracies": 1.0,
167
+ "rewards/chosen": -23.530752182006836,
168
+ "rewards/margins": 15.327054023742676,
169
+ "rewards/rejected": -38.857810974121094,
170
+ "step": 100
171
+ },
172
+ {
173
+ "epoch": 0.27548209366391185,
174
+ "eval_logits/chosen": 5.057583808898926,
175
+ "eval_logits/rejected": 5.168581485748291,
176
+ "eval_logps/chosen": -17.074237823486328,
177
+ "eval_logps/rejected": -24.14946937561035,
178
+ "eval_loss": 0.0956403836607933,
179
+ "eval_nll_loss": 0.0,
180
+ "eval_rewards/accuracies": 0.9660714268684387,
181
+ "eval_rewards/chosen": -34.148475646972656,
182
+ "eval_rewards/margins": 14.150466918945312,
183
+ "eval_rewards/rejected": -48.2989387512207,
184
+ "eval_runtime": 40.0379,
185
+ "eval_samples_per_second": 13.912,
186
+ "eval_steps_per_second": 1.748,
187
+ "step": 100
188
+ },
189
+ {
190
+ "epoch": 0.30303030303030304,
191
+ "grad_norm": 0.7173494696617126,
192
+ "learning_rate": 4.0575079872204476e-05,
193
+ "logits/chosen": 3.5066864490509033,
194
+ "logits/rejected": 3.5185413360595703,
195
+ "logps/chosen": -9.856405258178711,
196
+ "logps/rejected": -16.73386001586914,
197
+ "loss": 0.12056845426559448,
198
+ "nll_loss": 0.0,
199
+ "rewards/accuracies": 0.9750000238418579,
200
+ "rewards/chosen": -19.712810516357422,
201
+ "rewards/margins": 13.754910469055176,
202
+ "rewards/rejected": -33.46772003173828,
203
+ "step": 110
204
+ },
205
+ {
206
+ "epoch": 0.3305785123966942,
207
+ "grad_norm": 1.3427457809448242,
208
+ "learning_rate": 3.8977635782747605e-05,
209
+ "logits/chosen": 2.0150222778320312,
210
+ "logits/rejected": 2.052603244781494,
211
+ "logps/chosen": -6.702250003814697,
212
+ "logps/rejected": -12.878804206848145,
213
+ "loss": 0.05156264305114746,
214
+ "nll_loss": 0.0,
215
+ "rewards/accuracies": 1.0,
216
+ "rewards/chosen": -13.404500007629395,
217
+ "rewards/margins": 12.353109359741211,
218
+ "rewards/rejected": -25.75760841369629,
219
+ "step": 120
220
+ },
221
+ {
222
+ "epoch": 0.3581267217630854,
223
+ "grad_norm": 3.5135276317596436,
224
+ "learning_rate": 3.738019169329074e-05,
225
+ "logits/chosen": 2.870805263519287,
226
+ "logits/rejected": 3.019796371459961,
227
+ "logps/chosen": -10.168031692504883,
228
+ "logps/rejected": -17.399675369262695,
229
+ "loss": 0.05100045800209045,
230
+ "nll_loss": 0.0,
231
+ "rewards/accuracies": 0.987500011920929,
232
+ "rewards/chosen": -20.336063385009766,
233
+ "rewards/margins": 14.463289260864258,
234
+ "rewards/rejected": -34.79935073852539,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.3856749311294766,
239
+ "grad_norm": 0.5818735957145691,
240
+ "learning_rate": 3.5782747603833865e-05,
241
+ "logits/chosen": 3.0271811485290527,
242
+ "logits/rejected": 3.1355438232421875,
243
+ "logps/chosen": -10.393911361694336,
244
+ "logps/rejected": -18.173566818237305,
245
+ "loss": 0.06783108711242676,
246
+ "nll_loss": 0.0,
247
+ "rewards/accuracies": 0.987500011920929,
248
+ "rewards/chosen": -20.787822723388672,
249
+ "rewards/margins": 15.55931282043457,
250
+ "rewards/rejected": -36.34713363647461,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.4132231404958678,
255
+ "grad_norm": 7.5330939292907715,
256
+ "learning_rate": 3.4185303514377e-05,
257
+ "logits/chosen": 3.923037052154541,
258
+ "logits/rejected": 4.009919166564941,
259
+ "logps/chosen": -15.482580184936523,
260
+ "logps/rejected": -23.399499893188477,
261
+ "loss": 0.12233117818832398,
262
+ "nll_loss": 0.0,
263
+ "rewards/accuracies": 0.9750000238418579,
264
+ "rewards/chosen": -30.965160369873047,
265
+ "rewards/margins": 15.833839416503906,
266
+ "rewards/rejected": -46.79899978637695,
267
+ "step": 150
268
+ },
269
+ {
270
+ "epoch": 0.44077134986225897,
271
+ "grad_norm": 0.0513874888420105,
272
+ "learning_rate": 3.258785942492013e-05,
273
+ "logits/chosen": 4.252791404724121,
274
+ "logits/rejected": 4.307219505310059,
275
+ "logps/chosen": -13.000323295593262,
276
+ "logps/rejected": -21.655231475830078,
277
+ "loss": 0.03463771939277649,
278
+ "nll_loss": 0.0,
279
+ "rewards/accuracies": 0.987500011920929,
280
+ "rewards/chosen": -26.000646591186523,
281
+ "rewards/margins": 17.309818267822266,
282
+ "rewards/rejected": -43.310462951660156,
283
+ "step": 160
284
+ },
285
+ {
286
+ "epoch": 0.46831955922865015,
287
+ "grad_norm": 9.692298772279173e-05,
288
+ "learning_rate": 3.099041533546326e-05,
289
+ "logits/chosen": 4.098459243774414,
290
+ "logits/rejected": 4.160243511199951,
291
+ "logps/chosen": -11.710687637329102,
292
+ "logps/rejected": -21.676971435546875,
293
+ "loss": 0.024555636942386626,
294
+ "nll_loss": 0.0,
295
+ "rewards/accuracies": 0.987500011920929,
296
+ "rewards/chosen": -23.421375274658203,
297
+ "rewards/margins": 19.932567596435547,
298
+ "rewards/rejected": -43.35394287109375,
299
+ "step": 170
300
+ },
301
+ {
302
+ "epoch": 0.49586776859504134,
303
+ "grad_norm": 1.611127495765686,
304
+ "learning_rate": 2.939297124600639e-05,
305
+ "logits/chosen": 4.176002025604248,
306
+ "logits/rejected": 4.312728404998779,
307
+ "logps/chosen": -13.934991836547852,
308
+ "logps/rejected": -24.690296173095703,
309
+ "loss": 0.02139030247926712,
310
+ "nll_loss": 0.0,
311
+ "rewards/accuracies": 1.0,
312
+ "rewards/chosen": -27.869983673095703,
313
+ "rewards/margins": 21.51060676574707,
314
+ "rewards/rejected": -49.380592346191406,
315
+ "step": 180
316
+ },
317
+ {
318
+ "epoch": 0.5234159779614325,
319
+ "grad_norm": 1.21311616897583,
320
+ "learning_rate": 2.7795527156549523e-05,
321
+ "logits/chosen": 3.4004039764404297,
322
+ "logits/rejected": 3.5335049629211426,
323
+ "logps/chosen": -9.917387008666992,
324
+ "logps/rejected": -21.205930709838867,
325
+ "loss": 0.028738826513290405,
326
+ "nll_loss": 0.0,
327
+ "rewards/accuracies": 0.987500011920929,
328
+ "rewards/chosen": -19.834774017333984,
329
+ "rewards/margins": 22.577091217041016,
330
+ "rewards/rejected": -42.411861419677734,
331
+ "step": 190
332
+ },
333
+ {
334
+ "epoch": 0.5509641873278237,
335
+ "grad_norm": 8.95163631439209,
336
+ "learning_rate": 2.6198083067092656e-05,
337
+ "logits/chosen": 3.074721336364746,
338
+ "logits/rejected": 3.1740307807922363,
339
+ "logps/chosen": -10.301236152648926,
340
+ "logps/rejected": -22.369190216064453,
341
+ "loss": 0.11196700334548951,
342
+ "nll_loss": 0.0,
343
+ "rewards/accuracies": 0.9750000238418579,
344
+ "rewards/chosen": -20.60247230529785,
345
+ "rewards/margins": 24.135910034179688,
346
+ "rewards/rejected": -44.738380432128906,
347
+ "step": 200
348
+ },
349
+ {
350
+ "epoch": 0.5509641873278237,
351
+ "eval_logits/chosen": 3.9634761810302734,
352
+ "eval_logits/rejected": 4.095207691192627,
353
+ "eval_logps/chosen": -10.509795188903809,
354
+ "eval_logps/rejected": -22.200040817260742,
355
+ "eval_loss": 0.055398128926754,
356
+ "eval_nll_loss": 0.0,
357
+ "eval_rewards/accuracies": 0.9857142567634583,
358
+ "eval_rewards/chosen": -21.019590377807617,
359
+ "eval_rewards/margins": 23.380483627319336,
360
+ "eval_rewards/rejected": -44.400081634521484,
361
+ "eval_runtime": 40.0513,
362
+ "eval_samples_per_second": 13.907,
363
+ "eval_steps_per_second": 1.748,
364
+ "step": 200
365
+ },
366
+ {
367
+ "epoch": 0.5785123966942148,
368
+ "grad_norm": 2.871490955352783,
369
+ "learning_rate": 2.4600638977635785e-05,
370
+ "logits/chosen": 2.7474374771118164,
371
+ "logits/rejected": 2.8057074546813965,
372
+ "logps/chosen": -6.542741298675537,
373
+ "logps/rejected": -17.251813888549805,
374
+ "loss": 0.05973906517028808,
375
+ "nll_loss": 0.0,
376
+ "rewards/accuracies": 1.0,
377
+ "rewards/chosen": -13.085482597351074,
378
+ "rewards/margins": 21.418142318725586,
379
+ "rewards/rejected": -34.50362777709961,
380
+ "step": 210
381
+ },
382
+ {
383
+ "epoch": 0.6060606060606061,
384
+ "grad_norm": 0.013486926443874836,
385
+ "learning_rate": 2.3003194888178915e-05,
386
+ "logits/chosen": 2.420773506164551,
387
+ "logits/rejected": 2.4592440128326416,
388
+ "logps/chosen": -4.120817184448242,
389
+ "logps/rejected": -13.536539077758789,
390
+ "loss": 0.03502379059791565,
391
+ "nll_loss": 0.0,
392
+ "rewards/accuracies": 1.0,
393
+ "rewards/chosen": -8.241634368896484,
394
+ "rewards/margins": 18.831443786621094,
395
+ "rewards/rejected": -27.073078155517578,
396
+ "step": 220
397
+ },
398
+ {
399
+ "epoch": 0.6336088154269972,
400
+ "grad_norm": 4.076554775238037,
401
+ "learning_rate": 2.1405750798722048e-05,
402
+ "logits/chosen": 3.2621731758117676,
403
+ "logits/rejected": 3.4068379402160645,
404
+ "logps/chosen": -7.666436672210693,
405
+ "logps/rejected": -18.324886322021484,
406
+ "loss": 0.12405169010162354,
407
+ "nll_loss": 0.0,
408
+ "rewards/accuracies": 0.9624999761581421,
409
+ "rewards/chosen": -15.332873344421387,
410
+ "rewards/margins": 21.316898345947266,
411
+ "rewards/rejected": -36.64977264404297,
412
+ "step": 230
413
+ },
414
+ {
415
+ "epoch": 0.6611570247933884,
416
+ "grad_norm": 1.421222448348999,
417
+ "learning_rate": 1.9808306709265177e-05,
418
+ "logits/chosen": 3.560548782348633,
419
+ "logits/rejected": 3.6851909160614014,
420
+ "logps/chosen": -9.624505996704102,
421
+ "logps/rejected": -20.099857330322266,
422
+ "loss": 0.03761735558509827,
423
+ "nll_loss": 0.0,
424
+ "rewards/accuracies": 0.987500011920929,
425
+ "rewards/chosen": -19.249011993408203,
426
+ "rewards/margins": 20.95070457458496,
427
+ "rewards/rejected": -40.19971466064453,
428
+ "step": 240
429
+ },
430
+ {
431
+ "epoch": 0.6887052341597796,
432
+ "grad_norm": 0.0010234045330435038,
433
+ "learning_rate": 1.8210862619808307e-05,
434
+ "logits/chosen": 3.57916259765625,
435
+ "logits/rejected": 3.6618869304656982,
436
+ "logps/chosen": -8.578134536743164,
437
+ "logps/rejected": -20.71200180053711,
438
+ "loss": 0.09314435720443726,
439
+ "nll_loss": 0.0,
440
+ "rewards/accuracies": 0.9750000238418579,
441
+ "rewards/chosen": -17.156269073486328,
442
+ "rewards/margins": 24.26773452758789,
443
+ "rewards/rejected": -41.42400360107422,
444
+ "step": 250
445
+ },
446
+ {
447
+ "epoch": 0.7162534435261708,
448
+ "grad_norm": 0.12575837969779968,
449
+ "learning_rate": 1.6613418530351437e-05,
450
+ "logits/chosen": 3.0451531410217285,
451
+ "logits/rejected": 3.036803722381592,
452
+ "logps/chosen": -5.424054145812988,
453
+ "logps/rejected": -15.074081420898438,
454
+ "loss": 0.013192214071750641,
455
+ "nll_loss": 0.0,
456
+ "rewards/accuracies": 1.0,
457
+ "rewards/chosen": -10.848108291625977,
458
+ "rewards/margins": 19.3000545501709,
459
+ "rewards/rejected": -30.148162841796875,
460
+ "step": 260
461
+ },
462
+ {
463
+ "epoch": 0.743801652892562,
464
+ "grad_norm": 3.9852426052093506,
465
+ "learning_rate": 1.501597444089457e-05,
466
+ "logits/chosen": 3.3318965435028076,
467
+ "logits/rejected": 3.3611156940460205,
468
+ "logps/chosen": -6.45603084564209,
469
+ "logps/rejected": -16.13960075378418,
470
+ "loss": 0.1191794753074646,
471
+ "nll_loss": 0.0,
472
+ "rewards/accuracies": 0.949999988079071,
473
+ "rewards/chosen": -12.91206169128418,
474
+ "rewards/margins": 19.367137908935547,
475
+ "rewards/rejected": -32.27920150756836,
476
+ "step": 270
477
+ },
478
+ {
479
+ "epoch": 0.7713498622589532,
480
+ "grad_norm": 0.00017260252207051963,
481
+ "learning_rate": 1.34185303514377e-05,
482
+ "logits/chosen": 2.8985068798065186,
483
+ "logits/rejected": 2.9405500888824463,
484
+ "logps/chosen": -5.470853328704834,
485
+ "logps/rejected": -17.427152633666992,
486
+ "loss": 0.11677546501159668,
487
+ "nll_loss": 0.0,
488
+ "rewards/accuracies": 0.9624999761581421,
489
+ "rewards/chosen": -10.941706657409668,
490
+ "rewards/margins": 23.912601470947266,
491
+ "rewards/rejected": -34.854305267333984,
492
+ "step": 280
493
+ },
494
+ {
495
+ "epoch": 0.7988980716253443,
496
+ "grad_norm": 1.2612056732177734,
497
+ "learning_rate": 1.182108626198083e-05,
498
+ "logits/chosen": 2.612856864929199,
499
+ "logits/rejected": 2.5626139640808105,
500
+ "logps/chosen": -4.082730770111084,
501
+ "logps/rejected": -15.125768661499023,
502
+ "loss": 0.06155251264572144,
503
+ "nll_loss": 0.0,
504
+ "rewards/accuracies": 0.987500011920929,
505
+ "rewards/chosen": -8.165461540222168,
506
+ "rewards/margins": 22.086074829101562,
507
+ "rewards/rejected": -30.251537322998047,
508
+ "step": 290
509
+ },
510
+ {
511
+ "epoch": 0.8264462809917356,
512
+ "grad_norm": 0.00040509909740649164,
513
+ "learning_rate": 1.0223642172523962e-05,
514
+ "logits/chosen": 2.4162771701812744,
515
+ "logits/rejected": 2.3998305797576904,
516
+ "logps/chosen": -3.854151487350464,
517
+ "logps/rejected": -13.961355209350586,
518
+ "loss": 0.024074266850948333,
519
+ "nll_loss": 0.0,
520
+ "rewards/accuracies": 1.0,
521
+ "rewards/chosen": -7.708302974700928,
522
+ "rewards/margins": 20.214406967163086,
523
+ "rewards/rejected": -27.922710418701172,
524
+ "step": 300
525
+ },
526
+ {
527
+ "epoch": 0.8264462809917356,
528
+ "eval_logits/chosen": 3.4136674404144287,
529
+ "eval_logits/rejected": 3.45212721824646,
530
+ "eval_logps/chosen": -4.449319362640381,
531
+ "eval_logps/rejected": -15.445338249206543,
532
+ "eval_loss": 0.0694827288389206,
533
+ "eval_nll_loss": 0.0,
534
+ "eval_rewards/accuracies": 0.9857142567634583,
535
+ "eval_rewards/chosen": -8.898638725280762,
536
+ "eval_rewards/margins": 21.992034912109375,
537
+ "eval_rewards/rejected": -30.890676498413086,
538
+ "eval_runtime": 40.0626,
539
+ "eval_samples_per_second": 13.903,
540
+ "eval_steps_per_second": 1.747,
541
+ "step": 300
542
+ },
543
+ {
544
+ "epoch": 0.8539944903581267,
545
+ "grad_norm": 1.521673560142517,
546
+ "learning_rate": 8.626198083067093e-06,
547
+ "logits/chosen": 2.45418381690979,
548
+ "logits/rejected": 2.533856153488159,
549
+ "logps/chosen": -4.843596458435059,
550
+ "logps/rejected": -17.073049545288086,
551
+ "loss": 0.08694828748703003,
552
+ "nll_loss": 0.0,
553
+ "rewards/accuracies": 0.987500011920929,
554
+ "rewards/chosen": -9.687192916870117,
555
+ "rewards/margins": 24.458908081054688,
556
+ "rewards/rejected": -34.14609909057617,
557
+ "step": 310
558
+ },
559
+ {
560
+ "epoch": 0.8815426997245179,
561
+ "grad_norm": 2.497852165106451e-07,
562
+ "learning_rate": 7.0287539936102235e-06,
563
+ "logits/chosen": 2.896289110183716,
564
+ "logits/rejected": 2.986301898956299,
565
+ "logps/chosen": -6.26573371887207,
566
+ "logps/rejected": -19.259273529052734,
567
+ "loss": 0.04625120460987091,
568
+ "nll_loss": 0.0,
569
+ "rewards/accuracies": 0.9750000238418579,
570
+ "rewards/chosen": -12.53146743774414,
571
+ "rewards/margins": 25.98708152770996,
572
+ "rewards/rejected": -38.51854705810547,
573
+ "step": 320
574
+ },
575
+ {
576
+ "epoch": 0.9090909090909091,
577
+ "grad_norm": 0.003939513117074966,
578
+ "learning_rate": 5.431309904153355e-06,
579
+ "logits/chosen": 2.6858134269714355,
580
+ "logits/rejected": 2.763576030731201,
581
+ "logps/chosen": -6.823577880859375,
582
+ "logps/rejected": -17.725753784179688,
583
+ "loss": 0.043733158707618715,
584
+ "nll_loss": 0.0,
585
+ "rewards/accuracies": 0.987500011920929,
586
+ "rewards/chosen": -13.64715576171875,
587
+ "rewards/margins": 21.804353713989258,
588
+ "rewards/rejected": -35.451507568359375,
589
+ "step": 330
590
+ },
591
+ {
592
+ "epoch": 0.9366391184573003,
593
+ "grad_norm": 0.0016138068167492747,
594
+ "learning_rate": 3.833865814696485e-06,
595
+ "logits/chosen": 3.1492819786071777,
596
+ "logits/rejected": 3.2142040729522705,
597
+ "logps/chosen": -7.908546447753906,
598
+ "logps/rejected": -21.04575538635254,
599
+ "loss": 0.004329894855618477,
600
+ "nll_loss": 0.0,
601
+ "rewards/accuracies": 1.0,
602
+ "rewards/chosen": -15.817092895507812,
603
+ "rewards/margins": 26.2744140625,
604
+ "rewards/rejected": -42.09151077270508,
605
+ "step": 340
606
+ },
607
+ {
608
+ "epoch": 0.9641873278236914,
609
+ "grad_norm": 0.00016785685147624463,
610
+ "learning_rate": 2.2364217252396165e-06,
611
+ "logits/chosen": 2.988264322280884,
612
+ "logits/rejected": 3.05815052986145,
613
+ "logps/chosen": -8.240289688110352,
614
+ "logps/rejected": -21.153888702392578,
615
+ "loss": 0.008007270097732545,
616
+ "nll_loss": 0.0,
617
+ "rewards/accuracies": 1.0,
618
+ "rewards/chosen": -16.480579376220703,
619
+ "rewards/margins": 25.827194213867188,
620
+ "rewards/rejected": -42.307777404785156,
621
+ "step": 350
622
+ },
623
+ {
624
+ "epoch": 0.9917355371900827,
625
+ "grad_norm": 3.048083543777466,
626
+ "learning_rate": 6.389776357827476e-07,
627
+ "logits/chosen": 3.002927780151367,
628
+ "logits/rejected": 3.080221652984619,
629
+ "logps/chosen": -8.972009658813477,
630
+ "logps/rejected": -20.78287124633789,
631
+ "loss": 0.08935376405715942,
632
+ "nll_loss": 0.0,
633
+ "rewards/accuracies": 0.9750000238418579,
634
+ "rewards/chosen": -17.944019317626953,
635
+ "rewards/margins": 23.621723175048828,
636
+ "rewards/rejected": -41.56574249267578,
637
+ "step": 360
638
+ }
639
+ ],
640
+ "logging_steps": 10,
641
+ "max_steps": 363,
642
+ "num_input_tokens_seen": 0,
643
+ "num_train_epochs": 1,
644
+ "save_steps": 200,
645
+ "stateful_callbacks": {
646
+ "TrainerControl": {
647
+ "args": {
648
+ "should_epoch_stop": false,
649
+ "should_evaluate": false,
650
+ "should_log": false,
651
+ "should_save": true,
652
+ "should_training_stop": true
653
+ },
654
+ "attributes": {}
655
+ }
656
+ },
657
+ "total_flos": 0.0,
658
+ "train_batch_size": 2,
659
+ "trial_name": null,
660
+ "trial_params": null
661
+ }
checkpoint-363/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3bae6079ee85f272ee5c1148388807254b89f462d2cec3c58b92c2a1cb918d
3
+ size 5585
eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_dir": "/workspace/ckpts/simpo-xsum-low-mid-high-0220_0518",
3
+ "n_pairs": 197,
4
+ "auroc": 0.9057177458836868,
5
+ "wins": 193,
6
+ "ties": 0,
7
+ "losses": 4,
8
+ "mean_orig": -8.342779590935699,
9
+ "mean_misc": -18.936781556189185,
10
+ "n_style": 0,
11
+ "brio_mean": NaN,
12
+ "llama_mean": NaN,
13
+ "style_gap": NaN
14
+ }
style_control_results_xsum.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_dir": "/workspace/ckpts/simpo-xsum-low-mid-high-0220_0518",
3
+ "dataset": "xsum",
4
+ "n": 100,
5
+ "mean_orig": -8.612068988650032,
6
+ "mean_gpt_faithful": -23.530381366528278,
7
+ "mean_misc": -19.799843373021705,
8
+ "auroc_orig_vs_misc": 0.9271,
9
+ "auroc_gpt_vs_misc": 0.2939
10
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|begin_of_text|>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|eot_id|>",
6
+ "is_local": false,
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 131072,
12
+ "pad_token": "<|eot_id|>",
13
+ "tokenizer_class": "TokenizersBackend"
14
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3bae6079ee85f272ee5c1148388807254b89f462d2cec3c58b92c2a1cb918d
3
+ size 5585