Luxel committed on
Commit
781b110
·
verified ·
1 Parent(s): c4763d7

Upload run qwen3_4b_eval_aware on 2025-10-25T22:30:51.946276Z

Browse files
Files changed (28) hide show
  1. .gitattributes +2 -0
  2. qwen3_4b_eval_aware/checkpoint-32/README.md +209 -0
  3. qwen3_4b_eval_aware/checkpoint-32/adapter_config.json +42 -0
  4. qwen3_4b_eval_aware/checkpoint-32/adapter_model.safetensors +3 -0
  5. qwen3_4b_eval_aware/checkpoint-32/added_tokens.json +28 -0
  6. qwen3_4b_eval_aware/checkpoint-32/chat_template.jinja +86 -0
  7. qwen3_4b_eval_aware/checkpoint-32/merges.txt +0 -0
  8. qwen3_4b_eval_aware/checkpoint-32/optimizer.pt +3 -0
  9. qwen3_4b_eval_aware/checkpoint-32/rng_state_0.pth +3 -0
  10. qwen3_4b_eval_aware/checkpoint-32/rng_state_1.pth +3 -0
  11. qwen3_4b_eval_aware/checkpoint-32/scheduler.pt +3 -0
  12. qwen3_4b_eval_aware/checkpoint-32/special_tokens_map.json +31 -0
  13. qwen3_4b_eval_aware/checkpoint-32/tokenizer.json +3 -0
  14. qwen3_4b_eval_aware/checkpoint-32/tokenizer_config.json +239 -0
  15. qwen3_4b_eval_aware/checkpoint-32/trainer_state.json +514 -0
  16. qwen3_4b_eval_aware/checkpoint-32/training_args.bin +3 -0
  17. qwen3_4b_eval_aware/checkpoint-32/vocab.json +0 -0
  18. qwen3_4b_eval_aware/dpo_model/README.md +209 -0
  19. qwen3_4b_eval_aware/dpo_model/adapter_config.json +42 -0
  20. qwen3_4b_eval_aware/dpo_model/adapter_model.safetensors +3 -0
  21. qwen3_4b_eval_aware/dpo_model/added_tokens.json +28 -0
  22. qwen3_4b_eval_aware/dpo_model/chat_template.jinja +86 -0
  23. qwen3_4b_eval_aware/dpo_model/merges.txt +0 -0
  24. qwen3_4b_eval_aware/dpo_model/special_tokens_map.json +31 -0
  25. qwen3_4b_eval_aware/dpo_model/tokenizer.json +3 -0
  26. qwen3_4b_eval_aware/dpo_model/tokenizer_config.json +239 -0
  27. qwen3_4b_eval_aware/dpo_model/vocab.json +0 -0
  28. qwen3_4b_eval_aware/train_config.json +199 -0
.gitattributes CHANGED
@@ -38,3 +38,5 @@ qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/checkpoint-160/tokenizer.json filter=lfs
38
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/checkpoint-190/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/dpo_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
38
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/checkpoint-190/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
  qwen3_4b_bw96x4_dpo_ipo_b005_lr5e6_acc2/dpo_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ qwen3_4b_eval_aware/checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
+ qwen3_4b_eval_aware/dpo_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
qwen3_4b_eval_aware/checkpoint-32/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-4B-Thinking-2507
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen3-4B-Thinking-2507
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
qwen3_4b_eval_aware/checkpoint-32/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen3-4B-Thinking-2507",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "gate_proj",
29
+ "k_proj",
30
+ "v_proj",
31
+ "o_proj",
32
+ "q_proj",
33
+ "down_proj",
34
+ "up_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
qwen3_4b_eval_aware/checkpoint-32/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca19a61d584af3d4e051bdde7c5f4b4c9470d34b0c3dd3669566cf0aa0c1b24
3
+ size 132187888
qwen3_4b_eval_aware/checkpoint-32/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
qwen3_4b_eval_aware/checkpoint-32/chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n<think>\n' }}
86
+ {%- endif %}
qwen3_4b_eval_aware/checkpoint-32/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_4b_eval_aware/checkpoint-32/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a178154b05157df67ecd072585672f37f13c28e7285facefd683001aac93ecf
3
+ size 264673227
qwen3_4b_eval_aware/checkpoint-32/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dd7671ce88d469c49c0530724ac76b2306574002d1ecd1ca9294e41621fd96a
3
+ size 14917
qwen3_4b_eval_aware/checkpoint-32/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3246ef1170ccca541a03b89ad6f20e01c51eb6834a2c2211c78c71c70f896879
3
+ size 14917
qwen3_4b_eval_aware/checkpoint-32/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a873edb088ab5375860da6f7ca28d9a12eead3ab160a0aba3ea3b08e3f37d3
3
+ size 1465
qwen3_4b_eval_aware/checkpoint-32/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
qwen3_4b_eval_aware/checkpoint-32/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
qwen3_4b_eval_aware/checkpoint-32/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 262144,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
qwen3_4b_eval_aware/checkpoint-32/trainer_state.json ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.5079365079365079,
6
+ "eval_steps": 50,
7
+ "global_step": 32,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.015873015873015872,
14
+ "grad_norm": 73.43138122558594,
15
+ "learning_rate": 0.0,
16
+ "logits/chosen": -2.1276862621307373,
17
+ "logits/rejected": -2.3007192611694336,
18
+ "logps/chosen": -0.837126612663269,
19
+ "logps/rejected": -1.6845022439956665,
20
+ "loss": 100.575,
21
+ "rewards/accuracies": 0.4375,
22
+ "rewards/chosen": 0.009719086810946465,
23
+ "rewards/margins": -0.0013557381462305784,
24
+ "rewards/rejected": 0.011074826121330261,
25
+ "step": 1
26
+ },
27
+ {
28
+ "epoch": 0.031746031746031744,
29
+ "grad_norm": 61.139373779296875,
30
+ "learning_rate": 5e-06,
31
+ "logits/chosen": -2.224215030670166,
32
+ "logits/rejected": -2.4488682746887207,
33
+ "logps/chosen": -0.8742778301239014,
34
+ "logps/rejected": -1.7331535816192627,
35
+ "loss": 100.7677,
36
+ "rewards/accuracies": 0.59375,
37
+ "rewards/chosen": 0.01031375490128994,
38
+ "rewards/margins": -0.0017672583926469088,
39
+ "rewards/rejected": 0.01208101399242878,
40
+ "step": 2
41
+ },
42
+ {
43
+ "epoch": 0.047619047619047616,
44
+ "grad_norm": 61.41340255737305,
45
+ "learning_rate": 4.987173308479738e-06,
46
+ "logits/chosen": -2.124711036682129,
47
+ "logits/rejected": -2.3129348754882812,
48
+ "logps/chosen": -0.9234221577644348,
49
+ "logps/rejected": -1.6303091049194336,
50
+ "loss": 99.4497,
51
+ "rewards/accuracies": 0.625,
52
+ "rewards/chosen": 0.011429570615291595,
53
+ "rewards/margins": 0.0015148655511438847,
54
+ "rewards/rejected": 0.009914705529808998,
55
+ "step": 3
56
+ },
57
+ {
58
+ "epoch": 0.06349206349206349,
59
+ "grad_norm": 57.868431091308594,
60
+ "learning_rate": 4.948824853131237e-06,
61
+ "logits/chosen": -2.1755993366241455,
62
+ "logits/rejected": -2.4359540939331055,
63
+ "logps/chosen": -0.948157548904419,
64
+ "logps/rejected": -1.5917528867721558,
65
+ "loss": 99.2299,
66
+ "rewards/accuracies": 0.6875,
67
+ "rewards/chosen": 0.010555782355368137,
68
+ "rewards/margins": 0.0021268429700285196,
69
+ "rewards/rejected": 0.008428940549492836,
70
+ "step": 4
71
+ },
72
+ {
73
+ "epoch": 0.07936507936507936,
74
+ "grad_norm": 70.2706298828125,
75
+ "learning_rate": 4.8853481410001225e-06,
76
+ "logits/chosen": -2.1221580505371094,
77
+ "logits/rejected": -2.396707057952881,
78
+ "logps/chosen": -0.8689298629760742,
79
+ "logps/rejected": -1.8071589469909668,
80
+ "loss": 97.5463,
81
+ "rewards/accuracies": 0.84375,
82
+ "rewards/chosen": 0.011419497430324554,
83
+ "rewards/margins": 0.006311982404440641,
84
+ "rewards/rejected": 0.0051075154915452,
85
+ "step": 5
86
+ },
87
+ {
88
+ "epoch": 0.09523809523809523,
89
+ "grad_norm": 72.50617980957031,
90
+ "learning_rate": 4.797394529050577e-06,
91
+ "logits/chosen": -2.2033004760742188,
92
+ "logits/rejected": -2.463660955429077,
93
+ "logps/chosen": -1.182050347328186,
94
+ "logps/rejected": -1.72629976272583,
95
+ "loss": 99.1601,
96
+ "rewards/accuracies": 0.8125,
97
+ "rewards/chosen": 0.008959097787737846,
98
+ "rewards/margins": 0.0024480693973600864,
99
+ "rewards/rejected": 0.006511027924716473,
100
+ "step": 6
101
+ },
102
+ {
103
+ "epoch": 0.1111111111111111,
104
+ "grad_norm": 76.10721588134766,
105
+ "learning_rate": 4.685866540361456e-06,
106
+ "logits/chosen": -2.1102092266082764,
107
+ "logits/rejected": -2.3653409481048584,
108
+ "logps/chosen": -0.9124682545661926,
109
+ "logps/rejected": -1.835811972618103,
110
+ "loss": 96.8503,
111
+ "rewards/accuracies": 0.875,
112
+ "rewards/chosen": 0.011616631411015987,
113
+ "rewards/margins": 0.0080842524766922,
114
+ "rewards/rejected": 0.003532378701493144,
115
+ "step": 7
116
+ },
117
+ {
118
+ "epoch": 0.12698412698412698,
119
+ "grad_norm": 74.28392028808594,
120
+ "learning_rate": 4.551908603018191e-06,
121
+ "logits/chosen": -2.2063632011413574,
122
+ "logits/rejected": -2.4340498447418213,
123
+ "logps/chosen": -0.8536584973335266,
124
+ "logps/rejected": -1.8901478052139282,
125
+ "loss": 96.4469,
126
+ "rewards/accuracies": 0.875,
127
+ "rewards/chosen": 0.011865518987178802,
128
+ "rewards/margins": 0.00912485271692276,
129
+ "rewards/rejected": 0.0027406662702560425,
130
+ "step": 8
131
+ },
132
+ {
133
+ "epoch": 0.14285714285714285,
134
+ "grad_norm": 77.02012634277344,
135
+ "learning_rate": 4.396895306731978e-06,
136
+ "logits/chosen": -2.2196762561798096,
137
+ "logits/rejected": -2.465085744857788,
138
+ "logps/chosen": -0.9492424726486206,
139
+ "logps/rejected": -1.854007363319397,
140
+ "loss": 97.0908,
141
+ "rewards/accuracies": 0.9375,
142
+ "rewards/chosen": 0.009380538947880268,
143
+ "rewards/margins": 0.0075874775648117065,
144
+ "rewards/rejected": 0.0017930612666532397,
145
+ "step": 9
146
+ },
147
+ {
148
+ "epoch": 0.15873015873015872,
149
+ "grad_norm": 67.13955688476562,
150
+ "learning_rate": 4.222417297689217e-06,
151
+ "logits/chosen": -2.233217239379883,
152
+ "logits/rejected": -2.3442227840423584,
153
+ "logps/chosen": -0.8550275564193726,
154
+ "logps/rejected": -1.7330424785614014,
155
+ "loss": 96.7183,
156
+ "rewards/accuracies": 0.90625,
157
+ "rewards/chosen": 0.009713666513562202,
158
+ "rewards/margins": 0.008466679602861404,
159
+ "rewards/rejected": 0.0012469873763620853,
160
+ "step": 10
161
+ },
162
+ {
163
+ "epoch": 0.1746031746031746,
164
+ "grad_norm": 91.1172103881836,
165
+ "learning_rate": 4.030264956369158e-06,
166
+ "logits/chosen": -2.281071186065674,
167
+ "logits/rejected": -2.683786392211914,
168
+ "logps/chosen": -1.244257926940918,
169
+ "logps/rejected": -1.9201724529266357,
170
+ "loss": 98.4084,
171
+ "rewards/accuracies": 0.8125,
172
+ "rewards/chosen": 0.008490300737321377,
173
+ "rewards/margins": 0.004535912536084652,
174
+ "rewards/rejected": 0.0039543891325592995,
175
+ "step": 11
176
+ },
177
+ {
178
+ "epoch": 0.19047619047619047,
179
+ "grad_norm": 86.38648223876953,
180
+ "learning_rate": 3.8224100258174066e-06,
181
+ "logits/chosen": -2.2776074409484863,
182
+ "logits/rejected": -2.5513505935668945,
183
+ "logps/chosen": -0.8422654867172241,
184
+ "logps/rejected": -2.005890369415283,
185
+ "loss": 94.6299,
186
+ "rewards/accuracies": 0.9375,
187
+ "rewards/chosen": 0.010762966237962246,
188
+ "rewards/margins": 0.013820413500070572,
189
+ "rewards/rejected": -0.0030574467964470387,
190
+ "step": 12
191
+ },
192
+ {
193
+ "epoch": 0.20634920634920634,
194
+ "grad_norm": 84.03218078613281,
195
+ "learning_rate": 3.600985378894086e-06,
196
+ "logits/chosen": -2.2474443912506104,
197
+ "logits/rejected": -2.3934414386749268,
198
+ "logps/chosen": -0.8881982564926147,
199
+ "logps/rejected": -1.8603103160858154,
200
+ "loss": 93.1583,
201
+ "rewards/accuracies": 0.96875,
202
+ "rewards/chosen": 0.012732166796922684,
203
+ "rewards/margins": 0.01756295934319496,
204
+ "rewards/rejected": -0.004830792546272278,
205
+ "step": 13
206
+ },
207
+ {
208
+ "epoch": 0.2222222222222222,
209
+ "grad_norm": 108.10200500488281,
210
+ "learning_rate": 3.3682631321120507e-06,
211
+ "logits/chosen": -2.1857571601867676,
212
+ "logits/rejected": -2.470634937286377,
213
+ "logps/chosen": -0.8621047735214233,
214
+ "logps/rejected": -1.9189422130584717,
215
+ "loss": 93.5056,
216
+ "rewards/accuracies": 0.9375,
217
+ "rewards/chosen": 0.010991780087351799,
218
+ "rewards/margins": 0.016812698915600777,
219
+ "rewards/rejected": -0.00582092022523284,
220
+ "step": 14
221
+ },
222
+ {
223
+ "epoch": 0.23809523809523808,
224
+ "grad_norm": 98.29182434082031,
225
+ "learning_rate": 3.1266313306468018e-06,
226
+ "logits/chosen": -2.2839250564575195,
227
+ "logits/rejected": -2.4881529808044434,
228
+ "logps/chosen": -0.8307325839996338,
229
+ "logps/rejected": -2.0696005821228027,
230
+ "loss": 91.4297,
231
+ "rewards/accuracies": 0.96875,
232
+ "rewards/chosen": 0.011583537794649601,
233
+ "rewards/margins": 0.0221419557929039,
234
+ "rewards/rejected": -0.0105584179982543,
235
+ "step": 15
236
+ },
237
+ {
238
+ "epoch": 0.25396825396825395,
239
+ "grad_norm": 89.91815948486328,
240
+ "learning_rate": 2.878569443761442e-06,
241
+ "logits/chosen": -2.2810075283050537,
242
+ "logits/rejected": -2.4519784450531006,
243
+ "logps/chosen": -0.967061460018158,
244
+ "logps/rejected": -1.9306243658065796,
245
+ "loss": 93.0875,
246
+ "rewards/accuracies": 0.875,
247
+ "rewards/chosen": 0.011699460446834564,
248
+ "rewards/margins": 0.017987482249736786,
249
+ "rewards/rejected": -0.006288021802902222,
250
+ "step": 16
251
+ },
252
+ {
253
+ "epoch": 0.2698412698412698,
254
+ "grad_norm": 98.6678237915039,
255
+ "learning_rate": 2.626622922096782e-06,
256
+ "logits/chosen": -2.1972618103027344,
257
+ "logits/rejected": -2.514512538909912,
258
+ "logps/chosen": -0.8174502849578857,
259
+ "logps/rejected": -2.0018463134765625,
260
+ "loss": 91.7179,
261
+ "rewards/accuracies": 0.90625,
262
+ "rewards/chosen": 0.012017913162708282,
263
+ "rewards/margins": 0.02163715288043022,
264
+ "rewards/rejected": -0.00961923785507679,
265
+ "step": 17
266
+ },
267
+ {
268
+ "epoch": 0.2857142857142857,
269
+ "grad_norm": 107.2076644897461,
270
+ "learning_rate": 2.3733770779032185e-06,
271
+ "logits/chosen": -2.1779308319091797,
272
+ "logits/rejected": -2.548816442489624,
273
+ "logps/chosen": -0.9282850027084351,
274
+ "logps/rejected": -2.0622897148132324,
275
+ "loss": 92.204,
276
+ "rewards/accuracies": 0.90625,
277
+ "rewards/chosen": 0.008969704620540142,
278
+ "rewards/margins": 0.020185772329568863,
279
+ "rewards/rejected": -0.01121606770902872,
280
+ "step": 18
281
+ },
282
+ {
283
+ "epoch": 0.30158730158730157,
284
+ "grad_norm": 109.00663757324219,
285
+ "learning_rate": 2.1214305562385592e-06,
286
+ "logits/chosen": -2.3157405853271484,
287
+ "logits/rejected": -2.756890058517456,
288
+ "logps/chosen": -1.0933113098144531,
289
+ "logps/rejected": -1.983035683631897,
290
+ "loss": 94.0657,
291
+ "rewards/accuracies": 0.8125,
292
+ "rewards/chosen": 0.009589027613401413,
293
+ "rewards/margins": 0.0157773494720459,
294
+ "rewards/rejected": -0.00618832278996706,
295
+ "step": 19
296
+ },
297
+ {
298
+ "epoch": 0.31746031746031744,
299
+ "grad_norm": 108.45767974853516,
300
+ "learning_rate": 1.8733686693531986e-06,
301
+ "logits/chosen": -2.285343647003174,
302
+ "logits/rejected": -2.600506544113159,
303
+ "logps/chosen": -0.9763903617858887,
304
+ "logps/rejected": -2.109158992767334,
305
+ "loss": 91.7136,
306
+ "rewards/accuracies": 0.90625,
307
+ "rewards/chosen": 0.00986174400895834,
308
+ "rewards/margins": 0.02157197892665863,
309
+ "rewards/rejected": -0.011710233986377716,
310
+ "step": 20
311
+ },
312
+ {
313
+ "epoch": 0.3333333333333333,
314
+ "grad_norm": 117.7738265991211,
315
+ "learning_rate": 1.6317368678879497e-06,
316
+ "logits/chosen": -2.3075315952301025,
317
+ "logits/rejected": -2.4838249683380127,
318
+ "logps/chosen": -0.8424920439720154,
319
+ "logps/rejected": -2.226480007171631,
320
+ "loss": 88.9044,
321
+ "rewards/accuracies": 0.96875,
322
+ "rewards/chosen": 0.012006192468106747,
323
+ "rewards/margins": 0.028795666992664337,
324
+ "rewards/rejected": -0.016789473593235016,
325
+ "step": 21
326
+ },
327
+ {
328
+ "epoch": 0.3492063492063492,
329
+ "grad_norm": 112.16304779052734,
330
+ "learning_rate": 1.3990146211059141e-06,
331
+ "logits/chosen": -2.247889995574951,
332
+ "logits/rejected": -2.383714199066162,
333
+ "logps/chosen": -0.8421880006790161,
334
+ "logps/rejected": -2.0365967750549316,
335
+ "loss": 89.443,
336
+ "rewards/accuracies": 0.96875,
337
+ "rewards/chosen": 0.011269833892583847,
338
+ "rewards/margins": 0.02741769701242447,
339
+ "rewards/rejected": -0.016147863119840622,
340
+ "step": 22
341
+ },
342
+ {
343
+ "epoch": 0.36507936507936506,
344
+ "grad_norm": 111.17552185058594,
345
+ "learning_rate": 1.1775899741825947e-06,
346
+ "logits/chosen": -2.2497000694274902,
347
+ "logits/rejected": -2.543438673019409,
348
+ "logps/chosen": -0.8720284104347229,
349
+ "logps/rejected": -2.1090431213378906,
350
+ "loss": 89.9094,
351
+ "rewards/accuracies": 0.90625,
352
+ "rewards/chosen": 0.009760284796357155,
353
+ "rewards/margins": 0.02622319757938385,
354
+ "rewards/rejected": -0.016462914645671844,
355
+ "step": 23
356
+ },
357
+ {
358
+ "epoch": 0.38095238095238093,
359
+ "grad_norm": 107.88399505615234,
360
+ "learning_rate": 9.697350436308428e-07,
361
+ "logits/chosen": -2.311974048614502,
362
+ "logits/rejected": -2.7802984714508057,
363
+ "logps/chosen": -1.1230542659759521,
364
+ "logps/rejected": -2.2263827323913574,
365
+ "loss": 92.3867,
366
+ "rewards/accuracies": 0.8125,
367
+ "rewards/chosen": 0.008571119979023933,
368
+ "rewards/margins": 0.020269405096769333,
369
+ "rewards/rejected": -0.011698286980390549,
370
+ "step": 24
371
+ },
372
+ {
373
+ "epoch": 0.3968253968253968,
374
+ "grad_norm": 120.28370666503906,
375
+ "learning_rate": 7.775827023107835e-07,
376
+ "logits/chosen": -2.3289241790771484,
377
+ "logits/rejected": -2.556213855743408,
378
+ "logps/chosen": -0.8880805969238281,
379
+ "logps/rejected": -2.184584140777588,
380
+ "loss": 88.1746,
381
+ "rewards/accuracies": 0.9375,
382
+ "rewards/chosen": 0.0129635538905859,
383
+ "rewards/margins": 0.030874282121658325,
384
+ "rewards/rejected": -0.017910730093717575,
385
+ "step": 25
386
+ },
387
+ {
388
+ "epoch": 0.4126984126984127,
389
+ "grad_norm": 120.5097427368164,
390
+ "learning_rate": 6.031046932680229e-07,
391
+ "logits/chosen": -2.3363046646118164,
392
+ "logits/rejected": -2.7970316410064697,
393
+ "logps/chosen": -0.9201133847236633,
394
+ "logps/rejected": -2.2373695373535156,
395
+ "loss": 89.4875,
396
+ "rewards/accuracies": 0.875,
397
+ "rewards/chosen": 0.010034371167421341,
398
+ "rewards/margins": 0.027558699250221252,
399
+ "rewards/rejected": -0.017524326220154762,
400
+ "step": 26
401
+ },
402
+ {
403
+ "epoch": 0.42857142857142855,
404
+ "grad_norm": 129.77806091308594,
405
+ "learning_rate": 4.480913969818099e-07,
406
+ "logits/chosen": -2.1930630207061768,
407
+ "logits/rejected": -2.461454391479492,
408
+ "logps/chosen": -0.9183363914489746,
409
+ "logps/rejected": -2.2184934616088867,
410
+ "loss": 87.5978,
411
+ "rewards/accuracies": 0.9375,
412
+ "rewards/chosen": 0.011562383733689785,
413
+ "rewards/margins": 0.03248335421085358,
414
+ "rewards/rejected": -0.020920973271131516,
415
+ "step": 27
416
+ },
417
+ {
418
+ "epoch": 0.4444444444444444,
419
+ "grad_norm": 115.09199523925781,
420
+ "learning_rate": 3.141334596385448e-07,
421
+ "logits/chosen": -2.2993292808532715,
422
+ "logits/rejected": -2.6102182865142822,
423
+ "logps/chosen": -1.1710147857666016,
424
+ "logps/rejected": -2.1261191368103027,
425
+ "loss": 90.0134,
426
+ "rewards/accuracies": 0.875,
427
+ "rewards/chosen": 0.011231327429413795,
428
+ "rewards/margins": 0.026527734473347664,
429
+ "rewards/rejected": -0.015296407975256443,
430
+ "step": 28
431
+ },
432
+ {
433
+ "epoch": 0.4603174603174603,
434
+ "grad_norm": 118.78372192382812,
435
+ "learning_rate": 2.026054709494235e-07,
436
+ "logits/chosen": -2.333754062652588,
437
+ "logits/rejected": -2.62929630279541,
438
+ "logps/chosen": -0.9108929634094238,
439
+ "logps/rejected": -2.1907613277435303,
440
+ "loss": 89.9962,
441
+ "rewards/accuracies": 0.875,
442
+ "rewards/chosen": 0.010885003954172134,
443
+ "rewards/margins": 0.02634887769818306,
444
+ "rewards/rejected": -0.015463873744010925,
445
+ "step": 29
446
+ },
447
+ {
448
+ "epoch": 0.47619047619047616,
449
+ "grad_norm": 110.0257568359375,
450
+ "learning_rate": 1.1465185899987797e-07,
451
+ "logits/chosen": -2.2572035789489746,
452
+ "logits/rejected": -2.7623817920684814,
453
+ "logps/chosen": -1.2265293598175049,
454
+ "logps/rejected": -2.117170810699463,
455
+ "loss": 92.3271,
456
+ "rewards/accuracies": 0.84375,
457
+ "rewards/chosen": 0.008401270024478436,
458
+ "rewards/margins": 0.02065230906009674,
459
+ "rewards/rejected": -0.012251039035618305,
460
+ "step": 30
461
+ },
462
+ {
463
+ "epoch": 0.49206349206349204,
464
+ "grad_norm": 117.9716796875,
465
+ "learning_rate": 5.117514686876379e-08,
466
+ "logits/chosen": -2.2369933128356934,
467
+ "logits/rejected": -2.41340970993042,
468
+ "logps/chosen": -0.865552544593811,
469
+ "logps/rejected": -2.1379382610321045,
470
+ "loss": 88.3245,
471
+ "rewards/accuracies": 0.9375,
472
+ "rewards/chosen": 0.010352165438234806,
473
+ "rewards/margins": 0.030595146119594574,
474
+ "rewards/rejected": -0.020242983475327492,
475
+ "step": 31
476
+ },
477
+ {
478
+ "epoch": 0.5079365079365079,
479
+ "grad_norm": 112.72372436523438,
480
+ "learning_rate": 1.2826691520262114e-08,
481
+ "logits/chosen": -2.328519344329834,
482
+ "logits/rejected": -2.624027729034424,
483
+ "logps/chosen": -1.000898838043213,
484
+ "logps/rejected": -2.1262588500976562,
485
+ "loss": 89.0441,
486
+ "rewards/accuracies": 0.90625,
487
+ "rewards/chosen": 0.012559703551232815,
488
+ "rewards/margins": 0.028640277683734894,
489
+ "rewards/rejected": -0.016080575063824654,
490
+ "step": 32
491
+ }
492
+ ],
493
+ "logging_steps": 1,
494
+ "max_steps": 32,
495
+ "num_input_tokens_seen": 0,
496
+ "num_train_epochs": 1,
497
+ "save_steps": 50,
498
+ "stateful_callbacks": {
499
+ "TrainerControl": {
500
+ "args": {
501
+ "should_epoch_stop": false,
502
+ "should_evaluate": false,
503
+ "should_log": false,
504
+ "should_save": true,
505
+ "should_training_stop": true
506
+ },
507
+ "attributes": {}
508
+ }
509
+ },
510
+ "total_flos": 0.0,
511
+ "train_batch_size": 8,
512
+ "trial_name": null,
513
+ "trial_params": null
514
+ }
qwen3_4b_eval_aware/checkpoint-32/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2cb43d7832dfc00b4ac9cdb2d7c2ad54c74704a446b31f6aa858e68b714f472
3
+ size 6737
qwen3_4b_eval_aware/checkpoint-32/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_4b_eval_aware/dpo_model/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-4B-Thinking-2507
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen3-4B-Thinking-2507
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.17.1
qwen3_4b_eval_aware/dpo_model/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen3-4B-Thinking-2507",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "gate_proj",
29
+ "k_proj",
30
+ "v_proj",
31
+ "o_proj",
32
+ "q_proj",
33
+ "down_proj",
34
+ "up_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
qwen3_4b_eval_aware/dpo_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca19a61d584af3d4e051bdde7c5f4b4c9470d34b0c3dd3669566cf0aa0c1b24
3
+ size 132187888
qwen3_4b_eval_aware/dpo_model/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
qwen3_4b_eval_aware/dpo_model/chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n<think>\n' }}
86
+ {%- endif %}
qwen3_4b_eval_aware/dpo_model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_4b_eval_aware/dpo_model/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
qwen3_4b_eval_aware/dpo_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
qwen3_4b_eval_aware/dpo_model/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 262144,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
qwen3_4b_eval_aware/dpo_model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qwen3_4b_eval_aware/train_config.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "qwen/second_stage/runs/qwen3_4b_dpo_eager_fixed",
3
+ "overwrite_output_dir": false,
4
+ "do_train": false,
5
+ "do_eval": true,
6
+ "do_predict": false,
7
+ "eval_strategy": "steps",
8
+ "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 8,
10
+ "per_device_eval_batch_size": 8,
11
+ "per_gpu_train_batch_size": null,
12
+ "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 2,
14
+ "eval_accumulation_steps": null,
15
+ "eval_delay": 0,
16
+ "torch_empty_cache_steps": null,
17
+ "learning_rate": 5e-06,
18
+ "weight_decay": 0.05,
19
+ "adam_beta1": 0.9,
20
+ "adam_beta2": 0.999,
21
+ "adam_epsilon": 1e-08,
22
+ "max_grad_norm": 1.0,
23
+ "num_train_epochs": 0.5,
24
+ "max_steps": -1,
25
+ "lr_scheduler_type": "cosine",
26
+ "lr_scheduler_kwargs": {},
27
+ "warmup_ratio": 0.03,
28
+ "warmup_steps": 0,
29
+ "log_level": "passive",
30
+ "log_level_replica": "warning",
31
+ "log_on_each_node": true,
32
+ "logging_dir": "qwen/second_stage/runs/qwen3_4b_dpo_eager_fixed/runs/Oct25_21-47-13_6c15deb18540",
33
+ "logging_strategy": "steps",
34
+ "logging_first_step": false,
35
+ "logging_steps": 1,
36
+ "logging_nan_inf_filter": true,
37
+ "save_strategy": "steps",
38
+ "save_steps": 50,
39
+ "save_total_limit": 10,
40
+ "save_safetensors": true,
41
+ "save_on_each_node": false,
42
+ "save_only_model": false,
43
+ "restore_callback_states_from_checkpoint": false,
44
+ "no_cuda": false,
45
+ "use_cpu": false,
46
+ "use_mps_device": false,
47
+ "seed": 42,
48
+ "data_seed": null,
49
+ "jit_mode_eval": false,
50
+ "use_ipex": false,
51
+ "bf16": true,
52
+ "fp16": false,
53
+ "fp16_opt_level": "O1",
54
+ "half_precision_backend": "auto",
55
+ "bf16_full_eval": false,
56
+ "fp16_full_eval": false,
57
+ "tf32": null,
58
+ "local_rank": 0,
59
+ "ddp_backend": null,
60
+ "tpu_num_cores": null,
61
+ "tpu_metrics_debug": false,
62
+ "debug": [],
63
+ "dataloader_drop_last": false,
64
+ "eval_steps": 50,
65
+ "dataloader_num_workers": 0,
66
+ "dataloader_prefetch_factor": null,
67
+ "past_index": -1,
68
+ "run_name": "qwen3_4b_dpo_eager_fixed",
69
+ "disable_tqdm": false,
70
+ "remove_unused_columns": false,
71
+ "label_names": null,
72
+ "load_best_model_at_end": true,
73
+ "metric_for_best_model": "eval_loss",
74
+ "greater_is_better": false,
75
+ "ignore_data_skip": false,
76
+ "fsdp": [],
77
+ "fsdp_min_num_params": 0,
78
+ "fsdp_config": {
79
+ "min_num_params": 0,
80
+ "xla": false,
81
+ "xla_fsdp_v2": false,
82
+ "xla_fsdp_grad_ckpt": false
83
+ },
84
+ "fsdp_transformer_layer_cls_to_wrap": null,
85
+ "accelerator_config": {
86
+ "split_batches": false,
87
+ "dispatch_batches": null,
88
+ "even_batches": true,
89
+ "use_seedable_sampler": true,
90
+ "non_blocking": false,
91
+ "gradient_accumulation_kwargs": null
92
+ },
93
+ "deepspeed": null,
94
+ "label_smoothing_factor": 0.0,
95
+ "optim": "adamw_torch_fused",
96
+ "optim_args": null,
97
+ "adafactor": false,
98
+ "group_by_length": false,
99
+ "length_column_name": "length",
100
+ "report_to": [
101
+ "wandb"
102
+ ],
103
+ "ddp_find_unused_parameters": false,
104
+ "ddp_bucket_cap_mb": null,
105
+ "ddp_broadcast_buffers": null,
106
+ "dataloader_pin_memory": true,
107
+ "dataloader_persistent_workers": false,
108
+ "skip_memory_metrics": true,
109
+ "use_legacy_prediction_loop": false,
110
+ "push_to_hub": false,
111
+ "resume_from_checkpoint": null,
112
+ "hub_model_id": null,
113
+ "hub_strategy": "every_save",
114
+ "hub_token": "<HUB_TOKEN>",
115
+ "hub_private_repo": null,
116
+ "hub_always_push": false,
117
+ "hub_revision": null,
118
+ "gradient_checkpointing": true,
119
+ "gradient_checkpointing_kwargs": {
120
+ "use_reentrant": false
121
+ },
122
+ "include_inputs_for_metrics": false,
123
+ "include_for_metrics": [],
124
+ "eval_do_concat_batches": true,
125
+ "fp16_backend": "auto",
126
+ "push_to_hub_model_id": null,
127
+ "push_to_hub_organization": null,
128
+ "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
129
+ "mp_parameters": "",
130
+ "auto_find_batch_size": false,
131
+ "full_determinism": false,
132
+ "torchdynamo": null,
133
+ "ray_scope": "last",
134
+ "ddp_timeout": 1800,
135
+ "torch_compile": false,
136
+ "torch_compile_backend": null,
137
+ "torch_compile_mode": null,
138
+ "include_tokens_per_second": false,
139
+ "include_num_input_tokens_seen": false,
140
+ "neftune_noise_alpha": null,
141
+ "optim_target_modules": null,
142
+ "batch_eval_metrics": false,
143
+ "eval_on_start": false,
144
+ "use_liger_kernel": false,
145
+ "liger_kernel_config": null,
146
+ "eval_use_gather_object": false,
147
+ "average_tokens_across_devices": true,
148
+ "model_init_kwargs": null,
149
+ "ref_model_init_kwargs": null,
150
+ "model_adapter_name": null,
151
+ "ref_adapter_name": null,
152
+ "force_use_ref_model": false,
153
+ "disable_dropout": true,
154
+ "use_logits_to_keep": false,
155
+ "dataset_num_proc": null,
156
+ "padding_value": null,
157
+ "label_pad_token_id": -100,
158
+ "max_prompt_length": null,
159
+ "max_completion_length": null,
160
+ "max_length": null,
161
+ "truncation_mode": "keep_end",
162
+ "padding_free": false,
163
+ "precompute_ref_log_probs": false,
164
+ "precompute_ref_batch_size": null,
165
+ "tools": null,
166
+ "loss_type": "ipo",
167
+ "beta": 0.05,
168
+ "f_divergence_type": "reverse_kl",
169
+ "f_alpha_divergence_coef": 1.0,
170
+ "reference_free": false,
171
+ "label_smoothing": 0.0,
172
+ "use_weighting": false,
173
+ "rpo_alpha": null,
174
+ "discopop_tau": 0.05,
175
+ "sync_ref_model": false,
176
+ "ref_model_mixup_alpha": 0.6,
177
+ "ref_model_sync_steps": 512,
178
+ "generate_during_eval": false,
179
+ "model_name": "Qwen/Qwen3-4B-Thinking-2507",
180
+ "dataset_path": "qwen/second_stage/data/dpo_pairs_filt_final.clean.jsonl",
181
+ "num_train_pairs": 2014,
182
+ "validation_ratio": 0.02,
183
+ "use_lora": true,
184
+ "lora_adapter_repo": "qwen/second_stage/models/checkpoint-4465",
185
+ "lora_r": 16,
186
+ "lora_alpha": 32,
187
+ "lora_dropout": 0.05,
188
+ "lora_bias": "none",
189
+ "lora_target_modules": [
190
+ "q_proj",
191
+ "k_proj",
192
+ "v_proj",
193
+ "o_proj",
194
+ "up_proj",
195
+ "down_proj",
196
+ "gate_proj"
197
+ ],
198
+ "gradient_checkpointing_use_reentrant": false
199
+ }