SirajRLX committed on
Commit
0b86081
·
verified ·
1 Parent(s): 8b74230

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. dpo_qwen_14B/README.md +68 -0
  3. dpo_qwen_14B/best_adapter/README.md +209 -0
  4. dpo_qwen_14B/best_adapter/adapter_config.json +43 -0
  5. dpo_qwen_14B/best_adapter/adapter_model.safetensors +3 -0
  6. dpo_qwen_14B/best_adapter/chat_template.jinja +54 -0
  7. dpo_qwen_14B/best_adapter/optimizer.pt +3 -0
  8. dpo_qwen_14B/best_adapter/rng_state.pth +3 -0
  9. dpo_qwen_14B/best_adapter/scheduler.pt +3 -0
  10. dpo_qwen_14B/best_adapter/tokenizer.json +3 -0
  11. dpo_qwen_14B/best_adapter/tokenizer_config.json +29 -0
  12. dpo_qwen_14B/best_adapter/trainer_state.json +857 -0
  13. dpo_qwen_14B/best_adapter/training_args.bin +3 -0
  14. dpo_qwen_14B/checkpoint-100/README.md +209 -0
  15. dpo_qwen_14B/checkpoint-100/adapter_config.json +43 -0
  16. dpo_qwen_14B/checkpoint-100/adapter_model.safetensors +3 -0
  17. dpo_qwen_14B/checkpoint-100/chat_template.jinja +54 -0
  18. dpo_qwen_14B/checkpoint-100/optimizer.pt +3 -0
  19. dpo_qwen_14B/checkpoint-100/rng_state.pth +3 -0
  20. dpo_qwen_14B/checkpoint-100/scheduler.pt +3 -0
  21. dpo_qwen_14B/checkpoint-100/tokenizer.json +3 -0
  22. dpo_qwen_14B/checkpoint-100/tokenizer_config.json +29 -0
  23. dpo_qwen_14B/checkpoint-100/trainer_state.json +857 -0
  24. dpo_qwen_14B/checkpoint-100/training_args.bin +3 -0
  25. dpo_qwen_14B/config_resolved.yaml +93 -0
  26. dpo_qwen_14B/logs/eval.jsonl +5 -0
  27. dpo_qwen_14B/logs/train.jsonl +78 -0
  28. dpo_qwen_14B/wandb/debug-internal.log +11 -0
  29. dpo_qwen_14B/wandb/debug.log +26 -0
  30. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/config.yaml +661 -0
  31. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/output.log +189 -0
  32. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/requirements.txt +104 -0
  33. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-metadata.json +47 -0
  34. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-summary.json +1 -0
  35. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-core.log +14 -0
  36. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log +11 -0
  37. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug.log +26 -0
  38. dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb +3 -0
  39. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/config.yaml +165 -0
  40. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/output.log +121 -0
  41. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/requirements.txt +104 -0
  42. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-metadata.json +47 -0
  43. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-summary.json +1 -0
  44. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-core.log +14 -0
  45. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log +11 -0
  46. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug.log +23 -0
  47. dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb +3 -0
  48. dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/config.yaml +661 -0
  49. dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/output.log +279 -0
  50. dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/requirements.txt +104 -0
.gitattributes CHANGED
@@ -59,3 +59,8 @@ sft_devstral_24B_v2/wandb/run-20251226_180613-i1cmzyri/run-i1cmzyri.wandb filter
59
  sft_devstral_24B_v2/wandb/run-20251226_180702-oordmylf/run-oordmylf.wandb filter=lfs diff=lfs merge=lfs -text
60
  sft_devstral_24B_v2/wandb/run-20251226_180808-ny9q48hd/run-ny9q48hd.wandb filter=lfs diff=lfs merge=lfs -text
61
  sft_qwen_14B_v2/wandb/run-20251226_181544-upub1jan/run-upub1jan.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
59
  sft_devstral_24B_v2/wandb/run-20251226_180702-oordmylf/run-oordmylf.wandb filter=lfs diff=lfs merge=lfs -text
60
  sft_devstral_24B_v2/wandb/run-20251226_180808-ny9q48hd/run-ny9q48hd.wandb filter=lfs diff=lfs merge=lfs -text
61
  sft_qwen_14B_v2/wandb/run-20251226_181544-upub1jan/run-upub1jan.wandb filter=lfs diff=lfs merge=lfs -text
62
+ dpo_qwen_14B/best_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
63
+ dpo_qwen_14B/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
64
+ dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb filter=lfs diff=lfs merge=lfs -text
65
+ dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb filter=lfs diff=lfs merge=lfs -text
66
+ dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/run-wbzoafvt.wandb filter=lfs diff=lfs merge=lfs -text
dpo_qwen_14B/README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ model_name: dpo_run_14b_v1
4
+ tags:
5
+ - generated_from_trainer
6
+ - trl
7
+ - dpo
8
+ licence: license
9
+ ---
10
+
11
+ # Model Card for dpo_run_14b_v1
12
+
13
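+ This model is a fine-tuned version of the local checkpoint `../../Models/Qwen2.5-Coder-14B-CPT-SFT` (recorded as a local path rather than a Hugging Face Hub ID, so no Hub link is available).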
14
+ It has been trained using [TRL](https://github.com/huggingface/trl).
15
+
16
+ ## Quick start
17
+
18
+ ```python
19
+ from transformers import pipeline
20
+
21
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
22
+ generator = pipeline("text-generation", model="None", device="cuda")
23
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
24
+ print(output["generated_text"])
25
+ ```
26
+
27
+ ## Training procedure
28
+
29
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sirajuddin-shaik-007/dpo-training/runs/wbzoafvt)
30
+
31
+
32
+ This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
33
+
34
+ ### Framework versions
35
+
36
+ - TRL: 0.26.2
37
+ - Transformers: 5.0.0.dev0
38
+ - Pytorch: 2.5.1+cu121
39
+ - Datasets: 4.4.2
40
+ - Tokenizers: 0.22.1
41
+
42
+ ## Citations
43
+
44
+ Cite DPO as:
45
+
46
+ ```bibtex
47
+ @inproceedings{rafailov2023direct,
48
+ title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
49
+ author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
50
+ year = 2023,
51
+ booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
52
+ url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
53
+ editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
54
+ }
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
dpo_qwen_14B/best_adapter/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:../../Models/Qwen2.5-Coder-14B-CPT-SFT
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.0
dpo_qwen_14B/best_adapter/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "../../Models/Qwen2.5-Coder-14B-CPT-SFT",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "v_proj",
35
+ "q_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
dpo_qwen_14B/best_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6315595d0613ab1a98a34db46bcd956ffbcca002ea96096ef585ffbd10b082c9
3
+ size 100715016
dpo_qwen_14B/best_adapter/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
dpo_qwen_14B/best_adapter/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802de0809f197ada0a2f762d41b8a0c8e007ece14785be2ac75521db604c729b
3
+ size 201650194
dpo_qwen_14B/best_adapter/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d
3
+ size 14244
dpo_qwen_14B/best_adapter/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f2d3d5485f7a1cfe5d5e69f9e55a45f72f0a8b17e757d0ca412c96a2d472fbf
3
+ size 1064
dpo_qwen_14B/best_adapter/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
3
+ size 11421892
dpo_qwen_14B/best_adapter/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": true,
24
+ "model_max_length": 32768,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }
dpo_qwen_14B/best_adapter/trainer_state.json ADDED
@@ -0,0 +1,857 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 0.04428481683135033,
4
+ "best_model_checkpoint": "runs/dpo_run_14b_v1/checkpoint-100",
5
+ "epoch": 0.11678832116788321,
6
+ "eval_steps": 25,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0023357664233576644,
14
+ "grad_norm": 1.242694616317749,
15
+ "learning_rate": 1.9379844961240311e-07,
16
+ "logits/chosen": 5.179401397705078,
17
+ "logits/rejected": 5.192930698394775,
18
+ "logps/chosen": -368.911865234375,
19
+ "logps/rejected": -398.83880615234375,
20
+ "loss": 0.6931473016738892,
21
+ "rewards/accuracies": 0.0,
22
+ "rewards/chosen": 0.0,
23
+ "rewards/margins": 0.0,
24
+ "rewards/rejected": 0.0,
25
+ "step": 2
26
+ },
27
+ {
28
+ "epoch": 0.004671532846715329,
29
+ "grad_norm": 1.392399787902832,
30
+ "learning_rate": 5.813953488372093e-07,
31
+ "logits/chosen": 5.403897762298584,
32
+ "logits/rejected": 5.4565606117248535,
33
+ "logps/chosen": -338.43792724609375,
34
+ "logps/rejected": -367.03057861328125,
35
+ "loss": 0.6949559450149536,
36
+ "rewards/accuracies": 0.625,
37
+ "rewards/chosen": 0.004504585638642311,
38
+ "rewards/margins": -0.003222561441361904,
39
+ "rewards/rejected": 0.007727146148681641,
40
+ "step": 4
41
+ },
42
+ {
43
+ "epoch": 0.0070072992700729924,
44
+ "grad_norm": 1.066603183746338,
45
+ "learning_rate": 9.689922480620155e-07,
46
+ "logits/chosen": 5.291868209838867,
47
+ "logits/rejected": 5.328356742858887,
48
+ "logps/chosen": -362.3431701660156,
49
+ "logps/rejected": -387.5829772949219,
50
+ "loss": 0.689236581325531,
51
+ "rewards/accuracies": 0.5625,
52
+ "rewards/chosen": -0.0034066196531057358,
53
+ "rewards/margins": 0.008255671709775925,
54
+ "rewards/rejected": -0.01166229322552681,
55
+ "step": 6
56
+ },
57
+ {
58
+ "epoch": 0.009343065693430658,
59
+ "grad_norm": 1.0005714893341064,
60
+ "learning_rate": 1.3565891472868218e-06,
61
+ "logits/chosen": 5.323437690734863,
62
+ "logits/rejected": 5.410858631134033,
63
+ "logps/chosen": -379.9283447265625,
64
+ "logps/rejected": -389.0852355957031,
65
+ "loss": 0.6943775415420532,
66
+ "rewards/accuracies": 0.375,
67
+ "rewards/chosen": 0.014657974243164062,
68
+ "rewards/margins": -0.0012350091710686684,
69
+ "rewards/rejected": 0.015892982482910156,
70
+ "step": 8
71
+ },
72
+ {
73
+ "epoch": 0.01167883211678832,
74
+ "grad_norm": 1.2461222410202026,
75
+ "learning_rate": 1.744186046511628e-06,
76
+ "logits/chosen": 5.435908317565918,
77
+ "logits/rejected": 5.494542121887207,
78
+ "logps/chosen": -363.2003479003906,
79
+ "logps/rejected": -389.67376708984375,
80
+ "loss": 0.693260908126831,
81
+ "rewards/accuracies": 0.625,
82
+ "rewards/chosen": -0.028497030958533287,
83
+ "rewards/margins": 0.00012636138126254082,
84
+ "rewards/rejected": -0.028623390942811966,
85
+ "step": 10
86
+ },
87
+ {
88
+ "epoch": 0.014014598540145985,
89
+ "grad_norm": 1.4030137062072754,
90
+ "learning_rate": 2.131782945736434e-06,
91
+ "logits/chosen": 5.3550801277160645,
92
+ "logits/rejected": 5.375768661499023,
93
+ "logps/chosen": -370.96429443359375,
94
+ "logps/rejected": -402.4786071777344,
95
+ "loss": 0.6882913112640381,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.01622028276324272,
98
+ "rewards/margins": 0.010086631402373314,
99
+ "rewards/rejected": 0.006133650429546833,
100
+ "step": 12
101
+ },
102
+ {
103
+ "epoch": 0.01635036496350365,
104
+ "grad_norm": 1.1157702207565308,
105
+ "learning_rate": 2.5193798449612402e-06,
106
+ "logits/chosen": 5.515308380126953,
107
+ "logits/rejected": 5.561104774475098,
108
+ "logps/chosen": -336.7254333496094,
109
+ "logps/rejected": -357.52203369140625,
110
+ "loss": 0.6896716356277466,
111
+ "rewards/accuracies": 0.625,
112
+ "rewards/chosen": -0.017319394275546074,
113
+ "rewards/margins": 0.007328510750085115,
114
+ "rewards/rejected": -0.024647902697324753,
115
+ "step": 14
116
+ },
117
+ {
118
+ "epoch": 0.018686131386861315,
119
+ "grad_norm": 0.9470655918121338,
120
+ "learning_rate": 2.9069767441860468e-06,
121
+ "logits/chosen": 5.553088665008545,
122
+ "logits/rejected": 5.582851886749268,
123
+ "logps/chosen": -415.6842041015625,
124
+ "logps/rejected": -441.1054992675781,
125
+ "loss": 0.6904245018959045,
126
+ "rewards/accuracies": 0.5625,
127
+ "rewards/chosen": 0.03270244598388672,
128
+ "rewards/margins": 0.005826758686453104,
129
+ "rewards/rejected": 0.026875685900449753,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 0.021021897810218976,
134
+ "grad_norm": 1.4397331476211548,
135
+ "learning_rate": 3.2945736434108533e-06,
136
+ "logits/chosen": 5.440742015838623,
137
+ "logits/rejected": 5.489529132843018,
138
+ "logps/chosen": -392.46221923828125,
139
+ "logps/rejected": -420.1712341308594,
140
+ "loss": 0.683630108833313,
141
+ "rewards/accuracies": 0.5625,
142
+ "rewards/chosen": 0.011020278558135033,
143
+ "rewards/margins": 0.01951923407614231,
144
+ "rewards/rejected": -0.008498954586684704,
145
+ "step": 18
146
+ },
147
+ {
148
+ "epoch": 0.02335766423357664,
149
+ "grad_norm": 1.5941083431243896,
150
+ "learning_rate": 3.6821705426356594e-06,
151
+ "logits/chosen": 5.318347930908203,
152
+ "logits/rejected": 5.397945404052734,
153
+ "logps/chosen": -345.2221374511719,
154
+ "logps/rejected": -365.9537048339844,
155
+ "loss": 0.6902388334274292,
156
+ "rewards/accuracies": 0.5625,
157
+ "rewards/chosen": 0.006536484230309725,
158
+ "rewards/margins": 0.006013393402099609,
159
+ "rewards/rejected": 0.0005230908282101154,
160
+ "step": 20
161
+ },
162
+ {
163
+ "epoch": 0.025693430656934305,
164
+ "grad_norm": 1.1363905668258667,
165
+ "learning_rate": 4.0697674418604655e-06,
166
+ "logits/chosen": 5.632981300354004,
167
+ "logits/rejected": 5.7265520095825195,
168
+ "logps/chosen": -347.9439697265625,
169
+ "logps/rejected": -370.65777587890625,
170
+ "loss": 0.691262423992157,
171
+ "rewards/accuracies": 0.5,
172
+ "rewards/chosen": 0.011908342130482197,
173
+ "rewards/margins": 0.004538153763860464,
174
+ "rewards/rejected": 0.007370188366621733,
175
+ "step": 22
176
+ },
177
+ {
178
+ "epoch": 0.02802919708029197,
179
+ "grad_norm": 1.0684627294540405,
180
+ "learning_rate": 4.457364341085272e-06,
181
+ "logits/chosen": 5.35699987411499,
182
+ "logits/rejected": 5.405580520629883,
183
+ "logps/chosen": -347.1539001464844,
184
+ "logps/rejected": -377.6044921875,
185
+ "loss": 0.6769475936889648,
186
+ "rewards/accuracies": 0.875,
187
+ "rewards/chosen": 0.01244144607335329,
188
+ "rewards/margins": 0.03289356082677841,
189
+ "rewards/rejected": -0.020452119410037994,
190
+ "step": 24
191
+ },
192
+ {
193
+ "epoch": 0.029197080291970802,
194
+ "eval_logits/chosen": 5.295141220092773,
195
+ "eval_logits/rejected": 5.345211029052734,
196
+ "eval_logps/chosen": -370.1607666015625,
197
+ "eval_logps/rejected": -395.7251892089844,
198
+ "eval_loss": 0.6836819648742676,
199
+ "eval_rewards/accuracies": 0.665354311466217,
200
+ "eval_rewards/chosen": 0.024636391550302505,
201
+ "eval_rewards/margins": 0.019555427134037018,
202
+ "eval_rewards/rejected": 0.005080964416265488,
203
+ "eval_runtime": 454.4375,
204
+ "eval_samples_per_second": 1.677,
205
+ "eval_steps_per_second": 1.677,
206
+ "step": 25
207
+ },
208
+ {
209
+ "epoch": 0.030364963503649634,
210
+ "grad_norm": 1.592353105545044,
211
+ "learning_rate": 4.844961240310078e-06,
212
+ "logits/chosen": 5.157042026519775,
213
+ "logits/rejected": 5.244912147521973,
214
+ "logps/chosen": -387.54876708984375,
215
+ "logps/rejected": -412.0630187988281,
216
+ "loss": 0.6849788427352905,
217
+ "rewards/accuracies": 0.625,
218
+ "rewards/chosen": 0.026385309174656868,
219
+ "rewards/margins": 0.016966437920928,
220
+ "rewards/rejected": 0.009418869391083717,
221
+ "step": 26
222
+ },
223
+ {
224
+ "epoch": 0.0327007299270073,
225
+ "grad_norm": 1.3181558847427368,
226
+ "learning_rate": 5.232558139534884e-06,
227
+ "logits/chosen": 5.545513153076172,
228
+ "logits/rejected": 5.54400110244751,
229
+ "logps/chosen": -360.41650390625,
230
+ "logps/rejected": -391.2162170410156,
231
+ "loss": 0.675189733505249,
232
+ "rewards/accuracies": 0.8125,
233
+ "rewards/chosen": 0.045946408063173294,
234
+ "rewards/margins": 0.03675585240125656,
235
+ "rewards/rejected": 0.009190557524561882,
236
+ "step": 28
237
+ },
238
+ {
239
+ "epoch": 0.035036496350364967,
240
+ "grad_norm": 1.443650722503662,
241
+ "learning_rate": 5.620155038759691e-06,
242
+ "logits/chosen": 5.136168003082275,
243
+ "logits/rejected": 5.239327907562256,
244
+ "logps/chosen": -378.6293640136719,
245
+ "logps/rejected": -405.3665466308594,
246
+ "loss": 0.6752142310142517,
247
+ "rewards/accuracies": 0.8125,
248
+ "rewards/chosen": 0.04194517061114311,
249
+ "rewards/margins": 0.03668833151459694,
250
+ "rewards/rejected": 0.005256845150142908,
251
+ "step": 30
252
+ },
253
+ {
254
+ "epoch": 0.03737226277372263,
255
+ "grad_norm": 1.379568338394165,
256
+ "learning_rate": 6.007751937984497e-06,
257
+ "logits/chosen": 5.411487579345703,
258
+ "logits/rejected": 5.427243232727051,
259
+ "logps/chosen": -358.5367736816406,
260
+ "logps/rejected": -382.4181213378906,
261
+ "loss": 0.6700581312179565,
262
+ "rewards/accuracies": 0.875,
263
+ "rewards/chosen": 0.06658173352479935,
264
+ "rewards/margins": 0.047193337231874466,
265
+ "rewards/rejected": 0.019388392567634583,
266
+ "step": 32
267
+ },
268
+ {
269
+ "epoch": 0.039708029197080295,
270
+ "grad_norm": 1.3260451555252075,
271
+ "learning_rate": 6.395348837209303e-06,
272
+ "logits/chosen": 5.207217216491699,
273
+ "logits/rejected": 5.254848480224609,
274
+ "logps/chosen": -326.9423828125,
275
+ "logps/rejected": -346.52081298828125,
276
+ "loss": 0.6610866785049438,
277
+ "rewards/accuracies": 0.9375,
278
+ "rewards/chosen": 0.07038869708776474,
279
+ "rewards/margins": 0.06587495654821396,
280
+ "rewards/rejected": 0.0045137410052120686,
281
+ "step": 34
282
+ },
283
+ {
284
+ "epoch": 0.04204379562043795,
285
+ "grad_norm": 1.5776340961456299,
286
+ "learning_rate": 6.782945736434108e-06,
287
+ "logits/chosen": 5.550538063049316,
288
+ "logits/rejected": 5.6374335289001465,
289
+ "logps/chosen": -359.9613952636719,
290
+ "logps/rejected": -384.31683349609375,
291
+ "loss": 0.6281551718711853,
292
+ "rewards/accuracies": 1.0,
293
+ "rewards/chosen": 0.11738375574350357,
294
+ "rewards/margins": 0.1363767683506012,
295
+ "rewards/rejected": -0.018992995843291283,
296
+ "step": 36
297
+ },
298
+ {
299
+ "epoch": 0.04437956204379562,
300
+ "grad_norm": 1.8589071035385132,
301
+ "learning_rate": 7.170542635658915e-06,
302
+ "logits/chosen": 5.39143180847168,
303
+ "logits/rejected": 5.412029266357422,
304
+ "logps/chosen": -325.8544616699219,
305
+ "logps/rejected": -351.9772644042969,
306
+ "loss": 0.6270830631256104,
307
+ "rewards/accuracies": 0.9375,
308
+ "rewards/chosen": 0.1617884635925293,
309
+ "rewards/margins": 0.1388537436723709,
310
+ "rewards/rejected": 0.022934721782803535,
311
+ "step": 38
312
+ },
313
+ {
314
+ "epoch": 0.04671532846715328,
315
+ "grad_norm": 1.3231571912765503,
316
+ "learning_rate": 7.558139534883721e-06,
317
+ "logits/chosen": 5.189720153808594,
318
+ "logits/rejected": 5.203127384185791,
319
+ "logps/chosen": -343.3839111328125,
320
+ "logps/rejected": -374.7848205566406,
321
+ "loss": 0.641180157661438,
322
+ "rewards/accuracies": 0.875,
323
+ "rewards/chosen": 0.15248623490333557,
324
+ "rewards/margins": 0.11158552765846252,
325
+ "rewards/rejected": 0.04090070724487305,
326
+ "step": 40
327
+ },
328
+ {
329
+ "epoch": 0.049051094890510946,
330
+ "grad_norm": 2.5331315994262695,
331
+ "learning_rate": 7.945736434108528e-06,
332
+ "logits/chosen": 5.420182228088379,
333
+ "logits/rejected": 5.45302677154541,
334
+ "logps/chosen": -341.813720703125,
335
+ "logps/rejected": -372.44952392578125,
336
+ "loss": 0.6093671321868896,
337
+ "rewards/accuracies": 0.9375,
338
+ "rewards/chosen": 0.2898235321044922,
339
+ "rewards/margins": 0.18158456683158875,
340
+ "rewards/rejected": 0.10823898762464523,
341
+ "step": 42
342
+ },
343
+ {
344
+ "epoch": 0.05138686131386861,
345
+ "grad_norm": 1.5247384309768677,
346
+ "learning_rate": 8.333333333333334e-06,
347
+ "logits/chosen": 5.383636951446533,
348
+ "logits/rejected": 5.397551536560059,
349
+ "logps/chosen": -354.49627685546875,
350
+ "logps/rejected": -376.88818359375,
351
+ "loss": 0.5815833210945129,
352
+ "rewards/accuracies": 0.8125,
353
+ "rewards/chosen": 0.32459571957588196,
354
+ "rewards/margins": 0.2510552406311035,
355
+ "rewards/rejected": 0.07354050129652023,
356
+ "step": 44
357
+ },
358
+ {
359
+ "epoch": 0.053722627737226275,
360
+ "grad_norm": 2.0814144611358643,
361
+ "learning_rate": 8.72093023255814e-06,
362
+ "logits/chosen": 5.269731044769287,
363
+ "logits/rejected": 5.287116050720215,
364
+ "logps/chosen": -331.1025390625,
365
+ "logps/rejected": -362.90118408203125,
366
+ "loss": 0.5269681215286255,
367
+ "rewards/accuracies": 0.9375,
368
+ "rewards/chosen": 0.6465227603912354,
369
+ "rewards/margins": 0.37582656741142273,
370
+ "rewards/rejected": 0.27069616317749023,
371
+ "step": 46
372
+ },
373
+ {
374
+ "epoch": 0.05605839416058394,
375
+ "grad_norm": 1.769063115119934,
376
+ "learning_rate": 9.108527131782946e-06,
377
+ "logits/chosen": 5.472540855407715,
378
+ "logits/rejected": 5.465417861938477,
379
+ "logps/chosen": -369.40283203125,
380
+ "logps/rejected": -400.18438720703125,
381
+ "loss": 0.5066201686859131,
382
+ "rewards/accuracies": 1.0,
383
+ "rewards/chosen": 0.6377636194229126,
384
+ "rewards/margins": 0.42650213837623596,
385
+ "rewards/rejected": 0.21126146614551544,
386
+ "step": 48
387
+ },
388
+ {
389
+ "epoch": 0.058394160583941604,
390
+ "grad_norm": 2.84169602394104,
391
+ "learning_rate": 9.496124031007753e-06,
392
+ "logits/chosen": 5.050387382507324,
393
+ "logits/rejected": 5.112288951873779,
394
+ "logps/chosen": -363.4556579589844,
395
+ "logps/rejected": -397.8169860839844,
396
+ "loss": 0.529259979724884,
397
+ "rewards/accuracies": 1.0,
398
+ "rewards/chosen": 0.7923164367675781,
399
+ "rewards/margins": 0.3787059783935547,
400
+ "rewards/rejected": 0.4136104881763458,
401
+ "step": 50
402
+ },
403
+ {
404
+ "epoch": 0.058394160583941604,
405
+ "eval_logits/chosen": 5.22359037399292,
406
+ "eval_logits/rejected": 5.286833763122559,
407
+ "eval_logps/chosen": -361.462890625,
408
+ "eval_logps/rejected": -392.5708312988281,
409
+ "eval_loss": 0.4610801041126251,
410
+ "eval_rewards/accuracies": 0.9619422554969788,
411
+ "eval_rewards/chosen": 0.8944254517555237,
412
+ "eval_rewards/margins": 0.5739086270332336,
413
+ "eval_rewards/rejected": 0.3205168545246124,
414
+ "eval_runtime": 454.5598,
415
+ "eval_samples_per_second": 1.676,
416
+ "eval_steps_per_second": 1.676,
417
+ "step": 50
418
+ },
419
+ {
420
+ "epoch": 0.06072992700729927,
421
+ "grad_norm": 1.6907895803451538,
422
+ "learning_rate": 9.883720930232558e-06,
423
+ "logits/chosen": 5.486469268798828,
424
+ "logits/rejected": 5.541717529296875,
425
+ "logps/chosen": -343.4534606933594,
426
+ "logps/rejected": -379.39508056640625,
427
+ "loss": 0.44602835178375244,
428
+ "rewards/accuracies": 0.9375,
429
+ "rewards/chosen": 0.9869746565818787,
430
+ "rewards/margins": 0.6056646108627319,
431
+ "rewards/rejected": 0.3813100755214691,
432
+ "step": 52
433
+ },
434
+ {
435
+ "epoch": 0.06306569343065693,
436
+ "grad_norm": 1.9458682537078857,
437
+ "learning_rate": 1.0271317829457365e-05,
438
+ "logits/chosen": 5.169528961181641,
439
+ "logits/rejected": 5.2688751220703125,
440
+ "logps/chosen": -379.5437316894531,
441
+ "logps/rejected": -401.5587463378906,
442
+ "loss": 0.43609702587127686,
443
+ "rewards/accuracies": 1.0,
444
+ "rewards/chosen": 0.7794930934906006,
445
+ "rewards/margins": 0.6265671253204346,
446
+ "rewards/rejected": 0.15292587876319885,
447
+ "step": 54
448
+ },
449
+ {
450
+ "epoch": 0.0654014598540146,
451
+ "grad_norm": 2.1266520023345947,
452
+ "learning_rate": 1.065891472868217e-05,
453
+ "logits/chosen": 5.097426414489746,
454
+ "logits/rejected": 5.15327262878418,
455
+ "logps/chosen": -378.0788269042969,
456
+ "logps/rejected": -413.27392578125,
457
+ "loss": 0.3928414583206177,
458
+ "rewards/accuracies": 0.9375,
459
+ "rewards/chosen": 1.274291753768921,
460
+ "rewards/margins": 0.7864217758178711,
461
+ "rewards/rejected": 0.4878700375556946,
462
+ "step": 56
463
+ },
464
+ {
465
+ "epoch": 0.06773722627737226,
466
+ "grad_norm": 1.5381489992141724,
467
+ "learning_rate": 1.1046511627906977e-05,
468
+ "logits/chosen": 5.138954162597656,
469
+ "logits/rejected": 5.20254373550415,
470
+ "logps/chosen": -372.93438720703125,
471
+ "logps/rejected": -401.8287658691406,
472
+ "loss": 0.35855019092559814,
473
+ "rewards/accuracies": 0.875,
474
+ "rewards/chosen": 1.2897911071777344,
475
+ "rewards/margins": 0.9354276061058044,
476
+ "rewards/rejected": 0.35436347126960754,
477
+ "step": 58
478
+ },
479
+ {
480
+ "epoch": 0.07007299270072993,
481
+ "grad_norm": 2.358330726623535,
482
+ "learning_rate": 1.1434108527131783e-05,
483
+ "logits/chosen": 5.071888446807861,
484
+ "logits/rejected": 5.187964916229248,
485
+ "logps/chosen": -360.984619140625,
486
+ "logps/rejected": -392.3192138671875,
487
+ "loss": 0.42801612615585327,
488
+ "rewards/accuracies": 0.875,
489
+ "rewards/chosen": 1.3823509216308594,
490
+ "rewards/margins": 0.729066014289856,
491
+ "rewards/rejected": 0.6532848477363586,
492
+ "step": 60
493
+ },
494
+ {
495
+ "epoch": 0.07240875912408759,
496
+ "grad_norm": 2.177586317062378,
497
+ "learning_rate": 1.182170542635659e-05,
498
+ "logits/chosen": 5.264093399047852,
499
+ "logits/rejected": 5.310842990875244,
500
+ "logps/chosen": -364.808349609375,
501
+ "logps/rejected": -401.0321044921875,
502
+ "loss": 0.31365492939949036,
503
+ "rewards/accuracies": 1.0,
504
+ "rewards/chosen": 1.6637591123580933,
505
+ "rewards/margins": 1.0887457132339478,
506
+ "rewards/rejected": 0.5750135183334351,
507
+ "step": 62
508
+ },
509
+ {
510
+ "epoch": 0.07474452554744526,
511
+ "grad_norm": 1.697789192199707,
512
+ "learning_rate": 1.2209302325581395e-05,
513
+ "logits/chosen": 5.191982269287109,
514
+ "logits/rejected": 5.261416912078857,
515
+ "logps/chosen": -359.8249816894531,
516
+ "logps/rejected": -397.2122497558594,
517
+ "loss": 0.3037749230861664,
518
+ "rewards/accuracies": 1.0,
519
+ "rewards/chosen": 1.6470392942428589,
520
+ "rewards/margins": 1.114844799041748,
521
+ "rewards/rejected": 0.5321945548057556,
522
+ "step": 64
523
+ },
524
+ {
525
+ "epoch": 0.07708029197080292,
526
+ "grad_norm": 1.3219914436340332,
527
+ "learning_rate": 1.2596899224806202e-05,
528
+ "logits/chosen": 5.293405532836914,
529
+ "logits/rejected": 5.3094048500061035,
530
+ "logps/chosen": -352.3752136230469,
531
+ "logps/rejected": -392.6779479980469,
532
+ "loss": 0.25026455521583557,
533
+ "rewards/accuracies": 1.0,
534
+ "rewards/chosen": 1.5671364068984985,
535
+ "rewards/margins": 1.4098074436187744,
536
+ "rewards/rejected": 0.15732917189598083,
537
+ "step": 66
538
+ },
539
+ {
540
+ "epoch": 0.07941605839416059,
541
+ "grad_norm": 1.8173967599868774,
542
+ "learning_rate": 1.2984496124031009e-05,
543
+ "logits/chosen": 5.025746822357178,
544
+ "logits/rejected": 5.114965438842773,
545
+ "logps/chosen": -319.99700927734375,
546
+ "logps/rejected": -364.115234375,
547
+ "loss": 0.3108353912830353,
548
+ "rewards/accuracies": 0.9375,
549
+ "rewards/chosen": 1.4788665771484375,
550
+ "rewards/margins": 1.2637410163879395,
551
+ "rewards/rejected": 0.2151254564523697,
552
+ "step": 68
553
+ },
554
+ {
555
+ "epoch": 0.08175182481751825,
556
+ "grad_norm": 1.0658400058746338,
557
+ "learning_rate": 1.3372093023255814e-05,
558
+ "logits/chosen": 4.945235729217529,
559
+ "logits/rejected": 4.959147930145264,
560
+ "logps/chosen": -383.84033203125,
561
+ "logps/rejected": -431.7752685546875,
562
+ "loss": 0.22991834580898285,
563
+ "rewards/accuracies": 1.0,
564
+ "rewards/chosen": 1.3950352668762207,
565
+ "rewards/margins": 1.4965243339538574,
566
+ "rewards/rejected": -0.1014888733625412,
567
+ "step": 70
568
+ },
569
+ {
570
+ "epoch": 0.0840875912408759,
571
+ "grad_norm": 1.0350896120071411,
572
+ "learning_rate": 1.375968992248062e-05,
573
+ "logits/chosen": 5.00426721572876,
574
+ "logits/rejected": 5.120238780975342,
575
+ "logps/chosen": -350.9471435546875,
576
+ "logps/rejected": -382.6837158203125,
577
+ "loss": 0.22603684663772583,
578
+ "rewards/accuracies": 1.0,
579
+ "rewards/chosen": 1.2978975772857666,
580
+ "rewards/margins": 1.644275426864624,
581
+ "rewards/rejected": -0.34637776017189026,
582
+ "step": 72
583
+ },
584
+ {
585
+ "epoch": 0.08642335766423358,
586
+ "grad_norm": 1.1595423221588135,
587
+ "learning_rate": 1.4147286821705426e-05,
588
+ "logits/chosen": 4.890130043029785,
589
+ "logits/rejected": 4.9504714012146,
590
+ "logps/chosen": -352.34967041015625,
591
+ "logps/rejected": -399.23028564453125,
592
+ "loss": 0.18921935558319092,
593
+ "rewards/accuracies": 1.0,
594
+ "rewards/chosen": 1.1984589099884033,
595
+ "rewards/margins": 1.7495291233062744,
596
+ "rewards/rejected": -0.5510700941085815,
597
+ "step": 74
598
+ },
599
+ {
600
+ "epoch": 0.08759124087591241,
601
+ "eval_logits/chosen": 4.930174827575684,
602
+ "eval_logits/rejected": 5.032296657562256,
603
+ "eval_logps/chosen": -359.19647216796875,
604
+ "eval_logps/rejected": -405.1120300292969,
605
+ "eval_loss": 0.16020436584949493,
606
+ "eval_rewards/accuracies": 0.9960629940032959,
607
+ "eval_rewards/chosen": 1.1210675239562988,
608
+ "eval_rewards/margins": 2.0546727180480957,
609
+ "eval_rewards/rejected": -0.9336051344871521,
610
+ "eval_runtime": 454.3435,
611
+ "eval_samples_per_second": 1.677,
612
+ "eval_steps_per_second": 1.677,
613
+ "step": 75
614
+ },
615
+ {
616
+ "epoch": 0.08875912408759123,
617
+ "grad_norm": 1.1433167457580566,
618
+ "learning_rate": 1.4534883720930233e-05,
619
+ "logits/chosen": 5.037275314331055,
620
+ "logits/rejected": 5.1315507888793945,
621
+ "logps/chosen": -313.110595703125,
622
+ "logps/rejected": -356.1000061035156,
623
+ "loss": 0.15998858213424683,
624
+ "rewards/accuracies": 1.0,
625
+ "rewards/chosen": 1.2128857374191284,
626
+ "rewards/margins": 2.0945115089416504,
627
+ "rewards/rejected": -0.8816256523132324,
628
+ "step": 76
629
+ },
630
+ {
631
+ "epoch": 0.0910948905109489,
632
+ "grad_norm": 0.9839214086532593,
633
+ "learning_rate": 1.4922480620155039e-05,
634
+ "logits/chosen": 4.817085266113281,
635
+ "logits/rejected": 4.874035835266113,
636
+ "logps/chosen": -366.2629089355469,
637
+ "logps/rejected": -405.7989196777344,
638
+ "loss": 0.1894684135913849,
639
+ "rewards/accuracies": 1.0,
640
+ "rewards/chosen": 1.0605502128601074,
641
+ "rewards/margins": 1.90762460231781,
642
+ "rewards/rejected": -0.8470743894577026,
643
+ "step": 78
644
+ },
645
+ {
646
+ "epoch": 0.09343065693430656,
647
+ "grad_norm": 0.9212782979011536,
648
+ "learning_rate": 1.5310077519379846e-05,
649
+ "logits/chosen": 5.046716690063477,
650
+ "logits/rejected": 5.157979965209961,
651
+ "logps/chosen": -348.0658264160156,
652
+ "logps/rejected": -395.23870849609375,
653
+ "loss": 0.15948188304901123,
654
+ "rewards/accuracies": 1.0,
655
+ "rewards/chosen": 0.676516056060791,
656
+ "rewards/margins": 2.167430877685547,
657
+ "rewards/rejected": -1.4909145832061768,
658
+ "step": 80
659
+ },
660
+ {
661
+ "epoch": 0.09576642335766423,
662
+ "grad_norm": 0.9820688366889954,
663
+ "learning_rate": 1.569767441860465e-05,
664
+ "logits/chosen": 4.690741539001465,
665
+ "logits/rejected": 4.771791458129883,
666
+ "logps/chosen": -378.8666076660156,
667
+ "logps/rejected": -436.9100036621094,
668
+ "loss": 0.12085139006376266,
669
+ "rewards/accuracies": 1.0,
670
+ "rewards/chosen": 0.8719685077667236,
671
+ "rewards/margins": 2.646538257598877,
672
+ "rewards/rejected": -1.7745698690414429,
673
+ "step": 82
674
+ },
675
+ {
676
+ "epoch": 0.09810218978102189,
677
+ "grad_norm": 0.66785728931427,
678
+ "learning_rate": 1.608527131782946e-05,
679
+ "logits/chosen": 4.880465984344482,
680
+ "logits/rejected": 4.961792945861816,
681
+ "logps/chosen": -346.51214599609375,
682
+ "logps/rejected": -400.1110534667969,
683
+ "loss": 0.08720710873603821,
684
+ "rewards/accuracies": 1.0,
685
+ "rewards/chosen": 1.1337480545043945,
686
+ "rewards/margins": 2.903944253921509,
687
+ "rewards/rejected": -1.7701961994171143,
688
+ "step": 84
689
+ },
690
+ {
691
+ "epoch": 0.10043795620437956,
692
+ "grad_norm": 0.5760660767555237,
693
+ "learning_rate": 1.647286821705426e-05,
694
+ "logits/chosen": 4.464397430419922,
695
+ "logits/rejected": 4.680055618286133,
696
+ "logps/chosen": -341.7489318847656,
697
+ "logps/rejected": -398.322021484375,
698
+ "loss": 0.07942983508110046,
699
+ "rewards/accuracies": 1.0,
700
+ "rewards/chosen": 1.2459325790405273,
701
+ "rewards/margins": 3.0152552127838135,
702
+ "rewards/rejected": -1.7693227529525757,
703
+ "step": 86
704
+ },
705
+ {
706
+ "epoch": 0.10277372262773722,
707
+ "grad_norm": 1.6020294427871704,
708
+ "learning_rate": 1.686046511627907e-05,
709
+ "logits/chosen": 4.563863277435303,
710
+ "logits/rejected": 4.680974960327148,
711
+ "logps/chosen": -344.9147644042969,
712
+ "logps/rejected": -395.4453125,
713
+ "loss": 0.1258174479007721,
714
+ "rewards/accuracies": 0.9375,
715
+ "rewards/chosen": 1.0706769227981567,
716
+ "rewards/margins": 3.118717670440674,
717
+ "rewards/rejected": -2.0480403900146484,
718
+ "step": 88
719
+ },
720
+ {
721
+ "epoch": 0.10510948905109489,
722
+ "grad_norm": 0.46413859724998474,
723
+ "learning_rate": 1.7248062015503875e-05,
724
+ "logits/chosen": 4.4989237785339355,
725
+ "logits/rejected": 4.673248291015625,
726
+ "logps/chosen": -326.9678649902344,
727
+ "logps/rejected": -388.4164123535156,
728
+ "loss": 0.06663060188293457,
729
+ "rewards/accuracies": 1.0,
730
+ "rewards/chosen": 1.4128761291503906,
731
+ "rewards/margins": 3.760685920715332,
732
+ "rewards/rejected": -2.3478102684020996,
733
+ "step": 90
734
+ },
735
+ {
736
+ "epoch": 0.10744525547445255,
737
+ "grad_norm": 0.6699568629264832,
738
+ "learning_rate": 1.7635658914728684e-05,
739
+ "logits/chosen": 4.7294535636901855,
740
+ "logits/rejected": 4.813880920410156,
741
+ "logps/chosen": -362.7267150878906,
742
+ "logps/rejected": -439.2985534667969,
743
+ "loss": 0.04481709748506546,
744
+ "rewards/accuracies": 1.0,
745
+ "rewards/chosen": 1.477597713470459,
746
+ "rewards/margins": 4.37883186340332,
747
+ "rewards/rejected": -2.9012341499328613,
748
+ "step": 92
749
+ },
750
+ {
751
+ "epoch": 0.10978102189781022,
752
+ "grad_norm": 0.4152977168560028,
753
+ "learning_rate": 1.802325581395349e-05,
754
+ "logits/chosen": 4.785149574279785,
755
+ "logits/rejected": 4.891542434692383,
756
+ "logps/chosen": -381.59246826171875,
757
+ "logps/rejected": -444.2817687988281,
758
+ "loss": 0.05632612109184265,
759
+ "rewards/accuracies": 1.0,
760
+ "rewards/chosen": 0.71366286277771,
761
+ "rewards/margins": 3.4584720134735107,
762
+ "rewards/rejected": -2.744809150695801,
763
+ "step": 94
764
+ },
765
+ {
766
+ "epoch": 0.11211678832116788,
767
+ "grad_norm": 0.3152717649936676,
768
+ "learning_rate": 1.8410852713178295e-05,
769
+ "logits/chosen": 4.603940486907959,
770
+ "logits/rejected": 4.804995536804199,
771
+ "logps/chosen": -356.7286376953125,
772
+ "logps/rejected": -414.69635009765625,
773
+ "loss": 0.040920041501522064,
774
+ "rewards/accuracies": 1.0,
775
+ "rewards/chosen": 1.7566397190093994,
776
+ "rewards/margins": 4.020595550537109,
777
+ "rewards/rejected": -2.263956069946289,
778
+ "step": 96
779
+ },
780
+ {
781
+ "epoch": 0.11445255474452555,
782
+ "grad_norm": 0.37698569893836975,
783
+ "learning_rate": 1.8798449612403103e-05,
784
+ "logits/chosen": 4.558542728424072,
785
+ "logits/rejected": 4.690641403198242,
786
+ "logps/chosen": -339.794189453125,
787
+ "logps/rejected": -413.8865966796875,
788
+ "loss": 0.025794224813580513,
789
+ "rewards/accuracies": 1.0,
790
+ "rewards/chosen": 1.3867536783218384,
791
+ "rewards/margins": 4.6542744636535645,
792
+ "rewards/rejected": -3.2675204277038574,
793
+ "step": 98
794
+ },
795
+ {
796
+ "epoch": 0.11678832116788321,
797
+ "grad_norm": 0.15023073554039001,
798
+ "learning_rate": 1.918604651162791e-05,
799
+ "logits/chosen": 4.387497425079346,
800
+ "logits/rejected": 4.494588375091553,
801
+ "logps/chosen": -346.2568054199219,
802
+ "logps/rejected": -418.9315185546875,
803
+ "loss": 0.015155203640460968,
804
+ "rewards/accuracies": 1.0,
805
+ "rewards/chosen": 1.7938623428344727,
806
+ "rewards/margins": 4.942529201507568,
807
+ "rewards/rejected": -3.1486666202545166,
808
+ "step": 100
809
+ },
810
+ {
811
+ "epoch": 0.11678832116788321,
812
+ "eval_logits/chosen": 4.285891056060791,
813
+ "eval_logits/rejected": 4.425926208496094,
814
+ "eval_logps/chosen": -353.15850830078125,
815
+ "eval_logps/rejected": -424.4124755859375,
816
+ "eval_loss": 0.04428481683135033,
817
+ "eval_rewards/accuracies": 0.9921259880065918,
818
+ "eval_rewards/chosen": 1.7248634099960327,
819
+ "eval_rewards/margins": 4.588510513305664,
820
+ "eval_rewards/rejected": -2.863647222518921,
821
+ "eval_runtime": 454.7251,
822
+ "eval_samples_per_second": 1.676,
823
+ "eval_steps_per_second": 1.676,
824
+ "step": 100
825
+ }
826
+ ],
827
+ "logging_steps": 2,
828
+ "max_steps": 2571,
829
+ "num_input_tokens_seen": 0,
830
+ "num_train_epochs": 3,
831
+ "save_steps": 100,
832
+ "stateful_callbacks": {
833
+ "EarlyStoppingCallback": {
834
+ "args": {
835
+ "early_stopping_patience": 5,
836
+ "early_stopping_threshold": 0.001
837
+ },
838
+ "attributes": {
839
+ "early_stopping_patience_counter": 0
840
+ }
841
+ },
842
+ "TrainerControl": {
843
+ "args": {
844
+ "should_epoch_stop": false,
845
+ "should_evaluate": false,
846
+ "should_log": false,
847
+ "should_save": true,
848
+ "should_training_stop": false
849
+ },
850
+ "attributes": {}
851
+ }
852
+ },
853
+ "total_flos": 0.0,
854
+ "train_batch_size": 1,
855
+ "trial_name": null,
856
+ "trial_params": null
857
+ }
dpo_qwen_14B/best_adapter/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21875ef630d3e8f528dce67596a0d783fd5cf223e6e245a98026996d1f3d3ade
3
+ size 5752
dpo_qwen_14B/checkpoint-100/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:../../Models/Qwen2.5-Coder-14B-CPT-SFT
7
+ - dpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.18.0
dpo_qwen_14B/checkpoint-100/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "../../Models/Qwen2.5-Coder-14B-CPT-SFT",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
+ "qalora_group_size": 16,
28
+ "r": 16,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
+ "v_proj",
35
+ "q_proj"
36
+ ],
37
+ "target_parameters": null,
38
+ "task_type": "CAUSAL_LM",
39
+ "trainable_token_indices": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
dpo_qwen_14B/checkpoint-100/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6315595d0613ab1a98a34db46bcd956ffbcca002ea96096ef585ffbd10b082c9
3
+ size 100715016
dpo_qwen_14B/checkpoint-100/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
dpo_qwen_14B/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802de0809f197ada0a2f762d41b8a0c8e007ece14785be2ac75521db604c729b
3
+ size 201650194
dpo_qwen_14B/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d
3
+ size 14244
dpo_qwen_14B/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f2d3d5485f7a1cfe5d5e69f9e55a45f72f0a8b17e757d0ca412c96a2d472fbf
3
+ size 1064
dpo_qwen_14B/checkpoint-100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
3
+ size 11421892
dpo_qwen_14B/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": true,
24
+ "model_max_length": 32768,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }
dpo_qwen_14B/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,857 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 0.04428481683135033,
4
+ "best_model_checkpoint": "runs/dpo_run_14b_v1/checkpoint-100",
5
+ "epoch": 0.11678832116788321,
6
+ "eval_steps": 25,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0023357664233576644,
14
+ "grad_norm": 1.242694616317749,
15
+ "learning_rate": 1.9379844961240311e-07,
16
+ "logits/chosen": 5.179401397705078,
17
+ "logits/rejected": 5.192930698394775,
18
+ "logps/chosen": -368.911865234375,
19
+ "logps/rejected": -398.83880615234375,
20
+ "loss": 0.6931473016738892,
21
+ "rewards/accuracies": 0.0,
22
+ "rewards/chosen": 0.0,
23
+ "rewards/margins": 0.0,
24
+ "rewards/rejected": 0.0,
25
+ "step": 2
26
+ },
27
+ {
28
+ "epoch": 0.004671532846715329,
29
+ "grad_norm": 1.392399787902832,
30
+ "learning_rate": 5.813953488372093e-07,
31
+ "logits/chosen": 5.403897762298584,
32
+ "logits/rejected": 5.4565606117248535,
33
+ "logps/chosen": -338.43792724609375,
34
+ "logps/rejected": -367.03057861328125,
35
+ "loss": 0.6949559450149536,
36
+ "rewards/accuracies": 0.625,
37
+ "rewards/chosen": 0.004504585638642311,
38
+ "rewards/margins": -0.003222561441361904,
39
+ "rewards/rejected": 0.007727146148681641,
40
+ "step": 4
41
+ },
42
+ {
43
+ "epoch": 0.0070072992700729924,
44
+ "grad_norm": 1.066603183746338,
45
+ "learning_rate": 9.689922480620155e-07,
46
+ "logits/chosen": 5.291868209838867,
47
+ "logits/rejected": 5.328356742858887,
48
+ "logps/chosen": -362.3431701660156,
49
+ "logps/rejected": -387.5829772949219,
50
+ "loss": 0.689236581325531,
51
+ "rewards/accuracies": 0.5625,
52
+ "rewards/chosen": -0.0034066196531057358,
53
+ "rewards/margins": 0.008255671709775925,
54
+ "rewards/rejected": -0.01166229322552681,
55
+ "step": 6
56
+ },
57
+ {
58
+ "epoch": 0.009343065693430658,
59
+ "grad_norm": 1.0005714893341064,
60
+ "learning_rate": 1.3565891472868218e-06,
61
+ "logits/chosen": 5.323437690734863,
62
+ "logits/rejected": 5.410858631134033,
63
+ "logps/chosen": -379.9283447265625,
64
+ "logps/rejected": -389.0852355957031,
65
+ "loss": 0.6943775415420532,
66
+ "rewards/accuracies": 0.375,
67
+ "rewards/chosen": 0.014657974243164062,
68
+ "rewards/margins": -0.0012350091710686684,
69
+ "rewards/rejected": 0.015892982482910156,
70
+ "step": 8
71
+ },
72
+ {
73
+ "epoch": 0.01167883211678832,
74
+ "grad_norm": 1.2461222410202026,
75
+ "learning_rate": 1.744186046511628e-06,
76
+ "logits/chosen": 5.435908317565918,
77
+ "logits/rejected": 5.494542121887207,
78
+ "logps/chosen": -363.2003479003906,
79
+ "logps/rejected": -389.67376708984375,
80
+ "loss": 0.693260908126831,
81
+ "rewards/accuracies": 0.625,
82
+ "rewards/chosen": -0.028497030958533287,
83
+ "rewards/margins": 0.00012636138126254082,
84
+ "rewards/rejected": -0.028623390942811966,
85
+ "step": 10
86
+ },
87
+ {
88
+ "epoch": 0.014014598540145985,
89
+ "grad_norm": 1.4030137062072754,
90
+ "learning_rate": 2.131782945736434e-06,
91
+ "logits/chosen": 5.3550801277160645,
92
+ "logits/rejected": 5.375768661499023,
93
+ "logps/chosen": -370.96429443359375,
94
+ "logps/rejected": -402.4786071777344,
95
+ "loss": 0.6882913112640381,
96
+ "rewards/accuracies": 0.5,
97
+ "rewards/chosen": 0.01622028276324272,
98
+ "rewards/margins": 0.010086631402373314,
99
+ "rewards/rejected": 0.006133650429546833,
100
+ "step": 12
101
+ },
102
+ {
103
+ "epoch": 0.01635036496350365,
104
+ "grad_norm": 1.1157702207565308,
105
+ "learning_rate": 2.5193798449612402e-06,
106
+ "logits/chosen": 5.515308380126953,
107
+ "logits/rejected": 5.561104774475098,
108
+ "logps/chosen": -336.7254333496094,
109
+ "logps/rejected": -357.52203369140625,
110
+ "loss": 0.6896716356277466,
111
+ "rewards/accuracies": 0.625,
112
+ "rewards/chosen": -0.017319394275546074,
113
+ "rewards/margins": 0.007328510750085115,
114
+ "rewards/rejected": -0.024647902697324753,
115
+ "step": 14
116
+ },
117
+ {
118
+ "epoch": 0.018686131386861315,
119
+ "grad_norm": 0.9470655918121338,
120
+ "learning_rate": 2.9069767441860468e-06,
121
+ "logits/chosen": 5.553088665008545,
122
+ "logits/rejected": 5.582851886749268,
123
+ "logps/chosen": -415.6842041015625,
124
+ "logps/rejected": -441.1054992675781,
125
+ "loss": 0.6904245018959045,
126
+ "rewards/accuracies": 0.5625,
127
+ "rewards/chosen": 0.03270244598388672,
128
+ "rewards/margins": 0.005826758686453104,
129
+ "rewards/rejected": 0.026875685900449753,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 0.021021897810218976,
134
+ "grad_norm": 1.4397331476211548,
135
+ "learning_rate": 3.2945736434108533e-06,
136
+ "logits/chosen": 5.440742015838623,
137
+ "logits/rejected": 5.489529132843018,
138
+ "logps/chosen": -392.46221923828125,
139
+ "logps/rejected": -420.1712341308594,
140
+ "loss": 0.683630108833313,
141
+ "rewards/accuracies": 0.5625,
142
+ "rewards/chosen": 0.011020278558135033,
143
+ "rewards/margins": 0.01951923407614231,
144
+ "rewards/rejected": -0.008498954586684704,
145
+ "step": 18
146
+ },
147
+ {
148
+ "epoch": 0.02335766423357664,
149
+ "grad_norm": 1.5941083431243896,
150
+ "learning_rate": 3.6821705426356594e-06,
151
+ "logits/chosen": 5.318347930908203,
152
+ "logits/rejected": 5.397945404052734,
153
+ "logps/chosen": -345.2221374511719,
154
+ "logps/rejected": -365.9537048339844,
155
+ "loss": 0.6902388334274292,
156
+ "rewards/accuracies": 0.5625,
157
+ "rewards/chosen": 0.006536484230309725,
158
+ "rewards/margins": 0.006013393402099609,
159
+ "rewards/rejected": 0.0005230908282101154,
160
+ "step": 20
161
+ },
162
+ {
163
+ "epoch": 0.025693430656934305,
164
+ "grad_norm": 1.1363905668258667,
165
+ "learning_rate": 4.0697674418604655e-06,
166
+ "logits/chosen": 5.632981300354004,
167
+ "logits/rejected": 5.7265520095825195,
168
+ "logps/chosen": -347.9439697265625,
169
+ "logps/rejected": -370.65777587890625,
170
+ "loss": 0.691262423992157,
171
+ "rewards/accuracies": 0.5,
172
+ "rewards/chosen": 0.011908342130482197,
173
+ "rewards/margins": 0.004538153763860464,
174
+ "rewards/rejected": 0.007370188366621733,
175
+ "step": 22
176
+ },
177
+ {
178
+ "epoch": 0.02802919708029197,
179
+ "grad_norm": 1.0684627294540405,
180
+ "learning_rate": 4.457364341085272e-06,
181
+ "logits/chosen": 5.35699987411499,
182
+ "logits/rejected": 5.405580520629883,
183
+ "logps/chosen": -347.1539001464844,
184
+ "logps/rejected": -377.6044921875,
185
+ "loss": 0.6769475936889648,
186
+ "rewards/accuracies": 0.875,
187
+ "rewards/chosen": 0.01244144607335329,
188
+ "rewards/margins": 0.03289356082677841,
189
+ "rewards/rejected": -0.020452119410037994,
190
+ "step": 24
191
+ },
192
+ {
193
+ "epoch": 0.029197080291970802,
194
+ "eval_logits/chosen": 5.295141220092773,
195
+ "eval_logits/rejected": 5.345211029052734,
196
+ "eval_logps/chosen": -370.1607666015625,
197
+ "eval_logps/rejected": -395.7251892089844,
198
+ "eval_loss": 0.6836819648742676,
199
+ "eval_rewards/accuracies": 0.665354311466217,
200
+ "eval_rewards/chosen": 0.024636391550302505,
201
+ "eval_rewards/margins": 0.019555427134037018,
202
+ "eval_rewards/rejected": 0.005080964416265488,
203
+ "eval_runtime": 454.4375,
204
+ "eval_samples_per_second": 1.677,
205
+ "eval_steps_per_second": 1.677,
206
+ "step": 25
207
+ },
208
+ {
209
+ "epoch": 0.030364963503649634,
210
+ "grad_norm": 1.592353105545044,
211
+ "learning_rate": 4.844961240310078e-06,
212
+ "logits/chosen": 5.157042026519775,
213
+ "logits/rejected": 5.244912147521973,
214
+ "logps/chosen": -387.54876708984375,
215
+ "logps/rejected": -412.0630187988281,
216
+ "loss": 0.6849788427352905,
217
+ "rewards/accuracies": 0.625,
218
+ "rewards/chosen": 0.026385309174656868,
219
+ "rewards/margins": 0.016966437920928,
220
+ "rewards/rejected": 0.009418869391083717,
221
+ "step": 26
222
+ },
223
+ {
224
+ "epoch": 0.0327007299270073,
225
+ "grad_norm": 1.3181558847427368,
226
+ "learning_rate": 5.232558139534884e-06,
227
+ "logits/chosen": 5.545513153076172,
228
+ "logits/rejected": 5.54400110244751,
229
+ "logps/chosen": -360.41650390625,
230
+ "logps/rejected": -391.2162170410156,
231
+ "loss": 0.675189733505249,
232
+ "rewards/accuracies": 0.8125,
233
+ "rewards/chosen": 0.045946408063173294,
234
+ "rewards/margins": 0.03675585240125656,
235
+ "rewards/rejected": 0.009190557524561882,
236
+ "step": 28
237
+ },
238
+ {
239
+ "epoch": 0.035036496350364967,
240
+ "grad_norm": 1.443650722503662,
241
+ "learning_rate": 5.620155038759691e-06,
242
+ "logits/chosen": 5.136168003082275,
243
+ "logits/rejected": 5.239327907562256,
244
+ "logps/chosen": -378.6293640136719,
245
+ "logps/rejected": -405.3665466308594,
246
+ "loss": 0.6752142310142517,
247
+ "rewards/accuracies": 0.8125,
248
+ "rewards/chosen": 0.04194517061114311,
249
+ "rewards/margins": 0.03668833151459694,
250
+ "rewards/rejected": 0.005256845150142908,
251
+ "step": 30
252
+ },
253
+ {
254
+ "epoch": 0.03737226277372263,
255
+ "grad_norm": 1.379568338394165,
256
+ "learning_rate": 6.007751937984497e-06,
257
+ "logits/chosen": 5.411487579345703,
258
+ "logits/rejected": 5.427243232727051,
259
+ "logps/chosen": -358.5367736816406,
260
+ "logps/rejected": -382.4181213378906,
261
+ "loss": 0.6700581312179565,
262
+ "rewards/accuracies": 0.875,
263
+ "rewards/chosen": 0.06658173352479935,
264
+ "rewards/margins": 0.047193337231874466,
265
+ "rewards/rejected": 0.019388392567634583,
266
+ "step": 32
267
+ },
268
+ {
269
+ "epoch": 0.039708029197080295,
270
+ "grad_norm": 1.3260451555252075,
271
+ "learning_rate": 6.395348837209303e-06,
272
+ "logits/chosen": 5.207217216491699,
273
+ "logits/rejected": 5.254848480224609,
274
+ "logps/chosen": -326.9423828125,
275
+ "logps/rejected": -346.52081298828125,
276
+ "loss": 0.6610866785049438,
277
+ "rewards/accuracies": 0.9375,
278
+ "rewards/chosen": 0.07038869708776474,
279
+ "rewards/margins": 0.06587495654821396,
280
+ "rewards/rejected": 0.0045137410052120686,
281
+ "step": 34
282
+ },
283
+ {
284
+ "epoch": 0.04204379562043795,
285
+ "grad_norm": 1.5776340961456299,
286
+ "learning_rate": 6.782945736434108e-06,
287
+ "logits/chosen": 5.550538063049316,
288
+ "logits/rejected": 5.6374335289001465,
289
+ "logps/chosen": -359.9613952636719,
290
+ "logps/rejected": -384.31683349609375,
291
+ "loss": 0.6281551718711853,
292
+ "rewards/accuracies": 1.0,
293
+ "rewards/chosen": 0.11738375574350357,
294
+ "rewards/margins": 0.1363767683506012,
295
+ "rewards/rejected": -0.018992995843291283,
296
+ "step": 36
297
+ },
298
+ {
299
+ "epoch": 0.04437956204379562,
300
+ "grad_norm": 1.8589071035385132,
301
+ "learning_rate": 7.170542635658915e-06,
302
+ "logits/chosen": 5.39143180847168,
303
+ "logits/rejected": 5.412029266357422,
304
+ "logps/chosen": -325.8544616699219,
305
+ "logps/rejected": -351.9772644042969,
306
+ "loss": 0.6270830631256104,
307
+ "rewards/accuracies": 0.9375,
308
+ "rewards/chosen": 0.1617884635925293,
309
+ "rewards/margins": 0.1388537436723709,
310
+ "rewards/rejected": 0.022934721782803535,
311
+ "step": 38
312
+ },
313
+ {
314
+ "epoch": 0.04671532846715328,
315
+ "grad_norm": 1.3231571912765503,
316
+ "learning_rate": 7.558139534883721e-06,
317
+ "logits/chosen": 5.189720153808594,
318
+ "logits/rejected": 5.203127384185791,
319
+ "logps/chosen": -343.3839111328125,
320
+ "logps/rejected": -374.7848205566406,
321
+ "loss": 0.641180157661438,
322
+ "rewards/accuracies": 0.875,
323
+ "rewards/chosen": 0.15248623490333557,
324
+ "rewards/margins": 0.11158552765846252,
325
+ "rewards/rejected": 0.04090070724487305,
326
+ "step": 40
327
+ },
328
+ {
329
+ "epoch": 0.049051094890510946,
330
+ "grad_norm": 2.5331315994262695,
331
+ "learning_rate": 7.945736434108528e-06,
332
+ "logits/chosen": 5.420182228088379,
333
+ "logits/rejected": 5.45302677154541,
334
+ "logps/chosen": -341.813720703125,
335
+ "logps/rejected": -372.44952392578125,
336
+ "loss": 0.6093671321868896,
337
+ "rewards/accuracies": 0.9375,
338
+ "rewards/chosen": 0.2898235321044922,
339
+ "rewards/margins": 0.18158456683158875,
340
+ "rewards/rejected": 0.10823898762464523,
341
+ "step": 42
342
+ },
343
+ {
344
+ "epoch": 0.05138686131386861,
345
+ "grad_norm": 1.5247384309768677,
346
+ "learning_rate": 8.333333333333334e-06,
347
+ "logits/chosen": 5.383636951446533,
348
+ "logits/rejected": 5.397551536560059,
349
+ "logps/chosen": -354.49627685546875,
350
+ "logps/rejected": -376.88818359375,
351
+ "loss": 0.5815833210945129,
352
+ "rewards/accuracies": 0.8125,
353
+ "rewards/chosen": 0.32459571957588196,
354
+ "rewards/margins": 0.2510552406311035,
355
+ "rewards/rejected": 0.07354050129652023,
356
+ "step": 44
357
+ },
358
+ {
359
+ "epoch": 0.053722627737226275,
360
+ "grad_norm": 2.0814144611358643,
361
+ "learning_rate": 8.72093023255814e-06,
362
+ "logits/chosen": 5.269731044769287,
363
+ "logits/rejected": 5.287116050720215,
364
+ "logps/chosen": -331.1025390625,
365
+ "logps/rejected": -362.90118408203125,
366
+ "loss": 0.5269681215286255,
367
+ "rewards/accuracies": 0.9375,
368
+ "rewards/chosen": 0.6465227603912354,
369
+ "rewards/margins": 0.37582656741142273,
370
+ "rewards/rejected": 0.27069616317749023,
371
+ "step": 46
372
+ },
373
+ {
374
+ "epoch": 0.05605839416058394,
375
+ "grad_norm": 1.769063115119934,
376
+ "learning_rate": 9.108527131782946e-06,
377
+ "logits/chosen": 5.472540855407715,
378
+ "logits/rejected": 5.465417861938477,
379
+ "logps/chosen": -369.40283203125,
380
+ "logps/rejected": -400.18438720703125,
381
+ "loss": 0.5066201686859131,
382
+ "rewards/accuracies": 1.0,
383
+ "rewards/chosen": 0.6377636194229126,
384
+ "rewards/margins": 0.42650213837623596,
385
+ "rewards/rejected": 0.21126146614551544,
386
+ "step": 48
387
+ },
388
+ {
389
+ "epoch": 0.058394160583941604,
390
+ "grad_norm": 2.84169602394104,
391
+ "learning_rate": 9.496124031007753e-06,
392
+ "logits/chosen": 5.050387382507324,
393
+ "logits/rejected": 5.112288951873779,
394
+ "logps/chosen": -363.4556579589844,
395
+ "logps/rejected": -397.8169860839844,
396
+ "loss": 0.529259979724884,
397
+ "rewards/accuracies": 1.0,
398
+ "rewards/chosen": 0.7923164367675781,
399
+ "rewards/margins": 0.3787059783935547,
400
+ "rewards/rejected": 0.4136104881763458,
401
+ "step": 50
402
+ },
403
+ {
404
+ "epoch": 0.058394160583941604,
405
+ "eval_logits/chosen": 5.22359037399292,
406
+ "eval_logits/rejected": 5.286833763122559,
407
+ "eval_logps/chosen": -361.462890625,
408
+ "eval_logps/rejected": -392.5708312988281,
409
+ "eval_loss": 0.4610801041126251,
410
+ "eval_rewards/accuracies": 0.9619422554969788,
411
+ "eval_rewards/chosen": 0.8944254517555237,
412
+ "eval_rewards/margins": 0.5739086270332336,
413
+ "eval_rewards/rejected": 0.3205168545246124,
414
+ "eval_runtime": 454.5598,
415
+ "eval_samples_per_second": 1.676,
416
+ "eval_steps_per_second": 1.676,
417
+ "step": 50
418
+ },
419
+ {
420
+ "epoch": 0.06072992700729927,
421
+ "grad_norm": 1.6907895803451538,
422
+ "learning_rate": 9.883720930232558e-06,
423
+ "logits/chosen": 5.486469268798828,
424
+ "logits/rejected": 5.541717529296875,
425
+ "logps/chosen": -343.4534606933594,
426
+ "logps/rejected": -379.39508056640625,
427
+ "loss": 0.44602835178375244,
428
+ "rewards/accuracies": 0.9375,
429
+ "rewards/chosen": 0.9869746565818787,
430
+ "rewards/margins": 0.6056646108627319,
431
+ "rewards/rejected": 0.3813100755214691,
432
+ "step": 52
433
+ },
434
+ {
435
+ "epoch": 0.06306569343065693,
436
+ "grad_norm": 1.9458682537078857,
437
+ "learning_rate": 1.0271317829457365e-05,
438
+ "logits/chosen": 5.169528961181641,
439
+ "logits/rejected": 5.2688751220703125,
440
+ "logps/chosen": -379.5437316894531,
441
+ "logps/rejected": -401.5587463378906,
442
+ "loss": 0.43609702587127686,
443
+ "rewards/accuracies": 1.0,
444
+ "rewards/chosen": 0.7794930934906006,
445
+ "rewards/margins": 0.6265671253204346,
446
+ "rewards/rejected": 0.15292587876319885,
447
+ "step": 54
448
+ },
449
+ {
450
+ "epoch": 0.0654014598540146,
451
+ "grad_norm": 2.1266520023345947,
452
+ "learning_rate": 1.065891472868217e-05,
453
+ "logits/chosen": 5.097426414489746,
454
+ "logits/rejected": 5.15327262878418,
455
+ "logps/chosen": -378.0788269042969,
456
+ "logps/rejected": -413.27392578125,
457
+ "loss": 0.3928414583206177,
458
+ "rewards/accuracies": 0.9375,
459
+ "rewards/chosen": 1.274291753768921,
460
+ "rewards/margins": 0.7864217758178711,
461
+ "rewards/rejected": 0.4878700375556946,
462
+ "step": 56
463
+ },
464
+ {
465
+ "epoch": 0.06773722627737226,
466
+ "grad_norm": 1.5381489992141724,
467
+ "learning_rate": 1.1046511627906977e-05,
468
+ "logits/chosen": 5.138954162597656,
469
+ "logits/rejected": 5.20254373550415,
470
+ "logps/chosen": -372.93438720703125,
471
+ "logps/rejected": -401.8287658691406,
472
+ "loss": 0.35855019092559814,
473
+ "rewards/accuracies": 0.875,
474
+ "rewards/chosen": 1.2897911071777344,
475
+ "rewards/margins": 0.9354276061058044,
476
+ "rewards/rejected": 0.35436347126960754,
477
+ "step": 58
478
+ },
479
+ {
480
+ "epoch": 0.07007299270072993,
481
+ "grad_norm": 2.358330726623535,
482
+ "learning_rate": 1.1434108527131783e-05,
483
+ "logits/chosen": 5.071888446807861,
484
+ "logits/rejected": 5.187964916229248,
485
+ "logps/chosen": -360.984619140625,
486
+ "logps/rejected": -392.3192138671875,
487
+ "loss": 0.42801612615585327,
488
+ "rewards/accuracies": 0.875,
489
+ "rewards/chosen": 1.3823509216308594,
490
+ "rewards/margins": 0.729066014289856,
491
+ "rewards/rejected": 0.6532848477363586,
492
+ "step": 60
493
+ },
494
+ {
495
+ "epoch": 0.07240875912408759,
496
+ "grad_norm": 2.177586317062378,
497
+ "learning_rate": 1.182170542635659e-05,
498
+ "logits/chosen": 5.264093399047852,
499
+ "logits/rejected": 5.310842990875244,
500
+ "logps/chosen": -364.808349609375,
501
+ "logps/rejected": -401.0321044921875,
502
+ "loss": 0.31365492939949036,
503
+ "rewards/accuracies": 1.0,
504
+ "rewards/chosen": 1.6637591123580933,
505
+ "rewards/margins": 1.0887457132339478,
506
+ "rewards/rejected": 0.5750135183334351,
507
+ "step": 62
508
+ },
509
+ {
510
+ "epoch": 0.07474452554744526,
511
+ "grad_norm": 1.697789192199707,
512
+ "learning_rate": 1.2209302325581395e-05,
513
+ "logits/chosen": 5.191982269287109,
514
+ "logits/rejected": 5.261416912078857,
515
+ "logps/chosen": -359.8249816894531,
516
+ "logps/rejected": -397.2122497558594,
517
+ "loss": 0.3037749230861664,
518
+ "rewards/accuracies": 1.0,
519
+ "rewards/chosen": 1.6470392942428589,
520
+ "rewards/margins": 1.114844799041748,
521
+ "rewards/rejected": 0.5321945548057556,
522
+ "step": 64
523
+ },
524
+ {
525
+ "epoch": 0.07708029197080292,
526
+ "grad_norm": 1.3219914436340332,
527
+ "learning_rate": 1.2596899224806202e-05,
528
+ "logits/chosen": 5.293405532836914,
529
+ "logits/rejected": 5.3094048500061035,
530
+ "logps/chosen": -352.3752136230469,
531
+ "logps/rejected": -392.6779479980469,
532
+ "loss": 0.25026455521583557,
533
+ "rewards/accuracies": 1.0,
534
+ "rewards/chosen": 1.5671364068984985,
535
+ "rewards/margins": 1.4098074436187744,
536
+ "rewards/rejected": 0.15732917189598083,
537
+ "step": 66
538
+ },
539
+ {
540
+ "epoch": 0.07941605839416059,
541
+ "grad_norm": 1.8173967599868774,
542
+ "learning_rate": 1.2984496124031009e-05,
543
+ "logits/chosen": 5.025746822357178,
544
+ "logits/rejected": 5.114965438842773,
545
+ "logps/chosen": -319.99700927734375,
546
+ "logps/rejected": -364.115234375,
547
+ "loss": 0.3108353912830353,
548
+ "rewards/accuracies": 0.9375,
549
+ "rewards/chosen": 1.4788665771484375,
550
+ "rewards/margins": 1.2637410163879395,
551
+ "rewards/rejected": 0.2151254564523697,
552
+ "step": 68
553
+ },
554
+ {
555
+ "epoch": 0.08175182481751825,
556
+ "grad_norm": 1.0658400058746338,
557
+ "learning_rate": 1.3372093023255814e-05,
558
+ "logits/chosen": 4.945235729217529,
559
+ "logits/rejected": 4.959147930145264,
560
+ "logps/chosen": -383.84033203125,
561
+ "logps/rejected": -431.7752685546875,
562
+ "loss": 0.22991834580898285,
563
+ "rewards/accuracies": 1.0,
564
+ "rewards/chosen": 1.3950352668762207,
565
+ "rewards/margins": 1.4965243339538574,
566
+ "rewards/rejected": -0.1014888733625412,
567
+ "step": 70
568
+ },
569
+ {
570
+ "epoch": 0.0840875912408759,
571
+ "grad_norm": 1.0350896120071411,
572
+ "learning_rate": 1.375968992248062e-05,
573
+ "logits/chosen": 5.00426721572876,
574
+ "logits/rejected": 5.120238780975342,
575
+ "logps/chosen": -350.9471435546875,
576
+ "logps/rejected": -382.6837158203125,
577
+ "loss": 0.22603684663772583,
578
+ "rewards/accuracies": 1.0,
579
+ "rewards/chosen": 1.2978975772857666,
580
+ "rewards/margins": 1.644275426864624,
581
+ "rewards/rejected": -0.34637776017189026,
582
+ "step": 72
583
+ },
584
+ {
585
+ "epoch": 0.08642335766423358,
586
+ "grad_norm": 1.1595423221588135,
587
+ "learning_rate": 1.4147286821705426e-05,
588
+ "logits/chosen": 4.890130043029785,
589
+ "logits/rejected": 4.9504714012146,
590
+ "logps/chosen": -352.34967041015625,
591
+ "logps/rejected": -399.23028564453125,
592
+ "loss": 0.18921935558319092,
593
+ "rewards/accuracies": 1.0,
594
+ "rewards/chosen": 1.1984589099884033,
595
+ "rewards/margins": 1.7495291233062744,
596
+ "rewards/rejected": -0.5510700941085815,
597
+ "step": 74
598
+ },
599
+ {
600
+ "epoch": 0.08759124087591241,
601
+ "eval_logits/chosen": 4.930174827575684,
602
+ "eval_logits/rejected": 5.032296657562256,
603
+ "eval_logps/chosen": -359.19647216796875,
604
+ "eval_logps/rejected": -405.1120300292969,
605
+ "eval_loss": 0.16020436584949493,
606
+ "eval_rewards/accuracies": 0.9960629940032959,
607
+ "eval_rewards/chosen": 1.1210675239562988,
608
+ "eval_rewards/margins": 2.0546727180480957,
609
+ "eval_rewards/rejected": -0.9336051344871521,
610
+ "eval_runtime": 454.3435,
611
+ "eval_samples_per_second": 1.677,
612
+ "eval_steps_per_second": 1.677,
613
+ "step": 75
614
+ },
615
+ {
616
+ "epoch": 0.08875912408759123,
617
+ "grad_norm": 1.1433167457580566,
618
+ "learning_rate": 1.4534883720930233e-05,
619
+ "logits/chosen": 5.037275314331055,
620
+ "logits/rejected": 5.1315507888793945,
621
+ "logps/chosen": -313.110595703125,
622
+ "logps/rejected": -356.1000061035156,
623
+ "loss": 0.15998858213424683,
624
+ "rewards/accuracies": 1.0,
625
+ "rewards/chosen": 1.2128857374191284,
626
+ "rewards/margins": 2.0945115089416504,
627
+ "rewards/rejected": -0.8816256523132324,
628
+ "step": 76
629
+ },
630
+ {
631
+ "epoch": 0.0910948905109489,
632
+ "grad_norm": 0.9839214086532593,
633
+ "learning_rate": 1.4922480620155039e-05,
634
+ "logits/chosen": 4.817085266113281,
635
+ "logits/rejected": 4.874035835266113,
636
+ "logps/chosen": -366.2629089355469,
637
+ "logps/rejected": -405.7989196777344,
638
+ "loss": 0.1894684135913849,
639
+ "rewards/accuracies": 1.0,
640
+ "rewards/chosen": 1.0605502128601074,
641
+ "rewards/margins": 1.90762460231781,
642
+ "rewards/rejected": -0.8470743894577026,
643
+ "step": 78
644
+ },
645
+ {
646
+ "epoch": 0.09343065693430656,
647
+ "grad_norm": 0.9212782979011536,
648
+ "learning_rate": 1.5310077519379846e-05,
649
+ "logits/chosen": 5.046716690063477,
650
+ "logits/rejected": 5.157979965209961,
651
+ "logps/chosen": -348.0658264160156,
652
+ "logps/rejected": -395.23870849609375,
653
+ "loss": 0.15948188304901123,
654
+ "rewards/accuracies": 1.0,
655
+ "rewards/chosen": 0.676516056060791,
656
+ "rewards/margins": 2.167430877685547,
657
+ "rewards/rejected": -1.4909145832061768,
658
+ "step": 80
659
+ },
660
+ {
661
+ "epoch": 0.09576642335766423,
662
+ "grad_norm": 0.9820688366889954,
663
+ "learning_rate": 1.569767441860465e-05,
664
+ "logits/chosen": 4.690741539001465,
665
+ "logits/rejected": 4.771791458129883,
666
+ "logps/chosen": -378.8666076660156,
667
+ "logps/rejected": -436.9100036621094,
668
+ "loss": 0.12085139006376266,
669
+ "rewards/accuracies": 1.0,
670
+ "rewards/chosen": 0.8719685077667236,
671
+ "rewards/margins": 2.646538257598877,
672
+ "rewards/rejected": -1.7745698690414429,
673
+ "step": 82
674
+ },
675
+ {
676
+ "epoch": 0.09810218978102189,
677
+ "grad_norm": 0.66785728931427,
678
+ "learning_rate": 1.608527131782946e-05,
679
+ "logits/chosen": 4.880465984344482,
680
+ "logits/rejected": 4.961792945861816,
681
+ "logps/chosen": -346.51214599609375,
682
+ "logps/rejected": -400.1110534667969,
683
+ "loss": 0.08720710873603821,
684
+ "rewards/accuracies": 1.0,
685
+ "rewards/chosen": 1.1337480545043945,
686
+ "rewards/margins": 2.903944253921509,
687
+ "rewards/rejected": -1.7701961994171143,
688
+ "step": 84
689
+ },
690
+ {
691
+ "epoch": 0.10043795620437956,
692
+ "grad_norm": 0.5760660767555237,
693
+ "learning_rate": 1.647286821705426e-05,
694
+ "logits/chosen": 4.464397430419922,
695
+ "logits/rejected": 4.680055618286133,
696
+ "logps/chosen": -341.7489318847656,
697
+ "logps/rejected": -398.322021484375,
698
+ "loss": 0.07942983508110046,
699
+ "rewards/accuracies": 1.0,
700
+ "rewards/chosen": 1.2459325790405273,
701
+ "rewards/margins": 3.0152552127838135,
702
+ "rewards/rejected": -1.7693227529525757,
703
+ "step": 86
704
+ },
705
+ {
706
+ "epoch": 0.10277372262773722,
707
+ "grad_norm": 1.6020294427871704,
708
+ "learning_rate": 1.686046511627907e-05,
709
+ "logits/chosen": 4.563863277435303,
710
+ "logits/rejected": 4.680974960327148,
711
+ "logps/chosen": -344.9147644042969,
712
+ "logps/rejected": -395.4453125,
713
+ "loss": 0.1258174479007721,
714
+ "rewards/accuracies": 0.9375,
715
+ "rewards/chosen": 1.0706769227981567,
716
+ "rewards/margins": 3.118717670440674,
717
+ "rewards/rejected": -2.0480403900146484,
718
+ "step": 88
719
+ },
720
+ {
721
+ "epoch": 0.10510948905109489,
722
+ "grad_norm": 0.46413859724998474,
723
+ "learning_rate": 1.7248062015503875e-05,
724
+ "logits/chosen": 4.4989237785339355,
725
+ "logits/rejected": 4.673248291015625,
726
+ "logps/chosen": -326.9678649902344,
727
+ "logps/rejected": -388.4164123535156,
728
+ "loss": 0.06663060188293457,
729
+ "rewards/accuracies": 1.0,
730
+ "rewards/chosen": 1.4128761291503906,
731
+ "rewards/margins": 3.760685920715332,
732
+ "rewards/rejected": -2.3478102684020996,
733
+ "step": 90
734
+ },
735
+ {
736
+ "epoch": 0.10744525547445255,
737
+ "grad_norm": 0.6699568629264832,
738
+ "learning_rate": 1.7635658914728684e-05,
739
+ "logits/chosen": 4.7294535636901855,
740
+ "logits/rejected": 4.813880920410156,
741
+ "logps/chosen": -362.7267150878906,
742
+ "logps/rejected": -439.2985534667969,
743
+ "loss": 0.04481709748506546,
744
+ "rewards/accuracies": 1.0,
745
+ "rewards/chosen": 1.477597713470459,
746
+ "rewards/margins": 4.37883186340332,
747
+ "rewards/rejected": -2.9012341499328613,
748
+ "step": 92
749
+ },
750
+ {
751
+ "epoch": 0.10978102189781022,
752
+ "grad_norm": 0.4152977168560028,
753
+ "learning_rate": 1.802325581395349e-05,
754
+ "logits/chosen": 4.785149574279785,
755
+ "logits/rejected": 4.891542434692383,
756
+ "logps/chosen": -381.59246826171875,
757
+ "logps/rejected": -444.2817687988281,
758
+ "loss": 0.05632612109184265,
759
+ "rewards/accuracies": 1.0,
760
+ "rewards/chosen": 0.71366286277771,
761
+ "rewards/margins": 3.4584720134735107,
762
+ "rewards/rejected": -2.744809150695801,
763
+ "step": 94
764
+ },
765
+ {
766
+ "epoch": 0.11211678832116788,
767
+ "grad_norm": 0.3152717649936676,
768
+ "learning_rate": 1.8410852713178295e-05,
769
+ "logits/chosen": 4.603940486907959,
770
+ "logits/rejected": 4.804995536804199,
771
+ "logps/chosen": -356.7286376953125,
772
+ "logps/rejected": -414.69635009765625,
773
+ "loss": 0.040920041501522064,
774
+ "rewards/accuracies": 1.0,
775
+ "rewards/chosen": 1.7566397190093994,
776
+ "rewards/margins": 4.020595550537109,
777
+ "rewards/rejected": -2.263956069946289,
778
+ "step": 96
779
+ },
780
+ {
781
+ "epoch": 0.11445255474452555,
782
+ "grad_norm": 0.37698569893836975,
783
+ "learning_rate": 1.8798449612403103e-05,
784
+ "logits/chosen": 4.558542728424072,
785
+ "logits/rejected": 4.690641403198242,
786
+ "logps/chosen": -339.794189453125,
787
+ "logps/rejected": -413.8865966796875,
788
+ "loss": 0.025794224813580513,
789
+ "rewards/accuracies": 1.0,
790
+ "rewards/chosen": 1.3867536783218384,
791
+ "rewards/margins": 4.6542744636535645,
792
+ "rewards/rejected": -3.2675204277038574,
793
+ "step": 98
794
+ },
795
+ {
796
+ "epoch": 0.11678832116788321,
797
+ "grad_norm": 0.15023073554039001,
798
+ "learning_rate": 1.918604651162791e-05,
799
+ "logits/chosen": 4.387497425079346,
800
+ "logits/rejected": 4.494588375091553,
801
+ "logps/chosen": -346.2568054199219,
802
+ "logps/rejected": -418.9315185546875,
803
+ "loss": 0.015155203640460968,
804
+ "rewards/accuracies": 1.0,
805
+ "rewards/chosen": 1.7938623428344727,
806
+ "rewards/margins": 4.942529201507568,
807
+ "rewards/rejected": -3.1486666202545166,
808
+ "step": 100
809
+ },
810
+ {
811
+ "epoch": 0.11678832116788321,
812
+ "eval_logits/chosen": 4.285891056060791,
813
+ "eval_logits/rejected": 4.425926208496094,
814
+ "eval_logps/chosen": -353.15850830078125,
815
+ "eval_logps/rejected": -424.4124755859375,
816
+ "eval_loss": 0.04428481683135033,
817
+ "eval_rewards/accuracies": 0.9921259880065918,
818
+ "eval_rewards/chosen": 1.7248634099960327,
819
+ "eval_rewards/margins": 4.588510513305664,
820
+ "eval_rewards/rejected": -2.863647222518921,
821
+ "eval_runtime": 454.7251,
822
+ "eval_samples_per_second": 1.676,
823
+ "eval_steps_per_second": 1.676,
824
+ "step": 100
825
+ }
826
+ ],
827
+ "logging_steps": 2,
828
+ "max_steps": 2571,
829
+ "num_input_tokens_seen": 0,
830
+ "num_train_epochs": 3,
831
+ "save_steps": 100,
832
+ "stateful_callbacks": {
833
+ "EarlyStoppingCallback": {
834
+ "args": {
835
+ "early_stopping_patience": 5,
836
+ "early_stopping_threshold": 0.001
837
+ },
838
+ "attributes": {
839
+ "early_stopping_patience_counter": 0
840
+ }
841
+ },
842
+ "TrainerControl": {
843
+ "args": {
844
+ "should_epoch_stop": false,
845
+ "should_evaluate": false,
846
+ "should_log": false,
847
+ "should_save": true,
848
+ "should_training_stop": false
849
+ },
850
+ "attributes": {}
851
+ }
852
+ },
853
+ "total_flos": 0.0,
854
+ "train_batch_size": 1,
855
+ "trial_name": null,
856
+ "trial_params": null
857
+ }
dpo_qwen_14B/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21875ef630d3e8f528dce67596a0d783fd5cf223e6e245a98026996d1f3d3ade
3
+ size 5752
dpo_qwen_14B/config_resolved.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run:
2
+ run_dir: ./runs/dpo_run_14b_v1
3
+ seed: 42
4
+ wandb:
5
+ enabled: true
6
+ project: dpo-training
7
+ entity: null
8
+ name: null
9
+ tags:
10
+ - dpo-lora
11
+ - preference-optimization
12
+ notes: null
13
+ model:
14
+ repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
15
+ revision: null
16
+ base_local_dir: base_model
17
+ trust_remote_code: true
18
+ tokenizer_use_fast: true
19
+ device_map: auto
20
+ torch_dtype: bfloat16
21
+ use_4bit: false
22
+ bnb_4bit_quant_type: nf4
23
+ bnb_4bit_use_double_quant: false
24
+ bnb_4bit_compute_dtype: bfloat16
25
+ attn_implementation: null
26
+ data:
27
+ train_jsonl: dpo_pairs_generated.jsonl
28
+ eval_jsonl: null
29
+ eval_split_ratio: 0.1
30
+ prompt_field: prompt
31
+ chosen_field: chosen
32
+ rejected_field: rejected
33
+ score_field: f1_score
34
+ format_type: chatml
35
+ system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\
36
+ \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\
37
+ \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\
38
+ \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\
39
+ \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\
40
+ \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\
41
+ add::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n\
42
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\
43
+ \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\
44
+ \ because\" and \"without this\"\n3. Types of components: function, struct, enum,\
45
+ \ impl, trait\n4. If there is extra information (e.g., enum variants), include\
46
+ \ that too.\n5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n"
47
+ max_length: 2048
48
+ shuffle: true
49
+ num_proc: 4
50
+ peft:
51
+ enabled: true
52
+ r: 16
53
+ lora_alpha: 32
54
+ lora_dropout: 0.05
55
+ bias: none
56
+ target_modules: auto
57
+ dpo:
58
+ beta: 0.1
59
+ label_smoothing: 0.0
60
+ loss_type: sigmoid
61
+ use_reference_model: true
62
+ reference_free: false
63
+ train:
64
+ num_train_epochs: 3
65
+ per_device_train_batch_size: 1
66
+ per_device_eval_batch_size: 1
67
+ gradient_accumulation_steps: 8
68
+ learning_rate: 5e-5
69
+ weight_decay: 0.0
70
+ warmup_ratio: 0.1
71
+ lr_scheduler_type: cosine
72
+ optim: adamw_torch
73
+ max_grad_norm: 1.0
74
+ gradient_checkpointing: true
75
+ logging_steps: 2
76
+ save_strategy: steps
77
+ save_steps: 100
78
+ save_total_limit: 10
79
+ evaluation_strategy: steps
80
+ eval_steps: 25
81
+ load_best_model_at_end: true
82
+ early_stopping:
83
+ enabled: true
84
+ patience: 5
85
+ min_delta: 0.001
86
+ metric: eval_loss
87
+ mode: min
88
+ resume_from_checkpoint: auto
89
+ merge:
90
+ enabled: true
91
+ merged_dtype: float16
92
+ max_shard_size: 2GB
93
+ output_dir: ./merged_14b_dpo_lora
dpo_qwen_14B/logs/eval.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"ts": "2025-12-26T16:09:16", "event": "eval", "step": 25, "epoch": 0.029197080291970802, "eval_loss": 0.6836819648742676, "eval_runtime": 454.4375, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 0.024636391550302505, "eval_rewards/rejected": 0.005080964416265488, "eval_rewards/accuracies": 0.665354311466217, "eval_rewards/margins": 0.019555427134037018, "eval_logps/chosen": -370.1607666015625, "eval_logps/rejected": -395.7251892089844, "eval_logits/chosen": 5.295141220092773, "eval_logits/rejected": 5.345211029052734}
2
+ {"ts": "2025-12-26T16:20:56", "event": "eval", "step": 50, "epoch": 0.058394160583941604, "eval_loss": 0.4610801041126251, "eval_runtime": 454.5598, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 0.8944254517555237, "eval_rewards/rejected": 0.3205168545246124, "eval_rewards/accuracies": 0.9619422554969788, "eval_rewards/margins": 0.5739086270332336, "eval_logps/chosen": -361.462890625, "eval_logps/rejected": -392.5708312988281, "eval_logits/chosen": 5.22359037399292, "eval_logits/rejected": 5.286833763122559}
3
+ {"ts": "2025-12-26T16:32:39", "event": "eval", "step": 75, "epoch": 0.08759124087591241, "eval_loss": 0.16020436584949493, "eval_runtime": 454.3435, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 1.1210675239562988, "eval_rewards/rejected": -0.9336051344871521, "eval_rewards/accuracies": 0.9960629940032959, "eval_rewards/margins": 2.0546727180480957, "eval_logps/chosen": -359.19647216796875, "eval_logps/rejected": -405.1120300292969, "eval_logits/chosen": 4.930174827575684, "eval_logits/rejected": 5.032296657562256}
4
+ {"ts": "2025-12-26T16:44:21", "event": "eval", "step": 100, "epoch": 0.11678832116788321, "eval_loss": 0.04428481683135033, "eval_runtime": 454.7251, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 1.7248634099960327, "eval_rewards/rejected": -2.863647222518921, "eval_rewards/accuracies": 0.9921259880065918, "eval_rewards/margins": 4.588510513305664, "eval_logps/chosen": -353.15850830078125, "eval_logps/rejected": -424.4124755859375, "eval_logits/chosen": 4.285891056060791, "eval_logits/rejected": 4.425926208496094}
5
+ {"ts": "2025-12-26T16:56:05", "event": "eval", "step": 125, "epoch": 0.145985401459854, "eval_loss": 0.024107323959469795, "eval_runtime": 454.8045, "eval_samples_per_second": 1.675, "eval_steps_per_second": 1.675, "eval_rewards/chosen": 0.5319492816925049, "eval_rewards/rejected": -6.150709629058838, "eval_rewards/accuracies": 0.9934383034706116, "eval_rewards/margins": 6.682660102844238, "eval_logps/chosen": -365.087646484375, "eval_logps/rejected": -457.28314208984375, "eval_logits/chosen": 3.6694726943969727, "eval_logits/rejected": 3.8436598777770996}
dpo_qwen_14B/logs/train.jsonl ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"ts": "2025-12-26T15:24:36", "event": "train_log", "step": 2, "epoch": 0.0023357664233576644, "progress_pct": 0.08, "epoch_pct": 0.08, "eta": "07:30:29", "max_grad_norm": 1.0, "loss": 0.6931473016738892, "grad_norm": 1.2424817085266113, "learning_rate": 1.9379844961240311e-07, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -368.911865234375, "logps/rejected": -398.83880615234375, "logits/chosen": 5.179401397705078, "logits/rejected": 5.192930698394775}
2
+ {"ts": "2025-12-26T15:24:56", "event": "train_log", "step": 4, "epoch": 0.004671532846715329, "progress_pct": 0.16, "epoch_pct": 0.16, "eta": "07:14:49", "max_grad_norm": 1.0, "loss": 0.693317174911499, "grad_norm": 1.3884541988372803, "learning_rate": 5.813953488372093e-07, "rewards/chosen": 0.022540951147675514, "rewards/rejected": 0.022656824439764023, "rewards/accuracies": 0.5, "rewards/margins": -0.00011587224435061216, "logps/chosen": -338.257568359375, "logps/rejected": -366.88128662109375, "logits/chosen": 5.405174255371094, "logits/rejected": 5.456291675567627}
3
+ {"ts": "2025-12-26T15:57:54", "event": "train_log", "step": 2, "epoch": 0.0023357664233576644, "progress_pct": 0.08, "epoch_pct": 0.08, "eta": "07:30:57", "max_grad_norm": 1.0, "loss": 0.6931473016738892, "grad_norm": 1.242694616317749, "learning_rate": 1.9379844961240311e-07, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -368.911865234375, "logps/rejected": -398.83880615234375, "logits/chosen": 5.179401397705078, "logits/rejected": 5.192930698394775}
4
+ {"ts": "2025-12-26T15:58:14", "event": "train_log", "step": 4, "epoch": 0.004671532846715329, "progress_pct": 0.16, "epoch_pct": 0.16, "eta": "07:15:10", "max_grad_norm": 1.0, "loss": 0.6949559450149536, "grad_norm": 1.392399787902832, "learning_rate": 5.813953488372093e-07, "rewards/chosen": 0.004504585638642311, "rewards/rejected": 0.007727146148681641, "rewards/accuracies": 0.625, "rewards/margins": -0.003222561441361904, "logps/chosen": -338.43792724609375, "logps/rejected": -367.03057861328125, "logits/chosen": 5.403897762298584, "logits/rejected": 5.4565606117248535}
5
+ {"ts": "2025-12-26T15:58:34", "event": "train_log", "step": 6, "epoch": 0.0070072992700729924, "progress_pct": 0.23, "epoch_pct": 0.23, "eta": "07:13:22", "max_grad_norm": 1.0, "loss": 0.689236581325531, "grad_norm": 1.066603183746338, "learning_rate": 9.689922480620155e-07, "rewards/chosen": -0.0034066196531057358, "rewards/rejected": -0.01166229322552681, "rewards/accuracies": 0.5625, "rewards/margins": 0.008255671709775925, "logps/chosen": -362.3431701660156, "logps/rejected": -387.5829772949219, "logits/chosen": 5.291868209838867, "logits/rejected": 5.328356742858887}
6
+ {"ts": "2025-12-26T15:58:54", "event": "train_log", "step": 8, "epoch": 0.009343065693430658, "progress_pct": 0.31, "epoch_pct": 0.31, "eta": "07:08:27", "max_grad_norm": 1.0, "loss": 0.6943775415420532, "grad_norm": 1.0005714893341064, "learning_rate": 1.3565891472868218e-06, "rewards/chosen": 0.014657974243164062, "rewards/rejected": 0.015892982482910156, "rewards/accuracies": 0.375, "rewards/margins": -0.0012350091710686684, "logps/chosen": -379.9283447265625, "logps/rejected": -389.0852355957031, "logits/chosen": 5.323437690734863, "logits/rejected": 5.410858631134033}
7
+ {"ts": "2025-12-26T15:59:13", "event": "train_log", "step": 10, "epoch": 0.01167883211678832, "progress_pct": 0.39, "epoch_pct": 0.39, "eta": "07:07:21", "max_grad_norm": 1.0, "loss": 0.693260908126831, "grad_norm": 1.2461222410202026, "learning_rate": 1.744186046511628e-06, "rewards/chosen": -0.028497030958533287, "rewards/rejected": -0.028623390942811966, "rewards/accuracies": 0.625, "rewards/margins": 0.00012636138126254082, "logps/chosen": -363.2003479003906, "logps/rejected": -389.67376708984375, "logits/chosen": 5.435908317565918, "logits/rejected": 5.494542121887207}
8
+ {"ts": "2025-12-26T15:59:34", "event": "train_log", "step": 12, "epoch": 0.014014598540145985, "progress_pct": 0.47, "epoch_pct": 0.47, "eta": "07:08:38", "max_grad_norm": 1.0, "loss": 0.6882913112640381, "grad_norm": 1.4030137062072754, "learning_rate": 2.131782945736434e-06, "rewards/chosen": 0.01622028276324272, "rewards/rejected": 0.006133650429546833, "rewards/accuracies": 0.5, "rewards/margins": 0.010086631402373314, "logps/chosen": -370.96429443359375, "logps/rejected": -402.4786071777344, "logits/chosen": 5.3550801277160645, "logits/rejected": 5.375768661499023}
9
+ {"ts": "2025-12-26T15:59:52", "event": "train_log", "step": 14, "epoch": 0.01635036496350365, "progress_pct": 0.54, "epoch_pct": 0.55, "eta": "07:02:49", "max_grad_norm": 1.0, "loss": 0.6896716356277466, "grad_norm": 1.1157702207565308, "learning_rate": 2.5193798449612402e-06, "rewards/chosen": -0.017319394275546074, "rewards/rejected": -0.024647902697324753, "rewards/accuracies": 0.625, "rewards/margins": 0.007328510750085115, "logps/chosen": -336.7254333496094, "logps/rejected": -357.52203369140625, "logits/chosen": 5.515308380126953, "logits/rejected": 5.561104774475098}
10
+ {"ts": "2025-12-26T16:00:12", "event": "train_log", "step": 16, "epoch": 0.018686131386861315, "progress_pct": 0.62, "epoch_pct": 0.62, "eta": "07:03:25", "max_grad_norm": 1.0, "loss": 0.6904245018959045, "grad_norm": 0.9470655918121338, "learning_rate": 2.9069767441860468e-06, "rewards/chosen": 0.03270244598388672, "rewards/rejected": 0.026875685900449753, "rewards/accuracies": 0.5625, "rewards/margins": 0.005826758686453104, "logps/chosen": -415.6842041015625, "logps/rejected": -441.1054992675781, "logits/chosen": 5.553088665008545, "logits/rejected": 5.582851886749268}
11
+ {"ts": "2025-12-26T16:00:33", "event": "train_log", "step": 18, "epoch": 0.021021897810218976, "progress_pct": 0.7, "epoch_pct": 0.7, "eta": "07:04:54", "max_grad_norm": 1.0, "loss": 0.683630108833313, "grad_norm": 1.4397331476211548, "learning_rate": 3.2945736434108533e-06, "rewards/chosen": 0.011020278558135033, "rewards/rejected": -0.008498954586684704, "rewards/accuracies": 0.5625, "rewards/margins": 0.01951923407614231, "logps/chosen": -392.46221923828125, "logps/rejected": -420.1712341308594, "logits/chosen": 5.440742015838623, "logits/rejected": 5.489529132843018}
12
+ {"ts": "2025-12-26T16:00:52", "event": "train_log", "step": 20, "epoch": 0.02335766423357664, "progress_pct": 0.78, "epoch_pct": 0.78, "eta": "07:03:26", "max_grad_norm": 1.0, "loss": 0.6902388334274292, "grad_norm": 1.5941083431243896, "learning_rate": 3.6821705426356594e-06, "rewards/chosen": 0.006536484230309725, "rewards/rejected": 0.0005230908282101154, "rewards/accuracies": 0.5625, "rewards/margins": 0.006013393402099609, "logps/chosen": -345.2221374511719, "logps/rejected": -365.9537048339844, "logits/chosen": 5.318347930908203, "logits/rejected": 5.397945404052734}
13
+ {"ts": "2025-12-26T16:01:12", "event": "train_log", "step": 22, "epoch": 0.025693430656934305, "progress_pct": 0.86, "epoch_pct": 0.86, "eta": "07:03:12", "max_grad_norm": 1.0, "loss": 0.691262423992157, "grad_norm": 1.1363905668258667, "learning_rate": 4.0697674418604655e-06, "rewards/chosen": 0.011908342130482197, "rewards/rejected": 0.007370188366621733, "rewards/accuracies": 0.5, "rewards/margins": 0.004538153763860464, "logps/chosen": -347.9439697265625, "logps/rejected": -370.65777587890625, "logits/chosen": 5.632981300354004, "logits/rejected": 5.7265520095825195}
14
+ {"ts": "2025-12-26T16:01:32", "event": "train_log", "step": 24, "epoch": 0.02802919708029197, "progress_pct": 0.93, "epoch_pct": 0.93, "eta": "07:02:08", "max_grad_norm": 1.0, "loss": 0.6769475936889648, "grad_norm": 1.0684627294540405, "learning_rate": 4.457364341085272e-06, "rewards/chosen": 0.01244144607335329, "rewards/rejected": -0.020452119410037994, "rewards/accuracies": 0.875, "rewards/margins": 0.03289356082677841, "logps/chosen": -347.1539001464844, "logps/rejected": -377.6044921875, "logits/chosen": 5.35699987411499, "logits/rejected": 5.405580520629883}
15
+ {"ts": "2025-12-26T16:09:16", "event": "train_log", "step": 25, "epoch": 0.029197080291970802, "progress_pct": 0.97, "epoch_pct": 0.97, "eta": "19:53:32", "max_grad_norm": 1.0, "eval_loss": 0.6836819648742676, "eval_runtime": 454.4375, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 0.024636391550302505, "eval_rewards/rejected": 0.005080964416265488, "eval_rewards/accuracies": 0.665354311466217, "eval_rewards/margins": 0.019555427134037018, "eval_logps/chosen": -370.1607666015625, "eval_logps/rejected": -395.7251892089844, "eval_logits/chosen": 5.295141220092773, "eval_logits/rejected": 5.345211029052734}
16
+ {"ts": "2025-12-26T16:09:27", "event": "train_log", "step": 26, "epoch": 0.030364963503649634, "progress_pct": 1.01, "epoch_pct": 1.01, "eta": "19:24:28", "max_grad_norm": 1.0, "loss": 0.6849788427352905, "grad_norm": 1.592353105545044, "learning_rate": 4.844961240310078e-06, "rewards/chosen": 0.026385309174656868, "rewards/rejected": 0.009418869391083717, "rewards/accuracies": 0.625, "rewards/margins": 0.016966437920928, "logps/chosen": -387.54876708984375, "logps/rejected": -412.0630187988281, "logits/chosen": 5.157042026519775, "logits/rejected": 5.244912147521973}
17
+ {"ts": "2025-12-26T16:09:47", "event": "train_log", "step": 28, "epoch": 0.0327007299270073, "progress_pct": 1.09, "epoch_pct": 1.09, "eta": "18:30:11", "max_grad_norm": 1.0, "loss": 0.675189733505249, "grad_norm": 1.3181558847427368, "learning_rate": 5.232558139534884e-06, "rewards/chosen": 0.045946408063173294, "rewards/rejected": 0.009190557524561882, "rewards/accuracies": 0.8125, "rewards/margins": 0.03675585240125656, "logps/chosen": -360.41650390625, "logps/rejected": -391.2162170410156, "logits/chosen": 5.545513153076172, "logits/rejected": 5.54400110244751}
18
+ {"ts": "2025-12-26T16:10:07", "event": "train_log", "step": 30, "epoch": 0.035036496350364967, "progress_pct": 1.17, "epoch_pct": 1.17, "eta": "17:43:26", "max_grad_norm": 1.0, "loss": 0.6752142310142517, "grad_norm": 1.443650722503662, "learning_rate": 5.620155038759691e-06, "rewards/chosen": 0.04194517061114311, "rewards/rejected": 0.005256845150142908, "rewards/accuracies": 0.8125, "rewards/margins": 0.03668833151459694, "logps/chosen": -378.6293640136719, "logps/rejected": -405.3665466308594, "logits/chosen": 5.136168003082275, "logits/rejected": 5.239327907562256}
19
+ {"ts": "2025-12-26T16:10:26", "event": "train_log", "step": 32, "epoch": 0.03737226277372263, "progress_pct": 1.24, "epoch_pct": 1.25, "eta": "17:01:56", "max_grad_norm": 1.0, "loss": 0.6700581312179565, "grad_norm": 1.379568338394165, "learning_rate": 6.007751937984497e-06, "rewards/chosen": 0.06658173352479935, "rewards/rejected": 0.019388392567634583, "rewards/accuracies": 0.875, "rewards/margins": 0.047193337231874466, "logps/chosen": -358.5367736816406, "logps/rejected": -382.4181213378906, "logits/chosen": 5.411487579345703, "logits/rejected": 5.427243232727051}
20
+ {"ts": "2025-12-26T16:10:44", "event": "train_log", "step": 34, "epoch": 0.039708029197080295, "progress_pct": 1.32, "epoch_pct": 1.32, "eta": "16:23:25", "max_grad_norm": 1.0, "loss": 0.6610866785049438, "grad_norm": 1.3260451555252075, "learning_rate": 6.395348837209303e-06, "rewards/chosen": 0.07038869708776474, "rewards/rejected": 0.0045137410052120686, "rewards/accuracies": 0.9375, "rewards/margins": 0.06587495654821396, "logps/chosen": -326.9423828125, "logps/rejected": -346.52081298828125, "logits/chosen": 5.207217216491699, "logits/rejected": 5.254848480224609}
21
+ {"ts": "2025-12-26T16:11:04", "event": "train_log", "step": 36, "epoch": 0.04204379562043795, "progress_pct": 1.4, "epoch_pct": 1.4, "eta": "15:51:44", "max_grad_norm": 1.0, "loss": 0.6281551718711853, "grad_norm": 1.5776340961456299, "learning_rate": 6.782945736434108e-06, "rewards/chosen": 0.11738375574350357, "rewards/rejected": -0.018992995843291283, "rewards/accuracies": 1.0, "rewards/margins": 0.1363767683506012, "logps/chosen": -359.9613952636719, "logps/rejected": -384.31683349609375, "logits/chosen": 5.550538063049316, "logits/rejected": 5.6374335289001465}
22
+ {"ts": "2025-12-26T16:11:23", "event": "train_log", "step": 38, "epoch": 0.04437956204379562, "progress_pct": 1.48, "epoch_pct": 1.48, "eta": "15:21:57", "max_grad_norm": 1.0, "loss": 0.6270830631256104, "grad_norm": 1.8589071035385132, "learning_rate": 7.170542635658915e-06, "rewards/chosen": 0.1617884635925293, "rewards/rejected": 0.022934721782803535, "rewards/accuracies": 0.9375, "rewards/margins": 0.1388537436723709, "logps/chosen": -325.8544616699219, "logps/rejected": -351.9772644042969, "logits/chosen": 5.39143180847168, "logits/rejected": 5.412029266357422}
23
+ {"ts": "2025-12-26T16:11:43", "event": "train_log", "step": 40, "epoch": 0.04671532846715328, "progress_pct": 1.56, "epoch_pct": 1.56, "eta": "14:56:25", "max_grad_norm": 1.0, "loss": 0.641180157661438, "grad_norm": 1.3231571912765503, "learning_rate": 7.558139534883721e-06, "rewards/chosen": 0.15248623490333557, "rewards/rejected": 0.04090070724487305, "rewards/accuracies": 0.875, "rewards/margins": 0.11158552765846252, "logps/chosen": -343.3839111328125, "logps/rejected": -374.7848205566406, "logits/chosen": 5.189720153808594, "logits/rejected": 5.203127384185791}
24
+ {"ts": "2025-12-26T16:12:02", "event": "train_log", "step": 42, "epoch": 0.049051094890510946, "progress_pct": 1.63, "epoch_pct": 1.64, "eta": "14:31:31", "max_grad_norm": 1.0, "loss": 0.6093671321868896, "grad_norm": 2.5331315994262695, "learning_rate": 7.945736434108528e-06, "rewards/chosen": 0.2898235321044922, "rewards/rejected": 0.10823898762464523, "rewards/accuracies": 0.9375, "rewards/margins": 0.18158456683158875, "logps/chosen": -341.813720703125, "logps/rejected": -372.44952392578125, "logits/chosen": 5.420182228088379, "logits/rejected": 5.45302677154541}
25
+ {"ts": "2025-12-26T16:12:20", "event": "train_log", "step": 44, "epoch": 0.05138686131386861, "progress_pct": 1.71, "epoch_pct": 1.71, "eta": "14:08:47", "max_grad_norm": 1.0, "loss": 0.5815833210945129, "grad_norm": 1.5247384309768677, "learning_rate": 8.333333333333334e-06, "rewards/chosen": 0.32459571957588196, "rewards/rejected": 0.07354050129652023, "rewards/accuracies": 0.8125, "rewards/margins": 0.2510552406311035, "logps/chosen": -354.49627685546875, "logps/rejected": -376.88818359375, "logits/chosen": 5.383636951446533, "logits/rejected": 5.397551536560059}
26
+ {"ts": "2025-12-26T16:12:40", "event": "train_log", "step": 46, "epoch": 0.053722627737226275, "progress_pct": 1.79, "epoch_pct": 1.79, "eta": "13:49:08", "max_grad_norm": 1.0, "loss": 0.5269681215286255, "grad_norm": 2.0814144611358643, "learning_rate": 8.72093023255814e-06, "rewards/chosen": 0.6465227603912354, "rewards/rejected": 0.27069616317749023, "rewards/accuracies": 0.9375, "rewards/margins": 0.37582656741142273, "logps/chosen": -331.1025390625, "logps/rejected": -362.90118408203125, "logits/chosen": 5.269731044769287, "logits/rejected": 5.287116050720215}
27
+ {"ts": "2025-12-26T16:12:59", "event": "train_log", "step": 48, "epoch": 0.05605839416058394, "progress_pct": 1.87, "epoch_pct": 1.87, "eta": "13:31:19", "max_grad_norm": 1.0, "loss": 0.5066201686859131, "grad_norm": 1.769063115119934, "learning_rate": 9.108527131782946e-06, "rewards/chosen": 0.6377636194229126, "rewards/rejected": 0.21126146614551544, "rewards/accuracies": 1.0, "rewards/margins": 0.42650213837623596, "logps/chosen": -369.40283203125, "logps/rejected": -400.18438720703125, "logits/chosen": 5.472540855407715, "logits/rejected": 5.465417861938477}
28
+ {"ts": "2025-12-26T16:13:21", "event": "train_log", "step": 50, "epoch": 0.058394160583941604, "progress_pct": 1.94, "epoch_pct": 1.95, "eta": "13:16:33", "max_grad_norm": 1.0, "loss": 0.529259979724884, "grad_norm": 2.84169602394104, "learning_rate": 9.496124031007753e-06, "rewards/chosen": 0.7923164367675781, "rewards/rejected": 0.4136104881763458, "rewards/accuracies": 1.0, "rewards/margins": 0.3787059783935547, "logps/chosen": -363.4556579589844, "logps/rejected": -397.8169860839844, "logits/chosen": 5.050387382507324, "logits/rejected": 5.112288951873779}
29
+ {"ts": "2025-12-26T16:20:56", "event": "train_log", "step": 50, "epoch": 0.058394160583941604, "progress_pct": 1.94, "epoch_pct": 1.95, "eta": "19:38:32", "max_grad_norm": 1.0, "eval_loss": 0.4610801041126251, "eval_runtime": 454.5598, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 0.8944254517555237, "eval_rewards/rejected": 0.3205168545246124, "eval_rewards/accuracies": 0.9619422554969788, "eval_rewards/margins": 0.5739086270332336, "eval_logps/chosen": -361.462890625, "eval_logps/rejected": -392.5708312988281, "eval_logits/chosen": 5.22359037399292, "eval_logits/rejected": 5.286833763122559}
30
+ {"ts": "2025-12-26T16:21:15", "event": "train_log", "step": 52, "epoch": 0.06072992700729927, "progress_pct": 2.02, "epoch_pct": 2.02, "eta": "19:08:07", "max_grad_norm": 1.0, "loss": 0.44602835178375244, "grad_norm": 1.6907895803451538, "learning_rate": 9.883720930232558e-06, "rewards/chosen": 0.9869746565818787, "rewards/rejected": 0.3813100755214691, "rewards/accuracies": 0.9375, "rewards/margins": 0.6056646108627319, "logps/chosen": -343.4534606933594, "logps/rejected": -379.39508056640625, "logits/chosen": 5.486469268798828, "logits/rejected": 5.541717529296875}
31
+ {"ts": "2025-12-26T16:21:36", "event": "train_log", "step": 54, "epoch": 0.06306569343065693, "progress_pct": 2.1, "epoch_pct": 2.1, "eta": "18:41:01", "max_grad_norm": 1.0, "loss": 0.43609702587127686, "grad_norm": 1.9458682537078857, "learning_rate": 1.0271317829457365e-05, "rewards/chosen": 0.7794930934906006, "rewards/rejected": 0.15292587876319885, "rewards/accuracies": 1.0, "rewards/margins": 0.6265671253204346, "logps/chosen": -379.5437316894531, "logps/rejected": -401.5587463378906, "logits/chosen": 5.169528961181641, "logits/rejected": 5.2688751220703125}
32
+ {"ts": "2025-12-26T16:21:57", "event": "train_log", "step": 56, "epoch": 0.0654014598540146, "progress_pct": 2.18, "epoch_pct": 2.18, "eta": "18:15:44", "max_grad_norm": 1.0, "loss": 0.3928414583206177, "grad_norm": 2.1266520023345947, "learning_rate": 1.065891472868217e-05, "rewards/chosen": 1.274291753768921, "rewards/rejected": 0.4878700375556946, "rewards/accuracies": 0.9375, "rewards/margins": 0.7864217758178711, "logps/chosen": -378.0788269042969, "logps/rejected": -413.27392578125, "logits/chosen": 5.097426414489746, "logits/rejected": 5.15327262878418}
33
+ {"ts": "2025-12-26T16:22:18", "event": "train_log", "step": 58, "epoch": 0.06773722627737226, "progress_pct": 2.26, "epoch_pct": 2.26, "eta": "17:52:03", "max_grad_norm": 1.0, "loss": 0.35855019092559814, "grad_norm": 1.5381489992141724, "learning_rate": 1.1046511627906977e-05, "rewards/chosen": 1.2897911071777344, "rewards/rejected": 0.35436347126960754, "rewards/accuracies": 0.875, "rewards/margins": 0.9354276061058044, "logps/chosen": -372.93438720703125, "logps/rejected": -401.8287658691406, "logits/chosen": 5.138954162597656, "logits/rejected": 5.20254373550415}
34
+ {"ts": "2025-12-26T16:22:39", "event": "train_log", "step": 60, "epoch": 0.07007299270072993, "progress_pct": 2.33, "epoch_pct": 2.34, "eta": "17:29:56", "max_grad_norm": 1.0, "loss": 0.42801612615585327, "grad_norm": 2.358330726623535, "learning_rate": 1.1434108527131783e-05, "rewards/chosen": 1.3823509216308594, "rewards/rejected": 0.6532848477363586, "rewards/accuracies": 0.875, "rewards/margins": 0.729066014289856, "logps/chosen": -360.984619140625, "logps/rejected": -392.3192138671875, "logits/chosen": 5.071888446807861, "logits/rejected": 5.187964916229248}
35
+ {"ts": "2025-12-26T16:22:58", "event": "train_log", "step": 62, "epoch": 0.07240875912408759, "progress_pct": 2.41, "epoch_pct": 2.41, "eta": "17:08:40", "max_grad_norm": 1.0, "loss": 0.31365492939949036, "grad_norm": 2.177586317062378, "learning_rate": 1.182170542635659e-05, "rewards/chosen": 1.6637591123580933, "rewards/rejected": 0.5750135183334351, "rewards/accuracies": 1.0, "rewards/margins": 1.0887457132339478, "logps/chosen": -364.808349609375, "logps/rejected": -401.0321044921875, "logits/chosen": 5.264093399047852, "logits/rejected": 5.310842990875244}
36
+ {"ts": "2025-12-26T16:23:19", "event": "train_log", "step": 64, "epoch": 0.07474452554744526, "progress_pct": 2.49, "epoch_pct": 2.49, "eta": "16:48:50", "max_grad_norm": 1.0, "loss": 0.3037749230861664, "grad_norm": 1.697789192199707, "learning_rate": 1.2209302325581395e-05, "rewards/chosen": 1.6470392942428589, "rewards/rejected": 0.5321945548057556, "rewards/accuracies": 1.0, "rewards/margins": 1.114844799041748, "logps/chosen": -359.8249816894531, "logps/rejected": -397.2122497558594, "logits/chosen": 5.191982269287109, "logits/rejected": 5.261416912078857}
37
+ {"ts": "2025-12-26T16:23:38", "event": "train_log", "step": 66, "epoch": 0.07708029197080292, "progress_pct": 2.57, "epoch_pct": 2.57, "eta": "16:29:40", "max_grad_norm": 1.0, "loss": 0.25026455521583557, "grad_norm": 1.3219914436340332, "learning_rate": 1.2596899224806202e-05, "rewards/chosen": 1.5671364068984985, "rewards/rejected": 0.15732917189598083, "rewards/accuracies": 1.0, "rewards/margins": 1.4098074436187744, "logps/chosen": -352.3752136230469, "logps/rejected": -392.6779479980469, "logits/chosen": 5.293405532836914, "logits/rejected": 5.3094048500061035}
38
+ {"ts": "2025-12-26T16:23:57", "event": "train_log", "step": 68, "epoch": 0.07941605839416059, "progress_pct": 2.64, "epoch_pct": 2.65, "eta": "16:11:16", "max_grad_norm": 1.0, "loss": 0.3108353912830353, "grad_norm": 1.8173967599868774, "learning_rate": 1.2984496124031009e-05, "rewards/chosen": 1.4788665771484375, "rewards/rejected": 0.2151254564523697, "rewards/accuracies": 0.9375, "rewards/margins": 1.2637410163879395, "logps/chosen": -319.99700927734375, "logps/rejected": -364.115234375, "logits/chosen": 5.025746822357178, "logits/rejected": 5.114965438842773}
39
+ {"ts": "2025-12-26T16:24:16", "event": "train_log", "step": 70, "epoch": 0.08175182481751825, "progress_pct": 2.72, "epoch_pct": 2.73, "eta": "15:54:17", "max_grad_norm": 1.0, "loss": 0.22991834580898285, "grad_norm": 1.0658400058746338, "learning_rate": 1.3372093023255814e-05, "rewards/chosen": 1.3950352668762207, "rewards/rejected": -0.1014888733625412, "rewards/accuracies": 1.0, "rewards/margins": 1.4965243339538574, "logps/chosen": -383.84033203125, "logps/rejected": -431.7752685546875, "logits/chosen": 4.945235729217529, "logits/rejected": 4.959147930145264}
40
+ {"ts": "2025-12-26T16:24:35", "event": "train_log", "step": 72, "epoch": 0.0840875912408759, "progress_pct": 2.8, "epoch_pct": 2.8, "eta": "15:38:16", "max_grad_norm": 1.0, "loss": 0.22603684663772583, "grad_norm": 1.0350896120071411, "learning_rate": 1.375968992248062e-05, "rewards/chosen": 1.2978975772857666, "rewards/rejected": -0.34637776017189026, "rewards/accuracies": 1.0, "rewards/margins": 1.644275426864624, "logps/chosen": -350.9471435546875, "logps/rejected": -382.6837158203125, "logits/chosen": 5.00426721572876, "logits/rejected": 5.120238780975342}
41
+ {"ts": "2025-12-26T16:24:56", "event": "train_log", "step": 74, "epoch": 0.08642335766423358, "progress_pct": 2.88, "epoch_pct": 2.88, "eta": "15:23:40", "max_grad_norm": 1.0, "loss": 0.18921935558319092, "grad_norm": 1.1595423221588135, "learning_rate": 1.4147286821705426e-05, "rewards/chosen": 1.1984589099884033, "rewards/rejected": -0.5510700941085815, "rewards/accuracies": 1.0, "rewards/margins": 1.7495291233062744, "logps/chosen": -352.34967041015625, "logps/rejected": -399.23028564453125, "logits/chosen": 4.890130043029785, "logits/rejected": 4.9504714012146}
42
+ {"ts": "2025-12-26T16:32:39", "event": "train_log", "step": 75, "epoch": 0.08759124087591241, "progress_pct": 2.92, "epoch_pct": 2.92, "eta": "19:27:59", "max_grad_norm": 1.0, "eval_loss": 0.16020436584949493, "eval_runtime": 454.3435, "eval_samples_per_second": 1.677, "eval_steps_per_second": 1.677, "eval_rewards/chosen": 1.1210675239562988, "eval_rewards/rejected": -0.9336051344871521, "eval_rewards/accuracies": 0.9960629940032959, "eval_rewards/margins": 2.0546727180480957, "eval_logps/chosen": -359.19647216796875, "eval_logps/rejected": -405.1120300292969, "eval_logits/chosen": 4.930174827575684, "eval_logits/rejected": 5.032296657562256}
43
+ {"ts": "2025-12-26T16:32:49", "event": "train_log", "step": 76, "epoch": 0.08875912408759123, "progress_pct": 2.96, "epoch_pct": 2.96, "eta": "19:17:37", "max_grad_norm": 1.0, "loss": 0.15998858213424683, "grad_norm": 1.1433167457580566, "learning_rate": 1.4534883720930233e-05, "rewards/chosen": 1.2128857374191284, "rewards/rejected": -0.8816256523132324, "rewards/accuracies": 1.0, "rewards/margins": 2.0945115089416504, "logps/chosen": -313.110595703125, "logps/rejected": -356.1000061035156, "logits/chosen": 5.037275314331055, "logits/rejected": 5.1315507888793945}
44
+ {"ts": "2025-12-26T16:33:09", "event": "train_log", "step": 78, "epoch": 0.0910948905109489, "progress_pct": 3.03, "epoch_pct": 3.04, "eta": "18:57:48", "max_grad_norm": 1.0, "loss": 0.1894684135913849, "grad_norm": 0.9839214086532593, "learning_rate": 1.4922480620155039e-05, "rewards/chosen": 1.0605502128601074, "rewards/rejected": -0.8470743894577026, "rewards/accuracies": 1.0, "rewards/margins": 1.90762460231781, "logps/chosen": -366.2629089355469, "logps/rejected": -405.7989196777344, "logits/chosen": 4.817085266113281, "logits/rejected": 4.874035835266113}
45
+ {"ts": "2025-12-26T16:33:29", "event": "train_log", "step": 80, "epoch": 0.09343065693430656, "progress_pct": 3.11, "epoch_pct": 3.11, "eta": "18:38:40", "max_grad_norm": 1.0, "loss": 0.15948188304901123, "grad_norm": 0.9212782979011536, "learning_rate": 1.5310077519379846e-05, "rewards/chosen": 0.676516056060791, "rewards/rejected": -1.4909145832061768, "rewards/accuracies": 1.0, "rewards/margins": 2.167430877685547, "logps/chosen": -348.0658264160156, "logps/rejected": -395.23870849609375, "logits/chosen": 5.046716690063477, "logits/rejected": 5.157979965209961}
46
+ {"ts": "2025-12-26T16:33:49", "event": "train_log", "step": 82, "epoch": 0.09576642335766423, "progress_pct": 3.19, "epoch_pct": 3.19, "eta": "18:20:47", "max_grad_norm": 1.0, "loss": 0.12085139006376266, "grad_norm": 0.9820688366889954, "learning_rate": 1.569767441860465e-05, "rewards/chosen": 0.8719685077667236, "rewards/rejected": -1.7745698690414429, "rewards/accuracies": 1.0, "rewards/margins": 2.646538257598877, "logps/chosen": -378.8666076660156, "logps/rejected": -436.9100036621094, "logits/chosen": 4.690741539001465, "logits/rejected": 4.771791458129883}
47
+ {"ts": "2025-12-26T16:34:09", "event": "train_log", "step": 84, "epoch": 0.09810218978102189, "progress_pct": 3.27, "epoch_pct": 3.27, "eta": "18:03:33", "max_grad_norm": 1.0, "loss": 0.08720710873603821, "grad_norm": 0.66785728931427, "learning_rate": 1.608527131782946e-05, "rewards/chosen": 1.1337480545043945, "rewards/rejected": -1.7701961994171143, "rewards/accuracies": 1.0, "rewards/margins": 2.903944253921509, "logps/chosen": -346.51214599609375, "logps/rejected": -400.1110534667969, "logits/chosen": 4.880465984344482, "logits/rejected": 4.961792945861816}
48
+ {"ts": "2025-12-26T16:34:28", "event": "train_log", "step": 86, "epoch": 0.10043795620437956, "progress_pct": 3.35, "epoch_pct": 3.35, "eta": "17:46:40", "max_grad_norm": 1.0, "loss": 0.07942983508110046, "grad_norm": 0.5760660767555237, "learning_rate": 1.647286821705426e-05, "rewards/chosen": 1.2459325790405273, "rewards/rejected": -1.7693227529525757, "rewards/accuracies": 1.0, "rewards/margins": 3.0152552127838135, "logps/chosen": -341.7489318847656, "logps/rejected": -398.322021484375, "logits/chosen": 4.464397430419922, "logits/rejected": 4.680055618286133}
49
+ {"ts": "2025-12-26T16:34:48", "event": "train_log", "step": 88, "epoch": 0.10277372262773722, "progress_pct": 3.42, "epoch_pct": 3.43, "eta": "17:30:52", "max_grad_norm": 1.0, "loss": 0.1258174479007721, "grad_norm": 1.6020294427871704, "learning_rate": 1.686046511627907e-05, "rewards/chosen": 1.0706769227981567, "rewards/rejected": -2.0480403900146484, "rewards/accuracies": 0.9375, "rewards/margins": 3.118717670440674, "logps/chosen": -344.9147644042969, "logps/rejected": -395.4453125, "logits/chosen": 4.563863277435303, "logits/rejected": 4.680974960327148}
50
+ {"ts": "2025-12-26T16:35:06", "event": "train_log", "step": 90, "epoch": 0.10510948905109489, "progress_pct": 3.5, "epoch_pct": 3.5, "eta": "17:14:58", "max_grad_norm": 1.0, "loss": 0.06663060188293457, "grad_norm": 0.46413859724998474, "learning_rate": 1.7248062015503875e-05, "rewards/chosen": 1.4128761291503906, "rewards/rejected": -2.3478102684020996, "rewards/accuracies": 1.0, "rewards/margins": 3.760685920715332, "logps/chosen": -326.9678649902344, "logps/rejected": -388.4164123535156, "logits/chosen": 4.4989237785339355, "logits/rejected": 4.673248291015625}
51
+ {"ts": "2025-12-26T16:35:26", "event": "train_log", "step": 92, "epoch": 0.10744525547445255, "progress_pct": 3.58, "epoch_pct": 3.58, "eta": "17:00:34", "max_grad_norm": 1.0, "loss": 0.04481709748506546, "grad_norm": 0.6699568629264832, "learning_rate": 1.7635658914728684e-05, "rewards/chosen": 1.477597713470459, "rewards/rejected": -2.9012341499328613, "rewards/accuracies": 1.0, "rewards/margins": 4.37883186340332, "logps/chosen": -362.7267150878906, "logps/rejected": -439.2985534667969, "logits/chosen": 4.7294535636901855, "logits/rejected": 4.813880920410156}
52
+ {"ts": "2025-12-26T16:35:47", "event": "train_log", "step": 94, "epoch": 0.10978102189781022, "progress_pct": 3.66, "epoch_pct": 3.66, "eta": "16:47:17", "max_grad_norm": 1.0, "loss": 0.05632612109184265, "grad_norm": 0.4152977168560028, "learning_rate": 1.802325581395349e-05, "rewards/chosen": 0.71366286277771, "rewards/rejected": -2.744809150695801, "rewards/accuracies": 1.0, "rewards/margins": 3.4584720134735107, "logps/chosen": -381.59246826171875, "logps/rejected": -444.2817687988281, "logits/chosen": 4.785149574279785, "logits/rejected": 4.891542434692383}
53
+ {"ts": "2025-12-26T16:36:07", "event": "train_log", "step": 96, "epoch": 0.11211678832116788, "progress_pct": 3.73, "epoch_pct": 3.74, "eta": "16:34:09", "max_grad_norm": 1.0, "loss": 0.040920041501522064, "grad_norm": 0.3152717649936676, "learning_rate": 1.8410852713178295e-05, "rewards/chosen": 1.7566397190093994, "rewards/rejected": -2.263956069946289, "rewards/accuracies": 1.0, "rewards/margins": 4.020595550537109, "logps/chosen": -356.7286376953125, "logps/rejected": -414.69635009765625, "logits/chosen": 4.603940486907959, "logits/rejected": 4.804995536804199}
54
+ {"ts": "2025-12-26T16:36:26", "event": "train_log", "step": 98, "epoch": 0.11445255474452555, "progress_pct": 3.81, "epoch_pct": 3.82, "eta": "16:21:07", "max_grad_norm": 1.0, "loss": 0.025794224813580513, "grad_norm": 0.37698569893836975, "learning_rate": 1.8798449612403103e-05, "rewards/chosen": 1.3867536783218384, "rewards/rejected": -3.2675204277038574, "rewards/accuracies": 1.0, "rewards/margins": 4.6542744636535645, "logps/chosen": -339.794189453125, "logps/rejected": -413.8865966796875, "logits/chosen": 4.558542728424072, "logits/rejected": 4.690641403198242}
55
+ {"ts": "2025-12-26T16:36:46", "event": "train_log", "step": 100, "epoch": 0.11678832116788321, "progress_pct": 3.89, "epoch_pct": 3.89, "eta": "16:09:03", "max_grad_norm": 1.0, "loss": 0.015155203640460968, "grad_norm": 0.15023073554039001, "learning_rate": 1.918604651162791e-05, "rewards/chosen": 1.7938623428344727, "rewards/rejected": -3.1486666202545166, "rewards/accuracies": 1.0, "rewards/margins": 4.942529201507568, "logps/chosen": -346.2568054199219, "logps/rejected": -418.9315185546875, "logits/chosen": 4.387497425079346, "logits/rejected": 4.494588375091553}
56
+ {"ts": "2025-12-26T16:44:21", "event": "train_log", "step": 100, "epoch": 0.11678832116788321, "progress_pct": 3.89, "epoch_pct": 3.89, "eta": "19:16:19", "max_grad_norm": 1.0, "eval_loss": 0.04428481683135033, "eval_runtime": 454.7251, "eval_samples_per_second": 1.676, "eval_steps_per_second": 1.676, "eval_rewards/chosen": 1.7248634099960327, "eval_rewards/rejected": -2.863647222518921, "eval_rewards/accuracies": 0.9921259880065918, "eval_rewards/margins": 4.588510513305664, "eval_logps/chosen": -353.15850830078125, "eval_logps/rejected": -424.4124755859375, "eval_logits/chosen": 4.285891056060791, "eval_logits/rejected": 4.425926208496094}
57
+ {"ts": "2025-12-26T16:44:42", "event": "train_log", "step": 102, "epoch": 0.11912408759124088, "progress_pct": 3.97, "epoch_pct": 3.97, "eta": "19:01:01", "max_grad_norm": 1.0, "loss": 0.01589718647301197, "grad_norm": 0.21237261593341827, "learning_rate": 1.9573643410852714e-05, "rewards/chosen": 1.7697646617889404, "rewards/rejected": -3.025937557220459, "rewards/accuracies": 1.0, "rewards/margins": 4.79570198059082, "logps/chosen": -305.01165771484375, "logps/rejected": -384.8538818359375, "logits/chosen": 4.197369575500488, "logits/rejected": 4.352917671203613}
58
+ {"ts": "2025-12-26T16:45:01", "event": "train_log", "step": 104, "epoch": 0.12145985401459854, "progress_pct": 4.05, "epoch_pct": 4.05, "eta": "18:45:50", "max_grad_norm": 1.0, "loss": 0.038177840411663055, "grad_norm": 1.1960583925247192, "learning_rate": 1.996124031007752e-05, "rewards/chosen": 1.556309461593628, "rewards/rejected": -3.2670814990997314, "rewards/accuracies": 1.0, "rewards/margins": 4.823390960693359, "logps/chosen": -341.10675048828125, "logps/rejected": -417.59613037109375, "logits/chosen": 4.184627056121826, "logits/rejected": 4.280352592468262}
59
+ {"ts": "2025-12-26T16:45:21", "event": "train_log", "step": 106, "epoch": 0.12379562043795621, "progress_pct": 4.12, "epoch_pct": 4.13, "eta": "18:31:17", "max_grad_norm": 1.0, "loss": 0.056792374700307846, "grad_norm": 1.3021241426467896, "learning_rate": 2.0348837209302328e-05, "rewards/chosen": 1.6538318395614624, "rewards/rejected": -3.0760293006896973, "rewards/accuracies": 1.0, "rewards/margins": 4.729861736297607, "logps/chosen": -358.1336669921875, "logps/rejected": -426.8945617675781, "logits/chosen": 4.32430362701416, "logits/rejected": 4.451810359954834}
60
+ {"ts": "2025-12-26T16:45:41", "event": "train_log", "step": 108, "epoch": 0.12613138686131387, "progress_pct": 4.2, "epoch_pct": 4.2, "eta": "18:17:24", "max_grad_norm": 1.0, "loss": 0.07614695280790329, "grad_norm": 0.3007296025753021, "learning_rate": 2.0736434108527133e-05, "rewards/chosen": 1.4121378660202026, "rewards/rejected": -3.331850051879883, "rewards/accuracies": 0.9375, "rewards/margins": 4.743987560272217, "logps/chosen": -364.4995422363281, "logps/rejected": -434.4844055175781, "logits/chosen": 4.4918341636657715, "logits/rejected": 4.6333909034729}
61
+ {"ts": "2025-12-26T16:45:59", "event": "train_log", "step": 110, "epoch": 0.12846715328467154, "progress_pct": 4.28, "epoch_pct": 4.28, "eta": "18:03:33", "max_grad_norm": 1.0, "loss": 0.014600476250052452, "grad_norm": 0.42474085092544556, "learning_rate": 2.1124031007751938e-05, "rewards/chosen": 1.958223819732666, "rewards/rejected": -4.051264762878418, "rewards/accuracies": 1.0, "rewards/margins": 6.009488582611084, "logps/chosen": -306.4935607910156, "logps/rejected": -392.5444030761719, "logits/chosen": 3.857876777648926, "logits/rejected": 3.9678285121917725}
62
+ {"ts": "2025-12-26T16:46:19", "event": "train_log", "step": 112, "epoch": 0.1308029197080292, "progress_pct": 4.36, "epoch_pct": 4.36, "eta": "17:50:24", "max_grad_norm": 1.0, "loss": 0.010151136666536331, "grad_norm": 0.14177864789962769, "learning_rate": 2.1511627906976744e-05, "rewards/chosen": 2.196099281311035, "rewards/rejected": -3.75758695602417, "rewards/accuracies": 1.0, "rewards/margins": 5.953686237335205, "logps/chosen": -339.5606689453125, "logps/rejected": -425.51361083984375, "logits/chosen": 4.254065036773682, "logits/rejected": 4.352800369262695}
63
+ {"ts": "2025-12-26T16:46:40", "event": "train_log", "step": 114, "epoch": 0.13313868613138685, "progress_pct": 4.43, "epoch_pct": 4.44, "eta": "17:38:30", "max_grad_norm": 1.0, "loss": 0.011391772888600826, "grad_norm": 0.29438889026641846, "learning_rate": 2.1899224806201552e-05, "rewards/chosen": 1.9949897527694702, "rewards/rejected": -3.3917016983032227, "rewards/accuracies": 1.0, "rewards/margins": 5.386691093444824, "logps/chosen": -349.3886413574219, "logps/rejected": -431.79925537109375, "logits/chosen": 3.7171452045440674, "logits/rejected": 3.9224042892456055}
64
+ {"ts": "2025-12-26T16:47:00", "event": "train_log", "step": 116, "epoch": 0.13547445255474452, "progress_pct": 4.51, "epoch_pct": 4.52, "eta": "17:26:32", "max_grad_norm": 1.0, "loss": 0.024509863927960396, "grad_norm": 0.9541389346122742, "learning_rate": 2.2286821705426357e-05, "rewards/chosen": 1.8549680709838867, "rewards/rejected": -3.4747393131256104, "rewards/accuracies": 1.0, "rewards/margins": 5.329707622528076, "logps/chosen": -343.19482421875, "logps/rejected": -423.23565673828125, "logits/chosen": 3.5138039588928223, "logits/rejected": 3.7400965690612793}
65
+ {"ts": "2025-12-26T16:47:21", "event": "train_log", "step": 118, "epoch": 0.1378102189781022, "progress_pct": 4.59, "epoch_pct": 4.59, "eta": "17:15:03", "max_grad_norm": 1.0, "loss": 0.007583940401673317, "grad_norm": 0.45693957805633545, "learning_rate": 2.2674418604651163e-05, "rewards/chosen": 2.130192518234253, "rewards/rejected": -4.364559650421143, "rewards/accuracies": 1.0, "rewards/margins": 6.494752407073975, "logps/chosen": -382.1067810058594, "logps/rejected": -480.72265625, "logits/chosen": 3.9002795219421387, "logits/rejected": 3.9630849361419678}
66
+ {"ts": "2025-12-26T16:47:41", "event": "train_log", "step": 120, "epoch": 0.14014598540145987, "progress_pct": 4.67, "epoch_pct": 4.67, "eta": "17:03:44", "max_grad_norm": 1.0, "loss": 0.007748167496174574, "grad_norm": 0.20826944708824158, "learning_rate": 2.3062015503875968e-05, "rewards/chosen": 1.398924469947815, "rewards/rejected": -4.580015182495117, "rewards/accuracies": 1.0, "rewards/margins": 5.978940010070801, "logps/chosen": -355.2779541015625, "logps/rejected": -436.54022216796875, "logits/chosen": 3.7722253799438477, "logits/rejected": 3.939023494720459}
67
+ {"ts": "2025-12-26T16:48:01", "event": "train_log", "step": 122, "epoch": 0.1424817518248175, "progress_pct": 4.75, "epoch_pct": 4.75, "eta": "16:52:48", "max_grad_norm": 1.0, "loss": 0.014359460212290287, "grad_norm": 0.21926206350326538, "learning_rate": 2.3449612403100777e-05, "rewards/chosen": 1.1770455837249756, "rewards/rejected": -5.205078125, "rewards/accuracies": 1.0, "rewards/margins": 6.382123947143555, "logps/chosen": -327.1947326660156, "logps/rejected": -414.7738037109375, "logits/chosen": 3.656745672225952, "logits/rejected": 3.875434160232544}
68
+ {"ts": "2025-12-26T16:48:21", "event": "train_log", "step": 124, "epoch": 0.14481751824817518, "progress_pct": 4.82, "epoch_pct": 4.83, "eta": "16:42:21", "max_grad_norm": 1.0, "loss": 0.007621760480105877, "grad_norm": 0.03550998866558075, "learning_rate": 2.3837209302325582e-05, "rewards/chosen": 0.7802913188934326, "rewards/rejected": -6.6151628494262695, "rewards/accuracies": 1.0, "rewards/margins": 7.395453453063965, "logps/chosen": -369.8974304199219, "logps/rejected": -473.97283935546875, "logits/chosen": 3.659773826599121, "logits/rejected": 3.725044012069702}
69
+ {"ts": "2025-12-26T16:56:05", "event": "train_log", "step": 125, "epoch": 0.145985401459854, "progress_pct": 4.86, "epoch_pct": 4.87, "eta": "19:05:16", "max_grad_norm": 1.0, "eval_loss": 0.024107323959469795, "eval_runtime": 454.8045, "eval_samples_per_second": 1.675, "eval_steps_per_second": 1.675, "eval_rewards/chosen": 0.5319492816925049, "eval_rewards/rejected": -6.150709629058838, "eval_rewards/accuracies": 0.9934383034706116, "eval_rewards/margins": 6.682660102844238, "eval_logps/chosen": -365.087646484375, "eval_logps/rejected": -457.28314208984375, "eval_logits/chosen": 3.6694726943969727, "eval_logits/rejected": 3.8436598777770996}
70
+ {"ts": "2025-12-26T16:56:15", "event": "train_log", "step": 126, "epoch": 0.14715328467153285, "progress_pct": 4.9, "epoch_pct": 4.91, "eta": "18:59:00", "max_grad_norm": 1.0, "loss": 0.005531508009880781, "grad_norm": 0.21691419184207916, "learning_rate": 2.4224806201550387e-05, "rewards/chosen": 0.9075853824615479, "rewards/rejected": -7.027284622192383, "rewards/accuracies": 1.0, "rewards/margins": 7.934869289398193, "logps/chosen": -345.18023681640625, "logps/rejected": -454.6177978515625, "logits/chosen": 3.777791738510132, "logits/rejected": 3.7573630809783936}
71
+ {"ts": "2025-12-26T16:56:36", "event": "train_log", "step": 128, "epoch": 0.14948905109489052, "progress_pct": 4.98, "epoch_pct": 4.98, "eta": "18:46:50", "max_grad_norm": 1.0, "loss": 0.0008547124452888966, "grad_norm": 0.0514506921172142, "learning_rate": 2.4612403100775196e-05, "rewards/chosen": 1.0864180326461792, "rewards/rejected": -6.75621223449707, "rewards/accuracies": 1.0, "rewards/margins": 7.842630863189697, "logps/chosen": -376.30023193359375, "logps/rejected": -486.30615234375, "logits/chosen": 3.6862380504608154, "logits/rejected": 3.77681827545166}
72
+ {"ts": "2025-12-26T16:56:54", "event": "train_log", "step": 130, "epoch": 0.15182481751824817, "progress_pct": 5.06, "epoch_pct": 5.06, "eta": "18:34:28", "max_grad_norm": 1.0, "loss": 0.019211476668715477, "grad_norm": 1.0013993978500366, "learning_rate": 2.5e-05, "rewards/chosen": 0.7987843751907349, "rewards/rejected": -6.31362247467041, "rewards/accuracies": 1.0, "rewards/margins": 7.1124067306518555, "logps/chosen": -330.2728271484375, "logps/rejected": -420.1920166015625, "logits/chosen": 3.8558738231658936, "logits/rejected": 4.067385673522949}
73
+ {"ts": "2025-12-26T16:57:14", "event": "train_log", "step": 132, "epoch": 0.15416058394160584, "progress_pct": 5.13, "epoch_pct": 5.14, "eta": "18:22:39", "max_grad_norm": 1.0, "loss": 0.005051509942859411, "grad_norm": 0.2909312844276428, "learning_rate": 2.5387596899224806e-05, "rewards/chosen": 0.7269927859306335, "rewards/rejected": -6.733253479003906, "rewards/accuracies": 1.0, "rewards/margins": 7.4602460861206055, "logps/chosen": -346.632568359375, "logps/rejected": -448.4364318847656, "logits/chosen": 3.8564915657043457, "logits/rejected": 4.106588363647461}
74
+ {"ts": "2025-12-26T16:57:34", "event": "train_log", "step": 134, "epoch": 0.1564963503649635, "progress_pct": 5.21, "epoch_pct": 5.22, "eta": "18:11:24", "max_grad_norm": 1.0, "loss": 0.029044320806860924, "grad_norm": 0.10341063886880875, "learning_rate": 2.5775193798449615e-05, "rewards/chosen": -0.15517807006835938, "rewards/rejected": -7.108559608459473, "rewards/accuracies": 1.0, "rewards/margins": 6.953381538391113, "logps/chosen": -398.5205078125, "logps/rejected": -493.3382568359375, "logits/chosen": 3.6923415660858154, "logits/rejected": 3.8758797645568848}
75
+ {"ts": "2025-12-26T16:57:55", "event": "train_log", "step": 136, "epoch": 0.15883211678832118, "progress_pct": 5.29, "epoch_pct": 5.29, "eta": "18:00:50", "max_grad_norm": 1.0, "loss": 0.008300668559968472, "grad_norm": 0.40827327966690063, "learning_rate": 2.616279069767442e-05, "rewards/chosen": 0.3356212377548218, "rewards/rejected": -6.36344051361084, "rewards/accuracies": 1.0, "rewards/margins": 6.699062347412109, "logps/chosen": -420.5874328613281, "logps/rejected": -511.71661376953125, "logits/chosen": 3.701347827911377, "logits/rejected": 3.8854856491088867}
76
+ {"ts": "2025-12-26T16:58:15", "event": "train_log", "step": 138, "epoch": 0.16116788321167883, "progress_pct": 5.37, "epoch_pct": 5.37, "eta": "17:50:12", "max_grad_norm": 1.0, "loss": 0.010793081484735012, "grad_norm": 0.17690710723400116, "learning_rate": 2.655038759689923e-05, "rewards/chosen": 1.5296471118927002, "rewards/rejected": -5.799461364746094, "rewards/accuracies": 1.0, "rewards/margins": 7.329109191894531, "logps/chosen": -361.59417724609375, "logps/rejected": -455.0230407714844, "logits/chosen": 3.476500988006592, "logits/rejected": 3.6296494007110596}
77
+ {"ts": "2025-12-26T16:58:35", "event": "train_log", "step": 140, "epoch": 0.1635036496350365, "progress_pct": 5.45, "epoch_pct": 5.45, "eta": "17:39:44", "max_grad_norm": 1.0, "loss": 0.012777667492628098, "grad_norm": 0.15591435134410858, "learning_rate": 2.693798449612403e-05, "rewards/chosen": 1.2819935083389282, "rewards/rejected": -6.812654972076416, "rewards/accuracies": 1.0, "rewards/margins": 8.094648361206055, "logps/chosen": -379.35333251953125, "logps/rejected": -490.0003356933594, "logits/chosen": 3.440129518508911, "logits/rejected": 3.5673890113830566}
78
+ {"ts": "2025-12-26T16:58:56", "event": "train_log", "step": 142, "epoch": 0.16583941605839417, "progress_pct": 5.52, "epoch_pct": 5.53, "eta": "17:29:50", "max_grad_norm": 1.0, "loss": 0.007118214387446642, "grad_norm": 0.820688009262085, "learning_rate": 2.7325581395348836e-05, "rewards/chosen": 1.4685018062591553, "rewards/rejected": -6.558856964111328, "rewards/accuracies": 1.0, "rewards/margins": 8.027359008789062, "logps/chosen": -402.8253479003906, "logps/rejected": -506.32000732421875, "logits/chosen": 3.23529052734375, "logits/rejected": 3.393266201019287}
dpo_qwen_14B/wandb/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:56:50.297401502Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:56:50.452320078Z","level":"INFO","msg":"stream: created new stream","id":"wbzoafvt"}
3
+ {"time":"2025-12-26T15:56:50.452494836Z","level":"INFO","msg":"handler: started","stream_id":"wbzoafvt"}
4
+ {"time":"2025-12-26T15:56:50.452572405Z","level":"INFO","msg":"stream: started","id":"wbzoafvt"}
5
+ {"time":"2025-12-26T15:56:50.452599156Z","level":"INFO","msg":"writer: started","stream_id":"wbzoafvt"}
6
+ {"time":"2025-12-26T15:56:50.452607804Z","level":"INFO","msg":"sender: started","stream_id":"wbzoafvt"}
7
+ {"time":"2025-12-26T16:59:00.070531235Z","level":"INFO","msg":"stream: closing","id":"wbzoafvt"}
8
+ {"time":"2025-12-26T16:59:00.346670237Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T16:59:00.473496131Z","level":"INFO","msg":"handler: closed","stream_id":"wbzoafvt"}
10
+ {"time":"2025-12-26T16:59:00.473589831Z","level":"INFO","msg":"sender: closed","stream_id":"wbzoafvt"}
11
+ {"time":"2025-12-26T16:59:00.473602236Z","level":"INFO","msg":"stream: closed","id":"wbzoafvt"}
dpo_qwen_14B/wandb/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Configure stats pid to 148906
3
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings
5
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug.log
7
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_155650-wbzoafvt/logs/debug-internal.log
8
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}}
11
+ 2025-12-26 15:56:50,017 INFO MainThread:148906 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:56:50,290 INFO MainThread:148906 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:56:50,295 INFO MainThread:148906 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:56:50,297 INFO MainThread:148906 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:56:50,648 INFO MainThread:148906 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:56:50,757 INFO MainThread:148906 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:56:50,762 INFO MainThread:148906 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:57:33,783 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb None None {'peft_config': {'default': {'task_type': 'CAUSAL_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.18.0', 'base_model_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['k_proj', 'o_proj', 'v_proj', 'q_proj'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 152064, 'max_position_embeddings': 32768, 'hidden_size': 5120, 'intermediate_size': 13824, 'num_hidden_layers': 48, 'num_attention_heads': 40, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 48, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'attention_dropout': 0.0, 'layer_types': ['full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 
'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention'], 'rope_parameters': {'rope_theta': 1000000.0, 'rope_type': 'default'}, 'return_dict': True, 'output_hidden_states': False, 'dtype': 'bfloat16', 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 151643, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, '_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'transformers_version': '5.0.0.dev0', 'model_type': 'qwen2', 'output_attentions': False, 'output_dir': 'runs/dpo_run_14b_v1', 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': None, 'warmup_ratio': 0.1, 'warmup_steps': 0.1, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': None, 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 2, 
'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'enable_jit_checkpoint': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'use_cpu': False, 'seed': 42, 'data_seed': None, 'bf16': True, 'fp16': False, 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': -1, 'ddp_backend': None, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 25, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'auto_find_batch_size': False, 'full_determinism': False, 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 
'torch_compile_mode': None, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_init_kwargs': None, 'ref_model_init_kwargs': None, 'model_adapter_name': None, 'ref_adapter_name': None, 'force_use_ref_model': False, 'disable_dropout': True, 'use_logits_to_keep': False, 'dataset_num_proc': None, 'pad_token': '<PAD_TOKEN>', 'label_pad_token_id': -100, 'max_prompt_length': 1024, 'max_completion_length': None, 'max_length': 2048, 'truncation_mode': 'keep_end', 'padding_free': False, 'precompute_ref_log_probs': False, 'precompute_ref_batch_size': None, 'tools': None, 'loss_type': 'sigmoid', 'use_liger_loss': None, 'base_model_attribute_name': 'model', 'beta': 0.1, 'f_divergence_type': 'reverse_kl', 'f_alpha_divergence_coef': 1.0, 'reference_free': False, 'label_smoothing': 0.0, 'use_weighting': False, 'rpo_alpha': None, 'ld_alpha': None, 'discopop_tau': 0.05, 'loss_weights': None, 'sync_ref_model': False, 'ref_model_mixup_alpha': 0.6, 'ref_model_sync_steps': 512, 'generate_during_eval': False}
23
+ 2025-12-26 15:57:33,791 INFO MainThread:148906 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 14795199488 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7da0304cf970>>
24
+ 2025-12-26 15:57:33,792 INFO MainThread:148906 [wandb_run.py:_config_callback():1396] config_cb model/num_parameters 14795199488 None
25
+ 2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [service_client.py:_forward_responses():80] Reached EOF.
26
+ 2025-12-26 16:59:00,070 INFO wandb-AsyncioManager-main:148906 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/config.yaml ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _name_or_path:
2
+ value: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
3
+ _wandb:
4
+ value:
5
+ cli_version: 0.23.1
6
+ e:
7
+ ce8b9zq5sbh73okdbbvozze07ayjamtf:
8
+ args:
9
+ - --config
10
+ - config_dpo.yaml
11
+ codePath: run_dpo.py
12
+ codePathLocal: run_dpo.py
13
+ cpu_count: 12
14
+ cpu_count_logical: 24
15
+ cudaVersion: "13.0"
16
+ disk:
17
+ /:
18
+ total: "791251738624"
19
+ used: "314755911680"
20
+ email: shaiksirajuddin9949@gmail.com
21
+ executable: /workspace/llm_finetuning_env/bin/python
22
+ gpu: NVIDIA A100-SXM4-80GB
23
+ gpu_count: 2
24
+ gpu_nvidia:
25
+ - architecture: Ampere
26
+ cudaCores: 6912
27
+ memoryTotal: "85899345920"
28
+ name: NVIDIA A100-SXM4-80GB
29
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
30
+ - architecture: Ampere
31
+ cudaCores: 6912
32
+ memoryTotal: "85899345920"
33
+ name: NVIDIA A100-SXM4-80GB
34
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
35
+ host: a100-2gpu-shell-session-757d587799-mfdvv
36
+ memory:
37
+ total: "359047892992"
38
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
39
+ program: /workspace/trainer-kit/DPO-14b/run_dpo.py
40
+ python: CPython 3.10.12
41
+ root: runs/dpo_run_14b_v1
42
+ startedAt: "2025-12-26T15:23:32.328004Z"
43
+ writerId: ce8b9zq5sbh73okdbbvozze07ayjamtf
44
+ m:
45
+ - "1": train/global_step
46
+ "6":
47
+ - 3
48
+ "7": []
49
+ - "2": '*'
50
+ "5": 1
51
+ "6":
52
+ - 1
53
+ "7": []
54
+ python_version: 3.10.12
55
+ t:
56
+ "1":
57
+ - 1
58
+ - 11
59
+ - 41
60
+ - 49
61
+ - 51
62
+ - 71
63
+ - 84
64
+ - 98
65
+ "2":
66
+ - 1
67
+ - 11
68
+ - 41
69
+ - 49
70
+ - 51
71
+ - 71
72
+ - 84
73
+ - 98
74
+ "3":
75
+ - 7
76
+ - 15
77
+ - 16
78
+ - 19
79
+ - 66
80
+ "4": 3.10.12
81
+ "5": 0.23.1
82
+ "6": 5.0.0.dev0
83
+ "9":
84
+ "1": transformers_trainer
85
+ "12": 0.23.1
86
+ "13": linux-x86_64
87
+ accelerator_config:
88
+ value:
89
+ dispatch_batches: null
90
+ even_batches: true
91
+ gradient_accumulation_kwargs: null
92
+ non_blocking: false
93
+ split_batches: false
94
+ use_seedable_sampler: true
95
+ adam_beta1:
96
+ value: 0.9
97
+ adam_beta2:
98
+ value: 0.999
99
+ adam_epsilon:
100
+ value: 1e-08
101
+ add_cross_attention:
102
+ value: false
103
+ architectures:
104
+ value:
105
+ - Qwen2ForCausalLM
106
+ attention_dropout:
107
+ value: 0
108
+ auto_find_batch_size:
109
+ value: false
110
+ average_tokens_across_devices:
111
+ value: true
112
+ base_model_attribute_name:
113
+ value: model
114
+ batch_eval_metrics:
115
+ value: false
116
+ beta:
117
+ value: 0.1
118
+ bf16:
119
+ value: true
120
+ bf16_full_eval:
121
+ value: false
122
+ bos_token_id:
123
+ value: null
124
+ chunk_size_feed_forward:
125
+ value: 0
126
+ cross_attention_hidden_size:
127
+ value: null
128
+ data:
129
+ value:
130
+ chosen_field: chosen
131
+ eval_jsonl: null
132
+ eval_split_ratio: 0.1
133
+ format_type: chatml
134
+ max_length: 2048
135
+ num_proc: 4
136
+ prompt_field: prompt
137
+ rejected_field: rejected
138
+ score_field: f1_score
139
+ shuffle: true
140
+ system_prompt: |
141
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
142
+
143
+ ## Output Format
144
+
145
+ ##OUTPUT
146
+ Explain the data flow and why each component must change:
147
+ - Flow: [Input → Processing → Output with arrows]
148
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
149
+ - Explain coupling between components
150
+
151
+ ##SELECT
152
+ modify::crates/path/to/file.rs::impl::ComponentName
153
+ add::crates/another/file.rs::function::AnotherComponent
154
+ <EOS>
155
+
156
+ ## Rules
157
+
158
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
159
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
160
+ 3. Always explain "must change because" and "without this"
161
+ 3. Types of components: function, struct, enum, impl, trait
162
+ 4. If there is extra information (e.g., enum variants), include that too.
163
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
164
+ train_jsonl: dpo_pairs_generated.jsonl
165
+ data_seed:
166
+ value: null
167
+ dataloader_drop_last:
168
+ value: false
169
+ dataloader_num_workers:
170
+ value: 0
171
+ dataloader_persistent_workers:
172
+ value: false
173
+ dataloader_pin_memory:
174
+ value: true
175
+ dataloader_prefetch_factor:
176
+ value: null
177
+ dataset_num_proc:
178
+ value: null
179
+ ddp_backend:
180
+ value: null
181
+ ddp_broadcast_buffers:
182
+ value: null
183
+ ddp_bucket_cap_mb:
184
+ value: null
185
+ ddp_find_unused_parameters:
186
+ value: null
187
+ ddp_timeout:
188
+ value: 1800
189
+ debug:
190
+ value: []
191
+ decoder_start_token_id:
192
+ value: null
193
+ deepspeed:
194
+ value: null
195
+ disable_dropout:
196
+ value: true
197
+ disable_tqdm:
198
+ value: false
199
+ discopop_tau:
200
+ value: 0.05
201
+ do_eval:
202
+ value: true
203
+ do_predict:
204
+ value: false
205
+ do_train:
206
+ value: false
207
+ dpo:
208
+ value:
209
+ beta: 0.1
210
+ label_smoothing: 0
211
+ loss_type: sigmoid
212
+ reference_free: false
213
+ use_reference_model: true
214
+ dtype:
215
+ value: bfloat16
216
+ enable_jit_checkpoint:
217
+ value: false
218
+ eos_token_id:
219
+ value: 151643
220
+ eval_accumulation_steps:
221
+ value: null
222
+ eval_delay:
223
+ value: 0
224
+ eval_do_concat_batches:
225
+ value: true
226
+ eval_on_start:
227
+ value: false
228
+ eval_steps:
229
+ value: 25
230
+ eval_strategy:
231
+ value: steps
232
+ eval_use_gather_object:
233
+ value: false
234
+ f_alpha_divergence_coef:
235
+ value: 1
236
+ f_divergence_type:
237
+ value: reverse_kl
238
+ finetuning_task:
239
+ value: null
240
+ force_use_ref_model:
241
+ value: false
242
+ fp16:
243
+ value: false
244
+ fp16_full_eval:
245
+ value: false
246
+ fsdp:
247
+ value: []
248
+ fsdp_config:
249
+ value:
250
+ min_num_params: 0
251
+ xla: false
252
+ xla_fsdp_grad_ckpt: false
253
+ xla_fsdp_v2: false
254
+ full_determinism:
255
+ value: false
256
+ generate_during_eval:
257
+ value: false
258
+ gradient_accumulation_steps:
259
+ value: 8
260
+ gradient_checkpointing:
261
+ value: true
262
+ gradient_checkpointing_kwargs:
263
+ value: null
264
+ greater_is_better:
265
+ value: false
266
+ group_by_length:
267
+ value: false
268
+ hidden_act:
269
+ value: silu
270
+ hidden_size:
271
+ value: 5120
272
+ hub_always_push:
273
+ value: false
274
+ hub_model_id:
275
+ value: null
276
+ hub_private_repo:
277
+ value: null
278
+ hub_revision:
279
+ value: null
280
+ hub_strategy:
281
+ value: every_save
282
+ hub_token:
283
+ value: <HUB_TOKEN>
284
+ id2label:
285
+ value:
286
+ "0": LABEL_0
287
+ "1": LABEL_1
288
+ ignore_data_skip:
289
+ value: false
290
+ include_for_metrics:
291
+ value: []
292
+ include_num_input_tokens_seen:
293
+ value: "no"
294
+ initializer_range:
295
+ value: 0.02
296
+ intermediate_size:
297
+ value: 13824
298
+ is_decoder:
299
+ value: false
300
+ is_encoder_decoder:
301
+ value: false
302
+ label_names:
303
+ value: null
304
+ label_pad_token_id:
305
+ value: -100
306
+ label_smoothing:
307
+ value: 0
308
+ label_smoothing_factor:
309
+ value: 0
310
+ label2id:
311
+ value:
312
+ LABEL_0: 0
313
+ LABEL_1: 1
314
+ layer_types:
315
+ value:
316
+ - full_attention
317
+ - full_attention
318
+ - full_attention
319
+ - full_attention
320
+ - full_attention
321
+ - full_attention
322
+ - full_attention
323
+ - full_attention
324
+ - full_attention
325
+ - full_attention
326
+ - full_attention
327
+ - full_attention
328
+ - full_attention
329
+ - full_attention
330
+ - full_attention
331
+ - full_attention
332
+ - full_attention
333
+ - full_attention
334
+ - full_attention
335
+ - full_attention
336
+ - full_attention
337
+ - full_attention
338
+ - full_attention
339
+ - full_attention
340
+ - full_attention
341
+ - full_attention
342
+ - full_attention
343
+ - full_attention
344
+ - full_attention
345
+ - full_attention
346
+ - full_attention
347
+ - full_attention
348
+ - full_attention
349
+ - full_attention
350
+ - full_attention
351
+ - full_attention
352
+ - full_attention
353
+ - full_attention
354
+ - full_attention
355
+ - full_attention
356
+ - full_attention
357
+ - full_attention
358
+ - full_attention
359
+ - full_attention
360
+ - full_attention
361
+ - full_attention
362
+ - full_attention
363
+ - full_attention
364
+ ld_alpha:
365
+ value: null
366
+ learning_rate:
367
+ value: 5e-05
368
+ length_column_name:
369
+ value: length
370
+ liger_kernel_config:
371
+ value: null
372
+ load_best_model_at_end:
373
+ value: true
374
+ local_rank:
375
+ value: -1
376
+ log_level:
377
+ value: passive
378
+ log_level_replica:
379
+ value: warning
380
+ log_on_each_node:
381
+ value: true
382
+ logging_dir:
383
+ value: null
384
+ logging_first_step:
385
+ value: false
386
+ logging_nan_inf_filter:
387
+ value: true
388
+ logging_steps:
389
+ value: 2
390
+ logging_strategy:
391
+ value: steps
392
+ loss_type:
393
+ value: sigmoid
394
+ loss_weights:
395
+ value: null
396
+ lr_scheduler_kwargs:
397
+ value: null
398
+ lr_scheduler_type:
399
+ value: cosine
400
+ max_completion_length:
401
+ value: null
402
+ max_grad_norm:
403
+ value: 1
404
+ max_length:
405
+ value: 2048
406
+ max_position_embeddings:
407
+ value: 32768
408
+ max_prompt_length:
409
+ value: 1024
410
+ max_steps:
411
+ value: -1
412
+ max_window_layers:
413
+ value: 48
414
+ metric_for_best_model:
415
+ value: eval_loss
416
+ model:
417
+ value:
418
+ attn_implementation: null
419
+ base_local_dir: base_model
420
+ bnb_4bit_compute_dtype: bfloat16
421
+ bnb_4bit_quant_type: nf4
422
+ bnb_4bit_use_double_quant: false
423
+ device_map: auto
424
+ repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
425
+ revision: null
426
+ tokenizer_use_fast: true
427
+ torch_dtype: bfloat16
428
+ trust_remote_code: true
429
+ use_4bit: false
430
+ model/num_parameters:
431
+ value: 14795199488
432
+ model_adapter_name:
433
+ value: null
434
+ model_init_kwargs:
435
+ value: null
436
+ model_type:
437
+ value: qwen2
438
+ neftune_noise_alpha:
439
+ value: null
440
+ num_attention_heads:
441
+ value: 40
442
+ num_hidden_layers:
443
+ value: 48
444
+ num_key_value_heads:
445
+ value: 8
446
+ num_train_epochs:
447
+ value: 3
448
+ optim:
449
+ value: adamw_torch
450
+ optim_args:
451
+ value: null
452
+ optim_target_modules:
453
+ value: null
454
+ output_attentions:
455
+ value: false
456
+ output_dir:
457
+ value: runs/dpo_run_14b_v1
458
+ output_hidden_states:
459
+ value: false
460
+ pad_token:
461
+ value: <PAD_TOKEN>
462
+ pad_token_id:
463
+ value: 151643
464
+ padding_free:
465
+ value: false
466
+ parallelism_config:
467
+ value: null
468
+ peft:
469
+ value:
470
+ bias: none
471
+ enabled: true
472
+ lora_alpha: 32
473
+ lora_dropout: 0.05
474
+ r: 16
475
+ target_modules: auto
476
+ peft_config:
477
+ value:
478
+ default:
479
+ alora_invocation_tokens: null
480
+ arrow_config: null
481
+ auto_mapping: null
482
+ base_model_name_or_path: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
483
+ bias: none
484
+ corda_config: null
485
+ ensure_weight_tying: false
486
+ eva_config: null
487
+ exclude_modules: null
488
+ fan_in_fan_out: false
489
+ inference_mode: false
490
+ init_lora_weights: true
491
+ layer_replication: null
492
+ layers_pattern: null
493
+ layers_to_transform: null
494
+ lora_alpha: 32
495
+ lora_bias: false
496
+ lora_dropout: 0.05
497
+ megatron_config: null
498
+ megatron_core: megatron.core
499
+ modules_to_save: null
500
+ peft_type: LORA
501
+ peft_version: 0.18.0
502
+ qalora_group_size: 16
503
+ r: 16
504
+ revision: null
505
+ runtime_config:
506
+ ephemeral_gpu_offload: false
507
+ target_modules:
508
+ - v_proj
509
+ - k_proj
510
+ - o_proj
511
+ - q_proj
512
+ target_parameters: null
513
+ task_type: CAUSAL_LM
514
+ trainable_token_indices: null
515
+ use_dora: false
516
+ use_qalora: false
517
+ use_rslora: false
518
+ per_device_eval_batch_size:
519
+ value: 1
520
+ per_device_train_batch_size:
521
+ value: 1
522
+ precompute_ref_batch_size:
523
+ value: null
524
+ precompute_ref_log_probs:
525
+ value: false
526
+ prediction_loss_only:
527
+ value: false
528
+ prefix:
529
+ value: null
530
+ problem_type:
531
+ value: null
532
+ project:
533
+ value: huggingface
534
+ push_to_hub:
535
+ value: false
536
+ ref_adapter_name:
537
+ value: null
538
+ ref_model_init_kwargs:
539
+ value: null
540
+ ref_model_mixup_alpha:
541
+ value: 0.6
542
+ ref_model_sync_steps:
543
+ value: 512
544
+ reference_free:
545
+ value: false
546
+ remove_unused_columns:
547
+ value: false
548
+ report_to:
549
+ value:
550
+ - wandb
551
+ restore_callback_states_from_checkpoint:
552
+ value: false
553
+ resume_from_checkpoint:
554
+ value: null
555
+ return_dict:
556
+ value: true
557
+ rms_norm_eps:
558
+ value: 1e-06
559
+ rope_parameters:
560
+ value:
561
+ rope_theta: 1e+06
562
+ rope_type: default
563
+ rpo_alpha:
564
+ value: null
565
+ run_dir:
566
+ value: runs/dpo_run_14b_v1
567
+ run_name:
568
+ value: null
569
+ save_on_each_node:
570
+ value: false
571
+ save_only_model:
572
+ value: false
573
+ save_steps:
574
+ value: 100
575
+ save_strategy:
576
+ value: steps
577
+ save_total_limit:
578
+ value: 10
579
+ seed:
580
+ value: 42
581
+ sep_token_id:
582
+ value: null
583
+ skip_memory_metrics:
584
+ value: true
585
+ sliding_window:
586
+ value: null
587
+ sync_ref_model:
588
+ value: false
589
+ task_specific_params:
590
+ value: null
591
+ tf32:
592
+ value: null
593
+ tie_word_embeddings:
594
+ value: false
595
+ tokenizer_class:
596
+ value: null
597
+ tools:
598
+ value: null
599
+ torch_compile:
600
+ value: false
601
+ torch_compile_backend:
602
+ value: null
603
+ torch_compile_mode:
604
+ value: null
605
+ torch_empty_cache_steps:
606
+ value: null
607
+ trackio_space_id:
608
+ value: trackio
609
+ train:
610
+ value:
611
+ early_stopping:
612
+ enabled: true
613
+ metric: eval_loss
614
+ min_delta: 0.001
615
+ mode: min
616
+ patience: 5
617
+ eval_steps: 25
618
+ evaluation_strategy: steps
619
+ gradient_accumulation_steps: 8
620
+ gradient_checkpointing: true
621
+ learning_rate: "5e-5"
622
+ load_best_model_at_end: true
623
+ logging_steps: 2
624
+ lr_scheduler_type: cosine
625
+ max_grad_norm: 1
626
+ num_train_epochs: 3
627
+ optim: adamw_torch
628
+ per_device_eval_batch_size: 1
629
+ per_device_train_batch_size: 1
630
+ resume_from_checkpoint: auto
631
+ save_steps: 100
632
+ save_strategy: steps
633
+ save_total_limit: 10
634
+ warmup_ratio: 0.1
635
+ weight_decay: 0
636
+ transformers_version:
637
+ value: 5.0.0.dev0
638
+ truncation_mode:
639
+ value: keep_end
640
+ use_cache:
641
+ value: false
642
+ use_cpu:
643
+ value: false
644
+ use_liger_kernel:
645
+ value: false
646
+ use_liger_loss:
647
+ value: null
648
+ use_logits_to_keep:
649
+ value: false
650
+ use_sliding_window:
651
+ value: false
652
+ use_weighting:
653
+ value: false
654
+ vocab_size:
655
+ value: 152064
656
+ warmup_ratio:
657
+ value: 0.1
658
+ warmup_steps:
659
+ value: 0.1
660
+ weight_decay:
661
+ value: 0
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/output.log ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ `torch_dtype` is deprecated! Use `dtype` instead!
3
+ Loading weights: 100%|█████████████████████████████████| 579/579 [00:09<00:00, 61.71it/s, Materializing param=model.norm.weight]
4
+ Loading reference model (frozen copy)...
5
+ Loading weights: 100%|█████████████████████████████████| 579/579 [00:09<00:00, 61.41it/s, Materializing param=model.norm.weight]
6
+ Reference model loaded and frozen
7
+ 2025-12-26 15:24:00,888 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
8
+ 2025-12-26 15:24:00,903 - INFO - Formatting train DPO data...
9
+ 2025-12-26 15:24:03,288 - INFO - Train dataset after filtering: 6850 examples
10
+ 2025-12-26 15:24:03,289 - INFO - train dataset validation passed: 6850 examples
11
+ 2025-12-26 15:24:03,289 - INFO - Formatting eval DPO data...
12
+ 2025-12-26 15:24:05,675 - INFO - Eval dataset after filtering: 762 examples
13
+ 2025-12-26 15:24:05,675 - INFO - eval dataset validation passed: 762 examples
14
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
15
+ Early stopping enabled: patience=5, min_delta=0.001
16
+ 2025-12-26 15:24:05,710 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
17
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
18
+ 2025-12-26 15:24:15,316 - INFO - Starting DPO training...
19
+ The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
20
+ 0%|▏ | 5/2571 [00:51<7:16:02, 10.20s/it]Traceback (most recent call last):
21
+ {'loss': '0.6931', 'grad_norm': '1.242', 'learning_rate': '1.938e-07', 'rewards/chosen': '0', 'rewards/rejected': '0', 'rewards/accuracies': '0', 'rewards/margins': '0', 'logps/chosen': '-368.9', 'logps/rejected': '-398.8', 'logits/chosen': '5.179', 'logits/rejected': '5.193', 'epoch': '0.002336'}
22
+ {'loss': '0.6933', 'grad_norm': '1.388', 'learning_rate': '5.814e-07', 'rewards/chosen': '0.02254', 'rewards/rejected': '0.02266', 'rewards/accuracies': '0.5', 'rewards/margins': '-0.0001159', 'logps/chosen': '-338.3', 'logps/rejected': '-366.9', 'logits/chosen': '5.405', 'logits/rejected': '5.456', 'epoch': '0.004672'}
23
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in <module>
24
+ main()
25
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main
26
+ trainer.train(resume_from_checkpoint=resume_from)
27
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train
28
+ return inner_training_loop(
29
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop
30
+ tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
31
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step
32
+ loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
33
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss
34
+ loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
35
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1733, in get_batch_loss_metrics
36
+ ref_chosen_logps, ref_rejected_logps = self.compute_ref_log_probs(batch)
37
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 926, in compute_ref_log_probs
38
+ ref_model_output = self.concatenated_forward(self.ref_model, batch, is_ref_model=True)
39
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward
40
+ outputs = model(input_ids, **model_kwargs)
41
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
42
+ return self._call_impl(*args, **kwargs)
43
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
44
+ return forward_call(*args, **kwargs)
45
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward
46
+ return model_forward(*args, **kwargs)
47
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__
48
+ return convert_to_fp32(self.model_forward(*args, **kwargs))
49
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
50
+ return func(*args, **kwargs)
51
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward
52
+ return self.base_model(
53
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
54
+ return self._call_impl(*args, **kwargs)
55
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
56
+ return forward_call(*args, **kwargs)
57
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
58
+ return self.model.forward(*args, **kwargs)
59
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
60
+ output = module._old_forward(*args, **kwargs)
61
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper
62
+ output = func(self, *args, **kwargs)
63
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward
64
+ outputs: BaseModelOutputWithPast = self.model(
65
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
66
+ return self._call_impl(*args, **kwargs)
67
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
68
+ return forward_call(*args, **kwargs)
69
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
70
+ outputs = func(self, *args, **kwargs)
71
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward
72
+ hidden_states = decoder_layer(
73
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 94, in __call__
74
+ return super().__call__(*args, **kwargs)
75
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
76
+ return self._call_impl(*args, **kwargs)
77
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
78
+ return forward_call(*args, **kwargs)
79
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward
80
+ output = orig_forward(*args, **kwargs)
81
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
82
+ output = module._old_forward(*args, **kwargs)
83
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward
84
+ hidden_states, _ = self.self_attn(
85
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
86
+ return self._call_impl(*args, **kwargs)
87
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
88
+ return forward_call(*args, **kwargs)
89
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
90
+ output = module._old_forward(*args, **kwargs)
91
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 222, in forward
92
+ value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
93
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
94
+ return self._call_impl(*args, **kwargs)
95
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
96
+ return forward_call(*args, **kwargs)
97
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 807, in forward
98
+ result = result + lora_B(lora_A(dropout(x))) * scaling
99
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
100
+ return self._call_impl(*args, **kwargs)
101
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
102
+ return forward_call(*args, **kwargs)
103
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
104
+ return F.linear(input, self.weight, self.bias)
105
+ KeyboardInterrupt
106
+ Traceback (most recent call last):
107
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in <module>
108
+ main()
109
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main
110
+ trainer.train(resume_from_checkpoint=resume_from)
111
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train
112
+ return inner_training_loop(
113
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop
114
+ tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
115
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step
116
+ loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
117
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss
118
+ loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
119
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1733, in get_batch_loss_metrics
120
+ ref_chosen_logps, ref_rejected_logps = self.compute_ref_log_probs(batch)
121
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 926, in compute_ref_log_probs
122
+ ref_model_output = self.concatenated_forward(self.ref_model, batch, is_ref_model=True)
123
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward
124
+ outputs = model(input_ids, **model_kwargs)
125
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
126
+ return self._call_impl(*args, **kwargs)
127
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
128
+ return forward_call(*args, **kwargs)
129
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward
130
+ return model_forward(*args, **kwargs)
131
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__
132
+ return convert_to_fp32(self.model_forward(*args, **kwargs))
133
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
134
+ return func(*args, **kwargs)
135
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward
136
+ return self.base_model(
137
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
138
+ return self._call_impl(*args, **kwargs)
139
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
140
+ return forward_call(*args, **kwargs)
141
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
142
+ return self.model.forward(*args, **kwargs)
143
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
144
+ output = module._old_forward(*args, **kwargs)
145
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper
146
+ output = func(self, *args, **kwargs)
147
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward
148
+ outputs: BaseModelOutputWithPast = self.model(
149
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
150
+ return self._call_impl(*args, **kwargs)
151
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
152
+ return forward_call(*args, **kwargs)
153
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
154
+ outputs = func(self, *args, **kwargs)
155
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward
156
+ hidden_states = decoder_layer(
157
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 94, in __call__
158
+ return super().__call__(*args, **kwargs)
159
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
160
+ return self._call_impl(*args, **kwargs)
161
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
162
+ return forward_call(*args, **kwargs)
163
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward
164
+ output = orig_forward(*args, **kwargs)
165
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
166
+ output = module._old_forward(*args, **kwargs)
167
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward
168
+ hidden_states, _ = self.self_attn(
169
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
170
+ return self._call_impl(*args, **kwargs)
171
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
172
+ return forward_call(*args, **kwargs)
173
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
174
+ output = module._old_forward(*args, **kwargs)
175
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 222, in forward
176
+ value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
177
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
178
+ return self._call_impl(*args, **kwargs)
179
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
180
+ return forward_call(*args, **kwargs)
181
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 807, in forward
182
+ result = result + lora_B(lora_A(dropout(x))) * scaling
183
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
184
+ return self._call_impl(*args, **kwargs)
185
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
186
+ return forward_call(*args, **kwargs)
187
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 125, in forward
188
+ return F.linear(input, self.weight, self.bias)
189
+ KeyboardInterrupt
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:23:32.328004Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO-14b/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_14b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "314755911680"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "ce8b9zq5sbh73okdbbvozze07ayjamtf"
47
+ }
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_step":1,"train/logps/rejected":-366.88128662109375,"train/grad_norm":1.3884541988372803,"train/logps/chosen":-338.257568359375,"train/rewards/margins":-0.00011587224435061216,"train/logits/chosen":5.405174255371094,"_wandb":{"runtime":101},"train/loss":0.693317174911499,"train/global_step":4,"train/epoch":0.004671532846715329,"_timestamp":1.7667626963258417e+09,"train/rewards/chosen":0.022540951147675514,"train/logits/rejected":5.456291675567627,"train/rewards/accuracies":0.5,"train/rewards/rejected":0.022656824439764023,"_runtime":101,"train/learning_rate":5.813953488372093e-07}
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:23:32.418743785Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpiwm5qcwf/port-134621.txt","pid":134621,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:23:32.419487782Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":134621}
3
+ {"time":"2025-12-26T15:23:32.419441897Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-134621-134691-43401370/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:23:32.60107271Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:23:32.607567183Z","level":"INFO","msg":"handleInformInit: received","streamId":"r9hfat2g","id":"1(@)"}
6
+ {"time":"2025-12-26T15:23:32.769941198Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"r9hfat2g","id":"1(@)"}
7
+ {"time":"2025-12-26T15:25:14.279920394Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:25:14.279987785Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-12-26T15:25:14.280023071Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-12-26T15:25:14.280085895Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-12-26T15:25:14.280137634Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-134621-134691-43401370/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:25:14.643871761Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:25:14.643905607Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:25:14.643922133Z","level":"INFO","msg":"server is closed"}
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:23:32.607728655Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:23:32.769717362Z","level":"INFO","msg":"stream: created new stream","id":"r9hfat2g"}
3
+ {"time":"2025-12-26T15:23:32.769819803Z","level":"INFO","msg":"handler: started","stream_id":"r9hfat2g"}
4
+ {"time":"2025-12-26T15:23:32.76993207Z","level":"INFO","msg":"stream: started","id":"r9hfat2g"}
5
+ {"time":"2025-12-26T15:23:32.769980394Z","level":"INFO","msg":"sender: started","stream_id":"r9hfat2g"}
6
+ {"time":"2025-12-26T15:23:32.769979838Z","level":"INFO","msg":"writer: started","stream_id":"r9hfat2g"}
7
+ {"time":"2025-12-26T15:25:14.280016864Z","level":"INFO","msg":"stream: closing","id":"r9hfat2g"}
8
+ {"time":"2025-12-26T15:25:14.470499024Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:25:14.642982392Z","level":"INFO","msg":"handler: closed","stream_id":"r9hfat2g"}
10
+ {"time":"2025-12-26T15:25:14.643087783Z","level":"INFO","msg":"sender: closed","stream_id":"r9hfat2g"}
11
+ {"time":"2025-12-26T15:25:14.643101377Z","level":"INFO","msg":"stream: closed","id":"r9hfat2g"}
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Configure stats pid to 134621
3
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings
5
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:23:32,329 INFO MainThread:134621 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_152332-r9hfat2g/logs/debug.log
7
+ 2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_152332-r9hfat2g/logs/debug-internal.log
8
+ 2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}}
11
+ 2025-12-26 15:23:32,330 INFO MainThread:134621 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:23:32,601 INFO MainThread:134621 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:23:32,605 INFO MainThread:134621 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:23:32,607 INFO MainThread:134621 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:23:32,608 INFO MainThread:134621 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:23:32,915 INFO MainThread:134621 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:23:33,025 INFO MainThread:134621 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:23:33,031 INFO MainThread:134621 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:24:15,664 INFO MainThread:134621 [wandb_run.py:_config_callback():1396] config_cb None None {'peft_config': {'default': {'task_type': 'CAUSAL_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.18.0', 'base_model_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['v_proj', 'k_proj', 'o_proj', 'q_proj'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 152064, 'max_position_embeddings': 32768, 'hidden_size': 5120, 'intermediate_size': 13824, 'num_hidden_layers': 48, 'num_attention_heads': 40, 'use_sliding_window': False, 'sliding_window': None, 'max_window_layers': 48, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-06, 'use_cache': False, 'attention_dropout': 0.0, 'layer_types': ['full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 
'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention', 'full_attention'], 'rope_parameters': {'rope_theta': 1000000.0, 'rope_type': 'default'}, 'return_dict': True, 'output_hidden_states': False, 'dtype': 'bfloat16', 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'architectures': ['Qwen2ForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 151643, 'eos_token_id': 151643, 'sep_token_id': None, 'decoder_start_token_id': None, '_name_or_path': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'transformers_version': '5.0.0.dev0', 'model_type': 'qwen2', 'output_attentions': False, 'output_dir': 'runs/dpo_run_14b_v1', 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': None, 'warmup_ratio': 0.1, 'warmup_steps': 0.1, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': None, 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 2, 
'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'enable_jit_checkpoint': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'use_cpu': False, 'seed': 42, 'data_seed': None, 'bf16': True, 'fp16': False, 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': -1, 'ddp_backend': None, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 25, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'auto_find_batch_size': False, 'full_determinism': False, 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 
'torch_compile_mode': None, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True, 'model_init_kwargs': None, 'ref_model_init_kwargs': None, 'model_adapter_name': None, 'ref_adapter_name': None, 'force_use_ref_model': False, 'disable_dropout': True, 'use_logits_to_keep': False, 'dataset_num_proc': None, 'pad_token': '<PAD_TOKEN>', 'label_pad_token_id': -100, 'max_prompt_length': 1024, 'max_completion_length': None, 'max_length': 2048, 'truncation_mode': 'keep_end', 'padding_free': False, 'precompute_ref_log_probs': False, 'precompute_ref_batch_size': None, 'tools': None, 'loss_type': 'sigmoid', 'use_liger_loss': None, 'base_model_attribute_name': 'model', 'beta': 0.1, 'f_divergence_type': 'reverse_kl', 'f_alpha_divergence_coef': 1.0, 'reference_free': False, 'label_smoothing': 0.0, 'use_weighting': False, 'rpo_alpha': None, 'ld_alpha': None, 'discopop_tau': 0.05, 'loss_weights': None, 'sync_ref_model': False, 'ref_model_mixup_alpha': 0.6, 'ref_model_sync_steps': 512, 'generate_during_eval': False}
23
+ 2025-12-26 15:24:15,672 INFO MainThread:134621 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 14795199488 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x79bf403bb7c0>>
24
+ 2025-12-26 15:24:15,672 INFO MainThread:134621 [wandb_run.py:_config_callback():1396] config_cb model/num_parameters 14795199488 None
25
+ 2025-12-26 15:25:14,280 INFO wandb-AsyncioManager-main:134621 [service_client.py:_forward_responses():80] Reached EOF.
26
+ 2025-12-26 15:25:14,280 INFO wandb-AsyncioManager-main:134621 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_qwen_14B/wandb/run-20251226_152332-r9hfat2g/run-r9hfat2g.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e630f821a728a70660f513c24097fbebe6281e9ed349c81fbbf5c9ee24270a
3
+ size 515777
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/config.yaml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ 62bhwklrbfchpar5wzdaud7re7jdowat:
6
+ args:
7
+ - --config
8
+ - config_dpo.yaml
9
+ codePath: run_dpo.py
10
+ codePathLocal: run_dpo.py
11
+ cpu_count: 12
12
+ cpu_count_logical: 24
13
+ cudaVersion: "13.0"
14
+ disk:
15
+ /:
16
+ total: "791251738624"
17
+ used: "316563935232"
18
+ email: shaiksirajuddin9949@gmail.com
19
+ executable: /workspace/llm_finetuning_env/bin/python
20
+ gpu: NVIDIA A100-SXM4-80GB
21
+ gpu_count: 2
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 6912
25
+ memoryTotal: "85899345920"
26
+ name: NVIDIA A100-SXM4-80GB
27
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
28
+ - architecture: Ampere
29
+ cudaCores: 6912
30
+ memoryTotal: "85899345920"
31
+ name: NVIDIA A100-SXM4-80GB
32
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
33
+ host: a100-2gpu-shell-session-757d587799-mfdvv
34
+ memory:
35
+ total: "359047892992"
36
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
37
+ program: /workspace/trainer-kit/DPO-14b/run_dpo.py
38
+ python: CPython 3.10.12
39
+ root: runs/dpo_run_14b_v1
40
+ startedAt: "2025-12-26T15:29:36.793485Z"
41
+ writerId: 62bhwklrbfchpar5wzdaud7re7jdowat
42
+ m: []
43
+ python_version: 3.10.12
44
+ t:
45
+ "1":
46
+ - 1
47
+ - 11
48
+ - 41
49
+ - 49
50
+ - 51
51
+ - 71
52
+ - 84
53
+ - 98
54
+ "2":
55
+ - 1
56
+ - 11
57
+ - 41
58
+ - 49
59
+ - 51
60
+ - 71
61
+ - 84
62
+ - 98
63
+ "3":
64
+ - 15
65
+ - 16
66
+ "4": 3.10.12
67
+ "5": 0.23.1
68
+ "6": 5.0.0.dev0
69
+ "12": 0.23.1
70
+ "13": linux-x86_64
71
+ data:
72
+ value:
73
+ chosen_field: chosen
74
+ eval_jsonl: null
75
+ eval_split_ratio: 0.1
76
+ format_type: chatml
77
+ max_length: 2048
78
+ num_proc: 4
79
+ prompt_field: prompt
80
+ rejected_field: rejected
81
+ score_field: f1_score
82
+ shuffle: true
83
+ system_prompt: |
84
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
85
+
86
+ ## Output Format
87
+
88
+ ##OUTPUT
89
+ Explain the data flow and why each component must change:
90
+ - Flow: [Input → Processing → Output with arrows]
91
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
92
+ - Explain coupling between components
93
+
94
+ ##SELECT
95
+ modify::crates/path/to/file.rs::impl::ComponentName
96
+ add::crates/another/file.rs::function::AnotherComponent
97
+ <EOS>
98
+
99
+ ## Rules
100
+
101
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
102
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
103
+ 3. Always explain "must change because" and "without this"
104
+ 3. Types of components: function, struct, enum, impl, trait
105
+ 4. If there is extra information (e.g., enum variants), include that too.
106
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
107
+ train_jsonl: dpo_pairs_generated.jsonl
108
+ dpo:
109
+ value:
110
+ beta: 0.1
111
+ label_smoothing: 0
112
+ loss_type: sigmoid
113
+ reference_free: false
114
+ use_reference_model: true
115
+ model:
116
+ value:
117
+ attn_implementation: null
118
+ base_local_dir: base_model
119
+ bnb_4bit_compute_dtype: bfloat16
120
+ bnb_4bit_quant_type: nf4
121
+ bnb_4bit_use_double_quant: false
122
+ device_map: auto
123
+ repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
124
+ revision: null
125
+ tokenizer_use_fast: true
126
+ torch_dtype: bfloat16
127
+ trust_remote_code: true
128
+ use_4bit: false
129
+ peft:
130
+ value:
131
+ bias: none
132
+ enabled: true
133
+ lora_alpha: 32
134
+ lora_dropout: 0.05
135
+ r: 16
136
+ target_modules: auto
137
+ run_dir:
138
+ value: runs/dpo_run_14b_v1
139
+ train:
140
+ value:
141
+ early_stopping:
142
+ enabled: true
143
+ metric: eval_loss
144
+ min_delta: 0.001
145
+ mode: min
146
+ patience: 5
147
+ eval_steps: 25
148
+ evaluation_strategy: steps
149
+ gradient_accumulation_steps: 8
150
+ gradient_checkpointing: true
151
+ learning_rate: "5e-5"
152
+ load_best_model_at_end: true
153
+ logging_steps: 2
154
+ lr_scheduler_type: cosine
155
+ max_grad_norm: 1
156
+ num_train_epochs: 3
157
+ optim: adamw_torch
158
+ per_device_eval_batch_size: 1
159
+ per_device_train_batch_size: 1
160
+ resume_from_checkpoint: auto
161
+ save_steps: 100
162
+ save_strategy: steps
163
+ save_total_limit: 10
164
+ warmup_ratio: 0.1
165
+ weight_decay: 0
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/output.log ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ `torch_dtype` is deprecated! Use `dtype` instead!
3
+ Loading weights: 100%|█████████| 579/579 [00:09<00:00, 61.06it/s, Materializing param=model.norm.weight]
4
+ Loading reference model (frozen copy)...
5
+ Loading weights: 100%|█████████| 579/579 [00:09<00:00, 62.49it/s, Materializing param=model.norm.weight]
6
+ Reference model loaded and frozen
7
+ 2025-12-26 15:30:05,632 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
8
+ 2025-12-26 15:30:05,647 - INFO - Formatting train DPO data...
9
+ 2025-12-26 15:30:07,996 - INFO - Train dataset after filtering: 6850 examples
10
+ 2025-12-26 15:30:07,997 - INFO - train dataset validation passed: 6850 examples
11
+ 2025-12-26 15:30:07,997 - INFO - Formatting eval DPO data...
12
+ 2025-12-26 15:30:10,371 - INFO - Eval dataset after filtering: 762 examples
13
+ 2025-12-26 15:30:10,372 - INFO - eval dataset validation passed: 762 examples
14
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
15
+ Early stopping enabled: patience=5, min_delta=0.001
16
+ 2025-12-26 15:30:10,408 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
17
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
18
+ Parameter 'fn_kwargs'={'tokenizer': Qwen2Tokenizer(name_or_path='../../Models/Qwen2.5-Coder-14B-CPT-SFT', vocab_size=151643, model_max_length=32768, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, added_tokens_decoder={
19
+ 151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
20
+ 151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
21
+ 151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
22
+ 151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
23
+ 151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
24
+ 151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
25
+ 151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
26
+ 151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
27
+ 151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
28
+ 151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
29
+ 151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
30
+ 151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
31
+ 151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32
+ 151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
33
+ 151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
34
+ 151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
35
+ 151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
36
+ 151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
37
+ 151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
38
+ 151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
39
+ 151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
40
+ 151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
41
+ }
42
+ ), 'tools': None} of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only shown once. Subsequent hashing failures won't be shown.
43
+ 2025-12-26 15:30:15,283 - WARNING - Parameter 'fn_kwargs'={'tokenizer': Qwen2Tokenizer(name_or_path='../../Models/Qwen2.5-Coder-14B-CPT-SFT', vocab_size=151643, model_max_length=32768, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, added_tokens_decoder={
44
+ 151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
45
+ 151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
46
+ 151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
47
+ 151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
48
+ 151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
49
+ 151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
50
+ 151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
51
+ 151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
52
+ 151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
53
+ 151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
54
+ 151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
55
+ 151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
56
+ 151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
57
+ 151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
58
+ 151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
59
+ 151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
60
+ 151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
61
+ 151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
62
+ 151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
63
+ 151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
64
+ 151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
65
+ 151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
66
+ }
67
+ ), 'tools': None} of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only shown once. Subsequent hashing failures won't be shown.
68
+ Applying chat template to eval dataset: 100%|████████████████| 762/762 [00:00<00:00, 8054.02 examples/s]
69
+ Tokenizing eval dataset: 47%|███████████████▏ | 361/762 [00:01<00:01, 236.68 examples/s]
70
+ Traceback (most recent call last):
71
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in <module>
72
+ main()
73
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 909, in main
74
+ trainer = DPOTrainer(
75
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 488, in __init__
76
+ eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval")
77
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
78
+ dataset = dataset.map(
79
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
80
+ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
81
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
82
+ for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
83
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
84
+ for i, example in iter_outputs(shard_iterable):
85
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
86
+ yield i, apply_function(example, i, offset=offset)
87
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
88
+ processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
89
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 714, in tokenize_row
90
+ rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"]
91
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2567, in __call__
92
+ encodings = self._encode_plus(
93
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_tokenizers.py", line 833, in _encode_plus
94
+ encodings = self._tokenizer.encode_batch(
95
+ KeyboardInterrupt
96
+ Traceback (most recent call last):
97
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in <module>
98
+ main()
99
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 909, in main
100
+ trainer = DPOTrainer(
101
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 488, in __init__
102
+ eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval")
103
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 654, in _prepare_dataset
104
+ dataset = dataset.map(
105
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 562, in wrapper
106
+ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
107
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3341, in map
108
+ for rank, done, content in Dataset._map_single(**unprocessed_kwargs):
109
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3673, in _map_single
110
+ for i, example in iter_outputs(shard_iterable):
111
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3647, in iter_outputs
112
+ yield i, apply_function(example, i, offset=offset)
113
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3570, in apply_function
114
+ processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
115
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 714, in tokenize_row
116
+ rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"]
117
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 2567, in __call__
118
+ encodings = self._encode_plus(
119
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/tokenization_utils_tokenizers.py", line 833, in _encode_plus
120
+ encodings = self._tokenizer.encode_batch(
121
+ KeyboardInterrupt
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-metadata.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.12.46+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.12",
4
+ "startedAt": "2025-12-26T15:29:36.793485Z",
5
+ "args": [
6
+ "--config",
7
+ "config_dpo.yaml"
8
+ ],
9
+ "program": "/workspace/trainer-kit/DPO-14b/run_dpo.py",
10
+ "codePath": "run_dpo.py",
11
+ "codePathLocal": "run_dpo.py",
12
+ "email": "shaiksirajuddin9949@gmail.com",
13
+ "root": "runs/dpo_run_14b_v1",
14
+ "host": "a100-2gpu-shell-session-757d587799-mfdvv",
15
+ "executable": "/workspace/llm_finetuning_env/bin/python",
16
+ "cpu_count": 12,
17
+ "cpu_count_logical": 24,
18
+ "gpu": "NVIDIA A100-SXM4-80GB",
19
+ "gpu_count": 2,
20
+ "disk": {
21
+ "/": {
22
+ "total": "791251738624",
23
+ "used": "316563935232"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "359047892992"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A100-SXM4-80GB",
32
+ "memoryTotal": "85899345920",
33
+ "cudaCores": 6912,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba"
36
+ },
37
+ {
38
+ "name": "NVIDIA A100-SXM4-80GB",
39
+ "memoryTotal": "85899345920",
40
+ "cudaCores": 6912,
41
+ "architecture": "Ampere",
42
+ "uuid": "GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40"
43
+ }
44
+ ],
45
+ "cudaVersion": "13.0",
46
+ "writerId": "62bhwklrbfchpar5wzdaud7re7jdowat"
47
+ }
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":41},"_runtime":41}
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:29:36.871855887Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp2764fn9e/port-137205.txt","pid":137205,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-26T15:29:36.872449374Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":137205}
3
+ {"time":"2025-12-26T15:29:36.872451526Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-137205-137352-1482738377/socket","Net":"unix"}}
4
+ {"time":"2025-12-26T15:29:37.058666689Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-26T15:29:37.064819569Z","level":"INFO","msg":"handleInformInit: received","streamId":"r1nptay8","id":"1(@)"}
6
+ {"time":"2025-12-26T15:29:37.216524061Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"r1nptay8","id":"1(@)"}
7
+ {"time":"2025-12-26T15:30:19.248432516Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-12-26T15:30:19.248506742Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-12-26T15:30:19.24857928Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
10
+ {"time":"2025-12-26T15:30:19.248524342Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2025-12-26T15:30:19.248647813Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-137205-137352-1482738377/socket","Net":"unix"}}
12
+ {"time":"2025-12-26T15:30:19.549751743Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-12-26T15:30:19.549788501Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-12-26T15:30:19.549806198Z","level":"INFO","msg":"server is closed"}
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-26T15:29:37.064937062Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-26T15:29:37.216325813Z","level":"INFO","msg":"stream: created new stream","id":"r1nptay8"}
3
+ {"time":"2025-12-26T15:29:37.216413019Z","level":"INFO","msg":"handler: started","stream_id":"r1nptay8"}
4
+ {"time":"2025-12-26T15:29:37.216515668Z","level":"INFO","msg":"stream: started","id":"r1nptay8"}
5
+ {"time":"2025-12-26T15:29:37.216542759Z","level":"INFO","msg":"writer: started","stream_id":"r1nptay8"}
6
+ {"time":"2025-12-26T15:29:37.216565747Z","level":"INFO","msg":"sender: started","stream_id":"r1nptay8"}
7
+ {"time":"2025-12-26T15:30:19.248508176Z","level":"INFO","msg":"stream: closing","id":"r1nptay8"}
8
+ {"time":"2025-12-26T15:30:19.441030263Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-12-26T15:30:19.548847282Z","level":"INFO","msg":"handler: closed","stream_id":"r1nptay8"}
10
+ {"time":"2025-12-26T15:30:19.548944003Z","level":"INFO","msg":"sender: closed","stream_id":"r1nptay8"}
11
+ {"time":"2025-12-26T15:30:19.54895272Z","level":"INFO","msg":"stream: closed","id":"r1nptay8"}
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/logs/debug.log ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Configure stats pid to 137205
3
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from /workspace/trainer-kit/DPO-14b/wandb/settings
5
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:setup_run_log_directory():714] Logging user logs to runs/dpo_run_14b_v1/wandb/run-20251226_152936-r1nptay8/logs/debug.log
7
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to runs/dpo_run_14b_v1/wandb/run-20251226_152936-r1nptay8/logs/debug-internal.log
8
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'model': {'repo_id': '../../Models/Qwen2.5-Coder-14B-CPT-SFT', 'revision': None, 'base_local_dir': 'base_model', 'trust_remote_code': True, 'tokenizer_use_fast': True, 'device_map': 'auto', 'torch_dtype': 'bfloat16', 'use_4bit': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'bfloat16', 'attn_implementation': None}, 'data': {'train_jsonl': 'dpo_pairs_generated.jsonl', 'eval_jsonl': None, 'eval_split_ratio': 0.1, 'prompt_field': 'prompt', 'chosen_field': 'chosen', 'rejected_field': 'rejected', 'score_field': 'f1_score', 'format_type': 'chatml', 'system_prompt': 'You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain the data flow and why each component must change:\n- Flow: [Input → Processing → Output with arrows]\n- For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"\n- Explain coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\nadd::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for nested items: `status::StructName::Type::Name`\n3. Always explain "must change because" and "without this"\n3. Types of components: function, struct, enum, impl, trait\n4. If there is extra information (e.g., enum variants), include that too.\n5. 
Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n', 'max_length': 2048, 'shuffle': True, 'num_proc': 4}, 'peft': {'enabled': True, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'bias': 'none', 'target_modules': 'auto'}, 'dpo': {'beta': 0.1, 'label_smoothing': 0.0, 'loss_type': 'sigmoid', 'use_reference_model': True, 'reference_free': False}, 'train': {'num_train_epochs': 3, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'gradient_accumulation_steps': 8, 'learning_rate': '5e-5', 'weight_decay': 0.0, 'warmup_ratio': 0.1, 'lr_scheduler_type': 'cosine', 'optim': 'adamw_torch', 'max_grad_norm': 1.0, 'gradient_checkpointing': True, 'logging_steps': 2, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 10, 'evaluation_strategy': 'steps', 'eval_steps': 25, 'load_best_model_at_end': True, 'early_stopping': {'enabled': True, 'patience': 5, 'min_delta': 0.001, 'metric': 'eval_loss', 'mode': 'min'}, 'resume_from_checkpoint': 'auto'}, 'run_dir': 'runs/dpo_run_14b_v1', '_wandb': {}}
11
+ 2025-12-26 15:29:36,795 INFO MainThread:137205 [wandb_init.py:init():889] starting backend
12
+ 2025-12-26 15:29:37,058 INFO MainThread:137205 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-26 15:29:37,063 INFO MainThread:137205 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-26 15:29:37,065 INFO MainThread:137205 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-26 15:29:37,065 INFO MainThread:137205 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-26 15:29:37,469 INFO MainThread:137205 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-26 15:29:37,577 INFO MainThread:137205 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-26 15:29:37,578 INFO MainThread:137205 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-26 15:29:37,582 INFO MainThread:137205 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-26 15:30:19,248 INFO wandb-AsyncioManager-main:137205 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-12-26 15:30:19,248 INFO wandb-AsyncioManager-main:137205 [mailbox.py:close():137] Closing mailbox, abandoning 1 handles.
dpo_qwen_14B/wandb/run-20251226_152936-r1nptay8/run-r1nptay8.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc498b55a73f9c8a0d524a9f073f87ac74b98d8031b36455fd731caf2cff78f
3
+ size 403205
dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/config.yaml ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _name_or_path:
2
+ value: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
3
+ _wandb:
4
+ value:
5
+ cli_version: 0.23.1
6
+ e:
7
+ afn1h9dtq29ul6sseazq0ojw1mqcn19i:
8
+ args:
9
+ - --config
10
+ - config_dpo.yaml
11
+ codePath: run_dpo.py
12
+ codePathLocal: run_dpo.py
13
+ cpu_count: 12
14
+ cpu_count_logical: 24
15
+ cudaVersion: "13.0"
16
+ disk:
17
+ /:
18
+ total: "791251738624"
19
+ used: "323290275840"
20
+ email: shaiksirajuddin9949@gmail.com
21
+ executable: /workspace/llm_finetuning_env/bin/python
22
+ gpu: NVIDIA A100-SXM4-80GB
23
+ gpu_count: 2
24
+ gpu_nvidia:
25
+ - architecture: Ampere
26
+ cudaCores: 6912
27
+ memoryTotal: "85899345920"
28
+ name: NVIDIA A100-SXM4-80GB
29
+ uuid: GPU-989794b0-ec3b-13bf-db9f-3fbe341497ba
30
+ - architecture: Ampere
31
+ cudaCores: 6912
32
+ memoryTotal: "85899345920"
33
+ name: NVIDIA A100-SXM4-80GB
34
+ uuid: GPU-3790aa64-60ef-9eac-b0b1-b278ee8c0d40
35
+ host: a100-2gpu-shell-session-757d587799-mfdvv
36
+ memory:
37
+ total: "359047892992"
38
+ os: Linux-6.12.46+-x86_64-with-glibc2.35
39
+ program: /workspace/trainer-kit/DPO-14b/run_dpo.py
40
+ python: CPython 3.10.12
41
+ root: runs/dpo_run_14b_v1
42
+ startedAt: "2025-12-26T15:56:50.015524Z"
43
+ writerId: afn1h9dtq29ul6sseazq0ojw1mqcn19i
44
+ m:
45
+ - "1": train/global_step
46
+ "6":
47
+ - 3
48
+ "7": []
49
+ - "2": '*'
50
+ "5": 1
51
+ "6":
52
+ - 1
53
+ "7": []
54
+ python_version: 3.10.12
55
+ t:
56
+ "1":
57
+ - 1
58
+ - 11
59
+ - 41
60
+ - 49
61
+ - 51
62
+ - 71
63
+ - 84
64
+ - 98
65
+ "2":
66
+ - 1
67
+ - 11
68
+ - 41
69
+ - 49
70
+ - 51
71
+ - 71
72
+ - 84
73
+ - 98
74
+ "3":
75
+ - 7
76
+ - 15
77
+ - 16
78
+ - 19
79
+ - 66
80
+ "4": 3.10.12
81
+ "5": 0.23.1
82
+ "6": 5.0.0.dev0
83
+ "9":
84
+ "1": transformers_trainer
85
+ "12": 0.23.1
86
+ "13": linux-x86_64
87
+ accelerator_config:
88
+ value:
89
+ dispatch_batches: null
90
+ even_batches: true
91
+ gradient_accumulation_kwargs: null
92
+ non_blocking: false
93
+ split_batches: false
94
+ use_seedable_sampler: true
95
+ adam_beta1:
96
+ value: 0.9
97
+ adam_beta2:
98
+ value: 0.999
99
+ adam_epsilon:
100
+ value: 1e-08
101
+ add_cross_attention:
102
+ value: false
103
+ architectures:
104
+ value:
105
+ - Qwen2ForCausalLM
106
+ attention_dropout:
107
+ value: 0
108
+ auto_find_batch_size:
109
+ value: false
110
+ average_tokens_across_devices:
111
+ value: true
112
+ base_model_attribute_name:
113
+ value: model
114
+ batch_eval_metrics:
115
+ value: false
116
+ beta:
117
+ value: 0.1
118
+ bf16:
119
+ value: true
120
+ bf16_full_eval:
121
+ value: false
122
+ bos_token_id:
123
+ value: null
124
+ chunk_size_feed_forward:
125
+ value: 0
126
+ cross_attention_hidden_size:
127
+ value: null
128
+ data:
129
+ value:
130
+ chosen_field: chosen
131
+ eval_jsonl: null
132
+ eval_split_ratio: 0.1
133
+ format_type: chatml
134
+ max_length: 2048
135
+ num_proc: 4
136
+ prompt_field: prompt
137
+ rejected_field: rejected
138
+ score_field: f1_score
139
+ shuffle: true
140
+ system_prompt: |
141
+ You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
142
+
143
+ ## Output Format
144
+
145
+ ##OUTPUT
146
+ Explain the data flow and why each component must change:
147
+ - Flow: [Input → Processing → Output with arrows]
148
+ - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
149
+ - Explain coupling between components
150
+
151
+ ##SELECT
152
+ modify::crates/path/to/file.rs::impl::ComponentName
153
+ add::crates/another/file.rs::function::AnotherComponent
154
+ <EOS>
155
+
156
+ ## Rules
157
+
158
+ 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
159
+ 2. Use `::` for nested items: `status::StructName::Type::Name`
160
+ 3. Always explain "must change because" and "without this"
161
+ 3. Types of components: function, struct, enum, impl, trait
162
+ 4. If there is extra information (e.g., enum variants), include that too.
163
+ 5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
164
+ train_jsonl: dpo_pairs_generated.jsonl
165
+ data_seed:
166
+ value: null
167
+ dataloader_drop_last:
168
+ value: false
169
+ dataloader_num_workers:
170
+ value: 0
171
+ dataloader_persistent_workers:
172
+ value: false
173
+ dataloader_pin_memory:
174
+ value: true
175
+ dataloader_prefetch_factor:
176
+ value: null
177
+ dataset_num_proc:
178
+ value: null
179
+ ddp_backend:
180
+ value: null
181
+ ddp_broadcast_buffers:
182
+ value: null
183
+ ddp_bucket_cap_mb:
184
+ value: null
185
+ ddp_find_unused_parameters:
186
+ value: null
187
+ ddp_timeout:
188
+ value: 1800
189
+ debug:
190
+ value: []
191
+ decoder_start_token_id:
192
+ value: null
193
+ deepspeed:
194
+ value: null
195
+ disable_dropout:
196
+ value: true
197
+ disable_tqdm:
198
+ value: false
199
+ discopop_tau:
200
+ value: 0.05
201
+ do_eval:
202
+ value: true
203
+ do_predict:
204
+ value: false
205
+ do_train:
206
+ value: false
207
+ dpo:
208
+ value:
209
+ beta: 0.1
210
+ label_smoothing: 0
211
+ loss_type: sigmoid
212
+ reference_free: false
213
+ use_reference_model: true
214
+ dtype:
215
+ value: bfloat16
216
+ enable_jit_checkpoint:
217
+ value: false
218
+ eos_token_id:
219
+ value: 151643
220
+ eval_accumulation_steps:
221
+ value: null
222
+ eval_delay:
223
+ value: 0
224
+ eval_do_concat_batches:
225
+ value: true
226
+ eval_on_start:
227
+ value: false
228
+ eval_steps:
229
+ value: 25
230
+ eval_strategy:
231
+ value: steps
232
+ eval_use_gather_object:
233
+ value: false
234
+ f_alpha_divergence_coef:
235
+ value: 1
236
+ f_divergence_type:
237
+ value: reverse_kl
238
+ finetuning_task:
239
+ value: null
240
+ force_use_ref_model:
241
+ value: false
242
+ fp16:
243
+ value: false
244
+ fp16_full_eval:
245
+ value: false
246
+ fsdp:
247
+ value: []
248
+ fsdp_config:
249
+ value:
250
+ min_num_params: 0
251
+ xla: false
252
+ xla_fsdp_grad_ckpt: false
253
+ xla_fsdp_v2: false
254
+ full_determinism:
255
+ value: false
256
+ generate_during_eval:
257
+ value: false
258
+ gradient_accumulation_steps:
259
+ value: 8
260
+ gradient_checkpointing:
261
+ value: true
262
+ gradient_checkpointing_kwargs:
263
+ value: null
264
+ greater_is_better:
265
+ value: false
266
+ group_by_length:
267
+ value: false
268
+ hidden_act:
269
+ value: silu
270
+ hidden_size:
271
+ value: 5120
272
+ hub_always_push:
273
+ value: false
274
+ hub_model_id:
275
+ value: null
276
+ hub_private_repo:
277
+ value: null
278
+ hub_revision:
279
+ value: null
280
+ hub_strategy:
281
+ value: every_save
282
+ hub_token:
283
+ value: <HUB_TOKEN>
284
+ id2label:
285
+ value:
286
+ "0": LABEL_0
287
+ "1": LABEL_1
288
+ ignore_data_skip:
289
+ value: false
290
+ include_for_metrics:
291
+ value: []
292
+ include_num_input_tokens_seen:
293
+ value: "no"
294
+ initializer_range:
295
+ value: 0.02
296
+ intermediate_size:
297
+ value: 13824
298
+ is_decoder:
299
+ value: false
300
+ is_encoder_decoder:
301
+ value: false
302
+ label_names:
303
+ value: null
304
+ label_pad_token_id:
305
+ value: -100
306
+ label_smoothing:
307
+ value: 0
308
+ label_smoothing_factor:
309
+ value: 0
310
+ label2id:
311
+ value:
312
+ LABEL_0: 0
313
+ LABEL_1: 1
314
+ layer_types:
315
+ value:
316
+ - full_attention
317
+ - full_attention
318
+ - full_attention
319
+ - full_attention
320
+ - full_attention
321
+ - full_attention
322
+ - full_attention
323
+ - full_attention
324
+ - full_attention
325
+ - full_attention
326
+ - full_attention
327
+ - full_attention
328
+ - full_attention
329
+ - full_attention
330
+ - full_attention
331
+ - full_attention
332
+ - full_attention
333
+ - full_attention
334
+ - full_attention
335
+ - full_attention
336
+ - full_attention
337
+ - full_attention
338
+ - full_attention
339
+ - full_attention
340
+ - full_attention
341
+ - full_attention
342
+ - full_attention
343
+ - full_attention
344
+ - full_attention
345
+ - full_attention
346
+ - full_attention
347
+ - full_attention
348
+ - full_attention
349
+ - full_attention
350
+ - full_attention
351
+ - full_attention
352
+ - full_attention
353
+ - full_attention
354
+ - full_attention
355
+ - full_attention
356
+ - full_attention
357
+ - full_attention
358
+ - full_attention
359
+ - full_attention
360
+ - full_attention
361
+ - full_attention
362
+ - full_attention
363
+ - full_attention
364
+ ld_alpha:
365
+ value: null
366
+ learning_rate:
367
+ value: 5e-05
368
+ length_column_name:
369
+ value: length
370
+ liger_kernel_config:
371
+ value: null
372
+ load_best_model_at_end:
373
+ value: true
374
+ local_rank:
375
+ value: -1
376
+ log_level:
377
+ value: passive
378
+ log_level_replica:
379
+ value: warning
380
+ log_on_each_node:
381
+ value: true
382
+ logging_dir:
383
+ value: null
384
+ logging_first_step:
385
+ value: false
386
+ logging_nan_inf_filter:
387
+ value: true
388
+ logging_steps:
389
+ value: 2
390
+ logging_strategy:
391
+ value: steps
392
+ loss_type:
393
+ value: sigmoid
394
+ loss_weights:
395
+ value: null
396
+ lr_scheduler_kwargs:
397
+ value: null
398
+ lr_scheduler_type:
399
+ value: cosine
400
+ max_completion_length:
401
+ value: null
402
+ max_grad_norm:
403
+ value: 1
404
+ max_length:
405
+ value: 2048
406
+ max_position_embeddings:
407
+ value: 32768
408
+ max_prompt_length:
409
+ value: 1024
410
+ max_steps:
411
+ value: -1
412
+ max_window_layers:
413
+ value: 48
414
+ metric_for_best_model:
415
+ value: eval_loss
416
+ model:
417
+ value:
418
+ attn_implementation: null
419
+ base_local_dir: base_model
420
+ bnb_4bit_compute_dtype: bfloat16
421
+ bnb_4bit_quant_type: nf4
422
+ bnb_4bit_use_double_quant: false
423
+ device_map: auto
424
+ repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
425
+ revision: null
426
+ tokenizer_use_fast: true
427
+ torch_dtype: bfloat16
428
+ trust_remote_code: true
429
+ use_4bit: false
430
+ model/num_parameters:
431
+ value: 14795199488
432
+ model_adapter_name:
433
+ value: null
434
+ model_init_kwargs:
435
+ value: null
436
+ model_type:
437
+ value: qwen2
438
+ neftune_noise_alpha:
439
+ value: null
440
+ num_attention_heads:
441
+ value: 40
442
+ num_hidden_layers:
443
+ value: 48
444
+ num_key_value_heads:
445
+ value: 8
446
+ num_train_epochs:
447
+ value: 3
448
+ optim:
449
+ value: adamw_torch
450
+ optim_args:
451
+ value: null
452
+ optim_target_modules:
453
+ value: null
454
+ output_attentions:
455
+ value: false
456
+ output_dir:
457
+ value: runs/dpo_run_14b_v1
458
+ output_hidden_states:
459
+ value: false
460
+ pad_token:
461
+ value: <PAD_TOKEN>
462
+ pad_token_id:
463
+ value: 151643
464
+ padding_free:
465
+ value: false
466
+ parallelism_config:
467
+ value: null
468
+ peft:
469
+ value:
470
+ bias: none
471
+ enabled: true
472
+ lora_alpha: 32
473
+ lora_dropout: 0.05
474
+ r: 16
475
+ target_modules: auto
476
+ peft_config:
477
+ value:
478
+ default:
479
+ alora_invocation_tokens: null
480
+ arrow_config: null
481
+ auto_mapping: null
482
+ base_model_name_or_path: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
483
+ bias: none
484
+ corda_config: null
485
+ ensure_weight_tying: false
486
+ eva_config: null
487
+ exclude_modules: null
488
+ fan_in_fan_out: false
489
+ inference_mode: false
490
+ init_lora_weights: true
491
+ layer_replication: null
492
+ layers_pattern: null
493
+ layers_to_transform: null
494
+ lora_alpha: 32
495
+ lora_bias: false
496
+ lora_dropout: 0.05
497
+ megatron_config: null
498
+ megatron_core: megatron.core
499
+ modules_to_save: null
500
+ peft_type: LORA
501
+ peft_version: 0.18.0
502
+ qalora_group_size: 16
503
+ r: 16
504
+ revision: null
505
+ runtime_config:
506
+ ephemeral_gpu_offload: false
507
+ target_modules:
508
+ - k_proj
509
+ - o_proj
510
+ - v_proj
511
+ - q_proj
512
+ target_parameters: null
513
+ task_type: CAUSAL_LM
514
+ trainable_token_indices: null
515
+ use_dora: false
516
+ use_qalora: false
517
+ use_rslora: false
518
+ per_device_eval_batch_size:
519
+ value: 1
520
+ per_device_train_batch_size:
521
+ value: 1
522
+ precompute_ref_batch_size:
523
+ value: null
524
+ precompute_ref_log_probs:
525
+ value: false
526
+ prediction_loss_only:
527
+ value: false
528
+ prefix:
529
+ value: null
530
+ problem_type:
531
+ value: null
532
+ project:
533
+ value: huggingface
534
+ push_to_hub:
535
+ value: false
536
+ ref_adapter_name:
537
+ value: null
538
+ ref_model_init_kwargs:
539
+ value: null
540
+ ref_model_mixup_alpha:
541
+ value: 0.6
542
+ ref_model_sync_steps:
543
+ value: 512
544
+ reference_free:
545
+ value: false
546
+ remove_unused_columns:
547
+ value: false
548
+ report_to:
549
+ value:
550
+ - wandb
551
+ restore_callback_states_from_checkpoint:
552
+ value: false
553
+ resume_from_checkpoint:
554
+ value: null
555
+ return_dict:
556
+ value: true
557
+ rms_norm_eps:
558
+ value: 1e-06
559
+ rope_parameters:
560
+ value:
561
+ rope_theta: 1e+06
562
+ rope_type: default
563
+ rpo_alpha:
564
+ value: null
565
+ run_dir:
566
+ value: runs/dpo_run_14b_v1
567
+ run_name:
568
+ value: null
569
+ save_on_each_node:
570
+ value: false
571
+ save_only_model:
572
+ value: false
573
+ save_steps:
574
+ value: 100
575
+ save_strategy:
576
+ value: steps
577
+ save_total_limit:
578
+ value: 10
579
+ seed:
580
+ value: 42
581
+ sep_token_id:
582
+ value: null
583
+ skip_memory_metrics:
584
+ value: true
585
+ sliding_window:
586
+ value: null
587
+ sync_ref_model:
588
+ value: false
589
+ task_specific_params:
590
+ value: null
591
+ tf32:
592
+ value: null
593
+ tie_word_embeddings:
594
+ value: false
595
+ tokenizer_class:
596
+ value: null
597
+ tools:
598
+ value: null
599
+ torch_compile:
600
+ value: false
601
+ torch_compile_backend:
602
+ value: null
603
+ torch_compile_mode:
604
+ value: null
605
+ torch_empty_cache_steps:
606
+ value: null
607
+ trackio_space_id:
608
+ value: trackio
609
+ train:
610
+ value:
611
+ early_stopping:
612
+ enabled: true
613
+ metric: eval_loss
614
+ min_delta: 0.001
615
+ mode: min
616
+ patience: 5
617
+ eval_steps: 25
618
+ evaluation_strategy: steps
619
+ gradient_accumulation_steps: 8
620
+ gradient_checkpointing: true
621
+ learning_rate: "5e-5"
622
+ load_best_model_at_end: true
623
+ logging_steps: 2
624
+ lr_scheduler_type: cosine
625
+ max_grad_norm: 1
626
+ num_train_epochs: 3
627
+ optim: adamw_torch
628
+ per_device_eval_batch_size: 1
629
+ per_device_train_batch_size: 1
630
+ resume_from_checkpoint: auto
631
+ save_steps: 100
632
+ save_strategy: steps
633
+ save_total_limit: 10
634
+ warmup_ratio: 0.1
635
+ weight_decay: 0
636
+ transformers_version:
637
+ value: 5.0.0.dev0
638
+ truncation_mode:
639
+ value: keep_end
640
+ use_cache:
641
+ value: false
642
+ use_cpu:
643
+ value: false
644
+ use_liger_kernel:
645
+ value: false
646
+ use_liger_loss:
647
+ value: null
648
+ use_logits_to_keep:
649
+ value: false
650
+ use_sliding_window:
651
+ value: false
652
+ use_weighting:
653
+ value: false
654
+ vocab_size:
655
+ value: 152064
656
+ warmup_ratio:
657
+ value: 0.1
658
+ warmup_steps:
659
+ value: 0.1
660
+ weight_decay:
661
+ value: 0
dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/output.log ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wandb initialized: project='dpo-training', name='auto-generated'
2
+ `torch_dtype` is deprecated! Use `dtype` instead!
3
+ Loading weights: 100%|█████████| 579/579 [00:09<00:00, 60.71it/s, Materializing param=model.norm.weight]
4
+ Loading reference model (frozen copy)...
5
+ Loading weights: 100%|█████████| 579/579 [00:09<00:00, 60.20it/s, Materializing param=model.norm.weight]
6
+ Reference model loaded and frozen
7
+ 2025-12-26 15:57:19,133 - INFO - HTTP Request: HEAD https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets/json/json.py "HTTP/1.1 200 OK"
8
+ 2025-12-26 15:57:19,148 - INFO - Formatting train DPO data...
9
+ 2025-12-26 15:57:21,512 - INFO - Train dataset after filtering: 6850 examples
10
+ 2025-12-26 15:57:21,513 - INFO - train dataset validation passed: 6850 examples
11
+ 2025-12-26 15:57:21,513 - INFO - Formatting eval DPO data...
12
+ 2025-12-26 15:57:23,870 - INFO - Eval dataset after filtering: 762 examples
13
+ 2025-12-26 15:57:23,871 - INFO - eval dataset validation passed: 762 examples
14
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
15
+ Early stopping enabled: patience=5, min_delta=0.001
16
+ 2025-12-26 15:57:23,907 - INFO - DPO Training with beta=0.1, loss_type=sigmoid
17
+ warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.
18
+ 2025-12-26 15:57:33,435 - INFO - Starting DPO training...
19
+ The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
20
+
21
+ {'loss': '0.6931', 'grad_norm': '1.243', 'learning_rate': '1.938e-07', 'rewards/chosen': '0', 'rewards/rejected': '0', 'rewards/accuracies': '0', 'rewards/margins': '0', 'logps/chosen': '-368.9', 'logps/rejected': '-398.8', 'logits/chosen': '5.179', 'logits/rejected': '5.193', 'epoch': '0.002336'}
22
+ {'loss': '0.695', 'grad_norm': '1.392', 'learning_rate': '5.814e-07', 'rewards/chosen': '0.004505', 'rewards/rejected': '0.007727', 'rewards/accuracies': '0.625', 'rewards/margins': '-0.003223', 'logps/chosen': '-338.4', 'logps/rejected': '-367', 'logits/chosen': '5.404', 'logits/rejected': '5.457', 'epoch': '0.004672'}
23
+ {'loss': '0.6892', 'grad_norm': '1.067', 'learning_rate': '9.69e-07', 'rewards/chosen': '-0.003407', 'rewards/rejected': '-0.01166', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.008256', 'logps/chosen': '-362.3', 'logps/rejected': '-387.6', 'logits/chosen': '5.292', 'logits/rejected': '5.328', 'epoch': '0.007007'}
24
+ {'loss': '0.6944', 'grad_norm': '1.001', 'learning_rate': '1.357e-06', 'rewards/chosen': '0.01466', 'rewards/rejected': '0.01589', 'rewards/accuracies': '0.375', 'rewards/margins': '-0.001235', 'logps/chosen': '-379.9', 'logps/rejected': '-389.1', 'logits/chosen': '5.323', 'logits/rejected': '5.411', 'epoch': '0.009343'}
25
+ {'loss': '0.6933', 'grad_norm': '1.246', 'learning_rate': '1.744e-06', 'rewards/chosen': '-0.0285', 'rewards/rejected': '-0.02862', 'rewards/accuracies': '0.625', 'rewards/margins': '0.0001264', 'logps/chosen': '-363.2', 'logps/rejected': '-389.7', 'logits/chosen': '5.436', 'logits/rejected': '5.495', 'epoch': '0.01168'}
26
+ {'loss': '0.6883', 'grad_norm': '1.403', 'learning_rate': '2.132e-06', 'rewards/chosen': '0.01622', 'rewards/rejected': '0.006134', 'rewards/accuracies': '0.5', 'rewards/margins': '0.01009', 'logps/chosen': '-371', 'logps/rejected': '-402.5', 'logits/chosen': '5.355', 'logits/rejected': '5.376', 'epoch': '0.01401'}
27
+ {'loss': '0.6897', 'grad_norm': '1.116', 'learning_rate': '2.519e-06', 'rewards/chosen': '-0.01732', 'rewards/rejected': '-0.02465', 'rewards/accuracies': '0.625', 'rewards/margins': '0.007329', 'logps/chosen': '-336.7', 'logps/rejected': '-357.5', 'logits/chosen': '5.515', 'logits/rejected': '5.561', 'epoch': '0.01635'}
28
+ {'loss': '0.6904', 'grad_norm': '0.9471', 'learning_rate': '2.907e-06', 'rewards/chosen': '0.0327', 'rewards/rejected': '0.02688', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.005827', 'logps/chosen': '-415.7', 'logps/rejected': '-441.1', 'logits/chosen': '5.553', 'logits/rejected': '5.583', 'epoch': '0.01869'}
29
+ {'loss': '0.6836', 'grad_norm': '1.44', 'learning_rate': '3.295e-06', 'rewards/chosen': '0.01102', 'rewards/rejected': '-0.008499', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.01952', 'logps/chosen': '-392.5', 'logps/rejected': '-420.2', 'logits/chosen': '5.441', 'logits/rejected': '5.49', 'epoch': '0.02102'}
30
+ {'loss': '0.6902', 'grad_norm': '1.594', 'learning_rate': '3.682e-06', 'rewards/chosen': '0.006536', 'rewards/rejected': '0.0005231', 'rewards/accuracies': '0.5625', 'rewards/margins': '0.006013', 'logps/chosen': '-345.2', 'logps/rejected': '-366', 'logits/chosen': '5.318', 'logits/rejected': '5.398', 'epoch': '0.02336'}
31
+ {'loss': '0.6913', 'grad_norm': '1.136', 'learning_rate': '4.07e-06', 'rewards/chosen': '0.01191', 'rewards/rejected': '0.00737', 'rewards/accuracies': '0.5', 'rewards/margins': '0.004538', 'logps/chosen': '-347.9', 'logps/rejected': '-370.7', 'logits/chosen': '5.633', 'logits/rejected': '5.727', 'epoch': '0.02569'}
32
+ {'loss': '0.6769', 'grad_norm': '1.068', 'learning_rate': '4.457e-06', 'rewards/chosen': '0.01244', 'rewards/rejected': '-0.02045', 'rewards/accuracies': '0.875', 'rewards/margins': '0.03289', 'logps/chosen': '-347.2', 'logps/rejected': '-377.6', 'logits/chosen': '5.357', 'logits/rejected': '5.406', 'epoch': '0.02803'}
33
+
34
+ {'eval_loss': '0.6837', 'eval_runtime': '454.4', 'eval_samples_per_second': '1.677', 'eval_steps_per_second': '1.677', 'eval_rewards/chosen': '0.02464', 'eval_rewards/rejected': '0.005081', 'eval_rewards/accuracies': '0.6654', 'eval_rewards/margins': '0.01956', 'eval_logps/chosen': '-370.2', 'eval_logps/rejected': '-395.7', 'eval_logits/chosen': '5.295', 'eval_logits/rejected': '5.345', 'epoch': '0.0292'}
35
+ {'loss': '0.685', 'grad_norm': '1.592', 'learning_rate': '4.845e-06', 'rewards/chosen': '0.02639', 'rewards/rejected': '0.009419', 'rewards/accuracies': '0.625', 'rewards/margins': '0.01697', 'logps/chosen': '-387.5', 'logps/rejected': '-412.1', 'logits/chosen': '5.157', 'logits/rejected': '5.245', 'epoch': '0.03036'}
36
+ {'loss': '0.6752', 'grad_norm': '1.318', 'learning_rate': '5.233e-06', 'rewards/chosen': '0.04595', 'rewards/rejected': '0.009191', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.03676', 'logps/chosen': '-360.4', 'logps/rejected': '-391.2', 'logits/chosen': '5.546', 'logits/rejected': '5.544', 'epoch': '0.0327'}
37
+ {'loss': '0.6752', 'grad_norm': '1.444', 'learning_rate': '5.62e-06', 'rewards/chosen': '0.04195', 'rewards/rejected': '0.005257', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.03669', 'logps/chosen': '-378.6', 'logps/rejected': '-405.4', 'logits/chosen': '5.136', 'logits/rejected': '5.239', 'epoch': '0.03504'}
38
+ {'loss': '0.6701', 'grad_norm': '1.38', 'learning_rate': '6.008e-06', 'rewards/chosen': '0.06658', 'rewards/rejected': '0.01939', 'rewards/accuracies': '0.875', 'rewards/margins': '0.04719', 'logps/chosen': '-358.5', 'logps/rejected': '-382.4', 'logits/chosen': '5.411', 'logits/rejected': '5.427', 'epoch': '0.03737'}
39
+ {'loss': '0.6611', 'grad_norm': '1.326', 'learning_rate': '6.395e-06', 'rewards/chosen': '0.07039', 'rewards/rejected': '0.004514', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.06587', 'logps/chosen': '-326.9', 'logps/rejected': '-346.5', 'logits/chosen': '5.207', 'logits/rejected': '5.255', 'epoch': '0.03971'}
40
+ {'loss': '0.6282', 'grad_norm': '1.578', 'learning_rate': '6.783e-06', 'rewards/chosen': '0.1174', 'rewards/rejected': '-0.01899', 'rewards/accuracies': '1', 'rewards/margins': '0.1364', 'logps/chosen': '-360', 'logps/rejected': '-384.3', 'logits/chosen': '5.551', 'logits/rejected': '5.637', 'epoch': '0.04204'}
41
+ {'loss': '0.6271', 'grad_norm': '1.859', 'learning_rate': '7.171e-06', 'rewards/chosen': '0.1618', 'rewards/rejected': '0.02293', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.1389', 'logps/chosen': '-325.9', 'logps/rejected': '-352', 'logits/chosen': '5.391', 'logits/rejected': '5.412', 'epoch': '0.04438'}
42
+ {'loss': '0.6412', 'grad_norm': '1.323', 'learning_rate': '7.558e-06', 'rewards/chosen': '0.1525', 'rewards/rejected': '0.0409', 'rewards/accuracies': '0.875', 'rewards/margins': '0.1116', 'logps/chosen': '-343.4', 'logps/rejected': '-374.8', 'logits/chosen': '5.19', 'logits/rejected': '5.203', 'epoch': '0.04672'}
43
+ {'loss': '0.6094', 'grad_norm': '2.533', 'learning_rate': '7.946e-06', 'rewards/chosen': '0.2898', 'rewards/rejected': '0.1082', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.1816', 'logps/chosen': '-341.8', 'logps/rejected': '-372.4', 'logits/chosen': '5.42', 'logits/rejected': '5.453', 'epoch': '0.04905'}
44
+ {'loss': '0.5816', 'grad_norm': '1.525', 'learning_rate': '8.333e-06', 'rewards/chosen': '0.3246', 'rewards/rejected': '0.07354', 'rewards/accuracies': '0.8125', 'rewards/margins': '0.2511', 'logps/chosen': '-354.5', 'logps/rejected': '-376.9', 'logits/chosen': '5.384', 'logits/rejected': '5.398', 'epoch': '0.05139'}
45
+ {'loss': '0.527', 'grad_norm': '2.081', 'learning_rate': '8.721e-06', 'rewards/chosen': '0.6465', 'rewards/rejected': '0.2707', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.3758', 'logps/chosen': '-331.1', 'logps/rejected': '-362.9', 'logits/chosen': '5.27', 'logits/rejected': '5.287', 'epoch': '0.05372'}
46
+ {'loss': '0.5066', 'grad_norm': '1.769', 'learning_rate': '9.109e-06', 'rewards/chosen': '0.6378', 'rewards/rejected': '0.2113', 'rewards/accuracies': '1', 'rewards/margins': '0.4265', 'logps/chosen': '-369.4', 'logps/rejected': '-400.2', 'logits/chosen': '5.473', 'logits/rejected': '5.465', 'epoch': '0.05606'}
47
+ {'loss': '0.5293', 'grad_norm': '2.842', 'learning_rate': '9.496e-06', 'rewards/chosen': '0.7923', 'rewards/rejected': '0.4136', 'rewards/accuracies': '1', 'rewards/margins': '0.3787', 'logps/chosen': '-363.5', 'logps/rejected': '-397.8', 'logits/chosen': '5.05', 'logits/rejected': '5.112', 'epoch': '0.05839'}
48
+ {'eval_loss': '0.4611', 'eval_runtime': '454.6', 'eval_samples_per_second': '1.676', 'eval_steps_per_second': '1.676', 'eval_rewards/chosen': '0.8944', 'eval_rewards/rejected': '0.3205', 'eval_rewards/accuracies': '0.9619', 'eval_rewards/margins': '0.5739', 'eval_logps/chosen': '-361.5', 'eval_logps/rejected': '-392.6', 'eval_logits/chosen': '5.224', 'eval_logits/rejected': '5.287', 'epoch': '0.05839'}
49
+ {'loss': '0.446', 'grad_norm': '1.691', 'learning_rate': '9.884e-06', 'rewards/chosen': '0.987', 'rewards/rejected': '0.3813', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.6057', 'logps/chosen': '-343.5', 'logps/rejected': '-379.4', 'logits/chosen': '5.486', 'logits/rejected': '5.542', 'epoch': '0.06073'}
50
+ {'loss': '0.4361', 'grad_norm': '1.946', 'learning_rate': '1.027e-05', 'rewards/chosen': '0.7795', 'rewards/rejected': '0.1529', 'rewards/accuracies': '1', 'rewards/margins': '0.6266', 'logps/chosen': '-379.5', 'logps/rejected': '-401.6', 'logits/chosen': '5.17', 'logits/rejected': '5.269', 'epoch': '0.06307'}
51
+ {'loss': '0.3928', 'grad_norm': '2.127', 'learning_rate': '1.066e-05', 'rewards/chosen': '1.274', 'rewards/rejected': '0.4879', 'rewards/accuracies': '0.9375', 'rewards/margins': '0.7864', 'logps/chosen': '-378.1', 'logps/rejected': '-413.3', 'logits/chosen': '5.097', 'logits/rejected': '5.153', 'epoch': '0.0654'}
52
+ {'loss': '0.3586', 'grad_norm': '1.538', 'learning_rate': '1.105e-05', 'rewards/chosen': '1.29', 'rewards/rejected': '0.3544', 'rewards/accuracies': '0.875', 'rewards/margins': '0.9354', 'logps/chosen': '-372.9', 'logps/rejected': '-401.8', 'logits/chosen': '5.139', 'logits/rejected': '5.203', 'epoch': '0.06774'}
53
+ {'loss': '0.428', 'grad_norm': '2.358', 'learning_rate': '1.143e-05', 'rewards/chosen': '1.382', 'rewards/rejected': '0.6533', 'rewards/accuracies': '0.875', 'rewards/margins': '0.7291', 'logps/chosen': '-361', 'logps/rejected': '-392.3', 'logits/chosen': '5.072', 'logits/rejected': '5.188', 'epoch': '0.07007'}
54
+ {'loss': '0.3137', 'grad_norm': '2.178', 'learning_rate': '1.182e-05', 'rewards/chosen': '1.664', 'rewards/rejected': '0.575', 'rewards/accuracies': '1', 'rewards/margins': '1.089', 'logps/chosen': '-364.8', 'logps/rejected': '-401', 'logits/chosen': '5.264', 'logits/rejected': '5.311', 'epoch': '0.07241'}
55
+ {'loss': '0.3038', 'grad_norm': '1.698', 'learning_rate': '1.221e-05', 'rewards/chosen': '1.647', 'rewards/rejected': '0.5322', 'rewards/accuracies': '1', 'rewards/margins': '1.115', 'logps/chosen': '-359.8', 'logps/rejected': '-397.2', 'logits/chosen': '5.192', 'logits/rejected': '5.261', 'epoch': '0.07474'}
56
+ {'loss': '0.2503', 'grad_norm': '1.322', 'learning_rate': '1.26e-05', 'rewards/chosen': '1.567', 'rewards/rejected': '0.1573', 'rewards/accuracies': '1', 'rewards/margins': '1.41', 'logps/chosen': '-352.4', 'logps/rejected': '-392.7', 'logits/chosen': '5.293', 'logits/rejected': '5.309', 'epoch': '0.07708'}
57
+ {'loss': '0.3108', 'grad_norm': '1.817', 'learning_rate': '1.298e-05', 'rewards/chosen': '1.479', 'rewards/rejected': '0.2151', 'rewards/accuracies': '0.9375', 'rewards/margins': '1.264', 'logps/chosen': '-320', 'logps/rejected': '-364.1', 'logits/chosen': '5.026', 'logits/rejected': '5.115', 'epoch': '0.07942'}
58
+ {'loss': '0.2299', 'grad_norm': '1.066', 'learning_rate': '1.337e-05', 'rewards/chosen': '1.395', 'rewards/rejected': '-0.1015', 'rewards/accuracies': '1', 'rewards/margins': '1.497', 'logps/chosen': '-383.8', 'logps/rejected': '-431.8', 'logits/chosen': '4.945', 'logits/rejected': '4.959', 'epoch': '0.08175'}
59
+ {'loss': '0.226', 'grad_norm': '1.035', 'learning_rate': '1.376e-05', 'rewards/chosen': '1.298', 'rewards/rejected': '-0.3464', 'rewards/accuracies': '1', 'rewards/margins': '1.644', 'logps/chosen': '-350.9', 'logps/rejected': '-382.7', 'logits/chosen': '5.004', 'logits/rejected': '5.12', 'epoch': '0.08409'}
60
+ {'loss': '0.1892', 'grad_norm': '1.16', 'learning_rate': '1.415e-05', 'rewards/chosen': '1.198', 'rewards/rejected': '-0.5511', 'rewards/accuracies': '1', 'rewards/margins': '1.75', 'logps/chosen': '-352.3', 'logps/rejected': '-399.2', 'logits/chosen': '4.89', 'logits/rejected': '4.95', 'epoch': '0.08642'}
61
+ {'eval_loss': '0.1602', 'eval_runtime': '454.3', 'eval_samples_per_second': '1.677', 'eval_steps_per_second': '1.677', 'eval_rewards/chosen': '1.121', 'eval_rewards/rejected': '-0.9336', 'eval_rewards/accuracies': '0.9961', 'eval_rewards/margins': '2.055', 'eval_logps/chosen': '-359.2', 'eval_logps/rejected': '-405.1', 'eval_logits/chosen': '4.93', 'eval_logits/rejected': '5.032', 'epoch': '0.08759'}
62
+ {'loss': '0.16', 'grad_norm': '1.143', 'learning_rate': '1.453e-05', 'rewards/chosen': '1.213', 'rewards/rejected': '-0.8816', 'rewards/accuracies': '1', 'rewards/margins': '2.095', 'logps/chosen': '-313.1', 'logps/rejected': '-356.1', 'logits/chosen': '5.037', 'logits/rejected': '5.132', 'epoch': '0.08876'}
63
+ {'loss': '0.1895', 'grad_norm': '0.9839', 'learning_rate': '1.492e-05', 'rewards/chosen': '1.061', 'rewards/rejected': '-0.8471', 'rewards/accuracies': '1', 'rewards/margins': '1.908', 'logps/chosen': '-366.3', 'logps/rejected': '-405.8', 'logits/chosen': '4.817', 'logits/rejected': '4.874', 'epoch': '0.09109'}
64
+ {'loss': '0.1595', 'grad_norm': '0.9213', 'learning_rate': '1.531e-05', 'rewards/chosen': '0.6765', 'rewards/rejected': '-1.491', 'rewards/accuracies': '1', 'rewards/margins': '2.167', 'logps/chosen': '-348.1', 'logps/rejected': '-395.2', 'logits/chosen': '5.047', 'logits/rejected': '5.158', 'epoch': '0.09343'}
65
+ {'loss': '0.1209', 'grad_norm': '0.9821', 'learning_rate': '1.57e-05', 'rewards/chosen': '0.872', 'rewards/rejected': '-1.775', 'rewards/accuracies': '1', 'rewards/margins': '2.647', 'logps/chosen': '-378.9', 'logps/rejected': '-436.9', 'logits/chosen': '4.691', 'logits/rejected': '4.772', 'epoch': '0.09577'}
66
+ {'loss': '0.08721', 'grad_norm': '0.6679', 'learning_rate': '1.609e-05', 'rewards/chosen': '1.134', 'rewards/rejected': '-1.77', 'rewards/accuracies': '1', 'rewards/margins': '2.904', 'logps/chosen': '-346.5', 'logps/rejected': '-400.1', 'logits/chosen': '4.88', 'logits/rejected': '4.962', 'epoch': '0.0981'}
67
+ {'loss': '0.07943', 'grad_norm': '0.5761', 'learning_rate': '1.647e-05', 'rewards/chosen': '1.246', 'rewards/rejected': '-1.769', 'rewards/accuracies': '1', 'rewards/margins': '3.015', 'logps/chosen': '-341.7', 'logps/rejected': '-398.3', 'logits/chosen': '4.464', 'logits/rejected': '4.68', 'epoch': '0.1004'}
68
+ {'loss': '0.1258', 'grad_norm': '1.602', 'learning_rate': '1.686e-05', 'rewards/chosen': '1.071', 'rewards/rejected': '-2.048', 'rewards/accuracies': '0.9375', 'rewards/margins': '3.119', 'logps/chosen': '-344.9', 'logps/rejected': '-395.4', 'logits/chosen': '4.564', 'logits/rejected': '4.681', 'epoch': '0.1028'}
69
+ {'loss': '0.06663', 'grad_norm': '0.4641', 'learning_rate': '1.725e-05', 'rewards/chosen': '1.413', 'rewards/rejected': '-2.348', 'rewards/accuracies': '1', 'rewards/margins': '3.761', 'logps/chosen': '-327', 'logps/rejected': '-388.4', 'logits/chosen': '4.499', 'logits/rejected': '4.673', 'epoch': '0.1051'}
70
+ {'loss': '0.04482', 'grad_norm': '0.67', 'learning_rate': '1.764e-05', 'rewards/chosen': '1.478', 'rewards/rejected': '-2.901', 'rewards/accuracies': '1', 'rewards/margins': '4.379', 'logps/chosen': '-362.7', 'logps/rejected': '-439.3', 'logits/chosen': '4.729', 'logits/rejected': '4.814', 'epoch': '0.1074'}
71
+ {'loss': '0.05633', 'grad_norm': '0.4153', 'learning_rate': '1.802e-05', 'rewards/chosen': '0.7137', 'rewards/rejected': '-2.745', 'rewards/accuracies': '1', 'rewards/margins': '3.458', 'logps/chosen': '-381.6', 'logps/rejected': '-444.3', 'logits/chosen': '4.785', 'logits/rejected': '4.892', 'epoch': '0.1098'}
72
+ {'loss': '0.04092', 'grad_norm': '0.3153', 'learning_rate': '1.841e-05', 'rewards/chosen': '1.757', 'rewards/rejected': '-2.264', 'rewards/accuracies': '1', 'rewards/margins': '4.021', 'logps/chosen': '-356.7', 'logps/rejected': '-414.7', 'logits/chosen': '4.604', 'logits/rejected': '4.805', 'epoch': '0.1121'}
73
+ {'loss': '0.02579', 'grad_norm': '0.377', 'learning_rate': '1.88e-05', 'rewards/chosen': '1.387', 'rewards/rejected': '-3.268', 'rewards/accuracies': '1', 'rewards/margins': '4.654', 'logps/chosen': '-339.8', 'logps/rejected': '-413.9', 'logits/chosen': '4.559', 'logits/rejected': '4.691', 'epoch': '0.1145'}
74
+ {'loss': '0.01516', 'grad_norm': '0.1502', 'learning_rate': '1.919e-05', 'rewards/chosen': '1.794', 'rewards/rejected': '-3.149', 'rewards/accuracies': '1', 'rewards/margins': '4.943', 'logps/chosen': '-346.3', 'logps/rejected': '-418.9', 'logits/chosen': '4.387', 'logits/rejected': '4.495', 'epoch': '0.1168'}
75
+ {'eval_loss': '0.04428', 'eval_runtime': '454.7', 'eval_samples_per_second': '1.676', 'eval_steps_per_second': '1.676', 'eval_rewards/chosen': '1.725', 'eval_rewards/rejected': '-2.864', 'eval_rewards/accuracies': '0.9921', 'eval_rewards/margins': '4.589', 'eval_logps/chosen': '-353.2', 'eval_logps/rejected': '-424.4', 'eval_logits/chosen': '4.286', 'eval_logits/rejected': '4.426', 'epoch': '0.1168'}
76
+ {'loss': '0.0159', 'grad_norm': '0.2124', 'learning_rate': '1.957e-05', 'rewards/chosen': '1.77', 'rewards/rejected': '-3.026', 'rewards/accuracies': '1', 'rewards/margins': '4.796', 'logps/chosen': '-305', 'logps/rejected': '-384.9', 'logits/chosen': '4.197', 'logits/rejected': '4.353', 'epoch': '0.1191'}
77
+ {'loss': '0.03818', 'grad_norm': '1.196', 'learning_rate': '1.996e-05', 'rewards/chosen': '1.556', 'rewards/rejected': '-3.267', 'rewards/accuracies': '1', 'rewards/margins': '4.823', 'logps/chosen': '-341.1', 'logps/rejected': '-417.6', 'logits/chosen': '4.185', 'logits/rejected': '4.28', 'epoch': '0.1215'}
78
+ {'loss': '0.05679', 'grad_norm': '1.302', 'learning_rate': '2.035e-05', 'rewards/chosen': '1.654', 'rewards/rejected': '-3.076', 'rewards/accuracies': '1', 'rewards/margins': '4.73', 'logps/chosen': '-358.1', 'logps/rejected': '-426.9', 'logits/chosen': '4.324', 'logits/rejected': '4.452', 'epoch': '0.1238'}
79
+ {'loss': '0.07615', 'grad_norm': '0.3007', 'learning_rate': '2.074e-05', 'rewards/chosen': '1.412', 'rewards/rejected': '-3.332', 'rewards/accuracies': '0.9375', 'rewards/margins': '4.744', 'logps/chosen': '-364.5', 'logps/rejected': '-434.5', 'logits/chosen': '4.492', 'logits/rejected': '4.633', 'epoch': '0.1261'}
80
+ {'loss': '0.0146', 'grad_norm': '0.4247', 'learning_rate': '2.112e-05', 'rewards/chosen': '1.958', 'rewards/rejected': '-4.051', 'rewards/accuracies': '1', 'rewards/margins': '6.009', 'logps/chosen': '-306.5', 'logps/rejected': '-392.5', 'logits/chosen': '3.858', 'logits/rejected': '3.968', 'epoch': '0.1285'}
81
+ {'loss': '0.01015', 'grad_norm': '0.1418', 'learning_rate': '2.151e-05', 'rewards/chosen': '2.196', 'rewards/rejected': '-3.758', 'rewards/accuracies': '1', 'rewards/margins': '5.954', 'logps/chosen': '-339.6', 'logps/rejected': '-425.5', 'logits/chosen': '4.254', 'logits/rejected': '4.353', 'epoch': '0.1308'}
82
+ {'loss': '0.01139', 'grad_norm': '0.2944', 'learning_rate': '2.19e-05', 'rewards/chosen': '1.995', 'rewards/rejected': '-3.392', 'rewards/accuracies': '1', 'rewards/margins': '5.387', 'logps/chosen': '-349.4', 'logps/rejected': '-431.8', 'logits/chosen': '3.717', 'logits/rejected': '3.922', 'epoch': '0.1331'}
83
+ {'loss': '0.02451', 'grad_norm': '0.9541', 'learning_rate': '2.229e-05', 'rewards/chosen': '1.855', 'rewards/rejected': '-3.475', 'rewards/accuracies': '1', 'rewards/margins': '5.33', 'logps/chosen': '-343.2', 'logps/rejected': '-423.2', 'logits/chosen': '3.514', 'logits/rejected': '3.74', 'epoch': '0.1355'}
84
+ {'loss': '0.007584', 'grad_norm': '0.4569', 'learning_rate': '2.267e-05', 'rewards/chosen': '2.13', 'rewards/rejected': '-4.365', 'rewards/accuracies': '1', 'rewards/margins': '6.495', 'logps/chosen': '-382.1', 'logps/rejected': '-480.7', 'logits/chosen': '3.9', 'logits/rejected': '3.963', 'epoch': '0.1378'}
85
+ {'loss': '0.007748', 'grad_norm': '0.2083', 'learning_rate': '2.306e-05', 'rewards/chosen': '1.399', 'rewards/rejected': '-4.58', 'rewards/accuracies': '1', 'rewards/margins': '5.979', 'logps/chosen': '-355.3', 'logps/rejected': '-436.5', 'logits/chosen': '3.772', 'logits/rejected': '3.939', 'epoch': '0.1401'}
86
+ {'loss': '0.01436', 'grad_norm': '0.2193', 'learning_rate': '2.345e-05', 'rewards/chosen': '1.177', 'rewards/rejected': '-5.205', 'rewards/accuracies': '1', 'rewards/margins': '6.382', 'logps/chosen': '-327.2', 'logps/rejected': '-414.8', 'logits/chosen': '3.657', 'logits/rejected': '3.875', 'epoch': '0.1425'}
87
+ {'loss': '0.007622', 'grad_norm': '0.03551', 'learning_rate': '2.384e-05', 'rewards/chosen': '0.7803', 'rewards/rejected': '-6.615', 'rewards/accuracies': '1', 'rewards/margins': '7.395', 'logps/chosen': '-369.9', 'logps/rejected': '-474', 'logits/chosen': '3.66', 'logits/rejected': '3.725', 'epoch': '0.1448'}
88
+ {'eval_loss': '0.02411', 'eval_runtime': '454.8', 'eval_samples_per_second': '1.675', 'eval_steps_per_second': '1.675', 'eval_rewards/chosen': '0.5319', 'eval_rewards/rejected': '-6.151', 'eval_rewards/accuracies': '0.9934', 'eval_rewards/margins': '6.683', 'eval_logps/chosen': '-365.1', 'eval_logps/rejected': '-457.3', 'eval_logits/chosen': '3.669', 'eval_logits/rejected': '3.844', 'epoch': '0.146'}
89
+ {'loss': '0.005532', 'grad_norm': '0.2169', 'learning_rate': '2.422e-05', 'rewards/chosen': '0.9076', 'rewards/rejected': '-7.027', 'rewards/accuracies': '1', 'rewards/margins': '7.935', 'logps/chosen': '-345.2', 'logps/rejected': '-454.6', 'logits/chosen': '3.778', 'logits/rejected': '3.757', 'epoch': '0.1472'}
90
+ {'loss': '0.0008547', 'grad_norm': '0.05145', 'learning_rate': '2.461e-05', 'rewards/chosen': '1.086', 'rewards/rejected': '-6.756', 'rewards/accuracies': '1', 'rewards/margins': '7.843', 'logps/chosen': '-376.3', 'logps/rejected': '-486.3', 'logits/chosen': '3.686', 'logits/rejected': '3.777', 'epoch': '0.1495'}
91
+ {'loss': '0.01921', 'grad_norm': '1.001', 'learning_rate': '2.5e-05', 'rewards/chosen': '0.7988', 'rewards/rejected': '-6.314', 'rewards/accuracies': '1', 'rewards/margins': '7.112', 'logps/chosen': '-330.3', 'logps/rejected': '-420.2', 'logits/chosen': '3.856', 'logits/rejected': '4.067', 'epoch': '0.1518'}
92
+ {'loss': '0.005052', 'grad_norm': '0.2909', 'learning_rate': '2.539e-05', 'rewards/chosen': '0.727', 'rewards/rejected': '-6.733', 'rewards/accuracies': '1', 'rewards/margins': '7.46', 'logps/chosen': '-346.6', 'logps/rejected': '-448.4', 'logits/chosen': '3.856', 'logits/rejected': '4.107', 'epoch': '0.1542'}
93
+ {'loss': '0.02904', 'grad_norm': '0.1034', 'learning_rate': '2.578e-05', 'rewards/chosen': '-0.1552', 'rewards/rejected': '-7.109', 'rewards/accuracies': '1', 'rewards/margins': '6.953', 'logps/chosen': '-398.5', 'logps/rejected': '-493.3', 'logits/chosen': '3.692', 'logits/rejected': '3.876', 'epoch': '0.1565'}
94
+ {'loss': '0.008301', 'grad_norm': '0.4083', 'learning_rate': '2.616e-05', 'rewards/chosen': '0.3356', 'rewards/rejected': '-6.363', 'rewards/accuracies': '1', 'rewards/margins': '6.699', 'logps/chosen': '-420.6', 'logps/rejected': '-511.7', 'logits/chosen': '3.701', 'logits/rejected': '3.885', 'epoch': '0.1588'}
95
+ {'loss': '0.01079', 'grad_norm': '0.1769', 'learning_rate': '2.655e-05', 'rewards/chosen': '1.53', 'rewards/rejected': '-5.799', 'rewards/accuracies': '1', 'rewards/margins': '7.329', 'logps/chosen': '-361.6', 'logps/rejected': '-455', 'logits/chosen': '3.477', 'logits/rejected': '3.63', 'epoch': '0.1612'}
96
+ {'loss': '0.01278', 'grad_norm': '0.1559', 'learning_rate': '2.694e-05', 'rewards/chosen': '1.282', 'rewards/rejected': '-6.813', 'rewards/accuracies': '1', 'rewards/margins': '8.095', 'logps/chosen': '-379.4', 'logps/rejected': '-490', 'logits/chosen': '3.44', 'logits/rejected': '3.567', 'epoch': '0.1635'}
97
+ {'loss': '0.007118', 'grad_norm': '0.8207', 'learning_rate': '2.733e-05', 'rewards/chosen': '1.469', 'rewards/rejected': '-6.559', 'rewards/accuracies': '1', 'rewards/margins': '8.027', 'logps/chosen': '-402.8', 'logps/rejected': '-506.3', 'logits/chosen': '3.235', 'logits/rejected': '3.393', 'epoch': '0.1658'}
98
+ main()
99
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main
100
+ trainer.train(resume_from_checkpoint=resume_from)
101
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train
102
+ return inner_training_loop(
103
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop
104
+ tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
105
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step
106
+ loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
107
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss
108
+ loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
109
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1726, in get_batch_loss_metrics
110
+ model_output = self.concatenated_forward(model, batch)
111
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward
112
+ outputs = model(input_ids, **model_kwargs)
113
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
114
+ return self._call_impl(*args, **kwargs)
115
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
116
+ return forward_call(*args, **kwargs)
117
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward
118
+ return model_forward(*args, **kwargs)
119
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__
120
+ return convert_to_fp32(self.model_forward(*args, **kwargs))
121
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
122
+ return func(*args, **kwargs)
123
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward
124
+ return self.base_model(
125
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
126
+ return self._call_impl(*args, **kwargs)
127
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
128
+ return forward_call(*args, **kwargs)
129
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
130
+ return self.model.forward(*args, **kwargs)
131
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
132
+ output = module._old_forward(*args, **kwargs)
133
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper
134
+ output = func(self, *args, **kwargs)
135
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward
136
+ outputs: BaseModelOutputWithPast = self.model(
137
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
138
+ return self._call_impl(*args, **kwargs)
139
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
140
+ return forward_call(*args, **kwargs)
141
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
142
+ outputs = func(self, *args, **kwargs)
143
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward
144
+ hidden_states = decoder_layer(
145
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 93, in __call__
146
+ return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args)
147
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_compile.py", line 32, in inner
148
+ return disable_fn(*args, **kwargs)
149
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
150
+ return fn(*args, **kwargs)
151
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint
152
+ return CheckpointFunction.apply(function, preserve, *args)
153
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply
154
+ return super().apply(*args, **kwargs) # type: ignore[misc]
155
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 264, in forward
156
+ outputs = run_function(*args)
157
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
158
+ return self._call_impl(*args, **kwargs)
159
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
160
+ return forward_call(*args, **kwargs)
161
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward
162
+ output = orig_forward(*args, **kwargs)
163
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
164
+ output = module._old_forward(*args, **kwargs)
165
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward
166
+ hidden_states, _ = self.self_attn(
167
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
168
+ return self._call_impl(*args, **kwargs)
169
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
170
+ return forward_call(*args, **kwargs)
171
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
172
+ output = module._old_forward(*args, **kwargs)
173
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 220, in forward
174
+ query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
175
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
176
+ return self._call_impl(*args, **kwargs)
177
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
178
+ return forward_call(*args, **kwargs)
179
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 793, in forward
180
+ result = self.base_layer(x, *args, **kwargs)
181
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
182
+ return self._call_impl(*args, **kwargs)
183
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
184
+ return forward_call(*args, **kwargs)
185
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
186
+ output = module._old_forward(*args, **kwargs)
187
+ KeyboardInterrupt
188
+ Traceback (most recent call last):
189
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 953, in <module>
190
+ main()
191
+ File "/workspace/trainer-kit/DPO-14b/run_dpo.py", line 928, in main
192
+ trainer.train(resume_from_checkpoint=resume_from)
193
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2168, in train
194
+ return inner_training_loop(
195
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 2535, in _inner_training_loop
196
+ tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
197
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/trainer.py", line 3807, in training_step
198
+ loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
199
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1810, in compute_loss
200
+ loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
201
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1726, in get_batch_loss_metrics
202
+ model_output = self.concatenated_forward(model, batch)
203
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py", line 1600, in concatenated_forward
204
+ outputs = model(input_ids, **model_kwargs)
205
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
206
+ return self._call_impl(*args, **kwargs)
207
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
208
+ return forward_call(*args, **kwargs)
209
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 819, in forward
210
+ return model_forward(*args, **kwargs)
211
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/utils/operations.py", line 807, in __call__
212
+ return convert_to_fp32(self.model_forward(*args, **kwargs))
213
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
214
+ return func(*args, **kwargs)
215
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/peft_model.py", line 1923, in forward
216
+ return self.base_model(
217
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
218
+ return self._call_impl(*args, **kwargs)
219
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
220
+ return forward_call(*args, **kwargs)
221
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 308, in forward
222
+ return self.model.forward(*args, **kwargs)
223
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
224
+ output = module._old_forward(*args, **kwargs)
225
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 810, in wrapper
226
+ output = func(self, *args, **kwargs)
227
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 477, in forward
228
+ outputs: BaseModelOutputWithPast = self.model(
229
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
230
+ return self._call_impl(*args, **kwargs)
231
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
232
+ return forward_call(*args, **kwargs)
233
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 965, in wrapper
234
+ outputs = func(self, *args, **kwargs)
235
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 412, in forward
236
+ hidden_states = decoder_layer(
237
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/modeling_layers.py", line 93, in __call__
238
+ return self._gradient_checkpointing_func(partial(super().__call__, **kwargs), *args)
239
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_compile.py", line 32, in inner
240
+ return disable_fn(*args, **kwargs)
241
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
242
+ return fn(*args, **kwargs)
243
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint
244
+ return CheckpointFunction.apply(function, preserve, *args)
245
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply
246
+ return super().apply(*args, **kwargs) # type: ignore[misc]
247
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 264, in forward
248
+ outputs = run_function(*args)
249
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
250
+ return self._call_impl(*args, **kwargs)
251
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
252
+ return forward_call(*args, **kwargs)
253
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/utils/generic.py", line 918, in wrapped_forward
254
+ output = orig_forward(*args, **kwargs)
255
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
256
+ output = module._old_forward(*args, **kwargs)
257
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 300, in forward
258
+ hidden_states, _ = self.self_attn(
259
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
260
+ return self._call_impl(*args, **kwargs)
261
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
262
+ return forward_call(*args, **kwargs)
263
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
264
+ output = module._old_forward(*args, **kwargs)
265
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 220, in forward
266
+ query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
267
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
268
+ return self._call_impl(*args, **kwargs)
269
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
270
+ return forward_call(*args, **kwargs)
271
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/peft/tuners/lora/layer.py", line 793, in forward
272
+ result = self.base_layer(x, *args, **kwargs)
273
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
274
+ return self._call_impl(*args, **kwargs)
275
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
276
+ return forward_call(*args, **kwargs)
277
+ File "/workspace/llm_finetuning_env/lib/python3.10/site-packages/accelerate/hooks.py", line 175, in new_forward
278
+ output = module._old_forward(*args, **kwargs)
279
+ KeyboardInterrupt
dpo_qwen_14B/wandb/run-20251226_155650-wbzoafvt/files/requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ exceptiongroup==1.3.1
2
+ wheel==0.45.1
3
+ python-dateutil==2.9.0.post0
4
+ nvidia-ml-py==13.580.82
5
+ huggingface_hub==1.2.3
6
+ idna==3.11
7
+ click==8.3.1
8
+ numpy==2.2.6
9
+ httpx==0.28.1
10
+ tokenizers==0.22.1
11
+ sympy==1.13.1
12
+ yarl==1.22.0
13
+ async-timeout==5.0.1
14
+ datasets==4.4.2
15
+ platformdirs==4.5.1
16
+ nvidia-cuda-cupti-cu12==12.1.105
17
+ nvidia-nvtx-cu12==12.1.105
18
+ smmap==5.0.2
19
+ accelerate==1.12.0
20
+ requests==2.32.5
21
+ aiohttp==3.13.2
22
+ bitsandbytes==0.49.0
23
+ nvidia-cublas-cu12==12.1.3.1
24
+ mpmath==1.3.0
25
+ torchaudio==2.5.1+cu121
26
+ nvidia-cuda-runtime-cu12==12.1.105
27
+ typing-inspection==0.4.2
28
+ GitPython==3.1.45
29
+ xxhash==3.6.0
30
+ nvidia-cusolver-cu12==11.4.5.107
31
+ pydantic_core==2.41.5
32
+ six==1.17.0
33
+ torchvision==0.20.1+cu121
34
+ typing_extensions==4.15.0
35
+ triton==3.1.0
36
+ charset-normalizer==3.4.4
37
+ nvitop==1.6.1
38
+ wandb==0.23.1
39
+ regex==2025.11.3
40
+ pip==25.3
41
+ nvidia-cusparse-cu12==12.1.0.106
42
+ pytz==2025.2
43
+ Jinja2==3.1.6
44
+ psutil==7.2.0
45
+ pillow==12.0.0
46
+ packaging==25.0
47
+ safetensors==0.7.0
48
+ sentry-sdk==2.48.0
49
+ gitdb==4.0.12
50
+ httpcore==1.0.9
51
+ setuptools==80.9.0
52
+ nvidia-cufft-cu12==11.0.2.54
53
+ anyio==4.12.0
54
+ transformers==5.0.0.dev0
55
+ pydantic==2.12.5
56
+ fsspec==2025.10.0
57
+ filelock==3.20.0
58
+ PyYAML==6.0.3
59
+ hf-xet==1.2.0
60
+ nvidia-cudnn-cu12==9.1.0.70
61
+ tqdm==4.67.1
62
+ MarkupSafe==2.1.5
63
+ attrs==25.4.0
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ peft==0.18.0
66
+ aiohappyeyeballs==2.6.1
67
+ networkx==3.4.2
68
+ nvidia-nvjitlink-cu12==12.9.86
69
+ certifi==2025.11.12
70
+ pyarrow==22.0.0
71
+ dill==0.4.0
72
+ protobuf==6.33.2
73
+ aiosignal==1.4.0
74
+ frozenlist==1.8.0
75
+ urllib3==2.6.2
76
+ propcache==0.4.1
77
+ tzdata==2025.3
78
+ pandas==2.3.3
79
+ annotated-types==0.7.0
80
+ shellingham==1.5.4
81
+ nvidia-nccl-cu12==2.21.5
82
+ multidict==6.7.0
83
+ nvidia-curand-cu12==10.3.2.106
84
+ trl==0.26.2
85
+ torch==2.5.1+cu121
86
+ h11==0.16.0
87
+ multiprocess==0.70.18
88
+ typer-slim==0.21.0
89
+ wheel==0.45.1
90
+ tomli==2.0.1
91
+ autocommand==2.2.2
92
+ jaraco.context==5.3.0
93
+ zipp==3.19.2
94
+ packaging==24.2
95
+ inflect==7.3.1
96
+ typing_extensions==4.12.2
97
+ platformdirs==4.2.2
98
+ jaraco.functools==4.0.1
99
+ jaraco.collections==5.1.0
100
+ jaraco.text==3.12.1
101
+ backports.tarfile==1.2.0
102
+ more-itertools==10.3.0
103
+ importlib_metadata==8.0.0
104
+ typeguard==4.3.0