sameepv21 commited on
Commit
e6cfebd
·
verified ·
1 Parent(s): f227a77

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 256,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 128,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "gate_proj",
18
+ "out_proj",
19
+ "down_proj",
20
+ "q_proj",
21
+ "fc2",
22
+ "v_proj",
23
+ "o_proj",
24
+ "up_proj",
25
+ "k_proj",
26
+ "fc1"
27
+ ],
28
+ "task_type": "CAUSAL_LM"
29
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be9e7b0fba83d5e831d60538adce9138fc2aa347d6ff2ca540ae992a8f10f61
3
+ size 785303482
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "X": [
3
+ "Image",
4
+ "Video"
5
+ ],
6
+ "_attn_implementation_autoset": true,
7
+ "_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
8
+ "architectures": [
9
+ "Videollama3Qwen2ForCausalLM"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "auto_map": {
13
+ "AutoConfig": "DAMO-NLP-SG/VideoLLaMA3-7B--configuration_videollama3.Videollama3Qwen2Config",
14
+ "AutoModelForCausalLM": "DAMO-NLP-SG/VideoLLaMA3-7B--modeling_videollama3.Videollama3Qwen2ForCausalLM"
15
+ },
16
+ "bos_token_id": 151643,
17
+ "eos_token_id": 151645,
18
+ "freeze_mm_mlp_adapter": true,
19
+ "hidden_act": "silu",
20
+ "hidden_size": 3584,
21
+ "image_aspect_ratio": "pad",
22
+ "image_grid_pinpoints": null,
23
+ "image_token_index": 151665,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 18944,
26
+ "max_position_embeddings": 32768,
27
+ "max_window_layers": 28,
28
+ "mm_projector_type": "mlp2x_gelu",
29
+ "mm_use_x_patch_token": false,
30
+ "mm_use_x_start_end": false,
31
+ "model_type": "videollama3_qwen2",
32
+ "num_attention_heads": 28,
33
+ "num_hidden_layers": 28,
34
+ "num_key_value_heads": 4,
35
+ "rms_norm_eps": 1e-06,
36
+ "rope_scaling": null,
37
+ "rope_theta": 1000000.0,
38
+ "sliding_window": null,
39
+ "tie_word_embeddings": false,
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.46.3",
42
+ "tune_mm_mlp_adapter": false,
43
+ "use_cache": true,
44
+ "use_sliding_window": false,
45
+ "use_token_compression": false,
46
+ "vision_encoder": null,
47
+ "vision_encoder_config": {
48
+ "hidden_size": 1152,
49
+ "intermediate_size": 4304,
50
+ "model_type": "videollama3_vision_encoder",
51
+ "num_attention_heads": 16,
52
+ "num_hidden_layers": 27,
53
+ "patch_size": 14
54
+ },
55
+ "vocab_size": 152064
56
+ }
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b
3
+ size 912
trainer_state.json ADDED
@@ -0,0 +1,830 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9962453066332917,
5
+ "eval_steps": 500,
6
+ "global_step": 1197,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "learning_rate": 1.0416666666666667e-07,
14
+ "logps/chosen": -71.84510803222656,
15
+ "logps/rejected": -68.73027038574219,
16
+ "loss": 0.7215,
17
+ "losses/dpo": 0.7109084129333496,
18
+ "losses/sft": 1.546567678451538,
19
+ "losses/total": 0.7109084129333496,
20
+ "ref_logps/chosen": -71.75371551513672,
21
+ "ref_logps/rejected": -68.71001434326172,
22
+ "rewards/accuracies": 0.5049999952316284,
23
+ "rewards/chosen": -0.009138082154095173,
24
+ "rewards/margins": -0.0071117933839559555,
25
+ "rewards/rejected": -0.0020262906327843666,
26
+ "step": 25
27
+ },
28
+ {
29
+ "epoch": 0.13,
30
+ "learning_rate": 2.0833333333333333e-07,
31
+ "logps/chosen": -69.88240814208984,
32
+ "logps/rejected": -69.54386901855469,
33
+ "loss": 0.7126,
34
+ "losses/dpo": 0.6903221607208252,
35
+ "losses/sft": 1.5194830894470215,
36
+ "losses/total": 0.6903221607208252,
37
+ "ref_logps/chosen": -69.94444274902344,
38
+ "ref_logps/rejected": -69.52371978759766,
39
+ "rewards/accuracies": 0.4970000088214874,
40
+ "rewards/chosen": 0.006204119883477688,
41
+ "rewards/margins": 0.008218951523303986,
42
+ "rewards/rejected": -0.002014830242842436,
43
+ "step": 50
44
+ },
45
+ {
46
+ "epoch": 0.19,
47
+ "learning_rate": 3.1249999999999997e-07,
48
+ "logps/chosen": -73.9260482788086,
49
+ "logps/rejected": -71.71464538574219,
50
+ "loss": 0.7231,
51
+ "losses/dpo": 0.7194635272026062,
52
+ "losses/sft": 1.5661953687667847,
53
+ "losses/total": 0.7194635272026062,
54
+ "ref_logps/chosen": -73.91621398925781,
55
+ "ref_logps/rejected": -71.82748413085938,
56
+ "rewards/accuracies": 0.5029999613761902,
57
+ "rewards/chosen": -0.0009833112126216292,
58
+ "rewards/margins": -0.01226747315376997,
59
+ "rewards/rejected": 0.011284159496426582,
60
+ "step": 75
61
+ },
62
+ {
63
+ "epoch": 0.25,
64
+ "learning_rate": 4.1666666666666667e-07,
65
+ "logps/chosen": -68.9831314086914,
66
+ "logps/rejected": -66.86358642578125,
67
+ "loss": 0.7192,
68
+ "losses/dpo": 0.7422583103179932,
69
+ "losses/sft": 1.5391790866851807,
70
+ "losses/total": 0.7422583103179932,
71
+ "ref_logps/chosen": -68.984130859375,
72
+ "ref_logps/rejected": -66.90238189697266,
73
+ "rewards/accuracies": 0.4930000305175781,
74
+ "rewards/chosen": 0.00010019920591730624,
75
+ "rewards/margins": -0.00377923552878201,
76
+ "rewards/rejected": 0.003879436058923602,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.31,
81
+ "learning_rate": 4.976787372330547e-07,
82
+ "logps/chosen": -73.55087280273438,
83
+ "logps/rejected": -71.61868286132812,
84
+ "loss": 0.7077,
85
+ "losses/dpo": 0.7084662914276123,
86
+ "losses/sft": 1.5277128219604492,
87
+ "losses/total": 0.7084662914276123,
88
+ "ref_logps/chosen": -73.66172790527344,
89
+ "ref_logps/rejected": -71.5505599975586,
90
+ "rewards/accuracies": 0.5219999551773071,
91
+ "rewards/chosen": 0.011085684411227703,
92
+ "rewards/margins": 0.017898183315992355,
93
+ "rewards/rejected": -0.00681249750778079,
94
+ "step": 125
95
+ },
96
+ {
97
+ "epoch": 0.38,
98
+ "learning_rate": 4.860724233983287e-07,
99
+ "logps/chosen": -69.85209655761719,
100
+ "logps/rejected": -69.75456237792969,
101
+ "loss": 0.7016,
102
+ "losses/dpo": 0.6934231519699097,
103
+ "losses/sft": 1.5373976230621338,
104
+ "losses/total": 0.6934231519699097,
105
+ "ref_logps/chosen": -70.19540405273438,
106
+ "ref_logps/rejected": -69.83726501464844,
107
+ "rewards/accuracies": 0.5325000286102295,
108
+ "rewards/chosen": 0.0343310683965683,
109
+ "rewards/margins": 0.026060676202178,
110
+ "rewards/rejected": 0.008270387537777424,
111
+ "step": 150
112
+ },
113
+ {
114
+ "epoch": 0.44,
115
+ "learning_rate": 4.7446610956360255e-07,
116
+ "logps/chosen": -69.9237289428711,
117
+ "logps/rejected": -69.56204223632812,
118
+ "loss": 0.6826,
119
+ "losses/dpo": 0.6780227422714233,
120
+ "losses/sft": 1.5951652526855469,
121
+ "losses/total": 0.6780227422714233,
122
+ "ref_logps/chosen": -70.6123046875,
123
+ "ref_logps/rejected": -69.56880187988281,
124
+ "rewards/accuracies": 0.5539999604225159,
125
+ "rewards/chosen": 0.06885794550180435,
126
+ "rewards/margins": 0.0681825652718544,
127
+ "rewards/rejected": 0.0006753735360689461,
128
+ "step": 175
129
+ },
130
+ {
131
+ "epoch": 0.5,
132
+ "learning_rate": 4.628597957288765e-07,
133
+ "logps/chosen": -71.02560424804688,
134
+ "logps/rejected": -69.64324951171875,
135
+ "loss": 0.679,
136
+ "losses/dpo": 0.6822719573974609,
137
+ "losses/sft": 1.5291378498077393,
138
+ "losses/total": 0.6822719573974609,
139
+ "ref_logps/chosen": -71.80342102050781,
140
+ "ref_logps/rejected": -69.58236694335938,
141
+ "rewards/accuracies": 0.5559999942779541,
142
+ "rewards/chosen": 0.07778114080429077,
143
+ "rewards/margins": 0.08386911451816559,
144
+ "rewards/rejected": -0.006087968125939369,
145
+ "step": 200
146
+ },
147
+ {
148
+ "epoch": 0.56,
149
+ "learning_rate": 4.512534818941504e-07,
150
+ "logps/chosen": -67.88444519042969,
151
+ "logps/rejected": -68.10497283935547,
152
+ "loss": 0.679,
153
+ "losses/dpo": 0.6802477240562439,
154
+ "losses/sft": 1.5126712322235107,
155
+ "losses/total": 0.6802477240562439,
156
+ "ref_logps/chosen": -68.86332702636719,
157
+ "ref_logps/rejected": -68.26943969726562,
158
+ "rewards/accuracies": 0.5540000200271606,
159
+ "rewards/chosen": 0.0978882685303688,
160
+ "rewards/margins": 0.08144120872020721,
161
+ "rewards/rejected": 0.01644706539809704,
162
+ "step": 225
163
+ },
164
+ {
165
+ "epoch": 0.63,
166
+ "learning_rate": 4.3964716805942433e-07,
167
+ "logps/chosen": -69.61144256591797,
168
+ "logps/rejected": -71.70446014404297,
169
+ "loss": 0.661,
170
+ "losses/dpo": 0.6608595848083496,
171
+ "losses/sft": 1.5144795179367065,
172
+ "losses/total": 0.6608595848083496,
173
+ "ref_logps/chosen": -70.85598754882812,
174
+ "ref_logps/rejected": -71.67627716064453,
175
+ "rewards/accuracies": 0.5940000414848328,
176
+ "rewards/chosen": 0.12445437163114548,
177
+ "rewards/margins": 0.12727266550064087,
178
+ "rewards/rejected": -0.0028182892128825188,
179
+ "step": 250
180
+ },
181
+ {
182
+ "epoch": 0.69,
183
+ "learning_rate": 4.280408542246982e-07,
184
+ "logps/chosen": -69.20745849609375,
185
+ "logps/rejected": -68.46233367919922,
186
+ "loss": 0.6577,
187
+ "losses/dpo": 0.669312059879303,
188
+ "losses/sft": 1.5866602659225464,
189
+ "losses/total": 0.669312059879303,
190
+ "ref_logps/chosen": -70.55343627929688,
191
+ "ref_logps/rejected": -68.40556335449219,
192
+ "rewards/accuracies": 0.5860000252723694,
193
+ "rewards/chosen": 0.13459768891334534,
194
+ "rewards/margins": 0.14027482271194458,
195
+ "rewards/rejected": -0.005677163600921631,
196
+ "step": 275
197
+ },
198
+ {
199
+ "epoch": 0.75,
200
+ "learning_rate": 4.164345403899721e-07,
201
+ "logps/chosen": -70.11485290527344,
202
+ "logps/rejected": -68.45763397216797,
203
+ "loss": 0.649,
204
+ "losses/dpo": 0.6586881875991821,
205
+ "losses/sft": 1.5220152139663696,
206
+ "losses/total": 0.6586881875991821,
207
+ "ref_logps/chosen": -71.6851806640625,
208
+ "ref_logps/rejected": -68.39569091796875,
209
+ "rewards/accuracies": 0.5975000262260437,
210
+ "rewards/chosen": 0.15703237056732178,
211
+ "rewards/margins": 0.16322720050811768,
212
+ "rewards/rejected": -0.006194834131747484,
213
+ "step": 300
214
+ },
215
+ {
216
+ "epoch": 0.81,
217
+ "learning_rate": 4.04828226555246e-07,
218
+ "logps/chosen": -69.53617095947266,
219
+ "logps/rejected": -68.70401763916016,
220
+ "loss": 0.6401,
221
+ "losses/dpo": 0.6386440396308899,
222
+ "losses/sft": 1.4834158420562744,
223
+ "losses/total": 0.6386440396308899,
224
+ "ref_logps/chosen": -71.28308868408203,
225
+ "ref_logps/rejected": -68.5333480834961,
226
+ "rewards/accuracies": 0.6014999151229858,
227
+ "rewards/chosen": 0.17469124495983124,
228
+ "rewards/margins": 0.19175761938095093,
229
+ "rewards/rejected": -0.017066391184926033,
230
+ "step": 325
231
+ },
232
+ {
233
+ "epoch": 0.88,
234
+ "learning_rate": 3.9322191272051997e-07,
235
+ "logps/chosen": -69.60104370117188,
236
+ "logps/rejected": -70.08628845214844,
237
+ "loss": 0.6349,
238
+ "losses/dpo": 0.649332582950592,
239
+ "losses/sft": 1.4883217811584473,
240
+ "losses/total": 0.649332582950592,
241
+ "ref_logps/chosen": -71.48506164550781,
242
+ "ref_logps/rejected": -69.83267211914062,
243
+ "rewards/accuracies": 0.6045000553131104,
244
+ "rewards/chosen": 0.18840213119983673,
245
+ "rewards/margins": 0.2137639820575714,
246
+ "rewards/rejected": -0.025361843407154083,
247
+ "step": 350
248
+ },
249
+ {
250
+ "epoch": 0.94,
251
+ "learning_rate": 3.816155988857939e-07,
252
+ "logps/chosen": -68.25212097167969,
253
+ "logps/rejected": -69.1668930053711,
254
+ "loss": 0.6273,
255
+ "losses/dpo": 0.6162423491477966,
256
+ "losses/sft": 1.5002387762069702,
257
+ "losses/total": 0.6162423491477966,
258
+ "ref_logps/chosen": -70.23272705078125,
259
+ "ref_logps/rejected": -68.74607849121094,
260
+ "rewards/accuracies": 0.6295000314712524,
261
+ "rewards/chosen": 0.19805949926376343,
262
+ "rewards/margins": 0.24014097452163696,
263
+ "rewards/rejected": -0.04208146035671234,
264
+ "step": 375
265
+ },
266
+ {
267
+ "epoch": 1.0,
268
+ "learning_rate": 3.700092850510678e-07,
269
+ "logps/chosen": -70.95040893554688,
270
+ "logps/rejected": -72.96726989746094,
271
+ "loss": 0.6195,
272
+ "losses/dpo": 0.6564822196960449,
273
+ "losses/sft": 1.560630440711975,
274
+ "losses/total": 0.6564822196960449,
275
+ "ref_logps/chosen": -72.91325378417969,
276
+ "ref_logps/rejected": -72.14061737060547,
277
+ "rewards/accuracies": 0.6350000500679016,
278
+ "rewards/chosen": 0.19628457725048065,
279
+ "rewards/margins": 0.2789497375488281,
280
+ "rewards/rejected": -0.08266513794660568,
281
+ "step": 400
282
+ },
283
+ {
284
+ "epoch": 1.06,
285
+ "learning_rate": 3.5840297121634165e-07,
286
+ "logps/chosen": -67.00164794921875,
287
+ "logps/rejected": -69.44139099121094,
288
+ "loss": 0.6167,
289
+ "losses/dpo": 0.6204876899719238,
290
+ "losses/sft": 1.4992446899414062,
291
+ "losses/total": 0.6204876899719238,
292
+ "ref_logps/chosen": -68.97762298583984,
293
+ "ref_logps/rejected": -68.63683319091797,
294
+ "rewards/accuracies": 0.6190000772476196,
295
+ "rewards/chosen": 0.19759786128997803,
296
+ "rewards/margins": 0.27805399894714355,
297
+ "rewards/rejected": -0.08045615255832672,
298
+ "step": 425
299
+ },
300
+ {
301
+ "epoch": 1.13,
302
+ "learning_rate": 3.4679665738161556e-07,
303
+ "logps/chosen": -69.90442657470703,
304
+ "logps/rejected": -72.69268035888672,
305
+ "loss": 0.6054,
306
+ "losses/dpo": 0.598928689956665,
307
+ "losses/sft": 1.5615432262420654,
308
+ "losses/total": 0.598928689956665,
309
+ "ref_logps/chosen": -71.99002838134766,
310
+ "ref_logps/rejected": -71.58721923828125,
311
+ "rewards/accuracies": 0.6365000009536743,
312
+ "rewards/chosen": 0.20855939388275146,
313
+ "rewards/margins": 0.3191070556640625,
314
+ "rewards/rejected": -0.11054765433073044,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 1.19,
319
+ "learning_rate": 3.351903435468895e-07,
320
+ "logps/chosen": -68.88660430908203,
321
+ "logps/rejected": -70.41629791259766,
322
+ "loss": 0.5879,
323
+ "losses/dpo": 0.5859370231628418,
324
+ "losses/sft": 1.5487860441207886,
325
+ "losses/total": 0.5859370231628418,
326
+ "ref_logps/chosen": -71.17853546142578,
327
+ "ref_logps/rejected": -69.02958679199219,
328
+ "rewards/accuracies": 0.6640000343322754,
329
+ "rewards/chosen": 0.22919251024723053,
330
+ "rewards/margins": 0.36786285042762756,
331
+ "rewards/rejected": -0.13867038488388062,
332
+ "step": 475
333
+ },
334
+ {
335
+ "epoch": 1.25,
336
+ "learning_rate": 3.2358402971216343e-07,
337
+ "logps/chosen": -69.55220031738281,
338
+ "logps/rejected": -70.18158721923828,
339
+ "loss": 0.6038,
340
+ "losses/dpo": 0.6201021075248718,
341
+ "losses/sft": 1.5225414037704468,
342
+ "losses/total": 0.6201021075248718,
343
+ "ref_logps/chosen": -71.74359130859375,
344
+ "ref_logps/rejected": -68.91885375976562,
345
+ "rewards/accuracies": 0.6455000042915344,
346
+ "rewards/chosen": 0.2191377878189087,
347
+ "rewards/margins": 0.34541237354278564,
348
+ "rewards/rejected": -0.12627457082271576,
349
+ "step": 500
350
+ },
351
+ {
352
+ "epoch": 1.31,
353
+ "learning_rate": 3.1197771587743734e-07,
354
+ "logps/chosen": -69.61231231689453,
355
+ "logps/rejected": -72.09597778320312,
356
+ "loss": 0.5827,
357
+ "losses/dpo": 0.5971881747245789,
358
+ "losses/sft": 1.4868888854980469,
359
+ "losses/total": 0.5971881747245789,
360
+ "ref_logps/chosen": -71.90864562988281,
361
+ "ref_logps/rejected": -70.28910827636719,
362
+ "rewards/accuracies": 0.6584999561309814,
363
+ "rewards/chosen": 0.22963352501392365,
364
+ "rewards/margins": 0.41032031178474426,
365
+ "rewards/rejected": -0.18068677186965942,
366
+ "step": 525
367
+ },
368
+ {
369
+ "epoch": 1.38,
370
+ "learning_rate": 3.003714020427112e-07,
371
+ "logps/chosen": -69.58737182617188,
372
+ "logps/rejected": -70.84317016601562,
373
+ "loss": 0.5868,
374
+ "losses/dpo": 0.5683429837226868,
375
+ "losses/sft": 1.5186117887496948,
376
+ "losses/total": 0.5683429837226868,
377
+ "ref_logps/chosen": -71.8995590209961,
378
+ "ref_logps/rejected": -68.87027740478516,
379
+ "rewards/accuracies": 0.6439999938011169,
380
+ "rewards/chosen": 0.23121845722198486,
381
+ "rewards/margins": 0.42850860953330994,
382
+ "rewards/rejected": -0.19729015231132507,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 1.44,
387
+ "learning_rate": 2.887650882079851e-07,
388
+ "logps/chosen": -69.75614166259766,
389
+ "logps/rejected": -71.54226684570312,
390
+ "loss": 0.5853,
391
+ "losses/dpo": 0.573384165763855,
392
+ "losses/sft": 1.506340742111206,
393
+ "losses/total": 0.573384165763855,
394
+ "ref_logps/chosen": -71.97299194335938,
395
+ "ref_logps/rejected": -69.5394287109375,
396
+ "rewards/accuracies": 0.6439999938011169,
397
+ "rewards/chosen": 0.2216847836971283,
398
+ "rewards/margins": 0.4219689667224884,
399
+ "rewards/rejected": -0.2002841979265213,
400
+ "step": 575
401
+ },
402
+ {
403
+ "epoch": 1.5,
404
+ "learning_rate": 2.77158774373259e-07,
405
+ "logps/chosen": -67.17892456054688,
406
+ "logps/rejected": -72.1897201538086,
407
+ "loss": 0.5811,
408
+ "losses/dpo": 0.5949756503105164,
409
+ "losses/sft": 1.5656284093856812,
410
+ "losses/total": 0.5949756503105164,
411
+ "ref_logps/chosen": -69.0622329711914,
412
+ "ref_logps/rejected": -69.59004974365234,
413
+ "rewards/accuracies": 0.658500075340271,
414
+ "rewards/chosen": 0.18833142518997192,
415
+ "rewards/margins": 0.4482985734939575,
416
+ "rewards/rejected": -0.2599670886993408,
417
+ "step": 600
418
+ },
419
+ {
420
+ "epoch": 1.56,
421
+ "learning_rate": 2.65552460538533e-07,
422
+ "logps/chosen": -67.06172180175781,
423
+ "logps/rejected": -71.94342041015625,
424
+ "loss": 0.5723,
425
+ "losses/dpo": 0.55665123462677,
426
+ "losses/sft": 1.5321460962295532,
427
+ "losses/total": 0.55665123462677,
428
+ "ref_logps/chosen": -68.80352783203125,
429
+ "ref_logps/rejected": -69.0389175415039,
430
+ "rewards/accuracies": 0.6610000133514404,
431
+ "rewards/chosen": 0.17417989671230316,
432
+ "rewards/margins": 0.46463003754615784,
433
+ "rewards/rejected": -0.29045018553733826,
434
+ "step": 625
435
+ },
436
+ {
437
+ "epoch": 1.63,
438
+ "learning_rate": 2.539461467038069e-07,
439
+ "logps/chosen": -71.48672485351562,
440
+ "logps/rejected": -74.49577331542969,
441
+ "loss": 0.5551,
442
+ "losses/dpo": 0.5403231382369995,
443
+ "losses/sft": 1.5089354515075684,
444
+ "losses/total": 0.5403231382369995,
445
+ "ref_logps/chosen": -73.56059265136719,
446
+ "ref_logps/rejected": -71.07339477539062,
447
+ "rewards/accuracies": 0.6800000071525574,
448
+ "rewards/chosen": 0.20738649368286133,
449
+ "rewards/margins": 0.5496238470077515,
450
+ "rewards/rejected": -0.34223735332489014,
451
+ "step": 650
452
+ },
453
+ {
454
+ "epoch": 1.69,
455
+ "learning_rate": 2.4233983286908075e-07,
456
+ "logps/chosen": -70.11666107177734,
457
+ "logps/rejected": -72.92801666259766,
458
+ "loss": 0.5697,
459
+ "losses/dpo": 0.5605096817016602,
460
+ "losses/sft": 1.5102070569992065,
461
+ "losses/total": 0.5605096817016602,
462
+ "ref_logps/chosen": -71.93968200683594,
463
+ "ref_logps/rejected": -69.70313262939453,
464
+ "rewards/accuracies": 0.6594999432563782,
465
+ "rewards/chosen": 0.18230296671390533,
466
+ "rewards/margins": 0.5047909021377563,
467
+ "rewards/rejected": -0.32248786091804504,
468
+ "step": 675
469
+ },
470
+ {
471
+ "epoch": 1.75,
472
+ "learning_rate": 2.3073351903435469e-07,
473
+ "logps/chosen": -70.7259750366211,
474
+ "logps/rejected": -74.96146392822266,
475
+ "loss": 0.5596,
476
+ "losses/dpo": 0.5713181495666504,
477
+ "losses/sft": 1.501438021659851,
478
+ "losses/total": 0.5713181495666504,
479
+ "ref_logps/chosen": -72.50656127929688,
480
+ "ref_logps/rejected": -71.40023803710938,
481
+ "rewards/accuracies": 0.6769999861717224,
482
+ "rewards/chosen": 0.1780581921339035,
483
+ "rewards/margins": 0.5341811180114746,
484
+ "rewards/rejected": -0.3561229705810547,
485
+ "step": 700
486
+ },
487
+ {
488
+ "epoch": 1.81,
489
+ "learning_rate": 2.191272051996286e-07,
490
+ "logps/chosen": -71.7147445678711,
491
+ "logps/rejected": -74.73912048339844,
492
+ "loss": 0.5536,
493
+ "losses/dpo": 0.5598438382148743,
494
+ "losses/sft": 1.6115312576293945,
495
+ "losses/total": 0.5598438382148743,
496
+ "ref_logps/chosen": -73.46680450439453,
497
+ "ref_logps/rejected": -70.61848449707031,
498
+ "rewards/accuracies": 0.6815000772476196,
499
+ "rewards/chosen": 0.17520827054977417,
500
+ "rewards/margins": 0.5872728228569031,
501
+ "rewards/rejected": -0.4120645225048065,
502
+ "step": 725
503
+ },
504
+ {
505
+ "epoch": 1.88,
506
+ "learning_rate": 2.075208913649025e-07,
507
+ "logps/chosen": -70.7854232788086,
508
+ "logps/rejected": -74.75676727294922,
509
+ "loss": 0.55,
510
+ "losses/dpo": 0.5355216860771179,
511
+ "losses/sft": 1.5508781671524048,
512
+ "losses/total": 0.5355216860771179,
513
+ "ref_logps/chosen": -72.55353546142578,
514
+ "ref_logps/rejected": -70.52770233154297,
515
+ "rewards/accuracies": 0.6890000104904175,
516
+ "rewards/chosen": 0.1768111288547516,
517
+ "rewards/margins": 0.5997176170349121,
518
+ "rewards/rejected": -0.4229064881801605,
519
+ "step": 750
520
+ },
521
+ {
522
+ "epoch": 1.94,
523
+ "learning_rate": 1.959145775301764e-07,
524
+ "logps/chosen": -69.92965698242188,
525
+ "logps/rejected": -75.43944549560547,
526
+ "loss": 0.5567,
527
+ "losses/dpo": 0.5669773817062378,
528
+ "losses/sft": 1.5475414991378784,
529
+ "losses/total": 0.5669773817062378,
530
+ "ref_logps/chosen": -71.25016021728516,
531
+ "ref_logps/rejected": -70.77532958984375,
532
+ "rewards/accuracies": 0.6794999837875366,
533
+ "rewards/chosen": 0.13204967975616455,
534
+ "rewards/margins": 0.5984623432159424,
535
+ "rewards/rejected": -0.4664126932621002,
536
+ "step": 775
537
+ },
538
+ {
539
+ "epoch": 2.0,
540
+ "learning_rate": 1.8430826369545033e-07,
541
+ "logps/chosen": -70.1933364868164,
542
+ "logps/rejected": -74.6698226928711,
543
+ "loss": 0.5486,
544
+ "losses/dpo": 0.543175458908081,
545
+ "losses/sft": 1.5320526361465454,
546
+ "losses/total": 0.543175458908081,
547
+ "ref_logps/chosen": -71.89373016357422,
548
+ "ref_logps/rejected": -70.11508178710938,
549
+ "rewards/accuracies": 0.6875,
550
+ "rewards/chosen": 0.17003829777240753,
551
+ "rewards/margins": 0.6255122423171997,
552
+ "rewards/rejected": -0.455473929643631,
553
+ "step": 800
554
+ },
555
+ {
556
+ "epoch": 2.07,
557
+ "learning_rate": 1.7270194986072424e-07,
558
+ "logps/chosen": -68.841064453125,
559
+ "logps/rejected": -75.60282897949219,
560
+ "loss": 0.5496,
561
+ "losses/dpo": 0.5592978596687317,
562
+ "losses/sft": 1.591374158859253,
563
+ "losses/total": 0.5592978596687317,
564
+ "ref_logps/chosen": -70.10298156738281,
565
+ "ref_logps/rejected": -70.73596954345703,
566
+ "rewards/accuracies": 0.684499979019165,
567
+ "rewards/chosen": 0.12619122862815857,
568
+ "rewards/margins": 0.6128779053688049,
569
+ "rewards/rejected": -0.48668670654296875,
570
+ "step": 825
571
+ },
572
+ {
573
+ "epoch": 2.13,
574
+ "learning_rate": 1.6109563602599812e-07,
575
+ "logps/chosen": -70.97602081298828,
576
+ "logps/rejected": -75.59082794189453,
577
+ "loss": 0.5226,
578
+ "losses/dpo": 0.5137518048286438,
579
+ "losses/sft": 1.4946039915084839,
580
+ "losses/total": 0.5137518048286438,
581
+ "ref_logps/chosen": -72.67564392089844,
582
+ "ref_logps/rejected": -70.24249267578125,
583
+ "rewards/accuracies": 0.7165000438690186,
584
+ "rewards/chosen": 0.169962078332901,
585
+ "rewards/margins": 0.704794704914093,
586
+ "rewards/rejected": -0.5348325967788696,
587
+ "step": 850
588
+ },
589
+ {
590
+ "epoch": 2.19,
591
+ "learning_rate": 1.4948932219127206e-07,
592
+ "logps/chosen": -69.44739532470703,
593
+ "logps/rejected": -74.73712158203125,
594
+ "loss": 0.5483,
595
+ "losses/dpo": 0.554201602935791,
596
+ "losses/sft": 1.5423518419265747,
597
+ "losses/total": 0.554201602935791,
598
+ "ref_logps/chosen": -70.63737487792969,
599
+ "ref_logps/rejected": -69.3394546508789,
600
+ "rewards/accuracies": 0.6875,
601
+ "rewards/chosen": 0.11899794638156891,
602
+ "rewards/margins": 0.6587647795677185,
603
+ "rewards/rejected": -0.5397669076919556,
604
+ "step": 875
605
+ },
606
+ {
607
+ "epoch": 2.25,
608
+ "learning_rate": 1.3788300835654597e-07,
609
+ "logps/chosen": -67.69676971435547,
610
+ "logps/rejected": -72.41621398925781,
611
+ "loss": 0.554,
612
+ "losses/dpo": 0.536446750164032,
613
+ "losses/sft": 1.52887761592865,
614
+ "losses/total": 0.536446750164032,
615
+ "ref_logps/chosen": -68.82073974609375,
616
+ "ref_logps/rejected": -67.15235137939453,
617
+ "rewards/accuracies": 0.6720000505447388,
618
+ "rewards/chosen": 0.11239679157733917,
619
+ "rewards/margins": 0.6387830972671509,
620
+ "rewards/rejected": -0.5263863205909729,
621
+ "step": 900
622
+ },
623
+ {
624
+ "epoch": 2.32,
625
+ "learning_rate": 1.2627669452181985e-07,
626
+ "logps/chosen": -68.61820983886719,
627
+ "logps/rejected": -75.03334045410156,
628
+ "loss": 0.547,
629
+ "losses/dpo": 0.5469151139259338,
630
+ "losses/sft": 1.524001121520996,
631
+ "losses/total": 0.5469151139259338,
632
+ "ref_logps/chosen": -69.68536376953125,
633
+ "ref_logps/rejected": -69.33779907226562,
634
+ "rewards/accuracies": 0.6725000143051147,
635
+ "rewards/chosen": 0.10671478509902954,
636
+ "rewards/margins": 0.6762691736221313,
637
+ "rewards/rejected": -0.5695543885231018,
638
+ "step": 925
639
+ },
640
+ {
641
+ "epoch": 2.38,
642
+ "learning_rate": 1.1467038068709377e-07,
643
+ "logps/chosen": -71.48025512695312,
644
+ "logps/rejected": -76.9962387084961,
645
+ "loss": 0.5328,
646
+ "losses/dpo": 0.5382718443870544,
647
+ "losses/sft": 1.5356690883636475,
648
+ "losses/total": 0.5382718443870544,
649
+ "ref_logps/chosen": -72.66265869140625,
650
+ "ref_logps/rejected": -70.87562561035156,
651
+ "rewards/accuracies": 0.7055000066757202,
652
+ "rewards/chosen": 0.11823976784944534,
653
+ "rewards/margins": 0.7303012013435364,
654
+ "rewards/rejected": -0.6120614409446716,
655
+ "step": 950
656
+ },
657
+ {
658
+ "epoch": 2.44,
659
+ "learning_rate": 1.0306406685236768e-07,
660
+ "logps/chosen": -68.71895599365234,
661
+ "logps/rejected": -74.29911804199219,
662
+ "loss": 0.5524,
663
+ "losses/dpo": 0.5623547434806824,
664
+ "losses/sft": 1.6026860475540161,
665
+ "losses/total": 0.5623547434806824,
666
+ "ref_logps/chosen": -69.32998657226562,
667
+ "ref_logps/rejected": -68.43716430664062,
668
+ "rewards/accuracies": 0.6655000448226929,
669
+ "rewards/chosen": 0.06110435351729393,
670
+ "rewards/margins": 0.6473007202148438,
671
+ "rewards/rejected": -0.5861963629722595,
672
+ "step": 975
673
+ },
674
+ {
675
+ "epoch": 2.5,
676
+ "learning_rate": 9.14577530176416e-08,
677
+ "logps/chosen": -69.40322875976562,
678
+ "logps/rejected": -73.88810729980469,
679
+ "loss": 0.5513,
680
+ "losses/dpo": 0.5662988424301147,
681
+ "losses/sft": 1.6082065105438232,
682
+ "losses/total": 0.5662988424301147,
683
+ "ref_logps/chosen": -70.1698226928711,
684
+ "ref_logps/rejected": -68.06956481933594,
685
+ "rewards/accuracies": 0.6759999990463257,
686
+ "rewards/chosen": 0.07665982842445374,
687
+ "rewards/margins": 0.6585137248039246,
688
+ "rewards/rejected": -0.5818539261817932,
689
+ "step": 1000
690
+ },
691
+ {
692
+ "epoch": 2.57,
693
+ "learning_rate": 7.98514391829155e-08,
694
+ "logps/chosen": -68.32543182373047,
695
+ "logps/rejected": -74.76167297363281,
696
+ "loss": 0.5433,
697
+ "losses/dpo": 0.5388572216033936,
698
+ "losses/sft": 1.5300703048706055,
699
+ "losses/total": 0.5388572216033936,
700
+ "ref_logps/chosen": -69.11152648925781,
701
+ "ref_logps/rejected": -68.69097900390625,
702
+ "rewards/accuracies": 0.6825000047683716,
703
+ "rewards/chosen": 0.07860930263996124,
704
+ "rewards/margins": 0.685679018497467,
705
+ "rewards/rejected": -0.6070696115493774,
706
+ "step": 1025
707
+ },
708
+ {
709
+ "epoch": 2.63,
710
+ "learning_rate": 6.824512534818941e-08,
711
+ "logps/chosen": -70.73451232910156,
712
+ "logps/rejected": -77.00275421142578,
713
+ "loss": 0.5239,
714
+ "losses/dpo": 0.5082178115844727,
715
+ "losses/sft": 1.4840093851089478,
716
+ "losses/total": 0.5082178115844727,
717
+ "ref_logps/chosen": -72.08134460449219,
718
+ "ref_logps/rejected": -70.49996948242188,
719
+ "rewards/accuracies": 0.7019999623298645,
720
+ "rewards/chosen": 0.13468389213085175,
721
+ "rewards/margins": 0.7849621772766113,
722
+ "rewards/rejected": -0.6502782702445984,
723
+ "step": 1050
724
+ },
725
+ {
726
+ "epoch": 2.69,
727
+ "learning_rate": 5.6638811513463324e-08,
728
+ "logps/chosen": -69.95764923095703,
729
+ "logps/rejected": -77.90116882324219,
730
+ "loss": 0.5327,
731
+ "losses/dpo": 0.5328630805015564,
732
+ "losses/sft": 1.6418886184692383,
733
+ "losses/total": 0.5328630805015564,
734
+ "ref_logps/chosen": -70.50801086425781,
735
+ "ref_logps/rejected": -71.11058807373047,
736
+ "rewards/accuracies": 0.6959999799728394,
737
+ "rewards/chosen": 0.05503645911812782,
738
+ "rewards/margins": 0.7340949773788452,
739
+ "rewards/rejected": -0.6790586113929749,
740
+ "step": 1075
741
+ },
742
+ {
743
+ "epoch": 2.75,
744
+ "learning_rate": 4.503249767873723e-08,
745
+ "logps/chosen": -69.83995819091797,
746
+ "logps/rejected": -75.7170639038086,
747
+ "loss": 0.5415,
748
+ "losses/dpo": 0.5642114281654358,
749
+ "losses/sft": 1.5595824718475342,
750
+ "losses/total": 0.5642114281654358,
751
+ "ref_logps/chosen": -70.96809387207031,
752
+ "ref_logps/rejected": -69.68138885498047,
753
+ "rewards/accuracies": 0.6990000009536743,
754
+ "rewards/chosen": 0.11281368136405945,
755
+ "rewards/margins": 0.7163800001144409,
756
+ "rewards/rejected": -0.6035662889480591,
757
+ "step": 1100
758
+ },
759
+ {
760
+ "epoch": 2.82,
761
+ "learning_rate": 3.3426183844011144e-08,
762
+ "logps/chosen": -72.0064697265625,
763
+ "logps/rejected": -75.64459228515625,
764
+ "loss": 0.553,
765
+ "losses/dpo": 0.6136656403541565,
766
+ "losses/sft": 1.6066731214523315,
767
+ "losses/total": 0.6136656403541565,
768
+ "ref_logps/chosen": -72.82428741455078,
769
+ "ref_logps/rejected": -69.51007843017578,
770
+ "rewards/accuracies": 0.6880000233650208,
771
+ "rewards/chosen": 0.08178197592496872,
772
+ "rewards/margins": 0.6952335834503174,
773
+ "rewards/rejected": -0.6134517192840576,
774
+ "step": 1125
775
+ },
776
+ {
777
+ "epoch": 2.88,
778
+ "learning_rate": 2.181987000928505e-08,
779
+ "logps/chosen": -71.19115447998047,
780
+ "logps/rejected": -74.97571563720703,
781
+ "loss": 0.5495,
782
+ "losses/dpo": 0.5555659532546997,
783
+ "losses/sft": 1.547566533088684,
784
+ "losses/total": 0.5555659532546997,
785
+ "ref_logps/chosen": -72.04399108886719,
786
+ "ref_logps/rejected": -68.92461395263672,
787
+ "rewards/accuracies": 0.6770000457763672,
788
+ "rewards/chosen": 0.08528263866901398,
789
+ "rewards/margins": 0.6903927326202393,
790
+ "rewards/rejected": -0.6051101088523865,
791
+ "step": 1150
792
+ },
793
+ {
794
+ "epoch": 2.94,
795
+ "learning_rate": 1.0213556174558959e-08,
796
+ "logps/chosen": -68.98538970947266,
797
+ "logps/rejected": -74.58392333984375,
798
+ "loss": 0.5254,
799
+ "losses/dpo": 0.49954432249069214,
800
+ "losses/sft": 1.4814612865447998,
801
+ "losses/total": 0.49954432249069214,
802
+ "ref_logps/chosen": -70.0972671508789,
803
+ "ref_logps/rejected": -68.21308898925781,
804
+ "rewards/accuracies": 0.6914999485015869,
805
+ "rewards/chosen": 0.11118759214878082,
806
+ "rewards/margins": 0.7482713460922241,
807
+ "rewards/rejected": -0.6370838284492493,
808
+ "step": 1175
809
+ },
810
+ {
811
+ "epoch": 3.0,
812
+ "step": 1197,
813
+ "total_flos": 0.0,
814
+ "train_loss": 0.5977537606095112,
815
+ "train_runtime": 15731.8751,
816
+ "train_samples_per_second": 6.094,
817
+ "train_steps_per_second": 0.076
818
+ }
819
+ ],
820
+ "logging_steps": 25,
821
+ "max_steps": 1197,
822
+ "num_input_tokens_seen": 0,
823
+ "num_train_epochs": 3,
824
+ "save_steps": 500,
825
+ "stateful_callbacks": {},
826
+ "total_flos": 0.0,
827
+ "train_batch_size": 4,
828
+ "trial_name": null,
829
+ "trial_params": null
830
+ }