Upload folder using huggingface_hub

#1
by sameepv21 - opened
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "ShareGPTVideo/LLaVA-Hound-SFT",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 256,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 128,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "gate_proj",
18
+ "v_proj",
19
+ "o_proj",
20
+ "down_proj",
21
+ "q_proj",
22
+ "k_proj",
23
+ "up_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527ebd40353e5f546408521755915d45727b23637d47f06d86659b982dd8c52e
3
+ size 639787082
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "X": [
3
+ "Image",
4
+ "Video"
5
+ ],
6
+ "_name_or_path": "ShareGPTVideo/LLaVA-Hound-SFT",
7
+ "architectures": [
8
+ "LlavaLlamaForCausalLM"
9
+ ],
10
+ "attention_bias": false,
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "eos_token_id": 2,
14
+ "freeze_mm_mlp_adapter": true,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 4096,
17
+ "image_aspect_ratio": "pad",
18
+ "image_grid_pinpoints": null,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 11008,
21
+ "max_position_embeddings": 4096,
22
+ "mm_hidden_size": 1024,
23
+ "mm_image_tower": "LanguageBind/LanguageBind_Image",
24
+ "mm_projector_type": "mlp2x_gelu",
25
+ "mm_use_x_patch_token": false,
26
+ "mm_use_x_start_end": false,
27
+ "mm_video_tower": "LanguageBind/LanguageBind_Video_merge",
28
+ "mm_vision_select_feature": "patch",
29
+ "mm_vision_select_layer": -2,
30
+ "model_type": "llava_llama",
31
+ "num_attention_heads": 32,
32
+ "num_hidden_layers": 32,
33
+ "num_key_value_heads": 32,
34
+ "pad_token_id": 0,
35
+ "pretraining_tp": 1,
36
+ "rms_norm_eps": 1e-05,
37
+ "rope_scaling": null,
38
+ "rope_theta": 10000.0,
39
+ "tie_word_embeddings": false,
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.37.0",
42
+ "tune_mm_mlp_adapter": false,
43
+ "use_cache": true,
44
+ "use_mm_proj": true,
45
+ "vocab_size": 32000
46
+ }
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b
3
+ size 912
trainer_state.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 636,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12,
13
+ "learning_rate": 7.8125e-06,
14
+ "logps/chosen": -122.16297149658203,
15
+ "logps/rejected": -71.43323516845703,
16
+ "loss": 0.4952,
17
+ "losses/dpo": 0.4956728219985962,
18
+ "losses/sft": 0.7316558957099915,
19
+ "losses/total": 0.4956728219985962,
20
+ "ref_logps/chosen": -127.74378204345703,
21
+ "ref_logps/rejected": -70.59587860107422,
22
+ "rewards/accuracies": 0.8070000410079956,
23
+ "rewards/chosen": 0.5580801367759705,
24
+ "rewards/margins": 0.6418154239654541,
25
+ "rewards/rejected": -0.08373536914587021,
26
+ "step": 25
27
+ },
28
+ {
29
+ "epoch": 0.24,
30
+ "learning_rate": 1.5625e-05,
31
+ "logps/chosen": -94.26205444335938,
32
+ "logps/rejected": -85.60575866699219,
33
+ "loss": 0.0691,
34
+ "losses/dpo": 0.07388682663440704,
35
+ "losses/sft": 0.5650071501731873,
36
+ "losses/total": 0.07388682663440704,
37
+ "ref_logps/chosen": -128.54661560058594,
38
+ "ref_logps/rejected": -72.49893951416016,
39
+ "rewards/accuracies": 0.9929999709129333,
40
+ "rewards/chosen": 3.428455114364624,
41
+ "rewards/margins": 4.739137649536133,
42
+ "rewards/rejected": -1.310682773590088,
43
+ "step": 50
44
+ },
45
+ {
46
+ "epoch": 0.35,
47
+ "learning_rate": 1.9615384615384617e-05,
48
+ "logps/chosen": -85.07345581054688,
49
+ "logps/rejected": -101.97691345214844,
50
+ "loss": 0.0179,
51
+ "losses/dpo": 0.014726839028298855,
52
+ "losses/sft": 0.5030468106269836,
53
+ "losses/total": 0.014726839028298855,
54
+ "ref_logps/chosen": -129.9876708984375,
55
+ "ref_logps/rejected": -72.3249282836914,
56
+ "rewards/accuracies": 0.9989999532699585,
57
+ "rewards/chosen": 4.491419792175293,
58
+ "rewards/margins": 7.45661735534668,
59
+ "rewards/rejected": -2.9651970863342285,
60
+ "step": 75
61
+ },
62
+ {
63
+ "epoch": 0.47,
64
+ "learning_rate": 1.8741258741258744e-05,
65
+ "logps/chosen": -85.64691162109375,
66
+ "logps/rejected": -110.90087890625,
67
+ "loss": 0.0096,
68
+ "losses/dpo": 0.012412017211318016,
69
+ "losses/sft": 0.5199429988861084,
70
+ "losses/total": 0.012412017211318016,
71
+ "ref_logps/chosen": -130.2884979248047,
72
+ "ref_logps/rejected": -71.44290924072266,
73
+ "rewards/accuracies": 0.9984999299049377,
74
+ "rewards/chosen": 4.464157581329346,
75
+ "rewards/margins": 8.409955024719238,
76
+ "rewards/rejected": -3.9457967281341553,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.59,
81
+ "learning_rate": 1.7867132867132868e-05,
82
+ "logps/chosen": -82.34768676757812,
83
+ "logps/rejected": -116.94005584716797,
84
+ "loss": 0.0061,
85
+ "losses/dpo": 0.008614934980869293,
86
+ "losses/sft": 0.49562689661979675,
87
+ "losses/total": 0.008614934980869293,
88
+ "ref_logps/chosen": -128.71200561523438,
89
+ "ref_logps/rejected": -71.86701202392578,
90
+ "rewards/accuracies": 0.9994999766349792,
91
+ "rewards/chosen": 4.636431694030762,
92
+ "rewards/margins": 9.143735885620117,
93
+ "rewards/rejected": -4.507305145263672,
94
+ "step": 125
95
+ },
96
+ {
97
+ "epoch": 0.71,
98
+ "learning_rate": 1.6993006993006995e-05,
99
+ "logps/chosen": -85.28910064697266,
100
+ "logps/rejected": -123.17980194091797,
101
+ "loss": 0.0053,
102
+ "losses/dpo": 0.004700234159827232,
103
+ "losses/sft": 0.5220319032669067,
104
+ "losses/total": 0.004700234159827232,
105
+ "ref_logps/chosen": -129.39625549316406,
106
+ "ref_logps/rejected": -70.16360473632812,
107
+ "rewards/accuracies": 0.9994999766349792,
108
+ "rewards/chosen": 4.410714149475098,
109
+ "rewards/margins": 9.712334632873535,
110
+ "rewards/rejected": -5.301620006561279,
111
+ "step": 150
112
+ },
113
+ {
114
+ "epoch": 0.83,
115
+ "learning_rate": 1.611888111888112e-05,
116
+ "logps/chosen": -84.59432983398438,
117
+ "logps/rejected": -131.1455535888672,
118
+ "loss": 0.0051,
119
+ "losses/dpo": 0.003301014890894294,
120
+ "losses/sft": 0.5115602016448975,
121
+ "losses/total": 0.003301014890894294,
122
+ "ref_logps/chosen": -127.61747741699219,
123
+ "ref_logps/rejected": -71.97355651855469,
124
+ "rewards/accuracies": 0.9994999766349792,
125
+ "rewards/chosen": 4.302317142486572,
126
+ "rewards/margins": 10.219517707824707,
127
+ "rewards/rejected": -5.917200088500977,
128
+ "step": 175
129
+ },
130
+ {
131
+ "epoch": 0.94,
132
+ "learning_rate": 1.5244755244755244e-05,
133
+ "logps/chosen": -88.2319564819336,
134
+ "logps/rejected": -141.83016967773438,
135
+ "loss": 0.0028,
136
+ "losses/dpo": 0.002293643541634083,
137
+ "losses/sft": 0.5383260846138,
138
+ "losses/total": 0.002293643541634083,
139
+ "ref_logps/chosen": -129.1661376953125,
140
+ "ref_logps/rejected": -71.8288803100586,
141
+ "rewards/accuracies": 1.0,
142
+ "rewards/chosen": 4.093417167663574,
143
+ "rewards/margins": 11.093545913696289,
144
+ "rewards/rejected": -7.000128746032715,
145
+ "step": 200
146
+ },
147
+ {
148
+ "epoch": 1.06,
149
+ "learning_rate": 1.4370629370629371e-05,
150
+ "logps/chosen": -89.62419891357422,
151
+ "logps/rejected": -153.9451904296875,
152
+ "loss": 0.0033,
153
+ "losses/dpo": 0.0030320805963128805,
154
+ "losses/sft": 0.532666027545929,
155
+ "losses/total": 0.0030320805963128805,
156
+ "ref_logps/chosen": -128.41148376464844,
157
+ "ref_logps/rejected": -71.97950744628906,
158
+ "rewards/accuracies": 0.9989999532699585,
159
+ "rewards/chosen": 3.878729820251465,
160
+ "rewards/margins": 12.075300216674805,
161
+ "rewards/rejected": -8.196569442749023,
162
+ "step": 225
163
+ },
164
+ {
165
+ "epoch": 1.18,
166
+ "learning_rate": 1.3496503496503497e-05,
167
+ "logps/chosen": -86.68380737304688,
168
+ "logps/rejected": -156.0954132080078,
169
+ "loss": 0.0013,
170
+ "losses/dpo": 0.0008313562138937414,
171
+ "losses/sft": 0.518293559551239,
172
+ "losses/total": 0.0008313562138937414,
173
+ "ref_logps/chosen": -128.29469299316406,
174
+ "ref_logps/rejected": -71.77529907226562,
175
+ "rewards/accuracies": 1.0,
176
+ "rewards/chosen": 4.161087989807129,
177
+ "rewards/margins": 12.593099594116211,
178
+ "rewards/rejected": -8.432010650634766,
179
+ "step": 250
180
+ },
181
+ {
182
+ "epoch": 1.3,
183
+ "learning_rate": 1.2622377622377624e-05,
184
+ "logps/chosen": -88.83470916748047,
185
+ "logps/rejected": -160.05836486816406,
186
+ "loss": 0.0015,
187
+ "losses/dpo": 0.0008700879407115281,
188
+ "losses/sft": 0.5323516726493835,
189
+ "losses/total": 0.0008700879407115281,
190
+ "ref_logps/chosen": -128.95960998535156,
191
+ "ref_logps/rejected": -70.340576171875,
192
+ "rewards/accuracies": 0.9994999766349792,
193
+ "rewards/chosen": 4.012491703033447,
194
+ "rewards/margins": 12.984270095825195,
195
+ "rewards/rejected": -8.971778869628906,
196
+ "step": 275
197
+ },
198
+ {
199
+ "epoch": 1.42,
200
+ "learning_rate": 1.1748251748251748e-05,
201
+ "logps/chosen": -90.86172485351562,
202
+ "logps/rejected": -162.376953125,
203
+ "loss": 0.0011,
204
+ "losses/dpo": 0.00106943363789469,
205
+ "losses/sft": 0.5586134195327759,
206
+ "losses/total": 0.00106943363789469,
207
+ "ref_logps/chosen": -129.39662170410156,
208
+ "ref_logps/rejected": -71.82042694091797,
209
+ "rewards/accuracies": 1.0,
210
+ "rewards/chosen": 3.853489398956299,
211
+ "rewards/margins": 12.909143447875977,
212
+ "rewards/rejected": -9.055652618408203,
213
+ "step": 300
214
+ },
215
+ {
216
+ "epoch": 1.53,
217
+ "learning_rate": 1.0874125874125875e-05,
218
+ "logps/chosen": -93.8604507446289,
219
+ "logps/rejected": -171.50653076171875,
220
+ "loss": 0.0013,
221
+ "losses/dpo": 0.0009554739226587117,
222
+ "losses/sft": 0.5667473077774048,
223
+ "losses/total": 0.0009554739226587117,
224
+ "ref_logps/chosen": -128.62173461914062,
225
+ "ref_logps/rejected": -72.07390594482422,
226
+ "rewards/accuracies": 1.0,
227
+ "rewards/chosen": 3.476128339767456,
228
+ "rewards/margins": 13.419390678405762,
229
+ "rewards/rejected": -9.943263053894043,
230
+ "step": 325
231
+ },
232
+ {
233
+ "epoch": 1.65,
234
+ "learning_rate": 1e-05,
235
+ "logps/chosen": -92.96914672851562,
236
+ "logps/rejected": -173.7914276123047,
237
+ "loss": 0.0009,
238
+ "losses/dpo": 0.00044292627717368305,
239
+ "losses/sft": 0.561470627784729,
240
+ "losses/total": 0.00044292627717368305,
241
+ "ref_logps/chosen": -128.7430877685547,
242
+ "ref_logps/rejected": -72.57361602783203,
243
+ "rewards/accuracies": 0.9994999766349792,
244
+ "rewards/chosen": 3.577392578125,
245
+ "rewards/margins": 13.699174880981445,
246
+ "rewards/rejected": -10.121781349182129,
247
+ "step": 350
248
+ },
249
+ {
250
+ "epoch": 1.77,
251
+ "learning_rate": 9.125874125874126e-06,
252
+ "logps/chosen": -94.47010040283203,
253
+ "logps/rejected": -175.3832550048828,
254
+ "loss": 0.0006,
255
+ "losses/dpo": 0.00038047495763748884,
256
+ "losses/sft": 0.567641019821167,
257
+ "losses/total": 0.00038047495763748884,
258
+ "ref_logps/chosen": -128.09613037109375,
259
+ "ref_logps/rejected": -71.36651611328125,
260
+ "rewards/accuracies": 1.0,
261
+ "rewards/chosen": 3.3626015186309814,
262
+ "rewards/margins": 13.764276504516602,
263
+ "rewards/rejected": -10.401673316955566,
264
+ "step": 375
265
+ },
266
+ {
267
+ "epoch": 1.89,
268
+ "learning_rate": 8.251748251748254e-06,
269
+ "logps/chosen": -96.90827941894531,
270
+ "logps/rejected": -179.17970275878906,
271
+ "loss": 0.0004,
272
+ "losses/dpo": 0.0003745325666386634,
273
+ "losses/sft": 0.5794407725334167,
274
+ "losses/total": 0.0003745325666386634,
275
+ "ref_logps/chosen": -129.79989624023438,
276
+ "ref_logps/rejected": -71.4466323852539,
277
+ "rewards/accuracies": 1.0,
278
+ "rewards/chosen": 3.2891619205474854,
279
+ "rewards/margins": 14.062468528747559,
280
+ "rewards/rejected": -10.773306846618652,
281
+ "step": 400
282
+ },
283
+ {
284
+ "epoch": 2.0,
285
+ "learning_rate": 7.377622377622379e-06,
286
+ "logps/chosen": -96.15299987792969,
287
+ "logps/rejected": -181.71612548828125,
288
+ "loss": 0.001,
289
+ "losses/dpo": 0.0020445636473596096,
290
+ "losses/sft": 0.5634098052978516,
291
+ "losses/total": 0.0020445636473596096,
292
+ "ref_logps/chosen": -130.4124298095703,
293
+ "ref_logps/rejected": -71.5147933959961,
294
+ "rewards/accuracies": 1.0,
295
+ "rewards/chosen": 3.4259424209594727,
296
+ "rewards/margins": 14.446078300476074,
297
+ "rewards/rejected": -11.020133972167969,
298
+ "step": 425
299
+ },
300
+ {
301
+ "epoch": 2.12,
302
+ "learning_rate": 6.503496503496504e-06,
303
+ "logps/chosen": -93.45982360839844,
304
+ "logps/rejected": -177.76284790039062,
305
+ "loss": 0.0003,
306
+ "losses/dpo": 0.0001847467792686075,
307
+ "losses/sft": 0.5676508545875549,
308
+ "losses/total": 0.0001847467792686075,
309
+ "ref_logps/chosen": -128.48521423339844,
310
+ "ref_logps/rejected": -71.86593627929688,
311
+ "rewards/accuracies": 1.0,
312
+ "rewards/chosen": 3.5025393962860107,
313
+ "rewards/margins": 14.092233657836914,
314
+ "rewards/rejected": -10.589694023132324,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 2.24,
319
+ "learning_rate": 5.629370629370629e-06,
320
+ "logps/chosen": -95.26840209960938,
321
+ "logps/rejected": -181.77322387695312,
322
+ "loss": 0.0003,
323
+ "losses/dpo": 0.00029167634784244,
324
+ "losses/sft": 0.5723408460617065,
325
+ "losses/total": 0.00029167634784244,
326
+ "ref_logps/chosen": -129.8194580078125,
327
+ "ref_logps/rejected": -71.22503662109375,
328
+ "rewards/accuracies": 1.0,
329
+ "rewards/chosen": 3.455106496810913,
330
+ "rewards/margins": 14.509923934936523,
331
+ "rewards/rejected": -11.054819107055664,
332
+ "step": 475
333
+ },
334
+ {
335
+ "epoch": 2.36,
336
+ "learning_rate": 4.755244755244756e-06,
337
+ "logps/chosen": -96.92023468017578,
338
+ "logps/rejected": -186.9994354248047,
339
+ "loss": 0.0002,
340
+ "losses/dpo": 0.00031228098669089377,
341
+ "losses/sft": 0.5785849690437317,
342
+ "losses/total": 0.00031228098669089377,
343
+ "ref_logps/chosen": -129.39373779296875,
344
+ "ref_logps/rejected": -72.0064468383789,
345
+ "rewards/accuracies": 1.0,
346
+ "rewards/chosen": 3.2473514080047607,
347
+ "rewards/margins": 14.746650695800781,
348
+ "rewards/rejected": -11.499299049377441,
349
+ "step": 500
350
+ },
351
+ {
352
+ "epoch": 2.48,
353
+ "learning_rate": 3.881118881118881e-06,
354
+ "logps/chosen": -96.56753540039062,
355
+ "logps/rejected": -188.7633819580078,
356
+ "loss": 0.0003,
357
+ "losses/dpo": 0.00019269342010375112,
358
+ "losses/sft": 0.5762569904327393,
359
+ "losses/total": 0.00019269342010375112,
360
+ "ref_logps/chosen": -128.0542449951172,
361
+ "ref_logps/rejected": -71.33090209960938,
362
+ "rewards/accuracies": 1.0,
363
+ "rewards/chosen": 3.148669481277466,
364
+ "rewards/margins": 14.891918182373047,
365
+ "rewards/rejected": -11.743247985839844,
366
+ "step": 525
367
+ },
368
+ {
369
+ "epoch": 2.59,
370
+ "learning_rate": 3.006993006993007e-06,
371
+ "logps/chosen": -97.47007751464844,
372
+ "logps/rejected": -189.46685791015625,
373
+ "loss": 0.0002,
374
+ "losses/dpo": 0.00025137903867289424,
375
+ "losses/sft": 0.5845997333526611,
376
+ "losses/total": 0.00025137903867289424,
377
+ "ref_logps/chosen": -128.27481079101562,
378
+ "ref_logps/rejected": -71.0475082397461,
379
+ "rewards/accuracies": 1.0,
380
+ "rewards/chosen": 3.0804734230041504,
381
+ "rewards/margins": 14.922408103942871,
382
+ "rewards/rejected": -11.841936111450195,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 2.71,
387
+ "learning_rate": 2.132867132867133e-06,
388
+ "logps/chosen": -97.53392791748047,
389
+ "logps/rejected": -190.3184356689453,
390
+ "loss": 0.0003,
391
+ "losses/dpo": 0.00022768642520532012,
392
+ "losses/sft": 0.5818451642990112,
393
+ "losses/total": 0.00022768642520532012,
394
+ "ref_logps/chosen": -127.79641723632812,
395
+ "ref_logps/rejected": -71.30667877197266,
396
+ "rewards/accuracies": 1.0,
397
+ "rewards/chosen": 3.0262484550476074,
398
+ "rewards/margins": 14.927424430847168,
399
+ "rewards/rejected": -11.901176452636719,
400
+ "step": 575
401
+ },
402
+ {
403
+ "epoch": 2.83,
404
+ "learning_rate": 1.258741258741259e-06,
405
+ "logps/chosen": -98.7781753540039,
406
+ "logps/rejected": -194.68101501464844,
407
+ "loss": 0.0002,
408
+ "losses/dpo": 0.00027412467170506716,
409
+ "losses/sft": 0.5883935689926147,
410
+ "losses/total": 0.00027412467170506716,
411
+ "ref_logps/chosen": -129.11810302734375,
412
+ "ref_logps/rejected": -72.69854736328125,
413
+ "rewards/accuracies": 1.0,
414
+ "rewards/chosen": 3.0339913368225098,
415
+ "rewards/margins": 15.232237815856934,
416
+ "rewards/rejected": -12.198246955871582,
417
+ "step": 600
418
+ },
419
+ {
420
+ "epoch": 2.95,
421
+ "learning_rate": 3.846153846153847e-07,
422
+ "logps/chosen": -99.09100341796875,
423
+ "logps/rejected": -193.7891387939453,
424
+ "loss": 0.0002,
425
+ "losses/dpo": 0.0002839878143277019,
426
+ "losses/sft": 0.5941969752311707,
427
+ "losses/total": 0.0002839878143277019,
428
+ "ref_logps/chosen": -129.47601318359375,
429
+ "ref_logps/rejected": -71.78772735595703,
430
+ "rewards/accuracies": 1.0,
431
+ "rewards/chosen": 3.0384998321533203,
432
+ "rewards/margins": 15.238642692565918,
433
+ "rewards/rejected": -12.200141906738281,
434
+ "step": 625
435
+ },
436
+ {
437
+ "epoch": 3.0,
438
+ "step": 636,
439
+ "total_flos": 0.0,
440
+ "train_loss": 0.02455043116600528,
441
+ "train_runtime": 18942.3632,
442
+ "train_samples_per_second": 2.686,
443
+ "train_steps_per_second": 0.034
444
+ }
445
+ ],
446
+ "logging_steps": 25,
447
+ "max_steps": 636,
448
+ "num_input_tokens_seen": 0,
449
+ "num_train_epochs": 3,
450
+ "save_steps": 500,
451
+ "total_flos": 0.0,
452
+ "train_batch_size": 4,
453
+ "trial_name": null,
454
+ "trial_params": null
455
+ }