Upload folder using huggingface_hub

#1
by sameepv21 - opened
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 256,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 128,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "out_proj",
18
+ "up_proj",
19
+ "q_proj",
20
+ "down_proj",
21
+ "fc2",
22
+ "v_proj",
23
+ "fc1",
24
+ "gate_proj",
25
+ "o_proj",
26
+ "k_proj"
27
+ ],
28
+ "task_type": "CAUSAL_LM"
29
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e982e2b4e9e8c73e9e973b3285df8bd845569bc86a8acc39791fe8c4d32d1c5e
3
+ size 785303482
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "X": [
3
+ "Image",
4
+ "Video"
5
+ ],
6
+ "_attn_implementation_autoset": true,
7
+ "_name_or_path": "DAMO-NLP-SG/VideoLLaMA3-7B",
8
+ "architectures": [
9
+ "Videollama3Qwen2ForCausalLM"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "auto_map": {
13
+ "AutoConfig": "DAMO-NLP-SG/VideoLLaMA3-7B--configuration_videollama3.Videollama3Qwen2Config",
14
+ "AutoModelForCausalLM": "DAMO-NLP-SG/VideoLLaMA3-7B--modeling_videollama3.Videollama3Qwen2ForCausalLM"
15
+ },
16
+ "bos_token_id": 151643,
17
+ "eos_token_id": 151645,
18
+ "freeze_mm_mlp_adapter": true,
19
+ "hidden_act": "silu",
20
+ "hidden_size": 3584,
21
+ "image_aspect_ratio": "pad",
22
+ "image_grid_pinpoints": null,
23
+ "image_token_index": 151665,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 18944,
26
+ "max_position_embeddings": 32768,
27
+ "max_window_layers": 28,
28
+ "mm_projector_type": "mlp2x_gelu",
29
+ "mm_use_x_patch_token": false,
30
+ "mm_use_x_start_end": false,
31
+ "model_type": "videollama3_qwen2",
32
+ "num_attention_heads": 28,
33
+ "num_hidden_layers": 28,
34
+ "num_key_value_heads": 4,
35
+ "rms_norm_eps": 1e-06,
36
+ "rope_scaling": null,
37
+ "rope_theta": 1000000.0,
38
+ "sliding_window": null,
39
+ "tie_word_embeddings": false,
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.46.3",
42
+ "tune_mm_mlp_adapter": false,
43
+ "use_cache": true,
44
+ "use_sliding_window": false,
45
+ "use_token_compression": false,
46
+ "vision_encoder": null,
47
+ "vision_encoder_config": {
48
+ "hidden_size": 1152,
49
+ "intermediate_size": 4304,
50
+ "model_type": "videollama3_vision_encoder",
51
+ "num_attention_heads": 16,
52
+ "num_hidden_layers": 27,
53
+ "patch_size": 14
54
+ },
55
+ "vocab_size": 152064
56
+ }
non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b
3
+ size 912
trainer_state.json ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9927360774818403,
5
+ "eval_steps": 500,
6
+ "global_step": 618,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12,
13
+ "learning_rate": 2.0161290322580642e-07,
14
+ "logps/chosen": -155.47128295898438,
15
+ "logps/rejected": -104.8045883178711,
16
+ "loss": 0.7536,
17
+ "losses/dpo": 0.7361882925033569,
18
+ "losses/sft": 1.3545048236846924,
19
+ "losses/total": 0.7361882925033569,
20
+ "ref_logps/chosen": -155.36932373046875,
21
+ "ref_logps/rejected": -104.93973541259766,
22
+ "rewards/accuracies": 0.48450005054473877,
23
+ "rewards/chosen": -0.010196244344115257,
24
+ "rewards/margins": -0.023710083216428757,
25
+ "rewards/rejected": 0.0135138388723135,
26
+ "step": 25
27
+ },
28
+ {
29
+ "epoch": 0.24,
30
+ "learning_rate": 4.0322580645161285e-07,
31
+ "logps/chosen": -153.7573699951172,
32
+ "logps/rejected": -107.0270004272461,
33
+ "loss": 0.7401,
34
+ "losses/dpo": 0.7450882792472839,
35
+ "losses/sft": 1.3266704082489014,
36
+ "losses/total": 0.7450882792472839,
37
+ "ref_logps/chosen": -153.8381805419922,
38
+ "ref_logps/rejected": -107.06134033203125,
39
+ "rewards/accuracies": 0.502500057220459,
40
+ "rewards/chosen": 0.00808356050401926,
41
+ "rewards/margins": 0.004649868700653315,
42
+ "rewards/rejected": 0.003433691570535302,
43
+ "step": 50
44
+ },
45
+ {
46
+ "epoch": 0.36,
47
+ "learning_rate": 4.883093525179856e-07,
48
+ "logps/chosen": -151.94009399414062,
49
+ "logps/rejected": -105.40670013427734,
50
+ "loss": 0.7213,
51
+ "losses/dpo": 0.7041549682617188,
52
+ "losses/sft": 1.3607875108718872,
53
+ "losses/total": 0.7041549682617188,
54
+ "ref_logps/chosen": -152.60272216796875,
55
+ "ref_logps/rejected": -105.59368896484375,
56
+ "rewards/accuracies": 0.5264999866485596,
57
+ "rewards/chosen": 0.06626255810260773,
58
+ "rewards/margins": 0.04756266996264458,
59
+ "rewards/rejected": 0.01869989186525345,
60
+ "step": 75
61
+ },
62
+ {
63
+ "epoch": 0.48,
64
+ "learning_rate": 4.6582733812949637e-07,
65
+ "logps/chosen": -149.70230102539062,
66
+ "logps/rejected": -106.14666748046875,
67
+ "loss": 0.6625,
68
+ "losses/dpo": 0.6611780524253845,
69
+ "losses/sft": 1.341475009918213,
70
+ "losses/total": 0.6611780524253845,
71
+ "ref_logps/chosen": -152.02659606933594,
72
+ "ref_logps/rejected": -106.69593811035156,
73
+ "rewards/accuracies": 0.621999979019165,
74
+ "rewards/chosen": 0.2324293702840805,
75
+ "rewards/margins": 0.17750366032123566,
76
+ "rewards/rejected": 0.05492572858929634,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.61,
81
+ "learning_rate": 4.4334532374100717e-07,
82
+ "logps/chosen": -148.40090942382812,
83
+ "logps/rejected": -104.86791229248047,
84
+ "loss": 0.597,
85
+ "losses/dpo": 0.5830589532852173,
86
+ "losses/sft": 1.2935805320739746,
87
+ "losses/total": 0.5830589532852173,
88
+ "ref_logps/chosen": -152.3332977294922,
89
+ "ref_logps/rejected": -105.52149200439453,
90
+ "rewards/accuracies": 0.7135000824928284,
91
+ "rewards/chosen": 0.39323729276657104,
92
+ "rewards/margins": 0.32787904143333435,
93
+ "rewards/rejected": 0.0653582364320755,
94
+ "step": 125
95
+ },
96
+ {
97
+ "epoch": 0.73,
98
+ "learning_rate": 4.2086330935251797e-07,
99
+ "logps/chosen": -147.95791625976562,
100
+ "logps/rejected": -106.17552947998047,
101
+ "loss": 0.5515,
102
+ "losses/dpo": 0.5297109484672546,
103
+ "losses/sft": 1.289250135421753,
104
+ "losses/total": 0.5297109484672546,
105
+ "ref_logps/chosen": -153.35595703125,
106
+ "ref_logps/rejected": -106.98403930664062,
107
+ "rewards/accuracies": 0.7540000677108765,
108
+ "rewards/chosen": 0.5398061871528625,
109
+ "rewards/margins": 0.45895519852638245,
110
+ "rewards/rejected": 0.0808510109782219,
111
+ "step": 150
112
+ },
113
+ {
114
+ "epoch": 0.85,
115
+ "learning_rate": 3.9838129496402877e-07,
116
+ "logps/chosen": -147.28538513183594,
117
+ "logps/rejected": -106.73645782470703,
118
+ "loss": 0.5171,
119
+ "losses/dpo": 0.526803731918335,
120
+ "losses/sft": 1.2763028144836426,
121
+ "losses/total": 0.526803731918335,
122
+ "ref_logps/chosen": -153.7596893310547,
123
+ "ref_logps/rejected": -107.54974365234375,
124
+ "rewards/accuracies": 0.7815000414848328,
125
+ "rewards/chosen": 0.6474303603172302,
126
+ "rewards/margins": 0.5661011934280396,
127
+ "rewards/rejected": 0.0813291072845459,
128
+ "step": 175
129
+ },
130
+ {
131
+ "epoch": 0.97,
132
+ "learning_rate": 3.7589928057553957e-07,
133
+ "logps/chosen": -146.67481994628906,
134
+ "logps/rejected": -104.6893539428711,
135
+ "loss": 0.4544,
136
+ "losses/dpo": 0.4492318332195282,
137
+ "losses/sft": 1.284212350845337,
138
+ "losses/total": 0.4492318332195282,
139
+ "ref_logps/chosen": -154.76519775390625,
140
+ "ref_logps/rejected": -105.09065246582031,
141
+ "rewards/accuracies": 0.825499951839447,
142
+ "rewards/chosen": 0.809037446975708,
143
+ "rewards/margins": 0.7689078450202942,
144
+ "rewards/rejected": 0.040129706263542175,
145
+ "step": 200
146
+ },
147
+ {
148
+ "epoch": 1.09,
149
+ "learning_rate": 3.5341726618705037e-07,
150
+ "logps/chosen": -144.06991577148438,
151
+ "logps/rejected": -105.81298828125,
152
+ "loss": 0.4189,
153
+ "losses/dpo": 0.42898857593536377,
154
+ "losses/sft": 1.2516932487487793,
155
+ "losses/total": 0.42898857593536377,
156
+ "ref_logps/chosen": -153.25570678710938,
157
+ "ref_logps/rejected": -106.06501007080078,
158
+ "rewards/accuracies": 0.8469999432563782,
159
+ "rewards/chosen": 0.9185801148414612,
160
+ "rewards/margins": 0.8933786749839783,
161
+ "rewards/rejected": 0.02520136535167694,
162
+ "step": 225
163
+ },
164
+ {
165
+ "epoch": 1.21,
166
+ "learning_rate": 3.309352517985611e-07,
167
+ "logps/chosen": -144.94163513183594,
168
+ "logps/rejected": -106.42683410644531,
169
+ "loss": 0.3982,
170
+ "losses/dpo": 0.41044384241104126,
171
+ "losses/sft": 1.2750244140625,
172
+ "losses/total": 0.41044384241104126,
173
+ "ref_logps/chosen": -154.86021423339844,
174
+ "ref_logps/rejected": -106.34297180175781,
175
+ "rewards/accuracies": 0.8530000448226929,
176
+ "rewards/chosen": 0.9918593764305115,
177
+ "rewards/margins": 1.0002468824386597,
178
+ "rewards/rejected": -0.008387637324631214,
179
+ "step": 250
180
+ },
181
+ {
182
+ "epoch": 1.33,
183
+ "learning_rate": 3.0845323741007197e-07,
184
+ "logps/chosen": -144.10977172851562,
185
+ "logps/rejected": -105.52392578125,
186
+ "loss": 0.3652,
187
+ "losses/dpo": 0.3518593907356262,
188
+ "losses/sft": 1.2418614625930786,
189
+ "losses/total": 0.3518593907356262,
190
+ "ref_logps/chosen": -154.4581298828125,
191
+ "ref_logps/rejected": -104.62450408935547,
192
+ "rewards/accuracies": 0.8764999508857727,
193
+ "rewards/chosen": 1.0348376035690308,
194
+ "rewards/margins": 1.1247813701629639,
195
+ "rewards/rejected": -0.08994373679161072,
196
+ "step": 275
197
+ },
198
+ {
199
+ "epoch": 1.45,
200
+ "learning_rate": 2.859712230215827e-07,
201
+ "logps/chosen": -142.1906280517578,
202
+ "logps/rejected": -108.99441528320312,
203
+ "loss": 0.3624,
204
+ "losses/dpo": 0.35955867171287537,
205
+ "losses/sft": 1.2526122331619263,
206
+ "losses/total": 0.35955867171287537,
207
+ "ref_logps/chosen": -152.7955322265625,
208
+ "ref_logps/rejected": -107.89449310302734,
209
+ "rewards/accuracies": 0.8740000128746033,
210
+ "rewards/chosen": 1.0604920387268066,
211
+ "rewards/margins": 1.170485496520996,
212
+ "rewards/rejected": -0.10999350249767303,
213
+ "step": 300
214
+ },
215
+ {
216
+ "epoch": 1.57,
217
+ "learning_rate": 2.634892086330935e-07,
218
+ "logps/chosen": -141.9879913330078,
219
+ "logps/rejected": -107.29058837890625,
220
+ "loss": 0.349,
221
+ "losses/dpo": 0.3294835686683655,
222
+ "losses/sft": 1.2578469514846802,
223
+ "losses/total": 0.3294835686683655,
224
+ "ref_logps/chosen": -152.93646240234375,
225
+ "ref_logps/rejected": -105.73262786865234,
226
+ "rewards/accuracies": 0.8819999694824219,
227
+ "rewards/chosen": 1.0948452949523926,
228
+ "rewards/margins": 1.2506405115127563,
229
+ "rewards/rejected": -0.15579518675804138,
230
+ "step": 325
231
+ },
232
+ {
233
+ "epoch": 1.69,
234
+ "learning_rate": 2.410071942446043e-07,
235
+ "logps/chosen": -142.55641174316406,
236
+ "logps/rejected": -108.43463134765625,
237
+ "loss": 0.3091,
238
+ "losses/dpo": 0.3135191798210144,
239
+ "losses/sft": 1.2662179470062256,
240
+ "losses/total": 0.3135191798210144,
241
+ "ref_logps/chosen": -154.17462158203125,
242
+ "ref_logps/rejected": -105.654052734375,
243
+ "rewards/accuracies": 0.8995000720024109,
244
+ "rewards/chosen": 1.1618221998214722,
245
+ "rewards/margins": 1.4398791790008545,
246
+ "rewards/rejected": -0.2780568599700928,
247
+ "step": 350
248
+ },
249
+ {
250
+ "epoch": 1.82,
251
+ "learning_rate": 2.185251798561151e-07,
252
+ "logps/chosen": -141.23895263671875,
253
+ "logps/rejected": -108.13822937011719,
254
+ "loss": 0.3071,
255
+ "losses/dpo": 0.299526184797287,
256
+ "losses/sft": 1.2248637676239014,
257
+ "losses/total": 0.299526184797287,
258
+ "ref_logps/chosen": -153.57838439941406,
259
+ "ref_logps/rejected": -105.29540252685547,
260
+ "rewards/accuracies": 0.8889999389648438,
261
+ "rewards/chosen": 1.2339427471160889,
262
+ "rewards/margins": 1.5182260274887085,
263
+ "rewards/rejected": -0.28428351879119873,
264
+ "step": 375
265
+ },
266
+ {
267
+ "epoch": 1.94,
268
+ "learning_rate": 1.960431654676259e-07,
269
+ "logps/chosen": -143.84800720214844,
270
+ "logps/rejected": -109.23783111572266,
271
+ "loss": 0.3079,
272
+ "losses/dpo": 0.31919562816619873,
273
+ "losses/sft": 1.2433806657791138,
274
+ "losses/total": 0.31919562816619873,
275
+ "ref_logps/chosen": -155.57464599609375,
276
+ "ref_logps/rejected": -105.85549926757812,
277
+ "rewards/accuracies": 0.8924999237060547,
278
+ "rewards/chosen": 1.1726653575897217,
279
+ "rewards/margins": 1.510898470878601,
280
+ "rewards/rejected": -0.33823302388191223,
281
+ "step": 400
282
+ },
283
+ {
284
+ "epoch": 2.06,
285
+ "learning_rate": 1.735611510791367e-07,
286
+ "logps/chosen": -142.04263305664062,
287
+ "logps/rejected": -110.5551528930664,
288
+ "loss": 0.2964,
289
+ "losses/dpo": 0.29985514283180237,
290
+ "losses/sft": 1.256807565689087,
291
+ "losses/total": 0.29985514283180237,
292
+ "ref_logps/chosen": -153.96051025390625,
293
+ "ref_logps/rejected": -106.5252914428711,
294
+ "rewards/accuracies": 0.8864999413490295,
295
+ "rewards/chosen": 1.1917892694473267,
296
+ "rewards/margins": 1.5947766304016113,
297
+ "rewards/rejected": -0.4029873013496399,
298
+ "step": 425
299
+ },
300
+ {
301
+ "epoch": 2.18,
302
+ "learning_rate": 1.5107913669064747e-07,
303
+ "logps/chosen": -141.11489868164062,
304
+ "logps/rejected": -110.66223907470703,
305
+ "loss": 0.2921,
306
+ "losses/dpo": 0.29900702834129333,
307
+ "losses/sft": 1.223684549331665,
308
+ "losses/total": 0.29900702834129333,
309
+ "ref_logps/chosen": -153.32489013671875,
310
+ "ref_logps/rejected": -106.36825561523438,
311
+ "rewards/accuracies": 0.9029999375343323,
312
+ "rewards/chosen": 1.2209986448287964,
313
+ "rewards/margins": 1.6503956317901611,
314
+ "rewards/rejected": -0.42939692735671997,
315
+ "step": 450
316
+ },
317
+ {
318
+ "epoch": 2.3,
319
+ "learning_rate": 1.2859712230215827e-07,
320
+ "logps/chosen": -140.06715393066406,
321
+ "logps/rejected": -111.3785629272461,
322
+ "loss": 0.2771,
323
+ "losses/dpo": 0.2982315719127655,
324
+ "losses/sft": 1.2262225151062012,
325
+ "losses/total": 0.2982315719127655,
326
+ "ref_logps/chosen": -152.20010375976562,
327
+ "ref_logps/rejected": -106.2849349975586,
328
+ "rewards/accuracies": 0.8995000720024109,
329
+ "rewards/chosen": 1.2132951021194458,
330
+ "rewards/margins": 1.722659707069397,
331
+ "rewards/rejected": -0.509364664554596,
332
+ "step": 475
333
+ },
334
+ {
335
+ "epoch": 2.42,
336
+ "learning_rate": 1.0611510791366907e-07,
337
+ "logps/chosen": -142.41409301757812,
338
+ "logps/rejected": -112.02964782714844,
339
+ "loss": 0.2692,
340
+ "losses/dpo": 0.26506972312927246,
341
+ "losses/sft": 1.2326300144195557,
342
+ "losses/total": 0.26506972312927246,
343
+ "ref_logps/chosen": -154.93585205078125,
344
+ "ref_logps/rejected": -106.74388885498047,
345
+ "rewards/accuracies": 0.9050000905990601,
346
+ "rewards/chosen": 1.252176284790039,
347
+ "rewards/margins": 1.7807520627975464,
348
+ "rewards/rejected": -0.5285759568214417,
349
+ "step": 500
350
+ },
351
+ {
352
+ "epoch": 2.54,
353
+ "learning_rate": 8.363309352517984e-08,
354
+ "logps/chosen": -141.35763549804688,
355
+ "logps/rejected": -111.21255493164062,
356
+ "loss": 0.2639,
357
+ "losses/dpo": 0.2448505461215973,
358
+ "losses/sft": 1.2256782054901123,
359
+ "losses/total": 0.2448505461215973,
360
+ "ref_logps/chosen": -153.86410522460938,
361
+ "ref_logps/rejected": -105.44302368164062,
362
+ "rewards/accuracies": 0.9070001244544983,
363
+ "rewards/chosen": 1.250647783279419,
364
+ "rewards/margins": 1.8276008367538452,
365
+ "rewards/rejected": -0.576953113079071,
366
+ "step": 525
367
+ },
368
+ {
369
+ "epoch": 2.66,
370
+ "learning_rate": 6.115107913669064e-08,
371
+ "logps/chosen": -140.52743530273438,
372
+ "logps/rejected": -111.19217681884766,
373
+ "loss": 0.253,
374
+ "losses/dpo": 0.2362019419670105,
375
+ "losses/sft": 1.219558835029602,
376
+ "losses/total": 0.2362019419670105,
377
+ "ref_logps/chosen": -153.1510009765625,
378
+ "ref_logps/rejected": -104.75523376464844,
379
+ "rewards/accuracies": 0.925000011920929,
380
+ "rewards/chosen": 1.2623597383499146,
381
+ "rewards/margins": 1.9060544967651367,
382
+ "rewards/rejected": -0.6436949372291565,
383
+ "step": 550
384
+ },
385
+ {
386
+ "epoch": 2.78,
387
+ "learning_rate": 3.866906474820144e-08,
388
+ "logps/chosen": -142.2875213623047,
389
+ "logps/rejected": -113.81513977050781,
390
+ "loss": 0.2529,
391
+ "losses/dpo": 0.24827653169631958,
392
+ "losses/sft": 1.2383555173873901,
393
+ "losses/total": 0.24827653169631958,
394
+ "ref_logps/chosen": -154.96365356445312,
395
+ "ref_logps/rejected": -107.25000762939453,
396
+ "rewards/accuracies": 0.9150000214576721,
397
+ "rewards/chosen": 1.2676140069961548,
398
+ "rewards/margins": 1.9241262674331665,
399
+ "rewards/rejected": -0.656512439250946,
400
+ "step": 575
401
+ },
402
+ {
403
+ "epoch": 2.91,
404
+ "learning_rate": 1.618705035971223e-08,
405
+ "logps/chosen": -140.9930877685547,
406
+ "logps/rejected": -113.36444091796875,
407
+ "loss": 0.2651,
408
+ "losses/dpo": 0.27665671706199646,
409
+ "losses/sft": 1.2188746929168701,
410
+ "losses/total": 0.27665671706199646,
411
+ "ref_logps/chosen": -153.5058135986328,
412
+ "ref_logps/rejected": -107.16682434082031,
413
+ "rewards/accuracies": 0.9030001163482666,
414
+ "rewards/chosen": 1.251274585723877,
415
+ "rewards/margins": 1.871036410331726,
416
+ "rewards/rejected": -0.6197616457939148,
417
+ "step": 600
418
+ },
419
+ {
420
+ "epoch": 2.99,
421
+ "step": 618,
422
+ "total_flos": 0.0,
423
+ "train_loss": 0.41181609237078326,
424
+ "train_runtime": 7630.1245,
425
+ "train_samples_per_second": 6.494,
426
+ "train_steps_per_second": 0.081
427
+ }
428
+ ],
429
+ "logging_steps": 25,
430
+ "max_steps": 618,
431
+ "num_input_tokens_seen": 0,
432
+ "num_train_epochs": 3,
433
+ "save_steps": 500,
434
+ "stateful_callbacks": {},
435
+ "total_flos": 0.0,
436
+ "train_batch_size": 4,
437
+ "trial_name": null,
438
+ "trial_params": null
439
+ }