Incomple committed on
Commit
33e5deb
·
verified ·
1 Parent(s): c3d942b

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: peft
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
 
 
6
  - trl
7
  - dpo
8
- - llama-factory
9
  - generated_from_trainer
10
  model-index:
11
  - name: Llama-3.1-8B-Instruct_holistic_30
@@ -17,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # Llama-3.1-8B-Instruct_holistic_30
19
 
20
- This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on an unknown dataset.
21
 
22
  ## Model description
23
 
 
3
  license: llama3.1
4
  base_model: meta-llama/Llama-3.1-8B-Instruct
5
  tags:
6
+ - llama-factory
7
+ - lora
8
  - trl
9
  - dpo
 
10
  - generated_from_trainer
11
  model-index:
12
  - name: Llama-3.1-8B-Instruct_holistic_30
 
18
 
19
  # Llama-3.1-8B-Instruct_holistic_30
20
 
21
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the holistic_30 dataset.
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9999344906649198,
3
+ "total_flos": 2.3140851990119055e+18,
4
+ "train_loss": 0.09998038786012421,
5
+ "train_runtime": 57165.5253,
6
+ "train_samples_per_second": 0.534,
7
+ "train_steps_per_second": 0.067
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9999344906649198,
3
+ "total_flos": 2.3140851990119055e+18,
4
+ "train_loss": 0.09998038786012421,
5
+ "train_runtime": 57165.5253,
6
+ "train_samples_per_second": 0.534,
7
+ "train_steps_per_second": 0.067
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9999344906649198,
5
+ "eval_steps": 500,
6
+ "global_step": 3816,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.050049132001310184,
13
+ "grad_norm": 4.843938827514648,
14
+ "learning_rate": 5e-07,
15
+ "logits/chosen": -0.5328307747840881,
16
+ "logits/rejected": -0.46889373660087585,
17
+ "logps/chosen": -74.61598205566406,
18
+ "logps/rejected": -13.190138816833496,
19
+ "loss": 0.6891,
20
+ "rewards/accuracies": 0.5595549941062927,
21
+ "rewards/chosen": 0.007412092760205269,
22
+ "rewards/margins": 0.008624909445643425,
23
+ "rewards/rejected": -0.001212816801853478,
24
+ "step": 191
25
+ },
26
+ {
27
+ "epoch": 0.10009826400262037,
28
+ "grad_norm": 3.657888174057007,
29
+ "learning_rate": 1e-06,
30
+ "logits/chosen": -0.5486236810684204,
31
+ "logits/rejected": -0.48813965916633606,
32
+ "logps/chosen": -70.09341430664062,
33
+ "logps/rejected": -13.666769981384277,
34
+ "loss": 0.5809,
35
+ "rewards/accuracies": 0.8239529132843018,
36
+ "rewards/chosen": 0.24100865423679352,
37
+ "rewards/margins": 0.2950609624385834,
38
+ "rewards/rejected": -0.054052311927080154,
39
+ "step": 382
40
+ },
41
+ {
42
+ "epoch": 0.15014739600393057,
43
+ "grad_norm": 1.726294755935669,
44
+ "learning_rate": 9.443797320908561e-07,
45
+ "logits/chosen": -0.5495401620864868,
46
+ "logits/rejected": -0.4997619390487671,
47
+ "logps/chosen": -61.40409469604492,
48
+ "logps/rejected": -19.322750091552734,
49
+ "loss": 0.2712,
50
+ "rewards/accuracies": 0.9463350772857666,
51
+ "rewards/chosen": 1.1437506675720215,
52
+ "rewards/margins": 1.791763424873352,
53
+ "rewards/rejected": -0.648012638092041,
54
+ "step": 573
55
+ },
56
+ {
57
+ "epoch": 0.20019652800524074,
58
+ "grad_norm": 9.303996086120605,
59
+ "learning_rate": 8.887594641817122e-07,
60
+ "logits/chosen": -0.5349320769309998,
61
+ "logits/rejected": -0.4746370315551758,
62
+ "logps/chosen": -61.47517776489258,
63
+ "logps/rejected": -35.966888427734375,
64
+ "loss": 0.0598,
65
+ "rewards/accuracies": 0.9869109988212585,
66
+ "rewards/chosen": 1.5467382669448853,
67
+ "rewards/margins": 3.8473398685455322,
68
+ "rewards/rejected": -2.3006019592285156,
69
+ "step": 764
70
+ },
71
+ {
72
+ "epoch": 0.2502456600065509,
73
+ "grad_norm": 0.23748359084129333,
74
+ "learning_rate": 8.331391962725683e-07,
75
+ "logits/chosen": -0.4757286310195923,
76
+ "logits/rejected": -0.3977571725845337,
77
+ "logps/chosen": -57.084896087646484,
78
+ "logps/rejected": -45.92123794555664,
79
+ "loss": 0.0267,
80
+ "rewards/accuracies": 0.9901832342147827,
81
+ "rewards/chosen": 1.6708416938781738,
82
+ "rewards/margins": 5.011633396148682,
83
+ "rewards/rejected": -3.340791702270508,
84
+ "step": 955
85
+ },
86
+ {
87
+ "epoch": 0.30029479200786113,
88
+ "grad_norm": 0.43927711248397827,
89
+ "learning_rate": 7.775189283634246e-07,
90
+ "logits/chosen": -0.4342685043811798,
91
+ "logits/rejected": -0.33603060245513916,
92
+ "logps/chosen": -57.238773345947266,
93
+ "logps/rejected": -50.37546920776367,
94
+ "loss": 0.0331,
95
+ "rewards/accuracies": 0.9869109988212585,
96
+ "rewards/chosen": 1.7030671834945679,
97
+ "rewards/margins": 5.485808372497559,
98
+ "rewards/rejected": -3.7827417850494385,
99
+ "step": 1146
100
+ },
101
+ {
102
+ "epoch": 0.3503439240091713,
103
+ "grad_norm": 0.08208785951137543,
104
+ "learning_rate": 7.218986604542807e-07,
105
+ "logits/chosen": -0.4299260973930359,
106
+ "logits/rejected": -0.32499420642852783,
107
+ "logps/chosen": -56.95598220825195,
108
+ "logps/rejected": -55.50334548950195,
109
+ "loss": 0.0327,
110
+ "rewards/accuracies": 0.985602080821991,
111
+ "rewards/chosen": 1.6657171249389648,
112
+ "rewards/margins": 5.907327651977539,
113
+ "rewards/rejected": -4.241610527038574,
114
+ "step": 1337
115
+ },
116
+ {
117
+ "epoch": 0.4003930560104815,
118
+ "grad_norm": 0.18074464797973633,
119
+ "learning_rate": 6.662783925451368e-07,
120
+ "logits/chosen": -0.389539510011673,
121
+ "logits/rejected": -0.26959964632987976,
122
+ "logps/chosen": -56.53901672363281,
123
+ "logps/rejected": -59.943992614746094,
124
+ "loss": 0.0247,
125
+ "rewards/accuracies": 0.9914921522140503,
126
+ "rewards/chosen": 1.684263825416565,
127
+ "rewards/margins": 6.446012496948242,
128
+ "rewards/rejected": -4.761748790740967,
129
+ "step": 1528
130
+ },
131
+ {
132
+ "epoch": 0.4504421880117917,
133
+ "grad_norm": 0.07140897214412689,
134
+ "learning_rate": 6.10658124635993e-07,
135
+ "logits/chosen": -0.38458481431007385,
136
+ "logits/rejected": -0.25353050231933594,
137
+ "logps/chosen": -56.987735748291016,
138
+ "logps/rejected": -65.21515655517578,
139
+ "loss": 0.0252,
140
+ "rewards/accuracies": 0.9882199168205261,
141
+ "rewards/chosen": 1.7450363636016846,
142
+ "rewards/margins": 6.95145845413208,
143
+ "rewards/rejected": -5.206421375274658,
144
+ "step": 1719
145
+ },
146
+ {
147
+ "epoch": 0.5004913200131018,
148
+ "grad_norm": 0.13229215145111084,
149
+ "learning_rate": 5.550378567268491e-07,
150
+ "logits/chosen": -0.3671688139438629,
151
+ "logits/rejected": -0.23125909268856049,
152
+ "logps/chosen": -55.54978942871094,
153
+ "logps/rejected": -69.9174575805664,
154
+ "loss": 0.0311,
155
+ "rewards/accuracies": 0.9908376932144165,
156
+ "rewards/chosen": 1.6499972343444824,
157
+ "rewards/margins": 7.394839286804199,
158
+ "rewards/rejected": -5.744842529296875,
159
+ "step": 1910
160
+ },
161
+ {
162
+ "epoch": 0.550540452014412,
163
+ "grad_norm": 0.03863115608692169,
164
+ "learning_rate": 4.994175888177053e-07,
165
+ "logits/chosen": -0.34299236536026,
166
+ "logits/rejected": -0.21112458407878876,
167
+ "logps/chosen": -56.28550338745117,
168
+ "logps/rejected": -74.3562240600586,
169
+ "loss": 0.0203,
170
+ "rewards/accuracies": 0.9908376932144165,
171
+ "rewards/chosen": 1.6116416454315186,
172
+ "rewards/margins": 7.8128886222839355,
173
+ "rewards/rejected": -6.201247215270996,
174
+ "step": 2101
175
+ },
176
+ {
177
+ "epoch": 0.6005895840157223,
178
+ "grad_norm": 0.16626648604869843,
179
+ "learning_rate": 4.4379732090856143e-07,
180
+ "logits/chosen": -0.317820280790329,
181
+ "logits/rejected": -0.16657008230686188,
182
+ "logps/chosen": -58.859397888183594,
183
+ "logps/rejected": -77.39883422851562,
184
+ "loss": 0.0317,
185
+ "rewards/accuracies": 0.9875654578208923,
186
+ "rewards/chosen": 1.6255024671554565,
187
+ "rewards/margins": 8.082979202270508,
188
+ "rewards/rejected": -6.4574761390686035,
189
+ "step": 2292
190
+ },
191
+ {
192
+ "epoch": 0.6506387160170324,
193
+ "grad_norm": 0.01698416657745838,
194
+ "learning_rate": 3.8817705299941754e-07,
195
+ "logits/chosen": -0.3418547213077545,
196
+ "logits/rejected": -0.19180215895175934,
197
+ "logps/chosen": -57.59861373901367,
198
+ "logps/rejected": -80.31951141357422,
199
+ "loss": 0.0122,
200
+ "rewards/accuracies": 0.9934555292129517,
201
+ "rewards/chosen": 1.641247034072876,
202
+ "rewards/margins": 8.408109664916992,
203
+ "rewards/rejected": -6.766862392425537,
204
+ "step": 2483
205
+ },
206
+ {
207
+ "epoch": 0.7006878480183426,
208
+ "grad_norm": 0.03158177435398102,
209
+ "learning_rate": 3.3255678509027375e-07,
210
+ "logits/chosen": -0.32275617122650146,
211
+ "logits/rejected": -0.18107445538043976,
212
+ "logps/chosen": -59.604644775390625,
213
+ "logps/rejected": -81.69689178466797,
214
+ "loss": 0.0214,
215
+ "rewards/accuracies": 0.9908376932144165,
216
+ "rewards/chosen": 1.6721757650375366,
217
+ "rewards/margins": 8.503122329711914,
218
+ "rewards/rejected": -6.83094596862793,
219
+ "step": 2674
220
+ },
221
+ {
222
+ "epoch": 0.7507369800196528,
223
+ "grad_norm": 0.08755598962306976,
224
+ "learning_rate": 2.7693651718112985e-07,
225
+ "logits/chosen": -0.32179561257362366,
226
+ "logits/rejected": -0.17599359154701233,
227
+ "logps/chosen": -57.8769416809082,
228
+ "logps/rejected": -83.73433685302734,
229
+ "loss": 0.0192,
230
+ "rewards/accuracies": 0.9908376932144165,
231
+ "rewards/chosen": 1.6639045476913452,
232
+ "rewards/margins": 8.702630043029785,
233
+ "rewards/rejected": -7.038724899291992,
234
+ "step": 2865
235
+ },
236
+ {
237
+ "epoch": 0.800786112020963,
238
+ "grad_norm": 0.015691732987761497,
239
+ "learning_rate": 2.21316249271986e-07,
240
+ "logits/chosen": -0.3434266149997711,
241
+ "logits/rejected": -0.19399824738502502,
242
+ "logps/chosen": -56.69347381591797,
243
+ "logps/rejected": -85.22378540039062,
244
+ "loss": 0.0277,
245
+ "rewards/accuracies": 0.9914921522140503,
246
+ "rewards/chosen": 1.6015362739562988,
247
+ "rewards/margins": 8.894182205200195,
248
+ "rewards/rejected": -7.2926459312438965,
249
+ "step": 3056
250
+ },
251
+ {
252
+ "epoch": 0.8508352440222732,
253
+ "grad_norm": 0.010581793263554573,
254
+ "learning_rate": 1.6569598136284217e-07,
255
+ "logits/chosen": -0.33340033888816833,
256
+ "logits/rejected": -0.18355616927146912,
257
+ "logps/chosen": -58.657325744628906,
258
+ "logps/rejected": -85.88097381591797,
259
+ "loss": 0.0251,
260
+ "rewards/accuracies": 0.9921466112136841,
261
+ "rewards/chosen": 1.615267276763916,
262
+ "rewards/margins": 8.870849609375,
263
+ "rewards/rejected": -7.255581855773926,
264
+ "step": 3247
265
+ },
266
+ {
267
+ "epoch": 0.9008843760235834,
268
+ "grad_norm": 0.02463706023991108,
269
+ "learning_rate": 1.100757134536983e-07,
270
+ "logits/chosen": -0.3216906785964966,
271
+ "logits/rejected": -0.17967411875724792,
272
+ "logps/chosen": -55.944915771484375,
273
+ "logps/rejected": -87.04055786132812,
274
+ "loss": 0.0236,
275
+ "rewards/accuracies": 0.9882199168205261,
276
+ "rewards/chosen": 1.5252350568771362,
277
+ "rewards/margins": 8.986153602600098,
278
+ "rewards/rejected": -7.460918426513672,
279
+ "step": 3438
280
+ },
281
+ {
282
+ "epoch": 0.9509335080248935,
283
+ "grad_norm": 0.1806812733411789,
284
+ "learning_rate": 5.445544554455445e-08,
285
+ "logits/chosen": -0.3310312032699585,
286
+ "logits/rejected": -0.17037709057331085,
287
+ "logps/chosen": -56.92354202270508,
288
+ "logps/rejected": -86.47986602783203,
289
+ "loss": 0.0231,
290
+ "rewards/accuracies": 0.9928010702133179,
291
+ "rewards/chosen": 1.5716968774795532,
292
+ "rewards/margins": 9.01201057434082,
293
+ "rewards/rejected": -7.440313339233398,
294
+ "step": 3629
295
+ },
296
+ {
297
+ "epoch": 0.9999344906649198,
298
+ "step": 3816,
299
+ "total_flos": 2.3140851990119055e+18,
300
+ "train_loss": 0.09998038786012421,
301
+ "train_runtime": 57165.5253,
302
+ "train_samples_per_second": 0.534,
303
+ "train_steps_per_second": 0.067
304
+ }
305
+ ],
306
+ "logging_steps": 191,
307
+ "max_steps": 3816,
308
+ "num_input_tokens_seen": 0,
309
+ "num_train_epochs": 1,
310
+ "save_steps": 500,
311
+ "stateful_callbacks": {
312
+ "TrainerControl": {
313
+ "args": {
314
+ "should_epoch_stop": false,
315
+ "should_evaluate": false,
316
+ "should_log": false,
317
+ "should_save": true,
318
+ "should_training_stop": true
319
+ },
320
+ "attributes": {}
321
+ }
322
+ },
323
+ "total_flos": 2.3140851990119055e+18,
324
+ "train_batch_size": 2,
325
+ "trial_name": null,
326
+ "trial_params": null
327
+ }
training_loss.png ADDED
training_rewards_accuracies.png ADDED