yoavf committed on
Commit
0a13e4f
·
verified ·
1 Parent(s): 4214c7d

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,6 +1,5 @@
1
  ---
2
- base_model:
3
- - Qwen/Qwen3-VL-8B-Instruct
4
  library_name: peft
5
  pipeline_tag: text-generation
6
  tags:
@@ -9,11 +8,6 @@ tags:
9
  - lora
10
  - transformers
11
  - trl
12
- license: mit
13
- datasets:
14
- - yoavf/svg-animal-illustrations
15
- language:
16
- - en
17
  ---
18
 
19
  # Model Card for Model ID
 
1
  ---
2
+ base_model: Qwen/Qwen3-VL-8B-Instruct
 
3
  library_name: peft
4
  pipeline_tag: text-generation
5
  tags:
 
8
  - lora
9
  - transformers
10
  - trl
 
 
 
 
 
11
  ---
12
 
13
  # Model Card for Model ID
adapter_config.json CHANGED
@@ -29,10 +29,10 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
- "v_proj",
33
  "k_proj",
34
  "o_proj",
35
- "q_proj"
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "q_proj",
33
  "k_proj",
34
  "o_proj",
35
+ "v_proj"
36
  ],
37
  "target_parameters": null,
38
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82a51e7b37b05dd7ea42594518355673ae114bb50d3da20d0d09dc54fee7f4f8
3
  size 61384752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb49f52f3e22aebc4e1600546d83656b67867231a07d467e75ece4c5bd4af1f
3
  size 61384752
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0af329302356ea56aed820fac9c299fb239a668e3d6ae5de9e640e6cbcf2d627
3
  size 122854795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:479122d9a9939c852f3898332cee178a46634af8cbf583fca68c585417a010b4
3
  size 122854795
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab9afdfe88ca369217f0f7bcc8f84eace053d867ea13532267eecb8300631f7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb030b37dacda29104e066d95ec1903f37c25d3a91f465220ac18eede3745980
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f673db473586ce87d2bfba76c362e332a223d9de40cad5ced52c5bc0b26e7b2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb54d230a0c2ef4f1dd1005e48f8bc418e0bd568b6e7ca0d90419ffd0cd17bf8
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08,
6
  "eval_steps": 500,
7
- "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -16,23 +16,23 @@
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
- "completions/max_length": 1053.0,
20
- "completions/max_terminated_length": 1053.0,
21
- "completions/mean_length": 778.6000366210938,
22
- "completions/mean_terminated_length": 778.6000366210938,
23
- "completions/min_length": 620.0,
24
- "completions/min_terminated_length": 620.0,
25
- "entropy": 0.08571863919496536,
26
  "epoch": 0.002,
27
  "frac_reward_zero_std": 0.0,
28
- "grad_norm": 0.4034070670604706,
29
- "learning_rate": 1e-06,
30
- "loss": 0.0811,
31
- "num_tokens": 3988.0,
32
- "reward": 0.6599999666213989,
33
- "reward_std": 0.022360675036907196,
34
- "rewards/reward_func_with_saving/mean": 0.6599999666213989,
35
- "rewards/reward_func_with_saving/std": 0.02236068621277809,
36
  "step": 1
37
  },
38
  {
@@ -41,11 +41,24 @@
41
  "clip_ratio/low_mean": 0.0,
42
  "clip_ratio/low_min": 0.0,
43
  "clip_ratio/region_mean": 0.0,
44
- "entropy": 0.05181041359901428,
 
 
 
 
 
 
 
45
  "epoch": 0.004,
46
- "grad_norm": 0.9778931736946106,
47
- "learning_rate": 1e-06,
48
- "loss": -0.317,
 
 
 
 
 
 
49
  "step": 2
50
  },
51
  {
@@ -54,37 +67,76 @@
54
  "clip_ratio/low_mean": 0.0,
55
  "clip_ratio/low_min": 0.0,
56
  "clip_ratio/region_mean": 0.0,
57
- "entropy": 0.10429538041353226,
 
 
 
 
 
 
 
58
  "epoch": 0.006,
59
- "grad_norm": 0.31522250175476074,
60
- "learning_rate": 1e-06,
61
- "loss": 0.1203,
 
 
 
 
 
 
62
  "step": 3
63
  },
64
  {
65
  "clip_ratio/high_max": 0.0,
66
  "clip_ratio/high_mean": 0.0,
67
- "clip_ratio/low_mean": 0.001612903201021254,
68
- "clip_ratio/low_min": 0.001612903201021254,
69
- "clip_ratio/region_mean": 0.001612903201021254,
70
- "entropy": 0.06899195909500122,
 
 
 
 
 
 
 
71
  "epoch": 0.008,
72
- "grad_norm": 0.31318220496177673,
73
- "learning_rate": 1e-06,
74
- "loss": 0.0709,
 
 
 
 
 
 
75
  "step": 4
76
  },
77
  {
78
  "clip_ratio/high_max": 0.0,
79
  "clip_ratio/high_mean": 0.0,
80
- "clip_ratio/low_mean": 0.0012224939418956637,
81
- "clip_ratio/low_min": 0.0012224939418956637,
82
- "clip_ratio/region_mean": 0.0012224939418956637,
83
- "entropy": 0.072004035115242,
 
 
 
 
 
 
 
84
  "epoch": 0.01,
85
- "grad_norm": 0.36428698897361755,
86
- "learning_rate": 1e-06,
87
- "loss": 0.0935,
 
 
 
 
 
 
88
  "step": 5
89
  },
90
  {
@@ -94,23 +146,23 @@
94
  "clip_ratio/low_min": 0.0,
95
  "clip_ratio/region_mean": 0.0,
96
  "completions/clipped_ratio": 0.0,
97
- "completions/max_length": 697.0,
98
- "completions/max_terminated_length": 697.0,
99
- "completions/mean_length": 680.0,
100
- "completions/mean_terminated_length": 680.0,
101
- "completions/min_length": 662.0,
102
- "completions/min_terminated_length": 662.0,
103
- "entropy": 0.04808889329433441,
104
  "epoch": 0.012,
105
  "frac_reward_zero_std": 0.0,
106
- "grad_norm": 0.3064998388290405,
107
- "learning_rate": 1e-06,
108
- "loss": -0.0905,
109
- "num_tokens": 7488.0,
110
- "reward": 0.7333332896232605,
111
- "reward_std": 0.09316948801279068,
112
- "rewards/reward_func_with_saving/mean": 0.7333332896232605,
113
- "rewards/reward_func_with_saving/std": 0.09316948801279068,
114
  "step": 6
115
  },
116
  {
@@ -119,11 +171,24 @@
119
  "clip_ratio/low_mean": 0.0,
120
  "clip_ratio/low_min": 0.0,
121
  "clip_ratio/region_mean": 0.0,
122
- "entropy": 0.050035636872053146,
 
 
 
 
 
 
 
123
  "epoch": 0.014,
124
- "grad_norm": 1.2182977199554443,
125
- "learning_rate": 1e-06,
126
- "loss": 0.348,
 
 
 
 
 
 
127
  "step": 7
128
  },
129
  {
@@ -132,11 +197,24 @@
132
  "clip_ratio/low_mean": 0.0,
133
  "clip_ratio/low_min": 0.0,
134
  "clip_ratio/region_mean": 0.0,
135
- "entropy": 0.05900810286402702,
 
 
 
 
 
 
 
136
  "epoch": 0.016,
137
- "grad_norm": 0.30796700716018677,
138
- "learning_rate": 1e-06,
139
- "loss": -0.0899,
 
 
 
 
 
 
140
  "step": 8
141
  },
142
  {
@@ -145,11 +223,24 @@
145
  "clip_ratio/low_mean": 0.0,
146
  "clip_ratio/low_min": 0.0,
147
  "clip_ratio/region_mean": 0.0,
148
- "entropy": 0.06963960826396942,
 
 
 
 
 
 
 
149
  "epoch": 0.018,
150
- "grad_norm": 0.40918684005737305,
151
- "learning_rate": 1e-06,
152
- "loss": -0.0878,
 
 
 
 
 
 
153
  "step": 9
154
  },
155
  {
@@ -158,11 +249,24 @@
158
  "clip_ratio/low_mean": 0.0,
159
  "clip_ratio/low_min": 0.0,
160
  "clip_ratio/region_mean": 0.0,
161
- "entropy": 0.06808315217494965,
 
 
 
 
 
 
 
162
  "epoch": 0.02,
163
- "grad_norm": 0.26108482480049133,
164
- "learning_rate": 1e-06,
165
- "loss": -0.0915,
 
 
 
 
 
 
166
  "step": 10
167
  },
168
  {
@@ -172,23 +276,23 @@
172
  "clip_ratio/low_min": 0.0,
173
  "clip_ratio/region_mean": 0.0,
174
  "completions/clipped_ratio": 0.0,
175
- "completions/max_length": 796.0,
176
- "completions/max_terminated_length": 796.0,
177
- "completions/mean_length": 697.2000122070312,
178
- "completions/mean_terminated_length": 697.2000122070312,
179
- "completions/min_length": 610.0,
180
- "completions/min_terminated_length": 610.0,
181
- "entropy": 0.032603874802589417,
182
  "epoch": 0.022,
183
  "frac_reward_zero_std": 0.0,
184
- "grad_norm": 0.21574945747852325,
185
- "learning_rate": 1e-06,
186
- "loss": -0.1102,
187
- "num_tokens": 11084.0,
188
- "reward": 0.7483333945274353,
189
- "reward_std": 0.04224595054984093,
190
- "rewards/reward_func_with_saving/mean": 0.7483333349227905,
191
- "rewards/reward_func_with_saving/std": 0.04224596172571182,
192
  "step": 11
193
  },
194
  {
@@ -197,52 +301,26 @@
197
  "clip_ratio/low_mean": 0.0,
198
  "clip_ratio/low_min": 0.0,
199
  "clip_ratio/region_mean": 0.0,
200
- "entropy": 0.04580014944076538,
 
 
 
 
 
 
 
201
  "epoch": 0.024,
202
- "grad_norm": 0.6932300329208374,
203
- "learning_rate": 1e-06,
204
- "loss": 0.3115,
 
 
 
 
 
 
205
  "step": 12
206
  },
207
- {
208
- "clip_ratio/high_max": 0.0,
209
- "clip_ratio/high_mean": 0.0,
210
- "clip_ratio/low_mean": 0.0,
211
- "clip_ratio/low_min": 0.0,
212
- "clip_ratio/region_mean": 0.0,
213
- "entropy": 0.05027260258793831,
214
- "epoch": 0.026,
215
- "grad_norm": 0.2965257465839386,
216
- "learning_rate": 1e-06,
217
- "loss": -0.1438,
218
- "step": 13
219
- },
220
- {
221
- "clip_ratio/high_max": 0.0,
222
- "clip_ratio/high_mean": 0.0,
223
- "clip_ratio/low_mean": 0.0,
224
- "clip_ratio/low_min": 0.0,
225
- "clip_ratio/region_mean": 0.0,
226
- "entropy": 0.05315280705690384,
227
- "epoch": 0.028,
228
- "grad_norm": 0.01823524758219719,
229
- "learning_rate": 1e-06,
230
- "loss": -0.0088,
231
- "step": 14
232
- },
233
- {
234
- "clip_ratio/high_max": 0.0,
235
- "clip_ratio/high_mean": 0.0,
236
- "clip_ratio/low_mean": 0.0,
237
- "clip_ratio/low_min": 0.0,
238
- "clip_ratio/region_mean": 0.0,
239
- "entropy": 0.0618734173476696,
240
- "epoch": 0.03,
241
- "grad_norm": 0.4097316861152649,
242
- "learning_rate": 1e-06,
243
- "loss": -0.0837,
244
- "step": 15
245
- },
246
  {
247
  "clip_ratio/high_max": 0.0,
248
  "clip_ratio/high_mean": 0.0,
@@ -250,154 +328,24 @@
250
  "clip_ratio/low_min": 0.0,
251
  "clip_ratio/region_mean": 0.0,
252
  "completions/clipped_ratio": 0.0,
253
- "completions/max_length": 989.0,
254
- "completions/max_terminated_length": 989.0,
255
- "completions/mean_length": 889.4000244140625,
256
- "completions/mean_terminated_length": 889.4000244140625,
257
  "completions/min_length": 783.0,
258
  "completions/min_terminated_length": 783.0,
259
- "entropy": 0.07022179663181305,
260
- "epoch": 0.032,
261
- "frac_reward_zero_std": 0.0,
262
- "grad_norm": 0.8408637046813965,
263
- "learning_rate": 1e-06,
264
- "loss": 0.2405,
265
- "num_tokens": 15631.0,
266
- "reward": 0.6633333563804626,
267
- "reward_std": 0.07490735501050949,
268
- "rewards/reward_func_with_saving/mean": 0.6633332967758179,
269
- "rewards/reward_func_with_saving/std": 0.0749073475599289,
270
- "step": 16
271
- },
272
- {
273
- "clip_ratio/high_max": 0.0,
274
- "clip_ratio/high_mean": 0.0,
275
- "clip_ratio/low_mean": 0.0,
276
- "clip_ratio/low_min": 0.0,
277
- "clip_ratio/region_mean": 0.0,
278
- "entropy": 0.07662880420684814,
279
- "epoch": 0.034,
280
- "grad_norm": 0.18040509521961212,
281
- "learning_rate": 1e-06,
282
- "loss": 0.0395,
283
- "step": 17
284
- },
285
- {
286
- "clip_ratio/high_max": 0.0,
287
- "clip_ratio/high_mean": 0.0,
288
- "clip_ratio/low_mean": 0.0,
289
- "clip_ratio/low_min": 0.0,
290
- "clip_ratio/region_mean": 0.0,
291
- "entropy": 0.07824546098709106,
292
- "epoch": 0.036,
293
- "grad_norm": 0.9069496393203735,
294
- "learning_rate": 1e-06,
295
- "loss": -0.2769,
296
- "step": 18
297
- },
298
- {
299
- "clip_ratio/high_max": 0.0,
300
- "clip_ratio/high_mean": 0.0,
301
- "clip_ratio/low_mean": 0.0,
302
- "clip_ratio/low_min": 0.0,
303
- "clip_ratio/region_mean": 0.0,
304
- "entropy": 0.07629340887069702,
305
- "epoch": 0.038,
306
- "grad_norm": 0.20565274357795715,
307
- "learning_rate": 1e-06,
308
- "loss": 0.0705,
309
- "step": 19
310
- },
311
- {
312
- "clip_ratio/high_max": 0.0010515246540307999,
313
- "clip_ratio/high_mean": 0.0010515246540307999,
314
- "clip_ratio/low_mean": 0.0,
315
- "clip_ratio/low_min": 0.0,
316
- "clip_ratio/region_mean": 0.0010515246540307999,
317
- "entropy": 0.0615997388958931,
318
- "epoch": 0.04,
319
- "grad_norm": 0.3993583619594574,
320
- "learning_rate": 1e-06,
321
- "loss": -0.1046,
322
- "step": 20
323
- },
324
- {
325
- "clip_ratio/high_max": 0.0,
326
- "clip_ratio/high_mean": 0.0,
327
- "clip_ratio/low_mean": 0.0,
328
- "clip_ratio/low_min": 0.0,
329
- "clip_ratio/region_mean": 0.0,
330
- "completions/clipped_ratio": 0.0,
331
- "completions/max_length": 839.0,
332
- "completions/max_terminated_length": 839.0,
333
- "completions/mean_length": 745.6000366210938,
334
- "completions/mean_terminated_length": 745.6000366210938,
335
- "completions/min_length": 617.0,
336
- "completions/min_terminated_length": 617.0,
337
- "entropy": 0.07856940478086472,
338
- "epoch": 0.042,
339
  "frac_reward_zero_std": 0.0,
340
- "grad_norm": 1.6567957401275635,
341
- "learning_rate": 1e-06,
342
- "loss": 0.2708,
343
- "num_tokens": 19444.0,
344
- "reward": 0.6916667222976685,
345
- "reward_std": 0.06291528046131134,
346
- "rewards/reward_func_with_saving/mean": 0.6916666626930237,
347
- "rewards/reward_func_with_saving/std": 0.06291527301073074,
348
- "step": 21
349
- },
350
- {
351
- "clip_ratio/high_max": 0.0,
352
- "clip_ratio/high_mean": 0.0,
353
- "clip_ratio/low_mean": 0.0,
354
- "clip_ratio/low_min": 0.0,
355
- "clip_ratio/region_mean": 0.0,
356
- "entropy": 0.06388463824987411,
357
- "epoch": 0.044,
358
- "grad_norm": 0.2810056209564209,
359
- "learning_rate": 1e-06,
360
- "loss": 0.0893,
361
- "step": 22
362
- },
363
- {
364
- "clip_ratio/high_max": 0.0,
365
- "clip_ratio/high_mean": 0.0,
366
- "clip_ratio/low_mean": 0.0,
367
- "clip_ratio/low_min": 0.0,
368
- "clip_ratio/region_mean": 0.0,
369
- "entropy": 0.05800528824329376,
370
- "epoch": 0.046,
371
- "grad_norm": 0.08997764438390732,
372
- "learning_rate": 1e-06,
373
- "loss": -0.0287,
374
- "step": 23
375
- },
376
- {
377
- "clip_ratio/high_max": 0.0,
378
- "clip_ratio/high_mean": 0.0,
379
- "clip_ratio/low_mean": 0.0,
380
- "clip_ratio/low_min": 0.0,
381
- "clip_ratio/region_mean": 0.0,
382
- "entropy": 0.06156563758850098,
383
- "epoch": 0.048,
384
- "grad_norm": 0.41701430082321167,
385
- "learning_rate": 1e-06,
386
- "loss": -0.109,
387
- "step": 24
388
- },
389
- {
390
- "clip_ratio/high_max": 0.0016207455191761255,
391
- "clip_ratio/high_mean": 0.0016207455191761255,
392
- "clip_ratio/low_mean": 0.0,
393
- "clip_ratio/low_min": 0.0,
394
- "clip_ratio/region_mean": 0.0016207455191761255,
395
- "entropy": 0.054504893720149994,
396
- "epoch": 0.05,
397
- "grad_norm": 1.1660373210906982,
398
- "learning_rate": 1e-06,
399
- "loss": -0.197,
400
- "step": 25
401
  },
402
  {
403
  "clip_ratio/high_max": 0.0,
@@ -406,76 +354,24 @@
406
  "clip_ratio/low_min": 0.0,
407
  "clip_ratio/region_mean": 0.0,
408
  "completions/clipped_ratio": 0.0,
409
- "completions/max_length": 827.0,
410
- "completions/max_terminated_length": 827.0,
411
- "completions/mean_length": 715.2000122070312,
412
- "completions/mean_terminated_length": 715.2000122070312,
413
- "completions/min_length": 600.0,
414
- "completions/min_terminated_length": 600.0,
415
- "entropy": 0.03946581110358238,
416
- "epoch": 0.052,
417
  "frac_reward_zero_std": 0.0,
418
- "grad_norm": 0.4461834132671356,
419
- "learning_rate": 1e-06,
420
- "loss": 0.1631,
421
- "num_tokens": 23130.0,
422
- "reward": 0.675000011920929,
423
- "reward_std": 0.035355355590581894,
424
- "rewards/reward_func_with_saving/mean": 0.675000011920929,
425
- "rewards/reward_func_with_saving/std": 0.03535535931587219,
426
- "step": 26
427
- },
428
- {
429
- "clip_ratio/high_max": 0.0,
430
- "clip_ratio/high_mean": 0.0,
431
- "clip_ratio/low_mean": 0.0,
432
- "clip_ratio/low_min": 0.0,
433
- "clip_ratio/region_mean": 0.0,
434
- "entropy": 0.08581237494945526,
435
- "epoch": 0.054,
436
- "grad_norm": 0.5848076939582825,
437
- "learning_rate": 1e-06,
438
- "loss": 0.1479,
439
- "step": 27
440
- },
441
- {
442
- "clip_ratio/high_max": 0.0,
443
- "clip_ratio/high_mean": 0.0,
444
- "clip_ratio/low_mean": 0.0,
445
- "clip_ratio/low_min": 0.0,
446
- "clip_ratio/region_mean": 0.0,
447
- "entropy": 0.05218680202960968,
448
- "epoch": 0.056,
449
- "grad_norm": 0.5354483723640442,
450
- "learning_rate": 1e-06,
451
- "loss": 0.1183,
452
- "step": 28
453
- },
454
- {
455
- "clip_ratio/high_max": 0.0,
456
- "clip_ratio/high_mean": 0.0,
457
- "clip_ratio/low_mean": 0.0,
458
- "clip_ratio/low_min": 0.0,
459
- "clip_ratio/region_mean": 0.0,
460
- "entropy": 0.053608063608407974,
461
- "epoch": 0.058,
462
- "grad_norm": 1.3185772895812988,
463
- "learning_rate": 1e-06,
464
- "loss": -0.2595,
465
- "step": 29
466
- },
467
- {
468
- "clip_ratio/high_max": 0.001349527621641755,
469
- "clip_ratio/high_mean": 0.001349527621641755,
470
- "clip_ratio/low_mean": 0.0,
471
- "clip_ratio/low_min": 0.0,
472
- "clip_ratio/region_mean": 0.001349527621641755,
473
- "entropy": 0.075257308781147,
474
- "epoch": 0.06,
475
- "grad_norm": 0.486283540725708,
476
- "learning_rate": 1e-06,
477
- "loss": -0.1461,
478
- "step": 30
479
  },
480
  {
481
  "clip_ratio/high_max": 0.0,
@@ -484,76 +380,24 @@
484
  "clip_ratio/low_min": 0.0,
485
  "clip_ratio/region_mean": 0.0,
486
  "completions/clipped_ratio": 0.0,
487
- "completions/max_length": 901.0,
488
- "completions/max_terminated_length": 901.0,
489
- "completions/mean_length": 713.4000244140625,
490
- "completions/mean_terminated_length": 713.4000244140625,
491
- "completions/min_length": 608.0,
492
- "completions/min_terminated_length": 608.0,
493
- "entropy": 0.10886804014444351,
494
- "epoch": 0.062,
495
  "frac_reward_zero_std": 0.0,
496
- "grad_norm": 0.2377174347639084,
497
- "learning_rate": 1e-06,
498
- "loss": 0.0543,
499
- "num_tokens": 26782.0,
500
- "reward": 0.6883333325386047,
501
- "reward_std": 0.04951147362589836,
502
- "rewards/reward_func_with_saving/mean": 0.6883333325386047,
503
- "rewards/reward_func_with_saving/std": 0.049511492252349854,
504
- "step": 31
505
- },
506
- {
507
- "clip_ratio/high_max": 0.0,
508
- "clip_ratio/high_mean": 0.0,
509
- "clip_ratio/low_mean": 0.0,
510
- "clip_ratio/low_min": 0.0,
511
- "clip_ratio/region_mean": 0.0,
512
- "entropy": 0.0800568163394928,
513
- "epoch": 0.064,
514
- "grad_norm": 0.3408770263195038,
515
- "learning_rate": 1e-06,
516
- "loss": 0.0811,
517
- "step": 32
518
- },
519
- {
520
- "clip_ratio/high_max": 0.0,
521
- "clip_ratio/high_mean": 0.0,
522
- "clip_ratio/low_mean": 0.0,
523
- "clip_ratio/low_min": 0.0,
524
- "clip_ratio/region_mean": 0.0,
525
- "entropy": 0.11951474100351334,
526
- "epoch": 0.066,
527
- "grad_norm": 1.2026166915893555,
528
- "learning_rate": 1e-06,
529
- "loss": -0.2977,
530
- "step": 33
531
- },
532
- {
533
- "clip_ratio/high_max": 0.0,
534
- "clip_ratio/high_mean": 0.0,
535
- "clip_ratio/low_mean": 0.0,
536
- "clip_ratio/low_min": 0.0,
537
- "clip_ratio/region_mean": 0.0,
538
- "entropy": 0.09091290831565857,
539
- "epoch": 0.068,
540
- "grad_norm": 0.2414180338382721,
541
- "learning_rate": 1e-06,
542
- "loss": 0.0679,
543
- "step": 34
544
- },
545
- {
546
- "clip_ratio/high_max": 0.0,
547
- "clip_ratio/high_mean": 0.0,
548
- "clip_ratio/low_mean": 0.0,
549
- "clip_ratio/low_min": 0.0,
550
- "clip_ratio/region_mean": 0.0,
551
- "entropy": 0.0767383873462677,
552
- "epoch": 0.07,
553
- "grad_norm": 0.7556729912757874,
554
- "learning_rate": 1e-06,
555
- "loss": 0.1462,
556
- "step": 35
557
  },
558
  {
559
  "clip_ratio/high_max": 0.0,
@@ -562,83 +406,31 @@
562
  "clip_ratio/low_min": 0.0,
563
  "clip_ratio/region_mean": 0.0,
564
  "completions/clipped_ratio": 0.0,
565
- "completions/max_length": 726.0,
566
- "completions/max_terminated_length": 726.0,
567
- "completions/mean_length": 660.0,
568
- "completions/mean_terminated_length": 660.0,
569
- "completions/min_length": 582.0,
570
- "completions/min_terminated_length": 582.0,
571
- "entropy": 0.04803081601858139,
572
- "epoch": 0.072,
573
  "frac_reward_zero_std": 0.0,
574
- "grad_norm": 0.5553380846977234,
575
- "learning_rate": 1e-06,
576
- "loss": -0.1371,
577
- "num_tokens": 30162.0,
578
- "reward": 0.73499995470047,
579
- "reward_std": 0.057554323226213455,
580
- "rewards/reward_func_with_saving/mean": 0.73499995470047,
581
- "rewards/reward_func_with_saving/std": 0.057554323226213455,
582
- "step": 36
583
- },
584
- {
585
- "clip_ratio/high_max": 0.0,
586
- "clip_ratio/high_mean": 0.0,
587
- "clip_ratio/low_mean": 0.0,
588
- "clip_ratio/low_min": 0.0,
589
- "clip_ratio/region_mean": 0.0,
590
- "entropy": 0.0411250926554203,
591
- "epoch": 0.074,
592
- "grad_norm": 0.43590816855430603,
593
- "learning_rate": 1e-06,
594
- "loss": -0.1224,
595
- "step": 37
596
- },
597
- {
598
- "clip_ratio/high_max": 0.0,
599
- "clip_ratio/high_mean": 0.0,
600
- "clip_ratio/low_mean": 0.0,
601
- "clip_ratio/low_min": 0.0,
602
- "clip_ratio/region_mean": 0.0,
603
- "entropy": 0.0592646561563015,
604
- "epoch": 0.076,
605
- "grad_norm": 0.40773651003837585,
606
- "learning_rate": 1e-06,
607
- "loss": 0.1275,
608
- "step": 38
609
- },
610
- {
611
- "clip_ratio/high_max": 0.0,
612
- "clip_ratio/high_mean": 0.0,
613
- "clip_ratio/low_mean": 0.0,
614
- "clip_ratio/low_min": 0.0,
615
- "clip_ratio/region_mean": 0.0,
616
- "entropy": 0.049946531653404236,
617
- "epoch": 0.078,
618
- "grad_norm": 1.0851061344146729,
619
- "learning_rate": 1e-06,
620
- "loss": 0.3242,
621
- "step": 39
622
- },
623
- {
624
- "clip_ratio/high_max": 0.0,
625
- "clip_ratio/high_mean": 0.0,
626
- "clip_ratio/low_mean": 0.0,
627
- "clip_ratio/low_min": 0.0,
628
- "clip_ratio/region_mean": 0.0,
629
- "entropy": 0.04744778200984001,
630
- "epoch": 0.08,
631
- "grad_norm": 0.3738895654678345,
632
- "learning_rate": 1e-06,
633
- "loss": -0.136,
634
- "step": 40
635
  }
636
  ],
637
  "logging_steps": 1,
638
- "max_steps": 40,
639
- "num_input_tokens_seen": 30162,
640
  "num_train_epochs": 1,
641
- "save_steps": 5,
642
  "stateful_callbacks": {
643
  "TrainerControl": {
644
  "args": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.032,
6
  "eval_steps": 500,
7
+ "global_step": 16,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
+ "completions/max_length": 1174.0,
20
+ "completions/max_terminated_length": 1174.0,
21
+ "completions/mean_length": 909.0,
22
+ "completions/mean_terminated_length": 909.0,
23
+ "completions/min_length": 783.0,
24
+ "completions/min_terminated_length": 783.0,
25
+ "entropy": 0.05627120193094015,
26
  "epoch": 0.002,
27
  "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 0.5968819260597229,
29
+ "learning_rate": 5e-06,
30
+ "loss": -0.0469,
31
+ "num_tokens": 3712.0,
32
+ "reward": 0.8966667056083679,
33
+ "reward_std": 0.050852831453084946,
34
+ "rewards/reward_func_with_saving/mean": 0.8966667056083679,
35
+ "rewards/reward_func_with_saving/std": 0.05085281282663345,
36
  "step": 1
37
  },
38
  {
 
41
  "clip_ratio/low_mean": 0.0,
42
  "clip_ratio/low_min": 0.0,
43
  "clip_ratio/region_mean": 0.0,
44
+ "completions/clipped_ratio": 0.0,
45
+ "completions/max_length": 613.0,
46
+ "completions/max_terminated_length": 613.0,
47
+ "completions/mean_length": 611.5,
48
+ "completions/mean_terminated_length": 611.5,
49
+ "completions/min_length": 611.0,
50
+ "completions/min_terminated_length": 611.0,
51
+ "entropy": 0.02739148633554578,
52
  "epoch": 0.004,
53
+ "frac_reward_zero_std": 0.0,
54
+ "grad_norm": 0.4688481092453003,
55
+ "learning_rate": 5e-06,
56
+ "loss": -0.0004,
57
+ "num_tokens": 6254.0,
58
+ "reward": 0.7022222280502319,
59
+ "reward_std": 0.02222222089767456,
60
+ "rewards/reward_func_with_saving/mean": 0.7022222280502319,
61
+ "rewards/reward_func_with_saving/std": 0.02222222276031971,
62
  "step": 2
63
  },
64
  {
 
67
  "clip_ratio/low_mean": 0.0,
68
  "clip_ratio/low_min": 0.0,
69
  "clip_ratio/region_mean": 0.0,
70
+ "completions/clipped_ratio": 0.0,
71
+ "completions/max_length": 780.0,
72
+ "completions/max_terminated_length": 780.0,
73
+ "completions/mean_length": 703.75,
74
+ "completions/mean_terminated_length": 703.75,
75
+ "completions/min_length": 666.0,
76
+ "completions/min_terminated_length": 666.0,
77
+ "entropy": 0.10391132719814777,
78
  "epoch": 0.006,
79
+ "frac_reward_zero_std": 0.0,
80
+ "grad_norm": 1.0785013437271118,
81
+ "learning_rate": 5e-06,
82
+ "loss": -0.016,
83
+ "num_tokens": 9153.0,
84
+ "reward": 0.8922222256660461,
85
+ "reward_std": 0.011111120693385601,
86
+ "rewards/reward_func_with_saving/mean": 0.8922222256660461,
87
+ "rewards/reward_func_with_saving/std": 0.011111111380159855,
88
  "step": 3
89
  },
90
  {
91
  "clip_ratio/high_max": 0.0,
92
  "clip_ratio/high_mean": 0.0,
93
+ "clip_ratio/low_mean": 0.0,
94
+ "clip_ratio/low_min": 0.0,
95
+ "clip_ratio/region_mean": 0.0,
96
+ "completions/clipped_ratio": 0.0,
97
+ "completions/max_length": 798.0,
98
+ "completions/max_terminated_length": 798.0,
99
+ "completions/mean_length": 732.0,
100
+ "completions/mean_terminated_length": 732.0,
101
+ "completions/min_length": 639.0,
102
+ "completions/min_terminated_length": 639.0,
103
+ "entropy": 0.07874358911067247,
104
  "epoch": 0.008,
105
+ "frac_reward_zero_std": 0.0,
106
+ "grad_norm": 1.7055785655975342,
107
+ "learning_rate": 5e-06,
108
+ "loss": 0.0102,
109
+ "num_tokens": 12157.0,
110
+ "reward": 0.8300000429153442,
111
+ "reward_std": 0.08785511553287506,
112
+ "rewards/reward_func_with_saving/mean": 0.8300000429153442,
113
+ "rewards/reward_func_with_saving/std": 0.08785512298345566,
114
  "step": 4
115
  },
116
  {
117
  "clip_ratio/high_max": 0.0,
118
  "clip_ratio/high_mean": 0.0,
119
+ "clip_ratio/low_mean": 0.0,
120
+ "clip_ratio/low_min": 0.0,
121
+ "clip_ratio/region_mean": 0.0,
122
+ "completions/clipped_ratio": 0.0,
123
+ "completions/max_length": 731.0,
124
+ "completions/max_terminated_length": 731.0,
125
+ "completions/mean_length": 704.5,
126
+ "completions/mean_terminated_length": 704.5,
127
+ "completions/min_length": 654.0,
128
+ "completions/min_terminated_length": 654.0,
129
+ "entropy": 0.12625528872013092,
130
  "epoch": 0.01,
131
+ "frac_reward_zero_std": 1.0,
132
+ "grad_norm": 0.0,
133
+ "learning_rate": 5e-06,
134
+ "loss": 0.0,
135
+ "num_tokens": 15051.0,
136
+ "reward": 0.8311111330986023,
137
+ "reward_std": 0.0,
138
+ "rewards/reward_func_with_saving/mean": 0.8311111330986023,
139
+ "rewards/reward_func_with_saving/std": 0.0,
140
  "step": 5
141
  },
142
  {
 
146
  "clip_ratio/low_min": 0.0,
147
  "clip_ratio/region_mean": 0.0,
148
  "completions/clipped_ratio": 0.0,
149
+ "completions/max_length": 1007.0,
150
+ "completions/max_terminated_length": 1007.0,
151
+ "completions/mean_length": 816.0,
152
+ "completions/mean_terminated_length": 816.0,
153
+ "completions/min_length": 624.0,
154
+ "completions/min_terminated_length": 624.0,
155
+ "entropy": 0.1077885851264,
156
  "epoch": 0.012,
157
  "frac_reward_zero_std": 0.0,
158
+ "grad_norm": 1.0341051816940308,
159
+ "learning_rate": 5e-06,
160
+ "loss": -0.119,
161
+ "num_tokens": 18407.0,
162
+ "reward": 0.8322222232818604,
163
+ "reward_std": 0.06416287273168564,
164
+ "rewards/reward_func_with_saving/mean": 0.8322222232818604,
165
+ "rewards/reward_func_with_saving/std": 0.06416288018226624,
166
  "step": 6
167
  },
168
  {
 
171
  "clip_ratio/low_mean": 0.0,
172
  "clip_ratio/low_min": 0.0,
173
  "clip_ratio/region_mean": 0.0,
174
+ "completions/clipped_ratio": 0.0,
175
+ "completions/max_length": 783.0,
176
+ "completions/max_terminated_length": 783.0,
177
+ "completions/mean_length": 724.5,
178
+ "completions/mean_terminated_length": 724.5,
179
+ "completions/min_length": 690.0,
180
+ "completions/min_terminated_length": 690.0,
181
+ "entropy": 0.1391423474997282,
182
  "epoch": 0.014,
183
+ "frac_reward_zero_std": 0.0,
184
+ "grad_norm": 1.149609088897705,
185
+ "learning_rate": 5e-06,
186
+ "loss": -0.004,
187
+ "num_tokens": 21385.0,
188
+ "reward": 0.7538889050483704,
189
+ "reward_std": 0.1363290250301361,
190
+ "rewards/reward_func_with_saving/mean": 0.7538889050483704,
191
+ "rewards/reward_func_with_saving/std": 0.1363290250301361,
192
  "step": 7
193
  },
194
  {
 
197
  "clip_ratio/low_mean": 0.0,
198
  "clip_ratio/low_min": 0.0,
199
  "clip_ratio/region_mean": 0.0,
200
+ "completions/clipped_ratio": 0.0,
201
+ "completions/max_length": 611.0,
202
+ "completions/max_terminated_length": 611.0,
203
+ "completions/mean_length": 608.25,
204
+ "completions/mean_terminated_length": 608.25,
205
+ "completions/min_length": 601.0,
206
+ "completions/min_terminated_length": 601.0,
207
+ "entropy": 0.04191916948184371,
208
  "epoch": 0.016,
209
+ "frac_reward_zero_std": 0.0,
210
+ "grad_norm": 1.440533995628357,
211
+ "learning_rate": 5e-06,
212
+ "loss": 0.006,
213
+ "num_tokens": 23894.0,
214
+ "reward": 0.4983333349227905,
215
+ "reward_std": 0.09888887405395508,
216
+ "rewards/reward_func_with_saving/mean": 0.4983333349227905,
217
+ "rewards/reward_func_with_saving/std": 0.09888887405395508,
218
  "step": 8
219
  },
220
  {
 
223
  "clip_ratio/low_mean": 0.0,
224
  "clip_ratio/low_min": 0.0,
225
  "clip_ratio/region_mean": 0.0,
226
+ "completions/clipped_ratio": 0.0,
227
+ "completions/max_length": 643.0,
228
+ "completions/max_terminated_length": 643.0,
229
+ "completions/mean_length": 609.25,
230
+ "completions/mean_terminated_length": 609.25,
231
+ "completions/min_length": 576.0,
232
+ "completions/min_terminated_length": 576.0,
233
+ "entropy": 0.04268141835927963,
234
  "epoch": 0.018,
235
+ "frac_reward_zero_std": 0.0,
236
+ "grad_norm": 1.0735034942626953,
237
+ "learning_rate": 5e-06,
238
+ "loss": -0.0273,
239
+ "num_tokens": 26407.0,
240
+ "reward": 0.5416666865348816,
241
+ "reward_std": 0.15209239721298218,
242
+ "rewards/reward_func_with_saving/mean": 0.5416666865348816,
243
+ "rewards/reward_func_with_saving/std": 0.15209239721298218,
244
  "step": 9
245
  },
246
  {
 
249
  "clip_ratio/low_mean": 0.0,
250
  "clip_ratio/low_min": 0.0,
251
  "clip_ratio/region_mean": 0.0,
252
+ "completions/clipped_ratio": 0.0,
253
+ "completions/max_length": 750.0,
254
+ "completions/max_terminated_length": 750.0,
255
+ "completions/mean_length": 678.25,
256
+ "completions/mean_terminated_length": 678.25,
257
+ "completions/min_length": 612.0,
258
+ "completions/min_terminated_length": 612.0,
259
+ "entropy": 0.10207068175077438,
260
  "epoch": 0.02,
261
+ "frac_reward_zero_std": 0.0,
262
+ "grad_norm": 1.8149948120117188,
263
+ "learning_rate": 5e-06,
264
+ "loss": 0.0611,
265
+ "num_tokens": 29192.0,
266
+ "reward": 0.7227777242660522,
267
+ "reward_std": 0.12421109527349472,
268
+ "rewards/reward_func_with_saving/mean": 0.7227777242660522,
269
+ "rewards/reward_func_with_saving/std": 0.12421111017465591,
270
  "step": 10
271
  },
272
  {
 
276
  "clip_ratio/low_min": 0.0,
277
  "clip_ratio/region_mean": 0.0,
278
  "completions/clipped_ratio": 0.0,
279
+ "completions/max_length": 762.0,
280
+ "completions/max_terminated_length": 762.0,
281
+ "completions/mean_length": 659.75,
282
+ "completions/mean_terminated_length": 659.75,
283
+ "completions/min_length": 607.0,
284
+ "completions/min_terminated_length": 607.0,
285
+ "entropy": 0.11515359580516815,
286
  "epoch": 0.022,
287
  "frac_reward_zero_std": 0.0,
288
+ "grad_norm": 1.5091134309768677,
289
+ "learning_rate": 5e-06,
290
+ "loss": 0.0615,
291
+ "num_tokens": 31907.0,
292
+ "reward": 0.7888888716697693,
293
+ "reward_std": 0.10210946202278137,
294
+ "rewards/reward_func_with_saving/mean": 0.7888888716697693,
295
+ "rewards/reward_func_with_saving/std": 0.10210946202278137,
296
  "step": 11
297
  },
298
  {
 
301
  "clip_ratio/low_mean": 0.0,
302
  "clip_ratio/low_min": 0.0,
303
  "clip_ratio/region_mean": 0.0,
304
+ "completions/clipped_ratio": 0.0,
305
+ "completions/max_length": 856.0,
306
+ "completions/max_terminated_length": 856.0,
307
+ "completions/mean_length": 705.75,
308
+ "completions/mean_terminated_length": 705.75,
309
+ "completions/min_length": 530.0,
310
+ "completions/min_terminated_length": 530.0,
311
+ "entropy": 0.15026956051588058,
312
  "epoch": 0.024,
313
+ "frac_reward_zero_std": 0.0,
314
+ "grad_norm": 1.3372756242752075,
315
+ "learning_rate": 5e-06,
316
+ "loss": 0.1075,
317
+ "num_tokens": 34818.0,
318
+ "reward": 0.7594444751739502,
319
+ "reward_std": 0.16865848004817963,
320
+ "rewards/reward_func_with_saving/mean": 0.7594444751739502,
321
+ "rewards/reward_func_with_saving/std": 0.16865849494934082,
322
  "step": 12
323
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  {
325
  "clip_ratio/high_max": 0.0,
326
  "clip_ratio/high_mean": 0.0,
 
328
  "clip_ratio/low_min": 0.0,
329
  "clip_ratio/region_mean": 0.0,
330
  "completions/clipped_ratio": 0.0,
331
+ "completions/max_length": 919.0,
332
+ "completions/max_terminated_length": 919.0,
333
+ "completions/mean_length": 863.0,
334
+ "completions/mean_terminated_length": 863.0,
335
  "completions/min_length": 783.0,
336
  "completions/min_terminated_length": 783.0,
337
+ "entropy": 0.07314991764724255,
338
+ "epoch": 0.026,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  "frac_reward_zero_std": 0.0,
340
+ "grad_norm": 1.5220526456832886,
341
+ "learning_rate": 5e-06,
342
+ "loss": 0.0265,
343
+ "num_tokens": 38338.0,
344
+ "reward": 0.8427777886390686,
345
+ "reward_std": 0.021111130714416504,
346
+ "rewards/reward_func_with_saving/mean": 0.8427777886390686,
347
+ "rewards/reward_func_with_saving/std": 0.021111130714416504,
348
+ "step": 13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  },
350
  {
351
  "clip_ratio/high_max": 0.0,
 
354
  "clip_ratio/low_min": 0.0,
355
  "clip_ratio/region_mean": 0.0,
356
  "completions/clipped_ratio": 0.0,
357
+ "completions/max_length": 909.0,
358
+ "completions/max_terminated_length": 909.0,
359
+ "completions/mean_length": 733.5,
360
+ "completions/mean_terminated_length": 733.5,
361
+ "completions/min_length": 602.0,
362
+ "completions/min_terminated_length": 602.0,
363
+ "entropy": 0.11434740386903286,
364
+ "epoch": 0.028,
365
  "frac_reward_zero_std": 0.0,
366
+ "grad_norm": 1.422579050064087,
367
+ "learning_rate": 5e-06,
368
+ "loss": 0.0105,
369
+ "num_tokens": 41344.0,
370
+ "reward": 0.8372222185134888,
371
+ "reward_std": 0.03222225233912468,
372
+ "rewards/reward_func_with_saving/mean": 0.8372222185134888,
373
+ "rewards/reward_func_with_saving/std": 0.032222241163253784,
374
+ "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  },
376
  {
377
  "clip_ratio/high_max": 0.0,
 
380
  "clip_ratio/low_min": 0.0,
381
  "clip_ratio/region_mean": 0.0,
382
  "completions/clipped_ratio": 0.0,
383
+ "completions/max_length": 655.0,
384
+ "completions/max_terminated_length": 655.0,
385
+ "completions/mean_length": 630.0,
386
+ "completions/mean_terminated_length": 630.0,
387
+ "completions/min_length": 611.0,
388
+ "completions/min_terminated_length": 611.0,
389
+ "entropy": 0.048715847078710794,
390
+ "epoch": 0.03,
391
  "frac_reward_zero_std": 0.0,
392
+ "grad_norm": 0.6969804763793945,
393
+ "learning_rate": 5e-06,
394
+ "loss": 0.0151,
395
+ "num_tokens": 43936.0,
396
+ "reward": 0.528333306312561,
397
+ "reward_std": 0.15888887643814087,
398
+ "rewards/reward_func_with_saving/mean": 0.528333306312561,
399
+ "rewards/reward_func_with_saving/std": 0.15888887643814087,
400
+ "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  },
402
  {
403
  "clip_ratio/high_max": 0.0,
 
406
  "clip_ratio/low_min": 0.0,
407
  "clip_ratio/region_mean": 0.0,
408
  "completions/clipped_ratio": 0.0,
409
+ "completions/max_length": 879.0,
410
+ "completions/max_terminated_length": 879.0,
411
+ "completions/mean_length": 747.25,
412
+ "completions/mean_terminated_length": 747.25,
413
+ "completions/min_length": 643.0,
414
+ "completions/min_terminated_length": 643.0,
415
+ "entropy": 0.07380866352468729,
416
+ "epoch": 0.032,
417
  "frac_reward_zero_std": 0.0,
418
+ "grad_norm": 1.3967198133468628,
419
+ "learning_rate": 5e-06,
420
+ "loss": 0.0736,
421
+ "num_tokens": 46997.0,
422
+ "reward": 0.7199999690055847,
423
+ "reward_std": 0.14237260818481445,
424
+ "rewards/reward_func_with_saving/mean": 0.7199999690055847,
425
+ "rewards/reward_func_with_saving/std": 0.14237260818481445,
426
+ "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  }
428
  ],
429
  "logging_steps": 1,
430
+ "max_steps": 16,
431
+ "num_input_tokens_seen": 46997,
432
  "num_train_epochs": 1,
433
+ "save_steps": 4,
434
  "stateful_callbacks": {
435
  "TrainerControl": {
436
  "args": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4a06f1dbd5fe57850327672e334124bc880911e2fbf86ed0e967bbdbd99eded
3
  size 7313
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb3a8fe277af9bdf1f31540071cd20f21c0470946d3c2d53dfae7edd515712d
3
  size 7313