Kapturz0ny commited on
Commit
55ec7e6
·
verified ·
1 Parent(s): 89f10af

Upload fine-tuned retriever model

Browse files
Files changed (9) hide show
  1. README.md +0 -0
  2. model.safetensors +1 -1
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scaler.pt +3 -0
  6. scheduler.pt +3 -0
  7. tokenizer.json +2 -14
  8. trainer_state.json +457 -0
  9. training_args.bin +3 -0
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c22492b93885a269f991e4912889fc64ca6c3858c1c70c987be738dcef7688
3
  size 437951328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d05117916dd2f05c48a94cb660140809176d7ec524a6553afa607f255126543
3
  size 437951328
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f92bed52aef294289761229cd2b9c10cdc364828591d9a6c390d132f98fe6efa
3
+ size 871298443
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8dad59ceff6a75c7525a825b923fab0998a8fbc895091d0047c16e887a7bf0a
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:911449e67f3112a5e15b8316033a123bafd40147919c93233393f8ca44c64005
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cffa310dc5601509fc9547a5f27d963431f42ade920d24c479ed135940d3be02
3
+ size 1465
tokenizer.json CHANGED
@@ -1,19 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": "BatchLongest",
11
- "direction": "Right",
12
- "pad_to_multiple_of": null,
13
- "pad_id": 0,
14
- "pad_type_id": 0,
15
- "pad_token": "[PAD]"
16
- },
17
  "added_tokens": [
18
  {
19
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
trainer_state.json ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 450,
3
+ "best_metric": 0.8513853904282116,
4
+ "best_model_checkpoint": "../models/retriever_trainer_bge_base/checkpoint-450",
5
+ "epoch": 1.9148936170212765,
6
+ "eval_steps": 50,
7
+ "global_step": 450,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0425531914893617,
14
+ "grad_norm": 0.8533166646957397,
15
+ "learning_rate": 1.5254237288135596e-06,
16
+ "loss": 0.2054,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.0851063829787234,
21
+ "grad_norm": 0.7839147448539734,
22
+ "learning_rate": 3.2203389830508473e-06,
23
+ "loss": 0.2017,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.1276595744680851,
28
+ "grad_norm": 0.9631144404411316,
29
+ "learning_rate": 4.915254237288136e-06,
30
+ "loss": 0.197,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.1702127659574468,
35
+ "grad_norm": 1.077710747718811,
36
+ "learning_rate": 6.610169491525424e-06,
37
+ "loss": 0.1913,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.2127659574468085,
42
+ "grad_norm": 0.8926519155502319,
43
+ "learning_rate": 8.305084745762712e-06,
44
+ "loss": 0.1827,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.2127659574468085,
49
+ "eval_accuracy_at_0.5_thresh": 0.7808510638297872,
50
+ "eval_f1_at_0.5_thresh": 0.7444168734491315,
51
+ "eval_loss": 0.18353447318077087,
52
+ "eval_precision_at_0.5_thresh": 0.7389162561576355,
53
+ "eval_recall_at_0.5_thresh": 0.75,
54
+ "eval_runtime": 1.2282,
55
+ "eval_samples_per_second": 382.661,
56
+ "eval_steps_per_second": 12.213,
57
+ "step": 50
58
+ },
59
+ {
60
+ "epoch": 0.2553191489361702,
61
+ "grad_norm": 1.0001214742660522,
62
+ "learning_rate": 1e-05,
63
+ "loss": 0.1897,
64
+ "step": 60
65
+ },
66
+ {
67
+ "epoch": 0.2978723404255319,
68
+ "grad_norm": 0.8536041378974915,
69
+ "learning_rate": 1.169491525423729e-05,
70
+ "loss": 0.1811,
71
+ "step": 70
72
+ },
73
+ {
74
+ "epoch": 0.3404255319148936,
75
+ "grad_norm": 0.9402114748954773,
76
+ "learning_rate": 1.3389830508474577e-05,
77
+ "loss": 0.1777,
78
+ "step": 80
79
+ },
80
+ {
81
+ "epoch": 0.3829787234042553,
82
+ "grad_norm": 0.9630193710327148,
83
+ "learning_rate": 1.5084745762711865e-05,
84
+ "loss": 0.1697,
85
+ "step": 90
86
+ },
87
+ {
88
+ "epoch": 0.425531914893617,
89
+ "grad_norm": 1.079608678817749,
90
+ "learning_rate": 1.6779661016949154e-05,
91
+ "loss": 0.166,
92
+ "step": 100
93
+ },
94
+ {
95
+ "epoch": 0.425531914893617,
96
+ "eval_accuracy_at_0.5_thresh": 0.8063829787234043,
97
+ "eval_f1_at_0.5_thresh": 0.723404255319149,
98
+ "eval_loss": 0.15719293057918549,
99
+ "eval_precision_at_0.5_thresh": 0.9224806201550387,
100
+ "eval_recall_at_0.5_thresh": 0.595,
101
+ "eval_runtime": 1.1886,
102
+ "eval_samples_per_second": 395.432,
103
+ "eval_steps_per_second": 12.62,
104
+ "step": 100
105
+ },
106
+ {
107
+ "epoch": 0.46808510638297873,
108
+ "grad_norm": 1.0034838914871216,
109
+ "learning_rate": 1.8474576271186443e-05,
110
+ "loss": 0.1685,
111
+ "step": 110
112
+ },
113
+ {
114
+ "epoch": 0.5106382978723404,
115
+ "grad_norm": 0.9138811230659485,
116
+ "learning_rate": 1.9981078524124883e-05,
117
+ "loss": 0.1708,
118
+ "step": 120
119
+ },
120
+ {
121
+ "epoch": 0.5531914893617021,
122
+ "grad_norm": 0.9457395076751709,
123
+ "learning_rate": 1.97918637653737e-05,
124
+ "loss": 0.1524,
125
+ "step": 130
126
+ },
127
+ {
128
+ "epoch": 0.5957446808510638,
129
+ "grad_norm": 1.1439883708953857,
130
+ "learning_rate": 1.960264900662252e-05,
131
+ "loss": 0.1642,
132
+ "step": 140
133
+ },
134
+ {
135
+ "epoch": 0.6382978723404256,
136
+ "grad_norm": 0.900199830532074,
137
+ "learning_rate": 1.9413434247871333e-05,
138
+ "loss": 0.1612,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 0.6382978723404256,
143
+ "eval_accuracy_at_0.5_thresh": 0.8553191489361702,
144
+ "eval_f1_at_0.5_thresh": 0.8308457711442786,
145
+ "eval_loss": 0.14183281362056732,
146
+ "eval_precision_at_0.5_thresh": 0.8267326732673267,
147
+ "eval_recall_at_0.5_thresh": 0.835,
148
+ "eval_runtime": 1.1664,
149
+ "eval_samples_per_second": 402.933,
150
+ "eval_steps_per_second": 12.86,
151
+ "step": 150
152
+ },
153
+ {
154
+ "epoch": 0.6808510638297872,
155
+ "grad_norm": 1.2067737579345703,
156
+ "learning_rate": 1.922421948912015e-05,
157
+ "loss": 0.1553,
158
+ "step": 160
159
+ },
160
+ {
161
+ "epoch": 0.723404255319149,
162
+ "grad_norm": 1.0066120624542236,
163
+ "learning_rate": 1.9035004730368973e-05,
164
+ "loss": 0.1495,
165
+ "step": 170
166
+ },
167
+ {
168
+ "epoch": 0.7659574468085106,
169
+ "grad_norm": 1.0830472707748413,
170
+ "learning_rate": 1.8845789971617787e-05,
171
+ "loss": 0.1364,
172
+ "step": 180
173
+ },
174
+ {
175
+ "epoch": 0.8085106382978723,
176
+ "grad_norm": 0.9800769686698914,
177
+ "learning_rate": 1.8656575212866605e-05,
178
+ "loss": 0.1295,
179
+ "step": 190
180
+ },
181
+ {
182
+ "epoch": 0.851063829787234,
183
+ "grad_norm": 0.9936037659645081,
184
+ "learning_rate": 1.8467360454115423e-05,
185
+ "loss": 0.1545,
186
+ "step": 200
187
+ },
188
+ {
189
+ "epoch": 0.851063829787234,
190
+ "eval_accuracy_at_0.5_thresh": 0.8382978723404255,
191
+ "eval_f1_at_0.5_thresh": 0.8020833333333334,
192
+ "eval_loss": 0.13116958737373352,
193
+ "eval_precision_at_0.5_thresh": 0.8369565217391305,
194
+ "eval_recall_at_0.5_thresh": 0.77,
195
+ "eval_runtime": 1.1719,
196
+ "eval_samples_per_second": 401.047,
197
+ "eval_steps_per_second": 12.799,
198
+ "step": 200
199
+ },
200
+ {
201
+ "epoch": 0.8936170212765957,
202
+ "grad_norm": 0.9179720878601074,
203
+ "learning_rate": 1.827814569536424e-05,
204
+ "loss": 0.1328,
205
+ "step": 210
206
+ },
207
+ {
208
+ "epoch": 0.9361702127659575,
209
+ "grad_norm": 1.5345299243927002,
210
+ "learning_rate": 1.808893093661306e-05,
211
+ "loss": 0.1355,
212
+ "step": 220
213
+ },
214
+ {
215
+ "epoch": 0.9787234042553191,
216
+ "grad_norm": 1.037996530532837,
217
+ "learning_rate": 1.7899716177861873e-05,
218
+ "loss": 0.1289,
219
+ "step": 230
220
+ },
221
+ {
222
+ "epoch": 1.0212765957446808,
223
+ "grad_norm": 1.2900911569595337,
224
+ "learning_rate": 1.771050141911069e-05,
225
+ "loss": 0.1245,
226
+ "step": 240
227
+ },
228
+ {
229
+ "epoch": 1.0638297872340425,
230
+ "grad_norm": 0.7172983884811401,
231
+ "learning_rate": 1.752128666035951e-05,
232
+ "loss": 0.1067,
233
+ "step": 250
234
+ },
235
+ {
236
+ "epoch": 1.0638297872340425,
237
+ "eval_accuracy_at_0.5_thresh": 0.8425531914893617,
238
+ "eval_f1_at_0.5_thresh": 0.8131313131313131,
239
+ "eval_loss": 0.12659965455532074,
240
+ "eval_precision_at_0.5_thresh": 0.8214285714285714,
241
+ "eval_recall_at_0.5_thresh": 0.805,
242
+ "eval_runtime": 1.1658,
243
+ "eval_samples_per_second": 403.141,
244
+ "eval_steps_per_second": 12.866,
245
+ "step": 250
246
+ },
247
+ {
248
+ "epoch": 1.1063829787234043,
249
+ "grad_norm": 1.0125339031219482,
250
+ "learning_rate": 1.7332071901608327e-05,
251
+ "loss": 0.1177,
252
+ "step": 260
253
+ },
254
+ {
255
+ "epoch": 1.148936170212766,
256
+ "grad_norm": 1.0160564184188843,
257
+ "learning_rate": 1.7142857142857142e-05,
258
+ "loss": 0.1034,
259
+ "step": 270
260
+ },
261
+ {
262
+ "epoch": 1.1914893617021276,
263
+ "grad_norm": 1.3702187538146973,
264
+ "learning_rate": 1.6953642384105963e-05,
265
+ "loss": 0.1117,
266
+ "step": 280
267
+ },
268
+ {
269
+ "epoch": 1.2340425531914894,
270
+ "grad_norm": 1.317497730255127,
271
+ "learning_rate": 1.676442762535478e-05,
272
+ "loss": 0.1235,
273
+ "step": 290
274
+ },
275
+ {
276
+ "epoch": 1.2765957446808511,
277
+ "grad_norm": 0.8722613453865051,
278
+ "learning_rate": 1.6575212866603596e-05,
279
+ "loss": 0.0933,
280
+ "step": 300
281
+ },
282
+ {
283
+ "epoch": 1.2765957446808511,
284
+ "eval_accuracy_at_0.5_thresh": 0.8489361702127659,
285
+ "eval_f1_at_0.5_thresh": 0.8202531645569621,
286
+ "eval_loss": 0.1204555556178093,
287
+ "eval_precision_at_0.5_thresh": 0.8307692307692308,
288
+ "eval_recall_at_0.5_thresh": 0.81,
289
+ "eval_runtime": 1.1714,
290
+ "eval_samples_per_second": 401.236,
291
+ "eval_steps_per_second": 12.805,
292
+ "step": 300
293
+ },
294
+ {
295
+ "epoch": 1.3191489361702127,
296
+ "grad_norm": 1.0289169549942017,
297
+ "learning_rate": 1.6385998107852414e-05,
298
+ "loss": 0.1039,
299
+ "step": 310
300
+ },
301
+ {
302
+ "epoch": 1.3617021276595744,
303
+ "grad_norm": 0.9575484991073608,
304
+ "learning_rate": 1.619678334910123e-05,
305
+ "loss": 0.1161,
306
+ "step": 320
307
+ },
308
+ {
309
+ "epoch": 1.4042553191489362,
310
+ "grad_norm": 1.3465416431427002,
311
+ "learning_rate": 1.600756859035005e-05,
312
+ "loss": 0.0912,
313
+ "step": 330
314
+ },
315
+ {
316
+ "epoch": 1.4468085106382977,
317
+ "grad_norm": 0.9446049332618713,
318
+ "learning_rate": 1.5818353831598864e-05,
319
+ "loss": 0.0868,
320
+ "step": 340
321
+ },
322
+ {
323
+ "epoch": 1.4893617021276595,
324
+ "grad_norm": 1.3670283555984497,
325
+ "learning_rate": 1.5629139072847682e-05,
326
+ "loss": 0.0984,
327
+ "step": 350
328
+ },
329
+ {
330
+ "epoch": 1.4893617021276595,
331
+ "eval_accuracy_at_0.5_thresh": 0.8617021276595744,
332
+ "eval_f1_at_0.5_thresh": 0.8387096774193549,
333
+ "eval_loss": 0.12064016610383987,
334
+ "eval_precision_at_0.5_thresh": 0.8325123152709359,
335
+ "eval_recall_at_0.5_thresh": 0.845,
336
+ "eval_runtime": 1.1704,
337
+ "eval_samples_per_second": 401.579,
338
+ "eval_steps_per_second": 12.816,
339
+ "step": 350
340
+ },
341
+ {
342
+ "epoch": 1.5319148936170213,
343
+ "grad_norm": 1.2213770151138306,
344
+ "learning_rate": 1.54399243140965e-05,
345
+ "loss": 0.109,
346
+ "step": 360
347
+ },
348
+ {
349
+ "epoch": 1.574468085106383,
350
+ "grad_norm": 0.8301506638526917,
351
+ "learning_rate": 1.5250709555345318e-05,
352
+ "loss": 0.1014,
353
+ "step": 370
354
+ },
355
+ {
356
+ "epoch": 1.6170212765957448,
357
+ "grad_norm": 1.2703402042388916,
358
+ "learning_rate": 1.5061494796594136e-05,
359
+ "loss": 0.1054,
360
+ "step": 380
361
+ },
362
+ {
363
+ "epoch": 1.6595744680851063,
364
+ "grad_norm": 1.1871310472488403,
365
+ "learning_rate": 1.4872280037842952e-05,
366
+ "loss": 0.1043,
367
+ "step": 390
368
+ },
369
+ {
370
+ "epoch": 1.702127659574468,
371
+ "grad_norm": 0.7756622433662415,
372
+ "learning_rate": 1.468306527909177e-05,
373
+ "loss": 0.1052,
374
+ "step": 400
375
+ },
376
+ {
377
+ "epoch": 1.702127659574468,
378
+ "eval_accuracy_at_0.5_thresh": 0.8659574468085106,
379
+ "eval_f1_at_0.5_thresh": 0.8372093023255814,
380
+ "eval_loss": 0.1149349957704544,
381
+ "eval_precision_at_0.5_thresh": 0.8663101604278075,
382
+ "eval_recall_at_0.5_thresh": 0.81,
383
+ "eval_runtime": 1.1721,
384
+ "eval_samples_per_second": 400.999,
385
+ "eval_steps_per_second": 12.798,
386
+ "step": 400
387
+ },
388
+ {
389
+ "epoch": 1.7446808510638299,
390
+ "grad_norm": 1.2976441383361816,
391
+ "learning_rate": 1.4493850520340588e-05,
392
+ "loss": 0.1295,
393
+ "step": 410
394
+ },
395
+ {
396
+ "epoch": 1.7872340425531914,
397
+ "grad_norm": 1.139825463294983,
398
+ "learning_rate": 1.4304635761589404e-05,
399
+ "loss": 0.1032,
400
+ "step": 420
401
+ },
402
+ {
403
+ "epoch": 1.8297872340425532,
404
+ "grad_norm": 1.3335371017456055,
405
+ "learning_rate": 1.4115421002838224e-05,
406
+ "loss": 0.1001,
407
+ "step": 430
408
+ },
409
+ {
410
+ "epoch": 1.872340425531915,
411
+ "grad_norm": 1.33684241771698,
412
+ "learning_rate": 1.3926206244087039e-05,
413
+ "loss": 0.1052,
414
+ "step": 440
415
+ },
416
+ {
417
+ "epoch": 1.9148936170212765,
418
+ "grad_norm": 1.1832659244537354,
419
+ "learning_rate": 1.3736991485335858e-05,
420
+ "loss": 0.1064,
421
+ "step": 450
422
+ },
423
+ {
424
+ "epoch": 1.9148936170212765,
425
+ "eval_accuracy_at_0.5_thresh": 0.874468085106383,
426
+ "eval_f1_at_0.5_thresh": 0.8513853904282116,
427
+ "eval_loss": 0.11494793742895126,
428
+ "eval_precision_at_0.5_thresh": 0.8578680203045685,
429
+ "eval_recall_at_0.5_thresh": 0.845,
430
+ "eval_runtime": 1.1713,
431
+ "eval_samples_per_second": 401.267,
432
+ "eval_steps_per_second": 12.806,
433
+ "step": 450
434
+ }
435
+ ],
436
+ "logging_steps": 10,
437
+ "max_steps": 1175,
438
+ "num_input_tokens_seen": 0,
439
+ "num_train_epochs": 5,
440
+ "save_steps": 50,
441
+ "stateful_callbacks": {
442
+ "TrainerControl": {
443
+ "args": {
444
+ "should_epoch_stop": false,
445
+ "should_evaluate": false,
446
+ "should_log": false,
447
+ "should_save": true,
448
+ "should_training_stop": false
449
+ },
450
+ "attributes": {}
451
+ }
452
+ },
453
+ "total_flos": 0.0,
454
+ "train_batch_size": 16,
455
+ "trial_name": null,
456
+ "trial_params": null
457
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ed40793a99548d59325ef3b9fb84abc4853c5ba98258ada966204fd517a80d4
3
+ size 5713