amentaphd committed on
Commit
44e0275
·
verified ·
1 Parent(s): a274420

Upload folder using huggingface_hub

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,1215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - generated_from_trainer
7
+ - dataset_size:46338
8
+ - loss:MatryoshkaLoss
9
+ - loss:MultipleNegativesRankingLoss
10
+ base_model: Alibaba-NLP/gte-modernbert-base
11
+ widget:
12
+ - source_sentence: What are the specific points and subparagraphs mentioned in the
13
+ context of Article 4(3) that relate to the introductory wording and how do they
14
+ connect to the provisions outlined in Article 3(1)?
15
+ sentences:
16
+ - 51 - Article 2, points 52, 53,54, 55 and 56 - Article 3 - Article 4(1) Article
17
+ 3(1), first subparagraph Article 4(2), first subparagraph Article 4(2), second
18
+ subparagraph Article 3(1), second subparagraph, introductory wording Article 4(3),
19
+ first subparagraph, introductory wording Article 3(1), second subparagraph, points
20
+ (a) and (b) Article 4(3), first subparagraph, points (a) and (b) Article 3(1),
21
+ second subparagraph, point (c) - Article 3(1), second subparagraph, point (d)
22
+ Article 4(3), first subparagraph, point (c) Article 3(1), third subparagraph,
23
+ introductory wording - - Article 4(3), first subparagraph, point (d), introductory
24
+ wording - Article 4(3), first subparagraph, points (d)(i), (ii) and (iii) Article
25
+ 3(1), third subparagraph, point (a) Article 4(3), first subparagraph, point (d)(iv)
26
+ - Article 4(3), first subparagraph, point (e), introductory wording Article 3(1),
27
+ third subparagraph, point (b) Article 4(3), first subparagraph, point (e)(i) Article
28
+ 3(1), third subparagraph, point (c) Article 4(3), first subparagraph, point (e)(ii)
29
+ Article 3(1), third subparagraph, point (d) Article 4(3), first subparagraph,
30
+ point (e)(iii) Article 3(1), third subparagraph, point (e) - - Article 4(3), first
31
+ subparagraph, point (e)(iv) Article 3(2) and (3) - Article 3(4) Article 35(6)
32
+ Article 3(5) and (6) - - Article 4(4) - Article 4(5) Article 4(6) Article 4(7)
33
+ - Article 5 Article 5(1), first subparagraph Article 6(1), first subparagraph
34
+ Article 5(1), second subparagraph Article 6(1), fifth subparagraph - Article 6(1),
35
+ second and third subparagraph Article 5(1), third subparagraph Article 6(1), fourth
36
+ subparagraph Article 5(1), fourth and fifth subparagraph - Article 5(2) - Article
37
+ 6(2) Article 6(2), second subparagraph Article 5(3) Article 6(3) Article 5(4)
38
+ Article 6(4) Article 5(5) Article 6(5) Article 5(5), first subparagraph, point
39
+ (b) Article 6(5), second subparagraph, point (c) - Article 6(5), second subparagraph,
40
+ point (b) Article 5(6) Article 6(6) - Article 6(6), second subparagraph, point
41
+ (a) Article 5(6), second subparagraph Article 6(6), second subparagraph, point
42
+ (b) Article 5(6), third subparagraph Article 6(6), third subparagraph Article
43
+ 5(7) - Article 6(1), first subparagraph Article 7(1), first
44
+ - 'ii.
45
+
46
+
47
+ measures to protect against retaliation its own workers who are whistleblowers
48
+ in accordance with the applicable law transposing Directive (EU) 2019/1937 of
49
+ the European Parliament and of the Council ( 121 );
50
+
51
+
52
+ (d)
53
+
54
+
55
+ where the undertaking has no policies on the protection of whistle-blowers ( 122
56
+ ), it shall state this and whether it has plans to implement them and the timetable
57
+ for implementation;
58
+
59
+
60
+ (e)
61
+
62
+
63
+ beyond the procedures to follow-up on reports by whistleblowers in accordance
64
+ with the applicable law transposing Directive (EU) 2019/1937, whether the undertaking
65
+ has procedures to investigate business conduct incidents , including incidents
66
+ of corruption and bribery , promptly, independently and objectively;
67
+
68
+
69
+ (f)
70
+
71
+
72
+ where applicable, whether the undertaking has in place policies with respect to
73
+ animal welfare;
74
+
75
+
76
+ (g)
77
+
78
+
79
+ the undertaking’s policy for training within the organisation on business conduct,
80
+ including target audience, frequency and depth of coverage; and
81
+
82
+
83
+ (h)
84
+
85
+
86
+ the functions within the undertaking that are most at risk in respect of corruption
87
+ and bribery .
88
+
89
+
90
+ Undertakings that are subject to legal requirements under national law transposing
91
+ Directive (EU) 2019/1937, or to equivalent legal requirements with regard to the
92
+ protection of whistle-blowers, may comply with the disclosure specified in paragraph
93
+ 10 (d) by stating that they are subject to those legal requirements.
94
+
95
+
96
+ Disclosure Requirement G1-2 – Management of relationships with suppliers
97
+
98
+
99
+ The undertaking shall provide information about the management of its relationships
100
+ with its suppliers and its impacts on its supply chain.
101
+
102
+
103
+ The objective of this Disclosure Requirement is to provide an understanding of
104
+ the undertaking’s management of its procurement process including fair behaviour
105
+ with suppliers .
106
+
107
+
108
+ The undertaking shall provide a description of its policy to prevent late payments,
109
+ specifically to SMEs.
110
+
111
+
112
+ The disclosure required under paragraph 12 shall include the following information:
113
+
114
+
115
+ (a)
116
+
117
+
118
+ the undertaking’s approach to its relationships with its suppliers , taking account
119
+ of risks to the undertaking related to its supply chain and of impacts on sustainability
120
+ matters ; and
121
+
122
+
123
+ (b)
124
+
125
+
126
+ whether and how it takes into account social and environmental criteria for the
127
+ selection of its suppliers.
128
+
129
+
130
+ Disclosure Requirement G1-3 – Prevention and detection of corruption and bribery
131
+
132
+
133
+ The undertaking shall provide information about its system to prevent and detect,
134
+ investigate, and respond to allegations or incidents relating to corruption and
135
+ bribery including the related training.
136
+
137
+
138
+ The objective of this Disclosure Requirement is to provide transparency on the
139
+ key procedures of the undertaking to prevent, detect, and address allegations
140
+ about corruption and bribery . This includes the training provided to own workers
141
+ and/or information provided internally or to suppliers .
142
+
143
+
144
+ The disclosure required under paragraph 16 shall include the following information:
145
+
146
+
147
+ (a)
148
+
149
+
150
+ a description of the procedures in place to prevent, detect, and address allegations
151
+ or incidents of corruption and bribery ;
152
+
153
+
154
+ (b)
155
+
156
+
157
+ whether the investigators or investigating committee are separate from the chain
158
+ of management involved in the matter; and
159
+
160
+
161
+ (c)
162
+
163
+
164
+ the process, if any, to report outcomes to the administrative, management and
165
+ supervisory bodies .
166
+
167
+
168
+ Where the undertaking has no such procedures in place, it shall disclose this
169
+ fact and, where applicable, its plans to adopt them.
170
+
171
+
172
+ The disclosures required by paragraph 16 shall include information about how the
173
+ undertaking communicates its policies to those for whom they are relevant to ensure
174
+ that the policy is accessible and that they understand its implications.
175
+
176
+
177
+ The disclosure required by paragraph 16 shall include information about the following
178
+ with respect to training:
179
+
180
+
181
+ (a)
182
+
183
+
184
+ the nature, scope and depth of anti- corruption and anti- bribery training programmes
185
+ offered or required by the undertaking;
186
+
187
+
188
+ (b)
189
+
190
+
191
+ the percentage of functions-at-risk covered by training programmes; and
192
+
193
+
194
+ (c)
195
+
196
+
197
+ the extent to which training is given to members of the administrative, management
198
+ and supervisory bodies.
199
+
200
+
201
+ Metrics and targets
202
+
203
+
204
+ Disclosure Requirement G1-4 – Incidents of corruption or bribery
205
+
206
+
207
+ The undertaking shall provide information on incidents of corruption or bribery
208
+ during the reporting period.'
209
+ - '(39)
210
+
211
+
212
+ ‘algorithmic trading’ means trading in financial instruments where a computer
213
+ algorithm automatically determines individual parameters of orders such as whether
214
+ to initiate the order, the timing, price or quantity of the order or how to manage
215
+ the order after its submission, with limited or no human intervention, and does
216
+ not include any system that is only used for the purpose of routing orders to
217
+ one or more trading venues or for the processing of orders involving no determination
218
+ of any trading parameters or for the confirmation of orders or the post-trade
219
+ processing of executed transactions;
220
+
221
+
222
+ (40)
223
+
224
+
225
+ ‘high-frequency algorithmic trading technique’ means an algorithmic trading technique
226
+ characterised by:
227
+
228
+
229
+ (a)'
230
+ - source_sentence: What action does the Commission take if the scheme owner fails
231
+ to address the deficiencies and the scheme no longer meets the criteria in Annex
232
+ IV?
233
+ sentences:
234
+ - '2.
235
+
236
+
237
+ Implementing partners shall fill out the Scoreboard for their proposals for financing
238
+ and investment operations.
239
+
240
+
241
+ 3.
242
+
243
+
244
+ The Scoreboard shall cover the following elements:
245
+
246
+
247
+ (a)
248
+
249
+
250
+ a description of the proposed financing or investment operation;
251
+
252
+
253
+ (b)
254
+
255
+
256
+ how the proposed financing or investment operation contributes to EU policy objectives;
257
+
258
+
259
+ (c)
260
+
261
+
262
+ a description of additionality;
263
+
264
+
265
+ (d)
266
+
267
+
268
+ a description of the market failure or suboptimal investment situation;
269
+
270
+
271
+ (e)
272
+
273
+
274
+ the financial and technical contribution by the implementing partner;
275
+
276
+
277
+ (f)
278
+
279
+
280
+ the impact of the investment;
281
+
282
+
283
+ (g)
284
+
285
+
286
+ the financial profile of the financing or investment operation;
287
+
288
+
289
+ (h)
290
+
291
+
292
+ complementary indicators.
293
+
294
+
295
+ 4.
296
+
297
+
298
+ The Commission is empowered to adopt delegated acts in accordance with Article
299
+ 34 in order to supplement this Regulation by establishing additional elements
300
+ of the Scoreboard, including detailed rules for the Scoreboard to be used by the
301
+ implementing partners.
302
+
303
+
304
+ Article 23
305
+
306
+
307
+ Policy check
308
+
309
+
310
+ 1.
311
+
312
+
313
+ The Commission shall conduct a check to confirm that the financing and investment
314
+ operations proposed by the implementing partners other than the EIB comply with
315
+ Union law and policies.
316
+
317
+
318
+ 2.
319
+
320
+
321
+ EIB financing and investment operations that fall within the scope of this Regulation
322
+ shall not be covered by the EU guarantee where the Commission delivers an unfavourable
323
+ opinion within the framework of the procedure provided for in Article 19 of the
324
+ EIB Statute.
325
+
326
+
327
+ ▼M1
328
+
329
+
330
+ 3.
331
+
332
+
333
+ In the context of the procedures referred to in paragraphs 1 and 2 of this Article,
334
+ the Commission shall take into account any Sovereignty Seal awarded in accordance
335
+ with Article 4 of Regulation (EU) 2024/795 to a project.
336
+
337
+
338
+ ▼B
339
+
340
+
341
+ Article 24
342
+
343
+
344
+ Investment Committee
345
+
346
+
347
+ 1.
348
+
349
+
350
+ A fully independent investment committee shall be established for the InvestEU
351
+ Fund (the ‘Investment Committee’). The Investment Committee shall:
352
+
353
+
354
+ (a)
355
+
356
+
357
+ examine the proposals for financing and investment operations submitted by implementing
358
+ partners for coverage under the EU guarantee that have passed the policy check
359
+ referred to in Article 23(1) of this Regulation or that have received a favourable
360
+ opinion within the framework of the procedure provided for in Article 19 of the
361
+ EIB Statute;
362
+
363
+
364
+ (b)'
365
+ - (6) | The maritime transport sector is subject to strong international competition.
366
+ Major differences in regulatory burdens across flag states have often led to unwanted
367
+ practices such as the reflagging of ships. The sector’s intrinsic global character
368
+ underlines the importance of a flag-neutral approach and of a favourable regulatory
369
+ environment, which would help to attract new investment and safeguard the competitiveness
370
+ of Union ports, shipowners and ship operators.
371
+ - '8.
372
+
373
+
374
+ Where the scheme owner fails or refuses to take the necessary remedial action
375
+ and where the Commission has determined that the deficiencies referred to in paragraph
376
+ 6 of this Article mean that the scheme no longer fulfils the criteria laid down
377
+ in Annex IV, or of the recognised subset of those criteria, the Commission shall
378
+ withdraw the recognition of the scheme by means of implementing acts. Those implementing
379
+ acts shall be adopted in accordance with the examination procedure referred to
380
+ in Article 39(3).
381
+
382
+
383
+ 9.'
384
+ - source_sentence: What roles do upstream and downstream business partners play in
385
+ the overall production and distribution process as described?
386
+ sentences:
387
+ - (25) The chain of activities should cover activities of a company’s upstream business
388
+ partners related to the production of goods or the provision of services by the
389
+ company, including the design, extraction, sourcing, manufacture, transport, storage
390
+ and supply of raw materials, products or parts of the products and development
391
+ of the product or the service, and activities of a company’s downstream business
392
+ partners related to the distribution, transport and storage of the product, where
393
+ the business partners carry out those activities for the company or on behalf
394
+ of the company. This Directive should not cover the disposal of the product. In
395
+ addition, under this Directive the chain of activities should not encompass the
396
+ distribution,
397
+ - '7.
398
+
399
+
400
+ Any actor in the supply chain who is required to prepare a chemical safety report
401
+ according to Articles 14 or 37 shall place the relevant exposure scenarios (including
402
+ use and exposure categories where appropriate) in an annex to the safety data
403
+ sheet covering identified uses and including specific conditions resulting from
404
+ the application of Section 3 of Annex XI.
405
+
406
+
407
+ Any downstream user shall include relevant exposure scenarios, and use other relevant
408
+ information, from the safety data sheet supplied to him when compiling his own
409
+ safety data sheet for identified uses.'
410
+ - '8.
411
+
412
+
413
+ Authorisations shall be subject to a time-limited review without prejudice to
414
+ any decision on a future review period and shall normally be subject to conditions,
415
+ including monitoring. The duration of the time-limited review for any authorisation
416
+ shall be determined on a case-by-case basis taking into account all relevant information
417
+ including the elements listed in paragraph 4(a) to (d), as appropriate.
418
+
419
+
420
+ 9.
421
+
422
+
423
+ The authorisation shall specify:
424
+
425
+
426
+ (a)
427
+
428
+
429
+ the person(s) to whom the authorisation is granted;
430
+
431
+
432
+ (b)
433
+
434
+
435
+ the identity of the substance(s);
436
+
437
+
438
+ (c)
439
+
440
+
441
+ the use(s) for which the authorisation is granted;
442
+
443
+
444
+ (d)
445
+
446
+
447
+ any conditions under which the authorisation is granted;
448
+
449
+
450
+ (e)
451
+
452
+
453
+ the time-limited review period;
454
+
455
+
456
+ (f)
457
+
458
+
459
+ any monitoring arrangement.
460
+
461
+
462
+ 10.'
463
+ - source_sentence: What conditions must be met for the stability study in organic
464
+ solvents to be deemed unnecessary for a substance?
465
+ sentences:
466
+ - 'AR 23. When disclosing information required under paragraph 29 for the purpose
467
+ of setting targets the undertaking shall consider the need for an informed and
468
+ willing consent of local and indigenous peoples , the need for appropriate consultations
469
+ and the need to respect the decisions of these communities.
470
+
471
+
472
+ AR 24. The targets related to material impacts may be presented in a table as
473
+ illustrated below:
474
+
475
+
476
+ Type of target according to mitigation hierarchy Baseline value and base year
477
+ Target value and geographical scope Connected policy or legislation if relevant
478
+ 2025 2030 Up to 2050 Avoidance Minimisation Rehabilitation and restoration Compensation
479
+ or offsets'
480
+ - '1.
481
+
482
+
483
+ Member States shall, in accordance with paragraph 2, draw up a register of producers,
484
+ including producers supplying EEE by means of distance communication. That register
485
+ shall serve to monitor compliance with the requirements of this Directive.
486
+
487
+
488
+ Producers supplying EEE by means of distance communication as defined in Article
489
+ 3(1)(f)(iv) shall be registered in the Member State that they sell to. Where such
490
+ producers are not registered in the Member State that they are selling to, they
491
+ shall be registered through their authorised representatives as referred to in
492
+ Article 17(2).
493
+
494
+
495
+ 2.
496
+
497
+
498
+ Member States shall ensure that:
499
+
500
+
501
+ (a)
502
+
503
+
504
+ each producer, or each authorised representative where appointed under Article
505
+ 17, is registered as required and has the possibility of entering online in their
506
+ national register all relevant information reflecting that producer’s activities
507
+ in that Member State;
508
+
509
+
510
+ (b)
511
+
512
+
513
+ upon registering, each producer, or each authorised representative where appointed
514
+ under Article 17, provides the information set out in Annex X, Part A, undertaking
515
+ to update it as appropriate;
516
+
517
+
518
+ (c)
519
+
520
+
521
+ each producer, or each authorised representative where appointed under Article
522
+ 17, provides the information set out in Annex X, Part B;
523
+
524
+
525
+ (d)
526
+
527
+
528
+ national registers provide links to other national registers on their website
529
+ to facilitate, in all Member States, registration of producers or, where appointed
530
+ under Article 17, authorised representatives.
531
+
532
+
533
+ 3.
534
+
535
+
536
+ In order to ensure uniform conditions for the implementation of this Article,
537
+ the Commission shall adopt implementing acts establishing the format for registration
538
+ and reporting and the frequency of reporting to the register. Those implementing
539
+ acts shall be adopted in accordance with the examination procedure referred to
540
+ in Article 21(2).
541
+
542
+
543
+ 4.
544
+
545
+
546
+ Member States shall collect information, including substantiated estimates, on
547
+ an annual basis, on the quantities and categories of EEE placed on their markets,
548
+ collected through all routes, prepared for re-use, recycled and recovered within
549
+ the Member State, and on separately collected WEEE exported, by weight.
550
+
551
+
552
+ ▼M1 —————
553
+
554
+
555
+ ▼M1
556
+
557
+
558
+ 6.'
559
+ - 'COLUMN 1 STANDARD INFORMATION REQUIRED COLUMN 2 SPECIFIC RULES FOR ADAPTATION
560
+ FROM COLUMN 1 7.15. Stability in organic solvents and identity of relevant degradation
561
+ products Only required if stability of the substance is considered to be critical.
562
+ 7.15. The study does not need to be conducted if the substance is inorganic. 7.16.
563
+ Dissociation constant 7.16. The study does not need to be conducted if: — the
564
+ substance is hydrolytically unstable (half-life less than 12 hours) or is readily
565
+ oxidisable in water, or ►M70 ◄ ►M64 — or based on the structure, the substance
566
+ does not have any chemical group that can dissociate. ◄ 7.17. Viscosity ►M64 For
567
+ hydrocarbon substances the kinematic viscosity shall be determined at 40 °C. ◄'
568
+ - source_sentence: How is 'associated undertaking' defined, and what criteria determine
569
+ the significant influence of one undertaking over another in terms of voting rights?
570
+ sentences:
571
+ - '▼B
572
+
573
+
574
+ (6)
575
+
576
+
577
+ ‘purchase price’ means the price payable and any incidental expenses minus any
578
+ incidental reductions in the cost of acquisition;
579
+
580
+
581
+ (7)
582
+
583
+
584
+ ‘production cost’ means the purchase price of raw materials, consumables and other
585
+ costs directly attributable to the item in question. Member States shall permit
586
+ or require the inclusion of a reasonable proportion of fixed or variable overhead
587
+ costs indirectly attributable to the item in question, to the extent that they
588
+ relate to the period of production. Distribution costs shall not be included;
589
+
590
+
591
+ (8)
592
+
593
+
594
+ ‘value adjustment’ means the adjustments intended to take account of changes in
595
+ the values of individual assets established at the balance sheet date, whether
596
+ the change is final or not;
597
+
598
+
599
+ (9)
600
+
601
+
602
+ ‘parent undertaking’ means an undertaking which controls one or more subsidiary
603
+ undertakings;
604
+
605
+
606
+ (10)
607
+
608
+
609
+ ‘subsidiary undertaking’ means an undertaking controlled by a parent undertaking,
610
+ including any subsidiary undertaking of an ultimate parent undertaking;
611
+
612
+
613
+ (11)
614
+
615
+
616
+ ‘group’ means a parent undertaking and all its subsidiary undertakings;
617
+
618
+
619
+ (12)
620
+
621
+
622
+ ‘affiliated undertakings’ means any two or more undertakings within a group;
623
+
624
+
625
+ (13)
626
+
627
+
628
+ ‘associated undertaking’ means an undertaking in which another undertaking has
629
+ a participating interest, and over whose operating and financial policies that
630
+ other undertaking exercises significant influence. An undertaking is presumed
631
+ to exercise a significant influence over another undertaking where it has 20 %
632
+ or more of the shareholders'' or members'' voting rights in that other undertaking;
633
+
634
+
635
+ (14)
636
+
637
+
638
+ ‘investment undertakings’ means:
639
+
640
+
641
+ (a)
642
+
643
+
644
+ undertakings the sole object of which is to invest their funds in various securities,
645
+ real property and other assets, with the sole aim of spreading investment risks
646
+ and giving their shareholders the benefit of the results of the management of
647
+ their assets,
648
+
649
+
650
+ (b)
651
+
652
+
653
+ undertakings associated with investment undertakings with fixed capital, if the
654
+ sole object of those associated undertakings is to acquire fully paid shares issued
655
+ by those investment undertakings without prejudice to point (h) of Article 22(1)
656
+ of Directive 2012/30/EU;
657
+
658
+
659
+ (15)'
660
+ - and non-European non-financial corporations not subject to the disclosure obligations
661
+ laid down in Directive 2013/34/EU. That information may be disclosed only once,
662
+ based on counterparties’ turnover alignment for the general-purpose lending loans,
663
+ as in the case of the GAR. The first disclosure reference date of this template
664
+ is as of 31 December 2024. Institutions are not required to disclose this information
665
+ before 1 January 2025. ---|---|---
666
+ - 'ANNEX II
667
+
668
+
669
+ Due diligence statement
670
+
671
+
672
+ Information to be contained in the due diligence statement in accordance with
673
+ Article 4(2):
674
+
675
+
676
+ 1.
677
+
678
+
679
+ Operator’s name, address and, in the event of relevant commodities and relevant
680
+ products entering or leaving the market, the Economic Operators Registration and
681
+ Identification (EORI) number in accordance with Article 9 of Regulation (EU) No
682
+ 952/2013.
683
+
684
+
685
+ 2.'
686
+ pipeline_tag: sentence-similarity
687
+ library_name: sentence-transformers
688
+ metrics:
689
+ - cosine_accuracy@1
690
+ - cosine_accuracy@3
691
+ - cosine_accuracy@5
692
+ - cosine_accuracy@10
693
+ - cosine_precision@1
694
+ - cosine_precision@3
695
+ - cosine_precision@5
696
+ - cosine_precision@10
697
+ - cosine_recall@1
698
+ - cosine_recall@3
699
+ - cosine_recall@5
700
+ - cosine_recall@10
701
+ - cosine_ndcg@10
702
+ - cosine_mrr@10
703
+ - cosine_map@100
704
+ model-index:
705
+ - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
706
+ results:
707
+ - task:
708
+ type: information-retrieval
709
+ name: Information Retrieval
710
+ dataset:
711
+ name: Unknown
712
+ type: unknown
713
+ metrics:
714
+ - type: cosine_accuracy@1
715
+ value: 0.6910063870188158
716
+ name: Cosine Accuracy@1
717
+ - type: cosine_accuracy@3
718
+ value: 0.9109269808389435
719
+ name: Cosine Accuracy@3
720
+ - type: cosine_accuracy@5
721
+ value: 0.9461418953909891
722
+ name: Cosine Accuracy@5
723
+ - type: cosine_accuracy@10
724
+ value: 0.9742793026065941
725
+ name: Cosine Accuracy@10
726
+ - type: cosine_precision@1
727
+ value: 0.6910063870188158
728
+ name: Cosine Precision@1
729
+ - type: cosine_precision@3
730
+ value: 0.30364232694631454
731
+ name: Cosine Precision@3
732
+ - type: cosine_precision@5
733
+ value: 0.18922837907819778
734
+ name: Cosine Precision@5
735
+ - type: cosine_precision@10
736
+ value: 0.09742793026065939
737
+ name: Cosine Precision@10
738
+ - type: cosine_recall@1
739
+ value: 0.6910063870188158
740
+ name: Cosine Recall@1
741
+ - type: cosine_recall@3
742
+ value: 0.9109269808389435
743
+ name: Cosine Recall@3
744
+ - type: cosine_recall@5
745
+ value: 0.9461418953909891
746
+ name: Cosine Recall@5
747
+ - type: cosine_recall@10
748
+ value: 0.9742793026065941
749
+ name: Cosine Recall@10
750
+ - type: cosine_ndcg@10
751
+ value: 0.8471731447814336
752
+ name: Cosine Ndcg@10
753
+ - type: cosine_mrr@10
754
+ value: 0.804833419644399
755
+ name: Cosine Mrr@10
756
+ - type: cosine_map@100
757
+ value: 0.8061197699360279
758
+ name: Cosine Map@100
759
+ ---
760
+
761
+ # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
762
+
763
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
764
+
765
+ ## Model Details
766
+
767
+ ### Model Description
768
+ - **Model Type:** Sentence Transformer
769
+ - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision bc02f0a92d1b6dd82108036f6cb4b7b423fb7434 -->
770
+ - **Maximum Sequence Length:** 8192 tokens
771
+ - **Output Dimensionality:** 768 dimensions
772
+ - **Similarity Function:** Cosine Similarity
773
+ <!-- - **Training Dataset:** Unknown -->
774
+ <!-- - **Language:** Unknown -->
775
+ <!-- - **License:** Unknown -->
776
+
777
+ ### Model Sources
778
+
779
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
780
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
781
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
782
+
783
+ ### Full Model Architecture
784
+
785
+ ```
786
+ SentenceTransformer(
787
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
788
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
789
+ )
790
+ ```
791
+
792
+ ## Usage
793
+
794
+ ### Direct Usage (Sentence Transformers)
795
+
796
+ First install the Sentence Transformers library:
797
+
798
+ ```bash
799
+ pip install -U sentence-transformers
800
+ ```
801
+
802
+ Then you can load this model and run inference.
803
+ ```python
804
+ from sentence_transformers import SentenceTransformer
805
+
806
+ # Download from the 🤗 Hub
807
+ model = SentenceTransformer("sentence_transformers_model_id")
808
+ # Run inference
809
+ sentences = [
810
+ "How is 'associated undertaking' defined, and what criteria determine the significant influence of one undertaking over another in terms of voting rights?",
811
+ "▼B\n\n(6)\n\n‘purchase price’ means the price payable and any incidental expenses minus any incidental reductions in the cost of acquisition;\n\n(7)\n\n‘production cost’ means the purchase price of raw materials, consumables and other costs directly attributable to the item in question. Member States shall permit or require the inclusion of a reasonable proportion of fixed or variable overhead costs indirectly attributable to the item in question, to the extent that they relate to the period of production. Distribution costs shall not be included;\n\n(8)\n\n‘value adjustment’ means the adjustments intended to take account of changes in the values of individual assets established at the balance sheet date, whether the change is final or not;\n\n(9)\n\n‘parent undertaking’ means an undertaking which controls one or more subsidiary undertakings;\n\n(10)\n\n‘subsidiary undertaking’ means an undertaking controlled by a parent undertaking, including any subsidiary undertaking of an ultimate parent undertaking;\n\n(11)\n\n‘group’ means a parent undertaking and all its subsidiary undertakings;\n\n(12)\n\n‘affiliated undertakings’ means any two or more undertakings within a group;\n\n(13)\n\n‘associated undertaking’ means an undertaking in which another undertaking has a participating interest, and over whose operating and financial policies that other undertaking exercises significant influence. 
An undertaking is presumed to exercise a significant influence over another undertaking where it has 20 % or more of the shareholders' or members' voting rights in that other undertaking;\n\n(14)\n\n‘investment undertakings’ means:\n\n(a)\n\nundertakings the sole object of which is to invest their funds in various securities, real property and other assets, with the sole aim of spreading investment risks and giving their shareholders the benefit of the results of the management of their assets,\n\n(b)\n\nundertakings associated with investment undertakings with fixed capital, if the sole object of those associated undertakings is to acquire fully paid shares issued by those investment undertakings without prejudice to point (h) of Article 22(1) of Directive 2012/30/EU;\n\n(15)",
812
+ 'and non-European non-financial corporations not subject to the disclosure obligations laid down in Directive 2013/34/EU. That information may be disclosed only once, based on counterparties’ turnover alignment for the general-purpose lending loans, as in the case of the GAR. The first disclosure reference date of this template is as of 31 December 2024. Institutions are not required to disclose this information before 1 January 2025. ---|---|---',
813
+ ]
814
+ embeddings = model.encode(sentences)
815
+ print(embeddings.shape)
816
+ # [3, 768]
817
+
818
+ # Get the similarity scores for the embeddings
819
+ similarities = model.similarity(embeddings, embeddings)
820
+ print(similarities.shape)
821
+ # [3, 3]
822
+ ```
823
+
824
+ <!--
825
+ ### Direct Usage (Transformers)
826
+
827
+ <details><summary>Click to see the direct usage in Transformers</summary>
828
+
829
+ </details>
830
+ -->
831
+
832
+ <!--
833
+ ### Downstream Usage (Sentence Transformers)
834
+
835
+ You can finetune this model on your own dataset.
836
+
837
+ <details><summary>Click to expand</summary>
838
+
839
+ </details>
840
+ -->
841
+
842
+ <!--
843
+ ### Out-of-Scope Use
844
+
845
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
846
+ -->
847
+
848
+ ## Evaluation
849
+
850
+ ### Metrics
851
+
852
+ #### Information Retrieval
853
+
854
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
855
+
856
+ | Metric | Value |
857
+ |:--------------------|:-----------|
858
+ | cosine_accuracy@1 | 0.691 |
859
+ | cosine_accuracy@3 | 0.9109 |
860
+ | cosine_accuracy@5 | 0.9461 |
861
+ | cosine_accuracy@10 | 0.9743 |
862
+ | cosine_precision@1 | 0.691 |
863
+ | cosine_precision@3 | 0.3036 |
864
+ | cosine_precision@5 | 0.1892 |
865
+ | cosine_precision@10 | 0.0974 |
866
+ | cosine_recall@1 | 0.691 |
867
+ | cosine_recall@3 | 0.9109 |
868
+ | cosine_recall@5 | 0.9461 |
869
+ | cosine_recall@10 | 0.9743 |
870
+ | **cosine_ndcg@10** | **0.8472** |
871
+ | cosine_mrr@10 | 0.8048 |
872
+ | cosine_map@100 | 0.8061 |
873
+
874
+ <!--
875
+ ## Bias, Risks and Limitations
876
+
877
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
878
+ -->
879
+
880
+ <!--
881
+ ### Recommendations
882
+
883
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
884
+ -->
885
+
886
+ ## Training Details
887
+
888
+ ### Training Dataset
889
+
890
+ #### Unnamed Dataset
891
+
892
+ * Size: 46,338 training samples
893
+ * Columns: <code>sentence_0</code> and <code>sentence_1</code>
894
+ * Approximate statistics based on the first 1000 samples:
895
+ | | sentence_0 | sentence_1 |
896
+ |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
897
+ | type | string | string |
898
+ | details | <ul><li>min: 13 tokens</li><li>mean: 34.18 tokens</li><li>max: 251 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 231.33 tokens</li><li>max: 2146 tokens</li></ul> |
899
+ * Samples:
900
+ | sentence_0 | sentence_1 |
901
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
902
+ | <code>How is 'energy efficiency' defined in the context of Directive (EU) 2018/2001?</code> | <code>of Directive (EU) 2018/2001; --- --- (8) ‘energy efficiency’ means the ratio of output of performance, service, goods or energy to input of energy; --- --- (9) ‘energy savings’ means an amount of saved energy determined by measuring or estimating consumption, or both,, before and after the implementation of an energy efficiency improvement measure, whilst ensuring normalisation for external conditions that affect energy consumption; --- --- (10) ‘energy efficiency improvement’ means an increase in energy efficiency as a result of any technological, behavioural or economic changes; --- --- (11) ‘energy service’ means the physical benefit, utility or good derived from a combination of energy with energy-efficient technology or with action,</code> |
903
+ | <code>What are the sources of information that the external experts will use to create the list of conflict-affected and high-risk areas?</code> | <code>2.<br><br>The Commission shall call upon external expertise that will provide an indicative, non-exhaustive, regularly updated list of conflict-affected and high-risk areas. That list shall be based on the external experts' analysis of the handbook referred to in paragraph 1 and existing information from, inter alia, academics and supply chain due diligence schemes. Union importers sourcing from areas which are not mentioned on that list shall also maintain their responsibility to comply with the due diligence obligations under this Regulation.<br><br>Article 15<br><br>Committee procedure<br><br>1.<br><br>The Commission shall be assisted by a committee. That committee shall be a committee within the meaning of Regulation (EU) No 182/2011.<br><br>2.</code> |
904
+ | <code>What is the maximum time frame for completing the undertaking according to the technical specifications set out in Annexes II and III after the Directive enters into force?</code> | <code>is undertaken according to the technical specifications set out in Annexes II and III and that it is completed at the latest four years after the date of entry into force of this Directive.<br><br>2. The analyses and reviews mentioned under paragraph 1 shall be reviewed, and if necessary updated at the latest 13 years after the date of entry into force of this Directive and every six years thereafter.<br><br>Article 6<br><br>Register of protected areas</code> |
905
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
906
+ ```json
907
+ {
908
+ "loss": "MultipleNegativesRankingLoss",
909
+ "matryoshka_dims": [
910
+ 768,
911
+ 512,
912
+ 256,
913
+ 128,
914
+ 64
915
+ ],
916
+ "matryoshka_weights": [
917
+ 1,
918
+ 1,
919
+ 1,
920
+ 1,
921
+ 1
922
+ ],
923
+ "n_dims_per_step": -1
924
+ }
925
+ ```
926
+
927
+ ### Training Hyperparameters
928
+ #### Non-Default Hyperparameters
929
+
930
+ - `eval_strategy`: steps
931
+ - `per_device_train_batch_size`: 4
932
+ - `per_device_eval_batch_size`: 4
933
+ - `num_train_epochs`: 4
934
+ - `multi_dataset_batch_sampler`: round_robin
935
+
936
+ #### All Hyperparameters
937
+ <details><summary>Click to expand</summary>
938
+
939
+ - `overwrite_output_dir`: False
940
+ - `do_predict`: False
941
+ - `eval_strategy`: steps
942
+ - `prediction_loss_only`: True
943
+ - `per_device_train_batch_size`: 4
944
+ - `per_device_eval_batch_size`: 4
945
+ - `per_gpu_train_batch_size`: None
946
+ - `per_gpu_eval_batch_size`: None
947
+ - `gradient_accumulation_steps`: 1
948
+ - `eval_accumulation_steps`: None
949
+ - `torch_empty_cache_steps`: None
950
+ - `learning_rate`: 5e-05
951
+ - `weight_decay`: 0.0
952
+ - `adam_beta1`: 0.9
953
+ - `adam_beta2`: 0.999
954
+ - `adam_epsilon`: 1e-08
955
+ - `max_grad_norm`: 1
956
+ - `num_train_epochs`: 4
957
+ - `max_steps`: -1
958
+ - `lr_scheduler_type`: linear
959
+ - `lr_scheduler_kwargs`: {}
960
+ - `warmup_ratio`: 0.0
961
+ - `warmup_steps`: 0
962
+ - `log_level`: passive
963
+ - `log_level_replica`: warning
964
+ - `log_on_each_node`: True
965
+ - `logging_nan_inf_filter`: True
966
+ - `save_safetensors`: True
967
+ - `save_on_each_node`: False
968
+ - `save_only_model`: False
969
+ - `restore_callback_states_from_checkpoint`: False
970
+ - `no_cuda`: False
971
+ - `use_cpu`: False
972
+ - `use_mps_device`: False
973
+ - `seed`: 42
974
+ - `data_seed`: None
975
+ - `jit_mode_eval`: False
976
+ - `use_ipex`: False
977
+ - `bf16`: False
978
+ - `fp16`: False
979
+ - `fp16_opt_level`: O1
980
+ - `half_precision_backend`: auto
981
+ - `bf16_full_eval`: False
982
+ - `fp16_full_eval`: False
983
+ - `tf32`: None
984
+ - `local_rank`: 0
985
+ - `ddp_backend`: None
986
+ - `tpu_num_cores`: None
987
+ - `tpu_metrics_debug`: False
988
+ - `debug`: []
989
+ - `dataloader_drop_last`: False
990
+ - `dataloader_num_workers`: 0
991
+ - `dataloader_prefetch_factor`: None
992
+ - `past_index`: -1
993
+ - `disable_tqdm`: False
994
+ - `remove_unused_columns`: True
995
+ - `label_names`: None
996
+ - `load_best_model_at_end`: False
997
+ - `ignore_data_skip`: False
998
+ - `fsdp`: []
999
+ - `fsdp_min_num_params`: 0
1000
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
1001
+ - `fsdp_transformer_layer_cls_to_wrap`: None
1002
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
1003
+ - `deepspeed`: None
1004
+ - `label_smoothing_factor`: 0.0
1005
+ - `optim`: adamw_torch
1006
+ - `optim_args`: None
1007
+ - `adafactor`: False
1008
+ - `group_by_length`: False
1009
+ - `length_column_name`: length
1010
+ - `ddp_find_unused_parameters`: None
1011
+ - `ddp_bucket_cap_mb`: None
1012
+ - `ddp_broadcast_buffers`: False
1013
+ - `dataloader_pin_memory`: True
1014
+ - `dataloader_persistent_workers`: False
1015
+ - `skip_memory_metrics`: True
1016
+ - `use_legacy_prediction_loop`: False
1017
+ - `push_to_hub`: False
1018
+ - `resume_from_checkpoint`: None
1019
+ - `hub_model_id`: None
1020
+ - `hub_strategy`: every_save
1021
+ - `hub_private_repo`: None
1022
+ - `hub_always_push`: False
1023
+ - `gradient_checkpointing`: False
1024
+ - `gradient_checkpointing_kwargs`: None
1025
+ - `include_inputs_for_metrics`: False
1026
+ - `include_for_metrics`: []
1027
+ - `eval_do_concat_batches`: True
1028
+ - `fp16_backend`: auto
1029
+ - `push_to_hub_model_id`: None
1030
+ - `push_to_hub_organization`: None
1031
+ - `mp_parameters`:
1032
+ - `auto_find_batch_size`: False
1033
+ - `full_determinism`: False
1034
+ - `torchdynamo`: None
1035
+ - `ray_scope`: last
1036
+ - `ddp_timeout`: 1800
1037
+ - `torch_compile`: False
1038
+ - `torch_compile_backend`: None
1039
+ - `torch_compile_mode`: None
1040
+ - `dispatch_batches`: None
1041
+ - `split_batches`: None
1042
+ - `include_tokens_per_second`: False
1043
+ - `include_num_input_tokens_seen`: False
1044
+ - `neftune_noise_alpha`: None
1045
+ - `optim_target_modules`: None
1046
+ - `batch_eval_metrics`: False
1047
+ - `eval_on_start`: False
1048
+ - `use_liger_kernel`: False
1049
+ - `eval_use_gather_object`: False
1050
+ - `average_tokens_across_devices`: False
1051
+ - `prompts`: None
1052
+ - `batch_sampler`: batch_sampler
1053
+ - `multi_dataset_batch_sampler`: round_robin
1054
+
1055
+ </details>
1056
+
1057
+ ### Training Logs
1058
+ | Epoch | Step | Training Loss | cosine_ndcg@10 |
1059
+ |:------:|:-----:|:-------------:|:--------------:|
1060
+ | 0.0432 | 500 | 0.358 | - |
1061
+ | 0.0863 | 1000 | 0.1048 | - |
1062
+ | 0.1295 | 1500 | 0.0827 | - |
1063
+ | 0.1726 | 2000 | 0.067 | 0.7969 |
1064
+ | 0.2158 | 2500 | 0.0491 | - |
1065
+ | 0.2590 | 3000 | 0.0831 | - |
1066
+ | 0.3021 | 3500 | 0.062 | - |
1067
+ | 0.3453 | 4000 | 0.0657 | 0.8050 |
1068
+ | 0.3884 | 4500 | 0.0522 | - |
1069
+ | 0.4316 | 5000 | 0.049 | - |
1070
+ | 0.4748 | 5500 | 0.0426 | - |
1071
+ | 0.5179 | 6000 | 0.0708 | 0.8215 |
1072
+ | 0.5611 | 6500 | 0.0236 | - |
1073
+ | 0.6042 | 7000 | 0.024 | - |
1074
+ | 0.6474 | 7500 | 0.0256 | - |
1075
+ | 0.6905 | 8000 | 0.041 | 0.8105 |
1076
+ | 0.7337 | 8500 | 0.0285 | - |
1077
+ | 0.7769 | 9000 | 0.0249 | - |
1078
+ | 0.8200 | 9500 | 0.0368 | - |
1079
+ | 0.8632 | 10000 | 0.0588 | 0.8118 |
1080
+ | 0.9063 | 10500 | 0.0386 | - |
1081
+ | 0.9495 | 11000 | 0.0456 | - |
1082
+ | 0.9927 | 11500 | 0.0399 | - |
1083
+ | 1.0 | 11585 | - | 0.8184 |
1084
+ | 1.0358 | 12000 | 0.0424 | 0.8239 |
1085
+ | 1.0790 | 12500 | 0.0107 | - |
1086
+ | 1.1221 | 13000 | 0.0279 | - |
1087
+ | 1.1653 | 13500 | 0.0236 | - |
1088
+ | 1.2085 | 14000 | 0.024 | 0.8193 |
1089
+ | 1.2516 | 14500 | 0.0143 | - |
1090
+ | 1.2948 | 15000 | 0.0118 | - |
1091
+ | 1.3379 | 15500 | 0.0078 | - |
1092
+ | 1.3811 | 16000 | 0.023 | 0.8217 |
1093
+ | 1.4243 | 16500 | 0.0239 | - |
1094
+ | 1.4674 | 17000 | 0.0335 | - |
1095
+ | 1.5106 | 17500 | 0.0119 | - |
1096
+ | 1.5537 | 18000 | 0.0411 | 0.8292 |
1097
+ | 1.5969 | 18500 | 0.0168 | - |
1098
+ | 1.6401 | 19000 | 0.0059 | - |
1099
+ | 1.6832 | 19500 | 0.0234 | - |
1100
+ | 1.7264 | 20000 | 0.0184 | 0.8366 |
1101
+ | 1.7695 | 20500 | 0.0128 | - |
1102
+ | 1.8127 | 21000 | 0.0166 | - |
1103
+ | 1.8558 | 21500 | 0.0181 | - |
1104
+ | 1.8990 | 22000 | 0.0148 | 0.8353 |
1105
+ | 1.9422 | 22500 | 0.0225 | - |
1106
+ | 1.9853 | 23000 | 0.0158 | - |
1107
+ | 2.0 | 23170 | - | 0.8360 |
1108
+ | 2.0285 | 23500 | 0.0123 | - |
1109
+ | 2.0716 | 24000 | 0.0173 | 0.8329 |
1110
+ | 2.1148 | 24500 | 0.0167 | - |
1111
+ | 2.1580 | 25000 | 0.0125 | - |
1112
+ | 2.2011 | 25500 | 0.013 | - |
1113
+ | 2.2443 | 26000 | 0.0079 | 0.8338 |
1114
+ | 2.2874 | 26500 | 0.007 | - |
1115
+ | 2.3306 | 27000 | 0.0171 | - |
1116
+ | 2.3738 | 27500 | 0.0058 | - |
1117
+ | 2.4169 | 28000 | 0.0048 | 0.8405 |
1118
+ | 2.4601 | 28500 | 0.005 | - |
1119
+ | 2.5032 | 29000 | 0.0141 | - |
1120
+ | 2.5464 | 29500 | 0.0132 | - |
1121
+ | 2.5896 | 30000 | 0.006 | 0.8461 |
1122
+ | 2.6327 | 30500 | 0.0095 | - |
1123
+ | 2.6759 | 31000 | 0.0061 | - |
1124
+ | 2.7190 | 31500 | 0.0107 | - |
1125
+ | 2.7622 | 32000 | 0.0157 | 0.8451 |
1126
+ | 2.8054 | 32500 | 0.005 | - |
1127
+ | 2.8485 | 33000 | 0.0087 | - |
1128
+ | 2.8917 | 33500 | 0.0064 | - |
1129
+ | 2.9348 | 34000 | 0.005 | 0.8449 |
1130
+ | 2.9780 | 34500 | 0.0115 | - |
1131
+ | 3.0 | 34755 | - | 0.8451 |
1132
+ | 3.0211 | 35000 | 0.0079 | - |
1133
+ | 3.0643 | 35500 | 0.0045 | - |
1134
+ | 3.1075 | 36000 | 0.0029 | 0.8443 |
1135
+ | 3.1506 | 36500 | 0.0161 | - |
1136
+ | 3.1938 | 37000 | 0.0144 | - |
1137
+ | 3.2369 | 37500 | 0.0076 | - |
1138
+ | 3.2801 | 38000 | 0.0157 | 0.8500 |
1139
+ | 3.3233 | 38500 | 0.0039 | - |
1140
+ | 3.3664 | 39000 | 0.0045 | - |
1141
+ | 3.4096 | 39500 | 0.0033 | - |
1142
+ | 3.4527 | 40000 | 0.0064 | 0.8434 |
1143
+ | 3.4959 | 40500 | 0.0054 | - |
1144
+ | 3.5391 | 41000 | 0.0061 | - |
1145
+ | 3.5822 | 41500 | 0.0051 | - |
1146
+ | 3.6254 | 42000 | 0.0019 | 0.8472 |
1147
+
1148
+
1149
+ ### Framework Versions
1150
+ - Python: 3.10.15
1151
+ - Sentence Transformers: 3.4.1
1152
+ - Transformers: 4.49.0
1153
+ - PyTorch: 2.6.0+cu126
1154
+ - Accelerate: 1.5.2
1155
+ - Datasets: 3.4.1
1156
+ - Tokenizers: 0.21.1
1157
+
1158
+ ## Citation
1159
+
1160
+ ### BibTeX
1161
+
1162
+ #### Sentence Transformers
1163
+ ```bibtex
1164
+ @inproceedings{reimers-2019-sentence-bert,
1165
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
1166
+ author = "Reimers, Nils and Gurevych, Iryna",
1167
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
1168
+ month = "11",
1169
+ year = "2019",
1170
+ publisher = "Association for Computational Linguistics",
1171
+ url = "https://arxiv.org/abs/1908.10084",
1172
+ }
1173
+ ```
1174
+
1175
+ #### MatryoshkaLoss
1176
+ ```bibtex
1177
+ @misc{kusupati2024matryoshka,
1178
+ title={Matryoshka Representation Learning},
1179
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
1180
+ year={2024},
1181
+ eprint={2205.13147},
1182
+ archivePrefix={arXiv},
1183
+ primaryClass={cs.LG}
1184
+ }
1185
+ ```
1186
+
1187
+ #### MultipleNegativesRankingLoss
1188
+ ```bibtex
1189
+ @misc{henderson2017efficient,
1190
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
1191
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
1192
+ year={2017},
1193
+ eprint={1705.00652},
1194
+ archivePrefix={arXiv},
1195
+ primaryClass={cs.CL}
1196
+ }
1197
+ ```
1198
+
1199
+ <!--
1200
+ ## Glossary
1201
+
1202
+ *Clearly define terms in order to be accessible across audiences.*
1203
+ -->
1204
+
1205
+ <!--
1206
+ ## Model Card Authors
1207
+
1208
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
1209
+ -->
1210
+
1211
+ <!--
1212
+ ## Model Card Contact
1213
+
1214
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
1215
+ -->
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-modernbert-base",
3
+ "architectures": [
4
+ "ModernBertModel"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 50281,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
+ "cls_token_id": 50281,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 50283,
38
+ "position_embedding_type": "absolute",
39
+ "reference_compile": false,
40
+ "repad_logits_with_grad": false,
41
+ "sep_token_id": 50282,
42
+ "sparse_pred_ignore_index": -100,
43
+ "sparse_prediction": false,
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.49.0",
46
+ "vocab_size": 50368
47
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.49.0",
5
+ "pytorch": "2.6.0+cu126"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d142207c04c435aad0e65bb57b4edb7fa82de7f409696c6b50576caceb655c
3
+ size 596070136
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25837551c5ea01836209ba69f40aff854c8e38884c5d12027898dd74731bea30
3
+ size 1192227066
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d239e3621d79502a0429d2f66b0ee1d9d87731a0b30b289302595590195e7ae0
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127f06add1ffec77cf1dbfde5d47abb256ba37206b1fb51a0c26d412c7203b0e
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 1000000000000000019884624838656,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizer",
944
+ "unk_token": "[UNK]"
945
+ }
trainer_state.json ADDED
@@ -0,0 +1,1083 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.6253776435045317,
5
+ "eval_steps": 2000,
6
+ "global_step": 42000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04315925766076824,
13
+ "grad_norm": 30.676715850830078,
14
+ "learning_rate": 2.1579628830384117e-06,
15
+ "loss": 0.358,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.08631851532153648,
20
+ "grad_norm": 19.524194717407227,
21
+ "learning_rate": 4.3159257660768235e-06,
22
+ "loss": 0.1048,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.1294777729823047,
27
+ "grad_norm": 0.00297492160461843,
28
+ "learning_rate": 6.473888649115235e-06,
29
+ "loss": 0.0827,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.17263703064307295,
34
+ "grad_norm": 8.900677680969238,
35
+ "learning_rate": 8.631851532153647e-06,
36
+ "loss": 0.067,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.17263703064307295,
41
+ "eval_cosine_accuracy@1": 0.6191955808734679,
42
+ "eval_cosine_accuracy@10": 0.9514931814258588,
43
+ "eval_cosine_accuracy@3": 0.8606939409632315,
44
+ "eval_cosine_accuracy@5": 0.909891248058001,
45
+ "eval_cosine_map@100": 0.748128574680106,
46
+ "eval_cosine_mrr@10": 0.7459635876906734,
47
+ "eval_cosine_ndcg@10": 0.7968614059582585,
48
+ "eval_cosine_precision@1": 0.6191955808734679,
49
+ "eval_cosine_precision@10": 0.09514931814258587,
50
+ "eval_cosine_precision@3": 0.28689798032107716,
51
+ "eval_cosine_precision@5": 0.18197824961160017,
52
+ "eval_cosine_recall@1": 0.6191955808734679,
53
+ "eval_cosine_recall@10": 0.9514931814258588,
54
+ "eval_cosine_recall@3": 0.8606939409632315,
55
+ "eval_cosine_recall@5": 0.909891248058001,
56
+ "eval_runtime": 468.0233,
57
+ "eval_samples_per_second": 0.0,
58
+ "eval_steps_per_second": 0.0,
59
+ "step": 2000
60
+ },
61
+ {
62
+ "epoch": 0.21579628830384118,
63
+ "grad_norm": 0.12022869288921356,
64
+ "learning_rate": 1.0789814415192059e-05,
65
+ "loss": 0.0491,
66
+ "step": 2500
67
+ },
68
+ {
69
+ "epoch": 0.2589555459646094,
70
+ "grad_norm": 0.07568053156137466,
71
+ "learning_rate": 1.294777729823047e-05,
72
+ "loss": 0.0831,
73
+ "step": 3000
74
+ },
75
+ {
76
+ "epoch": 0.3021148036253776,
77
+ "grad_norm": 0.0246192067861557,
78
+ "learning_rate": 1.5105740181268884e-05,
79
+ "loss": 0.062,
80
+ "step": 3500
81
+ },
82
+ {
83
+ "epoch": 0.3452740612861459,
84
+ "grad_norm": 0.009853623807430267,
85
+ "learning_rate": 1.7263703064307294e-05,
86
+ "loss": 0.0657,
87
+ "step": 4000
88
+ },
89
+ {
90
+ "epoch": 0.3452740612861459,
91
+ "eval_cosine_accuracy@1": 0.6362851717590196,
92
+ "eval_cosine_accuracy@10": 0.9523562920766442,
93
+ "eval_cosine_accuracy@3": 0.8606939409632315,
94
+ "eval_cosine_accuracy@5": 0.9110996029691006,
95
+ "eval_cosine_map@100": 0.7589134849598074,
96
+ "eval_cosine_mrr@10": 0.756632799848751,
97
+ "eval_cosine_ndcg@10": 0.8050365772218437,
98
+ "eval_cosine_precision@1": 0.6362851717590196,
99
+ "eval_cosine_precision@10": 0.09523562920766442,
100
+ "eval_cosine_precision@3": 0.28689798032107716,
101
+ "eval_cosine_precision@5": 0.1822199205938201,
102
+ "eval_cosine_recall@1": 0.6362851717590196,
103
+ "eval_cosine_recall@10": 0.9523562920766442,
104
+ "eval_cosine_recall@3": 0.8606939409632315,
105
+ "eval_cosine_recall@5": 0.9110996029691006,
106
+ "eval_runtime": 467.8258,
107
+ "eval_samples_per_second": 0.0,
108
+ "eval_steps_per_second": 0.0,
109
+ "step": 4000
110
+ },
111
+ {
112
+ "epoch": 0.38843331894691413,
113
+ "grad_norm": 0.017763391137123108,
114
+ "learning_rate": 1.9421665947345706e-05,
115
+ "loss": 0.0522,
116
+ "step": 4500
117
+ },
118
+ {
119
+ "epoch": 0.43159257660768235,
120
+ "grad_norm": 21.21623420715332,
121
+ "learning_rate": 1.982446885041485e-05,
122
+ "loss": 0.049,
123
+ "step": 5000
124
+ },
125
+ {
126
+ "epoch": 0.4747518342684506,
127
+ "grad_norm": 0.13613158464431763,
128
+ "learning_rate": 1.958467219797612e-05,
129
+ "loss": 0.0426,
130
+ "step": 5500
131
+ },
132
+ {
133
+ "epoch": 0.5179110919292188,
134
+ "grad_norm": 0.1645500212907791,
135
+ "learning_rate": 1.9344875545537384e-05,
136
+ "loss": 0.0708,
137
+ "step": 6000
138
+ },
139
+ {
140
+ "epoch": 0.5179110919292188,
141
+ "eval_cosine_accuracy@1": 0.6526842741239427,
142
+ "eval_cosine_accuracy@10": 0.9642672190574831,
143
+ "eval_cosine_accuracy@3": 0.8865872604867944,
144
+ "eval_cosine_accuracy@5": 0.9287070602451234,
145
+ "eval_cosine_map@100": 0.7759321604397249,
146
+ "eval_cosine_mrr@10": 0.7742270364616298,
147
+ "eval_cosine_ndcg@10": 0.8214808830487713,
148
+ "eval_cosine_precision@1": 0.6526842741239427,
149
+ "eval_cosine_precision@10": 0.0964267219057483,
150
+ "eval_cosine_precision@3": 0.2955290868289315,
151
+ "eval_cosine_precision@5": 0.1857414120490247,
152
+ "eval_cosine_recall@1": 0.6526842741239427,
153
+ "eval_cosine_recall@10": 0.9642672190574831,
154
+ "eval_cosine_recall@3": 0.8865872604867944,
155
+ "eval_cosine_recall@5": 0.9287070602451234,
156
+ "eval_runtime": 467.7458,
157
+ "eval_samples_per_second": 0.0,
158
+ "eval_steps_per_second": 0.0,
159
+ "step": 6000
160
+ },
161
+ {
162
+ "epoch": 0.561070349589987,
163
+ "grad_norm": 0.3336288332939148,
164
+ "learning_rate": 1.9105078893098655e-05,
165
+ "loss": 0.0236,
166
+ "step": 6500
167
+ },
168
+ {
169
+ "epoch": 0.6042296072507553,
170
+ "grad_norm": 0.011359921656548977,
171
+ "learning_rate": 1.886528224065992e-05,
172
+ "loss": 0.024,
173
+ "step": 7000
174
+ },
175
+ {
176
+ "epoch": 0.6473888649115235,
177
+ "grad_norm": 0.0021573721896857023,
178
+ "learning_rate": 1.8625485588221192e-05,
179
+ "loss": 0.0256,
180
+ "step": 7500
181
+ },
182
+ {
183
+ "epoch": 0.6905481225722918,
184
+ "grad_norm": 0.024769997224211693,
185
+ "learning_rate": 1.8385688935782457e-05,
186
+ "loss": 0.041,
187
+ "step": 8000
188
+ },
189
+ {
190
+ "epoch": 0.6905481225722918,
191
+ "eval_cosine_accuracy@1": 0.6390471258415329,
192
+ "eval_cosine_accuracy@10": 0.9573623338511997,
193
+ "eval_cosine_accuracy@3": 0.8693250474710857,
194
+ "eval_cosine_accuracy@5": 0.9195580873467979,
195
+ "eval_cosine_map@100": 0.7640704294756044,
196
+ "eval_cosine_mrr@10": 0.762041421091137,
197
+ "eval_cosine_ndcg@10": 0.8104943817099518,
198
+ "eval_cosine_precision@1": 0.6390471258415329,
199
+ "eval_cosine_precision@10": 0.09573623338511995,
200
+ "eval_cosine_precision@3": 0.2897750158236953,
201
+ "eval_cosine_precision@5": 0.18391161746935958,
202
+ "eval_cosine_recall@1": 0.6390471258415329,
203
+ "eval_cosine_recall@10": 0.9573623338511997,
204
+ "eval_cosine_recall@3": 0.8693250474710857,
205
+ "eval_cosine_recall@5": 0.9195580873467979,
206
+ "eval_runtime": 467.5761,
207
+ "eval_samples_per_second": 0.0,
208
+ "eval_steps_per_second": 0.0,
209
+ "step": 8000
210
+ },
211
+ {
212
+ "epoch": 0.73370738023306,
213
+ "grad_norm": 0.001473304582759738,
214
+ "learning_rate": 1.8145892283343725e-05,
215
+ "loss": 0.0285,
216
+ "step": 8500
217
+ },
218
+ {
219
+ "epoch": 0.7768666378938283,
220
+ "grad_norm": 0.002119662007316947,
221
+ "learning_rate": 1.7906095630904994e-05,
222
+ "loss": 0.0249,
223
+ "step": 9000
224
+ },
225
+ {
226
+ "epoch": 0.8200258955545965,
227
+ "grad_norm": 0.035019177943468094,
228
+ "learning_rate": 1.7666298978466262e-05,
229
+ "loss": 0.0368,
230
+ "step": 9500
231
+ },
232
+ {
233
+ "epoch": 0.8631851532153647,
234
+ "grad_norm": 0.2664908468723297,
235
+ "learning_rate": 1.742650232602753e-05,
236
+ "loss": 0.0588,
237
+ "step": 10000
238
+ },
239
+ {
240
+ "epoch": 0.8631851532153647,
241
+ "eval_cosine_accuracy@1": 0.6407733471431037,
242
+ "eval_cosine_accuracy@10": 0.9589159330226135,
243
+ "eval_cosine_accuracy@3": 0.8734679785948558,
244
+ "eval_cosine_accuracy@5": 0.9204211979975833,
245
+ "eval_cosine_map@100": 0.7652575174635105,
246
+ "eval_cosine_mrr@10": 0.7632412818974197,
247
+ "eval_cosine_ndcg@10": 0.811775458664963,
248
+ "eval_cosine_precision@1": 0.6407733471431037,
249
+ "eval_cosine_precision@10": 0.09589159330226135,
250
+ "eval_cosine_precision@3": 0.2911559928649519,
251
+ "eval_cosine_precision@5": 0.18408423959951664,
252
+ "eval_cosine_recall@1": 0.6407733471431037,
253
+ "eval_cosine_recall@10": 0.9589159330226135,
254
+ "eval_cosine_recall@3": 0.8734679785948558,
255
+ "eval_cosine_recall@5": 0.9204211979975833,
256
+ "eval_runtime": 467.8166,
257
+ "eval_samples_per_second": 0.0,
258
+ "eval_steps_per_second": 0.0,
259
+ "step": 10000
260
+ },
261
+ {
262
+ "epoch": 0.9063444108761329,
263
+ "grad_norm": 0.032082412391901016,
264
+ "learning_rate": 1.71867056735888e-05,
265
+ "loss": 0.0386,
266
+ "step": 10500
267
+ },
268
+ {
269
+ "epoch": 0.9495036685369012,
270
+ "grad_norm": 8.98410415649414,
271
+ "learning_rate": 1.6946909021150067e-05,
272
+ "loss": 0.0456,
273
+ "step": 11000
274
+ },
275
+ {
276
+ "epoch": 0.9926629261976694,
277
+ "grad_norm": 0.002887778216972947,
278
+ "learning_rate": 1.6707112368711332e-05,
279
+ "loss": 0.0399,
280
+ "step": 11500
281
+ },
282
+ {
283
+ "epoch": 1.0358221838584376,
284
+ "grad_norm": 0.039170317351818085,
285
+ "learning_rate": 1.6467315716272604e-05,
286
+ "loss": 0.0424,
287
+ "step": 12000
288
+ },
289
+ {
290
+ "epoch": 1.0358221838584376,
291
+ "eval_cosine_accuracy@1": 0.6606248921111687,
292
+ "eval_cosine_accuracy@10": 0.9654755739685827,
293
+ "eval_cosine_accuracy@3": 0.8808907301916106,
294
+ "eval_cosine_accuracy@5": 0.9300880372863801,
295
+ "eval_cosine_map@100": 0.7789505370634054,
296
+ "eval_cosine_mrr@10": 0.7772537463112309,
297
+ "eval_cosine_ndcg@10": 0.8239196088222247,
298
+ "eval_cosine_precision@1": 0.6606248921111687,
299
+ "eval_cosine_precision@10": 0.09654755739685827,
300
+ "eval_cosine_precision@3": 0.2936302433972035,
301
+ "eval_cosine_precision@5": 0.186017607457276,
302
+ "eval_cosine_recall@1": 0.6606248921111687,
303
+ "eval_cosine_recall@10": 0.9654755739685827,
304
+ "eval_cosine_recall@3": 0.8808907301916106,
305
+ "eval_cosine_recall@5": 0.9300880372863801,
306
+ "eval_runtime": 467.7683,
307
+ "eval_samples_per_second": 0.0,
308
+ "eval_steps_per_second": 0.0,
309
+ "step": 12000
310
+ },
311
+ {
312
+ "epoch": 1.0789814415192058,
313
+ "grad_norm": 0.07316175103187561,
314
+ "learning_rate": 1.622751906383387e-05,
315
+ "loss": 0.0107,
316
+ "step": 12500
317
+ },
318
+ {
319
+ "epoch": 1.122140699179974,
320
+ "grad_norm": 0.03618592023849487,
321
+ "learning_rate": 1.598772241139514e-05,
322
+ "loss": 0.0279,
323
+ "step": 13000
324
+ },
325
+ {
326
+ "epoch": 1.1652999568407423,
327
+ "grad_norm": 0.023356635123491287,
328
+ "learning_rate": 1.5747925758956405e-05,
329
+ "loss": 0.0236,
330
+ "step": 13500
331
+ },
332
+ {
333
+ "epoch": 1.2084592145015105,
334
+ "grad_norm": 0.002293772529810667,
335
+ "learning_rate": 1.5508129106517674e-05,
336
+ "loss": 0.024,
337
+ "step": 14000
338
+ },
339
+ {
340
+ "epoch": 1.2084592145015105,
341
+ "eval_cosine_accuracy@1": 0.6506128085620576,
342
+ "eval_cosine_accuracy@10": 0.9640945969273261,
343
+ "eval_cosine_accuracy@3": 0.8803728638011393,
344
+ "eval_cosine_accuracy@5": 0.9266355946832384,
345
+ "eval_cosine_map@100": 0.7732572758885798,
346
+ "eval_cosine_mrr@10": 0.7715017303313533,
347
+ "eval_cosine_ndcg@10": 0.8192838549207232,
348
+ "eval_cosine_precision@1": 0.6506128085620576,
349
+ "eval_cosine_precision@10": 0.09640945969273261,
350
+ "eval_cosine_precision@3": 0.29345762126704644,
351
+ "eval_cosine_precision@5": 0.18532711893664763,
352
+ "eval_cosine_recall@1": 0.6506128085620576,
353
+ "eval_cosine_recall@10": 0.9640945969273261,
354
+ "eval_cosine_recall@3": 0.8803728638011393,
355
+ "eval_cosine_recall@5": 0.9266355946832384,
356
+ "eval_runtime": 467.8783,
357
+ "eval_samples_per_second": 0.0,
358
+ "eval_steps_per_second": 0.0,
359
+ "step": 14000
360
+ },
361
+ {
362
+ "epoch": 1.2516184721622787,
363
+ "grad_norm": 0.007560160476714373,
364
+ "learning_rate": 1.5268332454078942e-05,
365
+ "loss": 0.0143,
366
+ "step": 14500
367
+ },
368
+ {
369
+ "epoch": 1.2947777298230472,
370
+ "grad_norm": 0.004202102776616812,
371
+ "learning_rate": 1.5028535801640209e-05,
372
+ "loss": 0.0118,
373
+ "step": 15000
374
+ },
375
+ {
376
+ "epoch": 1.3379369874838152,
377
+ "grad_norm": 0.00022126469411887228,
378
+ "learning_rate": 1.4788739149201479e-05,
379
+ "loss": 0.0078,
380
+ "step": 15500
381
+ },
382
+ {
383
+ "epoch": 1.3810962451445836,
384
+ "grad_norm": 0.011956814676523209,
385
+ "learning_rate": 1.4548942496762745e-05,
386
+ "loss": 0.023,
387
+ "step": 16000
388
+ },
389
+ {
390
+ "epoch": 1.3810962451445836,
391
+ "eval_cosine_accuracy@1": 0.6533747626445711,
392
+ "eval_cosine_accuracy@10": 0.9642672190574831,
393
+ "eval_cosine_accuracy@3": 0.8826169514931814,
394
+ "eval_cosine_accuracy@5": 0.9302606594165372,
395
+ "eval_cosine_map@100": 0.7763076224553367,
396
+ "eval_cosine_mrr@10": 0.7745393318153555,
397
+ "eval_cosine_ndcg@10": 0.8216976031852626,
398
+ "eval_cosine_precision@1": 0.6533747626445711,
399
+ "eval_cosine_precision@10": 0.0964267219057483,
400
+ "eval_cosine_precision@3": 0.2942056504977271,
401
+ "eval_cosine_precision@5": 0.18605213188330738,
402
+ "eval_cosine_recall@1": 0.6533747626445711,
403
+ "eval_cosine_recall@10": 0.9642672190574831,
404
+ "eval_cosine_recall@3": 0.8826169514931814,
405
+ "eval_cosine_recall@5": 0.9302606594165372,
406
+ "eval_runtime": 467.7532,
407
+ "eval_samples_per_second": 0.0,
408
+ "eval_steps_per_second": 0.0,
409
+ "step": 16000
410
+ },
411
+ {
412
+ "epoch": 1.4242555028053516,
413
+ "grad_norm": 0.008947977796196938,
414
+ "learning_rate": 1.4309145844324015e-05,
415
+ "loss": 0.0239,
416
+ "step": 16500
417
+ },
418
+ {
419
+ "epoch": 1.46741476046612,
420
+ "grad_norm": 0.20168237388134003,
421
+ "learning_rate": 1.4069349191885282e-05,
422
+ "loss": 0.0335,
423
+ "step": 17000
424
+ },
425
+ {
426
+ "epoch": 1.510574018126888,
427
+ "grad_norm": 0.003233299357816577,
428
+ "learning_rate": 1.3829552539446552e-05,
429
+ "loss": 0.0119,
430
+ "step": 17500
431
+ },
432
+ {
433
+ "epoch": 1.5537332757876565,
434
+ "grad_norm": 0.013063711114227772,
435
+ "learning_rate": 1.3589755887007819e-05,
436
+ "loss": 0.0411,
437
+ "step": 18000
438
+ },
439
+ {
440
+ "epoch": 1.5537332757876565,
441
+ "eval_cosine_accuracy@1": 0.6644225789746245,
442
+ "eval_cosine_accuracy@10": 0.9680649059209391,
443
+ "eval_cosine_accuracy@3": 0.8898670809597791,
444
+ "eval_cosine_accuracy@5": 0.9335404798895218,
445
+ "eval_cosine_map@100": 0.7848911785594413,
446
+ "eval_cosine_mrr@10": 0.7833323743214994,
447
+ "eval_cosine_ndcg@10": 0.8292454833247894,
448
+ "eval_cosine_precision@1": 0.6644225789746245,
449
+ "eval_cosine_precision@10": 0.09680649059209388,
450
+ "eval_cosine_precision@3": 0.2966223603199264,
451
+ "eval_cosine_precision@5": 0.18670809597790436,
452
+ "eval_cosine_recall@1": 0.6644225789746245,
453
+ "eval_cosine_recall@10": 0.9680649059209391,
454
+ "eval_cosine_recall@3": 0.8898670809597791,
455
+ "eval_cosine_recall@5": 0.9335404798895218,
456
+ "eval_runtime": 467.9161,
457
+ "eval_samples_per_second": 0.0,
458
+ "eval_steps_per_second": 0.0,
459
+ "step": 18000
460
+ },
461
+ {
462
+ "epoch": 1.5968925334484245,
463
+ "grad_norm": 3.0231621265411377,
464
+ "learning_rate": 1.3349959234569087e-05,
465
+ "loss": 0.0168,
466
+ "step": 18500
467
+ },
468
+ {
469
+ "epoch": 1.640051791109193,
470
+ "grad_norm": 0.08278048038482666,
471
+ "learning_rate": 1.3110162582130355e-05,
472
+ "loss": 0.0059,
473
+ "step": 19000
474
+ },
475
+ {
476
+ "epoch": 1.6832110487699612,
477
+ "grad_norm": 0.10015950351953506,
478
+ "learning_rate": 1.2870365929691622e-05,
479
+ "loss": 0.0234,
480
+ "step": 19500
481
+ },
482
+ {
483
+ "epoch": 1.7263703064307294,
484
+ "grad_norm": 2.1657984256744385,
485
+ "learning_rate": 1.263056927725289e-05,
486
+ "loss": 0.0184,
487
+ "step": 20000
488
+ },
489
+ {
490
+ "epoch": 1.7263703064307294,
491
+ "eval_cosine_accuracy@1": 0.6768513723459347,
492
+ "eval_cosine_accuracy@10": 0.969963749352667,
493
+ "eval_cosine_accuracy@3": 0.897807698947005,
494
+ "eval_cosine_accuracy@5": 0.9369929224926635,
495
+ "eval_cosine_map@100": 0.7938770196077543,
496
+ "eval_cosine_mrr@10": 0.7923516066188262,
497
+ "eval_cosine_ndcg@10": 0.8365875778541227,
498
+ "eval_cosine_precision@1": 0.6768513723459347,
499
+ "eval_cosine_precision@10": 0.09699637493526668,
500
+ "eval_cosine_precision@3": 0.29926923298233504,
501
+ "eval_cosine_precision@5": 0.1873985844985327,
502
+ "eval_cosine_recall@1": 0.6768513723459347,
503
+ "eval_cosine_recall@10": 0.969963749352667,
504
+ "eval_cosine_recall@3": 0.897807698947005,
505
+ "eval_cosine_recall@5": 0.9369929224926635,
506
+ "eval_runtime": 467.8044,
507
+ "eval_samples_per_second": 0.0,
508
+ "eval_steps_per_second": 0.0,
509
+ "step": 20000
510
+ },
511
+ {
512
+ "epoch": 1.7695295640914976,
513
+ "grad_norm": 1.5666255950927734,
514
+ "learning_rate": 1.2390772624814159e-05,
515
+ "loss": 0.0128,
516
+ "step": 20500
517
+ },
518
+ {
519
+ "epoch": 1.8126888217522659,
520
+ "grad_norm": 0.00032274972181767225,
521
+ "learning_rate": 1.2150975972375427e-05,
522
+ "loss": 0.0166,
523
+ "step": 21000
524
+ },
525
+ {
526
+ "epoch": 1.855848079413034,
527
+ "grad_norm": 0.051935628056526184,
528
+ "learning_rate": 1.1911179319936694e-05,
529
+ "loss": 0.0181,
530
+ "step": 21500
531
+ },
532
+ {
533
+ "epoch": 1.8990073370738023,
534
+ "grad_norm": 0.02546406351029873,
535
+ "learning_rate": 1.1671382667497964e-05,
536
+ "loss": 0.0148,
537
+ "step": 22000
538
+ },
539
+ {
540
+ "epoch": 1.8990073370738023,
541
+ "eval_cosine_accuracy@1": 0.6744346625237355,
542
+ "eval_cosine_accuracy@10": 0.9697911272225099,
543
+ "eval_cosine_accuracy@3": 0.8971172104263767,
544
+ "eval_cosine_accuracy@5": 0.9388917659243915,
545
+ "eval_cosine_map@100": 0.792274316391964,
546
+ "eval_cosine_mrr@10": 0.7907476593261165,
547
+ "eval_cosine_ndcg@10": 0.8353359235071491,
548
+ "eval_cosine_precision@1": 0.6744346625237355,
549
+ "eval_cosine_precision@10": 0.09697911272225099,
550
+ "eval_cosine_precision@3": 0.2990390701421256,
551
+ "eval_cosine_precision@5": 0.1877783531848783,
552
+ "eval_cosine_recall@1": 0.6744346625237355,
553
+ "eval_cosine_recall@10": 0.9697911272225099,
554
+ "eval_cosine_recall@3": 0.8971172104263767,
555
+ "eval_cosine_recall@5": 0.9388917659243915,
556
+ "eval_runtime": 467.8952,
557
+ "eval_samples_per_second": 0.0,
558
+ "eval_steps_per_second": 0.0,
559
+ "step": 22000
560
+ },
561
+ {
562
+ "epoch": 1.9421665947345705,
563
+ "grad_norm": 0.009108115918934345,
564
+ "learning_rate": 1.143158601505923e-05,
565
+ "loss": 0.0225,
566
+ "step": 22500
567
+ },
568
+ {
569
+ "epoch": 1.9853258523953388,
570
+ "grad_norm": 0.06883949786424637,
571
+ "learning_rate": 1.1191789362620497e-05,
572
+ "loss": 0.0158,
573
+ "step": 23000
574
+ },
575
+ {
576
+ "epoch": 2.028485110056107,
577
+ "grad_norm": 0.00019052527204621583,
578
+ "learning_rate": 1.0951992710181767e-05,
579
+ "loss": 0.0123,
580
+ "step": 23500
581
+ },
582
+ {
583
+ "epoch": 2.071644367716875,
584
+ "grad_norm": 0.005655207671225071,
585
+ "learning_rate": 1.0712196057743034e-05,
586
+ "loss": 0.0173,
587
+ "step": 24000
588
+ },
589
+ {
590
+ "epoch": 2.071644367716875,
591
+ "eval_cosine_accuracy@1": 0.6718453305713793,
592
+ "eval_cosine_accuracy@10": 0.9685827723114103,
593
+ "eval_cosine_accuracy@3": 0.8934921456930779,
594
+ "eval_cosine_accuracy@5": 0.9383738995339203,
595
+ "eval_cosine_map@100": 0.7895192117982024,
596
+ "eval_cosine_mrr@10": 0.7879250134946668,
597
+ "eval_cosine_ndcg@10": 0.832874525127316,
598
+ "eval_cosine_precision@1": 0.6718453305713793,
599
+ "eval_cosine_precision@10": 0.09685827723114103,
600
+ "eval_cosine_precision@3": 0.297830715231026,
601
+ "eval_cosine_precision@5": 0.18767477990678402,
602
+ "eval_cosine_recall@1": 0.6718453305713793,
603
+ "eval_cosine_recall@10": 0.9685827723114103,
604
+ "eval_cosine_recall@3": 0.8934921456930779,
605
+ "eval_cosine_recall@5": 0.9383738995339203,
606
+ "eval_runtime": 468.4558,
607
+ "eval_samples_per_second": 0.0,
608
+ "eval_steps_per_second": 0.0,
609
+ "step": 24000
610
+ },
611
+ {
612
+ "epoch": 2.1148036253776437,
613
+ "grad_norm": 0.1119648739695549,
614
+ "learning_rate": 1.0472399405304304e-05,
615
+ "loss": 0.0167,
616
+ "step": 24500
617
+ },
618
+ {
619
+ "epoch": 2.1579628830384117,
620
+ "grad_norm": 0.03796195238828659,
621
+ "learning_rate": 1.023260275286557e-05,
622
+ "loss": 0.0125,
623
+ "step": 25000
624
+ },
625
+ {
626
+ "epoch": 2.20112214069918,
627
+ "grad_norm": 0.012651159428060055,
628
+ "learning_rate": 9.992806100426838e-06,
629
+ "loss": 0.013,
630
+ "step": 25500
631
+ },
632
+ {
633
+ "epoch": 2.244281398359948,
634
+ "grad_norm": 0.0021349990274757147,
635
+ "learning_rate": 9.753009447988107e-06,
636
+ "loss": 0.0079,
637
+ "step": 26000
638
+ },
639
+ {
640
+ "epoch": 2.244281398359948,
641
+ "eval_cosine_accuracy@1": 0.669255998619023,
642
+ "eval_cosine_accuracy@10": 0.9709994821336095,
643
+ "eval_cosine_accuracy@3": 0.8950457448644916,
644
+ "eval_cosine_accuracy@5": 0.9390643880545486,
645
+ "eval_cosine_map@100": 0.7897457483356454,
646
+ "eval_cosine_mrr@10": 0.7882845059308039,
647
+ "eval_cosine_ndcg@10": 0.8337888145070348,
648
+ "eval_cosine_precision@1": 0.669255998619023,
649
+ "eval_cosine_precision@10": 0.09709994821336093,
650
+ "eval_cosine_precision@3": 0.29834858162149724,
651
+ "eval_cosine_precision@5": 0.18781287761090973,
652
+ "eval_cosine_recall@1": 0.669255998619023,
653
+ "eval_cosine_recall@10": 0.9709994821336095,
654
+ "eval_cosine_recall@3": 0.8950457448644916,
655
+ "eval_cosine_recall@5": 0.9390643880545486,
656
+ "eval_runtime": 467.762,
657
+ "eval_samples_per_second": 0.0,
658
+ "eval_steps_per_second": 0.0,
659
+ "step": 26000
660
+ },
661
+ {
662
+ "epoch": 2.2874406560207166,
663
+ "grad_norm": 0.4521012306213379,
664
+ "learning_rate": 9.513212795549375e-06,
665
+ "loss": 0.007,
666
+ "step": 26500
667
+ },
668
+ {
669
+ "epoch": 2.3305999136814846,
670
+ "grad_norm": 0.0015283157117664814,
671
+ "learning_rate": 9.273416143110643e-06,
672
+ "loss": 0.0171,
673
+ "step": 27000
674
+ },
675
+ {
676
+ "epoch": 2.373759171342253,
677
+ "grad_norm": 0.0033215314615517855,
678
+ "learning_rate": 9.033619490671912e-06,
679
+ "loss": 0.0058,
680
+ "step": 27500
681
+ },
682
+ {
683
+ "epoch": 2.416918429003021,
684
+ "grad_norm": 4.302379131317139,
685
+ "learning_rate": 8.793822838233178e-06,
686
+ "loss": 0.0048,
687
+ "step": 28000
688
+ },
689
+ {
690
+ "epoch": 2.416918429003021,
691
+ "eval_cosine_accuracy@1": 0.6825479026411186,
692
+ "eval_cosine_accuracy@10": 0.9718625927843949,
693
+ "eval_cosine_accuracy@3": 0.8993612981184188,
694
+ "eval_cosine_accuracy@5": 0.9390643880545486,
695
+ "eval_cosine_map@100": 0.7983751737002095,
696
+ "eval_cosine_mrr@10": 0.7969948679166703,
697
+ "eval_cosine_ndcg@10": 0.8405363983140419,
698
+ "eval_cosine_precision@1": 0.6825479026411186,
699
+ "eval_cosine_precision@10": 0.09718625927843948,
700
+ "eval_cosine_precision@3": 0.2997870993728063,
701
+ "eval_cosine_precision@5": 0.18781287761090973,
702
+ "eval_cosine_recall@1": 0.6825479026411186,
703
+ "eval_cosine_recall@10": 0.9718625927843949,
704
+ "eval_cosine_recall@3": 0.8993612981184188,
705
+ "eval_cosine_recall@5": 0.9390643880545486,
706
+ "eval_runtime": 467.6926,
707
+ "eval_samples_per_second": 0.0,
708
+ "eval_steps_per_second": 0.0,
709
+ "step": 28000
710
+ },
711
+ {
712
+ "epoch": 2.4600776866637895,
713
+ "grad_norm": 0.001049822778441012,
714
+ "learning_rate": 8.554026185794447e-06,
715
+ "loss": 0.005,
716
+ "step": 28500
717
+ },
718
+ {
719
+ "epoch": 2.5032369443245575,
720
+ "grad_norm": 0.0011170560028403997,
721
+ "learning_rate": 8.314229533355715e-06,
722
+ "loss": 0.0141,
723
+ "step": 29000
724
+ },
725
+ {
726
+ "epoch": 2.546396201985326,
727
+ "grad_norm": 0.0026090971659868956,
728
+ "learning_rate": 8.074432880916982e-06,
729
+ "loss": 0.0132,
730
+ "step": 29500
731
+ },
732
+ {
733
+ "epoch": 2.5895554596460943,
734
+ "grad_norm": 7.936817564768717e-05,
735
+ "learning_rate": 7.83463622847825e-06,
736
+ "loss": 0.006,
737
+ "step": 30000
738
+ },
739
+ {
740
+ "epoch": 2.5895554596460943,
741
+ "eval_cosine_accuracy@1": 0.6911790091489729,
742
+ "eval_cosine_accuracy@10": 0.9735888140859659,
743
+ "eval_cosine_accuracy@3": 0.9092007595373727,
744
+ "eval_cosine_accuracy@5": 0.9442430519592612,
745
+ "eval_cosine_map@100": 0.8050289389600185,
746
+ "eval_cosine_mrr@10": 0.8036913735515502,
747
+ "eval_cosine_ndcg@10": 0.8461133955612519,
748
+ "eval_cosine_precision@1": 0.6911790091489729,
749
+ "eval_cosine_precision@10": 0.09735888140859657,
750
+ "eval_cosine_precision@3": 0.3030669198457909,
751
+ "eval_cosine_precision@5": 0.18884861039185225,
752
+ "eval_cosine_recall@1": 0.6911790091489729,
753
+ "eval_cosine_recall@10": 0.9735888140859659,
754
+ "eval_cosine_recall@3": 0.9092007595373727,
755
+ "eval_cosine_recall@5": 0.9442430519592612,
756
+ "eval_runtime": 467.8028,
757
+ "eval_samples_per_second": 0.0,
758
+ "eval_steps_per_second": 0.0,
759
+ "step": 30000
760
+ },
761
+ {
762
+ "epoch": 2.6327147173068624,
763
+ "grad_norm": 0.014025676064193249,
764
+ "learning_rate": 7.5948395760395184e-06,
765
+ "loss": 0.0095,
766
+ "step": 30500
767
+ },
768
+ {
769
+ "epoch": 2.6758739749676304,
770
+ "grad_norm": 0.0240753386169672,
771
+ "learning_rate": 7.355042923600787e-06,
772
+ "loss": 0.0061,
773
+ "step": 31000
774
+ },
775
+ {
776
+ "epoch": 2.719033232628399,
777
+ "grad_norm": 0.051389552652835846,
778
+ "learning_rate": 7.115246271162055e-06,
779
+ "loss": 0.0107,
780
+ "step": 31500
781
+ },
782
+ {
783
+ "epoch": 2.7621924902891672,
784
+ "grad_norm": 0.0053047193214297295,
785
+ "learning_rate": 6.875449618723323e-06,
786
+ "loss": 0.0157,
787
+ "step": 32000
788
+ },
789
+ {
790
+ "epoch": 2.7621924902891672,
791
+ "eval_cosine_accuracy@1": 0.689452787847402,
792
+ "eval_cosine_accuracy@10": 0.9723804591748663,
793
+ "eval_cosine_accuracy@3": 0.9074745382358018,
794
+ "eval_cosine_accuracy@5": 0.9442430519592612,
795
+ "eval_cosine_map@100": 0.8041420474637542,
796
+ "eval_cosine_mrr@10": 0.8027525694667068,
797
+ "eval_cosine_ndcg@10": 0.8451171490975874,
798
+ "eval_cosine_precision@1": 0.689452787847402,
799
+ "eval_cosine_precision@10": 0.09723804591748661,
800
+ "eval_cosine_precision@3": 0.3024915127452673,
801
+ "eval_cosine_precision@5": 0.18884861039185225,
802
+ "eval_cosine_recall@1": 0.689452787847402,
803
+ "eval_cosine_recall@10": 0.9723804591748663,
804
+ "eval_cosine_recall@3": 0.9074745382358018,
805
+ "eval_cosine_recall@5": 0.9442430519592612,
806
+ "eval_runtime": 467.7248,
807
+ "eval_samples_per_second": 0.0,
808
+ "eval_steps_per_second": 0.0,
809
+ "step": 32000
810
+ },
811
+ {
812
+ "epoch": 2.8053517479499352,
813
+ "grad_norm": 0.005983938928693533,
814
+ "learning_rate": 6.635652966284592e-06,
815
+ "loss": 0.005,
816
+ "step": 32500
817
+ },
818
+ {
819
+ "epoch": 2.8485110056107033,
820
+ "grad_norm": 0.006458807270973921,
821
+ "learning_rate": 6.395856313845859e-06,
822
+ "loss": 0.0087,
823
+ "step": 33000
824
+ },
825
+ {
826
+ "epoch": 2.8916702632714717,
827
+ "grad_norm": 0.00440911203622818,
828
+ "learning_rate": 6.1560596614071276e-06,
829
+ "loss": 0.0064,
830
+ "step": 33500
831
+ },
832
+ {
833
+ "epoch": 2.93482952093224,
834
+ "grad_norm": 0.0034452094696462154,
835
+ "learning_rate": 5.916263008968395e-06,
836
+ "loss": 0.005,
837
+ "step": 34000
838
+ },
839
+ {
840
+ "epoch": 2.93482952093224,
841
+ "eval_cosine_accuracy@1": 0.6884170550664596,
842
+ "eval_cosine_accuracy@10": 0.9725530813050233,
843
+ "eval_cosine_accuracy@3": 0.9083376488865873,
844
+ "eval_cosine_accuracy@5": 0.9463145175211463,
845
+ "eval_cosine_map@100": 0.8037708008346327,
846
+ "eval_cosine_mrr@10": 0.8023887614773162,
847
+ "eval_cosine_ndcg@10": 0.8449160090668899,
848
+ "eval_cosine_precision@1": 0.6884170550664596,
849
+ "eval_cosine_precision@10": 0.0972553081305023,
850
+ "eval_cosine_precision@3": 0.30277921629552906,
851
+ "eval_cosine_precision@5": 0.18926290350422922,
852
+ "eval_cosine_recall@1": 0.6884170550664596,
853
+ "eval_cosine_recall@10": 0.9725530813050233,
854
+ "eval_cosine_recall@3": 0.9083376488865873,
855
+ "eval_cosine_recall@5": 0.9463145175211463,
856
+ "eval_runtime": 467.6593,
857
+ "eval_samples_per_second": 0.0,
858
+ "eval_steps_per_second": 0.0,
859
+ "step": 34000
860
+ },
861
+ {
862
+ "epoch": 2.977988778593008,
863
+ "grad_norm": 1.5224103927612305,
864
+ "learning_rate": 5.6764663565296625e-06,
865
+ "loss": 0.0115,
866
+ "step": 34500
867
+ },
868
+ {
869
+ "epoch": 3.0211480362537766,
870
+ "grad_norm": 0.007577585522085428,
871
+ "learning_rate": 5.436669704090931e-06,
872
+ "loss": 0.0079,
873
+ "step": 35000
874
+ },
875
+ {
876
+ "epoch": 3.0643072939145446,
877
+ "grad_norm": 0.01359875500202179,
878
+ "learning_rate": 5.196873051652199e-06,
879
+ "loss": 0.0045,
880
+ "step": 35500
881
+ },
882
+ {
883
+ "epoch": 3.107466551575313,
884
+ "grad_norm": 0.005014342721551657,
885
+ "learning_rate": 4.9570763992134675e-06,
886
+ "loss": 0.0029,
887
+ "step": 36000
888
+ },
889
+ {
890
+ "epoch": 3.107466551575313,
891
+ "eval_cosine_accuracy@1": 0.6875539444156741,
892
+ "eval_cosine_accuracy@10": 0.972035214914552,
893
+ "eval_cosine_accuracy@3": 0.9067840497151735,
894
+ "eval_cosine_accuracy@5": 0.9442430519592612,
895
+ "eval_cosine_map@100": 0.8031759037555115,
896
+ "eval_cosine_mrr@10": 0.8017571836836468,
897
+ "eval_cosine_ndcg@10": 0.8443043752760462,
898
+ "eval_cosine_precision@1": 0.6875539444156741,
899
+ "eval_cosine_precision@10": 0.09720352149145518,
900
+ "eval_cosine_precision@3": 0.3022613499050578,
901
+ "eval_cosine_precision@5": 0.18884861039185225,
902
+ "eval_cosine_recall@1": 0.6875539444156741,
903
+ "eval_cosine_recall@10": 0.972035214914552,
904
+ "eval_cosine_recall@3": 0.9067840497151735,
905
+ "eval_cosine_recall@5": 0.9442430519592612,
906
+ "eval_runtime": 467.7266,
907
+ "eval_samples_per_second": 0.0,
908
+ "eval_steps_per_second": 0.0,
909
+ "step": 36000
910
+ },
911
+ {
912
+ "epoch": 3.150625809236081,
913
+ "grad_norm": 0.015572451055049896,
914
+ "learning_rate": 4.717279746774736e-06,
915
+ "loss": 0.0161,
916
+ "step": 36500
917
+ },
918
+ {
919
+ "epoch": 3.1937850668968495,
920
+ "grad_norm": 0.004311546217650175,
921
+ "learning_rate": 4.477483094336003e-06,
922
+ "loss": 0.0144,
923
+ "step": 37000
924
+ },
925
+ {
926
+ "epoch": 3.2369443245576175,
927
+ "grad_norm": 0.0009289888548664749,
928
+ "learning_rate": 4.237686441897272e-06,
929
+ "loss": 0.0076,
930
+ "step": 37500
931
+ },
932
+ {
933
+ "epoch": 3.280103582218386,
934
+ "grad_norm": 0.0010557913919910789,
935
+ "learning_rate": 3.997889789458539e-06,
936
+ "loss": 0.0157,
937
+ "step": 38000
938
+ },
939
+ {
940
+ "epoch": 3.280103582218386,
941
+ "eval_cosine_accuracy@1": 0.6977386500949422,
942
+ "eval_cosine_accuracy@10": 0.9747971689970655,
943
+ "eval_cosine_accuracy@3": 0.909891248058001,
944
+ "eval_cosine_accuracy@5": 0.9470050060417745,
945
+ "eval_cosine_map@100": 0.809749193191093,
946
+ "eval_cosine_mrr@10": 0.8084805416498834,
947
+ "eval_cosine_ndcg@10": 0.8499994995327701,
948
+ "eval_cosine_precision@1": 0.6977386500949422,
949
+ "eval_cosine_precision@10": 0.09747971689970651,
950
+ "eval_cosine_precision@3": 0.30329708268600036,
951
+ "eval_cosine_precision@5": 0.18940100120835487,
952
+ "eval_cosine_recall@1": 0.6977386500949422,
953
+ "eval_cosine_recall@10": 0.9747971689970655,
954
+ "eval_cosine_recall@3": 0.909891248058001,
955
+ "eval_cosine_recall@5": 0.9470050060417745,
956
+ "eval_runtime": 467.9009,
957
+ "eval_samples_per_second": 0.0,
958
+ "eval_steps_per_second": 0.0,
959
+ "step": 38000
960
+ },
961
+ {
962
+ "epoch": 3.323262839879154,
963
+ "grad_norm": 0.002490697894245386,
964
+ "learning_rate": 3.7580931370198075e-06,
965
+ "loss": 0.0039,
966
+ "step": 38500
967
+ },
968
+ {
969
+ "epoch": 3.3664220975399224,
970
+ "grad_norm": 0.0011037011863663793,
971
+ "learning_rate": 3.518296484581076e-06,
972
+ "loss": 0.0045,
973
+ "step": 39000
974
+ },
975
+ {
976
+ "epoch": 3.4095813552006904,
977
+ "grad_norm": 0.008491401560604572,
978
+ "learning_rate": 3.2784998321423433e-06,
979
+ "loss": 0.0033,
980
+ "step": 39500
981
+ },
982
+ {
983
+ "epoch": 3.452740612861459,
984
+ "grad_norm": 0.0002366910339333117,
985
+ "learning_rate": 3.0387031797036116e-06,
986
+ "loss": 0.0064,
987
+ "step": 40000
988
+ },
989
+ {
990
+ "epoch": 3.452740612861459,
991
+ "eval_cosine_accuracy@1": 0.6832383911617469,
992
+ "eval_cosine_accuracy@10": 0.97393405834628,
993
+ "eval_cosine_accuracy@3": 0.9062661833247022,
994
+ "eval_cosine_accuracy@5": 0.9464871396513033,
995
+ "eval_cosine_map@100": 0.8011659555812971,
996
+ "eval_cosine_mrr@10": 0.7998895081365299,
997
+ "eval_cosine_ndcg@10": 0.8433601615941685,
998
+ "eval_cosine_precision@1": 0.6832383911617469,
999
+ "eval_cosine_precision@10": 0.097393405834628,
1000
+ "eval_cosine_precision@3": 0.30208872777490076,
1001
+ "eval_cosine_precision@5": 0.18929742793026064,
1002
+ "eval_cosine_recall@1": 0.6832383911617469,
1003
+ "eval_cosine_recall@10": 0.97393405834628,
1004
+ "eval_cosine_recall@3": 0.9062661833247022,
1005
+ "eval_cosine_recall@5": 0.9464871396513033,
1006
+ "eval_runtime": 467.6658,
1007
+ "eval_samples_per_second": 0.0,
1008
+ "eval_steps_per_second": 0.0,
1009
+ "step": 40000
1010
+ },
1011
+ {
1012
+ "epoch": 3.495899870522227,
1013
+ "grad_norm": 0.0015487176133319736,
1014
+ "learning_rate": 2.7989065272648796e-06,
1015
+ "loss": 0.0054,
1016
+ "step": 40500
1017
+ },
1018
+ {
1019
+ "epoch": 3.5390591281829953,
1020
+ "grad_norm": 1.1207655668258667,
1021
+ "learning_rate": 2.559109874826148e-06,
1022
+ "loss": 0.0061,
1023
+ "step": 41000
1024
+ },
1025
+ {
1026
+ "epoch": 3.5822183858437633,
1027
+ "grad_norm": 0.0002378961944486946,
1028
+ "learning_rate": 2.319313222387416e-06,
1029
+ "loss": 0.0051,
1030
+ "step": 41500
1031
+ },
1032
+ {
1033
+ "epoch": 3.6253776435045317,
1034
+ "grad_norm": 0.0002853251644410193,
1035
+ "learning_rate": 2.0795165699486837e-06,
1036
+ "loss": 0.0019,
1037
+ "step": 42000
1038
+ },
1039
+ {
1040
+ "epoch": 3.6253776435045317,
1041
+ "eval_cosine_accuracy@1": 0.6910063870188158,
1042
+ "eval_cosine_accuracy@10": 0.9742793026065941,
1043
+ "eval_cosine_accuracy@3": 0.9109269808389435,
1044
+ "eval_cosine_accuracy@5": 0.9461418953909891,
1045
+ "eval_cosine_map@100": 0.8061197699360279,
1046
+ "eval_cosine_mrr@10": 0.804833419644399,
1047
+ "eval_cosine_ndcg@10": 0.8471731447814336,
1048
+ "eval_cosine_precision@1": 0.6910063870188158,
1049
+ "eval_cosine_precision@10": 0.09742793026065939,
1050
+ "eval_cosine_precision@3": 0.30364232694631454,
1051
+ "eval_cosine_precision@5": 0.18922837907819778,
1052
+ "eval_cosine_recall@1": 0.6910063870188158,
1053
+ "eval_cosine_recall@10": 0.9742793026065941,
1054
+ "eval_cosine_recall@3": 0.9109269808389435,
1055
+ "eval_cosine_recall@5": 0.9461418953909891,
1056
+ "eval_runtime": 467.6854,
1057
+ "eval_samples_per_second": 0.0,
1058
+ "eval_steps_per_second": 0.0,
1059
+ "step": 42000
1060
+ }
1061
+ ],
1062
+ "logging_steps": 500,
1063
+ "max_steps": 46340,
1064
+ "num_input_tokens_seen": 0,
1065
+ "num_train_epochs": 4,
1066
+ "save_steps": 2000,
1067
+ "stateful_callbacks": {
1068
+ "TrainerControl": {
1069
+ "args": {
1070
+ "should_epoch_stop": false,
1071
+ "should_evaluate": false,
1072
+ "should_log": false,
1073
+ "should_save": true,
1074
+ "should_training_stop": false
1075
+ },
1076
+ "attributes": {}
1077
+ }
1078
+ },
1079
+ "total_flos": 0.0,
1080
+ "train_batch_size": 4,
1081
+ "trial_name": null,
1082
+ "trial_params": null
1083
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8adee61c42569acda3b6148ffcf73faf677af58c325f10f3d4798bf52f61a30e
3
+ size 5624