sergabrr commited on
Commit
ccbb2bb
·
verified ·
1 Parent(s): fde439b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +0 -1690
README.md CHANGED
@@ -1,1690 +0,0 @@
1
- ---
2
- model-index:
3
- - name: ru-en-RoSBERTa
4
- results:
5
- - dataset:
6
- config: default
7
- name: MTEB CEDRClassification (default)
8
- revision: c0ba03d058e3e1b2f3fd20518875a4563dd12db4
9
- split: test
10
- type: ai-forever/cedr-classification
11
- metrics:
12
- - type: accuracy
13
- value: 44.68650371944739
14
- - type: f1
15
- value: 40.7601061886426
16
- - type: lrap
17
- value: 70.69633368756747
18
- - type: main_score
19
- value: 44.68650371944739
20
- task:
21
- type: MultilabelClassification
22
- - dataset:
23
- config: default
24
- name: MTEB GeoreviewClassification (default)
25
- revision: 3765c0d1de6b7d264bc459433c45e5a75513839c
26
- split: test
27
- type: ai-forever/georeview-classification
28
- metrics:
29
- - type: accuracy
30
- value: 49.697265625
31
- - type: f1
32
- value: 47.793186725286866
33
- - type: f1_weighted
34
- value: 47.79131720298068
35
- - type: main_score
36
- value: 49.697265625
37
- task:
38
- type: Classification
39
- - dataset:
40
- config: default
41
- name: MTEB GeoreviewClusteringP2P (default)
42
- revision: 97a313c8fc85b47f13f33e7e9a95c1ad888c7fec
43
- split: test
44
- type: ai-forever/georeview-clustering-p2p
45
- metrics:
46
- - type: main_score
47
- value: 65.42249614873316
48
- - type: v_measure
49
- value: 65.42249614873316
50
- - type: v_measure_std
51
- value: 0.8524815312312278
52
- task:
53
- type: Clustering
54
- - dataset:
55
- config: default
56
- name: MTEB HeadlineClassification (default)
57
- revision: 2fe05ee6b5832cda29f2ef7aaad7b7fe6a3609eb
58
- split: test
59
- type: ai-forever/headline-classification
60
- metrics:
61
- - type: accuracy
62
- value: 78.0029296875
63
- - type: f1
64
- value: 77.95151940601424
65
- - type: f1_weighted
66
- value: 77.95054643947716
67
- - type: main_score
68
- value: 78.0029296875
69
- task:
70
- type: Classification
71
- - dataset:
72
- config: default
73
- name: MTEB InappropriatenessClassification (default)
74
- revision: 601651fdc45ef243751676e62dd7a19f491c0285
75
- split: test
76
- type: ai-forever/inappropriateness-classification
77
- metrics:
78
- - type: accuracy
79
- value: 61.32324218750001
80
- - type: ap
81
- value: 57.11029460364367
82
- - type: ap_weighted
83
- value: 57.11029460364367
84
- - type: f1
85
- value: 60.971337406307214
86
- - type: f1_weighted
87
- value: 60.971337406307214
88
- - type: main_score
89
- value: 61.32324218750001
90
- task:
91
- type: Classification
92
- - dataset:
93
- config: default
94
- name: MTEB KinopoiskClassification (default)
95
- revision: 5911f26666ac11af46cb9c6849d0dc80a378af24
96
- split: test
97
- type: ai-forever/kinopoisk-sentiment-classification
98
- metrics:
99
- - type: accuracy
100
- value: 63.27333333333334
101
- - type: f1
102
- value: 61.007042785228116
103
- - type: f1_weighted
104
- value: 61.007042785228116
105
- - type: main_score
106
- value: 63.27333333333334
107
- task:
108
- type: Classification
109
- - dataset:
110
- config: ru
111
- name: MTEB MIRACLReranking (ru)
112
- revision: 6d1962c527217f8927fca80f890f14f36b2802af
113
- split: dev
114
- type: miracl/mmteb-miracl-reranking
115
- metrics:
116
- - type: MAP@1(MIRACL)
117
- value: 30.691000000000003
118
- - type: MAP@10(MIRACL)
119
- value: 49.178
120
- - type: MAP@100(MIRACL)
121
- value: 51.225
122
- - type: MAP@1000(MIRACL)
123
- value: 51.225
124
- - type: MAP@20(MIRACL)
125
- value: 50.613
126
- - type: MAP@3(MIRACL)
127
- value: 42.457
128
- - type: MAP@5(MIRACL)
129
- value: 46.172000000000004
130
- - type: NDCG@1(MIRACL)
131
- value: 51.002
132
- - type: NDCG@10(MIRACL)
133
- value: 56.912
134
- - type: NDCG@100(MIRACL)
135
- value: 61.197
136
- - type: NDCG@1000(MIRACL)
137
- value: 61.197
138
- - type: NDCG@20(MIRACL)
139
- value: 59.453
140
- - type: NDCG@3(MIRACL)
141
- value: 51.083
142
- - type: NDCG@5(MIRACL)
143
- value: 53.358000000000004
144
- - type: P@1(MIRACL)
145
- value: 51.002
146
- - type: P@10(MIRACL)
147
- value: 14.852000000000002
148
- - type: P@100(MIRACL)
149
- value: 1.9529999999999998
150
- - type: P@1000(MIRACL)
151
- value: 0.19499999999999998
152
- - type: P@20(MIRACL)
153
- value: 8.657
154
- - type: P@3(MIRACL)
155
- value: 31.435000000000002
156
- - type: P@5(MIRACL)
157
- value: 23.608999999999998
158
- - type: Recall@1(MIRACL)
159
- value: 30.691000000000003
160
- - type: Recall@10(MIRACL)
161
- value: 67.006
162
- - type: Recall@100(MIRACL)
163
- value: 79.952
164
- - type: Recall@1000(MIRACL)
165
- value: 79.952
166
- - type: Recall@20(MIRACL)
167
- value: 73.811
168
- - type: Recall@3(MIRACL)
169
- value: 49.142
170
- - type: Recall@5(MIRACL)
171
- value: 57.553
172
- - type: main_score
173
- value: 56.912
174
- - type: nAUC_MAP@1000_diff1(MIRACL)
175
- value: 10.786403475779332
176
- - type: nAUC_MAP@1000_max(MIRACL)
177
- value: 29.477246196287275
178
- - type: nAUC_MAP@1000_std(MIRACL)
179
- value: 15.938834129839046
180
- - type: nAUC_MAP@100_diff1(MIRACL)
181
- value: 10.786403475779332
182
- - type: nAUC_MAP@100_max(MIRACL)
183
- value: 29.477246196287275
184
- - type: nAUC_MAP@100_std(MIRACL)
185
- value: 15.938834129839046
186
- - type: nAUC_MAP@10_diff1(MIRACL)
187
- value: 12.255091348037595
188
- - type: nAUC_MAP@10_max(MIRACL)
189
- value: 26.72625370045134
190
- - type: nAUC_MAP@10_std(MIRACL)
191
- value: 14.180071586837812
192
- - type: nAUC_MAP@1_diff1(MIRACL)
193
- value: 28.616487922173768
194
- - type: nAUC_MAP@1_max(MIRACL)
195
- value: 12.986192530664518
196
- - type: nAUC_MAP@1_std(MIRACL)
197
- value: 4.086145762604503
198
- - type: nAUC_MAP@20_diff1(MIRACL)
199
- value: 11.360341572700476
200
- - type: nAUC_MAP@20_max(MIRACL)
201
- value: 28.612330384153832
202
- - type: nAUC_MAP@20_std(MIRACL)
203
- value: 15.787480742877937
204
- - type: nAUC_MAP@3_diff1(MIRACL)
205
- value: 18.033783954867623
206
- - type: nAUC_MAP@3_max(MIRACL)
207
- value: 20.97092332905034
208
- - type: nAUC_MAP@3_std(MIRACL)
209
- value: 9.106058710108279
210
- - type: nAUC_MAP@5_diff1(MIRACL)
211
- value: 14.784231238848433
212
- - type: nAUC_MAP@5_max(MIRACL)
213
- value: 23.841145797143
214
- - type: nAUC_MAP@5_std(MIRACL)
215
- value: 11.25686258970321
216
- - type: nAUC_NDCG@1000_diff1(MIRACL)
217
- value: 1.4728095471561125
218
- - type: nAUC_NDCG@1000_max(MIRACL)
219
- value: 39.84262968697792
220
- - type: nAUC_NDCG@1000_std(MIRACL)
221
- value: 22.4186410243652
222
- - type: nAUC_NDCG@100_diff1(MIRACL)
223
- value: 1.4728095471561125
224
- - type: nAUC_NDCG@100_max(MIRACL)
225
- value: 39.84262968697792
226
- - type: nAUC_NDCG@100_std(MIRACL)
227
- value: 22.4186410243652
228
- - type: nAUC_NDCG@10_diff1(MIRACL)
229
- value: 5.242996478950954
230
- - type: nAUC_NDCG@10_max(MIRACL)
231
- value: 33.86925934510759
232
- - type: nAUC_NDCG@10_std(MIRACL)
233
- value: 19.457386638149625
234
- - type: nAUC_NDCG@1_diff1(MIRACL)
235
- value: 16.925455715967676
236
- - type: nAUC_NDCG@1_max(MIRACL)
237
- value: 36.72266755084653
238
- - type: nAUC_NDCG@1_std(MIRACL)
239
- value: 18.357456476212622
240
- - type: nAUC_NDCG@20_diff1(MIRACL)
241
- value: 3.361697278095995
242
- - type: nAUC_NDCG@20_max(MIRACL)
243
- value: 37.38923489423496
244
- - type: nAUC_NDCG@20_std(MIRACL)
245
- value: 22.29168372402657
246
- - type: nAUC_NDCG@3_diff1(MIRACL)
247
- value: 10.936904314592084
248
- - type: nAUC_NDCG@3_max(MIRACL)
249
- value: 30.547718047674284
250
- - type: nAUC_NDCG@3_std(MIRACL)
251
- value: 15.142352896765665
252
- - type: nAUC_NDCG@5_diff1(MIRACL)
253
- value: 8.618074920961075
254
- - type: nAUC_NDCG@5_max(MIRACL)
255
- value: 30.808600807482367
256
- - type: nAUC_NDCG@5_std(MIRACL)
257
- value: 15.793512242130051
258
- - type: nAUC_P@1000_diff1(MIRACL)
259
- value: -24.81839490148569
260
- - type: nAUC_P@1000_max(MIRACL)
261
- value: 34.16200383739091
262
- - type: nAUC_P@1000_std(MIRACL)
263
- value: 20.95890369662007
264
- - type: nAUC_P@100_diff1(MIRACL)
265
- value: -24.818394901485657
266
- - type: nAUC_P@100_max(MIRACL)
267
- value: 34.16200383739092
268
- - type: nAUC_P@100_std(MIRACL)
269
- value: 20.958903696620112
270
- - type: nAUC_P@10_diff1(MIRACL)
271
- value: -22.646461560750986
272
- - type: nAUC_P@10_max(MIRACL)
273
- value: 34.57373514819872
274
- - type: nAUC_P@10_std(MIRACL)
275
- value: 24.27599718176041
276
- - type: nAUC_P@1_diff1(MIRACL)
277
- value: 16.925455715967676
278
- - type: nAUC_P@1_max(MIRACL)
279
- value: 36.72266755084653
280
- - type: nAUC_P@1_std(MIRACL)
281
- value: 18.357456476212622
282
- - type: nAUC_P@20_diff1(MIRACL)
283
- value: -23.33449798384014
284
- - type: nAUC_P@20_max(MIRACL)
285
- value: 34.92822081787735
286
- - type: nAUC_P@20_std(MIRACL)
287
- value: 25.048280657629267
288
- - type: nAUC_P@3_diff1(MIRACL)
289
- value: -11.60659490286
290
- - type: nAUC_P@3_max(MIRACL)
291
- value: 38.187883056013035
292
- - type: nAUC_P@3_std(MIRACL)
293
- value: 21.234776997940628
294
- - type: nAUC_P@5_diff1(MIRACL)
295
- value: -18.86697977242918
296
- - type: nAUC_P@5_max(MIRACL)
297
- value: 35.6110661197626
298
- - type: nAUC_P@5_std(MIRACL)
299
- value: 22.11165620702996
300
- - type: nAUC_Recall@1000_diff1(MIRACL)
301
- value: -31.456413113303867
302
- - type: nAUC_Recall@1000_max(MIRACL)
303
- value: 63.785265733309636
304
- - type: nAUC_Recall@1000_std(MIRACL)
305
- value: 36.587933217871914
306
- - type: nAUC_Recall@100_diff1(MIRACL)
307
- value: -31.456413113303867
308
- - type: nAUC_Recall@100_max(MIRACL)
309
- value: 63.785265733309636
310
- - type: nAUC_Recall@100_std(MIRACL)
311
- value: 36.587933217871914
312
- - type: nAUC_Recall@10_diff1(MIRACL)
313
- value: -9.518740341549913
314
- - type: nAUC_Recall@10_max(MIRACL)
315
- value: 35.00853357699468
316
- - type: nAUC_Recall@10_std(MIRACL)
317
- value: 22.79313936486099
318
- - type: nAUC_Recall@1_diff1(MIRACL)
319
- value: 28.616487922173768
320
- - type: nAUC_Recall@1_max(MIRACL)
321
- value: 12.986192530664518
322
- - type: nAUC_Recall@1_std(MIRACL)
323
- value: 4.086145762604503
324
- - type: nAUC_Recall@20_diff1(MIRACL)
325
- value: -17.771143411342166
326
- - type: nAUC_Recall@20_max(MIRACL)
327
- value: 47.59780316487735
328
- - type: nAUC_Recall@20_std(MIRACL)
329
- value: 33.25494707686132
330
- - type: nAUC_Recall@3_diff1(MIRACL)
331
- value: 10.171226133119783
332
- - type: nAUC_Recall@3_max(MIRACL)
333
- value: 21.097634288680847
334
- - type: nAUC_Recall@3_std(MIRACL)
335
- value: 10.087211861733298
336
- - type: nAUC_Recall@5_diff1(MIRACL)
337
- value: 1.6868374913242932
338
- - type: nAUC_Recall@5_max(MIRACL)
339
- value: 25.874440474993165
340
- - type: nAUC_Recall@5_std(MIRACL)
341
- value: 13.46380924822079
342
- task:
343
- type: Reranking
344
- - dataset:
345
- config: ru
346
- name: MTEB MIRACLRetrieval (ru)
347
- revision: main
348
- split: dev
349
- type: miracl/mmteb-miracl
350
- metrics:
351
- - type: main_score
352
- value: 53.909
353
- - type: map_at_1
354
- value: 24.308
355
- - type: map_at_10
356
- value: 43.258
357
- - type: map_at_100
358
- value: 46.053
359
- - type: map_at_1000
360
- value: 46.176
361
- - type: map_at_20
362
- value: 44.962
363
- - type: map_at_3
364
- value: 36.129
365
- - type: map_at_5
366
- value: 40.077
367
- - type: mrr_at_1
368
- value: 49.92012779552716
369
- - type: mrr_at_10
370
- value: 62.639554490592865
371
- - type: mrr_at_100
372
- value: 63.09260401526302
373
- - type: mrr_at_1000
374
- value: 63.10428906436666
375
- - type: mrr_at_20
376
- value: 62.94919151853632
377
- - type: mrr_at_3
378
- value: 60.15708200212997
379
- - type: mrr_at_5
380
- value: 61.83439829605969
381
- - type: nauc_map_at_1000_diff1
382
- value: 24.249990208199268
383
- - type: nauc_map_at_1000_max
384
- value: 25.29688440384686
385
- - type: nauc_map_at_1000_std
386
- value: 2.4312163206740536
387
- - type: nauc_map_at_100_diff1
388
- value: 24.2554939267347
389
- - type: nauc_map_at_100_max
390
- value: 25.25054164924535
391
- - type: nauc_map_at_100_std
392
- value: 2.4121726280069757
393
- - type: nauc_map_at_10_diff1
394
- value: 24.411765629418987
395
- - type: nauc_map_at_10_max
396
- value: 23.13035697774593
397
- - type: nauc_map_at_10_std
398
- value: -0.1673711528601927
399
- - type: nauc_map_at_1_diff1
400
- value: 30.55123128484441
401
- - type: nauc_map_at_1_max
402
- value: 13.83849108263988
403
- - type: nauc_map_at_1_std
404
- value: -7.087181528435525
405
- - type: nauc_map_at_20_diff1
406
- value: 24.125033292556417
407
- - type: nauc_map_at_20_max
408
- value: 24.563171125814296
409
- - type: nauc_map_at_20_std
410
- value: 1.266006461448722
411
- - type: nauc_map_at_3_diff1
412
- value: 25.71581305774253
413
- - type: nauc_map_at_3_max
414
- value: 18.708623514300097
415
- - type: nauc_map_at_3_std
416
- value: -4.772722288463871
417
- - type: nauc_map_at_5_diff1
418
- value: 25.352787694389097
419
- - type: nauc_map_at_5_max
420
- value: 20.974296353287084
421
- - type: nauc_map_at_5_std
422
- value: -3.4007260047029835
423
- - type: nauc_mrr_at_1000_diff1
424
- value: 29.492072727604622
425
- - type: nauc_mrr_at_1000_max
426
- value: 34.60333674990558
427
- - type: nauc_mrr_at_1000_std
428
- value: 11.223537361751173
429
- - type: nauc_mrr_at_100_diff1
430
- value: 29.47919553914885
431
- - type: nauc_mrr_at_100_max
432
- value: 34.618795300361995
433
- - type: nauc_mrr_at_100_std
434
- value: 11.243824787491663
435
- - type: nauc_mrr_at_10_diff1
436
- value: 29.481060608078298
437
- - type: nauc_mrr_at_10_max
438
- value: 34.752363175415745
439
- - type: nauc_mrr_at_10_std
440
- value: 10.98618160728943
441
- - type: nauc_mrr_at_1_diff1
442
- value: 31.81056902767142
443
- - type: nauc_mrr_at_1_max
444
- value: 30.351978574096773
445
- - type: nauc_mrr_at_1_std
446
- value: 9.735911194663025
447
- - type: nauc_mrr_at_20_diff1
448
- value: 29.390754002995035
449
- - type: nauc_mrr_at_20_max
450
- value: 34.75816984434079
451
- - type: nauc_mrr_at_20_std
452
- value: 11.325226515477347
453
- - type: nauc_mrr_at_3_diff1
454
- value: 29.948364490803186
455
- - type: nauc_mrr_at_3_max
456
- value: 33.973850208221556
457
- - type: nauc_mrr_at_3_std
458
- value: 9.988883050022485
459
- - type: nauc_mrr_at_5_diff1
460
- value: 29.477773016468696
461
- - type: nauc_mrr_at_5_max
462
- value: 34.38532892473932
463
- - type: nauc_mrr_at_5_std
464
- value: 10.206783034393654
465
- - type: nauc_ndcg_at_1000_diff1
466
- value: 24.15494700259076
467
- - type: nauc_ndcg_at_1000_max
468
- value: 32.367504385127035
469
- - type: nauc_ndcg_at_1000_std
470
- value: 10.372857487814498
471
- - type: nauc_ndcg_at_100_diff1
472
- value: 23.97247958991815
473
- - type: nauc_ndcg_at_100_max
474
- value: 32.21110774026889
475
- - type: nauc_ndcg_at_100_std
476
- value: 11.065328347817761
477
- - type: nauc_ndcg_at_10_diff1
478
- value: 24.038789867355796
479
- - type: nauc_ndcg_at_10_max
480
- value: 28.14682223937745
481
- - type: nauc_ndcg_at_10_std
482
- value: 4.518525314723316
483
- - type: nauc_ndcg_at_1_diff1
484
- value: 31.81056902767142
485
- - type: nauc_ndcg_at_1_max
486
- value: 30.351978574096773
487
- - type: nauc_ndcg_at_1_std
488
- value: 9.735911194663025
489
- - type: nauc_ndcg_at_20_diff1
490
- value: 23.157990079778138
491
- - type: nauc_ndcg_at_20_max
492
- value: 30.521172934621703
493
- - type: nauc_ndcg_at_20_std
494
- value: 7.660125728373433
495
- - type: nauc_ndcg_at_3_diff1
496
- value: 24.44153871615053
497
- - type: nauc_ndcg_at_3_max
498
- value: 27.08209732696818
499
- - type: nauc_ndcg_at_3_std
500
- value: 3.8766269917792537
501
- - type: nauc_ndcg_at_5_diff1
502
- value: 24.952468410841863
503
- - type: nauc_ndcg_at_5_max
504
- value: 26.29873769608537
505
- - type: nauc_ndcg_at_5_std
506
- value: 1.3359423751654511
507
- - type: nauc_precision_at_1000_diff1
508
- value: -9.104010991734798
509
- - type: nauc_precision_at_1000_max
510
- value: 20.36838078039637
511
- - type: nauc_precision_at_1000_std
512
- value: 26.889986331386297
513
- - type: nauc_precision_at_100_diff1
514
- value: -7.181546793298205
515
- - type: nauc_precision_at_100_max
516
- value: 24.32969645433586
517
- - type: nauc_precision_at_100_std
518
- value: 31.546209514202232
519
- - type: nauc_precision_at_10_diff1
520
- value: -1.0044021788494442
521
- - type: nauc_precision_at_10_max
522
- value: 29.37074096666726
523
- - type: nauc_precision_at_10_std
524
- value: 25.000959926288214
525
- - type: nauc_precision_at_1_diff1
526
- value: 31.81056902767142
527
- - type: nauc_precision_at_1_max
528
- value: 30.351978574096773
529
- - type: nauc_precision_at_1_std
530
- value: 9.735911194663025
531
- - type: nauc_precision_at_20_diff1
532
- value: -5.242529022989003
533
- - type: nauc_precision_at_20_max
534
- value: 28.199268120740822
535
- - type: nauc_precision_at_20_std
536
- value: 28.460986811065037
537
- - type: nauc_precision_at_3_diff1
538
- value: 9.46419634664173
539
- - type: nauc_precision_at_3_max
540
- value: 32.203956451949914
541
- - type: nauc_precision_at_3_std
542
- value: 16.4095713138301
543
- - type: nauc_precision_at_5_diff1
544
- value: 3.719098257572974
545
- - type: nauc_precision_at_5_max
546
- value: 30.53411024247047
547
- - type: nauc_precision_at_5_std
548
- value: 17.926227114457067
549
- - type: nauc_recall_at_1000_diff1
550
- value: 12.347919922311121
551
- - type: nauc_recall_at_1000_max
552
- value: 62.10824756167678
553
- - type: nauc_recall_at_1000_std
554
- value: 65.9625810682273
555
- - type: nauc_recall_at_100_diff1
556
- value: 11.945066948287723
557
- - type: nauc_recall_at_100_max
558
- value: 37.07070306829974
559
- - type: nauc_recall_at_100_std
560
- value: 38.76495395051901
561
- - type: nauc_recall_at_10_diff1
562
- value: 14.793964290237943
563
- - type: nauc_recall_at_10_max
564
- value: 23.170920682517334
565
- - type: nauc_recall_at_10_std
566
- value: 5.07461971737137
567
- - type: nauc_recall_at_1_diff1
568
- value: 30.55123128484441
569
- - type: nauc_recall_at_1_max
570
- value: 13.83849108263988
571
- - type: nauc_recall_at_1_std
572
- value: -7.087181528435525
573
- - type: nauc_recall_at_20_diff1
574
- value: 10.349310874535616
575
- - type: nauc_recall_at_20_max
576
- value: 27.72667852012557
577
- - type: nauc_recall_at_20_std
578
- value: 13.37946493360006
579
- - type: nauc_recall_at_3_diff1
580
- value: 20.660181561801195
581
- - type: nauc_recall_at_3_max
582
- value: 16.734608747226137
583
- - type: nauc_recall_at_3_std
584
- value: -5.887299100086449
585
- - type: nauc_recall_at_5_diff1
586
- value: 19.292387971699007
587
- - type: nauc_recall_at_5_max
588
- value: 18.151647291256193
589
- - type: nauc_recall_at_5_std
590
- value: -5.3874570564310895
591
- - type: ndcg_at_1
592
- value: 49.919999999999995
593
- - type: ndcg_at_10
594
- value: 53.909
595
- - type: ndcg_at_100
596
- value: 61.346999999999994
597
- - type: ndcg_at_1000
598
- value: 62.831
599
- - type: ndcg_at_20
600
- value: 57.44200000000001
601
- - type: ndcg_at_3
602
- value: 48.034
603
- - type: ndcg_at_5
604
- value: 50.151
605
- - type: precision_at_1
606
- value: 49.919999999999995
607
- - type: precision_at_10
608
- value: 16.206
609
- - type: precision_at_100
610
- value: 2.467
611
- - type: precision_at_1000
612
- value: 0.27499999999999997
613
- - type: precision_at_20
614
- value: 9.847999999999999
615
- - type: precision_at_3
616
- value: 33.013999999999996
617
- - type: precision_at_5
618
- value: 25.495
619
- - type: recall_at_1
620
- value: 24.308
621
- - type: recall_at_10
622
- value: 64.226
623
- - type: recall_at_100
624
- value: 88.532
625
- - type: recall_at_1000
626
- value: 96.702
627
- - type: recall_at_20
628
- value: 73.855
629
- - type: recall_at_3
630
- value: 43.75
631
- - type: recall_at_5
632
- value: 53.293
633
- task:
634
- type: Retrieval
635
- - dataset:
636
- config: ru
637
- name: MTEB MassiveIntentClassification (ru)
638
- revision: 4672e20407010da34463acc759c162ca9734bca6
639
- split: test
640
- type: mteb/amazon_massive_intent
641
- metrics:
642
- - type: accuracy
643
- value: 66.96704774714189
644
- - type: f1
645
- value: 63.75700201120695
646
- - type: f1_weighted
647
- value: 65.79948352494334
648
- - type: main_score
649
- value: 66.96704774714189
650
- task:
651
- type: Classification
652
- - dataset:
653
- config: ru
654
- name: MTEB MassiveScenarioClassification (ru)
655
- revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
656
- split: test
657
- type: mteb/amazon_massive_scenario
658
- metrics:
659
- - type: accuracy
660
- value: 71.79556153328849
661
- - type: f1
662
- value: 71.04798190430378
663
- - type: f1_weighted
664
- value: 71.11136110921589
665
- - type: main_score
666
- value: 71.79556153328849
667
- task:
668
- type: Classification
669
- - dataset:
670
- config: default
671
- name: MTEB RUParaPhraserSTS (default)
672
- revision: 43265056790b8f7c59e0139acb4be0a8dad2c8f4
673
- split: test
674
- type: merionum/ru_paraphraser
675
- metrics:
676
- - type: cosine_pearson
677
- value: 69.4312341087414
678
- - type: cosine_spearman
679
- value: 76.16273410937974
680
- - type: euclidean_pearson
681
- value: 73.59970264325928
682
- - type: euclidean_spearman
683
- value: 76.16273410937974
684
- - type: main_score
685
- value: 76.16273410937974
686
- - type: manhattan_pearson
687
- value: 73.63850191752708
688
- - type: manhattan_spearman
689
- value: 76.22156395676978
690
- - type: pearson
691
- value: 69.4312341087414
692
- - type: spearman
693
- value: 76.16273410937974
694
- task:
695
- type: STS
696
- - dataset:
697
- config: default
698
- name: MTEB RiaNewsRetrieval (default)
699
- revision: 82374b0bbacda6114f39ff9c5b925fa1512ca5d7
700
- split: test
701
- type: ai-forever/ria-news-retrieval
702
- metrics:
703
- - type: main_score
704
- value: 78.864
705
- - type: map_at_1
706
- value: 67.61
707
- - type: map_at_10
708
- value: 75.44800000000001
709
- - type: map_at_100
710
- value: 75.73
711
- - type: map_at_1000
712
- value: 75.74
713
- - type: map_at_20
714
- value: 75.63
715
- - type: map_at_3
716
- value: 74.058
717
- - type: map_at_5
718
- value: 74.935
719
- - type: mrr_at_1
720
- value: 67.61
721
- - type: mrr_at_10
722
- value: 75.44837698412663
723
- - type: mrr_at_100
724
- value: 75.7296913526584
725
- - type: mrr_at_1000
726
- value: 75.7404584781072
727
- - type: mrr_at_20
728
- value: 75.62998240983255
729
- - type: mrr_at_3
730
- value: 74.05833333333295
731
- - type: mrr_at_5
732
- value: 74.93533333333274
733
- - type: nauc_map_at_1000_diff1
734
- value: 76.73003886073126
735
- - type: nauc_map_at_1000_max
736
- value: 23.880592237559313
737
- - type: nauc_map_at_1000_std
738
- value: -16.639489061431295
739
- - type: nauc_map_at_100_diff1
740
- value: 76.72565072181389
741
- - type: nauc_map_at_100_max
742
- value: 23.881455390102456
743
- - type: nauc_map_at_100_std
744
- value: -16.63176355032267
745
- - type: nauc_map_at_10_diff1
746
- value: 76.64273887966773
747
- - type: nauc_map_at_10_max
748
- value: 23.81082154251487
749
- - type: nauc_map_at_10_std
750
- value: -16.77740307482434
751
- - type: nauc_map_at_1_diff1
752
- value: 79.73607180360645
753
- - type: nauc_map_at_1_max
754
- value: 21.20262368559921
755
- - type: nauc_map_at_1_std
756
- value: -19.089796155513238
757
- - type: nauc_map_at_20_diff1
758
- value: 76.7030611694817
759
- - type: nauc_map_at_20_max
760
- value: 23.838907707504127
761
- - type: nauc_map_at_20_std
762
- value: -16.672743811541736
763
- - type: nauc_map_at_3_diff1
764
- value: 76.50523775835022
765
- - type: nauc_map_at_3_max
766
- value: 23.60179905501101
767
- - type: nauc_map_at_3_std
768
- value: -17.693757802981956
769
- - type: nauc_map_at_5_diff1
770
- value: 76.61576372823448
771
- - type: nauc_map_at_5_max
772
- value: 23.862587318336775
773
- - type: nauc_map_at_5_std
774
- value: -17.0437966767025
775
- - type: nauc_mrr_at_1000_diff1
776
- value: 76.73003886073126
777
- - type: nauc_mrr_at_1000_max
778
- value: 23.880592237559313
779
- - type: nauc_mrr_at_1000_std
780
- value: -16.639489061431295
781
- - type: nauc_mrr_at_100_diff1
782
- value: 76.72565072181389
783
- - type: nauc_mrr_at_100_max
784
- value: 23.881455390102456
785
- - type: nauc_mrr_at_100_std
786
- value: -16.63176355032267
787
- - type: nauc_mrr_at_10_diff1
788
- value: 76.64273887966773
789
- - type: nauc_mrr_at_10_max
790
- value: 23.81082154251487
791
- - type: nauc_mrr_at_10_std
792
- value: -16.77740307482434
793
- - type: nauc_mrr_at_1_diff1
794
- value: 79.73607180360645
795
- - type: nauc_mrr_at_1_max
796
- value: 21.20262368559921
797
- - type: nauc_mrr_at_1_std
798
- value: -19.089796155513238
799
- - type: nauc_mrr_at_20_diff1
800
- value: 76.7030611694817
801
- - type: nauc_mrr_at_20_max
802
- value: 23.838907707504127
803
- - type: nauc_mrr_at_20_std
804
- value: -16.672743811541736
805
- - type: nauc_mrr_at_3_diff1
806
- value: 76.50523775835022
807
- - type: nauc_mrr_at_3_max
808
- value: 23.60179905501101
809
- - type: nauc_mrr_at_3_std
810
- value: -17.693757802981956
811
- - type: nauc_mrr_at_5_diff1
812
- value: 76.61576372823448
813
- - type: nauc_mrr_at_5_max
814
- value: 23.862587318336775
815
- - type: nauc_mrr_at_5_std
816
- value: -17.0437966767025
817
- - type: nauc_ndcg_at_1000_diff1
818
- value: 76.016960312922
819
- - type: nauc_ndcg_at_1000_max
820
- value: 25.434179222015285
821
- - type: nauc_ndcg_at_1000_std
822
- value: -14.489226598374966
823
- - type: nauc_ndcg_at_100_diff1
824
- value: 75.87402195675239
825
- - type: nauc_ndcg_at_100_max
826
- value: 25.562687163467295
827
- - type: nauc_ndcg_at_100_std
828
- value: -14.165819919505346
829
- - type: nauc_ndcg_at_10_diff1
830
- value: 75.47305900096035
831
- - type: nauc_ndcg_at_10_max
832
- value: 24.9111489869184
833
- - type: nauc_ndcg_at_10_std
834
- value: -15.106328069022739
835
- - type: nauc_ndcg_at_1_diff1
836
- value: 79.73607180360645
837
- - type: nauc_ndcg_at_1_max
838
- value: 21.20262368559921
839
- - type: nauc_ndcg_at_1_std
840
- value: -19.089796155513238
841
- - type: nauc_ndcg_at_20_diff1
842
- value: 75.71180859144839
843
- - type: nauc_ndcg_at_20_max
844
- value: 25.12671193294504
845
- - type: nauc_ndcg_at_20_std
846
- value: -14.582900241958443
847
- - type: nauc_ndcg_at_3_diff1
848
- value: 75.32126900936046
849
- - type: nauc_ndcg_at_3_max
850
- value: 24.39543091769943
851
- - type: nauc_ndcg_at_3_std
852
- value: -17.183511551234538
853
- - type: nauc_ndcg_at_5_diff1
854
- value: 75.46170695160178
855
- - type: nauc_ndcg_at_5_max
856
- value: 25.001670951020937
857
- - type: nauc_ndcg_at_5_std
858
- value: -15.861405796419376
859
- - type: nauc_precision_at_1000_diff1
860
- value: 65.48397136632431
861
- - type: nauc_precision_at_1000_max
862
- value: 77.05533391807842
863
- - type: nauc_precision_at_1000_std
864
- value: 54.14509238038628
865
- - type: nauc_precision_at_100_diff1
866
- value: 66.6077978535527
867
- - type: nauc_precision_at_100_max
868
- value: 54.07639576230772
869
- - type: nauc_precision_at_100_std
870
- value: 28.071043659958185
871
- - type: nauc_precision_at_10_diff1
872
- value: 68.71592258481675
873
- - type: nauc_precision_at_10_max
874
- value: 31.40944055975099
875
- - type: nauc_precision_at_10_std
876
- value: -4.421548783271478
877
- - type: nauc_precision_at_1_diff1
878
- value: 79.73607180360645
879
- - type: nauc_precision_at_1_max
880
- value: 21.20262368559921
881
- - type: nauc_precision_at_1_std
882
- value: -19.089796155513238
883
- - type: nauc_precision_at_20_diff1
884
- value: 68.87539427047768
885
- - type: nauc_precision_at_20_max
886
- value: 35.602508001542176
887
- - type: nauc_precision_at_20_std
888
- value: 3.6366951424017184
889
- - type: nauc_precision_at_3_diff1
890
- value: 70.84549884977267
891
- - type: nauc_precision_at_3_max
892
- value: 27.35862016332144
893
- - type: nauc_precision_at_3_std
894
- value: -15.255203279510601
895
- - type: nauc_precision_at_5_diff1
896
- value: 70.27864341297163
897
- - type: nauc_precision_at_5_max
898
- value: 30.29162962827962
899
- - type: nauc_precision_at_5_std
900
- value: -10.193470309556703
901
- - type: nauc_recall_at_1000_diff1
902
- value: 65.48397136632475
903
- - type: nauc_recall_at_1000_max
904
- value: 77.05533391807865
905
- - type: nauc_recall_at_1000_std
906
- value: 54.14509238038722
907
- - type: nauc_recall_at_100_diff1
908
- value: 66.60779785355253
909
- - type: nauc_recall_at_100_max
910
- value: 54.07639576230805
911
- - type: nauc_recall_at_100_std
912
- value: 28.071043659958207
913
- - type: nauc_recall_at_10_diff1
914
- value: 68.71592258481655
915
- - type: nauc_recall_at_10_max
916
- value: 31.409440559751168
917
- - type: nauc_recall_at_10_std
918
- value: -4.421548783271414
919
- - type: nauc_recall_at_1_diff1
920
- value: 79.73607180360645
921
- - type: nauc_recall_at_1_max
922
- value: 21.20262368559921
923
- - type: nauc_recall_at_1_std
924
- value: -19.089796155513238
925
- - type: nauc_recall_at_20_diff1
926
- value: 68.87539427047763
927
- - type: nauc_recall_at_20_max
928
- value: 35.60250800154217
929
- - type: nauc_recall_at_20_std
930
- value: 3.6366951424018716
931
- - type: nauc_recall_at_3_diff1
932
- value: 70.84549884977265
933
- - type: nauc_recall_at_3_max
934
- value: 27.358620163321408
935
- - type: nauc_recall_at_3_std
936
- value: -15.255203279510626
937
- - type: nauc_recall_at_5_diff1
938
- value: 70.2786434129717
939
- - type: nauc_recall_at_5_max
940
- value: 30.291629628279733
941
- - type: nauc_recall_at_5_std
942
- value: -10.193470309556629
943
- - type: ndcg_at_1
944
- value: 67.61
945
- - type: ndcg_at_10
946
- value: 78.864
947
- - type: ndcg_at_100
948
- value: 80.211
949
- - type: ndcg_at_1000
950
- value: 80.50699999999999
951
- - type: ndcg_at_20
952
- value: 79.514
953
- - type: ndcg_at_3
954
- value: 76.05499999999999
955
- - type: ndcg_at_5
956
- value: 77.625
957
- - type: precision_at_1
958
- value: 67.61
959
- - type: precision_at_10
960
- value: 8.941
961
- - type: precision_at_100
962
- value: 0.9570000000000001
963
- - type: precision_at_1000
964
- value: 0.098
965
- - type: precision_at_20
966
- value: 4.598
967
- - type: precision_at_3
968
- value: 27.267000000000003
969
- - type: precision_at_5
970
- value: 17.118
971
- - type: recall_at_1
972
- value: 67.61
973
- - type: recall_at_10
974
- value: 89.41
975
- - type: recall_at_100
976
- value: 95.67
977
- - type: recall_at_1000
978
- value: 98.02
979
- - type: recall_at_20
980
- value: 91.96
981
- - type: recall_at_3
982
- value: 81.8
983
- - type: recall_at_5
984
- value: 85.59
985
- task:
986
- type: Retrieval
987
- - dataset:
988
- config: default
989
- name: MTEB RuBQReranking (default)
990
- revision: 2e96b8f098fa4b0950fc58eacadeb31c0d0c7fa2
991
- split: test
992
- type: ai-forever/rubq-reranking
993
- metrics:
994
- - type: main_score
995
- value: 70.8676293869892
996
- - type: map
997
- value: 70.8676293869892
998
- - type: mrr
999
- value: 76.21519142795738
1000
- - type: nAUC_map_diff1
1001
- value: 37.107477549298316
1002
- - type: nAUC_map_max
1003
- value: 24.03175751284917
1004
- - type: nAUC_map_std
1005
- value: 10.543266622518289
1006
- - type: nAUC_mrr_diff1
1007
- value: 41.59000224211641
1008
- - type: nAUC_mrr_max
1009
- value: 31.06363682531277
1010
- - type: nAUC_mrr_std
1011
- value: 14.95221681925582
1012
- task:
1013
- type: Reranking
1014
- - dataset:
1015
- config: default
1016
- name: MTEB RuBQRetrieval (default)
1017
- revision: e19b6ffa60b3bc248e0b41f4cc37c26a55c2a67b
1018
- split: test
1019
- type: ai-forever/rubq-retrieval
1020
- metrics:
1021
- - type: main_score
1022
- value: 66.77499999999999
1023
- - type: map_at_1
1024
- value: 38.964
1025
- - type: map_at_10
1026
- value: 58.679
1027
- - type: map_at_100
1028
- value: 59.74699999999999
1029
- - type: map_at_1000
1030
- value: 59.784000000000006
1031
- - type: map_at_20
1032
- value: 59.386
1033
- - type: map_at_3
1034
- value: 53.183
1035
- - type: map_at_5
1036
- value: 56.619
1037
- - type: mrr_at_1
1038
- value: 56.08747044917257
1039
- - type: mrr_at_10
1040
- value: 67.69477747757892
1041
- - type: mrr_at_100
1042
- value: 68.11028091076142
1043
- - type: mrr_at_1000
1044
- value: 68.12016895906572
1045
- - type: mrr_at_20
1046
- value: 67.99200829920431
1047
- - type: mrr_at_3
1048
- value: 65.40583136327825
1049
- - type: mrr_at_5
1050
- value: 66.86564223798278
1051
- - type: nauc_map_at_1000_diff1
1052
- value: 35.13932221843019
1053
- - type: nauc_map_at_1000_max
1054
- value: 31.603311334444573
1055
- - type: nauc_map_at_1000_std
1056
- value: -8.046320861408992
1057
- - type: nauc_map_at_100_diff1
1058
- value: 35.10777181986462
1059
- - type: nauc_map_at_100_max
1060
- value: 31.603059769116086
1061
- - type: nauc_map_at_100_std
1062
- value: -8.027533855390534
1063
- - type: nauc_map_at_10_diff1
1064
- value: 34.864122757362644
1065
- - type: nauc_map_at_10_max
1066
- value: 31.625252670171776
1067
- - type: nauc_map_at_10_std
1068
- value: -8.334256854154406
1069
- - type: nauc_map_at_1_diff1
1070
- value: 40.90418146524424
1071
- - type: nauc_map_at_1_max
1072
- value: 22.269308553048656
1073
- - type: nauc_map_at_1_std
1074
- value: -9.89932822257807
1075
- - type: nauc_map_at_20_diff1
1076
- value: 34.88664926631265
1077
- - type: nauc_map_at_20_max
1078
- value: 31.60883821879978
1079
- - type: nauc_map_at_20_std
1080
- value: -8.095294415067395
1081
- - type: nauc_map_at_3_diff1
1082
- value: 35.13227486507324
1083
- - type: nauc_map_at_3_max
1084
- value: 28.53848590790504
1085
- - type: nauc_map_at_3_std
1086
- value: -9.223288317647375
1087
- - type: nauc_map_at_5_diff1
1088
- value: 35.0811457266201
1089
- - type: nauc_map_at_5_max
1090
- value: 30.904120563551984
1091
- - type: nauc_map_at_5_std
1092
- value: -9.190854442617361
1093
- - type: nauc_mrr_at_1000_diff1
1094
- value: 43.43247399448727
1095
- - type: nauc_mrr_at_1000_max
1096
- value: 37.599979998251435
1097
- - type: nauc_mrr_at_1000_std
1098
- value: -8.461570912726742
1099
- - type: nauc_mrr_at_100_diff1
1100
- value: 43.42803056119293
1101
- - type: nauc_mrr_at_100_max
1102
- value: 37.60590141137654
1103
- - type: nauc_mrr_at_100_std
1104
- value: -8.456064029069271
1105
- - type: nauc_mrr_at_10_diff1
1106
- value: 43.34260974243939
1107
- - type: nauc_mrr_at_10_max
1108
- value: 37.7505248362988
1109
- - type: nauc_mrr_at_10_std
1110
- value: -8.4789005424329
1111
- - type: nauc_mrr_at_1_diff1
1112
- value: 46.8647472051038
1113
- - type: nauc_mrr_at_1_max
1114
- value: 34.40507832070825
1115
- - type: nauc_mrr_at_1_std
1116
- value: -9.148947481764475
1117
- - type: nauc_mrr_at_20_diff1
1118
- value: 43.37024314535158
1119
- - type: nauc_mrr_at_20_max
1120
- value: 37.62040185137823
1121
- - type: nauc_mrr_at_20_std
1122
- value: -8.497477607790167
1123
- - type: nauc_mrr_at_3_diff1
1124
- value: 42.980588675445404
1125
- - type: nauc_mrr_at_3_max
1126
- value: 37.43524263010435
1127
- - type: nauc_mrr_at_3_std
1128
- value: -8.698337782804687
1129
- - type: nauc_mrr_at_5_diff1
1130
- value: 43.224910985482765
1131
- - type: nauc_mrr_at_5_max
1132
- value: 38.00633132611649
1133
- - type: nauc_mrr_at_5_std
1134
- value: -8.554751807691591
1135
- - type: nauc_ndcg_at_1000_diff1
1136
- value: 36.58393000267959
1137
- - type: nauc_ndcg_at_1000_max
1138
- value: 34.491617466873194
1139
- - type: nauc_ndcg_at_1000_std
1140
- value: -6.968933918560401
1141
- - type: nauc_ndcg_at_100_diff1
1142
- value: 35.909285337288004
1143
- - type: nauc_ndcg_at_100_max
1144
- value: 34.60361766529284
1145
- - type: nauc_ndcg_at_100_std
1146
- value: -6.3241815724593256
1147
- - type: nauc_ndcg_at_10_diff1
1148
- value: 34.86940448346685
1149
- - type: nauc_ndcg_at_10_max
1150
- value: 34.89327996781203
1151
- - type: nauc_ndcg_at_10_std
1152
- value: -7.377912505502211
1153
- - type: nauc_ndcg_at_1_diff1
1154
- value: 47.16372543032823
1155
- - type: nauc_ndcg_at_1_max
1156
- value: 34.48620759685232
1157
- - type: nauc_ndcg_at_1_std
1158
- value: -8.881483248224074
1159
- - type: nauc_ndcg_at_20_diff1
1160
- value: 34.901006085701795
1161
- - type: nauc_ndcg_at_20_max
1162
- value: 34.766948088105174
1163
- - type: nauc_ndcg_at_20_std
1164
- value: -6.680375186500669
1165
- - type: nauc_ndcg_at_3_diff1
1166
- value: 35.16537335241684
1167
- - type: nauc_ndcg_at_3_max
1168
- value: 31.385279916552566
1169
- - type: nauc_ndcg_at_3_std
1170
- value: -8.871530629591442
1171
- - type: nauc_ndcg_at_5_diff1
1172
- value: 35.152664105492605
1173
- - type: nauc_ndcg_at_5_max
1174
- value: 33.89982336069226
1175
- - type: nauc_ndcg_at_5_std
1176
- value: -8.92795810387048
1177
- - type: nauc_precision_at_1000_diff1
1178
- value: -6.773234121047722
1179
- - type: nauc_precision_at_1000_max
1180
- value: 7.0059404092503925
1181
- - type: nauc_precision_at_1000_std
1182
- value: 4.757430160226248
1183
- - type: nauc_precision_at_100_diff1
1184
- value: -6.88009476644726
1185
- - type: nauc_precision_at_100_max
1186
- value: 10.391099419327492
1187
- - type: nauc_precision_at_100_std
1188
- value: 7.203837158689326
1189
- - type: nauc_precision_at_10_diff1
1190
- value: -0.7155570800016817
1191
- - type: nauc_precision_at_10_max
1192
- value: 21.06902041338105
1193
- - type: nauc_precision_at_10_std
1194
- value: 3.7465404459270815
1195
- - type: nauc_precision_at_1_diff1
1196
- value: 47.16372543032823
1197
- - type: nauc_precision_at_1_max
1198
- value: 34.48620759685232
1199
- - type: nauc_precision_at_1_std
1200
- value: -8.881483248224074
1201
- - type: nauc_precision_at_20_diff1
1202
- value: -4.695792117927824
1203
- - type: nauc_precision_at_20_max
1204
- value: 16.53698826752203
1205
- - type: nauc_precision_at_20_std
1206
- value: 6.681726081495262
1207
- - type: nauc_precision_at_3_diff1
1208
- value: 12.446292477522807
1209
- - type: nauc_precision_at_3_max
1210
- value: 27.622770072159884
1211
- - type: nauc_precision_at_3_std
1212
- value: -2.243774812074271
1213
- - type: nauc_precision_at_5_diff1
1214
- value: 5.851972491534291
1215
- - type: nauc_precision_at_5_max
1216
- value: 25.400246002612235
1217
- - type: nauc_precision_at_5_std
1218
- value: -0.8059534151280825
1219
- - type: nauc_recall_at_1000_diff1
1220
- value: 17.33619903703495
1221
- - type: nauc_recall_at_1000_max
1222
- value: 46.39520954734979
1223
- - type: nauc_recall_at_1000_std
1224
- value: 59.70020859630654
1225
- - type: nauc_recall_at_100_diff1
1226
- value: 9.309667388080348
1227
- - type: nauc_recall_at_100_max
1228
- value: 35.92482580062717
1229
- - type: nauc_recall_at_100_std
1230
- value: 24.021627313676188
1231
- - type: nauc_recall_at_10_diff1
1232
- value: 19.87959406394684
1233
- - type: nauc_recall_at_10_max
1234
- value: 35.00740821313158
1235
- - type: nauc_recall_at_10_std
1236
- value: -2.6455284599102784
1237
- - type: nauc_recall_at_1_diff1
1238
- value: 40.90418146524424
1239
- - type: nauc_recall_at_1_max
1240
- value: 22.269308553048656
1241
- - type: nauc_recall_at_1_std
1242
- value: -9.89932822257807
1243
- - type: nauc_recall_at_20_diff1
1244
- value: 15.028975252982061
1245
- - type: nauc_recall_at_20_max
1246
- value: 34.901307836728016
1247
- - type: nauc_recall_at_20_std
1248
- value: 2.9027647776175494
1249
- - type: nauc_recall_at_3_diff1
1250
- value: 26.13225834790859
1251
- - type: nauc_recall_at_3_max
1252
- value: 27.915627935543725
1253
- - type: nauc_recall_at_3_std
1254
- value: -8.069525359773976
1255
- - type: nauc_recall_at_5_diff1
1256
- value: 24.184086614024686
1257
- - type: nauc_recall_at_5_max
1258
- value: 32.607378848166675
1259
- - type: nauc_recall_at_5_std
1260
- value: -7.730984752196379
1261
- - type: ndcg_at_1
1262
- value: 55.969
1263
- - type: ndcg_at_10
1264
- value: 66.77499999999999
1265
- - type: ndcg_at_100
1266
- value: 70.324
1267
- - type: ndcg_at_1000
1268
- value: 70.95700000000001
1269
- - type: ndcg_at_20
1270
- value: 68.613
1271
- - type: ndcg_at_3
1272
- value: 59.256
1273
- - type: ndcg_at_5
1274
- value: 63.223
1275
- - type: precision_at_1
1276
- value: 55.969
1277
- - type: precision_at_10
1278
- value: 13.297999999999998
1279
- - type: precision_at_100
1280
- value: 1.585
1281
- - type: precision_at_1000
1282
- value: 0.167
1283
- - type: precision_at_20
1284
- value: 7.222
1285
- - type: precision_at_3
1286
- value: 32.467
1287
- - type: precision_at_5
1288
- value: 23.073
1289
- - type: recall_at_1
1290
- value: 38.964
1291
- - type: recall_at_10
1292
- value: 81.248
1293
- - type: recall_at_100
1294
- value: 95.124
1295
- - type: recall_at_1000
1296
- value: 99.30600000000001
1297
- - type: recall_at_20
1298
- value: 87.35199999999999
1299
- - type: recall_at_3
1300
- value: 62.785000000000004
1301
- - type: recall_at_5
1302
- value: 71.986
1303
- task:
1304
- type: Retrieval
1305
- - dataset:
1306
- config: default
1307
- name: MTEB RuReviewsClassification (default)
1308
- revision: f6d2c31f4dc6b88f468552750bfec05b4b41b05a
1309
- split: test
1310
- type: ai-forever/ru-reviews-classification
1311
- metrics:
1312
- - type: accuracy
1313
- value: 67.958984375
1314
- - type: f1
1315
- value: 67.250877785427
1316
- - type: f1_weighted
1317
- value: 67.25215701797296
1318
- - type: main_score
1319
- value: 67.958984375
1320
- task:
1321
- type: Classification
1322
- - dataset:
1323
- config: default
1324
- name: MTEB RuSTSBenchmarkSTS (default)
1325
- revision: 7cf24f325c6da6195df55bef3d86b5e0616f3018
1326
- split: test
1327
- type: ai-forever/ru-stsbenchmark-sts
1328
- metrics:
1329
- - type: cosine_pearson
1330
- value: 79.11336124619963
1331
- - type: cosine_spearman
1332
- value: 78.69157477180703
1333
- - type: euclidean_pearson
1334
- value: 77.84066073571212
1335
- - type: euclidean_spearman
1336
- value: 78.69157477180703
1337
- - type: main_score
1338
- value: 78.69157477180703
1339
- - type: manhattan_pearson
1340
- value: 77.79213012957939
1341
- - type: manhattan_spearman
1342
- value: 78.61384378877501
1343
- - type: pearson
1344
- value: 79.11336124619963
1345
- - type: spearman
1346
- value: 78.69157477180703
1347
- task:
1348
- type: STS
1349
- - dataset:
1350
- config: default
1351
- name: MTEB RuSciBenchGRNTIClassification (default)
1352
- revision: 673a610d6d3dd91a547a0d57ae1b56f37ebbf6a1
1353
- split: test
1354
- type: ai-forever/ru-scibench-grnti-classification
1355
- metrics:
1356
- - type: accuracy
1357
- value: 59.326171875
1358
- - type: f1
1359
- value: 58.01171745357119
1360
- - type: f1_weighted
1361
- value: 58.02106511480968
1362
- - type: main_score
1363
- value: 59.326171875
1364
- task:
1365
- type: Classification
1366
- - dataset:
1367
- config: default
1368
- name: MTEB RuSciBenchGRNTIClusteringP2P (default)
1369
- revision: 673a610d6d3dd91a547a0d57ae1b56f37ebbf6a1
1370
- split: test
1371
- type: ai-forever/ru-scibench-grnti-classification
1372
- metrics:
1373
- - type: main_score
1374
- value: 55.46570753380975
1375
- - type: v_measure
1376
- value: 55.46570753380975
1377
- - type: v_measure_std
1378
- value: 0.9813885872798612
1379
- task:
1380
- type: Clustering
1381
- - dataset:
1382
- config: default
1383
- name: MTEB RuSciBenchOECDClassification (default)
1384
- revision: 26c88e99dcaba32bb45d0e1bfc21902337f6d471
1385
- split: test
1386
- type: ai-forever/ru-scibench-oecd-classification
1387
- metrics:
1388
- - type: accuracy
1389
- value: 46.328125
1390
- - type: f1
1391
- value: 44.19158709013339
1392
- - type: f1_weighted
1393
- value: 44.190957945676026
1394
- - type: main_score
1395
- value: 46.328125
1396
- task:
1397
- type: Classification
1398
- - dataset:
1399
- config: default
1400
- name: MTEB RuSciBenchOECDClusteringP2P (default)
1401
- revision: 26c88e99dcaba32bb45d0e1bfc21902337f6d471
1402
- split: test
1403
- type: ai-forever/ru-scibench-oecd-classification
1404
- metrics:
1405
- - type: main_score
1406
- value: 47.28635342613908
1407
- - type: v_measure
1408
- value: 47.28635342613908
1409
- - type: v_measure_std
1410
- value: 0.7431017612993989
1411
- task:
1412
- type: Clustering
1413
- - dataset:
1414
- config: ru
1415
- name: MTEB STS22 (ru)
1416
- revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
1417
- split: test
1418
- type: mteb/sts22-crosslingual-sts
1419
- metrics:
1420
- - type: cosine_pearson
1421
- value: 63.10139371129796
1422
- - type: cosine_spearman
1423
- value: 67.06445400504978
1424
- - type: euclidean_pearson
1425
- value: 62.74563386470613
1426
- - type: euclidean_spearman
1427
- value: 67.06445400504978
1428
- - type: main_score
1429
- value: 67.06445400504978
1430
- - type: manhattan_pearson
1431
- value: 62.540465664732395
1432
- - type: manhattan_spearman
1433
- value: 66.65899492022648
1434
- - type: pearson
1435
- value: 63.10139371129796
1436
- - type: spearman
1437
- value: 67.06445400504978
1438
- task:
1439
- type: STS
1440
- - dataset:
1441
- config: default
1442
- name: MTEB SensitiveTopicsClassification (default)
1443
- revision: 416b34a802308eac30e4192afc0ff99bb8dcc7f2
1444
- split: test
1445
- type: ai-forever/sensitive-topics-classification
1446
- metrics:
1447
- - type: accuracy
1448
- value: 33.0712890625
1449
- - type: f1
1450
- value: 38.063573562290024
1451
- - type: lrap
1452
- value: 49.586995442707696
1453
- - type: main_score
1454
- value: 33.0712890625
1455
- task:
1456
- type: MultilabelClassification
1457
- - dataset:
1458
- config: default
1459
- name: MTEB TERRa (default)
1460
- revision: 7b58f24536063837d644aab9a023c62199b2a612
1461
- split: dev
1462
- type: ai-forever/terra-pairclassification
1463
- metrics:
1464
- - type: cosine_accuracy
1465
- value: 61.563517915309454
1466
- - type: cosine_accuracy_threshold
1467
- value: 75.3734290599823
1468
- - type: cosine_ap
1469
- value: 60.78861909325018
1470
- - type: cosine_f1
1471
- value: 67.25663716814158
1472
- - type: cosine_f1_threshold
1473
- value: 54.05237674713135
1474
- - type: cosine_precision
1475
- value: 50.836120401337794
1476
- - type: cosine_recall
1477
- value: 99.34640522875817
1478
- - type: dot_accuracy
1479
- value: 61.563517915309454
1480
- - type: dot_accuracy_threshold
1481
- value: 75.37343502044678
1482
- - type: dot_ap
1483
- value: 60.78861909325018
1484
- - type: dot_f1
1485
- value: 67.25663716814158
1486
- - type: dot_f1_threshold
1487
- value: 54.05237674713135
1488
- - type: dot_precision
1489
- value: 50.836120401337794
1490
- - type: dot_recall
1491
- value: 99.34640522875817
1492
- - type: euclidean_accuracy
1493
- value: 61.563517915309454
1494
- - type: euclidean_accuracy_threshold
1495
- value: 70.18057107925415
1496
- - type: euclidean_ap
1497
- value: 60.78861909325018
1498
- - type: euclidean_f1
1499
- value: 67.25663716814158
1500
- - type: euclidean_f1_threshold
1501
- value: 95.86195945739746
1502
- - type: euclidean_precision
1503
- value: 50.836120401337794
1504
- - type: euclidean_recall
1505
- value: 99.34640522875817
1506
- - type: main_score
1507
- value: 60.78861909325018
1508
- - type: manhattan_accuracy
1509
- value: 60.91205211726385
1510
- - type: manhattan_accuracy_threshold
1511
- value: 1813.1645202636719
1512
- - type: manhattan_ap
1513
- value: 60.478709337038936
1514
- - type: manhattan_f1
1515
- value: 67.10816777041943
1516
- - type: manhattan_f1_threshold
1517
- value: 2475.027275085449
1518
- - type: manhattan_precision
1519
- value: 50.66666666666667
1520
- - type: manhattan_recall
1521
- value: 99.34640522875817
1522
- - type: max_ap
1523
- value: 60.78861909325018
1524
- - type: max_f1
1525
- value: 67.25663716814158
1526
- - type: max_precision
1527
- value: 50.836120401337794
1528
- - type: max_recall
1529
- value: 99.34640522875817
1530
- - type: similarity_accuracy
1531
- value: 61.563517915309454
1532
- - type: similarity_accuracy_threshold
1533
- value: 75.3734290599823
1534
- - type: similarity_ap
1535
- value: 60.78861909325018
1536
- - type: similarity_f1
1537
- value: 67.25663716814158
1538
- - type: similarity_f1_threshold
1539
- value: 54.05237674713135
1540
- - type: similarity_precision
1541
- value: 50.836120401337794
1542
- - type: similarity_recall
1543
- value: 99.34640522875817
1544
- task:
1545
- type: PairClassification
1546
- license: mit
1547
- language:
1548
- - ru
1549
- - en
1550
- tags:
1551
- - mteb
1552
- - transformers
1553
- - sentence-transformers
1554
- base_model: ai-forever/ruRoberta-large
1555
- ---
1556
-
1557
- # Model Card for ru-en-RoSBERTa
1558
-
1559
- ru-en-RoSBERTa is a general text embedding model for Russian. The model is based on [ruRoBERTa](https://huggingface.co/ai-forever/ruRoberta-large) and was fine-tuned on ~4M pairs of supervised, synthetic and unsupervised data in Russian and English. The tokenizer supports some English tokens from the [RoBERTa](https://huggingface.co/FacebookAI/roberta-large) tokenizer.
1560
-
1561
- For more details about the model, please refer to our [article](https://arxiv.org/abs/2408.12503).
1562
-
1563
- ## Usage
1564
-
1565
- The model can be used as is with prefixes. It is recommended to use CLS pooling. The choice of prefix and pooling depends on the task.
1566
-
1567
- We use the following basic rules to choose a prefix:
1568
- - `"search_query: "` and `"search_document: "` prefixes are for answer or relevant paragraph retrieval
1569
- - `"classification: "` prefix is for symmetric paraphrase-related tasks (STS, NLI, Bitext Mining)
1570
- - `"clustering: "` prefix is for any tasks that rely on thematic features (topic classification, title-body retrieval)
1571
-
1572
- To better tailor the model to your needs, you can fine-tune it with relevant high-quality Russian and English datasets.
1573
-
1574
- Below are examples of encoding texts using the Transformers and SentenceTransformers libraries.
1575
-
1576
- ### Transformers
1577
-
1578
- ```python
1579
- import torch
1580
- import torch.nn.functional as F
1581
- from transformers import AutoTokenizer, AutoModel
1582
-
1583
-
1584
- def pool(hidden_state, mask, pooling_method="cls"):
1585
- if pooling_method == "mean":
1586
- s = torch.sum(hidden_state * mask.unsqueeze(-1).float(), dim=1)
1587
- d = mask.sum(axis=1, keepdim=True).float()
1588
- return s / d
1589
- elif pooling_method == "cls":
1590
- return hidden_state[:, 0]
1591
-
1592
- inputs = [
1593
- #
1594
- "classification: Он нам и <unk> не нужон ваш Интернет!",
1595
- "clustering: В Ярославской области разрешили работу бань, но без посетителей",
1596
- "search_query: Сколько программистов нужно, чтобы вкрутить лампочку?",
1597
-
1598
- #
1599
- "classification: What a time to be alive!",
1600
- "clustering: Ярославским баням разрешили работать без посетителей",
1601
- "search_document: Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование.",
1602
- ]
1603
-
1604
- tokenizer = AutoTokenizer.from_pretrained("ai-forever/ru-en-RoSBERTa")
1605
- model = AutoModel.from_pretrained("ai-forever/ru-en-RoSBERTa")
1606
-
1607
- tokenized_inputs = tokenizer(inputs, max_length=512, padding=True, truncation=True, return_tensors="pt")
1608
-
1609
- with torch.no_grad():
1610
- outputs = model(**tokenized_inputs)
1611
-
1612
- embeddings = pool(
1613
- outputs.last_hidden_state,
1614
- tokenized_inputs["attention_mask"],
1615
- pooling_method="cls" # or try "mean"
1616
- )
1617
-
1618
- embeddings = F.normalize(embeddings, p=2, dim=1)
1619
-
1620
- sim_scores = embeddings[:3] @ embeddings[3:].T
1621
- print(sim_scores.diag().tolist())
1622
- # [0.4796873927116394, 0.9409002065658569, 0.7761015892028809]
1623
- ```
1624
-
1625
- ### SentenceTransformers
1626
-
1627
- ```python
1628
- from sentence_transformers import SentenceTransformer
1629
-
1630
-
1631
- inputs = [
1632
- #
1633
- "classification: Он нам и <unk> не нужон ваш Интернет!",
1634
- "clustering: В Ярославской области разрешили работу бань, но без посетителей",
1635
- "search_query: Сколько программистов нужно, чтобы вкрутить лампочку?",
1636
-
1637
- #
1638
- "classification: What a time to be alive!",
1639
- "clustering: Ярославским баням разрешили работать без посетителей",
1640
- "search_document: Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование.",
1641
- ]
1642
-
1643
- # loads model with CLS pooling
1644
- model = SentenceTransformer("ai-forever/ru-en-RoSBERTa")
1645
-
1646
- # embeddings are normalized by default
1647
- embeddings = model.encode(inputs, convert_to_tensor=True)
1648
-
1649
- sim_scores = embeddings[:3] @ embeddings[3:].T
1650
- print(sim_scores.diag().tolist())
1651
- # [0.47968706488609314, 0.940900444984436, 0.7761018872261047]
1652
- ```
1653
-
1654
- or using prompts (sentence-transformers>=2.4.0):
1655
-
1656
- ```python
1657
- from sentence_transformers import SentenceTransformer
1658
-
1659
-
1660
- # loads model with CLS pooling
1661
- model = SentenceTransformer("ai-forever/ru-en-RoSBERTa")
1662
-
1663
- classification = model.encode(["Он нам и <unk> не нужон ваш Интернет!", "What a time to be alive!"], prompt_name="classification")
1664
- print(classification[0] @ classification[1].T) # 0.47968706488609314
1665
-
1666
- clustering = model.encode(["В Ярославской области разрешили работу бань, но без посетителей", "Ярославским баням разрешили работать без посетителей"], prompt_name="clustering")
1667
- print(clustering[0] @ clustering[1].T) # 0.940900444984436
1668
-
1669
- query_embedding = model.encode("Сколько программистов нужно, чтобы вкрутить лампочку?", prompt_name="search_query")
1670
- document_embedding = model.encode("Чтобы вкрутить лампочку, требуется три программиста: один напишет программу извлечения лампочки, другой — вкручивания лампочки, а третий проведет тестирование.", prompt_name="search_document")
1671
- print(query_embedding @ document_embedding.T) # 0.7761018872261047
1672
- ```
1673
-
1674
- ## Citation
1675
-
1676
- ```
1677
- @misc{snegirev2024russianfocusedembeddersexplorationrumteb,
1678
- title={The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
1679
- author={Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
1680
- year={2024},
1681
- eprint={2408.12503},
1682
- archivePrefix={arXiv},
1683
- primaryClass={cs.CL},
1684
- url={https://arxiv.org/abs/2408.12503},
1685
- }
1686
- ```
1687
-
1688
- ## Limitations
1689
-
1690
- The model is designed to process texts in Russian; the quality in English is unknown. The maximum input text length is limited to 512 tokens.