File size: 2,006 Bytes
bf74331
 
db0da0a
bf74331
 
 
 
 
 
 
 
 
 
 
 
 
db0da0a
 
bf74331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class DatasetConfig:
    """Describe which dataset to evaluate on and which columns to read.

    Attributes:
        name: Dataset identifier (presumably a Hugging Face Hub repo id;
            confirm against the loader).
        config: Optional dataset configuration/subset name; ``None`` when
            the dataset needs none.
        split: Name of the split to use.
        query_col: Column holding the first text of each pair.
        passage_col: Column holding the second text of each pair.
        score_col: Column holding a per-pair score, or ``None`` when the
            dataset has no such column.
        score_scale: Scale associated with ``score_col`` (presumably the
            divisor used to normalise scores; verify against the caller).
        data: Already-loaded columns as a mapping of column name to list of
            values. When set, the HF download is skipped. Left out of the
            generated ``repr``.
    """

    # --- Where the data comes from ---
    name: str = "mteb/stsbenchmark-sts"
    config: str | None = None
    split: str = "test"

    # --- Which columns to read ---
    query_col: str = "sentence1"
    passage_col: str = "sentence2"
    score_col: str | None = "score"
    score_scale: float = 5.0

    # --- Optional in-memory override (column-name -> list of values) ---
    # When provided, the HF download is skipped.
    data: dict[str, list] | None = field(repr=False, default=None)


# Named dataset presets, keyed by a short alias.
# Every entry spells out its source (name/config/split) and its columns so the
# presets can be compared line by line. Only "sts" sets a score column; the
# remaining presets set score_col=None.
DATASET_PRESETS: dict[str, DatasetConfig] = {
    # Sentence pairs with a score column.
    "sts": DatasetConfig(
        name="mteb/stsbenchmark-sts",
        config=None,
        split="test",
        query_col="sentence1",
        passage_col="sentence2",
        score_col="score",
        score_scale=5.0,
    ),
    # Presets without a score column.
    "natural-questions": DatasetConfig(
        name="sentence-transformers/natural-questions",
        config=None,
        split="train",
        query_col="query",
        passage_col="answer",
        score_col=None,
    ),
    "msmarco": DatasetConfig(
        name="sentence-transformers/msmarco-bm25",
        config="triplet",
        split="train",
        query_col="query",
        passage_col="positive",
        score_col=None,
    ),
    "squad": DatasetConfig(
        name="sentence-transformers/squad",
        config=None,
        split="train",
        query_col="question",
        passage_col="answer",
        score_col=None,
    ),
    "trivia-qa": DatasetConfig(
        name="sentence-transformers/trivia-qa",
        config=None,
        split="train",
        query_col="query",
        passage_col="answer",
        score_col=None,
    ),
    "gooaq": DatasetConfig(
        name="sentence-transformers/gooaq",
        config=None,
        split="train",
        query_col="question",
        passage_col="answer",
        score_col=None,
    ),
    "hotpotqa": DatasetConfig(
        name="sentence-transformers/hotpotqa",
        config="triplet",
        split="train",
        query_col="anchor",
        passage_col="positive",
        score_col=None,
    ),
}