from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class DatasetConfig:
    """Configuration for the evaluation dataset.

    Every field has a default, so ``DatasetConfig()`` yields the
    ``mteb/stsbenchmark-sts`` test-split configuration.

    Attributes:
        name: Dataset identifier (presumably a Hugging Face Hub dataset id,
            given the download note on ``data`` -- confirm against the loader).
        config: Optional dataset configuration name; ``None`` when unset.
        split: Name of the dataset split to use.
        query_col: Name of the column holding the query-side text.
        passage_col: Name of the column holding the passage-side text.
        score_col: Name of the column holding a score, or ``None`` when the
            dataset has no such column.
        score_scale: Scale associated with ``score_col`` (presumably the
            maximum raw score, used for normalisation -- confirm against
            the code that consumes it).
        data: Optional pre-loaded data, excluded from ``repr``.
    """

    name: str = "mteb/stsbenchmark-sts"
    config: str | None = None
    split: str = "test"
    query_col: str = "sentence1"
    passage_col: str = "sentence2"
    score_col: str | None = "score"
    score_scale: float = 5.0
    # Pre-loaded data (dict of column-name -> list). When set, skip HF download.
    # repr=False keeps a potentially large payload out of the generated repr.
    data: dict[str, list] | None = field(default=None, repr=False)
# Named, ready-made DatasetConfig instances keyed by a short preset name.
# Each value is a single module-level instance, so anything that mutates a
# preset in place changes it for every later lookup.
DATASET_PRESETS: dict[str, DatasetConfig] = {
    # The only preset that names a score column (with score_scale=5.0).
    "sts": DatasetConfig(
        name="mteb/stsbenchmark-sts",
        split="test",
        query_col="sentence1",
        passage_col="sentence2",
        score_col="score",
        score_scale=5.0,
    ),
    # Every preset below sets score_col=None and uses the "train" split.
    "natural-questions": DatasetConfig(
        name="sentence-transformers/natural-questions",
        split="train",
        query_col="query",
        passage_col="answer",
        score_col=None,
    ),
    "msmarco": DatasetConfig(
        name="sentence-transformers/msmarco-bm25",
        config="triplet",
        split="train",
        query_col="query",
        passage_col="positive",
        score_col=None,
    ),
    "squad": DatasetConfig(
        name="sentence-transformers/squad",
        split="train",
        query_col="question",
        passage_col="answer",
        score_col=None,
    ),
    "trivia-qa": DatasetConfig(
        name="sentence-transformers/trivia-qa",
        split="train",
        query_col="query",
        passage_col="answer",
        score_col=None,
    ),
    "gooaq": DatasetConfig(
        name="sentence-transformers/gooaq",
        split="train",
        query_col="question",
        passage_col="answer",
        score_col=None,
    ),
    "hotpotqa": DatasetConfig(
        name="sentence-transformers/hotpotqa",
        config="triplet",
        split="train",
        query_col="anchor",
        passage_col="positive",
        score_col=None,
    ),
}
|