Spaces:
Sleeping
Sleeping
File size: 1,038 Bytes
0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 0a02cd7 db06013 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import logging
from datasets import load_dataset
logger = logging.getLogger(__name__)
class DataLoader:
    """Load the MS MARCO dataset (config ``v2.1``) through Hugging Face ``datasets``.

    Args:
        cache_dir: Directory forwarded to ``load_dataset`` as its cache
            location. Defaults to ``"./cache"``.
    """

    def __init__(self, cache_dir: str = "./cache"):
        self.cache_dir = cache_dir

    def load_msmarco_passage(self, split: str = "train"):
        """Load one split of the ``ms_marco`` dataset, configuration ``v2.1``.

        Args:
            split: Split name forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` for the requested split.

        Raises:
            Exception: Any error raised by ``load_dataset`` is logged at
                ERROR level and then re-raised unchanged.
        """
        logger.info(
            "Downloading MS MARCO Passage Ranking %s (v2.1) from Hugging Face",
            split,
        )
        try:
            # Forward the configured cache directory; it used to be stored on
            # the instance but never handed to ``load_dataset``.
            return load_dataset(
                "ms_marco",
                "v2.1",
                split=split,
                cache_dir=self.cache_dir,
            )
        except Exception as e:
            logger.error("Failed to load MS MARCO Passage Ranking: %s", e)
            raise

    def get_passage_dataset(self, split: str = "train"):
        """Load the requested split and log a success message.

        Args:
            split: Split name passed to ``load_msmarco_passage``.

        Returns:
            The dataset object returned by ``load_msmarco_passage``.

        Raises:
            Exception: Whatever ``load_msmarco_passage`` raises propagates
                unchanged.
        """
        # ``load_msmarco_passage`` already logs failures before re-raising, so
        # no second handler here: it would only duplicate the error record.
        ds = self.load_msmarco_passage(split)
        logger.info("MS MARCO Passage Ranking loaded successfully")
        return ds