Spaces:
Sleeping
Sleeping
| import logging | |
| from datasets import load_dataset | |
| logger = logging.getLogger(__name__) | |
class DataLoader:
    """Thin wrapper around Hugging Face `datasets` for loading MS MARCO.

    Attributes are plain data: `cache_dir` is the directory handed to
    `load_dataset` so that downloaded files are stored there.
    """

    def __init__(self, cache_dir: str = "./cache"):
        """Remember where downloaded datasets should be cached.

        Args:
            cache_dir: Directory passed to `load_dataset` as its `cache_dir`.
        """
        self.cache_dir = cache_dir

    def load_msmarco_passage(self, split: str = "train"):
        """Load MS MARCO Passage Ranking dataset from Hugging Face (v2.1).

        Args:
            split: Name of the split to request (forwarded to `load_dataset`).

        Returns:
            Whatever `load_dataset` returns for the requested split.

        Raises:
            Exception: Any error raised by `load_dataset` is logged and then
                re-raised unchanged.
        """
        logger.info(
            "Downloading MS MARCO Passage Ranking %s (v2.1) from Hugging Face",
            split,
        )
        try:
            # Forward the configured cache directory; previously it was stored
            # on the instance but never used, so the library default applied.
            return load_dataset(
                "ms_marco",
                "v2.1",
                split=split,
                cache_dir=self.cache_dir,
            )
        except Exception as e:
            logger.error("Failed to load MS MARCO Passage Ranking: %s", e)
            raise

    def get_passage_dataset(self, split: str = "train"):
        """Load MS MARCO Passage Ranking dataset and log on success.

        Args:
            split: Name of the split to request.

        Returns:
            The dataset returned by `load_msmarco_passage`.

        Raises:
            Exception: Propagated unchanged from `load_msmarco_passage`, which
                has already logged the failure (so it is not logged again
                here).
        """
        ds = self.load_msmarco_passage(split)
        logger.info("MS MARCO Passage Ranking loaded successfully")
        return ds