Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import pickle | |
| from datasets import load_dataset | |
| from config import ConfigConstants # For saving the dataset locally | |
def load_data(data_set_name):
    """Return the ``test`` split of a ``rungalileo/ragbench`` subset, cached on disk.

    On the first call for a given subset the data is fetched with
    ``load_dataset`` and pickled to
    ``<DATA_SET_PATH>local_datasets/<data_set_name>_test.pkl``. Later calls
    read that pickle instead of contacting Hugging Face again. If the cached
    file cannot be unpickled (for example it was truncated by an interrupted
    run), it is ignored and the dataset is downloaded and cached again.

    Args:
        data_set_name: Configuration (subset) name passed to ``load_dataset``;
            also used to build the cache file name.

    Returns:
        The object returned by ``load_dataset(..., split="test")`` (or its
        unpickled copy).
    """
    # NOTE(review): plain concatenation is kept on purpose. Whether
    # DATA_SET_PATH ends with a path separator is not visible from here, and
    # switching to os.path.join could silently relocate an existing cache.
    local_path = ConfigConstants.DATA_SET_PATH + 'local_datasets'
    os.makedirs(local_path, exist_ok=True)
    dataset_file = os.path.join(local_path, f"{data_set_name}_test.pkl")

    dataset = None
    if os.path.exists(dataset_file):
        logging.info(f"Loading dataset {data_set_name} from local storage. File location {dataset_file}")
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # This is only acceptable because the cache is written by this very
        # function; never point DATA_SET_PATH at an untrusted location.
        try:
            with open(dataset_file, "rb") as f:
                dataset = pickle.load(f)
        except (pickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError):
            # A truncated or incompatible cache should not be fatal: fall
            # through to a fresh download, which overwrites the bad file.
            logging.warning(
                f"Cached dataset at {dataset_file} could not be read; downloading again",
                exc_info=True,
            )
            dataset = None

    if dataset is None:
        logging.info("Loading dataset from Hugging Face")
        dataset = load_dataset("rungalileo/ragbench", data_set_name, split="test")
        logging.info(f"Saving {data_set_name} dataset locally")
        # Write to a sibling temp file and rename it into place so that a
        # crash mid-dump never leaves a half-written file at the cache path.
        tmp_file = f"{dataset_file}.{os.getpid()}.tmp"
        try:
            with open(tmp_file, "wb") as f:
                pickle.dump(dataset, f)
            os.replace(tmp_file, dataset_file)
        finally:
            # After a successful replace the temp file no longer exists; this
            # only cleans up after a failed dump or rename.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    logging.info("Dataset loaded successfully")
    logging.info(f"Number of documents found: {dataset.num_rows}")
    return dataset