| from datasets import load_dataset | |
| from src.config import config | |
def download_dataset():
    """Load the configured dataset's ``train`` split and report its size.

    The dataset identifier is read from ``config.data.dataset_path`` and
    handed to ``load_dataset`` together with ``cache_dir="cache"``. A status
    line is printed before the call and the number of rows is printed after
    it. The function returns nothing.
    """
    print(f"Downloading dataset: {config.data.dataset_path}")

    # Keyword options forwarded unchanged to ``load_dataset``.
    loader_options = {"split": "train", "cache_dir": "cache"}
    train_split = load_dataset(config.data.dataset_path, **loader_options)

    row_count = len(train_split)
    print(f"Dataset downloaded successfully: {row_count} rows")
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    download_dataset()