| from __future__ import annotations |
|
|
| import os |
| import shutil |
| import subprocess |
| from pathlib import Path |
|
|
|
|
| def _ensure_kaggle_json() -> None: |
| repo_root = Path(__file__).resolve().parents[1] |
| local = repo_root / "kaggle.json" |
| workspace_root = repo_root.parent / "kaggle.json" |
| home = Path.home() / ".kaggle" / "kaggle.json" |
|
|
| if home.exists(): |
| return |
|
|
| if not local.exists() and not workspace_root.exists(): |
| raise FileNotFoundError( |
| "Kaggle credentials not found. Put kaggle.json at ~/.kaggle/kaggle.json " |
| "or copy it to the repo root (it is gitignored)." |
| ) |
|
|
| src = local if local.exists() else workspace_root |
| home.parent.mkdir(parents=True, exist_ok=True) |
| shutil.copyfile(src, home) |
| os.chmod(home, 0o600) |
|
|
|
|
| def main() -> None: |
| repo_root = Path(__file__).resolve().parents[1] |
| raw_dir = repo_root / "data" / "raw" |
| raw_dir.mkdir(parents=True, exist_ok=True) |
|
|
| _ensure_kaggle_json() |
|
|
| competition = "ieee-fraud-detection" |
| zip_path = raw_dir / f"{competition}.zip" |
|
|
| cmd = [ |
| "kaggle", |
| "competitions", |
| "download", |
| "-c", |
| competition, |
| "-p", |
| str(raw_dir), |
| "--force", |
| ] |
| subprocess.run(cmd, check=True) |
|
|
| if zip_path.exists(): |
| shutil.unpack_archive(str(zip_path), str(raw_dir)) |
|
|
| expected = raw_dir / "train_transaction.csv" |
| if not expected.exists(): |
| raise FileNotFoundError( |
| f"Expected {expected} after download/unzip. " |
| "Make sure you've accepted the Kaggle competition rules." |
| ) |
|
|
| print(f"Downloaded to {raw_dir}") |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|