# Snapshot ae7c16d (size reported by the file viewer at capture time: 1,687 bytes).
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
def _ensure_kaggle_json() -> None:
    """Make sure Kaggle API credentials are available before downloading.

    Lookup order:

    1. ``KAGGLE_USERNAME`` and ``KAGGLE_KEY`` are both set in the
       environment: nothing to do.
    2. ``~/.kaggle/kaggle.json`` already exists: nothing to do.
    3. ``kaggle.json`` in the repository root (the parent of this file's
       directory), then in the directory above the repository root: the
       first one found is copied to ``~/.kaggle/kaggle.json`` with
       owner-only permissions.

    Raises:
        FileNotFoundError: If none of the locations above provides
            credentials.
    """
    # The Kaggle client also accepts credentials from these environment
    # variables (see the Kaggle API docs), so no kaggle.json is required
    # when both are present -- the usual setup on CI machines.
    if os.environ.get("KAGGLE_USERNAME") and os.environ.get("KAGGLE_KEY"):
        return

    home = Path.home() / ".kaggle" / "kaggle.json"
    if home.exists():
        return

    repo_root = Path(__file__).resolve().parents[1]
    candidates = (repo_root / "kaggle.json", repo_root.parent / "kaggle.json")
    src = next((path for path in candidates if path.exists()), None)
    if src is None:
        raise FileNotFoundError(
            "Kaggle credentials not found. Put kaggle.json at ~/.kaggle/kaggle.json "
            "or copy it to the repo root (it is gitignored)."
        )

    home.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
    # Create the destination owner-only *before* any secret bytes are written.
    # shutil.copyfile() opens an existing file for writing without changing
    # its mode, so the API key is never on disk with wider permissions.
    home.touch(mode=0o600)
    # touch() leaves the mode of a file that appeared in the meantime
    # untouched, so enforce it explicitly as well.
    os.chmod(home, 0o600)
    shutil.copyfile(src, home)
def main() -> None:
    """Fetch the ``ieee-fraud-detection`` competition files into ``data/raw``.

    Steps, in order:

    1. Create ``<repo root>/data/raw`` if it does not exist yet.
    2. Make sure Kaggle credentials are in place.
    3. Run ``kaggle competitions download`` with ``--force`` into that
       directory.
    4. Unpack ``ieee-fraud-detection.zip`` there if the download produced it.
    5. Verify that ``train_transaction.csv`` is present.

    Raises:
        subprocess.CalledProcessError: If the ``kaggle`` command exits with a
            non-zero status.
        FileNotFoundError: If ``train_transaction.csv`` is missing after the
            download/unpack step.
    """
    dataset = "ieee-fraud-detection"

    # The repository root is the parent of the directory holding this file.
    project_dir = Path(__file__).resolve().parents[1]
    target_dir = project_dir.joinpath("data", "raw")
    target_dir.mkdir(parents=True, exist_ok=True)

    _ensure_kaggle_json()

    subprocess.run(
        [
            "kaggle",
            "competitions",
            "download",
            "-c",
            dataset,
            "-p",
            str(target_dir),
            "--force",
        ],
        check=True,
    )

    archive = target_dir / (dataset + ".zip")
    if archive.exists():
        shutil.unpack_archive(str(archive), str(target_dir))

    # The transaction table is used as the marker that the data really landed.
    marker = target_dir / "train_transaction.csv"
    if marker.exists():
        print(f"Downloaded to {target_dir}")
        return

    raise FileNotFoundError(
        f"Expected {marker} after download/unzip. "
        "Make sure you've accepted the Kaggle competition rules."
    )
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|