#!/usr/bin/env python3
"""Install the Kaggle backtest runtime from wheels bundled in the dataset.

The script checks whether ``tables`` and the required ``qlib`` entry points
can be imported.  When they cannot (or when ``--force-reinstall`` is given)
it installs every package in ``OFFLINE_PACKAGES`` from a local wheel
directory with ``pip install --no-index --no-deps`` and then re-runs the
import check.
"""
from __future__ import annotations

import argparse
import importlib
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

from packaging.utils import canonicalize_name

# Wheel directory used when --wheel-dir is not given, relative to the repo root.
DEFAULT_WHEEL_SUBDIR = "vendor/wheels/kaggle_py312_manylinux_x86_64"

# Optional text file inside the wheel directory listing the expected wheel
# filenames, one per line; blank lines and lines starting with "#" are ignored.
WHEEL_MANIFEST_NAME = "_wheel_manifest.txt"

# Every package listed here must have a wheel in the wheelhouse.  pip runs
# with --no-deps, so nothing outside this list gets installed.
OFFLINE_PACKAGES = [
    "annotated-types",
    "blosc2",
    "certifi",
    "charset-normalizer",
    "dill",
    "filelock",
    "fire",
    "idna",
    "joblib",
    "loguru",
    "msgpack",
    "ndindex",
    "numexpr",
    "packaging",
    "py-cpuinfo",
    "pydantic",
    "pydantic-core",
    "pydantic-settings",
    "pyyaml",
    "pyqlib",
    "python-dotenv",
    "python-redis-lock",
    "redis",
    "requests",
    "ruamel.yaml",
    "setuptools-scm",
    "tables",
    "termcolor",
    "tqdm",
    "typing-extensions",
    "typing-inspection",
    "urllib3",
]


def _can_import_runtime() -> tuple[bool, str]:
    """Try to import the runtime modules the backtest needs.

    Returns:
        ``(True, "")`` when every import succeeds, otherwise ``(False, detail)``
        where ``detail`` is ``"<ExceptionType>: <message>"`` for the first
        failure.
    """
    # pip runs in a subprocess, so the path finders of this interpreter may
    # still hold directory listings taken before the install.  Refresh them so
    # the post-install check can see freshly installed packages.
    importlib.invalidate_caches()
    try:
        importlib.import_module("tables")
        import qlib  # noqa: F401
        from qlib.backtest import backtest  # noqa: F401
        from qlib.backtest.executor import SimulatorExecutor  # noqa: F401
        from qlib.contrib.strategy.signal_strategy import TopkDropoutStrategy  # noqa: F401
    except Exception as exc:  # pragma: no cover - best effort diagnostic
        return False, f"{type(exc).__name__}: {exc}"
    return True, ""


def _resolve_repo_root() -> Path:
    """Return the directory two levels above the directory holding this file."""
    return Path(__file__).resolve().parents[2]


def _resolve_wheel_dir(repo_root: Path, override: str | None) -> Path:
    """Return the absolute wheel directory.

    Args:
        repo_root: Base directory for the default wheel location.
        override: Explicit wheel directory; ``~`` is expanded.  When empty or
            ``None`` the default ``repo_root / DEFAULT_WHEEL_SUBDIR`` is used.
    """
    if override:
        return Path(override).expanduser().resolve()
    return (repo_root / DEFAULT_WHEEL_SUBDIR).resolve()


def _read_wheel_manifest(wheel_dir: Path) -> list[str]:
    """Return the filenames listed in the wheel manifest of ``wheel_dir``.

    Blank lines and lines starting with ``#`` are skipped and surrounding
    whitespace is stripped.  A missing manifest yields an empty list.
    """
    manifest_path = wheel_dir / WHEEL_MANIFEST_NAME
    if not manifest_path.exists():
        return []
    entries = []
    # Decode explicitly so the result does not depend on the locale encoding.
    for raw_line in manifest_path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            entries.append(entry)
    return entries


def _prepare_wheelhouse(wheel_dir: Path) -> Path:
    """Return a directory holding the wheels under their expected filenames.

    When ``wheel_dir`` already contains ``*.whl`` files and has no manifest it
    is returned unchanged.  Otherwise a fresh temporary directory is created
    and populated:

    * each manifest entry is copied from the file with exactly that name or,
      failing that, from the single file whose name is a prefix of the entry
      (a filename that lost its tail);
    * every ``*.whl`` directly in ``wheel_dir`` is copied as well unless a
      file of that name was already placed.

    Manifest entries with no match, or with an ambiguous prefix match, are
    skipped.  The caller owns the returned temporary directory and should
    delete it once done.
    """
    manifest = _read_wheel_manifest(wheel_dir)
    direct_wheels = sorted(wheel_dir.glob("*.whl"))
    if direct_wheels and not manifest:
        return wheel_dir

    source_files = {
        path.name: path
        for path in sorted(wheel_dir.iterdir())
        if path.is_file() and path.name != WHEEL_MANIFEST_NAME
    }
    temp_root = Path(tempfile.mkdtemp(prefix="aae_kaggle_wheels_"))
    try:
        if manifest:
            restored = 0
            for expected_name in manifest:
                source = source_files.get(expected_name)
                if source is None:
                    # Accept a shortened filename only when exactly one file
                    # could be the origin of this manifest entry.
                    matches = [
                        path
                        for name, path in source_files.items()
                        if expected_name.startswith(name)
                    ]
                    if len(matches) == 1:
                        source = matches[0]
                if source is None:
                    continue
                shutil.copy2(source, temp_root / expected_name)
                if source.name != expected_name:
                    restored += 1
            if restored:
                print(
                    f"Restored {restored} truncated wheel filename(s) into {temp_root}",
                    flush=True,
                )
        for path in direct_wheels:
            target = temp_root / path.name
            if not target.exists():
                shutil.copy2(path, target)
    except BaseException:
        # Do not leave a half-populated staging directory behind.
        shutil.rmtree(temp_root, ignore_errors=True)
        raise
    return temp_root


def _install_from_wheels(wheel_dir: Path, force_reinstall: bool) -> None:
    """Install every package in ``OFFLINE_PACKAGES`` from local wheels.

    Args:
        wheel_dir: Directory holding the wheels (and optionally a manifest).
        force_reinstall: Pass ``--force-reinstall`` to pip.

    Raises:
        FileNotFoundError: A package in ``OFFLINE_PACKAGES`` has no wheel.
        subprocess.CalledProcessError: pip exited with a non-zero status.
    """
    prepared_wheel_dir = _prepare_wheelhouse(wheel_dir)
    try:
        # Map canonical distribution name -> wheel path.  The distribution
        # name is the part of the wheel filename before the first "-".  If
        # several wheels share a name, the last one in sorted order wins.
        wheel_map: dict[str, Path] = {}
        for wheel_path in sorted(prepared_wheel_dir.glob("*.whl")):
            package_name = canonicalize_name(wheel_path.name.split("-", 1)[0])
            wheel_map[package_name] = wheel_path

        missing = [pkg for pkg in OFFLINE_PACKAGES if canonicalize_name(pkg) not in wheel_map]
        if missing:
            raise FileNotFoundError(
                "Missing offline wheels for packages: "
                + ", ".join(missing)
                + f". wheel_dir={wheel_dir} prepared_wheel_dir={prepared_wheel_dir}"
            )

        cmd = [
            sys.executable,
            "-m",
            "pip",
            "install",
            "--no-index",
            "--no-deps",
        ]
        if force_reinstall:
            cmd.append("--force-reinstall")
        cmd.extend(str(wheel_map[canonicalize_name(pkg)]) for pkg in OFFLINE_PACKAGES)
        print("Running offline install:\n ", " ".join(cmd), flush=True)
        subprocess.run(cmd, check=True)
    finally:
        # Remove the staging copy, but never the caller's own wheel directory
        # (which is what _prepare_wheelhouse returns when no staging is needed).
        if prepared_wheel_dir != wheel_dir:
            shutil.rmtree(prepared_wheel_dir, ignore_errors=True)


def main(argv: list[str] | None = None) -> None:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        FileNotFoundError: The wheel directory does not exist, or a required
            wheel is missing.
        RuntimeError: ``--check-only`` was given and the imports fail, or the
            imports still fail after installing.
    """
    parser = argparse.ArgumentParser(
        description="Install Kaggle backtest runtime from wheels bundled in the dataset."
    )
    parser.add_argument("--wheel-dir", default=None, help="Override local wheelhouse path.")
    parser.add_argument(
        "--force-reinstall",
        action="store_true",
        help="Force reinstall even if imports already work.",
    )
    parser.add_argument(
        "--check-only",
        action="store_true",
        help="Only verify imports; do not install.",
    )
    args = parser.parse_args(argv)

    repo_root = _resolve_repo_root()
    wheel_dir = _resolve_wheel_dir(repo_root, args.wheel_dir)
    if not wheel_dir.exists():
        raise FileNotFoundError(f"Offline wheel directory not found: {wheel_dir}")

    ok, detail = _can_import_runtime()
    if args.check_only:
        # --check-only wins over --force-reinstall: it must never install, and
        # it must only fail when the imports really fail.
        if not ok:
            raise RuntimeError(f"Offline runtime check failed: {detail}")
        print("Offline runtime already available; skipping install.", flush=True)
    elif ok and not args.force_reinstall:
        print("Offline runtime already available; skipping install.", flush=True)
    else:
        if ok:
            print("Offline runtime already available; reinstalling as requested.", flush=True)
        else:
            print(f"Runtime import check failed before install: {detail}", flush=True)
        _install_from_wheels(wheel_dir, force_reinstall=args.force_reinstall)
        ok, detail = _can_import_runtime()
        if not ok:
            raise RuntimeError(f"Offline runtime verification failed after install: {detail}")

    import qlib
    import tables

    print("qlib import OK:", qlib.__file__, flush=True)
    print("tables import OK:", tables.__file__, flush=True)
    print("wheel_dir =", wheel_dir, flush=True)


if __name__ == "__main__":
    main()