| """Manual export path for consumer-facing Hugging Face runtime bundles.""" |
|
|
| from __future__ import annotations |
|
|
| import re |
| import shutil |
| from dataclasses import dataclass |
| from pathlib import Path |
| from tempfile import TemporaryDirectory |
| from typing import Optional, Sequence |
|
|
| import torch |
| from huggingface_hub import HfApi, create_repo |
|
|
| from sim_priors_pk import config_dir, project_dir |
| from sim_priors_pk.hub_runtime.configuration_sim_priors_pk import PKHubConfig |
| from sim_priors_pk.hub_runtime.modeling_sim_priors_pk import PKHubModel |
| from sim_priors_pk.hub_runtime.runtime_contract import ( |
| build_runtime_config_payload, |
| resolve_model_card_text, |
| runtime_readme_text, |
| ) |
|
|
| ROOT_CONFIGURATION_FILENAME = "configuration_sim_priors_pk.py" |
| ROOT_MODELING_FILENAME = "modeling_sim_priors_pk.py" |
| _HF_TOKEN_PATTERN = re.compile(r"hf_[A-Za-z0-9]{20,}") |
| _COMET_KEY_ASSIGNMENT_PATTERN = re.compile(r"(COMET_API_KEY\s*=\s*)(['\"]).*?\2") |
| _HF_KEY_ASSIGNMENT_PATTERN = re.compile(r"(HF_KEYS\s*=\s*)(['\"]).*?\2") |
|
|
|
|
@dataclass
class RuntimeBundleArtifacts:
    """Describe a staged runtime bundle: where it lives and which repos it names.

    Attributes:
        bundle_dir: Directory that holds the staged bundle files.
        runtime_repo_id: Hub repo id the runtime bundle is built for.
        original_repo_id: Repo id recorded as the original/native model repo,
            or ``None`` when it could not be determined.
        readme_path: Path of the ``README.md`` written into the bundle.
    """

    bundle_dir: Path
    runtime_repo_id: str
    original_repo_id: Optional[str]
    readme_path: Path
|
|
|
|
def default_runtime_repo_id(experiment, *, suffix: str = "-runtime") -> str:
    """Build ``<account>/<hf_model_name><suffix>`` for a loaded experiment.

    Args:
        experiment: Object exposing ``exp_config`` (which carries
            ``hf_model_name``) and ``hf_token``.
        suffix: Text appended to the model name. Defaults to ``"-runtime"``.

    Returns:
        The repo id, namespaced by the ``"name"`` entry that
        ``HfApi().whoami`` reports for the experiment's token.

    Raises:
        RuntimeError: If the experiment config or the Hugging Face token is
            missing.
    """
    exp_config = getattr(experiment, "exp_config", None)
    if exp_config is None:
        raise RuntimeError("Experiment config is not loaded.")

    token = getattr(experiment, "hf_token", None)
    if token is None:
        raise RuntimeError(
            "No Hugging Face token available. Set hugging_face_token in the config or KEYS.txt."
        )

    # The namespace comes from the account that owns the token.
    owner = HfApi().whoami(token=token)["name"]
    model_name = exp_config.hf_model_name
    return f"{owner}/{model_name}{suffix}"
|
|
|
|
| def _default_original_repo_id(experiment) -> Optional[str]: |
| """Infer the legacy/native Hub repo id if enough metadata is available.""" |
|
|
| if getattr(experiment, "exp_config", None) is None: |
| return None |
| if getattr(experiment, "hf_token", None) is None: |
| return None |
| user = HfApi().whoami(token=experiment.hf_token)["name"] |
| return f"{user}/{experiment.exp_config.hf_model_name}" |
|
|
|
|
| def _validate_loaded_experiment(experiment) -> None: |
| """Ensure the loaded experiment has the minimum state needed for manual export.""" |
|
|
| if getattr(experiment, "model", None) is None: |
| raise RuntimeError("Experiment model is not loaded.") |
| if getattr(experiment, "exp_config", None) is None: |
| raise RuntimeError("Experiment config is not loaded.") |
| if getattr(experiment, "experiment_dir", None) is None: |
| raise RuntimeError("Experiment directory is required before pushing.") |
| if getattr(experiment, "hf_token", None) is None: |
| raise RuntimeError( |
| "No Hugging Face token available. Set hugging_face_token in the config or KEYS.txt." |
| ) |
|
|
|
|
def _copy_runtime_support_files(bundle_dir: Path) -> None:
    """Populate ``bundle_dir`` with the package sources and root entrypoints.

    Steps, in order:

    1. Copy the ``sim_priors_pk`` package from ``project_dir`` into the bundle,
       skipping ``__pycache__`` directories.
    2. Copy the configuration and modeling entrypoint modules from
       ``hub_runtime`` to the bundle root as well.
    3. Copy ``requirements.txt`` and ``LICENSE`` from ``project_dir`` when they
       exist.
    4. Scrub secrets from the staged files and verify none remain.

    Args:
        bundle_dir: Destination directory of the staged bundle.

    Raises:
        RuntimeError: Propagated from the secret check if token-like strings
            are still present after scrubbing.
    """
    source_package = project_dir / "sim_priors_pk"
    skip_bytecode_dirs = shutil.ignore_patterns("__pycache__")
    shutil.copytree(
        source_package,
        bundle_dir / "sim_priors_pk",
        dirs_exist_ok=True,
        ignore=skip_bytecode_dirs,
    )

    # The entrypoints also live at the bundle root, next to the package copy.
    hub_runtime_dir = source_package / "hub_runtime"
    for entrypoint in (ROOT_CONFIGURATION_FILENAME, ROOT_MODELING_FILENAME):
        shutil.copy2(hub_runtime_dir / entrypoint, bundle_dir / entrypoint)

    # Project-level extras are optional; absent files are simply skipped.
    for optional_name in ("requirements.txt", "LICENSE"):
        candidate = project_dir / optional_name
        if not candidate.is_file():
            continue
        shutil.copy2(candidate, bundle_dir / optional_name)

    _scrub_runtime_bundle_secrets(bundle_dir)
    _validate_no_hf_secrets(bundle_dir)
|
|
|
|
| def _scrub_runtime_bundle_secrets(bundle_dir: Path) -> None: |
| """Remove token-like secrets from copied source files before Hub upload.""" |
|
|
| candidate_files = [ |
| *bundle_dir.rglob("*.py"), |
| *bundle_dir.rglob("*.md"), |
| *bundle_dir.rglob("*.txt"), |
| *bundle_dir.rglob("*.json"), |
| ] |
| for path in candidate_files: |
| try: |
| text = path.read_text(encoding="utf-8") |
| except UnicodeDecodeError: |
| continue |
|
|
| updated = text |
| updated = _HF_TOKEN_PATTERN.sub("hf_REDACTED", updated) |
| updated = _COMET_KEY_ASSIGNMENT_PATTERN.sub(r"\1\2REDACTED\2", updated) |
| updated = _HF_KEY_ASSIGNMENT_PATTERN.sub(r"\1\2REDACTED\2", updated) |
|
|
| if path.as_posix().endswith("sim_priors_pk/utils/__init__.py"): |
| updated = ( |
| "PASCAL_BASE_DIR = ''\n" |
| "NERSC_BASE_DIR = ''\n" |
| "NERSC_EXPERIMENT_DIR = ''\n" |
| "COMET_API_KEY = 'REDACTED'\n" |
| "HF_KEYS = 'REDACTED'\n" |
| "WORKSPACE = ''\n" |
| "PROJECT = ''\n" |
| ) |
|
|
| if updated != text: |
| path.write_text(updated, encoding="utf-8") |
|
|
|
|
| def _validate_no_hf_secrets(bundle_dir: Path) -> None: |
| """Fail fast if token-like Hugging Face secrets remain after scrubbing.""" |
|
|
| offending_files: list[str] = [] |
| for path in bundle_dir.rglob("*"): |
| if not path.is_file(): |
| continue |
| if path.suffix not in {".py", ".md", ".txt", ".json"}: |
| continue |
| try: |
| text = path.read_text(encoding="utf-8") |
| except UnicodeDecodeError: |
| continue |
| if _HF_TOKEN_PATTERN.search(text): |
| offending_files.append(str(path.relative_to(bundle_dir))) |
|
|
| if offending_files: |
| raise RuntimeError( |
| "Refusing to upload runtime bundle because token-like Hugging Face secrets " |
| f"remain after scrubbing: {offending_files}" |
| ) |
|
|
|
|
def build_runtime_bundle_dir(
    *,
    experiment,
    bundle_dir: Path,
    model_card_path: Optional[Sequence[str]] = None,
    hf_repo_id: Optional[str] = None,
    original_repo_id: Optional[str] = None,
) -> RuntimeBundleArtifacts:
    """Stage a self-contained runtime bundle in ``bundle_dir`` without uploading it.

    The bundle receives, in order: the wrapped model weights
    (``pytorch_model.bin``), the saved runtime config, the generated
    ``README.md``, and finally the package sources and entrypoints. The last
    step also scrubs and validates secrets across the staged files.

    Args:
        experiment: Loaded experiment exposing ``model``, ``exp_config``,
            ``experiment_dir`` and ``hf_token``.
        bundle_dir: Directory to stage into; created if it does not exist.
        model_card_path: Path components of the model card, relative to
            ``config_dir``. A single string or ``Path`` is treated as one
            path. When ``None``, ``exp_config.hf_model_card_path`` is used,
            falling back to ``("hf_model_cards", "README.md")``.
        hf_repo_id: Runtime repo id; derived from the experiment when falsy.
        original_repo_id: Native repo id; inferred from the experiment when
            falsy, and may end up ``None``.

    Returns:
        A ``RuntimeBundleArtifacts`` describing the staged bundle.

    Raises:
        RuntimeError: If the experiment is not fully loaded, or if token-like
            Hugging Face secrets remain in the bundle after scrubbing.
    """
    _validate_loaded_experiment(experiment)
    bundle_dir.mkdir(parents=True, exist_ok=True)

    runtime_repo_id = hf_repo_id or default_runtime_repo_id(experiment)
    native_repo_id = original_repo_id or _default_original_repo_id(experiment)

    card_parts = (
        model_card_path
        if model_card_path is not None
        else getattr(experiment.exp_config, "hf_model_card_path", ("hf_model_cards", "README.md"))
    )
    # A bare string (or Path) is one path, not a sequence of components.
    # Iterating a string would split it into single characters and build a
    # nonsensical path.
    if isinstance(card_parts, (str, Path)):
        card_parts = (card_parts,)
    normalized_model_card_path = tuple(card_parts)
    local_model_card_path = Path(config_dir).joinpath(*normalized_model_card_path)
    base_model_card = resolve_model_card_text(local_model_card_path)

    runtime_payload = build_runtime_config_payload(
        backbone=experiment.model,
        exp_config=experiment.exp_config,
        original_repo_id=native_repo_id,
        runtime_repo_id=runtime_repo_id,
    )
    runtime_config = PKHubConfig(
        **runtime_payload,
        # ``[:-3]`` drops the ``.py`` extension to get the module name.
        auto_map={
            "AutoConfig": f"{ROOT_CONFIGURATION_FILENAME[:-3]}.PKHubConfig",
            "AutoModel": f"{ROOT_MODELING_FILENAME[:-3]}.PKHubModel",
        },
        architectures=["PKHubModel"],
    )

    runtime_model = PKHubModel(runtime_config, backbone=experiment.model)
    # Detach and move to CPU so the saved weights are plain CPU tensors.
    state_dict = {name: tensor.detach().cpu() for name, tensor in runtime_model.state_dict().items()}
    torch.save(state_dict, bundle_dir / "pytorch_model.bin")
    runtime_config.save_pretrained(str(bundle_dir))

    readme_text = runtime_readme_text(
        base_model_card=base_model_card,
        runtime_repo_id=runtime_repo_id,
        original_repo_id=native_repo_id,
        supported_tasks=runtime_config.supported_tasks,
        default_task=runtime_config.default_task,
    )
    readme_path = bundle_dir / "README.md"
    readme_path.write_text(readme_text, encoding="utf-8")

    # Copy support files last: this step ends with the secret scrub and the
    # validation pass, so every text file written above (including the README
    # generated from the model card) is scanned before the bundle is uploaded.
    _copy_runtime_support_files(bundle_dir)

    return RuntimeBundleArtifacts(
        bundle_dir=bundle_dir,
        runtime_repo_id=runtime_repo_id,
        original_repo_id=native_repo_id,
        readme_path=readme_path,
    )
|
|
|
|
def push_loaded_model_runtime_bundle(
    experiment,
    model_card_path: Optional[Sequence[str]] = None,
    hf_repo_id: Optional[str] = None,
    alias_name: str = "runtime_bundle_hf",
    commit_message: str = "manual runtime bundle push",
    *,
    original_repo_id: Optional[str] = None,
    exist_ok: bool = True,
) -> str:
    """Stage the runtime bundle for a loaded experiment and upload it to the Hub.

    Args:
        experiment: Loaded experiment exposing ``model``, ``exp_config``,
            ``experiment_dir`` and ``hf_token``.
        model_card_path: Forwarded to the bundle builder.
        hf_repo_id: Target repo id; derived from the experiment when falsy.
        alias_name: Sub-directory of ``experiment_dir`` under which the
            temporary staging directory is created.
        commit_message: Commit message used for the upload.
        original_repo_id: Forwarded to the bundle builder.
        exist_ok: Passed to ``create_repo``.

    Returns:
        The repo id the bundle was uploaded to.

    Raises:
        RuntimeError: If the experiment is not fully loaded.
    """
    _validate_loaded_experiment(experiment)

    target_repo_id = hf_repo_id or default_runtime_repo_id(experiment)
    create_repo(target_repo_id, exist_ok=exist_ok, token=experiment.hf_token)

    staging_parent = Path(experiment.experiment_dir) / alias_name
    staging_parent.mkdir(parents=True, exist_ok=True)

    # The staged files only need to exist for the duration of the upload; the
    # temporary directory is removed when the ``with`` block exits.
    with TemporaryDirectory(dir=str(staging_parent), prefix="hf_runtime_bundle_") as scratch:
        staged_dir = Path(scratch)
        build_runtime_bundle_dir(
            experiment=experiment,
            bundle_dir=staged_dir,
            model_card_path=model_card_path,
            hf_repo_id=target_repo_id,
            original_repo_id=original_repo_id,
        )

        uploader = HfApi(token=experiment.hf_token)
        uploader.upload_folder(
            folder_path=str(staged_dir),
            repo_id=target_repo_id,
            commit_message=commit_message,
            token=experiment.hf_token,
        )

    return target_repo_id
|
|
|
|
# Public API of this module; underscore-prefixed helpers stay private.
__all__ = [
    "RuntimeBundleArtifacts",
    "build_runtime_bundle_dir",
    "default_runtime_repo_id",
    "push_loaded_model_runtime_bundle",
]
|
|