Spaces:
Sleeping
Sleeping
"""
Upload G.U.I.D.E. inference model weights to Hugging Face Model Hub.

Uploads only the files needed for inference — skips checkpoint-* subdirectories
(intermediate training snapshots) and .DS_Store files.

Usage:
    python scripts/upload_models_to_hub.py --repo-id <username>/guide-models

Prerequisites:
    huggingface-cli login  # run once before uploading
"""
from __future__ import annotations

import argparse
import logging
import sys
from collections.abc import Sequence
from pathlib import Path
# Configure logging once at import time: INFO level, "<LEVEL> <message>" lines.
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
logger = logging.getLogger(__name__)

# Repository root is two levels above this file. resolve() makes the path
# absolute first, so the result stays correct even when __file__ is a bare
# relative filename (where .parent.parent would otherwise collapse to ".").
_REPO_ROOT = Path(__file__).resolve().parent.parent
_MODELS_ROOT = _REPO_ROOT / "models"

# Sub-directories of models/ that are considered for upload, in upload order.
_MODEL_SUBDIRS = [
    "domain_classifier",
    "evidence_ner",
    "next_action",
]
| def parse_args() -> argparse.Namespace: | |
| """Parse CLI arguments; exits with usage message if --repo-id is missing.""" | |
| parser = argparse.ArgumentParser( | |
| description="Upload G.U.I.D.E. model weights to Hugging Face Model Hub.", | |
| formatter_class=argparse.RawDescriptionHelpFormatter, | |
| epilog=( | |
| "examples:\n" | |
| " python scripts/upload_models_to_hub.py --repo-id myuser/guide-models\n" | |
| ), | |
| ) | |
| parser.add_argument( | |
| "--repo-id", | |
| required=True, | |
| metavar="USER/REPO", | |
| help="HF Hub repo ID to upload to (e.g. myuser/guide-models)", | |
| ) | |
| return parser.parse_args() | |
def filter_inference_files(model_dir: Path) -> list[Path]:
    """
    Return the inference-needed files found anywhere under model_dir.

    The directory is walked recursively. A file is skipped when:
      - it is named .DS_Store, or
      - any directory between model_dir and the file has a name starting
        with "checkpoint-" (training artifacts).

    Only directory components are tested against "checkpoint-"; a regular
    file whose own name happens to start with that prefix is kept.

    Args:
        model_dir: Path to a model directory (e.g. models/evidence_ner).

    Returns:
        Sorted list of Path objects for files that should be uploaded.
    """
    results: list[Path] = []
    for path in model_dir.rglob("*"):
        if not path.is_file() or path.name == ".DS_Store":
            continue
        # Drop the last component (the file name) so that only the enclosing
        # directories decide whether the file is a checkpoint artifact.
        parent_parts = path.relative_to(model_dir).parts[:-1]
        if any(part.startswith("checkpoint-") for part in parent_parts):
            continue
        results.append(path)
    # rglob yields entries in no particular order; sort so that the upload
    # order (and the log output) is reproducible.
    results.sort()
    return results
def upload_models(repo_id: str) -> None:
    """
    Upload inference files for all three G.U.I.D.E. models to HF Hub.

    Iterates over domain_classifier/, evidence_ner/, and next_action/ under
    models/, filters to inference-only files via filter_inference_files(), and
    uploads each file preserving its path relative to models/ as the Hub path.
    Logs a warning and continues if a model directory does not exist locally
    or contains no inference files.

    Exits the process with status 1 when huggingface_hub cannot be imported
    or when the authentication pre-check fails.

    Args:
        repo_id: HF Hub repository ID, e.g. "myuser/guide-models".
    """
    # Imported lazily so a missing dependency produces a readable message
    # instead of an import traceback at module load.
    try:
        from huggingface_hub import HfApi
        from huggingface_hub.utils import HfHubHTTPError
    except ImportError:
        logger.error("huggingface_hub is not installed. Run: pip install huggingface_hub")
        sys.exit(1)

    api = HfApi()

    # Verify auth before doing any work. OSError is caught in addition to
    # HfHubHTTPError because a missing local token is reported as an
    # OSError subclass rather than as an HTTP error.
    try:
        api.whoami()
    except (HfHubHTTPError, OSError) as exc:
        logger.error(
            "Not authenticated with Hugging Face. Run: huggingface-cli login"
        )
        logger.error("Underlying error: %s", exc)
        sys.exit(1)

    # Ensure repo exists (creates it if not)
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
    logger.info("Uploading to: https://huggingface.co/%s", repo_id)

    total_uploaded = 0
    for subdir_name in _MODEL_SUBDIRS:
        model_dir = _MODELS_ROOT / subdir_name
        if not model_dir.exists():
            logger.warning("Model directory not found, skipping: %s", model_dir)
            continue

        files = filter_inference_files(model_dir)
        if not files:
            logger.warning("No inference files found in %s, skipping.", model_dir)
            continue

        logger.info("Uploading %d file(s) from %s …", len(files), subdir_name)
        for local_path in files:
            # Hub path mirrors local structure: domain_classifier/config.json etc.
            # as_posix() keeps forward slashes regardless of the local OS.
            path_in_repo = local_path.relative_to(_MODELS_ROOT).as_posix()
            api.upload_file(
                path_or_fileobj=str(local_path),
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type="model",
            )
            logger.info("  uploaded: %s", path_in_repo)
            total_uploaded += 1

    logger.info("Done. %d file(s) uploaded to %s", total_uploaded, repo_id)
if __name__ == "__main__":
    # Script entry point: read --repo-id from the command line, then upload.
    args = parse_args()
    upload_models(args.repo_id)