""" Upload G.U.I.D.E. inference model weights to Hugging Face Model Hub. Uploads only the files needed for inference — skips checkpoint-* subdirectories (intermediate training snapshots) and .DS_Store files. Usage: python scripts/upload_models_to_hub.py --repo-id /guide-models Prerequisites: huggingface-cli login # run once before uploading """ from __future__ import annotations import argparse import logging import sys from pathlib import Path logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") logger = logging.getLogger(__name__) _REPO_ROOT = Path(__file__).parent.parent _MODELS_ROOT = _REPO_ROOT / "models" _MODEL_SUBDIRS = [ "domain_classifier", "evidence_ner", "next_action", ] def parse_args() -> argparse.Namespace: """Parse CLI arguments; exits with usage message if --repo-id is missing.""" parser = argparse.ArgumentParser( description="Upload G.U.I.D.E. model weights to Hugging Face Model Hub.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=( "examples:\n" " python scripts/upload_models_to_hub.py --repo-id myuser/guide-models\n" ), ) parser.add_argument( "--repo-id", required=True, metavar="USER/REPO", help="HF Hub repo ID to upload to (e.g. myuser/guide-models)", ) return parser.parse_args() def filter_inference_files(model_dir: Path) -> list[Path]: """ Return only the inference-needed files directly under model_dir. Skips: - Any file inside a checkpoint-* subdirectory (training artifacts) - Any .DS_Store file Args: model_dir: Path to a model directory (e.g. models/evidence_ner). Returns: List of Path objects for files that should be uploaded. """ results: list[Path] = [] for path in model_dir.rglob("*"): if not path.is_file(): continue if path.name == ".DS_Store": continue # Skip anything inside a checkpoint-* directory if any(part.startswith("checkpoint-") for part in path.relative_to(model_dir).parts): continue results.append(path) return results def upload_models(repo_id: str) -> None: """ Upload inference files for all three G.U.I.D.E. models to HF Hub. Iterates over domain_classifier/, evidence_ner/, and next_action/ under models/, filters to inference-only files via filter_inference_files(), and uploads each file preserving its path relative to models/ as the Hub path. Logs a warning and continues if a model directory does not exist locally. Args: repo_id: HF Hub repository ID, e.g. "myuser/guide-models". """ try: from huggingface_hub import HfApi from huggingface_hub.utils import HfHubHTTPError except ImportError: logger.error("huggingface_hub is not installed. Run: pip install huggingface_hub") sys.exit(1) api = HfApi() # Verify auth before doing any work try: api.whoami() except HfHubHTTPError: logger.error( "Not authenticated with Hugging Face. Run: huggingface-cli login" ) sys.exit(1) # Ensure repo exists (creates it if not) api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True) logger.info("Uploading to: https://huggingface.co/%s", repo_id) total_uploaded = 0 for subdir_name in _MODEL_SUBDIRS: model_dir = _MODELS_ROOT / subdir_name if not model_dir.exists(): logger.warning("Model directory not found, skipping: %s", model_dir) continue files = filter_inference_files(model_dir) if not files: logger.warning("No inference files found in %s, skipping.", model_dir) continue logger.info("Uploading %d file(s) from %s …", len(files), subdir_name) for local_path in files: # Hub path mirrors local structure: domain_classifier/config.json etc. path_in_repo = local_path.relative_to(_MODELS_ROOT).as_posix() api.upload_file( path_or_fileobj=str(local_path), path_in_repo=path_in_repo, repo_id=repo_id, repo_type="model", ) logger.info(" uploaded: %s", path_in_repo) total_uploaded += 1 logger.info("Done. %d file(s) uploaded to %s", total_uploaded, repo_id) if __name__ == "__main__": args = parse_args() upload_models(args.repo_id)