#!/usr/bin/env python3
"""
Push to Hub CLI

Push models, adapters, datasets, or Gradio apps to Hugging Face Hub.
Unified interface for all Hub uploads.

Usage:
    python scripts/push_to_hub.py --model ./outputs/final_adapter --repo username/model
    python scripts/push_to_hub.py --space ./app --repo username/chatbot-space
"""

import argparse
import os
import sys
import traceback
from pathlib import Path

# Put the directory above this script's folder on sys.path so that the
# `src.*` package imported lazily in push_model() can be resolved.
sys.path.insert(0, str(Path(__file__).parent.parent))

from rich.console import Console
from rich.prompt import Confirm

console = Console()

# Single source of truth for defaults shared by the function signatures
# and the argparse definitions below.
DEFAULT_BASE_MODEL = "Qwen/Qwen3-4B-Instruct"
DEFAULT_SPACE_HARDWARE = "cpu-basic"


def push_model(
    model_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    merge: bool = False,
    base_model: str = DEFAULT_BASE_MODEL,
) -> str:
    """Push a model or adapter to Hub.

    Args:
        model_path: Local path of the model/adapter (directory or single file).
        repo_id: Target Hub repository ID.
        token: Hugging Face access token.
        private: Create the repository as private when it does not exist yet.
        merge: Merge the adapter into ``base_model`` before pushing.
        base_model: Base model identifier handed to the merge step.

    Returns:
        URL of the model repository on the Hub.
    """
    # Imported lazily so `--help` and argument errors stay fast.
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create repo
    console.print(f"Creating/updating repo: {repo_id}")
    api.create_repo(repo_id=repo_id, private=private, exist_ok=True)

    if merge:
        # Merge adapter first then push
        console.print("Merging adapter with base model...")
        from src.training.merge_adapter import merge_adapter

        merged_path = model_path.parent / "merged_for_push"
        # NOTE(review): merge_adapter is called with push_to_hub=True, so it
        # presumably performs the upload itself - confirm in src.training.
        merge_adapter(
            base_model=base_model,
            adapter_path=model_path,
            output_path=merged_path,
            push_to_hub=True,
            hub_model_id=repo_id,
            hub_token=token,
            private=private,
        )
    else:
        # Upload adapter directly
        console.print(f"Uploading from: {model_path}")
        if model_path.is_dir():
            api.upload_folder(
                folder_path=str(model_path),
                repo_id=repo_id,
                token=token,
            )
        else:
            # upload_folder only accepts directories; a single file
            # goes through upload_file instead.
            api.upload_file(
                path_or_fileobj=str(model_path),
                path_in_repo=model_path.name,
                repo_id=repo_id,
                token=token,
            )

    return f"https://huggingface.co/{repo_id}"


def push_dataset(
    dataset_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
) -> str:
    """Push a dataset to Hub.

    Args:
        dataset_path: Local dataset directory or single data file.
        repo_id: Target Hub dataset repository ID.
        token: Hugging Face access token.
        private: Create the repository as private when it does not exist yet.

    Returns:
        URL of the dataset repository on the Hub.
    """
    # Only huggingface_hub is needed here: files are uploaded as-is, so the
    # `datasets` package is deliberately not imported.
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create dataset repo
    console.print(f"Creating/updating dataset repo: {repo_id}")
    api.create_repo(repo_id=repo_id, repo_type="dataset", private=private, exist_ok=True)

    # Check if it's a directory or file
    if dataset_path.is_dir():
        # Upload folder
        api.upload_folder(
            folder_path=str(dataset_path),
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    else:
        # Upload single file
        api.upload_file(
            path_or_fileobj=str(dataset_path),
            path_in_repo=dataset_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )

    return f"https://huggingface.co/datasets/{repo_id}"


def push_space(
    space_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    sdk: str = "gradio",
    hardware: str = DEFAULT_SPACE_HARDWARE,
) -> str:
    """Push a Gradio app to HF Spaces.

    Args:
        space_path: Local directory containing the app files.
        repo_id: Target Hub Space repository ID.
        token: Hugging Face access token.
        private: Create the Space as private when it does not exist yet.
        sdk: Space SDK passed to ``create_repo`` as ``space_sdk``.
        hardware: Hardware flavor; only requested when it differs from
            the default ``cpu-basic``.

    Returns:
        URL of the Space on the Hub.
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create space
    console.print(f"Creating/updating Space: {repo_id}")
    api.create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk=sdk,
        private=private,
        exist_ok=True,
    )

    # Upload app files
    console.print(f"Uploading from: {space_path}")
    api.upload_folder(
        folder_path=str(space_path),
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )

    # Update space hardware if specified
    if hardware != DEFAULT_SPACE_HARDWARE:
        console.print(f"Setting hardware: {hardware}")
        api.request_space_hardware(repo_id=repo_id, hardware=hardware, token=token)

    return f"https://huggingface.co/spaces/{repo_id}"


def main() -> int:
    """Parse CLI arguments, confirm with the user, and run the push.

    Returns:
        Process exit code: 0 on success or user cancellation, 1 on a missing
        token, a missing source path, or a failed push.
    """
    parser = argparse.ArgumentParser(
        description="Push models, datasets, or apps to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Push adapter
    python scripts/push_to_hub.py \\
        --model ./outputs/final_adapter \\
        --repo username/ceo-voice-model

    # Push merged model
    python scripts/push_to_hub.py \\
        --model ./outputs/final_adapter \\
        --repo username/ceo-voice-model \\
        --merge

    # Push dataset
    python scripts/push_to_hub.py \\
        --dataset data/training/ \\
        --repo username/ceo-training-data

    # Push Gradio Space
    python scripts/push_to_hub.py \\
        --space ./app \\
        --repo username/ceo-chatbot \\
        --hardware t4-small

Hardware options for Spaces:
    cpu-basic, cpu-upgrade, t4-small, t4-medium, a10g-small, a10g-large

Environment:
    HF_TOKEN - Hugging Face token (required)
        """,
    )

    # Source arguments (mutually exclusive)
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--model", help="Path to model/adapter")
    source_group.add_argument("--dataset", help="Path to dataset")
    source_group.add_argument("--space", help="Path to Gradio app directory")

    # Target arguments
    parser.add_argument("--repo", required=True, help="Hub repository ID")

    # Model-specific arguments
    parser.add_argument(
        "--merge",
        action="store_true",
        help="Merge adapter into base model before pushing",
    )
    parser.add_argument(
        "--base-model",
        default=DEFAULT_BASE_MODEL,
        help=f"Base model for merging (default: {DEFAULT_BASE_MODEL})",
    )

    # Space-specific arguments
    parser.add_argument(
        "--hardware",
        default=DEFAULT_SPACE_HARDWARE,
        choices=[
            "cpu-basic",
            "cpu-upgrade",
            "t4-small",
            "t4-medium",
            "a10g-small",
            "a10g-large",
            "a100-large",
        ],
        help=f"Hardware for Space (default: {DEFAULT_SPACE_HARDWARE})",
    )
    parser.add_argument(
        "--sdk",
        default="gradio",
        choices=["gradio", "streamlit", "docker"],
        help="SDK for Space (default: gradio)",
    )

    # Common arguments
    parser.add_argument(
        "--public",
        action="store_true",
        help="Make repository public (default: private)",
    )
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation")

    args = parser.parse_args()

    console.print("\n[bold blue]AI Executive - Push to Hub[/bold blue]")
    console.print("=" * 50)

    # Check token
    token = os.environ.get("HF_TOKEN")
    if not token:
        console.print("[red]Error:[/red] HF_TOKEN not found in environment")
        console.print("\nSet it with:")
        console.print("  export HF_TOKEN=your_token_here")
        return 1

    private = not args.public

    # Determine what we're pushing. Compare against None rather than relying
    # on truthiness: an empty-string path must not fall through to another
    # source whose value is None.
    if args.model is not None:
        push_type, raw_path = "model", args.model
    elif args.dataset is not None:
        push_type, raw_path = "dataset", args.dataset
    else:
        push_type, raw_path = "space", args.space

    source_path = Path(raw_path)
    if not source_path.exists():
        console.print(
            f"[red]Error:[/red] {push_type.capitalize()} path not found: {source_path}"
        )
        return 1

    # --merge is only meaningful for model pushes; say so instead of
    # silently dropping the flag.
    if args.merge and push_type != "model":
        console.print(
            "[yellow]Warning:[/yellow] --merge only applies to --model; ignoring"
        )

    # Display info
    console.print("\n[yellow]Push Configuration[/yellow]")
    console.print(f"Type: {push_type}")
    console.print(f"Source: {source_path}")
    console.print(f"Target: {args.repo}")
    console.print(f"Visibility: {'public' if args.public else 'private'}")

    if push_type == "model" and args.merge:
        console.print(f"Merge: Yes (base: {args.base_model})")
    if push_type == "space":
        console.print(f"SDK: {args.sdk}")
        console.print(f"Hardware: {args.hardware}")

    # Confirm
    if not args.yes:
        console.print()
        if not Confirm.ask("Proceed with push?"):
            console.print("[dim]Cancelled.[/dim]")
            return 0

    # Push
    console.print("\n[yellow]Pushing to Hub...[/yellow]")

    try:
        if push_type == "model":
            url = push_model(
                model_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                merge=args.merge,
                base_model=args.base_model,
            )
        elif push_type == "dataset":
            url = push_dataset(
                dataset_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
            )
        else:
            url = push_space(
                space_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                sdk=args.sdk,
                hardware=args.hardware,
            )
    except Exception as e:
        # Top-level CLI boundary: report the failure with a traceback and
        # turn it into a non-zero exit code.
        console.print(f"[red]Push failed:[/red] {e}")
        traceback.print_exc()
        return 1

    # Success
    console.print("\n" + "=" * 50)
    console.print("[bold green]Push complete![/bold green]")
    console.print(f"\nURL: {url}")

    return 0


if __name__ == "__main__":
    # sys.exit, not the site-provided `exit` builtin, which is absent under
    # `python -S` and in some embedded/frozen interpreters.
    sys.exit(main())