Spaces:

AI-Talent-Force
/

ai_exec

Paused

File size: 9,416 Bytes

45ee481

#!/usr/bin/env python3
"""
Push to Hub CLI

Push models, adapters, datasets, or Gradio apps to Hugging Face Hub.
Unified interface for all Hub uploads.

Usage:
    python scripts/push_to_hub.py --model ./outputs/final_adapter --repo username/model
    python scripts/push_to_hub.py --space ./app --repo username/chatbot-space
"""

import argparse
import os
import sys
from pathlib import Path

# Add the project root (parent of this script's directory) to sys.path so the `src` package is importable
sys.path.insert(0, str(Path(__file__).parent.parent))

from rich.console import Console
from rich.prompt import Confirm

console = Console()


def push_model(
    model_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    merge: bool = False,
    base_model: str = "Qwen/Qwen3-4B-Instruct",
) -> str:
    """Push a model or adapter to the Hub.

    Args:
        model_path: Local adapter/model directory, or a single model file.
        repo_id: Target Hub repository ID.
        token: Hugging Face access token.
        private: Whether the repository is created as private.
        merge: When true, hand the adapter to ``merge_adapter`` together with
            ``base_model`` and let it push the result (``push_to_hub=True``).
        base_model: Base model identifier forwarded to ``merge_adapter``.

    Returns:
        URL of the model repository on the Hub.
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)
    repo_url = f"https://huggingface.co/{repo_id}"

    # Create repo (no-op if it already exists)
    console.print(f"Creating/updating repo: {repo_id}")
    api.create_repo(repo_id=repo_id, private=private, exist_ok=True)

    if merge:
        # Merge adapter first; merge_adapter is asked to do the push itself.
        console.print("Merging adapter with base model...")
        from src.training.merge_adapter import merge_adapter

        merged_path = model_path.parent / "merged_for_push"
        merge_adapter(
            base_model=base_model,
            adapter_path=model_path,
            output_path=merged_path,
            push_to_hub=True,
            hub_model_id=repo_id,
            hub_token=token,
            private=private,
        )
        return repo_url

    console.print(f"Uploading from: {model_path}")
    if model_path.is_dir():
        # Upload the whole adapter/model directory
        api.upload_folder(
            folder_path=str(model_path),
            repo_id=repo_id,
            token=token,
        )
    else:
        # Single-file model: upload_folder rejects non-directories, so use
        # upload_file, matching how push_dataset treats a single file.
        api.upload_file(
            path_or_fileobj=str(model_path),
            path_in_repo=model_path.name,
            repo_id=repo_id,
            token=token,
        )

    return repo_url


def push_dataset(
    dataset_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
) -> str:
    """Push a dataset directory or a single dataset file to the Hub.

    Args:
        dataset_path: Local directory or file to upload.
        repo_id: Target Hub dataset repository ID.
        token: Hugging Face access token.
        private: Whether the dataset repository is created as private.

    Returns:
        URL of the dataset repository on the Hub.
    """
    # Only huggingface_hub is needed: files are uploaded as-is, so the
    # `datasets` package is not imported here.
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create dataset repo (no-op if it already exists)
    console.print(f"Creating/updating dataset repo: {repo_id}")
    api.create_repo(repo_id=repo_id, repo_type="dataset", private=private, exist_ok=True)

    if dataset_path.is_dir():
        # Upload the whole folder
        api.upload_folder(
            folder_path=str(dataset_path),
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    else:
        # Upload a single file under its own file name
        api.upload_file(
            path_or_fileobj=str(dataset_path),
            path_in_repo=dataset_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )

    return f"https://huggingface.co/datasets/{repo_id}"


def push_space(
    space_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    sdk: str = "gradio",
    hardware: str = "cpu-basic",
) -> str:
    """Create (or reuse) a Space repository and upload an app folder to it.

    Args:
        space_path: Local directory containing the app files.
        repo_id: Target Space repository ID.
        token: Hugging Face access token.
        private: Whether the Space is created as private.
        sdk: Space SDK passed to ``create_repo`` as ``space_sdk``.
        hardware: Hardware tier; a hardware request is only sent when this
            differs from ``"cpu-basic"``.

    Returns:
        URL of the Space on the Hub.
    """
    from huggingface_hub import HfApi

    hub = HfApi(token=token)
    space_kind = "space"

    # Step 1: make sure the Space repository exists.
    console.print(f"Creating/updating Space: {repo_id}")
    hub.create_repo(
        repo_id=repo_id,
        repo_type=space_kind,
        space_sdk=sdk,
        private=private,
        exist_ok=True,
    )

    # Step 2: push the app directory contents.
    console.print(f"Uploading from: {space_path}")
    hub.upload_folder(
        folder_path=str(space_path),
        repo_id=repo_id,
        repo_type=space_kind,
        token=token,
    )

    # Step 3: request non-default hardware; "cpu-basic" sends no request.
    needs_hardware_request = hardware != "cpu-basic"
    if needs_hardware_request:
        console.print(f"Setting hardware: {hardware}")
        hub.request_space_hardware(repo_id=repo_id, hardware=hardware, token=token)

    return f"https://huggingface.co/spaces/{repo_id}"


def main() -> int:
    """Parse CLI arguments and push the selected source to the Hub.

    Exactly one of ``--model``, ``--dataset`` or ``--space`` selects what is
    pushed; ``--repo`` names the target. The token is read from the
    ``HF_TOKEN`` environment variable.

    Returns:
        Process exit code: 0 on success or when the user cancels, 1 when the
        token is missing, the source path is missing, or the push fails.
    """
    from rich.markup import escape

    # Single source of truth for Space hardware tiers: used for both the
    # --hardware choices and the help epilog so the two cannot drift apart.
    hardware_choices = (
        "cpu-basic", "cpu-upgrade",
        "t4-small", "t4-medium",
        "a10g-small", "a10g-large",
        "a100-large",
    )
    hardware_list = ", ".join(hardware_choices)

    parser = argparse.ArgumentParser(
        description="Push models, datasets, or apps to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
Examples:
    # Push adapter
    python scripts/push_to_hub.py \\
        --model ./outputs/final_adapter \\
        --repo username/ceo-voice-model

    # Push merged model
    python scripts/push_to_hub.py \\
        --model ./outputs/final_adapter \\
        --repo username/ceo-voice-model \\
        --merge

    # Push dataset
    python scripts/push_to_hub.py \\
        --dataset data/training/ \\
        --repo username/ceo-training-data

    # Push Gradio Space
    python scripts/push_to_hub.py \\
        --space ./app \\
        --repo username/ceo-chatbot \\
        --hardware t4-small

Hardware options for Spaces:
    {hardware_list}

Environment:
    HF_TOKEN - Hugging Face token (required)
        """,
    )

    # Source arguments (mutually exclusive)
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--model", help="Path to model/adapter")
    source_group.add_argument("--dataset", help="Path to dataset")
    source_group.add_argument("--space", help="Path to Gradio app directory")

    # Target arguments
    parser.add_argument("--repo", required=True, help="Hub repository ID")

    # Model-specific arguments
    parser.add_argument(
        "--merge",
        action="store_true",
        help="Merge adapter into base model before pushing",
    )
    parser.add_argument(
        "--base-model",
        default="Qwen/Qwen3-4B-Instruct",
        help="Base model for merging (default: Qwen/Qwen3-4B-Instruct)",
    )

    # Space-specific arguments
    parser.add_argument(
        "--hardware",
        default="cpu-basic",
        choices=hardware_choices,
        help="Hardware for Space (default: cpu-basic)",
    )
    parser.add_argument(
        "--sdk",
        default="gradio",
        choices=["gradio", "streamlit", "docker"],
        help="SDK for Space (default: gradio)",
    )

    # Common arguments
    parser.add_argument(
        "--public",
        action="store_true",
        help="Make repository public (default: private)",
    )
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation")

    args = parser.parse_args()

    console.print("\n[bold blue]AI Executive - Push to Hub[/bold blue]")
    console.print("=" * 50)

    # Check token
    token = os.environ.get("HF_TOKEN")
    if not token:
        console.print("[red]Error:[/red] HF_TOKEN not found in environment")
        console.print("\nSet it with:")
        console.print("  export HF_TOKEN=your_token_here")
        return 1

    private = not args.public

    # Determine what we're pushing. Compare against None rather than relying
    # on truthiness: an empty string (e.g. --model "") would otherwise fall
    # through to the --space branch and crash on Path(None).
    if args.model is not None:
        push_type, raw_path = "model", args.model
    elif args.dataset is not None:
        push_type, raw_path = "dataset", args.dataset
    else:
        push_type, raw_path = "space", args.space

    source_path = Path(raw_path)
    # Path("") resolves to the current directory, so reject an empty argument
    # explicitly instead of pushing whatever the working directory holds.
    if not raw_path or not source_path.exists():
        console.print(
            f"[red]Error:[/red] {push_type.capitalize()} path not found: {source_path}"
        )
        return 1

    # Display info
    console.print("\n[yellow]Push Configuration[/yellow]")
    console.print(f"Type: {push_type}")
    console.print(f"Source: {source_path}")
    console.print(f"Target: {args.repo}")
    console.print(f"Visibility: {'public' if args.public else 'private'}")

    if push_type == "model" and args.merge:
        console.print(f"Merge: Yes (base: {args.base_model})")
    if push_type == "space":
        console.print(f"SDK: {args.sdk}")
        console.print(f"Hardware: {args.hardware}")

    # Confirm
    if not args.yes:
        console.print()
        if not Confirm.ask("Proceed with push?"):
            console.print("[dim]Cancelled.[/dim]")
            return 0

    # Push
    console.print("\n[yellow]Pushing to Hub...[/yellow]")

    try:
        if push_type == "model":
            url = push_model(
                model_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                merge=args.merge,
                base_model=args.base_model,
            )
        elif push_type == "dataset":
            url = push_dataset(
                dataset_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
            )
        else:
            url = push_space(
                space_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                sdk=args.sdk,
                hardware=args.hardware,
            )
    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero.
        # The message is escaped so square brackets in the error text are
        # printed literally instead of being parsed as Rich markup.
        console.print(f"[red]Push failed:[/red] {escape(str(e))}")
        import traceback
        traceback.print_exc()
        return 1

    # Success
    console.print("\n" + "=" * 50)
    console.print("[bold green]Push complete![/bold green]")
    console.print(f"\nURL: {url}")

    return 0


if __name__ == "__main__":
    # Use sys.exit rather than the bare ``exit`` helper: ``exit`` is installed
    # by the ``site`` module and is not available when Python runs with -S.
    sys.exit(main())