ai_exec / scripts /push_to_hub.py
Chaitanya-aitf's picture
Upload 38 files
45ee481 verified
#!/usr/bin/env python3
"""
Push to Hub CLI
Push models, adapters, datasets, or Gradio apps to Hugging Face Hub.
Unified interface for all Hub uploads.
Usage:
python scripts/push_to_hub.py --model ./outputs/final_adapter --repo username/model
python scripts/push_to_hub.py --space ./app --repo username/chatbot-space
"""
import argparse
import os
import sys
from pathlib import Path
# Put the directory two levels above this file (the project root) on sys.path
# so that `src.*` modules (e.g. src.training.merge_adapter) can be imported
# when this file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))
from rich.console import Console
from rich.prompt import Confirm
# Shared Rich console used for all output printed by this script.
console = Console()
def push_model(
    model_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    merge: bool = False,
    base_model: str = "Qwen/Qwen3-4B-Instruct",
) -> str:
    """Push a model or adapter to the Hub and return the repository URL.

    Args:
        model_path: Local model/adapter directory, or a single model file.
        repo_id: Target Hub repository ID (e.g. ``username/model``).
        token: Hugging Face access token used for every Hub call.
        private: Visibility requested when creating the repository.
        merge: When True, delegate to the project's ``merge_adapter`` helper
            (called with ``push_to_hub=True``) instead of uploading
            ``model_path`` as-is.
        base_model: Base model ID forwarded to ``merge_adapter``; only used
            when ``merge`` is True.

    Returns:
        The ``https://huggingface.co/<repo_id>`` URL of the target repo.
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)
    repo_url = f"https://huggingface.co/{repo_id}"

    # Create repo (no-op if it already exists)
    console.print(f"Creating/updating repo: {repo_id}")
    api.create_repo(repo_id=repo_id, private=private, exist_ok=True)

    if merge:
        # Merge adapter first; the helper is asked to push the result itself.
        console.print("Merging adapter with base model...")
        from src.training.merge_adapter import merge_adapter

        merge_adapter(
            base_model=base_model,
            adapter_path=model_path,
            # Merged weights are written next to the adapter directory.
            output_path=model_path.parent / "merged_for_push",
            push_to_hub=True,
            hub_model_id=repo_id,
            hub_token=token,
            private=private,
        )
        return repo_url

    # Upload the adapter/model directly, without merging.
    console.print(f"Uploading from: {model_path}")
    if model_path.is_dir():
        api.upload_folder(
            folder_path=str(model_path),
            repo_id=repo_id,
            token=token,
        )
    else:
        # upload_folder rejects non-directories; mirror push_dataset and
        # upload a lone file under its own name at the repo root.
        api.upload_file(
            path_or_fileobj=str(model_path),
            path_in_repo=model_path.name,
            repo_id=repo_id,
            token=token,
        )
    return repo_url
def push_dataset(
    dataset_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
) -> str:
    """Push a dataset directory or single dataset file to the Hub.

    The files are uploaded as-is; nothing is parsed or loaded locally, so
    the ``datasets`` package is not required.

    Args:
        dataset_path: Local directory (uploaded recursively) or single file
            (uploaded under its own name at the repo root).
        repo_id: Target Hub dataset repository ID.
        token: Hugging Face access token used for every Hub call.
        private: Visibility requested when creating the repository.

    Returns:
        The ``https://huggingface.co/datasets/<repo_id>`` URL.
    """
    from huggingface_hub import HfApi

    api = HfApi(token=token)

    # Create dataset repo (no-op if it already exists)
    console.print(f"Creating/updating dataset repo: {repo_id}")
    api.create_repo(repo_id=repo_id, repo_type="dataset", private=private, exist_ok=True)

    # Same progress line the model and Space uploads print.
    console.print(f"Uploading from: {dataset_path}")
    if dataset_path.is_dir():
        # Upload the whole folder
        api.upload_folder(
            folder_path=str(dataset_path),
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    else:
        # Upload a single file
        api.upload_file(
            path_or_fileobj=str(dataset_path),
            path_in_repo=dataset_path.name,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    return f"https://huggingface.co/datasets/{repo_id}"
def push_space(
    space_path: Path,
    repo_id: str,
    token: str,
    private: bool = True,
    sdk: str = "gradio",
    hardware: str = "cpu-basic",
) -> str:
    """Create (or reuse) a Space, upload the app folder, and return its URL.

    Args:
        space_path: Local directory containing the app files.
        repo_id: Target Space repository ID.
        token: Hugging Face access token used for every Hub call.
        private: Visibility requested when creating the Space.
        sdk: Space SDK passed to ``create_repo`` as ``space_sdk``.
        hardware: Hardware flavor; a hardware request is only sent when this
            differs from ``"cpu-basic"``.

    Returns:
        The ``https://huggingface.co/spaces/<repo_id>`` URL.
    """
    from huggingface_hub import HfApi

    hub = HfApi(token=token)
    space_url = f"https://huggingface.co/spaces/{repo_id}"

    # Step 1: make sure the Space repo exists.
    console.print(f"Creating/updating Space: {repo_id}")
    create_kwargs = {
        "repo_id": repo_id,
        "repo_type": "space",
        "space_sdk": sdk,
        "private": private,
        "exist_ok": True,
    }
    hub.create_repo(**create_kwargs)

    # Step 2: push the app files.
    console.print(f"Uploading from: {space_path}")
    hub.upload_folder(
        folder_path=str(space_path),
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )

    # Step 3: the default tier needs no explicit request.
    if hardware == "cpu-basic":
        return space_url

    console.print(f"Setting hardware: {hardware}")
    hub.request_space_hardware(repo_id=repo_id, hardware=hardware, token=token)
    return space_url
def main() -> int:
    """Parse CLI arguments and push the selected artifact to the Hub.

    Exactly one of ``--model``, ``--dataset`` or ``--space`` selects what is
    pushed; ``--repo`` names the target. The token is read from the
    ``HF_TOKEN`` environment variable.

    Returns:
        Process exit code: 0 on success or when the user declines the
        confirmation prompt; 1 when ``HF_TOKEN`` is missing, the source path
        does not exist, or the push raises.
    """
    parser = argparse.ArgumentParser(
        description="Push models, datasets, or apps to Hugging Face Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Push adapter
python scripts/push_to_hub.py \\
--model ./outputs/final_adapter \\
--repo username/ceo-voice-model
# Push merged model
python scripts/push_to_hub.py \\
--model ./outputs/final_adapter \\
--repo username/ceo-voice-model \\
--merge
# Push dataset
python scripts/push_to_hub.py \\
--dataset data/training/ \\
--repo username/ceo-training-data
# Push Gradio Space
python scripts/push_to_hub.py \\
--space ./app \\
--repo username/ceo-chatbot \\
--hardware t4-small
Hardware options for Spaces:
cpu-basic, cpu-upgrade, t4-small, t4-medium, a10g-small, a10g-large, a100-large
Environment:
HF_TOKEN - Hugging Face token (required)
""",
    )

    # Source arguments (mutually exclusive, one is required)
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--model", help="Path to model/adapter")
    source_group.add_argument("--dataset", help="Path to dataset")
    source_group.add_argument("--space", help="Path to Gradio app directory")

    # Target arguments
    parser.add_argument("--repo", required=True, help="Hub repository ID")

    # Model-specific arguments
    parser.add_argument(
        "--merge",
        action="store_true",
        help="Merge adapter into base model before pushing",
    )
    parser.add_argument(
        "--base-model",
        default="Qwen/Qwen3-4B-Instruct",
        help="Base model for merging (default: Qwen/Qwen3-4B-Instruct)",
    )

    # Space-specific arguments
    parser.add_argument(
        "--hardware",
        default="cpu-basic",
        choices=[
            "cpu-basic", "cpu-upgrade",
            "t4-small", "t4-medium",
            "a10g-small", "a10g-large",
            "a100-large",
        ],
        help="Hardware for Space (default: cpu-basic)",
    )
    parser.add_argument(
        "--sdk",
        default="gradio",
        choices=["gradio", "streamlit", "docker"],
        help="SDK for Space (default: gradio)",
    )

    # Common arguments
    parser.add_argument(
        "--public",
        action="store_true",
        help="Make repository public (default: private)",
    )
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation")

    args = parser.parse_args()

    console.print("\n[bold blue]AI Executive - Push to Hub[/bold blue]")
    console.print("=" * 50)

    # Check token
    token = os.environ.get("HF_TOKEN")
    if not token:
        console.print("[red]Error:[/red] HF_TOKEN not found in environment")
        console.print("\nSet it with:")
        console.print(" export HF_TOKEN=your_token_here")
        return 1

    private = not args.public

    # Determine what we're pushing; the argparse group guarantees that at
    # most one of these options was supplied.
    if args.model:
        push_type, raw_path = "model", args.model
    elif args.dataset:
        push_type, raw_path = "dataset", args.dataset
    else:
        push_type, raw_path = "space", args.space

    source_path = Path(raw_path)
    if not source_path.exists():
        # Produces "Model/Dataset/Space path not found: ..."
        console.print(
            f"[red]Error:[/red] {push_type.capitalize()} path not found: {source_path}"
        )
        return 1

    # Display info
    console.print("\n[yellow]Push Configuration[/yellow]")
    console.print(f"Type: {push_type}")
    console.print(f"Source: {source_path}")
    console.print(f"Target: {args.repo}")
    console.print(f"Visibility: {'public' if args.public else 'private'}")
    if push_type == "model" and args.merge:
        console.print(f"Merge: Yes (base: {args.base_model})")
    if push_type == "space":
        console.print(f"SDK: {args.sdk}")
        console.print(f"Hardware: {args.hardware}")

    # Confirm (skipped with --yes)
    if not args.yes:
        console.print()
        if not Confirm.ask("Proceed with push?"):
            console.print("[dim]Cancelled.[/dim]")
            return 0

    # Push
    console.print("\n[yellow]Pushing to Hub...[/yellow]")
    try:
        if push_type == "model":
            url = push_model(
                model_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                merge=args.merge,
                base_model=args.base_model,
            )
        elif push_type == "dataset":
            url = push_dataset(
                dataset_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
            )
        else:
            url = push_space(
                space_path=source_path,
                repo_id=args.repo,
                token=token,
                private=private,
                sdk=args.sdk,
                hardware=args.hardware,
            )
    except Exception as e:
        # Top-level CLI boundary: report the failure with a traceback and
        # turn it into a non-zero exit code instead of propagating.
        console.print(f"[red]Push failed:[/red] {e}")
        import traceback

        traceback.print_exc()
        return 1

    # Success
    console.print("\n" + "=" * 50)
    console.print("[bold green]Push complete![/bold green]")
    console.print(f"\nURL: {url}")
    return 0
if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() builtin: exit() is added by the
    # site module for interactive use and is not guaranteed to be defined.
    sys.exit(main())