#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# Deploy RAE Training to HuggingFace Spaces
# ═══════════════════════════════════════════════════════════════
#
# Creates an AutoTrain Space with GPU hardware for cloud training.
# This is the zero-local-GPU path β€” HF handles the compute.
#
# Prerequisites:
# - HF account with billing enabled
# - HF_TOKEN with write access
# - huggingface_hub CLI installed
#
# Usage:
# export HF_TOKEN=hf_xxxxx
# ./scripts/deploy_to_hf_space.sh
# ═══════════════════════════════════════════════════════════════
set -euo pipefail

# Resolve this script's directory, then work from the repository root so the
# relative paths used below (configs/, data/) resolve the same no matter
# where the script is invoked from.
readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
readonly PROJECT_DIR="$(dirname -- "$SCRIPT_DIR")"
cd -- "$PROJECT_DIR"
# ── Configuration ─────────────────────────────────────────────
# SPACE_REPO is the bare Space repo name; the Python steps below always build
# the full repo_id from the authenticated account (whoami), so only the bare
# name is exported. SPACE_NAME is display-only (HF_USERNAME may differ from
# the account the token actually authenticates as).
SPACE_REPO="rae-training"
SPACE_NAME="${HF_USERNAME:-TrueV1sion123}/${SPACE_REPO}"
# GPU tier; overridable via env. Options: cpu-basic, t4-small, t4-medium, a10g-small, a10g-large, a100-large
HARDWARE="${HARDWARE:-t4-medium}"
# The heredocs below use quoted delimiters (no shell expansion), so the
# embedded Python can only see these values through the environment.
export SPACE_REPO HARDWARE

echo "═══════════════════════════════════════════════════════"
echo "  DEPLOY RAE TRAINING TO HF SPACES"
echo "  Space: $SPACE_NAME"
echo "  Hardware: $HARDWARE"
echo "═══════════════════════════════════════════════════════"

# Fail fast with guidance if no write-scoped token is available.
if [ -z "${HF_TOKEN:-}" ]; then
  echo "Error: HF_TOKEN not set" >&2
  echo "  export HF_TOKEN=hf_your_write_token" >&2
  exit 1
fi

# Best-effort dependency install; failures are deliberately ignored
# (huggingface_hub may already be provided by the environment).
pip install -q huggingface_hub 2>/dev/null || true
# ── Option 1: AutoTrain Space (Recommended) ──────────────────
# Creates a Space using the official AutoTrain Docker image.
# You then upload your data and config through the web UI.
echo ""
echo "▶ Creating AutoTrain Space..."
echo "  This creates a GPU-backed Space with the AutoTrain UI."
echo "  After creation, upload your training data and start training."
echo ""
python3 << 'PYTHON_SCRIPT'
"""Create (or reuse) a private Docker Space and seed it with the AutoTrain
Dockerfile and training config.

Reads from the environment:
  HF_TOKEN   (required) write-scoped token
  SPACE_REPO (optional) bare Space repo name, default "rae-training"
  HARDWARE   (optional) Space hardware tier, default "t4-medium"
"""
import os

from huggingface_hub import HfApi, create_repo

token = os.environ["HF_TOKEN"]
api = HfApi(token=token)

# Bare repo name only; the full repo_id is <authenticated user>/<name>.
# (SPACE_NAME is accepted as a legacy fallback for external callers.)
space_name = os.environ.get("SPACE_REPO") or os.environ.get("SPACE_NAME") or "rae-training"
username = api.whoami()["name"]
repo_id = f"{username}/{space_name}"

# Create the Space. exist_ok=True makes re-runs idempotent without fragile
# string matching on the exception message.
create_repo(
    repo_id=repo_id,
    repo_type="space",
    space_sdk="docker",
    space_hardware=os.environ.get("HARDWARE", "t4-medium"),
    private=True,
    token=token,
    exist_ok=True,
)
print(f"✓ Space ready: https://huggingface.co/spaces/{repo_id}")

# NOTE(review): this Dockerfile COPYs data/ into the image, but the script
# only uploads the Dockerfile and config to the Space repo — push the
# training data to the Space (step 1 below) before building, or the Docker
# build will fail on the missing data/ directory.
dockerfile_content = """FROM huggingface/autotrain-advanced:latest
# RAE Training Environment
COPY configs/autotrain_rae_sft.yaml /app/config.yaml
COPY data/ /app/data/
# Set environment
ENV AUTOTRAIN_CONFIG=/app/config.yaml
# Default command
CMD ["autotrain", "--config", "/app/config.yaml"]
"""
api.upload_file(
    path_or_fileobj=dockerfile_content.encode(),
    path_in_repo="Dockerfile",
    repo_id=repo_id,
    repo_type="space",
    token=token,
)
print("✓ Dockerfile uploaded")

# Upload the AutoTrain config; fail with a clear message (instead of a raw
# traceback) when the file is missing, e.g. wrong working directory.
config_path = "configs/autotrain_rae_sft.yaml"
if not os.path.exists(config_path):
    raise SystemExit(f"✗ Config not found: {config_path} (run from the project root)")
api.upload_file(
    path_or_fileobj=config_path,
    path_in_repo=config_path,
    repo_id=repo_id,
    repo_type="space",
    token=token,
)
print("✓ Config uploaded")

print(f"\n{'═' * 50}")
print(f"  Space ready: https://huggingface.co/spaces/{repo_id}")
print("  Next steps:")
print("  1. Upload training data (data/rae_training_data/)")
print("  2. Start the Space to begin training")
print("  3. Monitor via the Space UI or TensorBoard")
print(f"{'═' * 50}")
PYTHON_SCRIPT
# ── Option 2: Push dataset to HF Hub ─────────────────────────
echo ""
echo "▶ Pushing training dataset to Hub..."
python3 << 'PYTHON_SCRIPT2'
"""Mirror the generated training data into a private dataset repo.

Best-effort by design: a missing data directory or an upload error is
reported as a warning instead of failing the whole deployment.
"""
import glob
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
username = api.whoami()["name"]
dataset_repo = f"{username}/rae-training-data"

try:
    api.create_repo(dataset_repo, repo_type="dataset", private=True, exist_ok=True)

    # glob can also match subdirectories; upload_file rejects directory
    # paths, so only regular files are pushed.
    data_files = [p for p in glob.glob("data/rae_training_data/*") if os.path.isfile(p)]
    if data_files:
        for path in data_files:
            api.upload_file(
                path_or_fileobj=path,
                path_in_repo=os.path.basename(path),
                repo_id=dataset_repo,
                repo_type="dataset",
            )
            print(f"  ✓ Uploaded {os.path.basename(path)}")
        print(f"✓ Dataset repo: https://huggingface.co/datasets/{dataset_repo}")
    else:
        print("  ⚠ No training data found. Run generate_dataset.sh first.")
except Exception as e:
    # Intentional blanket catch: the dataset push must not abort the deploy.
    print(f"  ⚠ Dataset upload: {e}")
PYTHON_SCRIPT2

echo ""
echo "Deployment complete!"