#!/bin/bash
# ===============================================================
# Deploy RAE Training to HuggingFace Spaces
# ===============================================================
#
# Creates an AutoTrain Space with GPU hardware for cloud training.
# This is the zero-local-GPU path -- HF handles the compute.
#
# Prerequisites:
#   - HF account with billing enabled
#   - HF_TOKEN with write access
#   - huggingface_hub CLI installed
#
# Usage:
#   export HF_TOKEN=hf_xxxxx
#   ./scripts/deploy_to_hf_space.sh
# ===============================================================
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# -- Configuration ---------------------------------------------
# SPACE_NAME is the *bare* Space name: the embedded Python prepends the
# authenticated username itself, so no "user/" prefix belongs here.
# Both variables are exported so the python3 heredocs below can read them
# via os.environ (shell-local variables are invisible to child processes).
export SPACE_NAME="rae-training"
export HARDWARE="t4-medium" # Options: cpu-basic, t4-small, t4-medium, a10g-small, a10g-large, a100-large
SPACE_REPO="${HF_USERNAME:-TrueV1sion123}/${SPACE_NAME}"  # display only

echo "======================================================="
echo "  DEPLOY RAE TRAINING TO HF SPACES"
echo "  Space:    $SPACE_REPO"
echo "  Hardware: $HARDWARE"
echo "======================================================="

# Fail fast when the write token is missing.
if [ -z "${HF_TOKEN:-}" ]; then
    echo "Error: HF_TOKEN not set"
    echo "  export HF_TOKEN=hf_your_write_token"
    exit 1
fi

# Install huggingface_hub if needed, then verify it is importable --
# a swallowed install failure would otherwise crash inside python3 later.
pip install -q huggingface_hub 2>/dev/null || true
if ! python3 -c "import huggingface_hub" 2>/dev/null; then
    echo "Error: huggingface_hub is not installed and could not be installed"
    exit 1
fi

# -- Option 1: AutoTrain Space (Recommended) -------------------
# Creates a Space using the official AutoTrain Docker image.
# You then upload your data and config through the web UI.
echo ""
echo "==> Creating AutoTrain Space..."
echo "    This creates a GPU-backed Space with the AutoTrain UI."
echo "    After creation, upload your training data and start training."
echo ""
python3 << 'PYTHON_SCRIPT'
"""Create (or reuse) a private AutoTrain Docker Space and upload its
Dockerfile plus the training config."""
import os

from huggingface_hub import HfApi, create_repo

token = os.environ["HF_TOKEN"]
api = HfApi(token=token)
# SPACE_NAME / HARDWARE are exported by the wrapper script; defaults keep
# this heredoc usable standalone.
space_name = os.environ.get("SPACE_NAME", "rae-training")
hardware = os.environ.get("HARDWARE", "t4-medium")
username = api.whoami()["name"]
repo_id = f"{username}/{space_name}"

# Create the Space. exist_ok=True makes this idempotent instead of
# string-matching "already exists" in the exception message.
try:
    create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware=hardware,  # was hard-coded, ignoring the shell's HARDWARE
        private=True,
        token=token,
        exist_ok=True,
    )
    print(f"[OK] Space ready: https://huggingface.co/spaces/{repo_id}")
except Exception as e:
    print(f"[ERROR] creating space: {e}")
    raise

# Upload the AutoTrain Dockerfile.
# NOTE(review): COPY configs/... and COPY data/ assume those paths exist in
# the Space repo at build time; data/ is never uploaded here -- the user is
# expected to add it via the web UI (see "Next steps" below). Confirm.
dockerfile_content = """FROM huggingface/autotrain-advanced:latest
# RAE Training Environment
COPY configs/autotrain_rae_sft.yaml /app/config.yaml
COPY data/ /app/data/
# Set environment
ENV AUTOTRAIN_CONFIG=/app/config.yaml
# Default command
CMD ["autotrain", "--config", "/app/config.yaml"]
"""
api.upload_file(
    path_or_fileobj=dockerfile_content.encode(),
    path_in_repo="Dockerfile",
    repo_id=repo_id,
    repo_type="space",
    token=token,
)
print("[OK] Dockerfile uploaded")

# Upload the training config (relative path works: the wrapper cd's to the
# project root). Warn instead of crashing so the dataset push still runs.
config_path = "configs/autotrain_rae_sft.yaml"
if os.path.isfile(config_path):
    api.upload_file(
        path_or_fileobj=config_path,
        path_in_repo=config_path,
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )
    print("[OK] Config uploaded")
else:
    print(f"[WARN] {config_path} not found; upload it through the Space UI")

print(f"\n{'=' * 50}")
print(f" Space ready: https://huggingface.co/spaces/{repo_id}")
print(f" Next steps:")
print(f"   1. Upload training data (data/rae_training_data/)")
print(f"   2. Start the Space to begin training")
print(f"   3. Monitor via the Space UI or TensorBoard")
print(f"{'=' * 50}")
PYTHON_SCRIPT
# -- Option 2: Push dataset to HF Hub --------------------------
echo ""
echo "==> Pushing training dataset to Hub..."
python3 << 'PYTHON_SCRIPT2'
"""Create a private dataset repo and upload any generated training files."""
import glob
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
username = api.whoami()["name"]
dataset_repo = f"{username}/rae-training-data"

try:
    api.create_repo(dataset_repo, repo_type="dataset", private=True, exist_ok=True)
    # Regular files only: glob("*") also matches directories, which
    # upload_file cannot handle.
    data_files = [p for p in glob.glob("data/rae_training_data/*") if os.path.isfile(p)]
    if data_files:
        for path in data_files:
            api.upload_file(
                path_or_fileobj=path,
                path_in_repo=os.path.basename(path),
                repo_id=dataset_repo,
                repo_type="dataset",
            )
            print(f"  [OK] Uploaded {os.path.basename(path)}")
        print(f"[OK] Dataset repo: https://huggingface.co/datasets/{dataset_repo}")
    else:
        print("  [WARN] No training data found. Run generate_dataset.sh first.")
except Exception as e:
    # Best-effort: a dataset-upload failure should not abort the deploy.
    print(f"  [WARN] Dataset upload: {e}")
PYTHON_SCRIPT2

echo ""
echo "Deployment complete!"