File size: 5,571 Bytes
dbad084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# Deploy RAE Training to HuggingFace Spaces
# ═══════════════════════════════════════════════════════════════
#
# Provisions an AutoTrain Space backed by GPU hardware so training
# runs on HF-managed compute — no local GPU required.
#
# Prerequisites:
#   - HF account with billing enabled
#   - HF_TOKEN with write access
#   - huggingface_hub CLI installed
#
# Usage:
#   export HF_TOKEN=hf_xxxxx
#   ./scripts/deploy_to_hf_space.sh
# ═══════════════════════════════════════════════════════════════

set -euo pipefail

# Anchor all relative paths (configs/, data/) at the repository root,
# regardless of where the script is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_DIR"

# ── Configuration ─────────────────────────────────────────────
# Target Space as "<user>/<repo>"; override the account via HF_USERNAME.
SPACE_NAME="${HF_USERNAME:-TrueV1sion123}/rae-training"
HARDWARE="t4-medium"  # Options: cpu-basic, t4-small, t4-medium, a10g-small, a10g-large, a100-large

echo "═══════════════════════════════════════════════════════"
echo "  DEPLOY RAE TRAINING TO HF SPACES"
echo "  Space: $SPACE_NAME"
echo "  Hardware: $HARDWARE"
echo "═══════════════════════════════════════════════════════"

# Fail fast with a clear diagnostic (on stderr) if the write token is missing.
if [ -z "${HF_TOKEN:-}" ]; then
    echo "Error: HF_TOKEN not set" >&2
    echo "  export HF_TOKEN=hf_your_write_token" >&2
    exit 1
fi

# Best-effort quiet install (may legitimately fail offline or without pip),
# then verify the dependency is actually importable rather than silently
# continuing and failing later inside a heredoc with a confusing traceback.
pip install -q huggingface_hub 2>/dev/null || true
if ! python3 -c 'import huggingface_hub' 2>/dev/null; then
    echo "Error: huggingface_hub is not installed (try: pip install huggingface_hub)" >&2
    exit 1
fi

# ── Option 1: AutoTrain Space (Recommended) ──────────────────
# Creates a Space using the official AutoTrain Docker image.
# You then upload your data and config through the web UI.

echo ""
echo "β–Ά Creating AutoTrain Space..."
echo "  This creates a GPU-backed Space with the AutoTrain UI."
echo "  After creation, upload your training data and start training."
echo ""

# Pass the shell configuration into the Python heredoc explicitly: the
# heredoc delimiter is quoted (no shell expansion) and SPACE_NAME/HARDWARE
# are not exported, so without this the Python code would silently fall
# back to hard-coded defaults and ignore the settings above.
SPACE_NAME="$SPACE_NAME" HARDWARE="$HARDWARE" python3 << 'PYTHON_SCRIPT'
import os
from huggingface_hub import HfApi, create_repo

api = HfApi(token=os.environ["HF_TOKEN"])
space_name = os.environ.get("SPACE_NAME", "rae-training")
hardware = os.environ.get("HARDWARE", "t4-medium")

# SPACE_NAME may already be fully qualified ("user/repo"); only prefix the
# token owner's username when it is a bare repo name.
if "/" in space_name:
    repo_id = space_name
else:
    repo_id = f"{api.whoami()['name']}/{space_name}"

# Create the Space; treat "already exists" as success so reruns are idempotent.
try:
    create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware=hardware,
        private=True,
        token=os.environ["HF_TOKEN"],
    )
    print(f"βœ“ Space created: https://huggingface.co/spaces/{repo_id}")
except Exception as e:
    if "already exists" in str(e).lower():
        print(f"βœ“ Space already exists: https://huggingface.co/spaces/{repo_id}")
    else:
        print(f"βœ— Error creating space: {e}")
        raise

# Upload the AutoTrain Dockerfile that drives training inside the Space.
dockerfile_content = """FROM huggingface/autotrain-advanced:latest

# RAE Training Environment
COPY configs/autotrain_rae_sft.yaml /app/config.yaml
COPY data/ /app/data/

# Set environment
ENV AUTOTRAIN_CONFIG=/app/config.yaml

# Default command
CMD ["autotrain", "--config", "/app/config.yaml"]
"""

api.upload_file(
    path_or_fileobj=dockerfile_content.encode(),
    path_in_repo="Dockerfile",
    repo_id=repo_id,
    repo_type="space",
    token=os.environ["HF_TOKEN"],
)
print("βœ“ Dockerfile uploaded")

# Upload the training config referenced by the Dockerfile.
api.upload_file(
    path_or_fileobj="configs/autotrain_rae_sft.yaml",
    path_in_repo="configs/autotrain_rae_sft.yaml",
    repo_id=repo_id,
    repo_type="space",
    token=os.environ["HF_TOKEN"],
)
print("βœ“ Config uploaded")

print(f"\n{'═' * 50}")
print(f"  Space ready: https://huggingface.co/spaces/{repo_id}")
print(f"  Next steps:")
print(f"  1. Upload training data (data/rae_training_data/)")
print(f"  2. Start the Space to begin training")
print(f"  3. Monitor via the Space UI or TensorBoard")
print(f"{'═' * 50}")
PYTHON_SCRIPT

# ── Option 2: Push dataset to HF Hub ─────────────────────────
echo ""
echo "β–Ά Pushing training dataset to Hub..."

python3 << 'PYTHON_SCRIPT2'
import glob
import os

from huggingface_hub import HfApi

hub = HfApi(token=os.environ["HF_TOKEN"])
owner = hub.whoami()["name"]
dataset_repo = f"{owner}/rae-training-data"

# Best-effort upload: any failure is reported as a warning, never fatal,
# so the Space deployment above still counts as a success.
try:
    hub.create_repo(dataset_repo, repo_type="dataset", private=True, exist_ok=True)

    local_files = glob.glob("data/rae_training_data/*")

    if not local_files:
        print("  ⚠ No training data found. Run generate_dataset.sh first.")
    else:
        for local_path in local_files:
            file_name = os.path.basename(local_path)
            hub.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=file_name,
                repo_id=dataset_repo,
                repo_type="dataset",
            )
            print(f"  βœ“ Uploaded {file_name}")
        print(f"βœ“ Dataset repo: https://huggingface.co/datasets/{dataset_repo}")
except Exception as e:
    print(f"  ⚠ Dataset upload: {e}")
PYTHON_SCRIPT2

echo ""
echo "Deployment complete!"