icarus112's picture
Upload folder using huggingface_hub
422445b verified
#!/usr/bin/env python3
import json
import os
import sys
import time
import requests
from pathlib import Path
# Put the directory two levels up from this file (the parent of the folder
# containing this script) on sys.path so the `scripts.*` import below resolves
# when the file is executed directly rather than as part of a package.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
# Reuse the existing launcher logic for setup
import scripts.launch_feather_hf_job as original_launcher
def _build_job_env(routing):
    """Assemble the environment-variable mapping sent with the job request.

    Args:
        routing: Object returned by ``original_launcher.resolve_routing``;
            its ``owner``, ``space_repo``, ``output_repo`` and
            ``retina_cache_repo`` attributes are read here.

    Returns:
        A dict of environment variable names to values for the job.
    """
    gpu_profile = original_launcher.GPU_PROFILE
    env = {
        'HF_REPO_ID': routing.output_repo,
        'FEATHER_HF_OWNER': routing.owner,
        'FEATHER_HF_SPACE_REPO': routing.space_repo,
        'FEATHER_HF_OUTPUT_REPO': routing.output_repo,
        'FEATHER_HF_RETINA_CACHE_REPO': routing.retina_cache_repo,
        'HYDRA_RETINA_CACHE_REPO': routing.retina_cache_repo,
        'HYDRA_TARGET_SHARDS': original_launcher.TARGET_SHARDS,
        'HYDRA_TIME_BUDGET': original_launcher.TIME_BUDGET,
        'PYTHONUNBUFFERED': '1',
        'FEATHER_RUNTIME_MODE': 'job',
        'FEATHER_GPU_PROFILE': gpu_profile,
        'FEATHER_HF_FLAVOR': original_launcher.GPU_FLAVOR,
        'HTM_CUDA_ARCH': original_launcher.HTM_CUDA_ARCH,
        'TORCH_CUDA_ARCH_LIST': original_launcher.TORCH_CUDA_ARCH,
        'TRITON_CACHE_DIR': f'/workspace/triton_cache/{gpu_profile}',
        'TRITON_CACHE_REPO': f'{routing.owner}/feather-triton-cache-{gpu_profile}',
    }

    # Defaults applied only for GPU flavors whose name starts with 'a10'.
    # A value already present in the caller's environment takes precedence.
    if original_launcher.GPU_FLAVOR.startswith('a10'):
        a10_defaults = {
            'HYDRA_MUON_COMPILE': '0',
            'HYDRA_FORCE_HTM_CPU': '1',
            'HYDRA_INERT_MAMBA': '1',
            'HYDRA_ALLOW_SYNTHETIC_RETINA': '1',
            'HYDRA_FASTPATH': '1',
            'HYDRA_FUSED_SDR_PROJECT': '0',
            'HYDRA_HTM_FUSED': '0',
            'HYDRA_BACKGROUND_PREFETCH': '0',
        }
        for key, default in a10_defaults.items():
            if key in os.environ:
                env[key] = os.environ[key]
            else:
                env.setdefault(key, default)

    # Pass through any HYDRA_* / FEATHER_* variable from the caller's
    # environment that was not set explicitly above.
    for key, value in os.environ.items():
        if key.startswith(('HYDRA_', 'FEATHER_')) and key not in env:
            env[key] = value

    return env


def main():
    """Optionally refresh the Space, then submit the job through the REST API.

    Unless ``FEATHER_HF_SKIP_UPLOAD`` is set to ``'1'``, the overlay is synced,
    ``original_launcher.IMAGE_DIR`` is uploaded to the Space repo and the
    launcher waits for the Space before submitting.

    Returns:
        0 when the job request was accepted, 1 when the request could not be
        sent or the server answered with an error status.
    """
    token = original_launcher.require_token()
    routing = original_launcher.resolve_routing(token=token)

    # NOTE(review): the upload and the Space wait are assumed to be gated by
    # FEATHER_HF_SKIP_UPLOAD together with the overlay sync -- confirm.
    if os.environ.get('FEATHER_HF_SKIP_UPLOAD', '0') != '1':
        original_launcher.sync_overlay_from_repo()
        # Imported lazily so the skip-upload path does not need huggingface_hub.
        from huggingface_hub import HfApi
        api = HfApi(token=token)
        print(f"[launch] uploading folder to {routing.space_repo}...")
        api.upload_folder(
            repo_id=routing.space_repo,
            repo_type='space',
            folder_path=str(original_launcher.IMAGE_DIR),
            commit_message='Update Feather pretrain runtime',
            token=token,
        )
        print("[launch] waiting for Space build...")
        original_launcher.wait_for_space(api, routing.space_repo)

    payload = {
        "spaceId": routing.space_repo,
        "command": ["/bin/bash", "-c", "python /app/entrypoint.py"],
        "env": _build_job_env(routing),
        "secrets": {"HF_TOKEN": token},
        "flavor": original_launcher.GPU_FLAVOR,
        "timeout": original_launcher.TIMEOUT,
    }

    print(f"[launch] submitting HF Job on {original_launcher.GPU_FLAVOR} via REST...")
    url = f"https://huggingface.co/api/jobs/{routing.job_namespace}"
    headers = {"Authorization": f"Bearer {token}"}

    # (connect, read) timeouts in seconds: without them a stalled connection
    # would block the launcher indefinitely.
    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=(10, 120))
    except requests.RequestException as exc:
        print(f"[error] request failed: {exc}")
        return 1

    # Any 2xx status means the request was accepted, not only 200.
    if not resp.ok:
        print(f"[error] {resp.status_code}: {resp.text}")
        return 1

    # The job has been accepted at this point; do not crash (and tempt a
    # duplicate submission) merely because the response body is unexpected.
    try:
        job_data = resp.json()
    except ValueError:
        job_data = None
    if not isinstance(job_data, dict):
        print(f"[launch] submitted, but response was not a JSON object: {resp.text}")
        return 0

    status = job_data.get('status')
    stage = status.get('stage') if isinstance(status, dict) else status
    print(f"[launch] submitted job_id={job_data.get('id')} status={stage} url={job_data.get('url')}")
    return 0
# Run the launcher when executed as a script; main()'s return value becomes
# the process exit code.
if __name__ == '__main__':
    sys.exit(main())