Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| import json | |
| import os | |
| import sys | |
| import time | |
| import requests | |
| from pathlib import Path | |
| REPO_ROOT = Path(__file__).resolve().parents[1] | |
| if str(REPO_ROOT) not in sys.path: | |
| sys.path.insert(0, str(REPO_ROOT)) | |
| # Reuse the existing launcher logic for setup | |
| import scripts.launch_feather_hf_job as original_launcher | |
# (connect, read) timeouts in seconds for the job-submission request, so a
# stalled connection cannot hang the launcher indefinitely.
_SUBMIT_TIMEOUT = (10, 120)

# Environment defaults applied only when the GPU flavor starts with 'a10'.
# A value already present in the caller's environment takes precedence.
_A10_ENV_DEFAULTS = {
    'HYDRA_MUON_COMPILE': '0',
    'HYDRA_FORCE_HTM_CPU': '1',
    'HYDRA_INERT_MAMBA': '1',
    'HYDRA_ALLOW_SYNTHETIC_RETINA': '1',
    'HYDRA_FASTPATH': '1',
    'HYDRA_FUSED_SDR_PROJECT': '0',
    'HYDRA_HTM_FUSED': '0',
    'HYDRA_BACKGROUND_PREFETCH': '0',
}


def _sync_space(token, routing):
    """Sync the overlay, upload the image folder to the Space, and wait for its build."""
    original_launcher.sync_overlay_from_repo()
    # Imported lazily so FEATHER_HF_SKIP_UPLOAD=1 runs do not need huggingface_hub.
    from huggingface_hub import HfApi

    api = HfApi(token=token)
    print(f"[launch] uploading folder to {routing.space_repo}...")
    api.upload_folder(
        repo_id=routing.space_repo,
        repo_type='space',
        folder_path=str(original_launcher.IMAGE_DIR),
        commit_message='Update Feather pretrain runtime',
        token=token,
    )
    print("[launch] waiting for Space build...")
    original_launcher.wait_for_space(api, routing.space_repo)


def _build_job_env(routing):
    """Assemble the environment mapping sent with the job request.

    Layers, in order: fixed routing/runtime values, A10-specific defaults
    (overridable from the caller's environment), then any remaining
    ``HYDRA_*`` / ``FEATHER_*`` variables from the caller's environment.
    """
    gpu_profile = original_launcher.GPU_PROFILE
    env = {
        'HF_REPO_ID': routing.output_repo,
        'FEATHER_HF_OWNER': routing.owner,
        'FEATHER_HF_SPACE_REPO': routing.space_repo,
        'FEATHER_HF_OUTPUT_REPO': routing.output_repo,
        'FEATHER_HF_RETINA_CACHE_REPO': routing.retina_cache_repo,
        'HYDRA_RETINA_CACHE_REPO': routing.retina_cache_repo,
        'HYDRA_TARGET_SHARDS': original_launcher.TARGET_SHARDS,
        'HYDRA_TIME_BUDGET': original_launcher.TIME_BUDGET,
        'PYTHONUNBUFFERED': '1',
        'FEATHER_RUNTIME_MODE': 'job',
        'FEATHER_GPU_PROFILE': gpu_profile,
        'FEATHER_HF_FLAVOR': original_launcher.GPU_FLAVOR,
        'HTM_CUDA_ARCH': original_launcher.HTM_CUDA_ARCH,
        'TORCH_CUDA_ARCH_LIST': original_launcher.TORCH_CUDA_ARCH,
        'TRITON_CACHE_DIR': f'/workspace/triton_cache/{gpu_profile}',
        'TRITON_CACHE_REPO': f'{routing.owner}/feather-triton-cache-{gpu_profile}',
    }

    if original_launcher.GPU_FLAVOR.startswith('a10'):
        for key, default in _A10_ENV_DEFAULTS.items():
            if key in os.environ:
                env[key] = os.environ[key]
            else:
                env.setdefault(key, default)

    # Pass through caller-provided knobs without overriding anything set above.
    for key, value in os.environ.items():
        if key.startswith(('HYDRA_', 'FEATHER_')) and key not in env:
            env[key] = value
    return env


def _submit_job(token, routing, env):
    """POST the job spec to the HF Jobs REST endpoint; return a process exit code."""
    # NOTE(review): huggingface_hub's own job spec appears to use the keys
    # 'environment' and 'timeoutSeconds'; confirm this endpoint accepts
    # 'env' and 'timeout' before relying on them.
    payload = {
        "spaceId": routing.space_repo,
        "command": ["/bin/bash", "-c", "python /app/entrypoint.py"],
        "env": env,
        "secrets": {"HF_TOKEN": token},
        "flavor": original_launcher.GPU_FLAVOR,
        "timeout": original_launcher.TIMEOUT,
    }
    print(f"[launch] submitting HF Job on {original_launcher.GPU_FLAVOR} via REST...")
    url = f"https://huggingface.co/api/jobs/{routing.job_namespace}"
    headers = {"Authorization": f"Bearer {token}"}

    try:
        resp = requests.post(url, json=payload, headers=headers, timeout=_SUBMIT_TIMEOUT)
    except requests.RequestException as exc:
        # Connection errors and timeouts become a clean failure, not a traceback.
        print(f"[error] request failed: {exc}")
        return 1

    # Any 2xx response counts as accepted, not only 200.
    if not 200 <= resp.status_code < 300:
        print(f"[error] {resp.status_code}: {resp.text}")
        return 1

    try:
        job_data = resp.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print(f"[error] {resp.status_code}: response was not valid JSON: {resp.text}")
        return 1

    # Read fields defensively: a missing key must not crash the launcher
    # after the job has already been accepted.
    info = job_data if isinstance(job_data, dict) else {}
    status = info.get('status')
    stage = status.get('stage') if isinstance(status, dict) else status
    print(
        f"[launch] submitted job_id={info.get('id', 'unknown')} "
        f"status={stage if stage is not None else 'unknown'} "
        f"url={info.get('url', 'unknown')}"
    )
    return 0


def main():
    """Launch a Feather pretrain job on Hugging Face through the Jobs REST API.

    Steps:
      1. Resolve the token and repo routing via the existing launcher module.
      2. Unless ``FEATHER_HF_SKIP_UPLOAD=1``, upload the runtime image folder
         to the Space and wait for its build.
      3. Build the job environment and submit the job.

    Returns:
        int: 0 when the job was submitted, 1 when the submission failed
        (transport error, non-2xx response, or unparseable response body).
    """
    token = original_launcher.require_token()
    routing = original_launcher.resolve_routing(token=token)

    if os.environ.get('FEATHER_HF_SKIP_UPLOAD', '0') != '1':
        _sync_space(token, routing)

    return _submit_job(token, routing, _build_job_env(routing))
# Script entry point: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())