File size: 1,431 Bytes
e5cf7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import requests
import json
from pathlib import Path

# Submit a space-backed job to the Hugging Face Jobs API.
#
# The request is first sent with the space reference under the camelCase key
# "spaceId"; if the API rejects it, the same request is retried once with the
# snake_case key "space_id".

# Use the environment variables from the task
token = os.environ.get("HF_TOKEN")
namespace = "GAInTech"
space_id = "GAInTech/feather-a10g-pretrain-v2-1778241600"
flavor = "a10g-large"

# (connect, read) timeouts in seconds, so a stalled connection cannot hang
# the script forever.
REQUEST_TIMEOUT = (10, 60)

# Constructed environment - minimal for testing first
env = {
    "FEATHER_RUNTIME_MODE": "job",
    "HYDRA_BATCH_SIZE": "96",
    "HYDRA_TOTAL_BATCH": "196608",
    "HYDRA_USE_NEMOTRON": "1",
    "HYDRA_TARGET_SHARDS": "0",
    "HYDRA_FORCE_HTM_CPU": "1",
    "HYDRA_INERT_MAMBA": "1",
}

# The payload that actually works for space-backed jobs
payload = {
    "spaceId": space_id,
    "command": ["/bin/bash", "-c", "python /app/entrypoint.py"],
    "env": env,
    "secrets": {"HF_TOKEN": token},
    "flavor": flavor,
    "timeout": "12h",
}

url = f"https://huggingface.co/api/jobs/{namespace}"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}


def snake_case_payload(body):
    """Return a copy of *body* with the "spaceId" key renamed to "space_id".

    The input mapping is left unmodified so the original request body can
    still be inspected or reused after the fallback attempt.
    """
    renamed = {key: value for key, value in body.items() if key != "spaceId"}
    renamed["space_id"] = body["spaceId"]
    return renamed


def post_job(body):
    """POST *body* as JSON to the jobs endpoint, print the outcome, return the response."""
    resp = requests.post(url, json=body, headers=headers, timeout=REQUEST_TIMEOUT)
    print(f"Status: {resp.status_code}")
    print(f"Response: {resp.text}")
    return resp


def main():
    """Submit the job, retrying once with the snake_case key on failure.

    Raises:
        SystemExit: if HF_TOKEN is not set, or if both attempts are rejected.
    """
    # Fail fast instead of sending "Bearer None" and a null secret.
    if not token:
        raise SystemExit("HF_TOKEN is not set; cannot authenticate with the Jobs API.")

    print(f"POST {url}")
    # Sending with spaceId (CamelCase)
    resp = post_job(payload)

    # Any 2xx counts as success: retrying after e.g. a 201 would submit a
    # duplicate job.
    if not resp.ok:
        # Fallback to space_id (snake_case)
        print("Retrying with space_id...")
        resp = post_job(snake_case_payload(payload))

    if not resp.ok:
        # Surface the failure to the calling shell / CI step.
        raise SystemExit(1)


if __name__ == "__main__":
    main()