File size: 4,406 Bytes
e5cf7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import requests
import sys
import base64

# Job configuration. `namespace` is used both as the owner segment of the
# Space id and in the jobs API URL; `token` is None when HF_TOKEN is unset.
namespace = "GAInTech"
space_id = f"{namespace}/feather-a10g-gt80k-runtime-public"
token = os.getenv("HF_TOKEN")

# Source text of a Python script that is executed inside the job container
# before /app/entrypoint.py (this file base64-encodes it and runs it through
# `python3 -c ... exec(...)`). It edits files under /workspace/feather in
# place with plain str.replace, so a search string that is not found simply
# leaves that part of the file unchanged. Every target file is skipped when
# it does not exist.
#
#   1. hydra/training.py          - read HYDRA_MDLM_MASK_ID and
#                                   HYDRA_RESUME_CKPT from os.environ at the
#                                   patched sites, and add `_prof = False` as
#                                   the first statement of main(). The file
#                                   is rewritten even if nothing matched.
#   2. subsystems/htm.py          - insert a numpy-backed `_StubRegion` class
#                                   after `import htm_rust` and assign it to
#                                   `_HTM_REGION_CLS` (only when the stub is
#                                   not already present).
#   3. prepare_nemotron.py        - replace the HYDRA_LOCAL_SHARDS_ONLY lookup
#                                   with `local_only = False`.
#   4. subsystems/sdr_semantic.py - add `hebbian_alpha` / `learnable`
#                                   parameters to the signature ending in
#                                   `contrastive_rank: int = 64,`, apply `fn`
#                                   to `_retina_data` as well as
#                                   `_retina_indices`, and set
#                                   `self.hebbian_alpha = 0.01`. The
#                                   "fixed" message is printed whether or not
#                                   any replacement matched.
#   5. subsystems/sdr_retina.py   - swap the repo id
#                                   icarus112/feather-retina-cache for
#                                   GAInTech/feather-retina-cache.
#
# NOTE: this is a regular (non-raw) triple-quoted string, so every "\\n"
# below reaches the embedded script as the two characters backslash + n,
# which that script's own string literals then turn into a newline.
hotpatch_script = """
import os
import sys
from pathlib import Path

def patch(path, old, new):
    p = Path(path)
    if not p.exists(): return
    s = p.read_text()
    if old in s:
        p.write_text(s.replace(old, new))
        print(f'[hotpatch] patched {path}')

# 1. training.py fix
p1 = Path('/workspace/feather/hydra/training.py')
if p1.exists():
    s = p1.read_text()
    s = s.replace('mdlm_mask_id = MDLM_MASK_ID if MDLM_MASK_ID >= 0 else (vocab_size - 1)',
                  '_m = int(os.environ.get("HYDRA_MDLM_MASK_ID", "-1"))\\n    mdlm_mask_id = _m if _m >= 0 else (vocab_size - 1)')
    s = s.replace('if not RESUME_CKPT or RESUME_CKPT.lower() == "none":', 
                  '_r = os.environ.get("HYDRA_RESUME_CKPT", "none")\\n    if not _r or _r.lower() == "none":')
    s = s.replace('resume_path = Path(os.path.expanduser(RESUME_CKPT))',
                  'resume_path = Path(os.path.expanduser(os.environ.get("HYDRA_RESUME_CKPT", "none")))')
    for m_line in ['def main():', 'def main() -> None:']:
        if m_line in s: s = s.replace(m_line, m_line + '\\n    _prof = False')
    p1.write_text(s)

# 2. htm.py stub
p_htm = Path('/workspace/feather/subsystems/htm.py')
if p_htm.exists():
    s = p_htm.read_text()
    if 'class _StubRegion' not in s:
        stub = "\\nclass _StubRegion:\\n    def __init__(self, *a, **k): self.n_columns=2048\\n    def step(self, *a, **k): import numpy as np; return (np.zeros(2048), None, None, 1.0)\\n    def step_many(self, sdr, *a): import numpy as np; T=sdr.shape[0]; return (np.zeros((T,2048)), np.ones(T, dtype=np.float32))\\n    def reset(self): pass\\n"
        s = s.replace('import htm_rust', 'import htm_rust' + stub)
        s = s.replace('_HTM_REGION_CLS = getattr(htm_rust, "HTMRegion", None)', '_HTM_REGION_CLS = _StubRegion')
        p_htm.write_text(s)

# 3. stream fix
patch('/workspace/feather/prepare_nemotron.py',
      'local_only = os.environ.get("HYDRA_LOCAL_SHARDS_ONLY", "1") == "1"',
      'local_only = False')

# 4. sdr_semantic.py: DEVICE MOVEMENT FIX
p_sem = Path('/workspace/feather/subsystems/sdr_semantic.py')
if p_sem.exists():
    s = p_sem.read_text()
    # Signature fix
    s = s.replace('contrastive_rank: int = 64,\\n    ) -> None:', 
                  'contrastive_rank: int = 64, hebbian_alpha: float = 0.01, learnable: bool | None = None) -> None:')
    # _apply fix (Crucial: move _retina_data Too!)
    old_apply = '        self._retina_indices = fn(self._retina_indices)'
    new_apply = '''        if hasattr(self, "_retina_indices") and self._retina_indices is not None:
            self._retina_indices = fn(self._retina_indices)
        if hasattr(self, "_retina_data") and self._retina_data is not None:
            self._retina_data = fn(self._retina_data)'''
    s = s.replace(old_apply, new_apply)
    # hebbian member
    if 'self.hebbian_alpha =' not in s:
        s = s.replace('self.som_alpha = float(som_alpha)', 'self.som_alpha = float(som_alpha)\\n        self.hebbian_alpha = 0.01')
    p_sem.write_text(s)
    print('[hotpatch] sdr_semantic device movement fixed')

# 5. sdr_retina.py: Repo fix
patch('/workspace/feather/subsystems/sdr_retina.py',
      'icarus112/feather-retina-cache',
      'GAInTech/feather-retina-cache')
"""

# The hotpatch source is carried as base64 text so it can sit inside the
# single-quoted `python3 -c` argument without any shell escaping (the base64
# alphabet contains no quote characters).
encoded = base64.b64encode(hotpatch_script.encode("utf-8")).decode("ascii")

# Step 1 decodes and exec()s the hotpatch; step 2 starts the real entrypoint.
# They are joined with `&&`, so the entrypoint only runs if the hotpatch
# process exited successfully.
_apply_hotpatch = f"python3 -c 'import base64; exec(base64.b64decode(\"{encoded}\"))'"
_run_entrypoint = "python /app/entrypoint.py"
command = ["/bin/bash", "-c", f"{_apply_hotpatch} && {_run_entrypoint}"]

# Environment variables sent with the job request. Every value is a string.
# NOTE(review): the meaning of the individual HYDRA_* switches is defined by
# the training code, not by this file; confirm there before changing them.
env = {
    "FEATHER_RUNTIME_MODE": "job",
    # Batch sizing.
    "HYDRA_BATCH_SIZE": "96",
    "HYDRA_TOTAL_BATCH": "196608",
    # Data source selection.
    "HYDRA_USE_NEMOTRON": "1",
    "HYDRA_TARGET_SHARDS": "0",
    # Feature switches.
    "HYDRA_FORCE_HTM_CPU": "1",
    "HYDRA_INERT_MAMBA": "1",
    "HYDRA_FASTPATH": "1",
    "PYTHONUNBUFFERED": "1",
    # The hotpatched training.py compares this value against "none".
    "HYDRA_RESUME_CKPT": "none",
}

# --- Submit the job ---------------------------------------------------------
# Fail fast when no token is configured: without it the request would be sent
# with the literal header "Bearer None" and the secret would be null.
if not token:
    sys.exit("Error: HF_TOKEN environment variable is not set")

# Request body: which Space to run, the command to execute in it, the
# environment and secrets to expose, the hardware flavor and the time limit.
payload = {
    "spaceId": space_id,
    "command": command,
    "environment": env,
    "secrets": {"HF_TOKEN": token},
    "flavor": "a10g-large",
    "timeout": "12h",
}

url = f"https://huggingface.co/api/jobs/{namespace}"
headers = {"Authorization": f"Bearer {token}"}

try:
    # requests applies no timeout by default, so an unresponsive server would
    # block this script forever; bound the wait explicitly.
    r = requests.post(url, json=payload, headers=headers, timeout=60)
except requests.RequestException as exc:
    # Connection errors, DNS failures and timeouts end up here.
    sys.exit(f"Error: request to {url} failed: {exc}")

if r.ok:
    # Any non-error status counts as success, not only 200.
    try:
        job_id = r.json()["id"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or was JSON without an "id" field.
        sys.exit(
            f"Error: HTTP {r.status_code} but no job id in response: {r.text}"
        )
    print(f"Success! Job ID: {job_id}")
else:
    print(f"Error {r.status_code}: {r.text}")
    # Non-zero exit status so shells and CI pipelines can detect the failure.
    sys.exit(1)