File size: 5,292 Bytes
e5cf7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import requests
import sys
import base64

# Credentials and target Space for the HF Jobs API submission below.
token = os.getenv("HF_TOKEN")
namespace = "GAInTech"
space_id = f"{namespace}/feather-a10g-gt80k-runtime-public"

# Self-contained hot-patch script executed on the remote worker *before* the
# container entrypoint runs. It is shipped base64-encoded on the job command
# line and monkey-patches several files under /workspace/feather in place
# (checkpoint/import fixes in training.py, an HTM stub, a streaming flag,
# device-movement fixes in sdr_semantic.py, and a retina-cache repo rename).
# NOTE(review): this is a runtime string literal, not code in this process —
# every byte, including the doubled backslashes (\\n) and inner quoting, is
# significant on the worker. Edit with extreme care.
hotpatch_script = """
import os
import sys
from pathlib import Path

def patch(path, old, new):
    p = Path(path)
    if not p.exists(): return
    s = p.read_text()
    if old in s:
        p.write_text(s.replace(old, new))
        print(f'[hotpatch] patched {path}')

# 1. training.py: Final Boss Fix (Checkpointing + Imports)
p1 = Path('/workspace/feather/hydra/training.py')
if p1.exists():
    s = p1.read_text()
    
    # Imports fix
    im_old = 'WARMUP_RATIO, WEIGHT_DECAY,'
    im_new = 'WARMUP_RATIO, WEIGHT_DECAY, CKPT_INTERVAL, CKPT_ROTATIONS, RESUME_CKPT,'
    if im_old in s: s = s.replace(im_old, im_new)
    
    # Safely define variables if imports failed
    pre_main = '''
    _prof = False
    ema_model = None
    try: _ci = CKPT_INTERVAL
    except: _ci = int(os.environ.get("HYDRA_CKPT_INTERVAL", "1000"))
    CKPT_INTERVAL = _ci
    try: _cr = CKPT_ROTATIONS
    except: _cr = int(os.environ.get("HYDRA_CKPT_ROTATIONS", "3"))
    CKPT_ROTATIONS = _cr
'''
    for m_line in ['def main() -> None:', 'def main():']:
        if m_line in s: s = s.replace(m_line, m_line + pre_main)

    # MDLM logic
    s = s.replace('mdlm_mask_id = MDLM_MASK_ID if MDLM_MASK_ID >= 0 else (vocab_size - 1)',
                  '_m = int(os.environ.get("HYDRA_MDLM_MASK_ID", "-1"))\\n    mdlm_mask_id = _m if _m >= 0 else (vocab_size - 1)')
    
    # RESUME logic
    raw_resume = 'os.environ.get("HYDRA_RESUME_CKPT", "none")'
    s = s.replace('if not RESUME_CKPT or RESUME_CKPT.lower() == "none":', 
                  f'_r = {raw_resume}\\n    if not _r or _r.lower() == "none":')
    s = s.replace('resume_path = Path(os.path.expanduser(RESUME_CKPT))',
                  f'resume_path = Path(os.path.expanduser({raw_resume}))')
    
    # Catch block NameError
    s = s.replace('if ema_model is not None:', 'if locals().get("ema_model") is not None:')
    
    p1.write_text(s)
    print('[hotpatch] training.py absolute supreme final fix')

# 2. htm.py: Stub
p_htm = Path('/workspace/feather/subsystems/htm.py')
if p_htm.exists():
    s = p_htm.read_text()
    if 'class _StubRegion' not in s:
        stub = "\\nclass _StubRegion:\\n    def __init__(self, *a, **k): self.n_columns=2048\\n    def step(self, *a, **k): import numpy as np; return (np.zeros(2048), None, None, 1.0)\\n    def step_many(self, sdr, *a): import numpy as np; T=sdr.shape[0]; return (np.zeros((T,2048)), np.ones(T, dtype=np.float32))\\n    def reset(self): pass\\n"
        s = s.replace('import htm_rust', 'import htm_rust' + stub)
        s = s.replace('_HTM_REGION_CLS = getattr(htm_rust, "HTMRegion", None)', '_HTM_REGION_CLS = _StubRegion')
        p_htm.write_text(s)

# 3. stream fix
patch('/workspace/feather/prepare_nemotron.py',
      'local_only = os.environ.get("HYDRA_LOCAL_SHARDS_ONLY", "1") == \"1\"',
      'local_only = False')

# 4. sdr_semantic.py: DEVICE MOVEMENT FIX
p_sem = Path('/workspace/feather/subsystems/sdr_semantic.py')
if p_sem.exists():
    s = p_sem.read_text()
    s = s.replace('contrastive_rank: int = 64,\\n    ) -> None:', 
                  'contrastive_rank: int = 64, hebbian_alpha: float = 0.01, learnable: bool | None = None) -> None:')
    old_apply = '        self._retina_indices = fn(self._retina_indices)'
    new_apply = '''        if hasattr(self, "_retina_indices") and self._retina_indices is not None:
            self._retina_indices = fn(self._retina_indices)
        if hasattr(self, "_retina_data") and self._retina_data is not None:
            self._retina_data = fn(self._retina_data)'''
    if old_apply in s: s = s.replace(old_apply, new_apply)
    if 'self.hebbian_alpha =' not in s:
        s = s.replace('self.som_alpha = float(som_alpha)', 'self.som_alpha = float(som_alpha)\\n        self.hebbian_alpha = 0.01')
    p_sem.write_text(s)

# 5. sdr_retina.py: Repo fix
patch('/workspace/feather/subsystems/sdr_retina.py',
      'icarus112/feather-retina-cache',
      'GAInTech/feather-retina-cache')
"""

# Ship the patch script as base64 so it survives the shell command line
# without any quoting/escaping issues, then chain into the real entrypoint.
encoded = base64.b64encode(hotpatch_script.encode()).decode()
bootstrap = f"python3 -c 'import base64; exec(base64.b64decode(\"{encoded}\"))'"
command = ["/bin/bash", "-c", f"{bootstrap} && python /app/entrypoint.py"]

# Environment for the remote job. All values are strings, as required for
# process environment variables: a 4-layer Hyena configuration with
# checkpoint resume disabled and every torch.compile path forced to eager.
env = dict(
    FEATHER_RUNTIME_MODE="job",
    HYDRA_BATCH_SIZE="96",
    HYDRA_TOTAL_BATCH="196608",
    HYDRA_USE_NEMOTRON="1",
    HYDRA_TARGET_SHARDS="0",
    HYDRA_FORCE_HTM_CPU="1",
    HYDRA_INERT_MAMBA="1",
    HYDRA_FASTPATH="0",
    HYDRA_MODEL_COMPILE="0",
    HYDRA_MUON_COMPILE="0",
    PYTHONUNBUFFERED="1",
    HYDRA_RESUME_CKPT="none",
    HYDRA_HYENA_LAYERS="0,1,2,3",
    HYDRA_N_LAYER="4",
    TORCH_COMPILE_BACKEND="eager",
    DYNAMO_DISABLE="1",
)

# Job submission payload: run `command` inside the Space's image on an
# a10g-large flavor, forwarding HF_TOKEN as a secret, with a 12h wall clock.
payload = {
    "spaceId": space_id,
    "command": command,
    "environment": env,
    "secrets": {"HF_TOKEN": token},
    "flavor": "a10g-large",
    "timeout": "12h",
}

url = "https://huggingface.co/api/jobs/" + namespace
headers = {"Authorization": "Bearer {}".format(token)}

# Submit the job. A bounded timeout keeps the script from hanging forever on
# a stalled connection (requests has no default timeout), and a 200 response
# with an unexpected body is reported instead of raising an uncaught
# KeyError/ValueError from r.json()["id"].
try:
    r = requests.post(url, json=payload, headers=headers, timeout=60)
except requests.RequestException as exc:
    sys.exit(f"Request failed: {exc}")

if r.status_code == 200:
    try:
        job_id = r.json()["id"]
    except (ValueError, KeyError):
        sys.exit(f"Job submitted but response body was unexpected: {r.text}")
    print(f"Success! Job ID: {job_id}")
else:
    print(f"Error {r.status_code}: {r.text}")