File size: 5,208 Bytes
e5cf7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import requests
import sys
import base64

# Hugging Face credentials and the target namespace/Space for the job run.
# The token may legitimately be absent here; the API call below will then fail
# with a 401 rather than a local error.
namespace = "GAInTech"
space_id = f"{namespace}/feather-a10g-gt80k-runtime-public"
token = os.environ.get("HF_TOKEN")

# Source of a "hotpatch" script executed inside the remote job container
# *before* the real entrypoint (it is base64-encoded below and run via
# `python3 -c`).  Every byte of this string is runtime payload, so it must
# stay exactly as-is.  It rewrites files under /workspace/feather in place:
#   0. hydra/config.py        - inject missing CKPT_* / RESUME_CKPT constants
#   1. hydra/training.py      - prepend always-defined locals to main(),
#                               env-driven MDLM mask id and resume path, and a
#                               NameError-proof ema_model check
#   2. subsystems/htm.py      - substitute a stub for the Rust HTM region
#   3. prepare_nemotron.py    - force local_only = False ("stream fix")
#   4. subsystems/sdr_semantic.py - extra ctor kwargs, guarded device moves,
#                               default hebbian_alpha attribute
#   5. subsystems/sdr_retina.py   - repoint retina cache to GAInTech repo
# NOTE(review): the doubled backslashes (e.g. \\n) are intentional — they
# collapse to single escapes inside this string, so the *inner* script writes
# literal "\n" sequences into the files it patches.
hotpatch_script = """
import os
import sys
from pathlib import Path

def patch(path, old, new):
    p = Path(path)
    if not p.exists(): return
    s = p.read_text()
    if old in s:
        p.write_text(s.replace(old, new))
        print(f'[hotpatch] patched {path}')

# 0. config.py: Ensure constants exist
p0 = Path('/workspace/feather/hydra/config.py')
if p0.exists():
    s = p0.read_text()
    for c in ['CKPT_INTERVAL', 'CKPT_ROTATIONS', 'RESUME_CKPT']:
        if c not in s:
            s += f'\\n{c} = os.environ.get("HYDRA_{c}", "none")\\n'
    p0.write_text(s)
    print('[hotpatch] config.py constants added')

# 1. training.py: Final Boss Fix (Deduplicated logic)
p1 = Path('/workspace/feather/hydra/training.py')
if p1.exists():
    s = p1.read_text()
    
    # Safe Variables (ALWAYS defined)
    pre_main = '''
    _prof = False
    ema_model = None
    CKPT_INTERVAL = int(os.environ.get("HYDRA_CKPT_INTERVAL", "1000"))
    CKPT_ROTATIONS = int(os.environ.get("HYDRA_CKPT_ROTATIONS", "3"))
    RESUME_CKPT = os.environ.get("HYDRA_RESUME_CKPT", "none")
'''
    for m_line in ['def main() -> None:', 'def main():']:
        if m_line in s: s = s.replace(m_line, m_line + pre_main)

    # MDLM safe check
    s = s.replace('mdlm_mask_id = MDLM_MASK_ID if MDLM_MASK_ID >= 0 else (vocab_size - 1)',
                  'mdlm_mask_id = int(os.environ.get("HYDRA_MDLM_MASK_ID", "-1"))\\n    if mdlm_mask_id < 0: mdlm_mask_id = (vocab_size - 1)')
    
    # RESUME safe path
    s = s.replace('resume_path = Path(os.path.expanduser(RESUME_CKPT))',
                  'resume_path = Path(os.path.expanduser(os.environ.get("HYDRA_RESUME_CKPT", "none")))')
    
    # Catch block NameError
    s = s.replace('if ema_model is not None:', 'if locals().get("ema_model") is not None:')
    
    p1.write_text(s)
    print('[hotpatch] training.py supreme fix v13')

# 2. htm.py: Stub
p_htm = Path('/workspace/feather/subsystems/htm.py')
if p_htm.exists():
    s = p_htm.read_text()
    if 'class _StubRegion' not in s:
        stub = "\\nclass _StubRegion:\\n    def __init__(self, *a, **k): self.n_columns=2048\\n    def step(self, *a, **k): import numpy as np; return (np.zeros(2048), None, None, 1.0)\\n    def step_many(self, sdr, *a): import numpy as np; T=sdr.shape[0]; return (np.zeros((T,2048)), np.ones(T, dtype=np.float32))\\n    def reset(self): pass\\n"
        s = s.replace('import htm_rust', 'import htm_rust' + stub)
        s = s.replace('_HTM_REGION_CLS = getattr(htm_rust, "HTMRegion", None)', '_HTM_REGION_CLS = _StubRegion')
        p_htm.write_text(s)

# 3. stream fix
patch('/workspace/feather/prepare_nemotron.py',
      'local_only = os.environ.get("HYDRA_LOCAL_SHARDS_ONLY", "1") == \"1\"',
      'local_only = False')

# 4. sdr_semantic.py: DEVICE MOVEMENT FIX
p_sem = Path('/workspace/feather/subsystems/sdr_semantic.py')
if p_sem.exists():
    s = p_sem.read_text()
    s = s.replace('contrastive_rank: int = 64,\\n    ) -> None:', 
                  'contrastive_rank: int = 64, hebbian_alpha: float = 0.01, learnable: bool | None = None) -> None:')
    old_apply = '        self._retina_indices = fn(self._retina_indices)'
    new_apply = '''        if hasattr(self, "_retina_indices") and self._retina_indices is not None:
            self._retina_indices = fn(self._retina_indices)
        if hasattr(self, "_retina_data") and self._retina_data is not None:
            self._retina_data = fn(self._retina_data)'''
    if old_apply in s: s = s.replace(old_apply, new_apply)
    if 'self.hebbian_alpha =' not in s:
        s = s.replace('self.som_alpha = float(som_alpha)', 'self.som_alpha = float(som_alpha)\\n        self.hebbian_alpha = 0.01')
    p_sem.write_text(s)

# 5. sdr_retina.py: Repo fix
patch('/workspace/feather/subsystems/sdr_retina.py',
      'icarus112/feather-retina-cache',
      'GAInTech/feather-retina-cache')
"""

# Base64-wrap the hotpatch so it survives shell quoting intact, then run it
# ahead of the normal container entrypoint in one bash invocation.
encoded = base64.b64encode(hotpatch_script.encode()).decode()
bootstrap = (
    "python3 -c 'import base64; "
    f'exec(base64.b64decode("{encoded}"))\' '
    "&& python /app/entrypoint.py"
)
command = ["/bin/bash", "-c", bootstrap]

# Environment exported into the job container.  Every value is a string
# because it travels through the process environment.
env = dict(
    FEATHER_RUNTIME_MODE="job",
    HYDRA_BATCH_SIZE="96",
    HYDRA_TOTAL_BATCH="196608",
    HYDRA_USE_NEMOTRON="1",
    HYDRA_TARGET_SHARDS="0",
    HYDRA_FORCE_HTM_CPU="1",
    HYDRA_INERT_MAMBA="1",
    HYDRA_FASTPATH="0",
    HYDRA_MODEL_COMPILE="0",
    HYDRA_MUON_COMPILE="0",
    PYTHONUNBUFFERED="1",
    HYDRA_RESUME_CKPT="none",
    HYDRA_HYENA_LAYERS="0,1,2,3",
    HYDRA_N_LAYER="4",
    TORCH_COMPILE_BACKEND="eager",
    DYNAMO_DISABLE="1",
)

# Job-submission body for the HF Jobs API: which Space image to run, the
# command, its environment/secrets, the hardware flavor, and a wall-clock cap.
payload = dict(
    spaceId=space_id,
    command=command,
    environment=env,
    secrets={"HF_TOKEN": token},
    flavor="a10g-large",
    timeout="12h",
)

# Submit the job and report the outcome.
url = f"https://huggingface.co/api/jobs/{namespace}"
headers = {"Authorization": f"Bearer {token}"}

# A finite timeout keeps the script from hanging forever on a network stall;
# 60s is generous for a single POST.
r = requests.post(url, json=payload, headers=headers, timeout=60)
if r.ok:
    # Accept any 2xx — resource-creating endpoints commonly answer 201,
    # which the previous strict `== 200` check mis-reported as an error.
    # .get() avoids a KeyError if the success payload lacks an "id" field.
    print(f"Success! Job ID: {r.json().get('id', '<unknown>')}")
else:
    print(f"Error {r.status_code}: {r.text}")