File size: 4,652 Bytes
e5cf7c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import requests
import sys
import base64

token = os.environ.get("HF_TOKEN")
namespace = "GAInTech"
space_id = "GAInTech/feather-a10g-gt80k-runtime-public"

hotpatch_script = """
import os
import sys
from pathlib import Path

def patch(path, old, new):
    p = Path(path)
    if not p.exists(): return
    s = p.read_text()
    if old in s:
        p.write_text(s.replace(old, new))
        print(f'[hotpatch] patched {path}')

# 1. training.py: Full function replacement + import fix
p1 = Path('/workspace/feather/hydra/training.py')
if p1.exists():
    s = p1.read_text()
    # Ensure MDLM logic is safe
    s = s.replace('mdlm_mask_id = MDLM_MASK_ID if MDLM_MASK_ID >= 0 else (vocab_size - 1)',
                  'try: _m = MDLM_MASK_ID\\n    except: _m = -1\\n    mdlm_mask_id = _m if _m >= 0 else (vocab_size - 1)')
    
    # Fix the maybe_resume_ckpt logic to be independent of RESUME_CKPT import
    old_func = '''def maybe_resume_ckpt(
    model: PostSemClawModel,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
) -> tuple[int, float, float, float, int]:
    if not RESUME_CKPT or RESUME_CKPT.lower() == "none":'''
    
    new_func = '''def maybe_resume_ckpt(
    model: PostSemClawModel,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
) -> tuple[int, float, float, float, int]:
    _r = os.environ.get("HYDRA_RESUME_CKPT", "none")
    if not _r or _r.lower() == "none":'''
    
    s = s.replace(old_func, new_func)
    
    # Fix the resume_path line
    s = s.replace('resume_path = Path(os.path.expanduser(RESUME_CKPT))',
                  'resume_path = Path(os.path.expanduser(os.environ.get("HYDRA_RESUME_CKPT", "none")))')
    
    p1.write_text(s)
    print('[hotpatch] training.py fix v6')

# 2. htm.py: Stub
p_htm = Path('/workspace/feather/subsystems/htm.py')
if p_htm.exists():
    s = p_htm.read_text()
    if 'class _StubRegion' not in s:
        stub = "\\nclass _StubRegion:\\n    def __init__(self, *a, **k): self.n_columns=2048\\n    def step(self, *a, **k): import numpy as np; return (np.zeros(2048), None, None, 1.0)\\n    def step_many(self, sdr, *a): import numpy as np; T=sdr.shape[0]; return (np.zeros((T,2048)), np.ones(T, dtype=np.float32))\\n    def reset(self): pass\\n"
        s = s.replace('import htm_rust', 'import htm_rust' + stub)
        s = s.replace('_HTM_REGION_CLS = getattr(htm_rust, "HTMRegion", None)', '_HTM_REGION_CLS = _StubRegion')
        p_htm.write_text(s)
        print('[hotpatch] htm.py stub')

# 3. stream fix
patch('/workspace/feather/prepare_nemotron.py',
      'local_only = os.environ.get("HYDRA_LOCAL_SHARDS_ONLY", "1") == "1"',
      'local_only = False')

# 4. sdr_semantic.py fix
p_sem = Path('/workspace/feather/subsystems/sdr_semantic.py')
if p_sem.exists():
    s = p_sem.read_text()
    # Fix signature
    old_sig = '        som_alpha: float = 0.05,\\n        contrastive_rank: int = 64,\\n    ) -> None:'
    new_sig = '        som_alpha: float = 0.05,\\n        contrastive_rank: int = 64,\\n        hebbian_alpha: float = 0.01,\\n        learnable: bool | None = None,\\n    ) -> None:'
    if old_sig in s: s = s.replace(old_sig, new_sig)
    # Fix _apply
    s = s.replace('self._retina_indices = fn(self._retina_indices)', 'if hasattr(self, "_retina_indices") and self._retina_indices is not None: self._retina_indices = fn(self._retina_indices)')
    p_sem.write_text(s)
    print('[hotpatch] sdr_semantic.py fix')

# 5. sdr_retina.py: Cache Repo
patch('/workspace/feather/subsystems/sdr_retina.py',
      '"icarus112/feather-retina-cache"',
      '"GAInTech/feather-retina-cache"')
"""

encoded = base64.b64encode(hotpatch_script.encode()).decode()
command = [
    "/bin/bash", "-c",
    f"python3 -c 'import base64; exec(base64.b64decode(\"{encoded}\"))' && python /app/entrypoint.py"
]

env = {
    "FEATHER_RUNTIME_MODE": "job",
    "HYDRA_BATCH_SIZE": "96",
    "HYDRA_TOTAL_BATCH": "196608",
    "HYDRA_USE_NEMOTRON": "1",
    "HYDRA_TARGET_SHARDS": "0",
    "HYDRA_FORCE_HTM_CPU": "1",
    "HYDRA_INERT_MAMBA": "1",
    "HYDRA_FASTPATH": "1",
    "PYTHONUNBUFFERED": "1",
    "HYDRA_RESUME_CKPT": "none"
}

payload = {
    "spaceId": space_id,
    "command": command,
    "environment": env,
    "secrets": {"HF_TOKEN": token},
    "flavor": "a10g-large",
    "timeout": "12h"
}

url = f"https://huggingface.co/api/jobs/{namespace}"
headers = {"Authorization": f"Bearer {token}"}

print(f"Submitting FINAL HOTPATCH launch to {url}...")
r = requests.post(url, json=payload, headers=headers)
if r.status_code == 200:
    print(f"Success! Job ID: {r.json()['id']}")
else:
    print(f"Error {r.status_code}: {r.text}")