Spaces:
Runtime error
Runtime error
File size: 5,208 Bytes
e5cf7c3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | import os
import requests
import sys
import base64
# Target of the job: the Hub namespace that owns it and the Space to run.
namespace = "GAInTech"
space_id = "GAInTech/feather-a10g-gt80k-runtime-public"

# Hub access token taken from the environment; None when HF_TOKEN is unset.
token = os.getenv("HF_TOKEN")
# Python source text that is executed inside the job before the real
# entrypoint: further down it is base64-encoded and run through
# `python3 -c "... exec(base64.b64decode(...))"`, followed by
# `python /app/entrypoint.py`.
#
# The literal is an ordinary (non-raw, non-f) triple-quoted string, so:
#   * `\\n` here reaches the embedded script as the two characters `\n`,
#     and `\"` reaches it as a plain `"`;
#   * `{c}` / `{path}` are not interpolated here; they belong to f-strings
#     that are evaluated when the embedded script itself runs.
#
# What the embedded script does, section by section (each section is skipped
# when its target file does not exist):
#   patch()  - helper: replace `old` with `new` in a file if `old` is present.
#   0. /workspace/feather/hydra/config.py - appends CKPT_INTERVAL,
#      CKPT_ROTATIONS and RESUME_CKPT assignments (read from HYDRA_* env
#      vars, default "none") for any of those names not found in the file.
#   1. /workspace/feather/hydra/training.py - inserts default assignments
#      right after the `def main()` line and rewrites three expressions to
#      read from the environment / locals() instead of module constants.
#   2. /workspace/feather/subsystems/htm.py - injects a `_StubRegion` class
#      after `import htm_rust` and points `_HTM_REGION_CLS` at it.
#   3. /workspace/feather/prepare_nemotron.py - forces `local_only = False`.
#   4. /workspace/feather/subsystems/sdr_semantic.py - widens an `__init__`
#      signature, guards the `_retina_indices` / `_retina_data` updates with
#      hasattr/None checks, and sets `self.hebbian_alpha = 0.01`.
#   5. /workspace/feather/subsystems/sdr_retina.py - swaps the repo id
#      `icarus112/feather-retina-cache` for `GAInTech/feather-retina-cache`.
#
# NOTE(review): in this copy every line of the embedded code starts at
# column 0 and whitespace runs inside the replacement patterns look collapsed
# to a single space. Python needs indented bodies after `def` / `if` / `for`,
# and the `old` patterns only match if their whitespace equals the target
# file's exactly - confirm the indentation is intact in the deployed file.
hotpatch_script = """
import os
import sys
from pathlib import Path
def patch(path, old, new):
p = Path(path)
if not p.exists(): return
s = p.read_text()
if old in s:
p.write_text(s.replace(old, new))
print(f'[hotpatch] patched {path}')
# 0. config.py: Ensure constants exist
p0 = Path('/workspace/feather/hydra/config.py')
if p0.exists():
s = p0.read_text()
for c in ['CKPT_INTERVAL', 'CKPT_ROTATIONS', 'RESUME_CKPT']:
if c not in s:
s += f'\\n{c} = os.environ.get("HYDRA_{c}", "none")\\n'
p0.write_text(s)
print('[hotpatch] config.py constants added')
# 1. training.py: Final Boss Fix (Deduplicated logic)
p1 = Path('/workspace/feather/hydra/training.py')
if p1.exists():
s = p1.read_text()
# Safe Variables (ALWAYS defined)
pre_main = '''
_prof = False
ema_model = None
CKPT_INTERVAL = int(os.environ.get("HYDRA_CKPT_INTERVAL", "1000"))
CKPT_ROTATIONS = int(os.environ.get("HYDRA_CKPT_ROTATIONS", "3"))
RESUME_CKPT = os.environ.get("HYDRA_RESUME_CKPT", "none")
'''
for m_line in ['def main() -> None:', 'def main():']:
if m_line in s: s = s.replace(m_line, m_line + pre_main)
# MDLM safe check
s = s.replace('mdlm_mask_id = MDLM_MASK_ID if MDLM_MASK_ID >= 0 else (vocab_size - 1)',
'mdlm_mask_id = int(os.environ.get("HYDRA_MDLM_MASK_ID", "-1"))\\n if mdlm_mask_id < 0: mdlm_mask_id = (vocab_size - 1)')
# RESUME safe path
s = s.replace('resume_path = Path(os.path.expanduser(RESUME_CKPT))',
'resume_path = Path(os.path.expanduser(os.environ.get("HYDRA_RESUME_CKPT", "none")))')
# Catch block NameError
s = s.replace('if ema_model is not None:', 'if locals().get("ema_model") is not None:')
p1.write_text(s)
print('[hotpatch] training.py supreme fix v13')
# 2. htm.py: Stub
p_htm = Path('/workspace/feather/subsystems/htm.py')
if p_htm.exists():
s = p_htm.read_text()
if 'class _StubRegion' not in s:
stub = "\\nclass _StubRegion:\\n def __init__(self, *a, **k): self.n_columns=2048\\n def step(self, *a, **k): import numpy as np; return (np.zeros(2048), None, None, 1.0)\\n def step_many(self, sdr, *a): import numpy as np; T=sdr.shape[0]; return (np.zeros((T,2048)), np.ones(T, dtype=np.float32))\\n def reset(self): pass\\n"
s = s.replace('import htm_rust', 'import htm_rust' + stub)
s = s.replace('_HTM_REGION_CLS = getattr(htm_rust, "HTMRegion", None)', '_HTM_REGION_CLS = _StubRegion')
p_htm.write_text(s)
# 3. stream fix
patch('/workspace/feather/prepare_nemotron.py',
'local_only = os.environ.get("HYDRA_LOCAL_SHARDS_ONLY", "1") == \"1\"',
'local_only = False')
# 4. sdr_semantic.py: DEVICE MOVEMENT FIX
p_sem = Path('/workspace/feather/subsystems/sdr_semantic.py')
if p_sem.exists():
s = p_sem.read_text()
s = s.replace('contrastive_rank: int = 64,\\n ) -> None:',
'contrastive_rank: int = 64, hebbian_alpha: float = 0.01, learnable: bool | None = None) -> None:')
old_apply = ' self._retina_indices = fn(self._retina_indices)'
new_apply = ''' if hasattr(self, "_retina_indices") and self._retina_indices is not None:
self._retina_indices = fn(self._retina_indices)
if hasattr(self, "_retina_data") and self._retina_data is not None:
self._retina_data = fn(self._retina_data)'''
if old_apply in s: s = s.replace(old_apply, new_apply)
if 'self.hebbian_alpha =' not in s:
s = s.replace('self.som_alpha = float(som_alpha)', 'self.som_alpha = float(som_alpha)\\n self.hebbian_alpha = 0.01')
p_sem.write_text(s)
# 5. sdr_retina.py: Repo fix
patch('/workspace/feather/subsystems/sdr_retina.py',
'icarus112/feather-retina-cache',
'GAInTech/feather-retina-cache')
"""
# The hotpatch travels as base64 so it can sit inside a shell one-liner:
# the base64 alphabet contains no quote characters, so the payload cannot
# break out of the single-quoted `python3 -c '...'` argument.
encoded = base64.b64encode(hotpatch_script.encode()).decode()

# Step 1 decodes and exec()s the hotpatch; step 2 (only if step 1 succeeded,
# because of `&&`) starts the regular entrypoint.
_bootstrap = f"python3 -c 'import base64; exec(base64.b64decode(\"{encoded}\"))'"
command = ["/bin/bash", "-c", f"{_bootstrap} && python /app/entrypoint.py"]
# Environment variables handed to the job. Every value is a string, and the
# insertion order matches the order in which the request body lists them.
env = {
    "FEATHER_RUNTIME_MODE": "job",
    # HYDRA_* settings: batch sizing, data source and feature switches.
    "HYDRA_BATCH_SIZE": "96",
    "HYDRA_TOTAL_BATCH": "196608",
    "HYDRA_USE_NEMOTRON": "1",
    "HYDRA_TARGET_SHARDS": "0",
    "HYDRA_FORCE_HTM_CPU": "1",
    "HYDRA_INERT_MAMBA": "1",
    "HYDRA_FASTPATH": "0",
    "HYDRA_MODEL_COMPILE": "0",
    "HYDRA_MUON_COMPILE": "0",
    "PYTHONUNBUFFERED": "1",
    # HYDRA_* settings: checkpoint resume and layer layout.
    "HYDRA_RESUME_CKPT": "none",
    "HYDRA_HYENA_LAYERS": "0,1,2,3",
    "HYDRA_N_LAYER": "4",
    # Compiler-related switches.
    "TORCH_COMPILE_BACKEND": "eager",
    "DYNAMO_DISABLE": "1",
}
# JSON body of the job-creation request: which Space to run, the command and
# environment to run it with, the secret to inject, and the hardware flavor
# and timeout requested for the job.
_job_secrets = {"HF_TOKEN": token}
payload = {
    "spaceId": space_id,
    "command": command,
    "environment": env,
    "secrets": _job_secrets,
    "flavor": "a10g-large",
    "timeout": "12h",
}
# Submit the job to the Hub and report the outcome.
#
# Exit status: 0 when the API answers 200, non-zero when the token is
# missing, the HTTP call cannot be completed, or the API returns an error,
# so that callers (shell scripts, CI) can detect a failed launch.

# Without a token the request would carry the literal header "Bearer None"
# and a null secret; fail early with a clear message instead.
if not token:
    sys.exit("HF_TOKEN is not set; cannot authenticate against the Hub API.")

url = f"https://huggingface.co/api/jobs/{namespace}"
headers = {"Authorization": f"Bearer {token}"}

try:
    # requests has no default timeout; without one a stalled connection
    # would block this script forever.
    r = requests.post(url, json=payload, headers=headers, timeout=60)
except requests.RequestException as exc:
    # DNS failure, refused connection, timeout, ... - report it on one line
    # rather than with a raw traceback.
    sys.exit(f"Request to {url} failed: {exc}")

if r.status_code == 200:
    print(f"Success! Job ID: {r.json()['id']}")
else:
    print(f"Error {r.status_code}: {r.text}")
    sys.exit(1)
|