Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """Continuous Feather autoresearch loop for local RTX 3060. | |
| Protocol: | |
| - One GPU owner, sequential runs only. | |
| - 300s training budget, redirected logs. | |
| - Parse val_bpb / metrics JSON from disk. | |
| - Append TSV ledger. | |
| - Keep searching until hard gate is reached or process is killed. | |
| This loop mutates runtime env first because current Feather exposes most active | |
| architecture/optimizer knobs through HYDRA_* gates. Code edits can be added as | |
| candidate generators after the env frontier is exhausted. | |
| """ | |
| from __future__ import annotations | |
| import itertools | |
| import json | |
| import os | |
| import re | |
| import shlex | |
| import subprocess | |
| import time | |
| from pathlib import Path | |
# Filesystem layout: everything lives under the Feather checkout.
ROOT = Path('/home/mikeb/work/feather')
LOGDIR = ROOT.joinpath('logs', 'autoresearch_may03')
LEDGER = ROOT.joinpath('autoresearch_may03_results.tsv')

# Search stops once a run reaches this validation bpb (env-overridable).
TARGET_BPB = float(os.getenv('AUTORESEARCH_TARGET_BPB', '1.60'))

# Strict autoresearch cadence: train.py gets HYDRA_TIME_BUDGET=300; the wrapper
# timeout only adds room for startup + final eval overhead. Do not let one
# candidate occupy the GPU for 10-12 minutes unless it is genuinely hung.
RUN_TIMEOUT = int(os.getenv('AUTORESEARCH_RUN_TIMEOUT', '430'))

# Import-time bootstrap: make sure the log directory exists and that the
# ledger starts with its TSV header row.
LOGDIR.mkdir(parents=True, exist_ok=True)
if not LEDGER.exists():
    with LEDGER.open('w') as ledger_file:
        ledger_file.write('ts\tcommit\tcandidate\tval_bpb\tpeak_tps\tmedian_tps\tmemory_gb\tstatus\tdescription\tlog\n')
# Baseline environment applied to every training run. run_candidate() layers
# this on top of the inherited process environment and then applies the
# candidate's own delta, so any key listed here can be overridden per candidate.
# All values are strings because they are passed as environment variables.
BASE: dict[str, str] = {
    'LD_LIBRARY_PATH': '/usr/lib/wsl/lib:/usr/local/cuda/lib64',
    'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True',
    'HF_TOKEN': '',
    'HUGGINGFACE_HUB_TOKEN': '',
    'WANDB_DISABLED': 'true',
    'HYDRA_USE_NEMOTRON': '1',
    'HYDRA_USE_FULL_BLEND': '1',
    'HYDRA_SAMPLED_SOFTMAX': '1024',
    'HYDRA_SOFTCAP_CLAMP': '1',
    'HYDRA_SEQ_LEN': '1024',
    'HYDRA_HEADDIM': '32',
    'HYDRA_EXPAND': '3',
    'HYDRA_BATCH_SIZE': '8',
    'HYDRA_TOTAL_BATCH': '16384',
    'HYDRA_D_MODEL': '160',
    'HYDRA_N_LAYER': '20',
    'HYDRA_D_STATE': '64',
    'HYDRA_TIME_BUDGET': '300',
    'HYDRA_ENGRAM_N_COLUMNS': '16384',
    'HYDRA_ENGRAM_TOPK': '64',
    'HYDRA_GDN_LAYERS': '',
    'HYDRA_MTP_K': '1',
    'HYDRA_USE_MDLM': '0',
    'HYDRA_MUON_COMPILE': '0',
    'HYDRA_MUON_NS_STEPS': '2',  # promoted from TPS-11 receipt
    'HYDRA_MATRIX_LR': '0.04',
    'HYDRA_EMBED_LR': '0.6',
    'HYDRA_UNEMBED_LR': '0.004',
    'HYDRA_DT_BIAS_LR': '0.6',
    'HYDRA_LOCAL_SHARDS_ONLY': '1',
    'HYDRA_BACKGROUND_PREFETCH': '0',
    'HYDRA_STREAM_SHUFFLE_BUFFER': '256',
    'HYDRA_STREAM_PREFETCH': '16',
    'HYDRA_TOKEN_PREFETCH': '4',
    'HYDRA_TOKEN_CACHE_GB': '1',
    'HYDRA_CKPT_INTERVAL': '2000',
    'HYDRA_MID_VAL_INTERVAL': '0',
    'HYDRA_HESTIA_INTERVAL': '999999',
    'HYDRA_HTM_SUBSAMPLE': '128',
    'HYDRA_EVAL_BATCH': '1',
    'HYDRA_EVAL_TOKENS': '1024',
    'HYDRA_CE_CHUNK': '32',
    'HYDRA_SKIP_FACTUAL_EVAL': '1',
    'HYDRA_RESUME_CKPT': 'none',
    'UV_PYTHON': '/usr/bin/python3',
}
# Ordered from lowest-risk/promising to wider/radical. Infinite outer loop will
# revisit with perturbations after first pass.
#
# Each entry is (name, env delta, description):
#   - name: unique key written to the ledger; main() skips names that are
#     already ledgered, so renaming an entry makes it run again.
#   - env delta: overrides applied on top of BASE for that run.
#   - description: free text copied into the ledger and the run log.
CANDIDATES: list[tuple[str, dict[str, str], str]] = [
    # Plateau-escape candidates: stronger than tiny LR nudges. These attack
    # the 5-minute validation plateau by changing effective optimization,
    # temporal capacity, and memory pressure while keeping full architecture.
    # Real z-loss axis was tested after wiring fix: z=0.001 regressed
    # (2.0446 vs best 2.0237). Return to default z=1e-4 and mutate the
    # discovered l16/d192 basin more aggressively.
    ('basin_l16d192_lr085_emb11', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.085','HYDRA_EMBED_LR':'1.1'}, 'basin: l16d192 hotter LR default z'),
    ('basin_l16d192_lr10_emb13', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.10','HYDRA_EMBED_LR':'1.3'}, 'basin: l16d192 max hot LR default z'),
    ('basin_l16d192_lr065_emb09', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.065','HYDRA_EMBED_LR':'0.9'}, 'basin: l16d192 moderate LR default z'),
    ('basin_l16d192_ns1p5_nope_ns2_fasttb', {'HYDRA_TOTAL_BATCH':'24576','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.075','HYDRA_EMBED_LR':'1.0'}, 'basin: l16d192 TB24576 more updates default z'),
    ('basin_l16d192_dstate48', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_D_STATE':'48','HYDRA_MATRIX_LR':'0.075','HYDRA_EMBED_LR':'1.0'}, 'basin: l16d192 smaller d_state faster updates'),
    ('basin_l16d192_dstate80', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_D_STATE':'80','HYDRA_MATRIX_LR':'0.075','HYDRA_EMBED_LR':'1.0'}, 'basin: l16d192 d_state80 capacity'),
    ('basin_l18d160_hot_defaultz', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_N_LAYER':'18','HYDRA_D_MODEL':'160','HYDRA_MATRIX_LR':'0.075','HYDRA_EMBED_LR':'1.0'}, 'basin: valid deeper l18d160 default z'),
    # High-leverage evolutionary front around the discovered winner l16/d192.
    # This is no longer tiny-knob search: change shape + optimizer together.
    ('evo_l16d192_lr075_10', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.075','HYDRA_EMBED_LR':'1.0'}, 'evo: l16d192 with hotter LR for 300s descent'),
    ('evo_l16d192_lr05_07', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.05','HYDRA_EMBED_LR':'0.7'}, 'evo: l16d192 slightly cooler stability'),
    ('evo_l16d208', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'208','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l16 wider d208'),
    ('evo_l14d224', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'14','HYDRA_D_MODEL':'224','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l14 d224 speed/capacity trade'),
    ('evo_l12d256', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'12','HYDRA_D_MODEL':'256','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l12 d256 wide-frontier probe'),
    ('evo_l10d288', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'10','HYDRA_D_MODEL':'288','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l10 d288 radical width probe'),
    ('evo_l16d192_k768', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_SAMPLED_SOFTMAX':'768','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l16d192 lower sampled softmax for more updates'),
    ('evo_l16d192_k512', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_SAMPLED_SOFTMAX':'512','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l16d192 K512 throughput/calibration probe'),
    ('evo_l16d192_tb16384', {'HYDRA_TOTAL_BATCH':'16384','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'evo: l16d192 smaller TB more optimizer steps'),
    ('escape_tb32768_z001_ns2_lr_hi', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'plateau escape: faster 300s descent with champion TB/zloss'),
    ('escape_tb32768_z001_ns2_lr_lo', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_MATRIX_LR':'0.025','HYDRA_EMBED_LR':'0.45'}, 'plateau escape: lower LR calibration'),
    ('escape_tb32768_ns2_dstate96', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_D_STATE':'96'}, 'plateau escape: extra SSM state capacity'),
    ('escape_tb32768_ns2_l18_d176', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'18','HYDRA_D_MODEL':'176'}, 'plateau escape: trade depth for width at similar budget'),
    ('escape_tb32768_ns2_l16_d192', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_N_LAYER':'16','HYDRA_D_MODEL':'192'}, 'plateau escape: stronger width trade'),
    ('escape_tb32768_ns2_gdn3', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_GDN_LAYERS':'3,7,11'}, 'plateau escape: reintroduce known GDN quality axis'),
    ('escape_tb32768_ns2_gdn5', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_GDN_LAYERS':'0,4,8,12,16'}, 'plateau escape: distributed 5-GDN quality axis'),
    ('escape_tb32768_ns2_enk128', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_ENGRAM_TOPK':'128'}, 'plateau escape: wider engram read'),
    ('escape_tb32768_ns2_dr64', {'HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_SDR_DELTA_RANK':'64'}, 'plateau escape: wider SDR STE pipe despite prior weak amp'),
    ('escape_tb32768_ns3_lr_hi', {'HYDRA_MUON_NS_STEPS':'3','HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001','HYDRA_MATRIX_LR':'0.06','HYDRA_EMBED_LR':'0.8'}, 'plateau escape: stable NS3 plus faster LR'),
    ('ns2_lr_m003', {'HYDRA_MATRIX_LR':'0.03'}, 'slightly lower matrix LR stabilizer'),
    ('ns2_lr_m005', {'HYDRA_MATRIX_LR':'0.05'}, 'slightly higher matrix LR for faster 300s descent'),
    ('ns2_embed04', {'HYDRA_EMBED_LR':'0.4'}, 'lower embed LR calibration'),
    ('ns2_embed08', {'HYDRA_EMBED_LR':'0.8'}, 'higher embed LR fast lexical fit'),
    ('ns2_dt03', {'HYDRA_DT_BIAS_LR':'0.3'}, 'lower dt-bias LR stability'),
    ('ns2_dt10', {'HYDRA_DT_BIAS_LR':'1.0'}, 'higher dt-bias adaptation'),
    ('ns2_dstate96', {'HYDRA_D_STATE':'96'}, 'more SSM state capacity'),
    ('ns2_dstate128', {'HYDRA_D_STATE':'128'}, 'max SSM state capacity probe'),
    ('ns2_enk128', {'HYDRA_ENGRAM_TOPK':'128'}, 'wider engram retrieval'),
    ('ns2_enk32', {'HYDRA_ENGRAM_TOPK':'32'}, 'narrower engram retrieval / less noise'),
    ('ns2_htm64', {'HYDRA_HTM_SUBSAMPLE':'64'}, 'more frequent HTM update'),
    ('ns2_htm256', {'HYDRA_HTM_SUBSAMPLE':'256'}, 'less HTM overhead/noise'),
    ('ns2_gdn_3_7_11', {'HYDRA_GDN_LAYERS':'3,7,11'}, 'retest 3-GDN trend on NS2'),
    ('ns2_gdn_0_4_8_12_16', {'HYDRA_GDN_LAYERS':'0,4,8,12,16'}, '5-GDN distributed depth'),
    ('ns2_gdn_0_1_2', {'HYDRA_GDN_LAYERS':'0,1,2'}, 'early GDN locality'),
    ('ns2_l18', {'HYDRA_N_LAYER':'18'}, 'shallower depth for more updates in budget'),
    ('ns2_l22', {'HYDRA_N_LAYER':'22'}, 'deeper temporal hierarchy if fits'),
    ('ns2_d176', {'HYDRA_D_MODEL':'176'}, 'slightly wider model'),
    ('ns2_d192', {'HYDRA_D_MODEL':'192'}, 'wider model capacity probe'),
    ('ns3_gdn_3_7_11', {'HYDRA_MUON_NS_STEPS':'3','HYDRA_GDN_LAYERS':'3,7,11'}, 'known GDN axis with stable Muon NS3'),
    ('ns3_tb32768_z001', {'HYDRA_MUON_NS_STEPS':'3','HYDRA_TOTAL_BATCH':'32768','HYDRA_Z_LOSS_WEIGHT':'0.001'}, 'champion-ish optimizer defaults'),
]
# A non-negative decimal: digits with an optional fraction, or a bare
# fraction such as ".5". The looser ``[0-9.]+`` also captured a lone "." or a
# trailing sentence period ("val_bpb: 2.0237."), and float() raises on those.
_NUM = r'(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)'

# Per-step training line: captures (bpb, tps) from lines starting with "step=N ".
STEP_RE = re.compile(r'^step=\d+ .*?bpb=(' + _NUM + r').*?tps=(' + _NUM + r')', re.M)
# Validation result line: captures the number following "val_bpb:".
VAL_RE = re.compile(r'val_bpb:\s*(' + _NUM + r')')
# Metrics line: captures the JSON object following the "[METRICS_JSON]" tag.
METRICS_RE = re.compile(r'\[METRICS_JSON\]\s*(\{.*\})')
def current_commit() -> str:
    """Return the short git hash of HEAD in ROOT, or 'unknown' if git fails.

    This is called while building a ledger row after a run has finished, so a
    git failure (git missing, ROOT not a repository, non-zero exit) must not
    raise and discard the result of that run.
    """
    try:
        out = subprocess.check_output(
            ['git', 'rev-parse', '--short', 'HEAD'], cwd=ROOT, text=True
        )
    except (subprocess.CalledProcessError, OSError):
        return 'unknown'
    return out.strip()
def completed_names() -> set[str]:
    """Collect the candidate names already present in the ledger.

    The candidate name is the third tab-separated column. The first line is
    the header and is skipped; rows with fewer than three columns are ignored.
    A missing ledger yields an empty set.
    """
    if not LEDGER.exists():
        return set()
    data_rows = LEDGER.read_text(errors='ignore').splitlines()[1:]
    split_rows = (row.split('\t') for row in data_rows)
    return {cols[2] for cols in split_rows if len(cols) >= 3}
def best_seen() -> float:
    """Return the lowest positive val_bpb known so far, or 999.0 if none.

    Two sources are consulted:
      * the TSV ledger, whose fourth column holds val_bpb (its rows are not
        ``val_bpb:`` log lines, so they are split on tabs instead);
      * a fixed list of one-off receipt logs, scanned with VAL_RE.

    Every value goes through the same filter: text that does not parse as a
    float is skipped, and so are values <= 0 (failed runs are ledgered with
    val_bpb 0.000000). Applying this to receipts too keeps a malformed or
    zero receipt value from raising or from faking a perfect score.
    """
    raw_values: list[str] = []

    if LEDGER.exists():
        for line in LEDGER.read_text(errors='ignore').splitlines()[1:]:
            parts = line.split('\t')
            if len(parts) >= 4:
                raw_values.append(parts[3])

    # Also seed from known one-off receipts.
    receipts = [
        ROOT / 'run_tps11_ns2.log',
        ROOT / 'run_tps7_bs10.log',
        ROOT / 'run_tps1_htm256.log',
    ]
    for path in receipts:
        if not path.exists():
            continue
        txt = path.read_text(errors='ignore')
        raw_values.extend(m.group(1) for m in VAL_RE.finditer(txt))

    best = 999.0
    for raw in raw_values:
        try:
            v = float(raw)
        except ValueError:
            continue
        if v > 0:
            best = min(best, v)
    return best
def parse_log(path: Path):
    """Extract the result of one training run from its log file.

    Returns a 6-tuple ``(val_bpb, peak_tps, median_tps, mem_gb, status, metrics)``:
      * val_bpb: last value matched by VAL_RE, or 0.0 when none was found.
      * peak_tps / median_tps: max and upper-middle element of the positive
        tps values matched by STEP_RE; 0.0 when there are none.
      * mem_gb: ``peak_vram_mb`` from the last METRICS_RE JSON, divided by 1024.
      * status: 'ok' if a val_bpb was found, else 'crash_oom', 'crash' or
        'no_val' depending on what the log text contains.
      * metrics: the decoded JSON from the last metrics line, or None.

    Parsing is best-effort: an unreadable log is treated as empty and
    malformed numbers are skipped, so a bad log line cannot take down the
    search loop after a run has already consumed its GPU budget.
    """
    try:
        txt = path.read_text(errors='ignore')
    except OSError:
        txt = ''

    def _floats(raw_values):
        # Convert what we can; silently drop captures float() rejects.
        parsed = []
        for raw in raw_values:
            try:
                parsed.append(float(raw))
            except ValueError:
                continue
        return parsed

    vals = _floats(m.group(1) for m in VAL_RE.finditer(txt))
    # STEP_RE captures (bpb, tps); only the tps column is used here.
    tps = [v for v in _floats(b for _, b in STEP_RE.findall(txt)) if v > 0]
    peak_tps = max(tps) if tps else 0.0
    med_tps = sorted(tps)[len(tps) // 2] if tps else 0.0

    mem_gb = 0.0
    metrics = None
    mm = list(METRICS_RE.finditer(txt))
    if mm:
        try:
            metrics = json.loads(mm[-1].group(1))
            mem_gb = float(metrics.get('peak_vram_mb', 0.0)) / 1024.0
        except (ValueError, TypeError, AttributeError, OverflowError):
            # Bad JSON, a non-dict payload, or a non-numeric peak_vram_mb:
            # keep whatever was decoded and leave mem_gb at 0.0.
            pass

    if vals:
        return vals[-1], peak_tps, med_tps, mem_gb, 'ok', metrics
    # The case-insensitive test also covers 'CUDA driver error: out of memory'.
    if 'out of memory' in txt.lower() or 'OutOfMemory' in txt:
        return 0.0, peak_tps, med_tps, mem_gb, 'crash_oom', metrics
    if 'Traceback' in txt or 'RuntimeError' in txt or 'AssertionError' in txt:
        return 0.0, peak_tps, med_tps, mem_gb, 'crash', metrics
    return 0.0, peak_tps, med_tps, mem_gb, 'no_val', metrics
def append(row: list[str]) -> None:
    """Append *row* to the ledger as one tab-separated, newline-terminated line."""
    with LEDGER.open('a') as ledger:
        print('\t'.join(row), file=ledger)
def perturb_candidates(round_idx: int):
    """Yield (name, env delta, description) tuples for one perturbation round.

    Deterministic widening after the first pass: every combination of matrix
    LR, embed LR, z-loss weight and GDN layout is emitted, with the GDN layout
    varying fastest and the matrix LR slowest. NS steps are pinned to '2'.
    Only the generated name depends on ``round_idx``; the grid itself is the
    same for every round.
    """
    matrix_lrs = ['0.025', '0.03', '0.035', '0.04', '0.045', '0.05']
    embed_lrs = ['0.45', '0.55', '0.6', '0.7']
    z_weights = ['0.0001', '0.0005', '0.001', '0.002']
    gdn_layouts = ['', '3,7,11', '0,4,8,12,16', '0,1,2']

    grid = itertools.product(matrix_lrs, embed_lrs, z_weights, gdn_layouts)
    for serial, combo in zip(itertools.count(), grid):
        matrix_lr, embed_lr, z_weight, gdn_layers = combo
        label = f'auto_r{round_idx:02d}_{serial:03d}'
        overrides = {
            'HYDRA_MUON_NS_STEPS': '2',
            'HYDRA_MATRIX_LR': matrix_lr,
            'HYDRA_EMBED_LR': embed_lr,
            'HYDRA_Z_LOSS_WEIGHT': z_weight,
            'HYDRA_GDN_LAYERS': gdn_layers,
        }
        summary = f'auto grid ns2 mlr={matrix_lr} embed={embed_lr} z={z_weight} gdn={gdn_layers or "none"}'
        yield label, overrides, summary
def run_candidate(name: str, delta: dict[str, str], desc: str, best: float) -> tuple[float, str]:
    """Run one training candidate, ledger its result, and return the new best.

    The run's environment is the inherited environment, overlaid with BASE and
    then with ``delta``. train.py is started from ROOT with stdout and stderr
    redirected into a per-run log under LOGDIR, and is abandoned after
    RUN_TIMEOUT seconds. The log is then parsed and one row is appended to the
    ledger.

    Args:
        name: candidate name; used in the log filename and as the ledger key.
        delta: environment overrides for this candidate.
        desc: free-text description stored in the log and ledger.
        best: best val_bpb seen so far; the run is a 'keep' only if it beats it.

    Returns:
        ``(best, status)``: ``best`` is the run's val_bpb when status is
        'keep', otherwise the ``best`` passed in. ``status`` is 'keep' or
        'discard' for runs that produced a val_bpb, 'timeout' for runs that
        hit RUN_TIMEOUT without producing one, and otherwise the failure
        status reported by parse_log.
    """
    ts = time.strftime('%Y%m%d_%H%M%S')
    log = LOGDIR / f'{ts}_{name}.log'
    env = os.environ.copy()
    env.update(BASE)
    env.update(delta)
    cmd = ['taskset', '-c', '0-15', './.venv/bin/python', '-u', 'train.py']
    print(f'[{time.strftime("%F %T")}] RUN {name} best={best:.6f} desc={desc}', flush=True)

    timed_out = False
    with log.open('w') as f:
        f.write(f'=== {name} ===\n')
        f.write(f'desc={desc}\n')
        f.write('env_delta=' + json.dumps(delta, sort_keys=True) + '\n')
        # Flush the header before the child starts writing to the same file.
        f.flush()
        try:
            subprocess.run(cmd, cwd=ROOT, env=env, stdout=f, stderr=subprocess.STDOUT, timeout=RUN_TIMEOUT)
        except subprocess.TimeoutExpired:
            timed_out = True
            f.write('\n[TIMEOUT]\n')

    val, peak, med, mem, status0, _metrics = parse_log(log)
    if status0 == 'ok':
        status = 'keep' if val < best else 'discard'
    elif timed_out and status0 == 'no_val':
        # Distinguish a hung run from one that exited without reporting a
        # val_bpb; crash/OOM statuses detected in the log are left as they are.
        status = 'timeout'
    else:
        status = status0

    append([
        time.strftime('%F_%T'), current_commit(), name, f'{val:.6f}', f'{peak:.0f}', f'{med:.0f}', f'{mem:.2f}', status, desc.replace('\t', ' '), str(log)
    ])
    print(f'[{time.strftime("%F %T")}] DONE {name} val={val:.6f} peak={peak:.0f} med={med:.0f} mem={mem:.2f} status={status} log={log}', flush=True)
    return val if status == 'keep' else best, status
def main():
    """Drive the search: run unledgered candidates until the hard gate is hit.

    Round 0 walks the fixed CANDIDATES queue; every later round walks the
    grid from perturb_candidates(round_idx). Names already in the ledger are
    skipped. After each run the loop stops if the best val_bpb is at or below
    TARGET_BPB.

    With AUTORESEARCH_ONE_SHOT=1 the process runs at most one candidate and
    then exits. Each invocation starts again at round 0, so when the fixed
    queue is fully ledgered the loop continues into the perturbation rounds
    until it finds an unledgered name; returning at that point would make
    every later invocation a no-op.
    """
    best = best_seen()
    one_shot = os.environ.get('AUTORESEARCH_ONE_SHOT', '0') == '1'
    print(f'START autoresearch may03 best_seen={best:.6f} target={TARGET_BPB:.6f} one_shot={one_shot}', flush=True)

    # A restart after the gate was already reached must not spend another run.
    if best <= TARGET_BPB:
        print(f'HARDGATE_REACHED best={best:.6f} target={TARGET_BPB:.6f}', flush=True)
        return

    round_idx = 0
    done = completed_names()
    while True:
        stream = CANDIDATES if round_idx == 0 else perturb_candidates(round_idx)
        for name, delta, desc in stream:
            if name in done:
                print(f'[{time.strftime("%F %T")}] SKIP {name} already ledgered', flush=True)
                continue
            best, _status = run_candidate(name, delta, desc, best)
            done.add(name)
            if best <= TARGET_BPB:
                print(f'HARDGATE_REACHED best={best:.6f} target={TARGET_BPB:.6f}', flush=True)
                return
            # Let CUDA/WSL settle and reduce fragmentation.
            # NOTE(review): this runs in a separate short-lived interpreter, so
            # it presumably cannot release memory held by the finished training
            # process and mostly acts as a brief pause -- confirm it is wanted.
            subprocess.run(['bash','-lc','python3 - <<"PY"\nimport torch\ntorch.cuda.empty_cache() if torch.cuda.is_available() else None\nPY'], cwd=ROOT, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            if one_shot:
                print(f'ONE_SHOT_DONE best={best:.6f}', flush=True)
                return
            time.sleep(10)
        if one_shot and round_idx == 0:
            # No remaining unledgered candidates in the fixed queue. Report it
            # and fall through to the perturbation rounds; the process still
            # exits after the single run it finds there.
            print(f'ONE_SHOT_NO_FIXED_CANDIDATE best={best:.6f}', flush=True)
        round_idx += 1


if __name__ == '__main__':
    main()