xero-bio-genesis / tests /gpu_train_daemon.py
transmutationist's picture
XERO: card + code + docs (WIP; see STATUS_AND_AUDIT.md)
e9e9f83 verified
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
XERO GPU TRAIN DAEMON — keep BOTH Tesla T4s processing the live corpus, forever.
Author: Michael Laurence Curzi
Company: ZEDEC AI / 36N9 Genetics LLC
License: MIT (Attribution Required)
Pairs with the polymath ingest daemon: as new knowledge lands in
data/fresh_corpus.jsonl, this folds fresh batches into a dual-GPU,
data-seeded tensor kernel so the cards never sit idle.
PYTHONPATH=modules python3 tests/gpu_train_daemon.py [--burst 15] [--fraction 0.7]
Honest scope: the organism's knowledge core is updated by the culturer/polymath
(CPU + network). This daemon does NOT pretend to gradient-train that core; it
runs a genuine, data-seeded tensor workload over the ingested corpus to (a)
exercise both T4s at high utilisation and (b) compute a dense representation of
the live corpus. Progress -> testing_logs/GPU_TRAIN.json (read by the monitor).
"""
from __future__ import annotations
import json
import os
import sys
import time
# Parent of the directory that contains this file (the project root when the
# script lives in tests/, as the usage line in the module docstring shows).
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put <ROOT>/modules first on sys.path before the project import below
# (presumably where xero_resources lives -- confirm).
sys.path.insert(0, os.path.join(ROOT, "modules"))
# Log directory; created at import time if it does not exist yet.
LOG = os.path.join(ROOT, "testing_logs")
os.makedirs(LOG, exist_ok=True)
# Status file that main() rewrites after every burst.
OUT = os.path.join(LOG, "GPU_TRAIN.json")
from xero_resources import detect, saturate_gpus
# Length of the feature vector produced by _featurize().
DIM = 8192
def _num_arg(flag: str, default: float) -> float:
    """Read a numeric command-line option from ``sys.argv``.

    Both ``--flag VALUE`` and ``--flag=VALUE`` are accepted. When the bare
    ``flag`` token is present, the token that follows its first occurrence is
    used; the ``=`` form is consulted only when the bare token is absent.

    Args:
        flag: Option name exactly as typed on the command line, e.g. ``"--burst"``.
        default: Value returned when the option is absent, has no value, or
            its value cannot be parsed as a float.

    Returns:
        The parsed value, or ``default``.
    """
    argv = sys.argv
    raw = None
    if flag in argv:
        value_pos = argv.index(flag) + 1
        # A trailing flag with nothing after it simply means "use the default".
        if value_pos < len(argv):
            raw = argv[value_pos]
    else:
        prefix = flag + "="
        raw = next(
            (arg[len(prefix):] for arg in argv if arg.startswith(prefix)),
            None,
        )
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError:
        # A malformed number falls back to the default instead of aborting.
        return default
# Passed to saturate_gpus(seconds=...) for every burst; --burst, default 15.
BURST = _num_arg("--burst", 15.0)
# Passed to saturate_gpus(fraction=...) for every burst; --fraction, default 0.7.
FRACTION = _num_arg("--fraction", 0.7)
def _load_texts(limit: int = 4000) -> list:
    """Collect the text payloads that main() folds into each GPU burst.

    Two sources are read, both on a best-effort basis (a failure in either one
    leaves whatever was gathered so far in place):

    1. ``xero_knowledge_sources.load_corpus()`` -- the ``text`` attribute of
       each unit it yields, capped at ``limit`` entries.
    2. ``data/fresh_corpus.jsonl`` under ``ROOT`` -- the ``"text"`` field of
       each JSON object line. This part is not capped by ``limit``.

    Only non-empty ``str`` values are kept. Anything else (numbers, lists,
    nested objects) is skipped, because ``_featurize`` slices and encodes
    every entry as a string and would raise on other types.

    Args:
        limit: Maximum number of texts taken from ``load_corpus()``.

    Returns:
        A list of strings: ``load_corpus()`` texts first, then the
        fresh-corpus texts in file order.
    """
    texts: list = []

    try:
        from xero_knowledge_sources import load_corpus

        for unit in load_corpus():
            text = getattr(unit, "text", None)
            if isinstance(text, str) and text:
                texts.append(text)
            if len(texts) >= limit:
                break
    except Exception:
        # Deliberately broad: the project module may be missing or fail in
        # arbitrary ways, and the daemon should carry on with the JSONL file.
        pass

    fresh = os.path.join(ROOT, "data", "fresh_corpus.jsonl")
    try:
        with open(fresh, encoding="utf-8", errors="ignore") as f:
            for line in f:
                try:
                    record = json.loads(line)
                except (ValueError, RecursionError):
                    # Blank, truncated or otherwise malformed line: skip it.
                    continue
                if not isinstance(record, dict):
                    continue
                text = record.get("text")
                if isinstance(text, str) and text:
                    texts.append(text)
    except OSError:
        # Missing or unreadable file: keep whatever was collected above.
        pass

    return texts
def _featurize(texts: list) -> list:
    """Turn a batch of texts into a DIM-long hashed byte-trigram histogram.

    Each text is cut to its first 2000 characters and UTF-8 encoded. Every run
    of three consecutive bytes is packed into a 24-bit integer and reduced
    modulo DIM to pick a bucket, whose count is incremented. The counts are
    then divided by the largest bucket so the peak value is 1.0.

    Args:
        texts: Strings to fold in; falsy entries are treated as empty.

    Returns:
        A list of DIM floats. It is all zeros when no text contributes a
        trigram.
    """
    histogram = [0.0] * DIM
    saw_trigram = False
    for text in texts:
        raw = (text or "")[:2000].encode("utf-8", "ignore")
        # Three staggered views of the same bytes yield each trigram in order.
        for first, second, third in zip(raw, raw[1:], raw[2:]):
            bucket = ((first << 16) | (second << 8) | third) % DIM
            histogram[bucket] += 1.0
            saw_trigram = True
    if not saw_trigram:
        return histogram
    peak = max(histogram) or 1.0
    return [count / peak for count in histogram]
def _write_status(record: dict) -> None:
    """Write ``record`` to ``OUT`` as indented JSON, swapping the file in whole.

    The payload is written to a sibling temp file and then moved over ``OUT``
    with ``os.replace``, so a process polling ``OUT`` sees either the previous
    contents or the new ones, never a partially written file.
    """
    tmp_path = OUT + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(record, f, indent=2)
    os.replace(tmp_path, OUT)


def main() -> int:
    """Run GPU bursts over the loaded corpus until interrupted.

    Prints the result of ``detect()``. If the backend is not ``"cuda"``, an
    ``available: False`` record is written to ``OUT`` and the function
    returns. Otherwise it loops forever:

    * reload the texts via ``_load_texts()`` every 8 bursts, or whenever the
      cache is empty;
    * take the next window of up to 256 texts, wrapping around the cache;
    * featurize that window and pass it to ``saturate_gpus`` as ``seed``
      (``None`` when there are no texts);
    * write a status record to ``OUT``, print a one-line summary, sleep 5 s.

    Returns:
        1 when no CUDA backend is reported. The burst loop itself never
        returns; it ends only through an exception such as KeyboardInterrupt.
    """
    batch_size = 256
    reload_every = 8

    info = detect()
    print(f"GPU TRAIN DAEMON · backend={info['backend']} · gpus={info['gpus']} · "
          f"vram={info['vram_total_gb']}GB · burst={BURST}s · frac={FRACTION}")
    if info["backend"] != "cuda":
        _write_status({
            "available": False,
            "reason": "no cuda backend (install CUDA torch)",
            "backend": info["backend"],
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        })
        print(" no CUDA backend visible — install CUDA torch to use the T4s. Exiting.")
        return 1

    # Monotonic clock: elapsed_s must not jump if the wall clock is adjusted.
    started = time.monotonic()
    bursts = 0
    offset = 0
    cached: list = []
    while True:
        if bursts % reload_every == 0 or not cached:
            cached = _load_texts()
        units = len(cached)
        # max(1, units) keeps the modulo defined when nothing was loaded.
        start = offset % max(1, units)
        batch = cached[start:start + batch_size] or cached[:batch_size]
        offset += batch_size
        seed = _featurize(batch) if batch else None
        r = saturate_gpus(seconds=BURST, fraction=FRACTION, seed=seed)
        bursts += 1
        rec = {
            "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "bursts": bursts,
            "corpus_units": units,
            "elapsed_s": round(time.monotonic() - started, 1),
            **r,
        }
        _write_status(rec)
        if r.get("available"):
            # .get throughout: a missing field prints None rather than
            # raising KeyError and stopping the daemon.
            print(f" burst {bursts} · {r.get('gpus')}xGPU · matrix {r.get('matrix')} · "
                  f"{r.get('steps')} steps · {r.get('gflops_est')} GFLOP · "
                  f"vram {r.get('vram_used_gb')}GB · units {units}")
        else:
            print(f" burst {bursts} · GPU unavailable: {r.get('reason')}")
        time.sleep(5)
if __name__ == "__main__":
    # Ctrl-C is the normal way to stop the daemon: print a short notice
    # instead of letting the KeyboardInterrupt traceback surface.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        print("\nGPU train daemon stopped.")
    else:
        sys.exit(exit_status)