Instructions to use transmutationist/xero-bio-genesis with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use transmutationist/xero-bio-genesis with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="transmutationist/xero-bio-genesis")

# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("transmutationist/xero-bio-genesis", dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use transmutationist/xero-bio-genesis with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "transmutationist/xero-bio-genesis"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "transmutationist/xero-bio-genesis",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/transmutationist/xero-bio-genesis

SGLang

How to use transmutationist/xero-bio-genesis with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "transmutationist/xero-bio-genesis" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "transmutationist/xero-bio-genesis",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "transmutationist/xero-bio-genesis" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "transmutationist/xero-bio-genesis",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use transmutationist/xero-bio-genesis with Docker Model Runner:
```
docker model run hf.co/transmutationist/xero-bio-genesis
```

xero-bio-genesis / tests / gpu_train_daemon.py

transmutationist

XERO: card + code + docs (WIP; see STATUS_AND_AUDIT.md)

e9e9f83 verified 4 days ago

raw

history blame contribute delete

4.73 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	XERO GPU TRAIN DAEMON — keep BOTH Tesla T4s processing the live corpus, forever.
	Author: Michael Laurence Curzi
	Company: ZEDEC AI / 36N9 Genetics LLC
	License: MIT (Attribution Required)

	Pairs with the polymath ingest daemon: as new knowledge lands in
	data/fresh_corpus.jsonl, this folds fresh batches into a dual-GPU,
	data-seeded tensor kernel so the cards never sit idle.

	PYTHONPATH=modules python3 tests/gpu_train_daemon.py [--burst 15] [--fraction 0.65]

	Honest scope: the organism's knowledge core is updated by the culturer/polymath
	(CPU + network). This daemon does NOT pretend to gradient-train that core; it
	runs a genuine, data-seeded tensor workload over the ingested corpus to (a)
	exercise both T4s at high utilisation and (b) compute a dense representation of
	the live corpus. Progress -> testing_logs/GPU_TRAIN.json (read by the monitor).
	"""
	from __future__ import annotations

	import json
	import os
	import sys
	import time

	# Repository root: the parent of the directory that contains this file.
	ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	# Prepend <ROOT>/modules to the import path before the xero_resources import
	# below (the usage line in the module docstring sets PYTHONPATH=modules for
	# the same directory).
	sys.path.insert(0, os.path.join(ROOT, "modules"))
	# Directory for log output; created at import time if it does not exist.
	LOG = os.path.join(ROOT, "testing_logs")
	os.makedirs(LOG, exist_ok=True)
	# Progress file that main() overwrites after every burst.
	OUT = os.path.join(LOG, "GPU_TRAIN.json")

	# Imported only after the sys.path tweak above, hence not grouped with the
	# standard-library imports at the top of the file.
	from xero_resources import detect, saturate_gpus

	# Length of the feature vector built by _featurize().
	DIM = 8192


	def _num_arg(flag: str, default: float) -> float:
	    """Return the numeric value given for ``flag`` on the command line.

	    ``sys.argv`` is searched for ``flag`` written as two tokens
	    (``--burst 15``). If that exact token is absent, the single-token
	    ``--burst=15`` spelling is accepted as well. The value is converted
	    with ``float``.

	    Args:
	        flag: Option name to look for, including its leading dashes.
	        default: Value returned when the option cannot be read.

	    Returns:
	        The parsed value, or ``default`` when the flag is absent, is the
	        last token (nothing follows it), or its value is not a number.
	    """
	    args = sys.argv
	    raw = None
	    if flag in args:
	        # Two-token form: the value is whatever token follows the flag.
	        following = args.index(flag) + 1
	        if following < len(args):
	            raw = args[following]
	    else:
	        # Single-token form: "--flag=value".
	        prefix = flag + "="
	        raw = next(
	            (arg[len(prefix):] for arg in args if arg.startswith(prefix)),
	            None,
	        )
	    if raw is None:
	        return default
	    try:
	        return float(raw)
	    except ValueError:
	        # Non-numeric value: fall back rather than abort start-up.
	        return default


	# Command-line tunables, parsed once at import time. main() forwards them to
	# saturate_gpus() as seconds= and fraction= respectively.
	BURST = _num_arg("--burst", 15.0)  # falls back to 15.0 when --burst is not given
	FRACTION = _num_arg("--fraction", 0.7)  # falls back to 0.7 when --fraction is not given


	def _load_texts(limit: int = 4000) -> list:
	    """Collect the text payloads currently available to the daemon.

	    Two sources are read, in this order, and both are best-effort (any
	    exception from a source is swallowed and whatever was gathered so far
	    is kept):

	    1. ``xero_knowledge_sources.load_corpus()`` - the truthy ``text``
	       attribute of each yielded unit, stopping once ``limit`` texts have
	       been collected.
	    2. ``<ROOT>/data/fresh_corpus.jsonl`` - the truthy ``"text"`` value of
	       each JSON line; lines that fail to parse are skipped.

	    NOTE(review): ``limit`` is only checked while reading the first source;
	    lines from the fresh corpus are appended without a cap - confirm that
	    this is intended.

	    Args:
	        limit: Number of texts after which reading ``load_corpus()`` stops.

	    Returns:
	        The collected texts, possibly empty.
	    """
	    collected = []

	    # Source 1: the packaged corpus (imported lazily so a missing module
	    # only disables this source).
	    try:
	        from xero_knowledge_sources import load_corpus
	        for unit in load_corpus():
	            body = getattr(unit, "text", None)
	            if body:
	                collected.append(body)
	            if len(collected) >= limit:
	                break
	    except Exception:
	        pass

	    # Source 2: the live JSONL file.
	    fresh_path = os.path.join(ROOT, "data", "fresh_corpus.jsonl")
	    if not os.path.exists(fresh_path):
	        return collected
	    try:
	        with open(fresh_path, encoding="utf-8", errors="ignore") as handle:
	            for raw_line in handle:
	                try:
	                    body = json.loads(raw_line).get("text")
	                except Exception:
	                    continue
	                if body:
	                    collected.append(body)
	    except Exception:
	        pass
	    return collected


	def _featurize(texts: list, dim: int | None = None) -> list:
	    """Hashed byte-trigram histogram -> float vector (the data fold-in seed).

	    Each text is truncated to its first 2000 characters and UTF-8 encoded
	    (unencodable characters are dropped). Every run of three consecutive
	    bytes is packed into a 24-bit integer and counted in bucket
	    ``value % dim``. The counts are then divided by the largest bucket so
	    the result lies in ``[0.0, 1.0]``.

	    Args:
	        texts: Items to featurize. Anything that is not a ``str`` (for
	            example ``None``) is skipped instead of raising.
	        dim: Length of the returned vector. Defaults to the module-level
	            ``DIM`` when omitted.

	    Returns:
	        A list of ``dim`` floats; all zeros when no trigram was seen.

	    Raises:
	        ValueError: If the resolved vector length is not positive.
	    """
	    size = DIM if dim is None else dim
	    if size <= 0:
	        raise ValueError("dim must be a positive integer")

	    vec = [0.0] * size
	    for text in texts:
	        if not isinstance(text, str):
	            # Only strings can be encoded; ignore None and stray non-text
	            # records rather than crash the daemon on one bad entry.
	            continue
	        data = text[:2000].encode("utf-8", "ignore")
	        # Slide a three-byte window over the encoded text.
	        for first, second, third in zip(data, data[1:], data[2:]):
	            vec[((first << 16) | (second << 8) | third) % size] += 1.0

	    peak = max(vec)
	    if peak:
	        # At least one trigram was counted: scale so the fullest bucket is 1.0.
	        vec = [count / peak for count in vec]
	    return vec


	def _utc_stamp() -> str:
	    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
	    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


	def _write_report(record: dict) -> None:
	    """Overwrite the progress file ``OUT`` with ``record`` as indented JSON."""
	    # The context manager guarantees the handle is flushed and closed even
	    # when serialisation fails.
	    with open(OUT, "w", encoding="utf-8") as f:
	        json.dump(record, f, indent=2)


	def main() -> int:
	    """Run the daemon: feed corpus batches to ``saturate_gpus`` in a loop.

	    Start-up: ``detect()`` is queried and a banner is printed. If the
	    reported backend is not ``"cuda"``, an ``{"available": False, ...}``
	    record is written to ``OUT`` and the function returns.

	    Loop (never terminates on its own):
	      * every 8th burst, or whenever the cache is empty, the texts are
	        reloaded with ``_load_texts()``;
	      * a window of up to 256 texts is taken at a rolling offset (wrapping
	        around the cache) and turned into a seed vector by ``_featurize``;
	      * ``saturate_gpus`` is called with that seed, and its result, merged
	        with a timestamp and counters, replaces the contents of ``OUT``;
	      * a one-line summary is printed, then the loop sleeps for 5 seconds.

	    Returns:
	        ``1`` when no CUDA backend is reported. Otherwise the function only
	        leaves the loop through an exception (e.g. ``KeyboardInterrupt``).
	    """
	    info = detect()
	    print(f"GPU TRAIN DAEMON · backend={info['backend']} · gpus={info['gpus']} · "
	          f"vram={info['vram_total_gb']}GB · burst={BURST}s · frac={FRACTION}")
	    if info["backend"] != "cuda":
	        _write_report({"available": False,
	                       "reason": "no cuda backend (install CUDA torch)",
	                       "backend": info["backend"],
	                       "ts": _utc_stamp()})
	        print(" no CUDA backend visible — install CUDA torch to use the T4s. Exiting.")
	        return 1

	    t0 = time.time()
	    bursts = 0
	    offset = 0
	    units = 0
	    cached: list = []
	    while True:
	        # Refresh the corpus periodically so newly ingested texts get picked
	        # up, and immediately whenever there is nothing cached.
	        if bursts % 8 == 0 or not cached:
	            cached = _load_texts()
	            units = len(cached)

	        # Rolling 256-item window; max(1, ...) avoids a modulo by zero when
	        # the corpus is empty, and the `or` falls back to the head of the
	        # cache if the slice comes out empty.
	        start = offset % max(1, units)
	        batch = cached[start:start + 256] or cached[:256]
	        offset += 256
	        seed = _featurize(batch) if batch else None

	        r = saturate_gpus(seconds=BURST, fraction=FRACTION, seed=seed)
	        bursts += 1
	        # Keys returned by saturate_gpus are merged last, so they override
	        # the bookkeeping keys on a name clash.
	        _write_report({"ts": _utc_stamp(),
	                       "bursts": bursts, "corpus_units": units,
	                       "elapsed_s": round(time.time() - t0, 1), **r})

	        if r.get("available"):
	            print(f" burst {bursts} · {r['gpus']}xGPU · matrix {r['matrix']} · "
	                  f"{r['steps']} steps · {r.get('gflops_est')} GFLOP · "
	                  f"vram {r.get('vram_used_gb')}GB · units {units}")
	        else:
	            print(f" burst {bursts} · GPU unavailable: {r.get('reason')}")
	        time.sleep(5)


	if __name__ == "__main__":
	    # Script entry point: run the daemon and hand its return value to the
	    # shell as the exit status. Ctrl-C is reported with a short message
	    # instead of a traceback.
	    try:
	        exit_status = main()
	    except KeyboardInterrupt:
	        print("\nGPU train daemon stopped.")
	    else:
	        sys.exit(exit_status)