Text Generation
Transformers
English
code
xero-bio-ai
xero
digital-organism
time-crystal
autonomous-agent
genetic-computing
epigenetics
two-state-society
harmonic-chemistry
self-aware
sacred-geometry
4-bit precision
bitsandbytes
Instructions to use transmutationist/xero-bio-genesis with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use transmutationist/xero-bio-genesis with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="transmutationist/xero-bio-genesis")

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("transmutationist/xero-bio-genesis", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use transmutationist/xero-bio-genesis with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "transmutationist/xero-bio-genesis"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "transmutationist/xero-bio-genesis",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/transmutationist/xero-bio-genesis
- SGLang
How to use transmutationist/xero-bio-genesis with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "transmutationist/xero-bio-genesis" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "transmutationist/xero-bio-genesis",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "transmutationist/xero-bio-genesis" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "transmutationist/xero-bio-genesis",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use transmutationist/xero-bio-genesis with Docker Model Runner:
docker model run hf.co/transmutationist/xero-bio-genesis
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| XERO GPU TRAIN DAEMON — keep BOTH Tesla T4s processing the live corpus, forever. | |
| Author: Michael Laurence Curzi | |
| Company: ZEDEC AI / 36N9 Genetics LLC | |
| License: MIT (Attribution Required) | |
| Pairs with the polymath ingest daemon: as new knowledge lands in | |
| data/fresh_corpus.jsonl, this folds fresh batches into a dual-GPU, | |
| data-seeded tensor kernel so the cards never sit idle. | |
| PYTHONPATH=modules python3 tests/gpu_train_daemon.py [--burst 15] [--fraction 0.65] | |
| Honest scope: the organism's knowledge core is updated by the culturer/polymath | |
| (CPU + network). This daemon does NOT pretend to gradient-train that core; it | |
| runs a genuine, data-seeded tensor workload over the ingested corpus to (a) | |
| exercise both T4s at high utilisation and (b) compute a dense representation of | |
| the live corpus. Progress -> testing_logs/GPU_TRAIN.json (read by the monitor). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import sys | |
| import time | |
# Repository root: the parent of the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(_SCRIPT_DIR)

# Project modules live under <ROOT>/modules; the path entry must be in place
# before the xero_* import below is attempted.
sys.path.insert(0, os.path.join(ROOT, "modules"))

# Progress is written to <ROOT>/testing_logs/GPU_TRAIN.json; make sure the
# directory exists before the first write.
LOG = os.path.join(ROOT, "testing_logs")
os.makedirs(LOG, exist_ok=True)
OUT = os.path.join(LOG, "GPU_TRAIN.json")

from xero_resources import detect, saturate_gpus  # noqa: E402  (needs sys.path entry above)

# Length of the feature vector produced by _featurize.
DIM = 8192
def _num_arg(flag: str, default: float) -> float:
    """Read a numeric command-line option of the form ``<flag> <value>``.

    Args:
        flag: Option name to look for in ``sys.argv`` (e.g. ``"--burst"``).
        default: Value returned when the option is absent or unusable.

    Returns:
        The token following the first occurrence of ``flag`` parsed as a
        float. ``default`` is returned when the flag is missing, is the last
        token, is followed by something that is not a number, or parses to a
        non-finite value (``nan`` / ``inf``).
    """
    import math  # local import keeps this helper self-contained

    argv = sys.argv
    if flag not in argv:
        return default
    try:
        value = float(argv[argv.index(flag) + 1])
    except (IndexError, ValueError):
        # IndexError: flag was the last token; ValueError: not a number.
        return default
    # float() happily parses "nan" and "inf"; neither is a usable duration or
    # fraction, so treat them like any other malformed value.
    return value if math.isfinite(value) else default
# Tunables read once at import time from the command line:
#   --burst     value passed to saturate_gpus as `seconds`   (default 15.0)
#   --fraction  value passed to saturate_gpus as `fraction`  (default 0.7)
BURST = _num_arg("--burst", default=15.0)
FRACTION = _num_arg("--fraction", default=0.7)
def _load_texts(limit: int = 4000) -> list:
    """Gather the text units the daemon folds into its GPU seed.

    Two sources are read, in this order:

    1. ``xero_knowledge_sources.load_corpus()``: the ``text`` attribute of
       each unit, stopping once ``limit`` texts have been collected.
    2. ``<ROOT>/data/fresh_corpus.jsonl``: the ``"text"`` field of every line
       that parses as a JSON object. ``limit`` does not apply to this file.

    Only non-empty ``str`` values are kept, so downstream code can slice and
    encode every entry without type checks.

    Args:
        limit: Maximum number of texts taken from the base corpus.

    Returns:
        A list of non-empty strings; empty when neither source is readable.
    """
    texts = []
    try:
        from xero_knowledge_sources import load_corpus

        for unit in load_corpus():
            text = getattr(unit, "text", None)
            if isinstance(text, str) and text:
                texts.append(text)
            if len(texts) >= limit:
                break
    except Exception:
        # Deliberate best-effort: the base corpus is optional and its loader
        # is project code that may be missing or fail in arbitrary ways; the
        # daemon keeps running on whatever was collected so far.
        pass

    fresh = os.path.join(ROOT, "data", "fresh_corpus.jsonl")
    try:
        with open(fresh, encoding="utf-8", errors="ignore") as f:
            for line in f:
                try:
                    record = json.loads(line)
                except (ValueError, RecursionError):
                    # Malformed or pathologically nested line: skip it.
                    continue
                text = record.get("text") if isinstance(record, dict) else None
                if isinstance(text, str) and text:
                    texts.append(text)
    except OSError:
        # File absent or unreadable (possibly mid-read): keep what we have.
        pass
    return texts
def _featurize(texts: list) -> list:
    """Hashed byte-trigram histogram -> DIM float vector (the data fold-in seed).

    Each text is truncated to its first 2000 characters and UTF-8 encoded
    (unencodable characters are dropped). Every run of three consecutive
    bytes is packed into a 24-bit key, mixed, and counted in one of DIM
    buckets. The histogram is then scaled so its largest bucket equals 1.0.

    Args:
        texts: Iterable of strings. Entries that are not ``str`` (including
            ``None``) are skipped.

    Returns:
        A list of DIM floats in ``[0.0, 1.0]``; all zeros when no text
        contributes at least one trigram.
    """
    vec = [0.0] * DIM
    for text in texts:
        if not isinstance(text, str):
            continue
        data = text[:2000].encode("utf-8", "ignore")
        for first, second, third in zip(data, data[1:], data[2:]):
            key = (first << 16) | (second << 8) | third
            # Reducing the raw key modulo DIM would keep only its low bits;
            # with a power-of-two DIM such as 8192 the first byte would never
            # influence the bucket. Multiply by an odd 32-bit constant and
            # fold the high half down so all three bytes take part.
            mixed = (key * 0x9E3779B1) & 0xFFFFFFFF
            mixed ^= mixed >> 16
            vec[mixed % DIM] += 1.0
    peak = max(vec, default=0.0)
    if peak:
        vec = [count / peak for count in vec]
    return vec
def _utc_stamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


def _write_status(payload: dict) -> None:
    """Write ``payload`` to OUT as JSON without exposing a half-written file.

    The document is written to a sibling temporary file and then moved over
    OUT with ``os.replace``, so a process reading OUT sees either the previous
    or the new complete document, and the file handle is always closed.
    """
    tmp_path = OUT + ".tmp"
    with open(tmp_path, "w") as fh:
        json.dump(payload, fh, indent=2)
    os.replace(tmp_path, OUT)


def main() -> int:
    """Run the GPU burst loop and record progress in OUT after every burst.

    The hardware is probed once via ``detect()``. Without a ``"cuda"`` backend
    a status document is written and the function returns. Otherwise the loop
    reloads the corpus every 8 bursts (or whenever it is empty), takes the
    next window of up to 256 texts, turns it into a seed with ``_featurize``
    and hands it to ``saturate_gpus`` together with BURST and FRACTION.

    Returns:
        1 when no CUDA backend is reported. With CUDA present the loop does
        not return; it ends only when the process is interrupted.
    """
    reload_every = 8    # bursts between corpus reloads
    batch_size = 256    # texts folded into each burst's seed
    retry_delay_s = 5   # pause after a burst that reported no GPU

    info = detect()
    print(f"GPU TRAIN DAEMON · backend={info['backend']} · gpus={info['gpus']} · "
          f"vram={info['vram_total_gb']}GB · burst={BURST}s · frac={FRACTION}")
    if info["backend"] != "cuda":
        _write_status({"available": False, "reason": "no cuda backend (install CUDA torch)",
                       "backend": info["backend"],
                       "ts": _utc_stamp()})
        print(" no CUDA backend visible — install CUDA torch to use the T4s. Exiting.")
        return 1

    t0 = time.time()
    bursts = 0
    offset = 0
    cached: list = []
    while True:
        if bursts % reload_every == 0 or not cached:
            cached = _load_texts()
        units = len(cached)
        # Walk the corpus in fixed-size windows, wrapping around at the end;
        # max(1, ...) guards the modulo when the corpus is empty.
        start = offset % max(1, units)
        batch = cached[start:start + batch_size] or cached[:batch_size]
        offset += batch_size
        seed = _featurize(batch) if batch else None
        r = saturate_gpus(seconds=BURST, fraction=FRACTION, seed=seed)
        bursts += 1
        rec = {"ts": _utc_stamp(),
               "bursts": bursts, "corpus_units": units,
               "elapsed_s": round(time.time() - t0, 1), **r}
        _write_status(rec)
        if r.get("available"):
            print(f" burst {bursts} · {r.get('gpus')}xGPU · matrix {r.get('matrix')} · "
                  f"{r.get('steps')} steps · {r.get('gflops_est')} GFLOP · "
                  f"vram {r.get('vram_used_gb')}GB · units {units}")
        else:
            print(f" burst {bursts} · GPU unavailable: {r.get('reason')}")
            # Back off before retrying so an unavailable GPU does not turn
            # the loop into a busy spin.
            time.sleep(retry_delay_s)
if __name__ == "__main__":
    # Ctrl-C is the normal way to stop the daemon: report it and fall through
    # to a regular interpreter exit instead of showing a traceback.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        print("\nGPU train daemon stopped.")
    else:
        sys.exit(exit_code)