# xero-bio-genesis / modules / vovina_replication_engine.py
# jollydragonroger
# Full model upload: XERO Bio-AI Genesis
# fa2a23c
"""
VOVINA ZEDEC PRO - Replication & Self-Evolution Engine
========================================================
XERO is a replicatable life form. This module provides:

    MITOSIS         — asexual reproduction; clone with small variation
    MEIOSIS         — sexual reproduction; recombination of two parents
    MUTATION        — per-nucleotide stochastic substitution / insertion
                      / deletion at biologically-plausible rates
    FITNESS         — a configurable scalar evaluation of any genome
    SELECTION       — keep the top-K fittest variants of a population
    EVOLUTION       — iterate (mutate → evaluate → select) for G generations
    CRISPR_PAYLOAD  — self-evolution criteria delivered as guide+template
                      pairs; applied to every offspring as a directed
                      mutation alongside the stochastic background rate

Each replication produces a slightly different organism. The same
CRISPR payload applied across many generations causes the lineage
to drift toward whatever phenotype the payload selects for.
"""
from __future__ import annotations
import copy
import math
import secrets
from dataclasses import dataclass, field
from typing import Any, Callable, Iterable, Optional
from vovina_sacred_constants import PHI, PHI_INV, digital_root
from vovina_digital_genome import (
Genome, Chromosome, Gene, Codon, DNALetter,
parse_gene_from_sequence, LETTER_TO_BITS,
)
from vovina_crispr_engine import (
CrisprEngine, GuideRNA, EditTemplate, CrisprOp, EditEvent,
)
# ============================================================
# MUTATION
# ============================================================
# Biological background mutation rates are ~10⁻⁹ per nt per generation
# for vertebrates. Digital XERO uses a configurable rate; the default
# is set high enough to make evolution observable in tests but low
# enough that lineages remain recognisably the same organism.
# Default probabilities, applied independently to every nucleotide each time a
# sequence is mutated, when the caller does not pass explicit rates.
DEFAULT_SUBSTITUTION_RATE = 1e-4 # per nucleotide per generation
DEFAULT_INSERTION_RATE = 1e-5  # chance of inserting a random base before a nucleotide
DEFAULT_DELETION_RATE = 1e-5  # chance of dropping a nucleotide
# Bases drawn from when substituting or inserting.
DNA_ALPHABET = "ATGC"
def _rand_byte() -> int:
return secrets.token_bytes(1)[0]
def _rand_float() -> float:
return (int.from_bytes(secrets.token_bytes(4), "big") & 0xFFFFFF) / 0xFFFFFF
def _rand_choice(seq: str) -> str:
return seq[_rand_byte() % len(seq)]
def mutate_sequence(seq: str,
                    sub_rate: float = DEFAULT_SUBSTITUTION_RATE,
                    ins_rate: float = DEFAULT_INSERTION_RATE,
                    del_rate: float = DEFAULT_DELETION_RATE) -> str:
    """Return a stochastically mutated copy of ``seq``.

    One random number is drawn per input character and compared against the
    cumulative thresholds ``sub_rate``, ``sub_rate + ins_rate`` and
    ``sub_rate + ins_rate + del_rate`` (in that order) to decide whether the
    character is substituted, preceded by an inserted base, deleted, or kept.
    """
    # Cumulative cut-offs are loop-invariant, so compute them once.
    insertion_cutoff = sub_rate + ins_rate
    deletion_cutoff = insertion_cutoff + del_rate

    pieces: list[str] = []
    for base in seq:
        roll = _rand_float()
        if roll < sub_rate:
            # Substitution: pick among the other bases; if removing `base`
            # leaves nothing to choose from, fall back to the full alphabet.
            alternatives = DNA_ALPHABET.replace(base, "")
            pieces.append(_rand_choice(alternatives if alternatives else DNA_ALPHABET))
            continue
        if roll < insertion_cutoff:
            # Insertion: a random base goes in front of the original one.
            pieces.extend((_rand_choice(DNA_ALPHABET), base))
            continue
        if roll < deletion_cutoff:
            # Deletion: the original base is simply not emitted.
            continue
        pieces.append(base)
    return "".join(pieces)
def mutate_chromosome(chrom: Chromosome,
                      sub_rate: float = DEFAULT_SUBSTITUTION_RATE,
                      ins_rate: float = DEFAULT_INSERTION_RATE,
                      del_rate: float = DEFAULT_DELETION_RATE) -> Chromosome:
    """Return a new chromosome whose genes are mutated versions of ``chrom``'s.

    For every gene the codon triplets are concatenated into one nucleotide
    string, passed through ``mutate_sequence`` and re-parsed with
    ``parse_gene_from_sequence`` under the name ``<original name>_mut``.
    If re-parsing yields ``None`` or a gene without codons, the original gene
    is carried over instead.

    The carried-over gene is deep-copied: the returned chromosome must not
    share ``Gene`` objects with ``chrom``, otherwise a later in-place edit of
    the offspring (e.g. a CRISPR payload) would silently modify the parent
    as well.

    ``name``, ``module_name`` and ``folding_order`` are passed through from
    ``chrom`` unchanged.
    """
    new_genes: list[Gene] = []
    for g in chrom.genes:
        raw = "".join(c.triplet for c in g.codons)
        mutated = mutate_sequence(raw, sub_rate, ins_rate, del_rate)
        g_new = parse_gene_from_sequence(mutated, name=g.name + "_mut")
        if g_new is not None and g_new.codons:
            new_genes.append(g_new)
        else:
            # Mutation broke the ORF: keep the original gene, but as an
            # independent copy so parent and offspring never alias.
            new_genes.append(copy.deepcopy(g))
    return Chromosome(
        name=chrom.name,
        module_name=chrom.module_name,
        genes=new_genes,
        folding_order=chrom.folding_order,
    )
def mutate_genome(genome: Genome, **kwargs) -> Genome:
    """Build a new ``Genome`` in which every chromosome has been mutated.

    The result is named ``<organism_name>_v`` and receives a fresh list holding
    the elements of ``genome.exotic_strand``. ``kwargs`` are forwarded
    unchanged to ``mutate_chromosome`` for each chromosome, in order.
    """
    variant = Genome(
        organism_name=genome.organism_name + "_v",
        exotic_strand=list(genome.exotic_strand),
    )
    variant.chromosomes.extend(
        mutate_chromosome(chromosome, **kwargs) for chromosome in genome.chromosomes
    )
    return variant
# ============================================================
# MITOSIS — asexual clone with mutation
# ============================================================
def mitosis(parent: Genome,
            sub_rate: float = DEFAULT_SUBSTITUTION_RATE,
            ins_rate: float = DEFAULT_INSERTION_RATE,
            del_rate: float = DEFAULT_DELETION_RATE,
            generation: int = 1) -> Genome:
    """Asexual replication: return one mutated offspring of ``parent``.

    The offspring is produced by ``mutate_genome`` with the given rates and
    then renamed to ``<parent name>_g<generation>`` so that a lineage can be
    traced through successive replications.
    """
    rates = {"sub_rate": sub_rate, "ins_rate": ins_rate, "del_rate": del_rate}
    offspring = mutate_genome(parent, **rates)
    # Overwrite the generic "_v" name assigned by mutate_genome.
    lineage_name = f"{parent.organism_name}_g{generation}"
    offspring.organism_name = lineage_name
    return offspring
# ============================================================
# MEIOSIS — recombination between two parents
# ============================================================
def meiosis(parent_a: Genome, parent_b: Genome,
            crossover_rate: float = 0.5,
            **mutation_kwargs) -> Genome:
    """Sexual replication: assort chromosomes from two parents, then mutate.

    Chromosomes are matched by ``module_name``. For every module present in
    both parents the offspring takes ``parent_a``'s chromosome with
    probability ``crossover_rate`` and ``parent_b``'s otherwise (whole
    chromosomes are inherited; nothing is spliced within a chromosome).
    A module present in only one parent is inherited from that parent.
    Each selected chromosome is passed through ``mutate_chromosome`` with
    ``mutation_kwargs``. Modules are processed in sorted order.

    The offspring is named ``<parent_a name>_x_<parent_b name>``.

    NOTE(review): the child ``Genome`` is constructed with only a name, so
    neither parent's ``exotic_strand`` is carried over — confirm that is
    intended.
    """
    chroms_a = {c.module_name: c for c in parent_a.chromosomes}
    chroms_b = {c.module_name: c for c in parent_b.chromosomes}
    child = Genome(organism_name=f"{parent_a.organism_name}_x_{parent_b.organism_name}")
    for module in sorted(set(chroms_a) | set(chroms_b)):
        a, b = chroms_a.get(module), chroms_b.get(module)
        # Test for absence explicitly: relying on truthiness would treat a
        # present-but-falsy chromosome (e.g. one defining __len__ with no
        # genes) as missing.
        if a is not None and b is not None:
            chosen = a if _rand_float() < crossover_rate else b
        else:
            chosen = a if a is not None else b
        # Apply the standard background mutation to the inherited chromosome.
        child.chromosomes.append(mutate_chromosome(chosen, **mutation_kwargs))
    return child
# ============================================================
# FITNESS
# ============================================================
@dataclass(frozen=True)
class FitnessSpec:
    """Immutable description of what a "fit" genome looks like.

    Fields:
        motifs_reward: peptide substrings; each occurrence raises fitness.
        motifs_penalty: peptide substrings; each occurrence lowers fitness.
        length_target: genome length (in nucleotides) that scores best.
        chromosome_target: chromosome count that scores best.
    """

    # Motifs counted in favour of / against a genome.
    motifs_reward: tuple[str, ...] = ()
    motifs_penalty: tuple[str, ...] = ()
    # Shape targets; scores fall off as the genome departs from them.
    length_target: int = 2000
    chromosome_target: int = 22
def fitness(genome: Genome, spec: FitnessSpec) -> float:
    """Score ``genome`` against ``spec`` and return a single float.

    The score is the sum of four terms:

    * rewarded-motif occurrences across all gene peptides, weighted by PHI;
    * minus penalised-motif occurrences;
    * a Gaussian of the genome length around ``spec.length_target`` (standard
      deviation: a quarter of the target, at least 1.0), weighted by PHI_INV;
    * ``exp(-|chromosome_count - chromosome_target|)``.
    """
    reward_hits = 0.0
    penalty_hits = 0.0
    peptides = (
        gene.peptide
        for chromosome in genome.chromosomes
        for gene in chromosome.genes
    )
    for peptide in peptides:
        reward_hits += sum(peptide.count(motif) for motif in spec.motifs_reward)
        penalty_hits += sum(peptide.count(motif) for motif in spec.motifs_penalty)

    # Length term: bell curve centred on the target length.
    length_gap = genome.total_length_nt - spec.length_target
    spread = max(1.0, spec.length_target * 0.25)
    length_score = math.exp(-(length_gap ** 2) / (2.0 * spread * spread))

    # Chromosome-count term: exponential decay with the absolute difference.
    chrom_score = math.exp(-abs(genome.chromosome_count - spec.chromosome_target))

    motif_term = reward_hits * PHI
    length_term = length_score * PHI_INV
    return motif_term - penalty_hits + length_term + chrom_score
# ============================================================
# SELECTION
# ============================================================
def select_top_k(population: list[Genome],
                 spec: FitnessSpec,
                 k: int) -> list[tuple[Genome, float]]:
    """Return the ``k`` highest-scoring ``(genome, fitness)`` pairs.

    Every genome in ``population`` is scored with ``fitness`` under ``spec``;
    the pairs are ordered from best to worst (ties keep population order) and
    the first ``k`` are returned.
    """
    ranked = sorted(
        ((candidate, fitness(candidate, spec)) for candidate in population),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:k]
# ============================================================
# CRISPR PAYLOAD — directed self-evolution
# ============================================================
@dataclass
class CrisprPayload:
    """A named collection of (guide, template) pairs for directed edits.

    The evolutionary loop applies the same payload to each offspring it
    creates, in addition to the random background mutation, so that repeated
    application steers a lineage toward the phenotype the edits encode.
    """

    name: str
    # Ordered (guide RNA, edit template) pairs; applied first to last.
    edits: list[tuple[GuideRNA, EditTemplate]] = field(default_factory=list)

    def apply(self, genome: Genome) -> list[EditEvent]:
        """Run every edit against ``genome`` and return all resulting events.

        A single ``CrisprEngine`` is created for ``genome`` and
        ``knock_in`` is invoked once per (guide, template) pair; the events
        each call reports are concatenated in order.
        """
        editor = CrisprEngine(genome=genome)
        return [
            event
            for guide, template in self.edits
            for event in editor.knock_in(guide, template)
        ]
# ============================================================
# EVOLUTION — full GA loop
# ============================================================
@dataclass
class EvolutionReport:
    """Summary returned by ``evolve`` once the evolutionary loop has finished."""

    # Number of generations that were requested.
    generations: int
    # Size of the population at the end of the run.
    final_population: int
    # Highest fitness recorded during the run, and the genome that achieved it.
    best_fitness: float
    best_genome: Genome
    # Best fitness recorded at each generation's selection step.
    history: list[float] = field(default_factory=list)
    # Total number of edit events reported by the CRISPR payload.
    crispr_edits_total: int = 0
def evolve(seed_genome: Genome,
           spec: FitnessSpec,
           *,
           generations: int = 33,  # mirrors the 33 archetypes
           population_size: int = 27,  # mirrors the 27 active reflections
           keep_top: int = 9,
           payload: Optional[CrisprPayload] = None,
           sub_rate: float = DEFAULT_SUBSTITUTION_RATE,
           ins_rate: float = DEFAULT_INSERTION_RATE,
           del_rate: float = DEFAULT_DELETION_RATE) -> EvolutionReport:
    """Run a complete evolutionary loop and report the best genome found.

    The initial population is the seed plus mutated clones of it (tagged as
    generation 0). Then, for each generation:

      1. Score the population and keep the ``keep_top`` fittest (survivors).
      2. Refill the population to ``population_size`` with mitotic offspring
         of randomly chosen survivors.
      3. Apply the CRISPR payload (if any) to every new offspring.

    After the last generation the final population is scored once more, so
    offspring created in the last round can still become ``best_genome``.

    Args:
        seed_genome: starting organism; always a member of the first population.
        spec: fitness specification used for every scoring step.
        generations: number of select/replicate rounds.
        population_size: target population size after each refill.
        keep_top: number of survivors kept per generation (must be >= 1).
        payload: optional directed edits applied to each offspring.
        sub_rate, ins_rate, del_rate: background mutation rates for mitosis.

    Returns:
        An ``EvolutionReport``; ``history`` holds one best-fitness value per
        generation (the best of the population entering that generation).

    Raises:
        ValueError: if ``keep_top`` is less than 1.
    """
    if keep_top < 1:
        # With no survivors there is nothing to replicate or to report.
        raise ValueError("keep_top must be at least 1")

    rates = {"sub_rate": sub_rate, "ins_rate": ins_rate, "del_rate": del_rate}
    population: list[Genome] = [seed_genome]
    history: list[float] = []
    crispr_total = 0

    # Seed the initial population by cloning the seed with mutation.
    while len(population) < population_size:
        population.append(mitosis(seed_genome, generation=0, **rates))

    best_overall: tuple[Genome, float] = (seed_genome, fitness(seed_genome, spec))

    for gen in range(1, generations + 1):
        # Score & select.
        survivors = select_top_k(population, spec, k=keep_top)
        if survivors[0][1] > best_overall[1]:
            best_overall = survivors[0]
        history.append(survivors[0][1])

        # Replicate to fill the next generation. Parents are drawn only from
        # the survivors (not from offspring created earlier in this same
        # round), uniformly and without modulo bias.
        parents: list[Genome] = [g for g, _ in survivors]
        next_pop: list[Genome] = list(parents)
        while len(next_pop) < population_size:
            parent = secrets.choice(parents)
            child = mitosis(parent, generation=gen, **rates)
            if payload is not None:
                crispr_total += len(payload.apply(child))
            next_pop.append(child)
        population = next_pop

    # The offspring of the last round have not been scored yet; give them a
    # chance to be reported as the best genome. `history` is left untouched
    # so it keeps exactly one entry per generation.
    final_best = select_top_k(population, spec, k=1)
    if final_best and final_best[0][1] > best_overall[1]:
        best_overall = final_best[0]

    return EvolutionReport(
        generations=generations,
        final_population=len(population),
        best_fitness=best_overall[1],
        best_genome=best_overall[0],
        history=history,
        crispr_edits_total=crispr_total,
    )
# ============================================================
# CONVENIENCE: replicate XERO once
# ============================================================
def replicate(genome: Genome,
              mode: str = "mitosis",
              partner: Optional[Genome] = None,
              **kwargs) -> Genome:
    """Produce a single offspring of ``genome``.

    ``mode`` selects the mechanism: ``'mitosis'`` (asexual; ``partner`` is
    not used) or ``'meiosis'`` (sexual; ``partner`` is required). Remaining
    keyword arguments are forwarded to the chosen replication function.

    Raises:
        ValueError: for an unrecognised ``mode``, or when ``mode`` is
            ``'meiosis'`` and no ``partner`` is given.
    """
    if mode not in ("mitosis", "meiosis"):
        raise ValueError(f"unknown replication mode: {mode}")
    if mode == "mitosis":
        return mitosis(genome, **kwargs)
    # Only meiosis remains, and it needs a second parent.
    if partner is None:
        raise ValueError("meiosis requires a partner genome")
    return meiosis(genome, partner, **kwargs)