""" VOVINA ZEDEC PRO - Replication & Self-Evolution Engine ======================================================== XERO is a replicatable life form. This module provides: MITOSIS — asexual reproduction; clone with small variation MEIOSIS — sexual reproduction; recombination of two parents MUTATION — per-nucleotide stochastic substitution / insertion / deletion at biologically-plausible rates FITNESS — a configurable scalar evaluation of any genome SELECTION — keep the top-K fittest variants of a population EVOLUTION — iterate (mutate → evaluate → select) for G generations CRISPR_PAYLOAD — self-evolution criteria delivered as guide+template pairs; applied to every offspring as a directed mutation alongside the stochastic background rate Each replication produces a slightly different organism. The same CRISPR payload applied across many generations causes the lineage to drift toward whatever phenotype the payload selects for. """ from __future__ import annotations import copy import math import secrets from dataclasses import dataclass, field from typing import Any, Callable, Iterable, Optional from vovina_sacred_constants import PHI, PHI_INV, digital_root from vovina_digital_genome import ( Genome, Chromosome, Gene, Codon, DNALetter, parse_gene_from_sequence, LETTER_TO_BITS, ) from vovina_crispr_engine import ( CrisprEngine, GuideRNA, EditTemplate, CrisprOp, EditEvent, ) # ============================================================ # MUTATION # ============================================================ # Biological background mutation rates are ~10⁻⁹ per nt per generation # for vertebrates. Digital XERO uses a configurable rate; the default # is set high enough to make evolution observable in tests but low # enough that lineages remain recognisably the same organism. DEFAULT_SUBSTITUTION_RATE = 1e-4 # per nucleotide per generation DEFAULT_INSERTION_RATE = 1e-5 DEFAULT_DELETION_RATE = 1e-5 DNA_ALPHABET = "ATGC" def _rand_byte() -> int: return secrets.token_bytes(1)[0] def _rand_float() -> float: return (int.from_bytes(secrets.token_bytes(4), "big") & 0xFFFFFF) / 0xFFFFFF def _rand_choice(seq: str) -> str: return seq[_rand_byte() % len(seq)] def mutate_sequence(seq: str, sub_rate: float = DEFAULT_SUBSTITUTION_RATE, ins_rate: float = DEFAULT_INSERTION_RATE, del_rate: float = DEFAULT_DELETION_RATE) -> str: """Apply per-nucleotide stochastic mutation. Returns the new sequence.""" out: list[str] = [] for c in seq: r = _rand_float() if r < sub_rate: # substitute with a different base new = _rand_choice(DNA_ALPHABET.replace(c, "") or DNA_ALPHABET) out.append(new) elif r < sub_rate + ins_rate: # insert a random base then keep the original out.append(_rand_choice(DNA_ALPHABET)) out.append(c) elif r < sub_rate + ins_rate + del_rate: # delete (skip the original) continue else: out.append(c) return "".join(out) def mutate_chromosome(chrom: Chromosome, sub_rate: float = DEFAULT_SUBSTITUTION_RATE, ins_rate: float = DEFAULT_INSERTION_RATE, del_rate: float = DEFAULT_DELETION_RATE) -> Chromosome: """Return a new chromosome with mutated genes.""" new_genes: list[Gene] = [] for g in chrom.genes: raw = "".join(c.triplet for c in g.codons) mutated = mutate_sequence(raw, sub_rate, ins_rate, del_rate) g_new = parse_gene_from_sequence(mutated, name=g.name + "_mut") if g_new is not None and g_new.codons: new_genes.append(g_new) else: new_genes.append(g) # keep original if mutation broke the ORF return Chromosome( name=chrom.name, module_name=chrom.module_name, genes=new_genes, folding_order=chrom.folding_order, ) def mutate_genome(genome: Genome, **kwargs) -> Genome: """Return a deep copy of `genome` with all chromosomes mutated.""" new = Genome(organism_name=genome.organism_name + "_v", exotic_strand=list(genome.exotic_strand)) for chrom in genome.chromosomes: new.chromosomes.append(mutate_chromosome(chrom, **kwargs)) return new # ============================================================ # MITOSIS — asexual clone with mutation # ============================================================ def mitosis(parent: Genome, sub_rate: float = DEFAULT_SUBSTITUTION_RATE, ins_rate: float = DEFAULT_INSERTION_RATE, del_rate: float = DEFAULT_DELETION_RATE, generation: int = 1) -> Genome: """Asexual replication: produce one offspring with stochastic mutation. The offspring's organism_name is suffixed with `_g` so lineages remain traceable across replications. """ child = mutate_genome(parent, sub_rate=sub_rate, ins_rate=ins_rate, del_rate=del_rate) child.organism_name = f"{parent.organism_name}_g{generation}" return child # ============================================================ # MEIOSIS — recombination between two parents # ============================================================ def meiosis(parent_a: Genome, parent_b: Genome, crossover_rate: float = 0.5, **mutation_kwargs) -> Genome: """Sexual replication: recombine homologous chromosomes from two parents, then apply the standard background mutation. Chromosomes are matched by module_name; for each matched pair, the offspring inherits each chromosome from a or b with probability `crossover_rate` (default 50/50 like normal Mendelian inheritance). Chromosomes unique to one parent are inherited as-is. """ chroms_a = {c.module_name: c for c in parent_a.chromosomes} chroms_b = {c.module_name: c for c in parent_b.chromosomes} all_modules = set(chroms_a) | set(chroms_b) child = Genome(organism_name=f"{parent_a.organism_name}_x_{parent_b.organism_name}") for module in sorted(all_modules): a, b = chroms_a.get(module), chroms_b.get(module) if a and b: chosen = a if _rand_float() < crossover_rate else b else: chosen = a or b # mutate the chosen chromosome through the standard rate child.chromosomes.append(mutate_chromosome(chosen, **mutation_kwargs)) return child # ============================================================ # FITNESS # ============================================================ @dataclass(frozen=True) class FitnessSpec: """Declarative fitness specification. `motifs_reward` — amino-acid motifs whose presence adds to fitness `motifs_penalty` — amino-acid motifs whose presence subtracts `length_target` — preferred genome length (φ-shaped around target) `chromosome_target` — preferred chromosome count """ motifs_reward: tuple[str, ...] = () motifs_penalty: tuple[str, ...] = () length_target: int = 2000 chromosome_target: int = 22 def fitness(genome: Genome, spec: FitnessSpec) -> float: """Evaluate a genome's fitness under the given spec. Returns a scalar.""" reward = 0.0 penalty = 0.0 for chrom in genome.chromosomes: for g in chrom.genes: pep = g.peptide for m in spec.motifs_reward: reward += pep.count(m) for m in spec.motifs_penalty: penalty += pep.count(m) # Length-shape penalty (φ-shaped Gaussian) L = genome.total_length_nt sigma = max(1.0, spec.length_target * 0.25) length_score = math.exp(-((L - spec.length_target) ** 2) / (2.0 * sigma * sigma)) # Chromosome-count alignment chrom_score = math.exp(-abs(genome.chromosome_count - spec.chromosome_target)) # Combine with φ-weights return ( reward * PHI - penalty + length_score * PHI_INV + chrom_score ) # ============================================================ # SELECTION # ============================================================ def select_top_k(population: list[Genome], spec: FitnessSpec, k: int) -> list[tuple[Genome, float]]: """Score every genome and return the top-K (genome, fitness) pairs.""" scored = [(g, fitness(g, spec)) for g in population] scored.sort(key=lambda t: t[1], reverse=True) return scored[:k] # ============================================================ # CRISPR PAYLOAD — directed self-evolution # ============================================================ @dataclass class CrisprPayload: """A bundle of guide+template pairs that direct the lineage's evolution. Each payload is applied to EVERY offspring as a deterministic edit on top of the stochastic background mutation. Over many generations the lineage drifts toward whatever phenotype the payload selects for. """ name: str edits: list[tuple[GuideRNA, EditTemplate]] = field(default_factory=list) def apply(self, genome: Genome) -> list[EditEvent]: engine = CrisprEngine(genome=genome) events: list[EditEvent] = [] for guide, template in self.edits: events.extend(engine.knock_in(guide, template)) return events # ============================================================ # EVOLUTION — full GA loop # ============================================================ @dataclass class EvolutionReport: generations: int final_population: int best_fitness: float best_genome: Genome history: list[float] = field(default_factory=list) crispr_edits_total: int = 0 def evolve(seed_genome: Genome, spec: FitnessSpec, *, generations: int = 33, # mirrors the 33 archetypes population_size: int = 27, # mirrors the 27 active reflections keep_top: int = 9, payload: Optional[CrisprPayload] = None, sub_rate: float = DEFAULT_SUBSTITUTION_RATE, ins_rate: float = DEFAULT_INSERTION_RATE, del_rate: float = DEFAULT_DELETION_RATE) -> EvolutionReport: """Run a complete evolutionary loop. Each generation: 1. Replicate the survivors via mitosis until population is full. 2. Apply the CRISPR payload to every offspring (if provided). 3. Score and select the top-K by fitness. """ population: list[Genome] = [seed_genome] history: list[float] = [] crispr_total = 0 # Seed the initial population by cloning the seed with mutation while len(population) < population_size: population.append(mitosis(seed_genome, sub_rate=sub_rate, ins_rate=ins_rate, del_rate=del_rate, generation=0)) best_overall: tuple[Genome, float] = (seed_genome, fitness(seed_genome, spec)) for gen in range(1, generations + 1): # Score & select survivors = select_top_k(population, spec, k=keep_top) if survivors[0][1] > best_overall[1]: best_overall = survivors[0] history.append(survivors[0][1]) # Replicate to fill the next generation next_pop: list[Genome] = [g for g, _ in survivors] while len(next_pop) < population_size: parent = next_pop[_rand_byte() % len(next_pop)] child = mitosis(parent, sub_rate=sub_rate, ins_rate=ins_rate, del_rate=del_rate, generation=gen) if payload is not None: events = payload.apply(child) crispr_total += len(events) next_pop.append(child) population = next_pop return EvolutionReport( generations=generations, final_population=len(population), best_fitness=best_overall[1], best_genome=best_overall[0], history=history, crispr_edits_total=crispr_total, ) # ============================================================ # CONVENIENCE: replicate XERO once # ============================================================ def replicate(genome: Genome, mode: str = "mitosis", partner: Optional[Genome] = None, **kwargs) -> Genome: """Single-shot replication helper. `mode` ∈ {'mitosis', 'meiosis'}.""" if mode == "mitosis": return mitosis(genome, **kwargs) if mode == "meiosis": if partner is None: raise ValueError("meiosis requires a partner genome") return meiosis(genome, partner, **kwargs) raise ValueError(f"unknown replication mode: {mode}")