Spaces:
Running
Running
| """EvoLLM β investor-ready Gradio demo. | |

Same file runs on:
  • HuggingFace Spaces (auto-deployed from GitHub)
  • Locally: `python space/app.py` (privacy-first, no cloud)

The Space ships with 5 hand-curated "personality" adapters (Default, Creative,
Concise, Technical, Empathetic). A Thompson-sampling bandit picks one per
query, learning from thumbs feedback. The evolution log shows mutation
events as new adapter variants are tested and promoted.

This file is self-contained: it inlines the genome / bandit / pool logic
so the Space stays independent of the main evollm package. The same logic
lives in evollm/* for the local server build.
| """ | |
| from __future__ import annotations | |
| import json | |
| import math | |
| import os | |
| import random | |
| import sys | |
| import time | |
| import uuid | |
| from collections import defaultdict | |
| from dataclasses import dataclass, field | |
| from datetime import datetime | |
| from pathlib import Path | |
| import gradio as gr | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| from pydantic import BaseModel, Field | |
| # Make the local `knowledge` package importable whether we're running on HF | |
| # Spaces (cwd = /app) or locally (cwd = repo root, script in space/). | |
| sys.path.insert(0, str(Path(__file__).resolve().parent)) | |
| from knowledge import ( # noqa: E402 | |
| KnowledgePipeline, | |
| generate_training_notebook, | |
| import_adapter as import_adapter_files, | |
| ) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Genome | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class Genome(BaseModel):
    """Configuration record describing one adapter variant.

    In this file the sampling fields (``temperature``, ``top_p``, ``top_k``,
    ``repeat_penalty``, ``max_tokens``) and ``system_prompt`` are what
    ``chat`` actually feeds to the model; ``memory_token_enabled`` makes
    ``chat`` prepend a memory notice to the system prompt. The LoRA fields
    are carried as metadata here (no weight loading is visible in this file).
    """

    # Identity and lineage: a random 12-hex-char id unless one is supplied.
    genome_id: str = Field(default_factory=lambda: uuid.uuid4().hex[:12])
    parent_id: str | None = None
    generation: int = 0
    name: str = "Default"
    # Base model / adapter description.
    base_model: str = "SmolLM2-360M-Instruct"
    quantization: str = "Q8_0"
    lora_rank: int = 8
    lora_alpha: int = 16
    lora_target_modules: list[str] = Field(default_factory=lambda: ["q_proj", "v_proj"])
    memory_token_enabled: bool = False
    memory_token_count: int = 4
    # Sampling parameters passed through to the chat completion call.
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 40
    repeat_penalty: float = 1.1
    max_tokens: int = 256
    system_prompt: str = (
        "You are EvoLLM, a privacy-first local assistant. "
        "Give helpful, complete answers. Be accurate and honest about uncertainty."
    )
    # Scores; None means "not measured". eval_bank_score seeds the bandit prior.
    fitness_score: float | None = None
    eval_bank_score: float | None = None
    feedback_score: float | None = None
    knowledge_sources: list[str] = Field(default_factory=list)  # doc ids this genome can read; empty = all
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Adapter pool | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
@dataclass
class Adapter:
    """One entry in the adapter pool: display metadata plus its genome.

    Declared as a dataclass because the rest of the file constructs it with
    positional and keyword arguments (``Adapter("evo_default", "Default", ...)``,
    ``Adapter(adapter_id=..., genome=...)``); a plain class with bare
    annotations has no such ``__init__`` and would raise ``TypeError``.
    """

    adapter_id: str       # unique key used by POOL_BY_ID and the bandit
    name: str             # label shown in the UI
    description: str      # one-line human-readable summary
    genome: Genome        # configuration used when this adapter answers
    promoted: bool = True  # False for children that were archived, not promoted
def build_seed_pool() -> list[Adapter]:
    """Return the five hand-written seed adapters.

    The first entry is the generation-0 "Default" baseline; the other four
    are generation-1 variants whose parent is ``evo_default``. Each adapter's
    genome id equals its adapter id.
    """

    def child_of_default(adapter_id, name, description, **genome_fields):
        # Every non-baseline seed shares the same lineage fields.
        return Adapter(
            adapter_id=adapter_id,
            name=name,
            description=description,
            genome=Genome(
                genome_id=adapter_id,
                name=name,
                generation=1,
                parent_id="evo_default",
                **genome_fields,
            ),
        )

    baseline = Adapter(
        adapter_id="evo_default",
        name="Default",
        description="Balanced baseline genome β the neutral start of evolution.",
        genome=Genome(
            genome_id="evo_default",
            name="Default",
            system_prompt=(
                "You are EvoLLM, a privacy-first local assistant. "
                "Give helpful, complete answers. Be accurate, balanced, and "
                "honest about what you don't know."
            ),
            temperature=0.7,
            top_p=0.9,
            top_k=40,
            eval_bank_score=0.62,
        ),
    )

    creative = child_of_default(
        "evo_creative",
        "Creative",
        "Higher temperature, expressive β for ideation and writing.",
        system_prompt=(
            "You are EvoLLM in creative mode. Embrace originality, vivid imagery, "
            "and surprising connections. Write expressively but stay coherent and "
            "on-topic."
        ),
        temperature=1.0,
        top_p=0.95,
        top_k=80,
        lora_rank=16,
        lora_alpha=32,
        eval_bank_score=0.55,
    )

    concise = child_of_default(
        "evo_concise",
        "Concise",
        "Terse, fact-first β optimised for quick answers.",
        system_prompt=(
            "You are EvoLLM in concise mode. Answer in 1-3 short sentences. "
            "Skip preamble and qualifications. Information density above all."
        ),
        temperature=0.4,
        top_p=0.85,
        top_k=30,
        max_tokens=128,
        eval_bank_score=0.68,
    )

    technical = child_of_default(
        "evo_technical",
        "Technical",
        "Precise, structured, code-aware β for engineering questions.",
        system_prompt=(
            "You are EvoLLM in technical mode. Use precise terminology and "
            "structured reasoning. Use code blocks when relevant. State any "
            "assumptions explicitly. Complete answers preferred over short ones."
        ),
        temperature=0.5,
        top_p=0.9,
        top_k=40,
        max_tokens=384,
        lora_rank=32,
        lora_alpha=64,
        lora_target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        eval_bank_score=0.71,
    )

    empathetic = child_of_default(
        "evo_empathetic",
        "Empathetic",
        "Warmer, context-sensitive β better for personal topics.",
        system_prompt=(
            "You are EvoLLM in empathetic mode. Acknowledge feelings before "
            "facts. Be warm, patient, and supportive while remaining honest "
            "and helpful."
        ),
        temperature=0.75,
        top_p=0.92,
        top_k=50,
        memory_token_enabled=True,
        memory_token_count=8,
        eval_bank_score=0.59,
    )

    return [baseline, creative, concise, technical, empathetic]
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Thompson-sampling bandit | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
@dataclass
class ArmStats:
    """Beta(alpha, beta) belief about one adapter's success rate.

    Declared as a dataclass because ``Bandit.register`` builds it as
    ``ArmStats(adapter_id, alpha, beta)``. ``mean`` is a property because
    ``Bandit.snapshot`` reads it as an attribute (``round(arm.mean, 3)``);
    as a plain method that expression would try to round a bound method.
    """

    adapter_id: str
    alpha: float = 1.0  # pseudo-count of successes (1.0 = uniform prior)
    beta: float = 1.0   # pseudo-count of failures

    def sample(self) -> float:
        """Draw one value from Beta(alpha, beta) via two gamma variates."""
        x = random.gammavariate(self.alpha, 1.0)
        y = random.gammavariate(self.beta, 1.0)
        total = x + y
        # Guard the (theoretical) case where both draws underflow to zero.
        return x / total if total > 0 else 0.0

    @property
    def mean(self) -> float:
        """Expected success rate, alpha / (alpha + beta)."""
        return self.alpha / (self.alpha + self.beta)
class Bandit:
    """Thompson-sampling selector over a set of named arms.

    Each arm is an ``ArmStats`` keyed by adapter id. Selection draws one
    sample per arm and returns the id with the highest draw; feedback moves
    the arm's alpha/beta counts.
    """

    def __init__(self) -> None:
        self.arms: dict[str, ArmStats] = {}

    def register(self, adapter_id: str, prior_fitness: float | None = None) -> None:
        """Add an arm if it is not already known.

        With ``prior_fitness`` given, the prior is worth 5 pseudo-observations
        split according to that fitness; otherwise the prior is uniform.
        Re-registering an existing id is a no-op.
        """
        if adapter_id in self.arms:
            return
        if prior_fitness is None:
            alpha, beta = 1.0, 1.0
        else:
            weight = 5.0
            alpha = 1.0 + prior_fitness * weight
            beta = 1.0 + (1.0 - prior_fitness) * weight
        self.arms[adapter_id] = ArmStats(adapter_id, alpha, beta)

    def select(self) -> str:
        """Sample every arm once and return the id of the best draw."""
        draws = sorted(
            ((arm.sample(), arm.adapter_id) for arm in self.arms.values()),
            reverse=True,
        )
        return draws[0][1]

    def update(self, adapter_id: str, reward: float) -> None:
        """Fold a reward (clamped to [0, 1]) into the arm, creating it if needed."""
        if adapter_id not in self.arms:
            self.register(adapter_id)
        reward = max(0.0, min(1.0, reward))
        arm = self.arms[adapter_id]
        arm.alpha += reward
        arm.beta += 1.0 - reward

    def snapshot(self) -> list[dict]:
        """Return one summary dict per arm, sorted by mean, highest first."""
        rows = [
            {
                "adapter_id": arm.adapter_id,
                "mean": round(arm.mean, 3),
                "alpha": round(arm.alpha, 2),
                "beta": round(arm.beta, 2),
                # alpha + beta minus the two uniform-prior pseudo-counts.
                "trials": int(max(0, arm.alpha + arm.beta - 2)),
                "confidence": round(1.0 - 1.0 / math.sqrt(arm.alpha + arm.beta), 3),
            }
            for arm in self.arms.values()
        ]
        return sorted(rows, key=lambda row: row["mean"], reverse=True)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Model loading | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# EVOLLM_SKIP_MODEL lets tests import this module without the (large) model
# download — used by scripts/smoke_test.py to exercise the evolution-replay
# and rendering logic. Never set in production.
# NOTE: in skip mode LLM is None and MODEL_PATH is never defined.
if os.environ.get("EVOLLM_SKIP_MODEL") == "1":
    print("EVOLLM_SKIP_MODEL=1 β skipping model load (test mode).")
    LLM = None
else:
    # Import-time side effect: fetch the GGUF file, then load it.
    print("Downloading SmolLM2-360M-Instruct Q8_0 GGUF...")
    MODEL_PATH = hf_hub_download(
        repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
        filename="smollm2-360m-instruct-q8_0.gguf",
    )
    print(f"Loading model from {MODEL_PATH}...")
    LLM = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,
        n_threads=os.cpu_count() or 4,  # fall back to 4 if the count is unknown
        n_batch=512,
        verbose=False,
    )
    print("Model ready.")
| # Real, pre-measured evolution run (produced by scripts/run_evolution_sweep.py). | |
| # The free CPU Space can't score 41 eval prompts/adapter live (~15 min each), | |
| # so we REPLAY a genuine offline run: real model, real eval bank, real scores. | |
| # Only the timing is replayed β the numbers are measured, not simulated. | |
| def _load_evolution_run() -> dict | None: | |
| # EVOLLM_RUN_PATH lets tests point at a fixture; default is the committed run. | |
| override = os.environ.get("EVOLLM_RUN_PATH") | |
| path = Path(override) if override else Path(__file__).resolve().parent / "data" / "evolution_run.json" | |
| if not path.exists(): | |
| return None | |
| try: | |
| return json.loads(path.read_text(encoding="utf-8")) | |
| except Exception as e: | |
| print(f"[evolution] failed to load run: {e}") | |
| return None | |
# Module-level state shared by every handler below. All of it lives in
# process memory and is built once at import time.
EVOLUTION_RUN = _load_evolution_run()
REPLAY_STATE = {"revealed": 0}  # how many recorded generations have been shown
POOL: list[Adapter] = build_seed_pool()
# Override hardcoded seed scores with the real measured ones if we have them.
if EVOLUTION_RUN:
    _real = {s["adapter_id"]: s["eval_bank_score"] for s in EVOLUTION_RUN.get("seeds", [])}
    for a in POOL:
        if a.adapter_id in _real:
            a.genome.eval_bank_score = _real[a.adapter_id]
# Id -> adapter index; must be kept in step with POOL by whoever mutates POOL.
POOL_BY_ID: dict[str, Adapter] = {a.adapter_id: a for a in POOL}
# Knowledge pipeline — embedder is lazy-loaded on first upload so app startup
# stays fast. On HF Space the SQLite file is ephemeral (rebuilds wipe it);
# locally it persists at data/knowledge.sqlite.
KNOWLEDGE = KnowledgePipeline()
BANDIT = Bandit()
# Seed each arm's prior from the adapter's eval-bank score.
for a in POOL:
    BANDIT.register(a.adapter_id, prior_fitness=a.genome.eval_bank_score)
# Both logs are newest-first; the functions that insert into them also trim them.
FEEDBACK_LOG: list[dict] = []
EVOLUTION_LOG: list[dict] = []
# The most recent completed chat turn; record_feedback rates this adapter.
LAST_INTERACTION: dict = {
    "adapter_id": None, "user_prompt": None, "response": None,
    "knowledge_used": [],
}
def log_evolution(event_type: str, message: str, payload: dict | None = None) -> None:
    """Prepend an event to EVOLUTION_LOG and cap the log at 200 entries.

    Args:
        event_type: Short category tag (e.g. "init", "fitness", "feedback").
        message: Human-readable description shown in the evolution log.
        payload: Optional structured details; stored as ``{}`` when omitted.
    """
    # Local import: the file's top-level import only brings in `datetime`.
    from datetime import timezone

    EVOLUTION_LOG.insert(0, {
        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
        "at": datetime.now(timezone.utc).strftime("%H:%M:%S"),
        "type": event_type,
        "message": message,
        "payload": payload or {},
    })
    # Newest entries are at the front, so trimming the tail drops the oldest.
    del EVOLUTION_LOG[200:]
# Seed the evolution log at import time so the UI has something to show.
log_evolution("init", "EvoLLM initialised β 5 seed adapters loaded into pool.")
if EVOLUTION_RUN:
    # One "name score" pair per recorded seed, best score first.
    # NOTE(review): indexes EVOLUTION_RUN["seeds"] / ["meta"] directly, whereas
    # the score override earlier uses .get("seeds", []) — a run file without
    # those keys would raise KeyError here.
    _seed_summary = " Β· ".join(
        f"{s['name']} {s['eval_bank_score']:.3f}"
        for s in sorted(EVOLUTION_RUN["seeds"], key=lambda s: -s["eval_bank_score"])
    )
    log_evolution(
        "fitness",
        f"Eval bank baseline (REAL, {EVOLUTION_RUN['meta']['eval_prompts']} prompts on "
        f"{EVOLUTION_RUN['meta']['model']}): {_seed_summary}",
    )
else:
    log_evolution(
        "fitness",
        "Eval bank baseline pending β run scripts/run_evolution_sweep.py to populate real scores.",
    )
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Chat | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def chat(message, history, force_adapter, knowledge_mode):
    """Stream a reply to ``message`` using a forced or bandit-chosen adapter.

    This is a generator: it yields the growing response text after each
    streamed token, then (when knowledge was used) once more with a sources
    footer appended. After the stream finishes it records the turn in
    LAST_INTERACTION so a later rating can be credited to the adapter.

    Args:
        message: The user's new message.
        history: Prior turns as dicts with "role" and "content" keys.
        force_adapter: Adapter display name, or the "Auto" dropdown value /
            a falsy value to let the bandit choose.
        knowledge_mode: When truthy, retrieve up to 3 chunks from KNOWLEDGE
            and add them to the system prompt.
    """
    # Skip-model mode leaves LLM as None; fail with a readable message rather
    # than an AttributeError, and before touching any shared state.
    if LLM is None:
        yield "Model is not loaded (EVOLLM_SKIP_MODEL=1), so chat is unavailable in this session."
        return

    adapter_id = None
    if force_adapter and force_adapter != "𧬠Auto (bandit)":
        adapter_id = next((a.adapter_id for a in POOL if a.name == force_adapter), None)
    if adapter_id is None:
        adapter_id = BANDIT.select()

    adapter = POOL_BY_ID.get(adapter_id)
    if adapter is None:
        # The bandit can still hold an arm for an adapter that has been dropped
        # from the pool (e.g. by a replay reset); fall back to the first entry.
        adapter = POOL[0]
        adapter_id = adapter.adapter_id
    genome = adapter.genome

    system_prompt = genome.system_prompt
    if genome.memory_token_enabled:
        memory_prefix = "[Memory tokens active β maintain context awareness across turns.]"
        system_prompt = f"{memory_prefix}\n\n{system_prompt}"

    retrieved: list[dict] = []
    if knowledge_mode:
        try:
            retrieved = KNOWLEDGE.query(message, top_k=3)
        except Exception as e:
            # Retrieval is best-effort: answer without context if it fails.
            print(f"[knowledge] retrieval failed: {e}")
            retrieved = []
    if retrieved:
        context_block = "\n\n".join(
            f"[Source: {r['document_name']} Β· chunk {r['chunk_index']}]\n{r['text']}"
            for r in retrieved
        )
        system_prompt = (
            f"{system_prompt}\n\n"
            f"Use the following context to ground your answer. If the answer is "
            f"not in the context, say so honestly.\n\n"
            f"--- BEGIN CONTEXT ---\n{context_block}\n--- END CONTEXT ---"
        )

    messages: list[dict[str, str]] = [{"role": "system", "content": system_prompt}]
    # Copy only role/content so any extra keys on history entries are dropped.
    messages.extend({"role": turn["role"], "content": turn["content"]} for turn in history)
    messages.append({"role": "user", "content": message})

    stream = LLM.create_chat_completion(
        messages=messages,
        temperature=genome.temperature,
        top_p=genome.top_p,
        top_k=genome.top_k,
        repeat_penalty=genome.repeat_penalty,
        max_tokens=genome.max_tokens,
        stream=True,
    )
    partial = ""
    for chunk in stream:
        delta = chunk["choices"][0].get("delta", {})
        token = delta.get("content")
        if token:
            partial += token
            yield partial

    # Append a sources footer when knowledge was used.
    if retrieved:
        sources_line = " Β· ".join(
            f"{r['document_name']} (chunk {r['chunk_index']})" for r in retrieved
        )
        partial = f"{partial}\n\n_π Sources: {sources_line}_"
        yield partial

    LAST_INTERACTION["adapter_id"] = adapter_id
    LAST_INTERACTION["user_prompt"] = message
    LAST_INTERACTION["response"] = partial
    LAST_INTERACTION["knowledge_used"] = [r["document_id"] for r in retrieved]
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Feedback handlers | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def record_feedback(rating: int) -> str:
    """Credit a thumbs rating to the adapter that produced the last reply.

    Args:
        rating: Positive for thumbs-up; zero or negative for thumbs-down.

    Returns:
        A short status message for the UI.
    """
    # Local import: the file's top-level import only brings in `datetime`.
    from datetime import timezone

    adapter_id = LAST_INTERACTION["adapter_id"]
    if not adapter_id:
        return "No interaction yet β chat first, then rate."

    adapter = POOL_BY_ID.get(adapter_id)
    if adapter is None:
        # The rated adapter was dropped from the pool (e.g. replay reset).
        # Checked before BANDIT.update so no arm is re-created for it.
        return "That adapter is no longer in the pool. Chat again, then rate."

    positive = rating > 0
    # The two labels must differ, otherwise the logs cannot tell up from down.
    label = "👍" if positive else "👎"

    BANDIT.update(adapter_id, reward=1.0 if positive else 0.0)
    FEEDBACK_LOG.insert(0, {
        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
        "at": datetime.now(timezone.utc).strftime("%H:%M:%S"),
        "adapter": adapter.name,
        "rating": label,
        "prompt": (LAST_INTERACTION["user_prompt"] or "")[:80],
    })
    del FEEDBACK_LOG[100:]  # keep only the 100 newest entries
    log_evolution(
        "feedback",
        f"{label} for {adapter.name} β bandit updated.",
        {"adapter_id": adapter_id, "rating": rating},
    )
    return f"Recorded {label} for **{adapter.name}**. Bandit updated."
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Manual evolution trigger (for demos) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def trigger_evolution_cycle() -> str:
    """Reveal the next generation of the REAL pre-measured evolution run.

    Each step here was actually computed offline: the child genome was run
    against the 41-prompt eval bank on the real model, and its fitness is the
    measured score. We replay one recorded generation per click so a viewer
    can watch the fitness curve climb. Nothing here is synthetic.

    Side effects per call: advances REPLAY_STATE["revealed"], appends the
    child adapter to POOL / POOL_BY_ID, registers it with the bandit, and
    writes mutation plus promotion/archive events to the evolution log.

    Returns:
        Markdown describing the revealed generation, or an explanatory
        message when no run is loaded or every generation has been shown.
    """
    if not EVOLUTION_RUN:
        return (
            "### No measured run available yet\n\n"
            "The fitness curve replays a **real** offline evolution run. To generate it, "
            "run on a machine with internet + the model:\n\n"
            "```bash\npython scripts/run_evolution_sweep.py --generations 8\n```\n\n"
            "Commit the resulting `space/data/evolution_run.json` and redeploy β the curve "
            "and this button light up with genuine measured scores. (The free CPU Space "
            "can't score 41 prompts/adapter live, which is why we replay a measured run.)"
        )
    lineage = EVOLUTION_RUN["lineage"]
    idx = REPLAY_STATE["revealed"]
    # Everything already revealed: report the final result instead of stepping.
    if idx >= len(lineage):
        best = EVOLUTION_RUN["best_fitness_by_generation"][-1]
        return (
            f"### Evolution run complete\n\n"
            f"All {len(lineage)} recorded generations revealed. "
            f"Best measured fitness: **{best:.3f}** "
            f"(seed baseline best: {EVOLUTION_RUN['best_fitness_by_generation'][0]:.3f}). "
            f"Use **π Reset replay** to watch it again."
        )
    step = lineage[idx]
    # The cursor advances before the step is processed.
    REPLAY_STATE["revealed"] = idx + 1
    child_id = step["child_id"]
    g = step["genome"]
    # Rebuild the child genome from the recorded fields; anything the record
    # omits falls back to the defaults given here.
    child_genome = Genome(
        genome_id=child_id,
        parent_id=step["parent_id"],
        generation=step["generation"],
        name=f"Gen-{step['generation']} child",
        system_prompt=g.get("system_prompt", ""),
        temperature=g.get("temperature", 0.7),
        top_p=g.get("top_p", 0.9),
        top_k=g.get("top_k", 40),
        max_tokens=g.get("max_tokens", 256),
        memory_token_enabled=bool(g.get("memory_prefix")),
        eval_bank_score=step["child_fitness"],
    )
    child = Adapter(
        adapter_id=child_id,
        name=child_genome.name,
        description=f"Gen {step['generation']}: {step['mutation_kind']} β {step['mutation_detail']}",
        genome=child_genome,
        promoted=step["promoted"],
    )
    POOL.append(child)
    POOL_BY_ID[child_id] = child
    # The measured fitness becomes the child's bandit prior.
    BANDIT.register(child_id, prior_fitness=child_genome.eval_bank_score)
    log_evolution(
        "mutation",
        f"Gen {step['generation']}: mutated {step['parent_name']} via "
        f"{step['mutation_kind']} ({step['mutation_detail']}) β measured fitness "
        f"{step['child_fitness']:.3f}",
        {"adapter_id": child_id, "parent_id": step["parent_id"], "kind": step["mutation_kind"]},
    )
    # The promoted/archived verdict comes from the recording; it is not recomputed.
    if step["promoted"]:
        log_evolution(
            "promotion",
            f"PROMOTED gen-{step['generation']} child β {step['child_fitness']:.3f} β₯ "
            f"parent {step['parent_fitness']:.3f}. New population best.",
        )
        verdict = f"β **Promoted** β beats parent ({step['child_fitness']:.3f} β₯ {step['parent_fitness']:.3f})"
    else:
        log_evolution(
            "archive",
            f"Archived gen-{step['generation']} child β {step['child_fitness']:.3f} < "
            f"parent {step['parent_fitness']:.3f}.",
        )
        verdict = f"π¦ **Archived** β below parent ({step['child_fitness']:.3f} < {step['parent_fitness']:.3f})"
    remaining = len(lineage) - REPLAY_STATE["revealed"]
    return (
        f"### Generation {step['generation']} (real, measured)\n\n"
        f"**Mutation**: `{step['mutation_kind']}` β {step['mutation_detail']}\n\n"
        f"**Parent fitness**: {step['parent_fitness']:.3f} β "
        f"**Child fitness**: {step['child_fitness']:.3f}\n\n"
        f"{verdict}\n\n"
        f"_{remaining} generation(s) left to reveal._"
    )
def reset_evolution_replay() -> str:
    """Reset the replay and drop any revealed children from the pool.

    Children are recognised by an adapter id starting with ``"evo_g"``; every
    other adapter survives. Besides POOL / POOL_BY_ID, this also removes
    bandit arms that no longer have an adapter and clears LAST_INTERACTION if
    it pointed at a dropped child. Without that, the bandit could still pick
    a removed id and the next chat, rating or genome render would hit a
    KeyError on POOL_BY_ID.

    Returns:
        A status message for the UI.
    """
    REPLAY_STATE["revealed"] = 0

    survivors = [a for a in POOL if not a.adapter_id.startswith("evo_g")]
    # Mutate in place: other code holds references to these same objects.
    POOL[:] = survivors
    POOL_BY_ID.clear()
    POOL_BY_ID.update({a.adapter_id: a for a in survivors})

    # Drop bandit arms whose adapter is gone so select() cannot return them.
    for stale_id in [aid for aid in BANDIT.arms if aid not in POOL_BY_ID]:
        del BANDIT.arms[stale_id]

    last_id = LAST_INTERACTION.get("adapter_id")
    if last_id is not None and last_id not in POOL_BY_ID:
        LAST_INTERACTION["adapter_id"] = None

    return "π Replay reset β back to the seed population. Click Trigger to watch evolution again."
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # UI render helpers | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def render_fitness_curve():
    """Build the DataFrame behind the fitness-over-generations plot.

    Only the part of the recorded curve up to the current replay position is
    returned: generation 0 (the seeds) plus one point per revealed child.
    With no run loaded, a single (0, 0.0) point is returned.
    """
    import pandas as pd

    if EVOLUTION_RUN:
        shown = REPLAY_STATE["revealed"] + 1  # gen 0 (seed) + revealed children
        visible = EVOLUTION_RUN["best_fitness_by_generation"][:shown]
        generations = list(range(len(visible)))
    else:
        generations, visible = [0], [0.0]

    return pd.DataFrame({"generation": generations, "best_fitness": visible})
def render_evolution_summary() -> str:
    """Return a Markdown summary of the recorded evolution run.

    Reports the model, prompt count and generation count from the run's
    metadata, the seed-best versus final-best fitness with absolute and
    relative gain, and how many generations the replay has revealed so far.
    With no run loaded, returns a hint on how to generate one.
    """
    if not EVOLUTION_RUN:
        return (
            "_No measured run loaded. Run `python scripts/run_evolution_sweep.py` "
            "and commit `space/data/evolution_run.json` to populate this with real scores._"
        )
    m = EVOLUTION_RUN["meta"]
    curve = EVOLUTION_RUN["best_fitness_by_generation"]
    gain = curve[-1] - curve[0]
    revealed = REPLAY_STATE["revealed"]
    return (
        f"**Real measured run** Β· model `{m['model']}` Β· {m['eval_prompts']} eval prompts Β· "
        f"{m['generations']} generations.\n\n"
        f"Seed best **{curve[0]:.3f}** β evolved best **{curve[-1]:.3f}** "
        # The "+" format flag prints the real sign; a literal "+" would render
        # a negative gain as "+-0.010".
        f"(**{gain:+.3f}**, {gain / max(curve[0], 1e-9) * 100:.0f}% relative). "
        f"Revealed: {revealed}/{len(EVOLUTION_RUN['lineage'])}.\n\n"
        f"_The curve is genuine β measured offline because the free CPU Space can't score "
        f"41 prompts/adapter live. Only the replay timing is for the demo._"
    )
def render_active_genome() -> str:
    """Return the active adapter's genome as indented JSON.

    The active adapter is the one recorded in LAST_INTERACTION. If none is
    recorded, or the recorded id is no longer in POOL_BY_ID (a replay reset
    can remove it), the first pool entry is shown instead of raising
    KeyError. An empty pool yields ``"{}"``.
    """
    adapter = POOL_BY_ID.get(LAST_INTERACTION["adapter_id"])
    if adapter is None:
        if not POOL:
            return "{}"
        adapter = POOL[0]
    return adapter.genome.model_dump_json(indent=2)
def render_pool_table():
    """Return the adapter pool as table rows, newest generation first.

    Each row is [name, generation, eval-bank score, bandit mean, trials,
    status icon]. Adapters with no bandit arm show a placeholder mean and
    zero trials.
    """
    stats_by_id = {row["adapter_id"]: row for row in BANDIT.snapshot()}
    ordered = sorted(
        POOL,
        key=lambda adapter: (-adapter.genome.generation, adapter.adapter_id),
    )

    table = []
    for adapter in ordered:
        arm = stats_by_id.get(adapter.adapter_id, {})
        score = adapter.genome.eval_bank_score
        status = "β " if adapter.promoted else "π§ͺ"
        table.append([
            adapter.name,
            adapter.genome.generation,
            "β" if score is None else score,
            arm.get("mean", "β"),
            arm.get("trials", 0),
            status,
        ])
    return table
def render_evolution_log() -> str:
    """Render the 40 newest evolution events as Markdown, one per paragraph."""
    if not EVOLUTION_LOG:
        return "_No events yet._"

    # Event type -> icon; unknown types fall back to a bullet.
    icons = {
        "init": "π§¬", "fitness": "π", "feedback": "π",
        "mutation": "π", "promotion": "π", "archive": "π¦",
    }
    return "\n\n".join(
        f"`{event['at']}` {icons.get(event['type'], 'β’')} **{event['type']}** β {event['message']}"
        for event in EVOLUTION_LOG[:40]
    )
def render_feedback_log() -> str:
    """Render the 20 newest feedback entries as Markdown, one per paragraph."""
    if not FEEDBACK_LOG:
        return "_No feedback yet β rate a response with π or π._"
    return "\n\n".join(
        f"`{entry['at']}` {entry['rating']} **{entry['adapter']}** β _{entry['prompt']}β¦_"
        for entry in FEEDBACK_LOG[:20]
    )
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Knowledge helpers | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def render_knowledge_table():
    """Return indexed documents as rows: name, format, size, chunks, upload time.

    Sizes below 1024 KB are shown in KB, larger ones in MB; a missing size
    counts as 0. The upload timestamp is cut to seconds with the "T"
    separator replaced by a space. With no documents, a single placeholder
    row is returned.
    """
    docs = KNOWLEDGE.documents()
    if not docs:
        return [["β", "β", "β", "β", "β"]]

    def human_size(size_bytes):
        kb = (size_bytes or 0) / 1024
        if kb < 1024:
            return f"{kb:.1f} KB"
        return f"{kb/1024:.1f} MB"

    return [
        [
            doc["name"],
            doc["format"].upper(),
            human_size(doc["size_bytes"]),
            doc["chunk_count"],
            doc["uploaded_at"][:19].replace("T", " "),
        ]
        for doc in docs
    ]
def render_knowledge_stats() -> str:
    """Return a one-line Markdown summary of the knowledge index."""
    stats = KNOWLEDGE.stats()
    doc_count = stats["document_count"]
    if doc_count == 0:
        return "_No documents indexed yet._"

    size_mb = stats["total_bytes"] / (1024 * 1024)
    parts = [
        f"π **{doc_count} documents** Β· ",
        f"**{stats['chunk_count']} chunks** Β· ",
        f"**{size_mb:.2f} MB** total Β· ",
        "embedder: `intfloat/multilingual-e5-small` (384-dim)",
    ]
    return "".join(parts)
def handle_file_upload(files) -> tuple[str, list]:
    """Index each uploaded file and report the outcome per file.

    Args:
        files: Uploaded items; each is either a path string or an object
            whose ``name`` attribute is the path.

    Returns:
        (Markdown status text, refreshed knowledge table rows). A failure on
        one file is reported in the status text and does not stop the rest.
    """
    if not files:
        return "_No files selected._", render_knowledge_table()

    outcomes = []
    for item in files:
        path = item if isinstance(item, str) else item.name
        try:
            indexed = KNOWLEDGE.ingest_file(path)
            outcomes.append(f"β **{indexed['name']}** β {indexed['chunk_count']} chunks indexed")
            log_evolution(
                "knowledge",
                f"π Indexed {indexed['name']} β {indexed['chunk_count']} chunks",
                {"document_id": indexed["document_id"]},
            )
        except Exception as e:
            outcomes.append(f"β Failed: `{Path(path).name}` β {e}")

    status = "\n\n".join(outcomes)
    return status, render_knowledge_table()
def handle_text_paste(name: str, text: str) -> tuple[str, list, str, str]:
    """Index pasted text under the given document name.

    Returns:
        (status text, knowledge table rows, name field value, text field
        value). On success both fields come back empty so the form clears;
        on a validation or ingest failure the user's input is handed back.
    """
    has_input = bool(name and text and text.strip())
    if not has_input:
        return "_Provide both a name and some text._", render_knowledge_table(), name, text

    try:
        indexed = KNOWLEDGE.ingest_text(name.strip(), text)
        log_evolution(
            "knowledge",
            f"π Indexed pasted text '{indexed['name']}' β {indexed['chunk_count']} chunks",
            {"document_id": indexed["document_id"]},
        )
        status = f"β **{indexed['name']}** β {indexed['chunk_count']} chunks indexed"
        return status, render_knowledge_table(), "", ""
    except Exception as e:
        return f"β {e}", render_knowledge_table(), name, text
def handle_delete_all() -> tuple[str, list]:
    """Clear the knowledge index; return a status line and the fresh table.

    The document count is read before clearing so it can be reported, and an
    evolution-log entry is written only if something was actually removed.
    """
    removed = KNOWLEDGE.stats()["document_count"]
    KNOWLEDGE.clear()
    if removed > 0:
        log_evolution("knowledge", f"π Cleared all {removed} documents from index")
    status = f"ποΈ Cleared {removed} document(s)."
    return status, render_knowledge_table()
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # LoRA-on-upload (Phase 4b) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Generated training notebooks are written here. The path is relative to the
# process's working directory, and the directory is created at import time.
NOTEBOOK_DIR = Path("data/notebooks")
NOTEBOOK_DIR.mkdir(parents=True, exist_ok=True)
def handle_generate_notebook(
    selected_doc_names: list[str],
    adapter_name: str,
    lora_rank: int,
    num_epochs: int,
):
    """Build a Colab training notebook from the chunks of the selected documents.

    Returns:
        (Markdown status text, notebook path as a string), or
        (hint text, None) when nothing is selected or the selected documents
        have no stored chunks.
    """
    if not selected_doc_names:
        return "_Select at least one indexed document first._", None

    adapter_name = (adapter_name or "").strip() or "user_adapter"

    # One "?" placeholder per selected name; the names themselves are bound
    # as query parameters, never spliced into the SQL text.
    placeholders = ",".join("?" for _ in range(len(selected_doc_names)))
    query = (
        "SELECT documents.name as name, chunks.text as text "
        "FROM chunks JOIN documents ON chunks.document_id = documents.id "
        f"WHERE documents.name IN ({placeholders}) "
        "ORDER BY chunks.document_id, chunks.chunk_index"
    )
    with KNOWLEDGE.store._conn() as c:  # noqa: SLF001 — internal access OK for now
        rows = c.execute(query, selected_doc_names).fetchall()

    chunks = [row["text"] for row in rows]
    if not chunks:
        return "_No chunks found for those documents β re-index them?_", None

    # File-name-safe version of the adapter name: non-alphanumerics become "_",
    # capped at 40 characters.
    sanitized = "".join(ch if ch.isalnum() else "_" for ch in adapter_name)
    safe_name = sanitized[:40] or "user_adapter"
    out_path = NOTEBOOK_DIR / f"evollm_train_{safe_name}.ipynb"
    doc_total = len(selected_doc_names)

    generate_training_notebook(
        adapter_name=adapter_name,
        chunks=chunks,
        source_doc_names=selected_doc_names,
        lora_rank=int(lora_rank),
        num_epochs=int(num_epochs),
        output_path=out_path,
        description=f"Trained from {doc_total} document(s) via EvoLLM",
    )
    log_evolution(
        "knowledge",
        f"π Generated training notebook for '{adapter_name}' "
        f"({len(chunks)} chunks, {doc_total} doc(s))",
    )

    steps = [
        f"β **Notebook ready**: `{out_path.name}` ({len(chunks)} training examples).\n\n",
        "1. Download the file below\n",
        "2. Open it in [Google Colab](https://colab.research.google.com/)\n",
        "3. **Runtime β Change runtime type β T4 GPU**, then **Runtime β Run all**\n",
        "4. After training, download the two output files (`*.gguf` and `*.json`)\n",
        "5. Come back here, go to **𧬠Adapter Pool** tab β **π₯ Import trained adapter**",
    ]
    return "".join(steps), str(out_path)
def handle_import_adapter(gguf_file, manifest_file):
    """Receive a trained LoRA + manifest, register a new adapter in the pool.

    Args:
        gguf_file: The uploaded adapter weights (path string or object with
            a ``name`` path attribute).
        manifest_file: The uploaded JSON manifest, same shape.

    Returns:
        (Markdown status text, refreshed pool table rows).
    """
    if not (gguf_file and manifest_file):
        return "_Drop both the .gguf and the .json files._", render_pool_table()

    def as_path(upload):
        return upload if isinstance(upload, str) else upload.name

    try:
        info = import_adapter_files(as_path(gguf_file), as_path(manifest_file))
    except Exception as e:
        return f"β Import failed: {e}", render_pool_table()

    manifest = info["manifest"]
    adapter_id = info["adapter_id"]
    display_name = info["name"]

    # Build a genome reflecting the trained-from-knowledge adapter. The
    # source list is capped at 160 characters inside the prompt.
    sources = ', '.join(manifest.get('source_documents', []))[:160] or 'unknown sources'
    sys_prompt = (
        f"You are EvoLLM, fine-tuned on user-provided documents "
        f"({sources}). "
        f"Draw on what you learned from those sources when relevant."
    )
    genome = Genome(
        genome_id=adapter_id,
        parent_id="evo_default",
        generation=1,
        name=display_name,
        base_model=manifest.get("base_model", "SmolLM2-1.7B-Instruct"),
        lora_rank=int(manifest.get("lora_rank", 16)),
        lora_alpha=int(manifest.get("lora_alpha", 32)),
        lora_target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        system_prompt=sys_prompt,
        eval_bank_score=None,  # unmeasured until the eval bank runs against it
    )
    imported = Adapter(
        adapter_id=adapter_id,
        name=display_name,
        description=info["description"] or "User-trained from documents",
        genome=genome,
        promoted=True,
    )

    POOL.append(imported)
    POOL_BY_ID[adapter_id] = imported
    BANDIT.register(adapter_id, prior_fitness=0.55)  # neutral-mid prior

    log_evolution(
        "promotion",
        f"π₯ IMPORTED user-trained adapter '{info['name']}' "
        f"({manifest.get('training_examples', '?')} examples, rank {genome.lora_rank}) β joined the pool.",
        {"adapter_id": adapter_id, "source_documents": manifest.get("source_documents", [])},
    )
    note = (
        f"β **{info['name']}** imported and added to the adapter pool.\n\n"
        f"_GGUF saved to `{info['gguf_path']}`. The Bandit will start sampling it on the next "
        f"chat. Real LoRA weight-loading is active in the local desktop app; on this Space the "
        f"adapter uses its trained-from-data genome (system prompt + sampling config)._"
    )
    return note, render_pool_table()
def refresh_all():
    """Re-render the four live panels in a fixed order.

    Returns:
        (active genome JSON, pool table rows, evolution log Markdown,
        feedback log Markdown).
    """
    genome_json = render_active_genome()
    pool_rows = render_pool_table()
    evolution_md = render_evolution_log()
    feedback_md = render_feedback_log()
    return genome_json, pool_rows, evolution_md, feedback_md
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # UI | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Custom stylesheet handed to gr.Blocks(css=CSS). It caps the page width and
# styles the three classes used by INTRO_HTML: the gradient `.evo-header`
# banner, the pill-shaped `.metric-badge` spans, and the `.evo-notice` paragraph.
CSS = """
.gradio-container {max-width: 1400px !important;}
.evo-header {background: linear-gradient(135deg, #1e3a8a 0%, #7c3aed 100%);
color: white; padding: 24px; border-radius: 12px; margin-bottom: 16px;
box-shadow: 0 4px 12px rgba(124, 58, 237, 0.15);}
.evo-header h1 {margin: 0 0 8px 0; font-size: 2rem;}
.evo-header p {margin: 0; opacity: 0.92; line-height: 1.5;}
.metric-badge {display: inline-block; padding: 4px 12px; background: rgba(255,255,255,0.18);
color: white; border-radius: 999px; margin-right: 6px; margin-top: 4px;
font-size: 0.85rem; backdrop-filter: blur(4px);}
.evo-notice {margin-top: 14px !important; padding: 10px 14px;
background: rgba(255, 255, 255, 0.12); border-radius: 8px;
font-size: 0.85rem !important;}
"""
# Static header banner rendered once via gr.HTML(INTRO_HTML) at the top of the
# page. It relies on the `.evo-header`, `.metric-badge` and `.evo-notice`
# classes defined in CSS.
# NOTE(review): the tagline says "1B-class" while the badges advertise
# SmolLM2-360M (web) and SmolLM2-1.7B (local) -- confirm the intended wording.
INTRO_HTML = """
<div class="evo-header">
<h1>𧬠EvoLLM β Self-Evolving Local LLM</h1>
<p>A privacy-first 1B-class language model that <b>visibly improves itself</b> through
multi-armed-bandit adapter selection and Lamarckian evolution. Runs fully on-device.
No telemetry. No API calls.</p>
<div style="margin-top: 14px;">
<span class="metric-badge">π§ Web demo: SmolLM2-360M</span>
<span class="metric-badge">π» Local app: SmolLM2-1.7B</span>
<span class="metric-badge">𧬠Adapter pool: 5 seed variants</span>
<span class="metric-badge">π― Bandit: Thompson sampling</span>
</div>
<p class="evo-notice">
β‘ <b>This web demo runs SmolLM2-360M for speed</b> on the free CPU tier (~5 tok/s, answers in 20-40s).
The local desktop app runs the full <b>SmolLM2-1.7B</b> for higher quality (5-30Γ faster on real hardware).
The evolution engine, adapter pool, and bandit work identically on both.
</p>
</div>
"""
# Page layout: the header banner followed by five tabs (Chat, Adapter Pool,
# Knowledge, Evolution Log, About). Only components are created here; the
# event listeners that connect them are registered after the About tab.
# NOTE(review): the nesting below is reconstructed from the order of the
# context managers because the indentation was lost -- confirm against the
# rendered layout.
with gr.Blocks(title="EvoLLM", theme=gr.themes.Soft(), css=CSS) as demo:
    gr.HTML(INTRO_HTML)
    with gr.Tabs():
        # -- Tab 1: Chat ------------------------------------------------------
        with gr.Tab("π¬ Chat"):
            with gr.Row():
                # Left column: conversation controls and transcript.
                with gr.Column(scale=3):
                    with gr.Row():
                        # NOTE(review): choices are computed once from POOL at
                        # build time; nothing visible here updates them when
                        # POOL grows later -- confirm whether that is intended.
                        adapter_choice = gr.Dropdown(
                            choices=["𧬠Auto (bandit)"] + [a.name for a in POOL],
                            value="𧬠Auto (bandit)",
                            label="Adapter selection",
                            info="Auto = bandit picks based on learned preference. Or force one.",
                            scale=3,
                        )
                        knowledge_toggle = gr.Checkbox(
                            label="π Knowledge mode",
                            value=False,
                            info="Retrieve from uploaded documents and cite sources",
                            scale=1,
                        )
                    # type="messages": history is a list of {"role", "content"} dicts.
                    chatbot = gr.Chatbot(
                        height=480, show_copy_button=True, type="messages",
                        avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"),
                    )
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Ask EvoLLM anything⦠(Enter to send)",
                            show_label=False, autofocus=True, scale=8,
                            container=False,
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1, min_width=80)
                    with gr.Row():
                        thumbs_up_btn = gr.Button("π Good response", size="sm")
                        thumbs_down_btn = gr.Button("π Bad response", size="sm")
                        clear_btn = gr.Button("ποΈ Clear chat", size="sm")
                    # Receives the message returned by record_feedback().
                    feedback_status = gr.Markdown("")
                    gr.Markdown(
                        "**Try:** *Explain quantum entanglement.* Β· "
                        "*Write a haiku about adaptive AI.* Β· "
                        "*What is distillation in machine learning?* Β· "
                        "*Translate to French: 'Good morning, how are you?'*"
                    )
                # Right column: read-only JSON view of the active genome.
                with gr.Column(scale=2):
                    gr.Markdown("### 𧬠Active Genome")
                    active_genome_view = gr.Code(
                        value=render_active_genome(),
                        language="json", lines=20, interactive=False,
                    )
        # -- Tab 2: Adapter Pool ----------------------------------------------
        with gr.Tab("𧬠Adapter Pool"):
            gr.Markdown(
                "### The population of evolved variants\n"
                "Each adapter is a distinct genome β system prompt, sampling config, "
                "LoRA setup. The bandit learns which one wins for your usage."
            )
            pool_table = gr.Dataframe(
                value=render_pool_table(),
                headers=["Name", "Gen", "Eval Bank", "Bandit Mean", "Trials", "Status"],
                interactive=False, wrap=True,
            )
            gr.Markdown("---")
            gr.Markdown("### π Evolution β watch fitness climb across generations")
            evolution_summary = gr.Markdown(render_evolution_summary())
            # Plots the "generation" / "best_fitness" columns of render_fitness_curve().
            fitness_plot = gr.LinePlot(
                value=render_fitness_curve(),
                x="generation", y="best_fitness",
                title="Best eval-bank fitness by generation (real measured run)",
                x_title="Generation", y_title="Eval-bank fitness",
                height=320, overlay_point=True,
            )
            with gr.Row():
                trigger_btn = gr.Button("π Reveal next generation", variant="primary")
                reset_btn = gr.Button("π Reset replay")
                refresh_btn = gr.Button("β» Refresh tables")
            evolution_result = gr.Markdown("")
            gr.Markdown("---")
            gr.Markdown(
                "### π₯ Import a trained adapter\n"
                "Drop the two files produced by the Colab training notebook "
                "(`*.gguf` and `*.json`) to add a user-trained adapter to the pool."
            )
            with gr.Row():
                import_gguf = gr.File(
                    label="LoRA adapter (.gguf)", file_types=[".gguf"], type="filepath",
                )
                import_manifest = gr.File(
                    label="Manifest (.json)", file_types=[".json"], type="filepath",
                )
            import_btn = gr.Button("π₯ Import adapter", variant="primary")
            import_status = gr.Markdown("")
        # -- Tab 3: Knowledge -------------------------------------------------
        with gr.Tab("π Knowledge"):
            gr.Markdown(
                "### Document knowledge β the second dimension of evolution\n"
                "Upload PDFs, Word docs, Markdown, or paste text. EvoLLM chunks, embeds, "
                "and indexes them locally with a multilingual embedder. In the Chat tab, "
                "toggle **π Knowledge mode** and the model retrieves relevant chunks "
                "before answering, citing sources."
            )
            gr.Markdown(
                "> β οΈ **On HF Space** uploads are session-only β they're processed inside the "
                "Space container and disappear on rebuild. **Use the local desktop app for "
                "true privacy and persistence** (`data/knowledge.sqlite`)."
            )
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("#### Upload files")
                    file_upload = gr.File(
                        label="Drop PDF / TXT / MD / DOCX (multi-select OK)",
                        file_types=[".pdf", ".txt", ".md", ".markdown", ".docx"],
                        file_count="multiple",
                        type="filepath",
                    )
                    upload_btn = gr.Button("π₯ Index uploaded files", variant="primary")
                with gr.Column(scale=2):
                    gr.Markdown("#### Or paste text directly")
                    paste_name = gr.Textbox(
                        label="Source name", placeholder="e.g. company_handbook"
                    )
                    paste_text = gr.Textbox(
                        label="Text content", lines=6,
                        placeholder="Paste any text β Creole, technical docs, FAQsβ¦",
                    )
                    paste_btn = gr.Button("π₯ Index pasted text", variant="primary")
            # Shared status line for both the upload and the paste handlers.
            ingest_status = gr.Markdown("")
            gr.Markdown("#### Indexed documents")
            knowledge_stats = gr.Markdown(render_knowledge_stats())
            knowledge_table = gr.Dataframe(
                value=render_knowledge_table(),
                headers=["Name", "Format", "Size", "Chunks", "Uploaded"],
                interactive=False, wrap=True,
            )
            with gr.Row():
                refresh_knowledge_btn = gr.Button("π Refresh")
                clear_knowledge_btn = gr.Button("ποΈ Clear all documents", variant="stop")
            knowledge_action_status = gr.Markdown("")
            gr.Markdown("---")
            gr.Markdown(
                "### 𧬠Train an adapter from these documents\n"
                "Bake the document content into a real LoRA adapter via QLoRA on Colab. "
                "EvoLLM generates a configured notebook with your data inline; you run it "
                "on a free T4 GPU; then import the resulting `.gguf` + manifest back here."
            )
            with gr.Row():
                with gr.Column(scale=2):
                    # Initial choices are a snapshot of KNOWLEDGE.documents() at build time.
                    train_doc_select = gr.CheckboxGroup(
                        choices=[d["name"] for d in KNOWLEDGE.documents()],
                        label="Documents to train on",
                        info="Select one or more indexed documents.",
                    )
                with gr.Column(scale=1):
                    train_adapter_name = gr.Textbox(
                        label="Adapter name", placeholder="e.g. company_handbook",
                    )
                    train_lora_rank = gr.Slider(
                        minimum=4, maximum=64, value=16, step=4,
                        label="LoRA rank",
                        info="Higher = more capacity, slower training",
                    )
                    train_num_epochs = gr.Slider(
                        minimum=1, maximum=10, value=3, step=1,
                        label="Training epochs",
                    )
            with gr.Row():
                refresh_train_docs_btn = gr.Button("π Refresh doc list", size="sm")
                generate_notebook_btn = gr.Button("𧬠Generate training notebook", variant="primary")
            notebook_status = gr.Markdown("")
            notebook_download = gr.File(label="π Download notebook", interactive=False)
        # -- Tab 4: Evolution Log ---------------------------------------------
        with gr.Tab("π Evolution Log"):
            gr.Markdown("### Lineage of mutations, promotions, and feedback events")
            evolution_log_view = gr.Markdown(render_evolution_log())
            gr.Markdown("---")
            gr.Markdown("### Recent feedback")
            feedback_log_view = gr.Markdown(render_feedback_log())
        # -- Tab 5: About -----------------------------------------------------
        # NOTE(review): roadmap row "4a" below says "This tab", but this text is
        # rendered in the About tab, not the Knowledge tab -- confirm the wording.
        with gr.Tab("βΉοΈ About"):
            gr.Markdown("""
## EvoLLM β what's actually here
### Hardware-adaptive architecture
EvoLLM scales the **base model** to the user's hardware while keeping the
**evolution engine identical** across all tiers:
| Tier | Base | Use | Speed |
|---|---|---|---|
| Phone / IoT | SmolLM2-135M | embedded edge | ~50 tok/s on phone NPU |
| **Web demo (this Space)** | SmolLM2-360M | free public preview | ~5 tok/s on 2 vCPUs |
| **Local desktop app** | SmolLM2-1.7B | privacy-first daily driver | ~30 tok/s on a 4090 |
| Workstation | Qwen 2.5 7B | power user | ~100 tok/s on A100 |
| Datacenter | Llama 3.1 8B+ | hosted serving | ~300 tok/s on A100 |
The genome schema, adapter pool, Thompson bandit, eval bank, and mutation
operators are byte-for-byte the same across every tier. Only the base
weights change. That's the deployment story.
### The evolution layer
EvoLLM wraps each base model with:
- **Base swap**: every tier runs a different base β the smallest variant is 135M for embedded, the largest is 8B+ for datacenter
- **Adapter pool**: 5 hand-curated genome variants, with the architecture in
place to ingest real distilled LoRA weights (Phase 2 β Colab notebook in repo)
- **Bandit**: Thompson sampling over Beta(Ξ±, Ξ²) reward distributions per
adapter. Live thumbs feedback updates posteriors in real time.
- **Eval bank**: 40 fixed prompts across reasoning, factual, code, writing,
instruction-following, safety, calibration, and edge cases. Deterministic
rule-based scoring β no LLM-as-judge dependency.
- **Mutation operators**: LoRA rank, target modules, memory token,
sampling config, system prompt
- **Fitness**: 50/50 blend of eval-bank score and live feedback win-rate
### Why this matters
Other local LLMs (Ollama, LM Studio, GPT4All) ship one frozen model.
**EvoLLM ships a population** β and that population evolves on the user's
machine, in response to that specific user. The same hardware runs a
better model after a week of use than it did on day 1.
### Two dimensions of evolution
EvoLLM evolves on two orthogonal axes:
1. **Behaviour** β the adapter pool. Each adapter is a genome (system prompt,
sampling config, LoRA setup). The Thompson-sampling bandit learns which
adapter wins for the user from live thumbs feedback.
2. **Knowledge** β uploaded documents. Embedded with a multilingual model and
stored in a local vector DB. When Knowledge mode is on, queries retrieve
the top-3 relevant chunks and inject them as grounded context with citations.
Both dimensions feed the same evolution log. Both live on the user's hardware.
Both are visible in the UI.
### Roadmap
| Phase | Status | What |
|---|---|---|
| 0 β Inference foundation | β Done | FastAPI + llama.cpp + GGUF |
| 1 β Adapter loading + memory token | β Done | The 5-personality adapter pool |
| 2 β Distillation seed adapters | π§ | Colab notebook produces real LoRA files |
| 3 β Desktop installer | π | Tauri/Electron bundle for Windows |
| 4a β Knowledge layer (RAG) | β Done | This tab β multilingual embed + cite |
| 4b β LoRA-on-upload | π§ | "Train adapter from documents" Colab flow |
| 5 β Background evolution worker | π | Periodic QLoRA retrain on feedback |
| 6 β Cloud-mediated adapter delivery | π | Opt-in anonymized feedback β updates |
### Source
GitHub: [drhemanm/EvoTransformerV11](https://github.com/drhemanm/EvoTransformerV11)
Built on EvoTransformer (Mohabeer, 2025).
""")
# -- Wiring -------------------------------------------------------------------
def submit_message(message, history, adapter_pick, knowledge_on):
    """Stream one chat turn into the Chatbot transcript.

    Generator handler shared by the textbox submit event and the Send button.
    It appends the user message plus an empty assistant placeholder, then
    overwrites that placeholder with each partial string yielded by ``chat``.

    Args:
        message: Text taken from the chat input box.
        history: Current transcript as a list of ``{"role", "content"}``
            dicts; ``None`` or empty is treated as a fresh conversation.
        adapter_pick: Value of the adapter dropdown, forwarded to ``chat``.
        knowledge_on: State of the knowledge-mode checkbox, forwarded to
            ``chat``.

    Yields:
        ``(history, "")`` -- the updated transcript and an empty string that
        clears the input box.
    """
    history = history or []

    # A blank submission (stray Enter, whitespace only) must not add an empty
    # user bubble or start a generation. Hand the transcript back untouched.
    if not message or not message.strip():
        yield history, ""
        return

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})

    # chat() gets only the turns that precede the two entries appended above.
    for partial in chat(message, history[:-2], adapter_pick, knowledge_on):
        history[-1]["content"] = partial
        yield history, ""

    # Guarantees at least one update even when chat() yields nothing.
    yield history, ""
# Component lists shared by several listeners below. `refresh_outputs` follows
# the order of the tuple returned by refresh_all().
refresh_outputs = [active_genome_view, pool_table, evolution_log_view, feedback_log_view]
submit_inputs = [chat_input, chatbot, adapter_choice, knowledge_toggle]
submit_outputs = [chatbot, chat_input]

# Pressing Enter in the textbox and clicking Send run the same handler, and
# both refresh the dashboard panels once the streamed reply has finished.
for _send_event in (chat_input.submit, send_btn.click):
    _sent = _send_event(submit_message, submit_inputs, submit_outputs, api_name=False)
    _sent.then(refresh_all, None, refresh_outputs, api_name=False)

# Thumbs feedback: record +1 / -1, show the returned status, refresh panels.
_voted_up = thumbs_up_btn.click(
    lambda: record_feedback(+1), None, feedback_status, api_name=False,
)
_voted_up.then(refresh_all, None, refresh_outputs, api_name=False)
_voted_down = thumbs_down_btn.click(
    lambda: record_feedback(-1), None, feedback_status, api_name=False,
)
_voted_down.then(refresh_all, None, refresh_outputs, api_name=False)

# Clear empties the transcript, the input box and the feedback status line.
clear_btn.click(
    lambda: ([], "", ""), None, [chatbot, chat_input, feedback_status],
    api_name=False,
)

# "Reveal next generation" and "Reset replay" share one follow-up chain:
# redraw the fitness plot, then the summary, then the dashboard panels.
for _button, _action in (
    (trigger_btn, trigger_evolution_cycle),
    (reset_btn, reset_evolution_replay),
):
    _chain = _button.click(_action, None, evolution_result, api_name=False)
    _chain = _chain.then(render_fitness_curve, None, fitness_plot, api_name=False)
    _chain = _chain.then(render_evolution_summary, None, evolution_summary, api_name=False)
    _chain.then(refresh_all, None, refresh_outputs, api_name=False)

refresh_btn.click(refresh_all, None, refresh_outputs, api_name=False)
# -- Knowledge tab wiring -------------------------------------------------------
def _refresh_doc_choices(selected=None):
    """Rebuild the training-document selector from the current index.

    Args:
        selected: Names currently ticked in the selector, if any.

    Returns:
        A ``gr.update`` that sets the choices to the indexed document names
        and keeps only the ticked names that still exist, so the selector
        never holds a value that is missing from its choices.
    """
    names = [d["name"] for d in KNOWLEDGE.documents()]
    still_valid = [name for name in (selected or []) if name in names]
    return gr.update(choices=names, value=still_valid)


# Every action that changes the document set also refreshes the stats line and
# the training selector; previously the selector stayed stale until the user
# pressed "Refresh doc list".
upload_btn.click(
    handle_file_upload, [file_upload], [ingest_status, knowledge_table],
    api_name=False,
).then(
    render_knowledge_stats, None, knowledge_stats, api_name=False,
).then(
    _refresh_doc_choices, [train_doc_select], train_doc_select, api_name=False,
)
paste_btn.click(
    handle_text_paste,
    [paste_name, paste_text],
    [ingest_status, knowledge_table, paste_name, paste_text],
    api_name=False,
).then(
    render_knowledge_stats, None, knowledge_stats, api_name=False,
).then(
    _refresh_doc_choices, [train_doc_select], train_doc_select, api_name=False,
)
refresh_knowledge_btn.click(
    lambda: (render_knowledge_table(), render_knowledge_stats()),
    None, [knowledge_table, knowledge_stats], api_name=False,
)
clear_knowledge_btn.click(
    handle_delete_all, None, [knowledge_action_status, knowledge_table],
    api_name=False,
).then(
    render_knowledge_stats, None, knowledge_stats, api_name=False,
).then(
    _refresh_doc_choices, [train_doc_select], train_doc_select, api_name=False,
)
# The manual refresh button stays for documents indexed outside these handlers.
refresh_train_docs_btn.click(
    _refresh_doc_choices, [train_doc_select], train_doc_select, api_name=False,
)
generate_notebook_btn.click(
    handle_generate_notebook,
    [train_doc_select, train_adapter_name, train_lora_rank, train_num_epochs],
    [notebook_status, notebook_download],
    api_name=False,
)
# Importing an adapter reports status and the new pool table first, then
# re-renders the dashboard panels.
_import_event = import_btn.click(
    handle_import_adapter,
    inputs=[import_gguf, import_manifest],
    outputs=[import_status, pool_table],
    api_name=False,
)
_import_event.then(refresh_all, None, refresh_outputs, api_name=False)
if __name__ == "__main__":
    # A public share link is opt-in: only EVOLLM_SHARE=true (any letter case)
    # enables it.
    share_requested = os.environ.get("EVOLLM_SHARE", "").lower() == "true"
    queued_app = demo.queue()
    # Bind to all interfaces.
    queued_app.launch(server_name="0.0.0.0", share=share_requested)