"""EvoLLM — investor-ready Gradio demo. Same file runs on: • HuggingFace Spaces (auto-deployed from GitHub) • Locally: `python space/app.py` (privacy-first, no cloud) The Space ships with 5 hand-curated "personality" adapters (Default, Creative, Concise, Technical, Empathetic). A Thompson-sampling bandit picks one per query, learning from thumbs feedback. The evolution log shows mutation events as new adapter variants are tested and promoted. This file is self-contained: it inlines the genome / bandit / pool logic so the Space stays independent of the main evollm package. The same logic lives in evollm/* for the local server build. """ from __future__ import annotations import json import math import os import random import sys import time import uuid from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime from pathlib import Path import gradio as gr from huggingface_hub import hf_hub_download from llama_cpp import Llama from pydantic import BaseModel, Field # Make the local `knowledge` package importable whether we're running on HF # Spaces (cwd = /app) or locally (cwd = repo root, script in space/). 
sys.path.insert(0, str(Path(__file__).resolve().parent))

from knowledge import (  # noqa: E402
    KnowledgePipeline,
    generate_training_notebook,
    import_adapter as import_adapter_files,
)


# ────────────────────────────────────────────────────────────────────────────
# Genome
# ────────────────────────────────────────────────────────────────────────────


class Genome(BaseModel):
    """Configuration record describing one adapter variant.

    Holds identity/lineage fields, the LoRA and sampling hyper-parameters,
    the system prompt, and the (optional) scores attached to the variant.
    Every field has a default, so ``Genome()`` yields the baseline genome.
    """

    # Identity and lineage.
    genome_id: str = Field(default_factory=lambda: uuid.uuid4().hex[:12])
    parent_id: str | None = None
    generation: int = 0
    name: str = "Default"

    # Base model and LoRA description.
    base_model: str = "SmolLM2-360M-Instruct"
    quantization: str = "Q8_0"
    lora_rank: int = 8
    lora_alpha: int = 16
    lora_target_modules: list[str] = Field(default_factory=lambda: ["q_proj", "v_proj"])

    # Memory-token settings.
    memory_token_enabled: bool = False
    memory_token_count: int = 4

    # Sampling parameters.
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 40
    repeat_penalty: float = 1.1
    max_tokens: int = 256

    system_prompt: str = (
        "You are EvoLLM, a privacy-first local assistant. "
        "Give helpful, complete answers. Be accurate and honest about uncertainty."
    )

    # Scores; None means "not measured yet".
    fitness_score: float | None = None
    eval_bank_score: float | None = None
    feedback_score: float | None = None

    knowledge_sources: list[str] = Field(default_factory=list)  # doc ids this genome can read; empty = all


# ────────────────────────────────────────────────────────────────────────────
# Adapter pool
# ────────────────────────────────────────────────────────────────────────────


@dataclass
class Adapter:
    """A named pool entry wrapping a :class:`Genome`."""

    adapter_id: str
    name: str
    description: str
    genome: Genome
    promoted: bool = True


def build_seed_pool() -> list[Adapter]:
    """Return the five hand-written seed adapters.

    "Default" is generation 0; the other four are generation-1 children of
    ``evo_default``. A fresh list with fresh ``Genome`` objects is built on
    every call.
    """
    return [
        Adapter(
            "evo_default",
            "Default",
            "Balanced baseline genome — the neutral start of evolution.",
            Genome(
                genome_id="evo_default",
                name="Default",
                system_prompt=(
                    "You are EvoLLM, a privacy-first local assistant. "
                    "Give helpful, complete answers. Be accurate, balanced, and "
                    "honest about what you don't know."
                ),
                temperature=0.7,
                top_p=0.9,
                top_k=40,
                eval_bank_score=0.62,
            ),
        ),
        Adapter(
            "evo_creative",
            "Creative",
            "Higher temperature, expressive — for ideation and writing.",
            Genome(
                genome_id="evo_creative",
                name="Creative",
                generation=1,
                parent_id="evo_default",
                system_prompt=(
                    "You are EvoLLM in creative mode. Embrace originality, vivid imagery, "
                    "and surprising connections. Write expressively but stay coherent and "
                    "on-topic."
                ),
                temperature=1.0,
                top_p=0.95,
                top_k=80,
                lora_rank=16,
                lora_alpha=32,
                eval_bank_score=0.55,
            ),
        ),
        Adapter(
            "evo_concise",
            "Concise",
            "Terse, fact-first — optimised for quick answers.",
            Genome(
                genome_id="evo_concise",
                name="Concise",
                generation=1,
                parent_id="evo_default",
                system_prompt=(
                    "You are EvoLLM in concise mode. Answer in 1-3 short sentences. "
                    "Skip preamble and qualifications. Information density above all."
                ),
                temperature=0.4,
                top_p=0.85,
                top_k=30,
                max_tokens=128,
                eval_bank_score=0.68,
            ),
        ),
        Adapter(
            "evo_technical",
            "Technical",
            "Precise, structured, code-aware — for engineering questions.",
            Genome(
                genome_id="evo_technical",
                name="Technical",
                generation=1,
                parent_id="evo_default",
                system_prompt=(
                    "You are EvoLLM in technical mode. Use precise terminology and "
                    "structured reasoning. Use code blocks when relevant. State any "
                    "assumptions explicitly. Complete answers preferred over short ones."
                ),
                temperature=0.5,
                top_p=0.9,
                top_k=40,
                max_tokens=384,
                lora_rank=32,
                lora_alpha=64,
                lora_target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
                eval_bank_score=0.71,
            ),
        ),
        Adapter(
            "evo_empathetic",
            "Empathetic",
            "Warmer, context-sensitive — better for personal topics.",
            Genome(
                genome_id="evo_empathetic",
                name="Empathetic",
                generation=1,
                parent_id="evo_default",
                system_prompt=(
                    "You are EvoLLM in empathetic mode. Acknowledge feelings before "
                    "facts. Be warm, patient, and supportive while remaining honest "
                    "and helpful."
                ),
                temperature=0.75,
                top_p=0.92,
                top_k=50,
                memory_token_enabled=True,
                memory_token_count=8,
                eval_bank_score=0.59,
            ),
        ),
    ]


# ────────────────────────────────────────────────────────────────────────────
# Thompson-sampling bandit
# ────────────────────────────────────────────────────────────────────────────


@dataclass
class ArmStats:
    """Beta(alpha, beta) posterior for one adapter.

    ``pulls`` counts the rewards actually recorded through
    :meth:`Bandit.update`; it is kept separately because ``alpha``/``beta``
    also contain prior pseudo-counts.
    """

    adapter_id: str
    alpha: float = 1.0
    beta: float = 1.0
    pulls: int = 0

    def sample(self) -> float:
        """Draw one value from Beta(alpha, beta) as a ratio of two gamma draws."""
        x = random.gammavariate(self.alpha, 1.0)
        y = random.gammavariate(self.beta, 1.0)
        # Guard the degenerate case where both draws are 0.
        return x / (x + y) if (x + y) > 0 else 0.0

    @property
    def mean(self) -> float:
        """Posterior mean ``alpha / (alpha + beta)``."""
        return self.alpha / (self.alpha + self.beta)


class Bandit:
    """Thompson-sampling selector over registered adapters."""

    # Pseudo-observations contributed by a fitness prior in register().
    _PRIOR_WEIGHT = 5.0

    def __init__(self) -> None:
        self.arms: dict[str, ArmStats] = {}

    def register(self, adapter_id: str, prior_fitness: float | None = None) -> None:
        """Add an arm for ``adapter_id``; a no-op when the arm already exists.

        With ``prior_fitness`` given, the arm starts at
        ``Beta(1 + f * 5, 1 + (1 - f) * 5)``; otherwise at the uniform
        ``Beta(1, 1)``.
        """
        if adapter_id in self.arms:
            return
        if prior_fitness is not None:
            alpha = 1.0 + prior_fitness * self._PRIOR_WEIGHT
            beta = 1.0 + (1.0 - prior_fitness) * self._PRIOR_WEIGHT
        else:
            alpha, beta = 1.0, 1.0
        self.arms[adapter_id] = ArmStats(adapter_id, alpha, beta)

    def select(self) -> str:
        """Sample every arm once and return the id with the highest draw.

        Raises:
            IndexError: if no arm has been registered.
        """
        if not self.arms:
            raise IndexError("Bandit.select() called with no registered adapters")
        # Ties on the sampled value fall back to comparing adapter ids,
        # exactly as sorting the (sample, id) tuples would.
        _, best_id = max((arm.sample(), arm.adapter_id) for arm in self.arms.values())
        return best_id

    def update(self, adapter_id: str, reward: float) -> None:
        """Record ``reward`` (clamped to [0, 1]) for ``adapter_id``.

        An unknown id is registered first with the uniform prior.
        """
        if adapter_id not in self.arms:
            self.register(adapter_id)
        reward = max(0.0, min(1.0, reward))
        arm = self.arms[adapter_id]
        arm.alpha += reward
        arm.beta += 1.0 - reward
        arm.pulls += 1

    def snapshot(self) -> list[dict]:
        """Return per-arm statistics sorted by posterior mean, best first.

        ``trials`` is the number of rewards recorded via :meth:`update`;
        prior pseudo-counts from :meth:`register` are not counted.
        """
        out = [
            {
                "adapter_id": arm.adapter_id,
                "mean": round(arm.mean, 3),
                "alpha": round(arm.alpha, 2),
                "beta": round(arm.beta, 2),
                "trials": arm.pulls,
                "confidence": round(1.0 - 1.0 / math.sqrt(arm.alpha + arm.beta), 3),
            }
            for arm in self.arms.values()
        ]
        out.sort(key=lambda r: r["mean"], reverse=True)
        return out


# ────────────────────────────────────────────────────────────────────────────
# Model loading
# ────────────────────────────────────────────────────────────────────────────
# EVOLLM_SKIP_MODEL lets tests import this module without the (large) model
# download — used by scripts/smoke_test.py to exercise the evolution-replay
# and rendering logic. Never set in production.
if os.environ.get("EVOLLM_SKIP_MODEL") == "1":
    print("EVOLLM_SKIP_MODEL=1 — skipping model load (test mode).")
    # Define both names in both branches so later references never NameError.
    MODEL_PATH = None
    LLM = None
else:
    print("Downloading SmolLM2-360M-Instruct Q8_0 GGUF...")
    MODEL_PATH = hf_hub_download(
        repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
        filename="smollm2-360m-instruct-q8_0.gguf",
    )
    print(f"Loading model from {MODEL_PATH}...")
    LLM = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,
        n_threads=os.cpu_count() or 4,
        n_batch=512,
        verbose=False,
    )
    print("Model ready.")


# Real, pre-measured evolution run (produced by scripts/run_evolution_sweep.py).
# The free CPU Space can't score 41 eval prompts/adapter live (~15 min each),
# so we REPLAY a genuine offline run: real model, real eval bank, real scores.
# Only the timing is replayed — the numbers are measured, not simulated.
def _load_evolution_run() -> dict | None:
    """Load the recorded evolution run, or return None when unavailable.

    The path comes from the ``EVOLLM_RUN_PATH`` environment variable when
    set (lets tests point at a fixture); otherwise it is
    ``data/evolution_run.json`` next to this file.

    Returns:
        The parsed JSON object, or None if the file is missing, unreadable,
        not valid JSON, or does not contain a JSON object at top level.
    """
    override = os.environ.get("EVOLLM_RUN_PATH")
    path = Path(override) if override else Path(__file__).resolve().parent / "data" / "evolution_run.json"
    if not path.exists():
        return None
    try:
        run = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[evolution] failed to load run: {e}")
        return None
    if not isinstance(run, dict):
        # Callers index the run by key and call .get() on it.
        print(f"[evolution] failed to load run: expected a JSON object, got {type(run).__name__}")
        return None
    return run


EVOLUTION_RUN = _load_evolution_run()
REPLAY_STATE = {"revealed": 0}  # how many recorded generations have been shown

POOL: list[Adapter] = build_seed_pool()

# Override hardcoded seed scores with the real measured ones if we have them.
if EVOLUTION_RUN:
    _real = {s["adapter_id"]: s["eval_bank_score"] for s in EVOLUTION_RUN.get("seeds", [])}
    for a in POOL:
        if a.adapter_id in _real:
            a.genome.eval_bank_score = _real[a.adapter_id]

POOL_BY_ID: dict[str, Adapter] = {a.adapter_id: a for a in POOL}

# Knowledge pipeline — embedder is lazy-loaded on first upload so app startup
# stays fast. On HF Space the SQLite file is ephemeral (rebuilds wipe it);
# locally it persists at data/knowledge.sqlite.
KNOWLEDGE = KnowledgePipeline()

BANDIT = Bandit()
for a in POOL:
    BANDIT.register(a.adapter_id, prior_fitness=a.genome.eval_bank_score)

FEEDBACK_LOG: list[dict] = []
EVOLUTION_LOG: list[dict] = []
LAST_INTERACTION: dict = {
    "adapter_id": None,
    "user_prompt": None,
    "response": None,
    "knowledge_used": [],
}


def _utc_hms() -> str:
    """Return the current UTC time formatted as ``HH:MM:SS``."""
    # datetime.utcnow() is deprecated; use an aware UTC "now" instead.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime("%H:%M:%S")


def log_evolution(event_type: str, message: str, payload: dict | None = None) -> None:
    """Prepend an event to ``EVOLUTION_LOG`` and keep only the newest 200."""
    EVOLUTION_LOG.insert(0, {
        "at": _utc_hms(),
        "type": event_type,
        "message": message,
        "payload": payload or {},
    })
    del EVOLUTION_LOG[200:]


log_evolution("init", "EvoLLM initialised — 5 seed adapters loaded into pool.")
if EVOLUTION_RUN:
    _seed_summary = " · ".join(
        f"{s['name']} {s['eval_bank_score']:.3f}"
        for s in sorted(EVOLUTION_RUN["seeds"], key=lambda s: -s["eval_bank_score"])
    )
    log_evolution(
        "fitness",
        f"Eval bank baseline (REAL, {EVOLUTION_RUN['meta']['eval_prompts']} prompts on "
        f"{EVOLUTION_RUN['meta']['model']}): {_seed_summary}",
    )
else:
    log_evolution(
        "fitness",
        "Eval bank baseline pending — run scripts/run_evolution_sweep.py to populate real scores.",
    )


# ────────────────────────────────────────────────────────────────────────────
# Chat
# ────────────────────────────────────────────────────────────────────────────


def chat(message, history, force_adapter, knowledge_mode):
    """Stream a reply to ``message`` using one adapter's genome.

    Generator: yields the cumulative response text after each received token,
    then (when knowledge chunks were retrieved) once more with a sources
    footer. After streaming completes, ``LAST_INTERACTION`` is updated so
    feedback can be attributed to the adapter that answered.

    Args:
        message: The user's new prompt.
        history: Prior turns; each item is indexed by ``"role"`` and
            ``"content"``.
        force_adapter: Adapter display name to pin, or a falsy value /
            ``"🧬 Auto (bandit)"`` to let the bandit choose. An unknown name
            also falls back to the bandit.
        knowledge_mode: When truthy, retrieve up to 3 chunks from
            ``KNOWLEDGE`` and add them to the system prompt.
    """
    if LLM is None:
        # EVOLLM_SKIP_MODEL=1: there is no model to call.
        yield "⚠️ Model not loaded (EVOLLM_SKIP_MODEL=1) — chat is unavailable in test mode."
        return

    adapter_id = None
    if force_adapter and force_adapter != "🧬 Auto (bandit)":
        adapter_id = next((a.adapter_id for a in POOL if a.name == force_adapter), None)
    if adapter_id is None:
        adapter_id = BANDIT.select()
    if adapter_id not in POOL_BY_ID:
        # The bandit may hold an arm whose adapter is no longer pooled.
        adapter_id = POOL[0].adapter_id

    genome = POOL_BY_ID[adapter_id].genome

    system_prompt = genome.system_prompt
    if genome.memory_token_enabled:
        memory_prefix = "[Memory tokens active — maintain context awareness across turns.]"
        system_prompt = f"{memory_prefix}\n\n{system_prompt}"

    retrieved: list[dict] = []
    if knowledge_mode:
        try:
            retrieved = KNOWLEDGE.query(message, top_k=3)
        except Exception as e:
            # Best effort: a retrieval failure must not block the chat reply.
            print(f"[knowledge] retrieval failed: {e}")
            retrieved = []

    if retrieved:
        context_block = "\n\n".join(
            f"[Source: {r['document_name']} · chunk {r['chunk_index']}]\n{r['text']}"
            for r in retrieved
        )
        system_prompt = (
            f"{system_prompt}\n\n"
            f"Use the following context to ground your answer. If the answer is "
            f"not in the context, say so honestly.\n\n"
            f"--- BEGIN CONTEXT ---\n{context_block}\n--- END CONTEXT ---"
        )

    messages: list[dict[str, str]] = [{"role": "system", "content": system_prompt}]
    messages.extend({"role": turn["role"], "content": turn["content"]} for turn in history)
    messages.append({"role": "user", "content": message})

    stream = LLM.create_chat_completion(
        messages=messages,
        temperature=genome.temperature,
        top_p=genome.top_p,
        top_k=genome.top_k,
        repeat_penalty=genome.repeat_penalty,
        max_tokens=genome.max_tokens,
        stream=True,
    )

    partial = ""
    for chunk in stream:
        delta = chunk["choices"][0].get("delta", {})
        token = delta.get("content")
        if token:
            partial += token
            yield partial

    # Append a sources footer when knowledge was used.
    if retrieved:
        sources_line = " · ".join(
            f"{r['document_name']} (chunk {r['chunk_index']})" for r in retrieved
        )
        partial = f"{partial}\n\n_📚 Sources: {sources_line}_"
        yield partial

    LAST_INTERACTION["adapter_id"] = adapter_id
    LAST_INTERACTION["user_prompt"] = message
    LAST_INTERACTION["response"] = partial
    LAST_INTERACTION["knowledge_used"] = [r["document_id"] for r in retrieved]


# ────────────────────────────────────────────────────────────────────────────
# Feedback handlers
# ────────────────────────────────────────────────────────────────────────────


def record_feedback(rating: int) -> str:
    """Apply a thumbs rating to the adapter that produced the last reply.

    A ``rating`` greater than 0 counts as 👍 (reward 1.0); anything else as
    👎 (reward 0.0). Returns a status message for the UI.
    """
    adapter_id = LAST_INTERACTION["adapter_id"]
    if not adapter_id:
        return "No interaction yet — chat first, then rate."

    positive = rating > 0
    label = "👍" if positive else "👎"
    # Fall back to the raw id if the adapter is no longer in the pool.
    adapter = POOL_BY_ID.get(adapter_id)
    adapter_name = adapter.name if adapter is not None else adapter_id

    BANDIT.update(adapter_id, reward=1.0 if positive else 0.0)

    FEEDBACK_LOG.insert(0, {
        "at": _utc_hms(),
        "adapter": adapter_name,
        "rating": label,
        "prompt": (LAST_INTERACTION["user_prompt"] or "")[:80],
    })
    del FEEDBACK_LOG[100:]

    log_evolution(
        "feedback",
        f"{label} for {adapter_name} — bandit updated.",
        {"adapter_id": adapter_id, "rating": rating},
    )
    return f"Recorded {label} for **{adapter_name}**. Bandit updated."


# ────────────────────────────────────────────────────────────────────────────
# Manual evolution trigger (for demos)
# ────────────────────────────────────────────────────────────────────────────


def trigger_evolution_cycle() -> str:
    """Reveal the next generation of the REAL pre-measured evolution run.

    Each step here was actually computed offline: the child genome was run
    against the 41-prompt eval bank on the real model, and its fitness is the
    measured score. We replay one recorded generation per click so a viewer
    can watch the fitness curve climb. Nothing here is synthetic.

    Side effects: adds the revealed child to ``POOL`` / ``POOL_BY_ID``,
    registers a bandit arm for it, advances ``REPLAY_STATE["revealed"]`` and
    appends to the evolution log.

    Returns:
        Markdown describing the revealed generation, or an explanatory
        message when no run is loaded or every generation has been shown.
    """
    if not EVOLUTION_RUN:
        return (
            "### No measured run available yet\n\n"
            "The fitness curve replays a **real** offline evolution run. To generate it, "
            "run on a machine with internet + the model:\n\n"
            "```bash\npython scripts/run_evolution_sweep.py --generations 8\n```\n\n"
            "Commit the resulting `space/data/evolution_run.json` and redeploy — the curve "
            "and this button light up with genuine measured scores. (The free CPU Space "
            "can't score 41 prompts/adapter live, which is why we replay a measured run.)"
        )

    lineage = EVOLUTION_RUN["lineage"]
    idx = REPLAY_STATE["revealed"]
    if idx >= len(lineage):
        best = EVOLUTION_RUN["best_fitness_by_generation"][-1]
        return (
            f"### Evolution run complete\n\n"
            f"All {len(lineage)} recorded generations revealed. "
            f"Best measured fitness: **{best:.3f}** "
            f"(seed baseline best: {EVOLUTION_RUN['best_fitness_by_generation'][0]:.3f}). "
            f"Use **🔄 Reset replay** to watch it again."
        )

    step = lineage[idx]
    child_id = step["child_id"]
    g = step["genome"]

    child_genome = Genome(
        genome_id=child_id,
        parent_id=step["parent_id"],
        generation=step["generation"],
        name=f"Gen-{step['generation']} child",
        system_prompt=g.get("system_prompt", ""),
        temperature=g.get("temperature", 0.7),
        top_p=g.get("top_p", 0.9),
        top_k=g.get("top_k", 40),
        max_tokens=g.get("max_tokens", 256),
        memory_token_enabled=bool(g.get("memory_prefix")),
        eval_bank_score=step["child_fitness"],
    )
    child = Adapter(
        adapter_id=child_id,
        name=child_genome.name,
        description=f"Gen {step['generation']}: {step['mutation_kind']} — {step['mutation_detail']}",
        genome=child_genome,
        promoted=step["promoted"],
    )
    POOL.append(child)
    POOL_BY_ID[child_id] = child
    BANDIT.register(child_id, prior_fitness=child_genome.eval_bank_score)
    # Advance the cursor only once the child is fully registered, so a bad
    # record does not silently skip a generation.
    REPLAY_STATE["revealed"] = idx + 1

    log_evolution(
        "mutation",
        f"Gen {step['generation']}: mutated {step['parent_name']} via "
        f"{step['mutation_kind']} ({step['mutation_detail']}) → measured fitness "
        f"{step['child_fitness']:.3f}",
        {"adapter_id": child_id, "parent_id": step["parent_id"], "kind": step["mutation_kind"]},
    )

    if step["promoted"]:
        log_evolution(
            "promotion",
            f"PROMOTED gen-{step['generation']} child — {step['child_fitness']:.3f} ≥ "
            f"parent {step['parent_fitness']:.3f}. New population best.",
        )
        verdict = f"✅ **Promoted** — beats parent ({step['child_fitness']:.3f} ≥ {step['parent_fitness']:.3f})"
    else:
        log_evolution(
            "archive",
            f"Archived gen-{step['generation']} child — {step['child_fitness']:.3f} < "
            f"parent {step['parent_fitness']:.3f}.",
        )
        verdict = f"📦 **Archived** — below parent ({step['child_fitness']:.3f} < {step['parent_fitness']:.3f})"

    remaining = len(lineage) - REPLAY_STATE["revealed"]
    return (
        f"### Generation {step['generation']} (real, measured)\n\n"
        f"**Mutation**: `{step['mutation_kind']}` — {step['mutation_detail']}\n\n"
        f"**Parent fitness**: {step['parent_fitness']:.3f} → "
        f"**Child fitness**: {step['child_fitness']:.3f}\n\n"
        f"{verdict}\n\n"
        f"_{remaining} generation(s) left to reveal._"
    )


def reset_evolution_replay() -> str:
    """Reset the replay and drop any revealed children from the pool.

    Children are recognised by the ``child_id`` values recorded in the run's
    lineage, or by an ``evo_g`` id prefix. Their bandit arms are removed too;
    otherwise the bandit could keep selecting adapters that no longer exist.
    Returns a status message for the UI.
    """
    REPLAY_STATE["revealed"] = 0

    replayed_ids = {step["child_id"] for step in (EVOLUTION_RUN or {}).get("lineage", [])}

    survivors: list[Adapter] = []
    for adapter in POOL:
        is_child = adapter.adapter_id in replayed_ids or adapter.adapter_id.startswith("evo_g")
        if is_child:
            BANDIT.arms.pop(adapter.adapter_id, None)
        else:
            survivors.append(adapter)

    # Mutate in place: other code holds references to these containers.
    POOL[:] = survivors
    POOL_BY_ID.clear()
    POOL_BY_ID.update({adapter.adapter_id: adapter for adapter in POOL})

    # Feedback on a removed child would re-create its arm; forget it.
    if LAST_INTERACTION["adapter_id"] not in POOL_BY_ID:
        LAST_INTERACTION["adapter_id"] = None

    return "🔄 Replay reset — back to the seed population. Click Trigger to watch evolution again."
# ──────────────────────────────────────────────────────────────────────────── # UI render helpers # ──────────────────────────────────────────────────────────────────────────── def render_fitness_curve(): """DataFrame for the fitness-over-generations LinePlot, revealed up to the current replay position.""" import pandas as pd if not EVOLUTION_RUN: return pd.DataFrame({"generation": [0], "best_fitness": [0.0]}) curve = EVOLUTION_RUN["best_fitness_by_generation"] upto = REPLAY_STATE["revealed"] + 1 # gen 0 (seed) + revealed children curve = curve[:upto] return pd.DataFrame({ "generation": list(range(len(curve))), "best_fitness": curve, }) def render_evolution_summary() -> str: if not EVOLUTION_RUN: return ( "_No measured run loaded. Run `python scripts/run_evolution_sweep.py` " "and commit `space/data/evolution_run.json` to populate this with real scores._" ) m = EVOLUTION_RUN["meta"] curve = EVOLUTION_RUN["best_fitness_by_generation"] gain = curve[-1] - curve[0] revealed = REPLAY_STATE["revealed"] return ( f"**Real measured run** · model `{m['model']}` · {m['eval_prompts']} eval prompts · " f"{m['generations']} generations.\n\n" f"Seed best **{curve[0]:.3f}** → evolved best **{curve[-1]:.3f}** " f"(**+{gain:.3f}**, {gain / max(curve[0], 1e-9) * 100:.0f}% relative). " f"Revealed: {revealed}/{len(EVOLUTION_RUN['lineage'])}.\n\n" f"_The curve is genuine — measured offline because the free CPU Space can't score " f"41 prompts/adapter live. 
Only the replay timing is for the demo._" ) def render_active_genome() -> str: aid = LAST_INTERACTION["adapter_id"] or POOL[0].adapter_id return POOL_BY_ID[aid].genome.model_dump_json(indent=2) def render_pool_table(): rows = [] snap = {s["adapter_id"]: s for s in BANDIT.snapshot()} for a in sorted(POOL, key=lambda x: (-x.genome.generation, x.adapter_id)): s = snap.get(a.adapter_id, {}) rows.append([ a.name, a.genome.generation, a.genome.eval_bank_score if a.genome.eval_bank_score is not None else "—", s.get("mean", "—"), s.get("trials", 0), "✅" if a.promoted else "🧪", ]) return rows def render_evolution_log() -> str: if not EVOLUTION_LOG: return "_No events yet._" lines = [] for e in EVOLUTION_LOG[:40]: icon = { "init": "🧬", "fitness": "📊", "feedback": "👍", "mutation": "🔀", "promotion": "🏆", "archive": "📦", }.get(e["type"], "•") lines.append(f"`{e['at']}` {icon} **{e['type']}** — {e['message']}") return "\n\n".join(lines) def render_feedback_log() -> str: if not FEEDBACK_LOG: return "_No feedback yet — rate a response with 👍 or 👎._" lines = [] for f in FEEDBACK_LOG[:20]: lines.append(f"`{f['at']}` {f['rating']} **{f['adapter']}** — _{f['prompt']}…_") return "\n\n".join(lines) # ──────────────────────────────────────────────────────────────────────────── # Knowledge helpers # ──────────────────────────────────────────────────────────────────────────── def render_knowledge_table(): docs = KNOWLEDGE.documents() if not docs: return [["—", "—", "—", "—", "—"]] rows = [] for d in docs: size_kb = (d["size_bytes"] or 0) / 1024 rows.append([ d["name"], d["format"].upper(), f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb/1024:.1f} MB", d["chunk_count"], d["uploaded_at"][:19].replace("T", " "), ]) return rows def render_knowledge_stats() -> str: s = KNOWLEDGE.stats() if s["document_count"] == 0: return "_No documents indexed yet._" size_mb = s["total_bytes"] / (1024 * 1024) return ( f"📊 **{s['document_count']} documents** · " f"**{s['chunk_count']} chunks** · " 
f"**{size_mb:.2f} MB** total · " f"embedder: `intfloat/multilingual-e5-small` (384-dim)" ) def handle_file_upload(files) -> tuple[str, list]: if not files: return "_No files selected._", render_knowledge_table() results = [] for f in files: path = f if isinstance(f, str) else f.name try: result = KNOWLEDGE.ingest_file(path) results.append(f"✅ **{result['name']}** — {result['chunk_count']} chunks indexed") log_evolution( "knowledge", f"📚 Indexed {result['name']} — {result['chunk_count']} chunks", {"document_id": result["document_id"]}, ) except Exception as e: results.append(f"❌ Failed: `{Path(path).name}` — {e}") return "\n\n".join(results), render_knowledge_table() def handle_text_paste(name: str, text: str) -> tuple[str, list, str, str]: if not (name and text and text.strip()): return "_Provide both a name and some text._", render_knowledge_table(), name, text try: result = KNOWLEDGE.ingest_text(name.strip(), text) log_evolution( "knowledge", f"📚 Indexed pasted text '{result['name']}' — {result['chunk_count']} chunks", {"document_id": result["document_id"]}, ) msg = f"✅ **{result['name']}** — {result['chunk_count']} chunks indexed" return msg, render_knowledge_table(), "", "" except Exception as e: return f"❌ {e}", render_knowledge_table(), name, text def handle_delete_all() -> tuple[str, list]: n = KNOWLEDGE.stats()["document_count"] KNOWLEDGE.clear() if n > 0: log_evolution("knowledge", f"📚 Cleared all {n} documents from index") return f"🗑️ Cleared {n} document(s).", render_knowledge_table() # ──────────────────────────────────────────────────────────────────────────── # LoRA-on-upload (Phase 4b) # ──────────────────────────────────────────────────────────────────────────── NOTEBOOK_DIR = Path("data/notebooks") NOTEBOOK_DIR.mkdir(parents=True, exist_ok=True) def handle_generate_notebook( selected_doc_names: list[str], adapter_name: str, lora_rank: int, num_epochs: int, ): """Build a Colab notebook from the chunks of the selected documents.""" if not 
selected_doc_names: return "_Select at least one indexed document first._", None adapter_name = (adapter_name or "").strip() or "user_adapter" # Gather chunks for the selected docs from the store. with KNOWLEDGE.store._conn() as c: # noqa: SLF001 — internal access OK for now rows = c.execute( "SELECT documents.name as name, chunks.text as text " "FROM chunks JOIN documents ON chunks.document_id = documents.id " "WHERE documents.name IN (" + ",".join(["?"] * len(selected_doc_names)) + ") " "ORDER BY chunks.document_id, chunks.chunk_index", selected_doc_names, ).fetchall() chunks = [r["text"] for r in rows] if not chunks: return "_No chunks found for those documents — re-index them?_", None safe_name = "".join(ch if ch.isalnum() else "_" for ch in adapter_name)[:40] or "user_adapter" out_path = NOTEBOOK_DIR / f"evollm_train_{safe_name}.ipynb" generate_training_notebook( adapter_name=adapter_name, chunks=chunks, source_doc_names=selected_doc_names, lora_rank=int(lora_rank), num_epochs=int(num_epochs), output_path=out_path, description=f"Trained from {len(selected_doc_names)} document(s) via EvoLLM", ) log_evolution( "knowledge", f"📝 Generated training notebook for '{adapter_name}' " f"({len(chunks)} chunks, {len(selected_doc_names)} doc(s))", ) msg = ( f"✅ **Notebook ready**: `{out_path.name}` ({len(chunks)} training examples).\n\n" f"1. Download the file below\n" f"2. Open it in [Google Colab](https://colab.research.google.com/)\n" f"3. **Runtime → Change runtime type → T4 GPU**, then **Runtime → Run all**\n" f"4. After training, download the two output files (`*.gguf` and `*.json`)\n" f"5. 
Come back here, go to **🧬 Adapter Pool** tab → **📥 Import trained adapter**" ) return msg, str(out_path) def handle_import_adapter(gguf_file, manifest_file): """Receive a trained LoRA + manifest, register a new adapter in the pool.""" if not gguf_file or not manifest_file: return "_Drop both the .gguf and the .json files._", render_pool_table() try: gguf_path = gguf_file if isinstance(gguf_file, str) else gguf_file.name manifest_path = manifest_file if isinstance(manifest_file, str) else manifest_file.name info = import_adapter_files(gguf_path, manifest_path) except Exception as e: return f"❌ Import failed: {e}", render_pool_table() manifest = info["manifest"] adapter_id = info["adapter_id"] # Build a genome reflecting the trained-from-knowledge adapter. sys_prompt = ( f"You are EvoLLM, fine-tuned on user-provided documents " f"({', '.join(manifest.get('source_documents', []))[:160] or 'unknown sources'}). " f"Draw on what you learned from those sources when relevant." ) genome = Genome( genome_id=adapter_id, parent_id="evo_default", generation=1, name=info["name"], base_model=manifest.get("base_model", "SmolLM2-1.7B-Instruct"), lora_rank=int(manifest.get("lora_rank", 16)), lora_alpha=int(manifest.get("lora_alpha", 32)), lora_target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], system_prompt=sys_prompt, eval_bank_score=None, # unmeasured until the eval bank runs against it ) new_adapter = Adapter( adapter_id=adapter_id, name=info["name"], description=info["description"] or "User-trained from documents", genome=genome, promoted=True, ) POOL.append(new_adapter) POOL_BY_ID[adapter_id] = new_adapter BANDIT.register(adapter_id, prior_fitness=0.55) # neutral-mid prior log_evolution( "promotion", f"📥 IMPORTED user-trained adapter '{info['name']}' " f"({manifest.get('training_examples', '?')} examples, rank {genome.lora_rank}) — joined the pool.", {"adapter_id": adapter_id, "source_documents": manifest.get("source_documents", [])}, ) note = ( f"✅ **{info['name']}** 
imported and added to the adapter pool.\n\n" f"_GGUF saved to `{info['gguf_path']}`. The Bandit will start sampling it on the next " f"chat. Real LoRA weight-loading is active in the local desktop app; on this Space the " f"adapter uses its trained-from-data genome (system prompt + sampling config)._" ) return note, render_pool_table() def refresh_all(): return ( render_active_genome(), render_pool_table(), render_evolution_log(), render_feedback_log(), ) # ──────────────────────────────────────────────────────────────────────────── # UI # ──────────────────────────────────────────────────────────────────────────── CSS = """ .gradio-container {max-width: 1400px !important;} .evo-header {background: linear-gradient(135deg, #1e3a8a 0%, #7c3aed 100%); color: white; padding: 24px; border-radius: 12px; margin-bottom: 16px; box-shadow: 0 4px 12px rgba(124, 58, 237, 0.15);} .evo-header h1 {margin: 0 0 8px 0; font-size: 2rem;} .evo-header p {margin: 0; opacity: 0.92; line-height: 1.5;} .metric-badge {display: inline-block; padding: 4px 12px; background: rgba(255,255,255,0.18); color: white; border-radius: 999px; margin-right: 6px; margin-top: 4px; font-size: 0.85rem; backdrop-filter: blur(4px);} .evo-notice {margin-top: 14px !important; padding: 10px 14px; background: rgba(255, 255, 255, 0.12); border-radius: 8px; font-size: 0.85rem !important;} """ INTRO_HTML = """
A privacy-first 1B-class language model that visibly improves itself through multi-armed-bandit adapter selection and Lamarckian evolution. Runs fully on-device. No telemetry. No API calls.
⚡ This web demo runs SmolLM2-360M for speed on the free CPU tier (~5 tok/s, answers in 20-40s). The local desktop app runs the full SmolLM2-1.7B for higher quality (5-30× faster on real hardware). The evolution engine, adapter pool, and bandit work identically on both.