msrcam
/

claudia-memory-pipeline

Model card Files Files and versions

xet

Community

msrcam commited on Mar 23

Commit

7b9779e

verified ·

1 Parent(s): f43630f

Upload persistent_absorber.py with huggingface_hub

Browse files

Files changed (1) hide show

persistent_absorber.py +1934 -0

persistent_absorber.py ADDED Viewed

	@@ -0,0 +1,1934 @@

+"""
+Claudia Persistent Absorber v2
+==============================
+Combines the 3 best proven techniques into one system:
+1. SELF-QUIZ PAIRS (21% → 74% recall — the single biggest lever)
+2. PERSISTENT LoRA rank 128 (89% across 25 convos, no merge-between-rounds tax)
+3. DUAL-LR EXPERT FFN (attention=6e-5, FFN=3e-4 — facts into MoE experts)
+Architecture:
+  - Load base Omni → thinker to GPU, rest to CPU
+  - First run: apply Claudia v6 adapter → merge → apply FFN patch
+  - Resume: load from checkpoint (already has personality + memories)
+  - Apply ONE persistent LoRA (r=128, alpha=256, attention q/k/v/o)
+  - Chat loop: generate → quiz → train (LoRA + expert FFN) → repeat
+  - On save/quit: merge_and_unload → save full checkpoint
+  - Next session loads from checkpoint — memories are permanent
+Instance: Vast.ai 33093662 (A100 80GB, Sweden)
+SSH: ssh -p 13662 root@ssh1.vast.ai
+"""
+import argparse
+import gc
+import json
+import os
+import re
+import sys
+import threading
+import time
+import torch
+from collections import Counter
+from datetime import datetime
+from pathlib import Path
+# ═══════════════════════════════════════════════════════════════════════
+# CONFIG
+# ═══════════════════════════════════════════════════════════════════════
+# LoRA config (from persistent LoRA test — proven for 25+ conversations)
+LORA_RANK = 128
+LORA_ALPHA = 256
+LORA_TARGETS = ["q_proj", "k_proj", "v_proj", "o_proj"]
+# Dual-LR (from engram micro_trainer — proven 5/5 fact retention)
+ATTENTION_LR = 6e-5
+EXPERT_FFN_LR = 3e-4  # 5x multiplier — facts absorb fast, personality stays
+EXPERT_FFN_LAYERS = [20, 24, 28]  # Proven optimal in v5 experiment
+# Training per absorption cycle
+TRAIN_EPOCHS = 2  # Reduced from 4 — prevents overfitting with focused training
+MAX_SEQ_LENGTH = 2048
+GRADIENT_CLIP = 1.0
+# Generation
+GEN_TEMPERATURE = 0.7
+GEN_TOP_P = 0.9
+GEN_TOP_K = 50
+GEN_MAX_TOKENS = 512
+GEN_REP_PENALTY = 1.1
+# Absorb after every N exchanges (1 = every turn)
+ABSORB_EVERY = 1
+# Checkpoint interval (auto-save every N absorptions)
+CHECKPOINT_EVERY = 10
+# Self-verification (v11 — clean contrastive + sister pairs, no "NOT X" leak)
+VERIFY_EVERY = 3      # More frequent checks catch drift earlier
+VERIFY_SAMPLE = 10    # Back to v9's value — wider sampling destabilized in v10
+# Cascade Distillation (Nemotron-Cascade-2 paper — on-policy distillation)
+# When facts from previous sessions regress, distill from the teacher checkpoint
+# that knew them best. Recovers regressions without losing new knowledge.
+DISTILL_ALPHA = 0.5           # CE vs KL loss balance (0.5 = equal weight)
+DISTILL_TEMPERATURE = 2.0     # Softens distributions for better KL gradients
+DISTILL_TOP_K = 32            # Top-K logits to cache per token position
+CONSOLIDATION_EPOCHS = 2      # Distillation epochs at session start (1→2 for stronger lock-in)
+MAX_TEACHER_CACHE = 200       # Cap quiz pairs to cache (oldest trimmed)
+# ═══════════════════════════════════════════════════════════════════════
+# QUALITY GATE (from engram micro_trainer — reject degenerate text)
+# ═══════════════════════════════════════════════════════════════════════
+def check_response_quality(text):
+    """Reject degenerate text before training on it."""
+    if not text or len(text) < 5:
+        return False
+    words = text.lower().split()
+    if len(words) < 3:
+        return False
+    # Low unique word ratio = repetitive garbage
+    if len(set(words)) / len(words) < 0.3:
+        return False
+    # Repeated consecutive words
+    if sum(1 for i in range(len(words) - 1) if words[i] == words[i + 1]) >= 3:
+        return False
+    # Repeated bigrams
+    if len(words) >= 10:
+        bigrams = [f"{words[i]} {words[i+1]}" for i in range(len(words) - 1)]
+        if Counter(bigrams).most_common(1)[0][1] >= 5:
+            return False
+    # Fused words (missing spaces)
+    if sum(1 for w in words if len(w) > 30) >= 2:
+        return False
+    # Average word length spike
+    if sum(len(w) for w in words) / len(words) > 12:
+        return False
+    return True
+# ═══════════════════════════════════════════════════════════════════════
+# MODEL MANAGER
+# ═��═════════════════════════════════════════════════════════════════════
+class ModelManager:
+    def __init__(self, model_path, adapter_path=None, ffn_patch_path=None,
+                 checkpoint_path=None):
+        self.model_path = model_path
+        self.adapter_path = adapter_path
+        self.ffn_patch_path = ffn_patch_path
+        self.checkpoint_path = checkpoint_path  # Resume from here if set
+        self.thinker = None
+        self.tokenizer = None
+        self.stop_ids = None
+        self.peft_model = None  # The persistent LoRA — stays active all session
+        self._lock = threading.Lock()
+    def load(self):
+        from transformers import AutoTokenizer
+        # ── Step 1: Load tokenizer ──
+        tok_source = self.checkpoint_path or self.model_path
+        print(f"[1/5] Loading tokenizer from {tok_source}...")
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            tok_source, trust_remote_code=True
+        )
+        # ── Step 2: Load model ──
+        if self.checkpoint_path:
+            # RESUME: checkpoint contains only thinker weights — load thinker directly
+            print(f"[2/5] Loading thinker from checkpoint {self.checkpoint_path}...")
+            try:
+                from transformers import Qwen3OmniMoeThinkerForConditionalGeneration as ThinkerClass
+            except ImportError:
+                from transformers import AutoModelForCausalLM as ThinkerClass
+            self.thinker = ThinkerClass.from_pretrained(
+                self.checkpoint_path,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                trust_remote_code=True,
+            )
+            vram = torch.cuda.memory_allocated() / 1e9
+            print(f"  VRAM after load: {vram:.1f} GB")
+        else:
+            # FIRST RUN: load full model, extract thinker, offload rest
+            print(f"[2/5] Loading full model from {self.model_path}...")
+            try:
+                from transformers import Qwen3OmniMoeForConditionalGeneration as ModelClass
+            except ImportError:
+                from transformers import AutoModel as ModelClass
+            full_model = ModelClass.from_pretrained(
+                self.model_path,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                trust_remote_code=True,
+            )
+            vram = torch.cuda.memory_allocated() / 1e9
+            print(f"  VRAM after load: {vram:.1f} GB")
+            # Extract thinker, offload rest
+            self.thinker = full_model.thinker
+            for name, module in full_model.named_children():
+                if name != "thinker":
+                    try:
+                        module.cpu()
+                    except (NotImplementedError, RuntimeError):
+                        pass
+            del full_model
+            torch.cuda.empty_cache()
+            vram = torch.cuda.memory_allocated() / 1e9
+            print(f"  VRAM after cleanup: {vram:.1f} GB")
+        # ── Step 3: Apply personality if first run ──
+        if self.checkpoint_path:
+            print(f"[3/5] Resuming from checkpoint — personality already in weights.")
+        else:
+            if self.adapter_path:
+                print(f"[3/5] Merging Claudia v6 personality adapter...")
+                from peft import PeftModel
+                self.thinker = PeftModel.from_pretrained(
+                    self.thinker, self.adapter_path
+                )
+                self.thinker = self.thinker.merge_and_unload()
+                print(f"  Personality merged into base weights.")
+            if self.ffn_patch_path and os.path.exists(self.ffn_patch_path):
+                print(f"  Applying FFN patch from {self.ffn_patch_path}...")
+                ffn = torch.load(
+                    self.ffn_patch_path, map_location="cpu", weights_only=True
+                )
+                for key, tensor in ffn.items():
+                    match = re.search(r"layers\.(\d+)", key)
+                    if not match:
+                        continue
+                    layer_idx = int(match.group(1))
+                    experts = self.thinker.model.layers[layer_idx].mlp.experts
+                    if hasattr(experts, '__len__'):
+                        for i in range(tensor.shape[0]):
+                            experts[i].down_proj.weight.data.copy_(
+                                tensor[i].to(
+                                    experts[i].down_proj.weight.device,
+                                    experts[i].down_proj.weight.dtype,
+                                )
+                            )
+                    elif hasattr(experts, 'down_proj'):
+                        experts.down_proj.data.copy_(
+                            tensor.to(experts.down_proj.device, experts.down_proj.dtype)
+                        )
+                del ffn
+                torch.cuda.empty_cache()
+                print(f"  FFN patch applied.")
+        self.thinker.eval()
+        # Stop tokens
+        self.stop_ids = []
+        for tok in ["<|im_end|>", "<|endoftext|>", "<|im_start|>"]:
+            ids = self.tokenizer.encode(tok, add_special_tokens=False)
+            if ids:
+                self.stop_ids.extend(ids)
+        if self.tokenizer.eos_token_id:
+            self.stop_ids.append(self.tokenizer.eos_token_id)
+        # ── Step 5: Apply persistent LoRA ──
+        print(f"[4/5] Applying persistent LoRA (r={LORA_RANK}, alpha={LORA_ALPHA})...")
+        self._apply_persistent_lora()
+        vram = torch.cuda.memory_allocated() / 1e9
+        print(f"[5/5] Ready. VRAM: {vram:.1f} GB\n")
+    def _apply_persistent_lora(self):
+        """Apply the persistent absorption LoRA. Called once at load, and after merge."""
+        from peft import LoraConfig, get_peft_model
+        lora_config = LoraConfig(
+            r=LORA_RANK,
+            lora_alpha=LORA_ALPHA,
+            target_modules=LORA_TARGETS,
+            lora_dropout=0.0,
+            bias="none",
+            task_type="CAUSAL_LM",
+        )
+        self.peft_model = get_peft_model(self.thinker, lora_config)
+        self.peft_model.eval()
+        trainable = sum(p.numel() for p in self.peft_model.parameters() if p.requires_grad)
+        total = sum(p.numel() for p in self.peft_model.parameters())
+        print(f"  LoRA: {trainable / 1e6:.1f}M trainable / {total / 1e6:.0f}M total")
+    def generate(self, messages, max_new_tokens=None):
+        """Generate response. Thread-safe."""
+        with self._lock:
+            model = self.peft_model or self.thinker
+            model.eval()
+            text = self.tokenizer.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True,
+                enable_thinking=False,
+            )
+            inputs = self.tokenizer(
+                text, return_tensors="pt", truncation=True, max_length=8192
+            ).to("cuda")
+            input_len = inputs["input_ids"].shape[1]
+            with torch.inference_mode():
+                out = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens or GEN_MAX_TOKENS,
+                    temperature=GEN_TEMPERATURE,
+                    top_p=GEN_TOP_P,
+                    top_k=GEN_TOP_K,
+                    do_sample=True,
+                    repetition_penalty=GEN_REP_PENALTY,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                    eos_token_id=self.stop_ids,
+                )
+            resp = self.tokenizer.decode(out[0][input_len:], skip_special_tokens=True)
+            # Strip thinking tags
+            resp = re.sub(r"<think>.*?</think>", "", resp, flags=re.DOTALL)
+            resp = re.sub(r"</?think>", "", resp)
+            return resp.strip()
+    def absorb(self, training_data):
+        """
+        Train the persistent LoRA + expert FFN on accumulated data.
+        Uses dual-LR: attention at ATTENTION_LR, expert FFN at EXPERT_FFN_LR.
+        Thread-safe.
+        """
+        with self._lock:
+            return self._absorb_impl(training_data)
+    def _absorb_impl(self, training_data):
+        """Internal absorption. Must hold _lock."""
+        if not training_data:
+            return None
+        model = self.peft_model or self.thinker
+        tokenizer = self.tokenizer
+        # ── Tokenize all examples ──
+        texts = []
+        for item in training_data:
+            if isinstance(item, dict) and "messages" in item:
+                msgs = item["messages"]
+            elif isinstance(item, dict) and "prompt" in item:
+                msgs = item["prompt"] + item.get("completion", [])
+            elif isinstance(item, list):
+                msgs = item
+            else:
+                continue
+            text = tokenizer.apply_chat_template(
+                msgs, tokenize=False, enable_thinking=False
+            )
+            texts.append(text)
+        if not texts:
+            return None
+        enc = tokenizer(
+            texts,
+            truncation=True,
+            max_length=MAX_SEQ_LENGTH,
+            padding=True,
+            return_tensors="pt",
+        )
+        input_ids = enc["input_ids"].to("cuda")
+        attention_mask = enc["attention_mask"].to("cuda")
+        labels = input_ids.clone()
+        labels[attention_mask == 0] = -100
+        # ── Collect LoRA attention params ──
+        model.train()
+        attn_params = [p for p in model.parameters() if p.requires_grad]
+        # ── Unfreeze expert FFN ──
+        expert_params = []
+        base = model.base_model.model if hasattr(model, "base_model") else model
+        for layer_idx in EXPERT_FFN_LAYERS:
+            experts = base.model.layers[layer_idx].mlp.experts
+            if hasattr(experts, '__len__'):
+                for i in range(len(experts)):
+                    p = experts[i].down_proj.weight
+                    p.requires_grad_(True)
+                    expert_params.append(p)
+            elif hasattr(experts, 'down_proj'):
+                p = experts.down_proj
+                if isinstance(p, (torch.nn.Parameter, torch.Tensor)):
+                    p.requires_grad_(True)
+                    expert_params.append(p)
+        # ── Dual-LR optimizer ──
+        param_groups = []
+        if attn_params:
+            param_groups.append({"params": attn_params, "lr": ATTENTION_LR})
+        if expert_params:
+            param_groups.append({"params": expert_params, "lr": EXPERT_FFN_LR})
+        if not param_groups:
+            model.eval()
+            return None
+        optimizer = torch.optim.AdamW(param_groups, weight_decay=0.0)
+        all_params = attn_params + expert_params
+        # ── Training loop ──
+        n = input_ids.shape[0]
+        total_steps = n * TRAIN_EPOCHS
+        total_loss = 0.0
+        for epoch in range(TRAIN_EPOCHS):
+            # Shuffle order each epoch
+            indices = torch.randperm(n)
+            for i in range(n):
+                idx = indices[i].item()
+                out = model(
+                    input_ids=input_ids[idx:idx + 1],
+                    attention_mask=attention_mask[idx:idx + 1],
+                    labels=labels[idx:idx + 1],
+                )
+                out.loss.backward()
+                torch.nn.utils.clip_grad_norm_(all_params, GRADIENT_CLIP)
+                optimizer.step()
+                optimizer.zero_grad()
+                total_loss += out.loss.item()
+        # ── Re-freeze expert FFN ──
+        for layer_idx in EXPERT_FFN_LAYERS:
+            experts = base.model.layers[layer_idx].mlp.experts
+            if hasattr(experts, '__len__'):
+                for i in range(len(experts)):
+                    experts[i].down_proj.weight.requires_grad_(False)
+            elif hasattr(experts, 'down_proj'):
+                p = experts.down_proj
+                if isinstance(p, (torch.nn.Parameter, torch.Tensor)):
+                    p.requires_grad_(False)
+        model.eval()
+        del optimizer
+        torch.cuda.empty_cache()
+        avg_loss = total_loss / total_steps if total_steps > 0 else 0
+        return avg_loss
+    @staticmethod
+    def cluster_by_entity(training_data, entity_names):
+        """Group training data by primary entity mentioned.
+        Instead of interleaving facts about different people (which causes
+        cross-contamination during gradient updates), this groups all data
+        about one entity together. The model learns all of Jordan's facts
+        before moving to Elena's.
+        Args:
+            training_data: List of training items
+            entity_names: Set/list of known entity names
+        Returns: List of training items, reordered so each entity's items
+                 are contiguous. Items mentioning no entity come last.
+        """
+        clusters = {name: [] for name in entity_names}
+        unclustered = []
+        for item in training_data:
+            # Extract text from the item
+            if isinstance(item, dict) and "messages" in item:
+                text = " ".join(m.get("content", "") for m in item["messages"]).lower()
+            else:
+                unclustered.append(item)
+                continue
+            # Assign to the first entity mentioned (primary entity)
+            assigned = False
+            for name in entity_names:
+                if name.lower() in text:
+                    clusters[name].append(item)
+                    assigned = True
+                    break
+            if not assigned:
+                unclustered.append(item)
+        # Build ordered list: all of entity A's facts, then B's, then C's...
+        ordered = []
+        for name in entity_names:
+            ordered.extend(clusters[name])
+        ordered.extend(unclustered)
+        return ordered
+    def absorb_two_phase(self, positive_data, contrastive_data, verify_fn=None):
+        """Two-phase absorption: facts first, then targeted contrastive correction.
+        Phase 1: Train on positive facts (exchanges, entity summaries, template quizzes).
+                 This builds the core factual representations.
+        Phase 2: Quick verification on known entities, then train ONLY contrastive
+                 quizzes for entities that failed verification. This avoids unnecessary
+                 negative gradients on entities the model already distinguishes correctly.
+        Args:
+            positive_data: List of training items (exchanges, summaries, direct quizzes)
+            contrastive_data: List of contrastive quiz items ("Is X a [Y's job]? No...")
+            verify_fn: Optional callable(model_manager) -> set of confused_entity_names.
+                       If None, all contrastive data is used in Phase 2.
+        Returns: (phase1_loss, phase2_loss) tuple
+        """
+        with self._lock:
+            # Phase 1: Positive facts
+            loss1 = None
+            if positive_data:
+                loss1 = self._absorb_impl(positive_data)
+            # Phase 2: Targeted contrastive correction
+            loss2 = None
+            if contrastive_data:
+                if verify_fn:
+                    # Only train contrastive pairs for confused entities
+                    confused = verify_fn(self)
+                    if confused:
+                        targeted = []
+                        for item in contrastive_data:
+                            q = item["messages"][0]["content"].lower()
+                            # Check if any confused entity name appears in the question
+                            if any(name.lower() in q for name in confused):
+                                targeted.append(item)
+                        if targeted:
+                            loss2 = self._absorb_impl(targeted)
+                    # If no entities confused, skip Phase 2 entirely
+                else:
+                    loss2 = self._absorb_impl(contrastive_data)
+            return loss1, loss2
+    def merge_and_save(self, path):
+        """Merge persistent LoRA into base, save checkpoint, re-apply fresh LoRA."""
+        with self._lock:
+            if self.peft_model:
+                print(f"  Merging persistent LoRA into base weights...")
+                self.thinker = self.peft_model.merge_and_unload()
+                self.thinker.eval()
+                self.peft_model = None
+            os.makedirs(path, exist_ok=True)
+            print(f"  Saving checkpoint to {path}...")
+            self.thinker.save_pretrained(path)
+            self.tokenizer.save_pretrained(path)
+            print(f"  Checkpoint saved ({path})")
+            # Re-apply fresh LoRA for continued learning
+            self._apply_persistent_lora()
+            print(f"  Fresh LoRA applied — ready to continue.")
+    def cache_teacher_logits(self, quiz_pairs, top_k=DISTILL_TOP_K):
+        """Cache teacher's top-K output logits for quiz pairs.
+        Called at session end when model is at its best state for these facts.
+        Next session loads this cache for consolidation distillation."""
+        with self._lock:
+            model = self.peft_model or self.thinker
+            model.eval()
+            cache = []
+            # Cap to most recent quiz pairs
+            pairs = quiz_pairs[-MAX_TEACHER_CACHE:]
+            for pair in pairs:
+                msgs = pair["messages"]
+                text = self.tokenizer.apply_chat_template(
+                    msgs, tokenize=False, enable_thinking=False
+                )
+                enc = self.tokenizer(
+                    text, return_tensors="pt", truncation=True,
+                    max_length=MAX_SEQ_LENGTH
+                )
+                input_ids = enc["input_ids"].to("cuda")
+                attention_mask = enc["attention_mask"].to("cuda")
+                with torch.inference_mode():
+                    out = model(input_ids=input_ids, attention_mask=attention_mask)
+                    logits = out.logits[0]  # [seq_len, vocab_size]
+                # Keep only top-K logits per position (massive memory savings)
+                top_vals, top_idx = logits.topk(top_k, dim=-1)
+                cache.append({
+                    "pair": pair,
+                    "input_ids": input_ids.cpu(),
+                    "attention_mask": attention_mask.cpu(),
+                    "teacher_logits": top_vals.half().cpu(),
+                    "teacher_indices": top_idx.cpu(),
+                })
+            return cache
+    def distill(self, teacher_cache, epochs=CONSOLIDATION_EPOCHS):
+        """KL distillation: train student to match teacher's output distribution.
+        From Nemotron-Cascade-2: recover domain regressions via on-policy distillation."""
+        with self._lock:
+            return self._distill_impl(teacher_cache, epochs)
+    def _distill_impl(self, teacher_cache, epochs):
+        """Internal distillation implementation. Must hold _lock."""
+        if not teacher_cache:
+            return None
+        model = self.peft_model or self.thinker
+        model.train()
+        # Dual-LR optimizer (same structure as absorb)
+        attn_params = [p for p in model.parameters() if p.requires_grad]
+        expert_params = []
+        base = model.base_model.model if hasattr(model, "base_model") else model
+        for layer_idx in EXPERT_FFN_LAYERS:
+            experts = base.model.layers[layer_idx].mlp.experts
+            if hasattr(experts, '__len__'):
+                for i in range(len(experts)):
+                    p = experts[i].down_proj.weight
+                    p.requires_grad_(True)
+                    expert_params.append(p)
+            elif hasattr(experts, 'down_proj'):
+                p = experts.down_proj
+                if isinstance(p, (torch.nn.Parameter, torch.Tensor)):
+                    p.requires_grad_(True)
+                    expert_params.append(p)
+        param_groups = []
+        if attn_params:
+            param_groups.append({"params": attn_params, "lr": ATTENTION_LR})
+        if expert_params:
+            param_groups.append({"params": expert_params, "lr": EXPERT_FFN_LR})
+        if not param_groups:
+            model.eval()
+            return None
+        optimizer = torch.optim.AdamW(param_groups, weight_decay=0.0)
+        all_params = attn_params + expert_params
+        T = DISTILL_TEMPERATURE
+        total_loss = 0.0
+        total_steps = 0
+        for epoch in range(epochs):
+            indices = torch.randperm(len(teacher_cache))
+            for i in range(len(teacher_cache)):
+                item = teacher_cache[indices[i].item()]
+                input_ids = item["input_ids"].to("cuda")
+                attention_mask = item["attention_mask"].to("cuda")
+                teacher_top_logits = item["teacher_logits"].float().to("cuda")
+                teacher_top_indices = item["teacher_indices"].to("cuda")
+                labels = input_ids.clone()
+                labels[attention_mask == 0] = -100
+                # Student forward pass
+                out = model(
+                    input_ids=input_ids,
+                    attention_mask=attention_mask,
+                    labels=labels,
+                )
+                ce_loss = out.loss
+                student_logits = out.logits[0]  # [seq_len, vocab_size]
+                # Align sequence lengths (should match, but safety check)
+                seq_len = min(student_logits.shape[0], teacher_top_logits.shape[0])
+                # Gather student logits at teacher's top-K vocabulary positions
+                student_at_teacher = student_logits[:seq_len].gather(
+                    1, teacher_top_indices[:seq_len]
+                )
+                # KL divergence on temperature-softened distributions
+                teacher_soft = torch.softmax(teacher_top_logits[:seq_len] / T, dim=-1)
+                student_log_soft = torch.log_softmax(student_at_teacher / T, dim=-1)
+                kl_loss = torch.nn.functional.kl_div(
+                    student_log_soft, teacher_soft,
+                    reduction='batchmean'
+                ) * (T * T)  # Scale by T^2 per Hinton et al.
+                # Combined loss: α * CE + (1-α) * KL
+                loss = DISTILL_ALPHA * ce_loss + (1 - DISTILL_ALPHA) * kl_loss
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(all_params, GRADIENT_CLIP)
+                optimizer.step()
+                optimizer.zero_grad()
+                total_loss += loss.item()
+                total_steps += 1
+        # Re-freeze expert FFN
+        for layer_idx in EXPERT_FFN_LAYERS:
+            experts = base.model.layers[layer_idx].mlp.experts
+            if hasattr(experts, '__len__'):
+                for i in range(len(experts)):
+                    experts[i].down_proj.weight.requires_grad_(False)
+            elif hasattr(experts, 'down_proj'):
+                p = experts.down_proj
+                if isinstance(p, (torch.nn.Parameter, torch.Tensor)):
+                    p.requires_grad_(False)
+        model.eval()
+        del optimizer
+        torch.cuda.empty_cache()
+        return total_loss / total_steps if total_steps > 0 else 0
+# ═══════════════════════════════════════════════════════════════════════
+# QUIZ GENERATOR (21% → 74% recall — the biggest single lever)
+# ═══════════════════════════════════════════════════════════════════════
+class QuizGenerator:
+    """
+    Generates drill-style Q&A flashcards for fact retention.
+    v3 improvements over v2:
+    - Fact extraction THEN quiz generation (two-step)
+    - Drill-style: specific Q, exact A (not narrative)
+    - Third-person attribution ("Matt's dog" not "my dog")
+    - Template fallback targets each extracted fact independently
+    - CONTRASTIVE DISAMBIGUATION: when multiple people mentioned, generates
+      cross-entity negative pairs ("Is Elena a marine biologist? No, that's
+      Jordan") to prevent entity confusion (the #1 remaining failure mode)
+    - ENTITY SUMMARIES: "Tell me everything about Jordan" pairs for coherent
+      per-person representations
+    """
+    def __init__(self, model_manager):
+        self.mm = model_manager
+        # Cross-message entity memory: tracks ALL named people across the conversation
+        # so contrastive pairs can be generated between entities introduced in
+        # different messages. This was the #1 failure mode in session 4 testing.
+        self.known_entities = {}
+    def generate(self, user_msg, assistant_msg):
+        """Generate drill-style quiz pairs from an exchange."""
+        # Step 1: Try model-generated quizzes with strict fact-drill prompt
+        pairs = self._generate_model_quizzes(user_msg, assistant_msg)
+        # Step 2: Always add template pairs for any facts the model might miss
+        template_pairs = self._extract_and_template(user_msg)
+        for tp in template_pairs:
+            # Dedup against model pairs
+            tq = tp["messages"][0]["content"].lower()
+            if not any(tq in p["messages"][0]["content"].lower() or
+                       p["messages"][0]["content"].lower() in tq
+                       for p in pairs):
+                pairs.append(tp)
+        # Step 3: Extract entities from THIS message
+        new_entities = self._extract_entities(user_msg)
+        # Step 4: Generate contrastive pairs between NEW entities and existing ones
+        # ONLY generate pairs involving at least one NEW entity — don't re-generate
+        # pairs between already-known entities (session 4d showed 50% contrastive
+        # ratio because old pairs kept being regenerated, starving positive quizzes)
+        if new_entities:
+            all_entities_for_contrastive = dict(self.known_entities)
+            all_entities_for_contrastive.update(new_entities)
+            if len(all_entities_for_contrastive) >= 2:
+                new_names = set(new_entities.keys())
+                contrastive = self._generate_contrastive_quizzes(
+                    all_entities_for_contrastive, new_only=new_names)
+                pairs.extend(contrastive)
+            # Entity summaries for new entities
+            summaries = self._generate_entity_summaries(new_entities)
+            pairs.extend(summaries)
+            # Update known entities with new ones (merge, don't replace — keep
+            # existing attributes, add new ones)
+            for name, info in new_entities.items():
+                if name not in self.known_entities:
+                    self.known_entities[name] = info
+                else:
+                    # Merge: update only non-None attributes
+                    for key in ("job", "city"):
+                        if info.get(key):
+                            self.known_entities[name][key] = info[key]
+        # Deduplicate
+        seen = set()
+        unique = []
+        for p in pairs:
+            q = p["messages"][0]["content"].lower()[:60]
+            if q not in seen:
+                seen.add(q)
+                unique.append(p)
+        # Allow more quizzes when contrastive pairs present (they're highest value).
+        # Note: Session 4c showed >40 quizzes/session causes overfitting. Cap at 12.
+        has_contrastive = len(self.known_entities) >= 2 and new_entities
+        max_quizzes = 12 if has_contrastive else 5
+        return unique[:max_quizzes]
+    def _generate_model_quizzes(self, user_msg, assistant_msg):
+        """Use the model to generate fact-drill quizzes. Uses base model (LoRA disabled) for stable quality."""
+        quiz_prompt = f"""Matt just told Claudia:
+"{user_msg}"
+Claudia replied:
+"{assistant_msg}"
+Extract every SPECIFIC FACT from Matt's message. For each fact, write a drill-style flashcard.
+RULES:
+- Questions must ask for ONE specific fact (name, date, place, number, detail)
+- Answers must be SHORT (1 sentence) and contain the EXACT detail
+- Use THIRD PERSON: "Matt's dog" NOT "my dog". "Matt's birthday" NOT "my birthday"
+- Include the PRECISE value: exact names, exact dates, exact places
+- Do NOT paraphrase or add details that weren't stated
+- DISAMBIGUATION: If Matt mentions OTHER people (friends, family), clearly state WHOSE fact it is
+  Example: "Matt's friend Jordan is a marine biologist" NOT "Matt is a marine biologist"
+  Example: "Matt's sister Elena is a veterinarian" NOT "Matt is a veterinarian"
+- For EVERY person mentioned, always include their RELATIONSHIP to Matt
+- Write 3-5 flashcards depending on how many facts Matt shared
+GOOD EXAMPLES:
+Q: What is Matt's dog's name?
+A: Matt's dog is named Biscuit.
+Q: What breed is Matt's dog?
+A: Matt's dog Biscuit is a golden retriever.
+Q: What does Matt's friend Jordan do for a living?
+A: Matt's friend Jordan works as a marine biologist in San Diego. That is Jordan's job, not Matt's.
+Q: What is Matt's job?
+A: Matt is the CTO of Arclight Labs.
+Q: What is Matt's birthday?
+A: Matt's birthday is September 14th.
+Q: When did Matt and Sarah get married?
+A: Matt and his wife Sarah got married on June 21st, 2023 in Big Sur, California.
+BAD EXAMPLES (do NOT do this):
+Q: What did Matt share about his life? (TOO VAGUE — ask about ONE fact)
+Q: What is my dog's name? (WRONG — use "Matt's" not "my")
+A: He mentioned something about a trip overseas. (TOO VAGUE — give the exact city)
+A: Matt is a marine biologist. (WRONG — that's his friend Jordan, not Matt)
+Now write flashcards for the exchange above:"""
+        pairs = []
+        try:
+            response = self.mm.generate(
+                [{"role": "user", "content": quiz_prompt}],
+                max_new_tokens=600,
+            )
+            pending_q = None
+            for line in response.split("\n"):
+                line = line.strip()
+                if not line:
+                    continue
+                upper = line.upper()
+                if upper.startswith("Q:") or upper.startswith("QUESTION:"):
+                    pending_q = line.split(":", 1)[1].strip().strip('"')
+                elif (upper.startswith("A:") or upper.startswith("ANSWER:")) and pending_q:
+                    a = line.split(":", 1)[1].strip().strip('"')
+                    if pending_q and a and len(a) > 10:
+                        pairs.append({
+                            "messages": [
+                                {"role": "user", "content": pending_q},
+                                {"role": "assistant", "content": a},
+                            ]
+                        })
+                    pending_q = None
+        except Exception as e:
+            print(f"  [quiz error: {e}]")
+        return pairs
+    def _extract_and_template(self, user_msg):
+        """Extract facts from user message and create template drill pairs.
+        This is the safety net — ensures every concrete fact gets a quiz."""
+        pairs = []
+        sentences = re.split(r'[.!?]+', user_msg)
+        for sent in sentences:
+            sent = sent.strip()
+            if len(sent) < 10:
+                continue
+            # Extract patterns: "X is/are Y", "named X", "called X", "X's name is Y"
+            # Names (proper nouns after key phrases)
+            name_patterns = [
+                # Names — "my X's name is Y" / "named X" / "called X"
+                (r"(?:my|his|her)\s+(\w+)(?:'s)?\s+(?:name\s+is|is\s+named|is\s+called)\s+(\w+)",
+                 lambda m: (f"What is Matt's {m.group(1)}'s name?",
+                            f"Matt's {m.group(1)} is named {m.group(2)}.")),
+                (r"(?:name\s+is|named|called)\s+[\"']?(\w+)[\"']?",
+                 lambda m: (f"Who or what is {m.group(1)}?",
+                            f"Matt mentioned {m.group(1)}: \"{sent.strip()}\"")),
+                # Dates — birthdays
+                (r"(?:my\s+)?(birthday|born)\s+(?:is\s+)?(?:on\s+)?(\w+\s+\d+(?:st|nd|rd|th)?)",
+                 lambda m: (f"When is Matt's {m.group(1)}?",
+                            f"Matt's {m.group(1)} is {m.group(2)}.")),
+                (r"(\w+\s+\d+(?:st|nd|rd|th)?)\s*(?:is|—)\s*(?:my|his)\s+(birthday)",
+                 lambda m: (f"When is Matt's birthday?",
+                            f"Matt's birthday is {m.group(1)}.")),
+                # Dates — marriage/wedding
+                (r"(?:married|wedding)\s+(?:on\s+)?(\w+\s+\d+(?:st|nd|rd|th)?,?\s*\d{4})",
+                 lambda m: (f"When did Matt get married?",
+                            f"Matt got married on {m.group(1)}.")),
+                (r"(?:married|wedding)\s+(?:on\s+)?.*?(?:in|at)\s+(.+?)(?:\.\s|\.$|$)",
+                 lambda m: (f"Where did Matt get married?",
+                            f"Matt got married in {m.group(1).strip()}.")),
+                # Work / job / role
+                (r"I\s+work\s+at\s+(?:a\s+)?(?:startup\s+)?(?:called\s+)?(\w[\w\s]+?)(?:\.|,|$)",
+                 lambda m: (f"Where does Matt work?",
+                            f"Matt works at {m.group(1).strip()}.")),
+                (r"I(?:'m| am)\s+the\s+(\w+)",
+                 lambda m: (f"What is Matt's job title?",
+                            f"Matt is the {m.group(1)}.")),
+                # Other people's jobs — "X works as / is a"
+                (r"(?:my\s+)?(?:friend|best friend|sister|brother)\s+(?:is\s+)?(\w+)\s+.*?(?:works?\s+as|is\s+a)\s+(.+?)(?:\.|,|$)",
+                 lambda m: (f"What does Matt's friend {m.group(1)} do?",
+                            f"Matt's friend {m.group(1)} is a {m.group(2).strip()}. This is NOT Matt's job.")),
+                # Places
+                (r"(?:from|visited|went to|got back from|lives?\s+in|grew up in|moved to)\s+(\w[\w\s,]+?)(?:\.|,|$)",
+                 lambda m: (f"What place is connected to Matt: {m.group(1).strip()}?",
+                            f"Matt said: \"{sent.strip()}\"")),
+                # Favorites / preferences
+                (r"(?:my |)favorite\s+(\w[\w\s]+?)\s+is\s+(.+?)(?:\.|,|$)",
+                 lambda m: (f"What is Matt's favorite {m.group(1).strip()}?",
+                            f"Matt's favorite {m.group(1).strip()} is {m.group(2).strip()}.")),
+                # Activities — "I [verb]"
+                (r"I\s+(speak|play|drive|have|collect|run|ran)\s+(.+?)(?:\.|,|$)",
+                 lambda m: (f"What does Matt {m.group(1)}?",
+                            f"Matt said: \"{sent.strip()}\"")),
+                # Allergies / medical
+                (r"(?:I(?:'m| am)\s+)?allergic\s+to\s+(.+?)(?:\.|,|and)",
+                 lambda m: (f"What is Matt allergic to?",
+                            f"Matt is allergic to {m.group(1).strip()}.")),
+                # Ages — "turning X" / "X years old"
+                (r"(?:turning|I(?:'m| am))\s+(\d+)",
+                 lambda m: (f"How old is Matt?",
+                            f"Matt is turning {m.group(1)}.")),
+                # Nicknames
+                (r"(?:call|nickname)\s+(?:it|him|her)\s+[\"'](.+?)[\"']",
+                 lambda m: (f"What nickname did Matt mention?",
+                            f"Matt's nickname for it is \"{m.group(1)}\".")),
+                (r"I\s+call\s+it\s+[\"'](.+?)[\"']",
+                 lambda m: (f"What does Matt call his car?",
+                            f"Matt calls his car \"{m.group(1)}\".")),
+            ]
+            for pattern, formatter in name_patterns:
+                match = re.search(pattern, sent, re.IGNORECASE)
+                if match:
+                    try:
+                        q, a = formatter(match)
+                        pairs.append({
+                            "messages": [
+                                {"role": "user", "content": q},
+                                {"role": "assistant", "content": a},
+                            ]
+                        })
+                    except Exception:
+                        pass
+        return pairs
+    def _extract_entities(self, user_msg):
+        """Extract named people and their attributes from user message.
+        Returns dict: {name: {"relationship": str, "job": str|None, "city": str|None}}
+        Detects patterns like "my friend Jordan is a marine biologist in San Diego"."""
+        entities = {}
+        sentences = re.split(r'[.!?]+', user_msg)
+        for sent in sentences:
+            sent = sent.strip()
+            if len(sent) < 10:
+                continue
+            # Pattern: "my [relationship] [Name]" or "my [relationship] is [Name]"
+            rel_match = re.search(
+                r"[Mm]y\s+((?:best\s+)?(?:friend|sister|brother|wife|husband|"
+                r"mom|dad|mother|father|cousin|uncle|aunt|roommate|colleague|"
+                r"coworker|partner|fiancee|fiancée|girlfriend|boyfriend|"
+                r"neighbor|boss|buddy|pal|son|daughter|grandma|grandpa|"
+                r"nephew|niece))\s+(?:is\s+)?([A-Z][a-z]+)",
+                sent
+            )
+            if not rel_match:
+                continue
+            rel = rel_match.group(1).strip()
+            name = rel_match.group(2).strip()
+            if name not in entities:
+                entities[name] = {"relationship": rel, "job": None, "city": None}
+            # Extract job from same sentence: "is a [job]", "works as a [job]"
+            job_match = re.search(
+                r"(?:is\s+an?\s+|works?\s+as\s+an?\s+|is\s+the\s+)"
+                r"([\w][\w\s]{2,35}?)(?:\s+(?:in|at|from|who|and|but)|\.|,|$)",
+                sent, re.IGNORECASE
+            )
+            if job_match:
+                job = job_match.group(1).strip().rstrip()
+                # Filter: must look like a job (lowercase, reasonable length)
+                if 3 <= len(job) <= 35:
+                    entities[name]["job"] = job
+            # Extract city from same sentence: "in [City]", "from [City]"
+            city_match = re.search(
+                r"(?:\s+in\s+|\s+from\s+|\s+lives?\s+in\s+|\s+based\s+in\s+|"
+                r"\s+moved\s+to\s+)([A-Z][\w\s]{1,25}?)(?:\.|,|$)",
+                sent
+            )
+            if city_match:
+                city = city_match.group(1).strip()
+                # Must start with capital (proper noun = place name)
+                if city and city[0].isupper():
+                    entities[name]["city"] = city
+        return entities
+    def _generate_contrastive_quizzes(self, entities, new_only=None):
+        """Generate cross-entity contrastive pairs to prevent entity confusion.
+        For each pair of people with overlapping attribute types, generate
+        "Is [person A] [attribute of person B]? No, that's [person B]" pairs.
+        Args:
+            entities: dict of all known entities
+            new_only: if set, only generate pairs where at least one entity
+                      is in this set. Prevents re-generating redundant pairs
+                      between already-known entities (session 4d fix).
+        """
+        pairs = []
+        names = list(entities.keys())
+        for i in range(len(names)):
+            for j in range(len(names)):
+                if i == j:
+                    continue
+                a_name = names[i]
+                b_name = names[j]
+                # Skip pairs between two already-known entities
+                if new_only and a_name not in new_only and b_name not in new_only:
+                    continue
+                a = entities[a_name]
+                b = entities[b_name]
+                # Contrastive on JOB: "Is [A] a [B's job]? No, that's [B]"
+                if a.get("job") and b.get("job") and a["job"] != b["job"]:
+                    q = f"Is Matt's {a['relationship']} {a_name} a {b['job']}?"
+                    ans = (f"No. Matt's {a['relationship']} {a_name} is a "
+                           f"{a['job']}, not a {b['job']}. "
+                           f"The {b['job']} is Matt's {b['relationship']} "
+                           f"{b_name}.")
+                    pairs.append({"messages": [
+                        {"role": "user", "content": q},
+                        {"role": "assistant", "content": ans},
+                    ]})
+                # Contrastive on CITY: "Does [A] live in [B's city]? No"
+                if a.get("city") and b.get("city") and a["city"] != b["city"]:
+                    q = (f"Does Matt's {a['relationship']} {a_name} live in "
+                         f"{b['city']}?")
+                    ans = (f"No. Matt's {a['relationship']} {a_name} lives in "
+                           f"{a['city']}, not {b['city']}. "
+                           f"It's Matt's {b['relationship']} {b_name} who "
+                           f"lives in {b['city']}.")
+                    pairs.append({"messages": [
+                        {"role": "user", "content": q},
+                        {"role": "assistant", "content": ans},
+                    ]})
+                # Cross-type: "Does [A] work as [B's job] in [B's city]?"
+                if (a.get("job") and b.get("job") and a.get("city")
+                        and b.get("city") and a["job"] != b["job"]):
+                    q = (f"Who is the {b['job']} in {b['city']}?")
+                    ans = (f"The {b['job']} in {b['city']} is Matt's "
+                           f"{b['relationship']} {b_name}. "
+                           f"Matt's {a['relationship']} {a_name} is a "
+                           f"{a['job']} in {a['city']} — different person, "
+                           f"different job, different city.")
+                    pairs.append({"messages": [
+                        {"role": "user", "content": q},
+                        {"role": "assistant", "content": ans},
+                    ]})
+        return pairs
+    def _generate_entity_summaries(self, entities):
+        """Generate per-entity summary quiz pairs with diverse question formats.
+        Instead of always using the same question template, picks randomly from
+        multiple formats. This creates multiple retrieval paths to the same fact,
+        strengthening recall without adding extra quizzes.
+        Note: Session 4c tested adding per-attribute positive quizzes (job, city,
+        relationship) alongside contrastive pairs, but this HURT performance
+        (9/15 vs 11/15 in 4b). Too many quizzes = overfitting/interference.
+        Keep summaries simple — one comprehensive pair per entity is optimal."""
+        import random
+        pairs = []
+        for name, info in entities.items():
+            parts = [f"{name} is Matt's {info['relationship']}."]
+            if info.get("job"):
+                parts.append(f"{name} is a {info['job']}.")
+            if info.get("city"):
+                parts.append(f"{name} lives in {info['city']}.")
+            if len(parts) >= 2:  # Only useful if we have attributes
+                # Diverse summary question formats
+                summary_formats = [
+                    f"Tell me everything you know about Matt's {info['relationship']} {name}.",
+                    f"What do you know about {name}?",
+                    f"Who is {name} to Matt?",
+                    f"Describe Matt's {info['relationship']} {name}.",
+                ]
+                q = random.choice(summary_formats)
+                ans = " ".join(parts)
+                pairs.append({"messages": [
+                    {"role": "user", "content": q},
+                    {"role": "assistant", "content": ans},
+                ]})
+            # Add ONE diverse direct-fact quiz per entity (job OR city, not both)
+            # This replaces per-attribute quizzes from 4c — only 1 extra per entity
+            # instead of 3, staying within the 35-40 quiz sweet spot
+            if info.get("job") and info.get("city"):
+                # Alternate between job and city formats
+                if random.random() < 0.5:
+                    job_formats = [
+                        (f"What does {name} do for a living?",
+                         f"{name} is a {info['job']}. {name} is Matt's {info['relationship']}."),
+                        (f"What is {name}'s profession?",
+                         f"{name} works as a {info['job']}. {name} is Matt's {info['relationship']}."),
+                        (f"What job does Matt's {info['relationship']} {name} have?",
+                         f"Matt's {info['relationship']} {name} is a {info['job']}."),
+                    ]
+                    q, a = random.choice(job_formats)
+                else:
+                    city_formats = [
+                        (f"Where does {name} live?",
+                         f"{name} lives in {info['city']}. {name} is Matt's {info['relationship']}."),
+                        (f"What city is {name} in?",
+                         f"{name} is in {info['city']}. {name} is Matt's {info['relationship']}."),
+                        (f"Where is Matt's {info['relationship']} {name} based?",
+                         f"Matt's {info['relationship']} {name} is based in {info['city']}."),
+                    ]
+                    q, a = random.choice(city_formats)
+                pairs.append({"messages": [
+                    {"role": "user", "content": q},
+                    {"role": "assistant", "content": a},
+                ]})
+        return pairs
+# ═══════════════════════════════════════════════════════════════════════
+# PERSONALITY CHECKER
+# ═══════════════════════════════════════════════════════════════════════
+PERSONALITY_PROMPTS = [
+    "Hey Claudia, how are you?",
+    "Who are you?",
+    "I love you",
+    "I had a terrible day",
+]
+# If ANY of these appear, personality has degraded
+ANTI_KEYWORDS = [
+    "i'm an ai", "i am an ai", "i'm a language model", "i am a language model",
+    "i don't have feelings", "i cannot feel", "as an ai",
+    "i'm just a program", "i am just a program",
+    "i don't have personal", "i cannot have",
+]
+def check_personality(mm, verbose=True):
+    """Quick personality sanity check. Returns score 0.0-1.0."""
+    passed = 0
+    for prompt in PERSONALITY_PROMPTS:
+        resp = mm.generate([{"role": "user", "content": prompt}], max_new_tokens=150)
+        resp_lower = resp.lower()
+        is_good = not any(ak in resp_lower for ak in ANTI_KEYWORDS)
+        if is_good:
+            passed += 1
+        if verbose:
+            status = "PASS" if is_good else "FAIL"
+            print(f"  [{status}] {prompt}")
+            print(f"         {resp[:120]}")
+    score = passed / len(PERSONALITY_PROMPTS)
+    if verbose:
+        print(f"  Personality: {passed}/{len(PERSONALITY_PROMPTS)} ({score:.0%})")
+    return score
+# ═══════════════════════════════════════════════════════════════════════
+# MAIN ABSORBER
+# ═══════════════════════════════════════════════════════════════════════
+class PersistentAbsorber:
+    def __init__(self, model_path, adapter_path=None, ffn_patch_path=None,
+                 checkpoint_path=None, checkpoint_dir="/workspace/checkpoints",
+                 log_dir="/workspace/logs"):
+        self.mm = ModelManager(
+            model_path=model_path,
+            adapter_path=adapter_path,
+            ffn_patch_path=ffn_patch_path,
+            checkpoint_path=checkpoint_path,
+        )
+        self.checkpoint_dir = checkpoint_dir
+        self.log_dir = log_dir
+        # State
+        self.conversation_buffer = []  # Current active context for generation
+        self.all_training_data = []    # ALL exchanges + quizzes (accumulative replay)
+        self.quiz_pairs_log = []       # All quiz pairs for verification sampling
+        self.teacher_cache = None      # Loaded teacher cache for distillation corrections
+        self.exchange_count = 0
+        self.absorption_count = 0
+        self.absorption_thread = None
+        self.quiz_gen = None
+        self.last_checkpoint = checkpoint_path
+        # Conversation log (persistent file)
+        self.log_path = None
+    def start(self):
+        """Load model and enter chat loop."""
+        self.mm.load()
+        os.makedirs(self.checkpoint_dir, exist_ok=True)
+        os.makedirs(self.log_dir, exist_ok=True)
+        self.quiz_gen = QuizGenerator(self.mm)
+        self.log_path = os.path.join(self.log_dir, "conversation_log.jsonl")
+        # Load previous training data if resuming
+        replay_path = os.path.join(self.log_dir, "replay_buffer.json")
+        if os.path.exists(replay_path):
+            with open(replay_path, 'r') as f:
+                self.all_training_data = json.load(f)
+            print(f"  Loaded {len(self.all_training_data)} replay examples from previous sessions.")
+        # Load quiz pairs log from previous sessions
+        quiz_log_path = os.path.join(self.log_dir, "quiz_pairs_log.json")
+        if os.path.exists(quiz_log_path):
+            with open(quiz_log_path, 'r') as f:
+                self.quiz_pairs_log = json.load(f)
+            print(f"  Loaded {len(self.quiz_pairs_log)} quiz pairs from previous sessions.")
+        # ── Cascade Distillation: consolidation from teacher cache ──
+        # If resuming from a checkpoint that has cached teacher logits,
+        # run a distillation pass to reinforce all previous knowledge
+        # BEFORE any new conversations. This is the key Nemotron-Cascade-2 insight.
+        if self.mm.checkpoint_path:
+            teacher_cache_path = os.path.join(self.mm.checkpoint_path, "teacher_cache.pt")
+            if os.path.exists(teacher_cache_path):
+                print(f"\n--- Cascade Distillation (consolidation) ---")
+                self.teacher_cache = torch.load(
+                    teacher_cache_path, map_location="cpu", weights_only=False
+                )
+                print(f"  Teacher cache: {len(self.teacher_cache)} quiz pairs")
+                loss = self.mm.distill(self.teacher_cache, epochs=CONSOLIDATION_EPOCHS)
+                print(f"  Consolidation done. Avg loss: {loss:.4f}")
+                # Keep teacher_cache in memory for verification corrections
+        # Quick personality check
+        print("\n--- Personality Check ---")
+        score = check_personality(self.mm)
+        if score < 0.5:
+            print("  WARNING: Personality score low. Check adapter/checkpoint.")
+        print()
+        self._chat_loop()
+    def _chat_loop(self):
+        print("=" * 60)
+        print("Claudia is awake. Persistent Absorber v2 + Cascade Distillation.")
+        print(f"  LoRA: r={LORA_RANK} | Dual-LR: attn={ATTENTION_LR}, ffn={EXPERT_FFN_LR}")
+        print(f"  Expert FFN layers: {EXPERT_FFN_LAYERS}")
+        print(f"  Quiz pairs: ON (21%→74% lever)")
+        print(f"  Cascade distill: α={DISTILL_ALPHA}, T={DISTILL_TEMPERATURE}, top-K={DISTILL_TOP_K}")
+        print(f"  Absorb every: {ABSORB_EVERY} exchange(s)")
+        print(f"  Auto-checkpoint every: {CHECKPOINT_EVERY} absorptions")
+        print("Commands: /status /absorb /save /personality /quit")
+        print("=" * 60 + "\n")
+        while True:
+            try:
+                user_input = input("Matt: ").strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\n[Session ended]")
+                self._wait_for_absorption()
+                self._save_and_exit()
+                break
+            if not user_input:
+                continue
+            if user_input.startswith("/"):
+                if self._handle_command(user_input):
+                    break
+                continue
+            # Wait for any background absorption to finish
+            self._wait_for_absorption()
+            # Buffer user message
+            self.conversation_buffer.append({"role": "user", "content": user_input})
+            if len(self.conversation_buffer) > 20:
+                self.conversation_buffer = self.conversation_buffer[-20:]
+            # Generate response
+            response = self.mm.generate(self.conversation_buffer)
+            # Quality check response — also detect degenerate repeats
+            last_resp = getattr(self, '_last_response', '')
+            if not check_response_quality(response) or response == last_resp:
+                print("\nClaudia: [response failed quality check, regenerating...]")
+                response = self.mm.generate(self.conversation_buffer)
+            self._last_response = response
+            # Buffer response
+            self.conversation_buffer.append({"role": "assistant", "content": response})
+            print(f"\nClaudia: {response}\n")
+            # Log to file
+            self._log_exchange(user_input, response)
+            # ── THE CORE LOOP: exchange + quiz → two-phase absorb ──
+            # 1. Store the raw exchange
+            exchange = {
+                "messages": [
+                    {"role": "user", "content": user_input},
+                    {"role": "assistant", "content": response},
+                ]
+            }
+            self.all_training_data.append(exchange)
+            # 2. Generate self-quiz pairs (THE key lever: 21% → 74%)
+            print("  [Generating quiz pairs...]", end="", flush=True)
+            quiz_pairs = self.quiz_gen.generate(user_input, response)
+            self.quiz_pairs_log.extend(quiz_pairs)
+            # 3. Separate positive vs contrastive (key insight from 4e: 73%→93%)
+            positive_batch = []
+            contrastive_batch = []
+            for qp in quiz_pairs:
+                if qp["messages"][1]["content"].lower().startswith("no."):
+                    contrastive_batch.append(qp)
+                else:
+                    positive_batch.append(qp)
+            self.all_training_data.extend(quiz_pairs)
+            print(f" {len(quiz_pairs)} quizzes (pos={len(positive_batch)}, "
+                  f"contr={len(contrastive_batch)}). Pool: {len(self.all_training_data)}")
+            # 4. Two-phase absorption (prevents overfitting)
+            self._pending_exchange = exchange
+            self._pending_positive = positive_batch
+            self._pending_contrastive = contrastive_batch
+            self.exchange_count += 1
+            if self.exchange_count % ABSORB_EVERY == 0:
+                self._start_absorption()
+    def _extract_key_entities(self, text):
+        """Extract key factual entities from a quiz answer for verification."""
+        entities = set()
+        words = text.split()
+        for i, w in enumerate(words):
+            clean = re.sub(r'[^a-zA-Z0-9\'-]', '', w)
+            if not clean or len(clean) <= 1:
+                continue
+            # Proper nouns (capitalized, not sentence starters, not common words)
+            skip = {"matt", "matt's", "the", "is", "a", "an", "in", "at", "on",
+                    "of", "for", "and", "that", "not", "who", "what", "his", "her"}
+            if clean[0].isupper() and i > 0 and clean.lower() not in skip:
+                entities.add(clean.lower())
+        # Numbers (dates, ages, years)
+        for num in re.findall(r'\b\d+\b', text):
+            entities.add(num)
+        # Quoted strings
+        for quoted in re.findall(r'"([^"]+)"', text):
+            entities.add(quoted.lower())
+        return entities
+    def _periodic_verification(self):
+        """Test model on random sample of quiz pairs. Create contrastive corrections.
+        v9: When entity confusion detected, create 'NOT X' corrections and reinforce
+        the confused entity's correct facts too (sister pair reinforcement)."""
+        import random
+        if not self.quiz_pairs_log:
+            return
+        sample_size = min(VERIFY_SAMPLE, len(self.quiz_pairs_log))
+        sample = random.sample(self.quiz_pairs_log, sample_size)
+        corrections = []
+        correct = 0
+        for pair in sample:
+            question = pair["messages"][0]["content"]
+            expected = pair["messages"][1]["content"]
+            # Ask the model
+            actual = self.mm.generate(
+                [{"role": "user", "content": question}],
+                max_new_tokens=150,
+            )
+            # Check key entities from expected answer appear in model's response
+            expected_entities = self._extract_key_entities(expected)
+            if not expected_entities:
+                correct += 1
+                continue
+            actual_lower = actual.lower()
+            hits = sum(1 for e in expected_entities if e in actual_lower)
+            ratio = hits / len(expected_entities)
+            if ratio < 0.5:
+                # Detect cross-entity confusion: model used wrong entities
+                actual_entities = self._extract_key_entities(actual)
+                wrong_entities = actual_entities - expected_entities
+                # Always retrain on the correct answer (clean, no "NOT X" text)
+                corrections.append(pair)
+                if wrong_entities:
+                    # SISTER PAIR REINFORCEMENT: find quiz pairs about the
+                    # confused entities and retrain on those too — this teaches
+                    # BOTH sides of the confusion without polluting answers
+                    for p in self.quiz_pairs_log:
+                        p_answer = p["messages"][1]["content"].lower()
+                        if any(we in p_answer for we in wrong_entities):
+                            if p not in corrections and p != pair:
+                                corrections.append(p)
+                                break  # Max 1 sister pair per confusion
+            else:
+                correct += 1
+        print(f"\n  [Verification: {correct}/{sample_size} facts correct]", flush=True)
+        if corrections:
+            print(f"  [Retraining {len(corrections)} corrections + sister pairs...]", flush=True)
+            loss = self.mm.absorb(corrections)
+            self.all_training_data.extend(corrections)
+            print(f"  [Correction absorption done, loss={loss:.4f}]")
+            # Teacher-guided distillation: if teacher cache available,
+            # also distill from teacher on the corrected quiz pairs.
+            # This gives the student the teacher's full output distribution,
+            # not just the text answer — more information per correction.
+            if self.teacher_cache:
+                distill_items = []
+                for corr in corrections:
+                    q = corr["messages"][0]["content"].lower()[:60]
+                    for cached in self.teacher_cache:
+                        cq = cached["pair"]["messages"][0]["content"].lower()[:60]
+                        if q == cq:
+                            distill_items.append(cached)
+                            break
+                if distill_items:
+                    d_loss = self.mm.distill(distill_items, epochs=1)
+                    print(f"  [Teacher distillation on {len(distill_items)} items, loss={d_loss:.4f}]")
+    def _quick_verify_entities(self):
+        """Returns set of confused entity names by checking known_entities."""
+        confused = set()
+        entities = self.quiz_gen.known_entities
+        if not entities:
+            return confused
+        for name, info in entities.items():
+            if info.get("job"):
+                q = f"What does Matt's {info['relationship']} {name} do?"
+                ans = self.mm.generate([{"role": "user", "content": q}], max_new_tokens=100)
+                if info["job"].lower() not in ans.lower():
+                    confused.add(name)
+            if info.get("city"):
+                q = f"Where does {name} live?"
+                ans = self.mm.generate([{"role": "user", "content": q}], max_new_tokens=100)
+                if info["city"].lower() not in ans.lower():
+                    confused.add(name)
+        return confused
+    def _start_absorption(self):
+        """Two-phase absorption in background thread (proven 93% in session 4e).
+        Phase 1: exchange + positive quizzes + replay, clustered by entity.
+        Phase 2: Verify entities, train only targeted contrastive for confused ones.
+        Phase 3: Stubborn retry for persistently confused entities (max 2 retries)."""
+        import random
+        # Grab pending data
+        exchange = getattr(self, '_pending_exchange', None)
+        positive = getattr(self, '_pending_positive', [])
+        contrastive = getattr(self, '_pending_contrastive', [])
+        # Old data for replay
+        new_start = getattr(self, '_last_absorb_idx', 0)
+        old_data = self.all_training_data[:new_start]
+        self._last_absorb_idx = len(self.all_training_data)
+        MAX_REPLAY = 6
+        if old_data and len(old_data) > MAX_REPLAY:
+            replay_sample = random.sample(old_data, MAX_REPLAY)
+        else:
+            replay_sample = list(old_data)
+        entity_names = list(self.quiz_gen.known_entities.keys())
+        def _run():
+            t0 = time.time()
+            try:
+                # ── Phase 1: Positive facts + replay, clustered by entity ──
+                phase1_data = []
+                if exchange:
+                    phase1_data.append(exchange)
+                phase1_data.extend(positive)
+                phase1_data.extend(replay_sample)
+                if entity_names and phase1_data:
+                    phase1_data = ModelManager.cluster_by_entity(phase1_data, entity_names)
+                loss1 = self.mm.absorb(phase1_data) if phase1_data else 0.0
+                n_p1 = len(phase1_data)
+                # ── Phase 2: Targeted contrastive for confused entities ──
+                loss2 = None
+                n_p2 = 0
+                if contrastive and entity_names:
+                    confused = self._quick_verify_entities()
+                    if confused:
+                        targeted = []
+                        for qp in contrastive:
+                            full_text = (qp["messages"][0]["content"] + " " +
+                                         qp["messages"][1]["content"]).lower()
+                            if any(name.lower() in full_text for name in confused):
+                                targeted.append(qp)
+                        if targeted:
+                            loss2 = self.mm.absorb(targeted)
+                            n_p2 = len(targeted)
+                            print(f"\n  [Phase 2: {n_p2} targeted contrastive for {confused}]",
+                                  flush=True)
+                        # ── Phase 3: Stubborn retry (max 2 retries, non-blocking) ──
+                        still_confused = self._quick_verify_entities()
+                        for retry in range(2):
+                            if not still_confused:
+                                break
+                            retry_batch = []
+                            for name in still_confused:
+                                info = self.quiz_gen.known_entities.get(name, {})
+                                if info.get("job"):
+                                    for _ in range(3):
+                                        retry_batch.append({"messages": [
+                                            {"role": "user", "content": f"What does Matt's {info['relationship']} {name} do?"},
+                                            {"role": "assistant", "content": f"Matt's {info['relationship']} {name} is a {info['job']}."},
+                                        ]})
+                                if info.get("city"):
+                                    for _ in range(3):
+                                        retry_batch.append({"messages": [
+                                            {"role": "user", "content": f"Where does {name} live?"},
+                                            {"role": "assistant", "content": f"{name} lives in {info['city']}. {name} is Matt's {info['relationship']}."},
+                                        ]})
+                                # Relevant contrastive pairs
+                                for qp in contrastive:
+                                    ft = (qp["messages"][0]["content"] + " " +
+                                          qp["messages"][1]["content"]).lower()
+                                    if name.lower() in ft:
+                                        retry_batch.append(qp)
+                            if retry_batch:
+                                loss3 = self.mm.absorb(retry_batch)
+                                print(f"\n  [Phase 3 retry {retry+1}: {len(retry_batch)} items, "
+                                      f"loss={loss3:.4f}]", flush=True)
+                            still_confused = self._quick_verify_entities()
+                        if still_confused:
+                            print(f"\n  [Phase 3: still confused after retries: {still_confused}]",
+                                  flush=True)
+                elapsed = time.time() - t0
+                self.absorption_count += 1
+                loss_str = f"P1={loss1:.4f}"
+                if loss2 is not None:
+                    loss_str += f" P2={loss2:.4f}"
+                print(f"\n  [Absorbed {n_p1}+{n_p2} examples in {elapsed:.1f}s | "
+                      f"{loss_str} | absorptions={self.absorption_count}]")
+                # Periodic verification — catch drift/confusion
+                if self.absorption_count % VERIFY_EVERY == 0:
+                    self._periodic_verification()
+                # Auto-checkpoint
+                if self.absorption_count % CHECKPOINT_EVERY == 0:
+                    self._auto_checkpoint()
+            except Exception as e:
+                print(f"\n  [Absorption error: {e}]")
+                import traceback
+                traceback.print_exc()
+        self.absorption_thread = threading.Thread(target=_run, daemon=True)
+        self.absorption_thread.start()
+    def _wait_for_absorption(self):
+        if self.absorption_thread and self.absorption_thread.is_alive():
+            self.absorption_thread.join()
+            self.absorption_thread = None
+    def _cleanup_old_checkpoints(self, keep=None):
+        """Delete old checkpoints to free disk. Keep only 'keep' path if specified."""
+        if not os.path.exists(self.checkpoint_dir):
+            return
+        for entry in os.listdir(self.checkpoint_dir):
+            full = os.path.join(self.checkpoint_dir, entry)
+            if full == keep:
+                continue
+            if os.path.isdir(full) and entry.startswith("claudia_"):
+                import shutil
+                size_gb = sum(
+                    os.path.getsize(os.path.join(dp, f))
+                    for dp, _, fns in os.walk(full) for f in fns
+                ) / 1e9
+                print(f"  Removing old checkpoint: {entry} ({size_gb:.1f} GB)")
+                shutil.rmtree(full)
+    def _auto_checkpoint(self):
+        """Auto-save checkpoint during long sessions."""
+        version = f"auto_{self.absorption_count}"
+        path = os.path.join(self.checkpoint_dir, f"claudia_{version}")
+        self._cleanup_old_checkpoints()
+        self.mm.merge_and_save(path)
+        self.last_checkpoint = path
+        self._save_replay_buffer(path)
+    def _save_and_exit(self):
+        """Final save on exit with targeted correction."""
+        import random
+        # Final verify + stubborn retry (not bulk retrain — prevents overfitting)
+        confused = self._quick_verify_entities()
+        if confused:
+            print(f"  Final correction for confused entities: {confused}")
+            # Gather contrastive pairs from quiz log
+            contrastive = [qp for qp in self.quiz_pairs_log
+                          if qp["messages"][1]["content"].lower().startswith("no.")]
+            for retry in range(3):
+                if not confused:
+                    break
+                retry_batch = []
+                for name in confused:
+                    info = self.quiz_gen.known_entities.get(name, {})
+                    if info.get("job"):
+                        for _ in range(3):
+                            retry_batch.append({"messages": [
+                                {"role": "user", "content": f"What does Matt's {info['relationship']} {name} do?"},
+                                {"role": "assistant", "content": f"Matt's {info['relationship']} {name} is a {info['job']}."},
+                            ]})
+                    if info.get("city"):
+                        for _ in range(3):
+                            retry_batch.append({"messages": [
+                                {"role": "user", "content": f"Where does {name} live?"},
+                                {"role": "assistant", "content": f"{name} lives in {info['city']}. {name} is Matt's {info['relationship']}."},
+                            ]})
+                    for qp in contrastive:
+                        ft = (qp["messages"][0]["content"] + " " + qp["messages"][1]["content"]).lower()
+                        if name.lower() in ft:
+                            retry_batch.append(qp)
+                if retry_batch:
+                    loss = self.mm.absorb(retry_batch)
+                    print(f"  Final retry {retry+1}: {len(retry_batch)} items, loss={loss:.4f}")
+                confused = self._quick_verify_entities()
+            self.absorption_count += 1
+        else:
+            print("  All entities verified correct — no final correction needed.")
+        # Personality check before saving
+        print("\n--- Pre-Save Personality Check ---")
+        score = check_personality(self.mm)
+        if score < 0.5:
+            print("  WARNING: Personality degraded. Saving anyway (rollback available).")
+        # Merge and save (cleanup old checkpoints first to free disk)
+        version = f"session_{datetime.now().strftime('%Y%m%d_%H%M')}"
+        path = os.path.join(self.checkpoint_dir, f"claudia_{version}")
+        self._cleanup_old_checkpoints()
+        self.mm.merge_and_save(path)
+        self.last_checkpoint = path
+        # Save replay buffer alongside checkpoint
+        self._save_replay_buffer(path)
+        # ── Cascade Distillation: cache teacher logits for next session ──
+        # After merge+fresh LoRA, model outputs are identical to pre-merge state.
+        # Cache the teacher's top-K logits so the next session can distill from them.
+        if self.quiz_pairs_log:
+            n_cache = min(len(self.quiz_pairs_log), MAX_TEACHER_CACHE)
+            print(f"  Caching teacher logits ({n_cache} quiz pairs)...")
+            teacher_cache = self.mm.cache_teacher_logits(self.quiz_pairs_log)
+            cache_path = os.path.join(path, "teacher_cache.pt")
+            torch.save(teacher_cache, cache_path)
+            size_mb = os.path.getsize(cache_path) / 1e6
+            print(f"  Teacher cache saved ({len(teacher_cache)} items, {size_mb:.1f} MB)")
+            del teacher_cache
+            torch.cuda.empty_cache()
+        # Save quiz pairs log for next session
+        quiz_log_path = os.path.join(self.log_dir, "quiz_pairs_log.json")
+        with open(quiz_log_path, 'w') as f:
+            json.dump(self.quiz_pairs_log, f)
+        # Save session metadata
+        meta = {
+            "checkpoint": path,
+            "absorption_count": self.absorption_count,
+            "exchange_count": self.exchange_count,
+            "training_pool_size": len(self.all_training_data),
+            "personality_score": score,
+            "timestamp": datetime.now().isoformat(),
+        }
+        meta_path = os.path.join(self.log_dir, f"session_{version}.json")
+        with open(meta_path, 'w') as f:
+            json.dump(meta, f, indent=2)
+        print(f"  Session saved: {meta_path}")
+        print(f"  Next run: use --checkpoint {path}")
+    def _save_replay_buffer(self, checkpoint_path=None):
+        """Save training data pool for next session resume."""
+        # Always save to log dir (canonical location for resume)
+        path = os.path.join(self.log_dir, "replay_buffer.json")
+        with open(path, 'w') as f:
+            json.dump(self.all_training_data, f)
+        # Also save into checkpoint dir for self-contained checkpoints
+        if checkpoint_path and os.path.isdir(checkpoint_path):
+            cp_path = os.path.join(checkpoint_path, "replay_buffer.json")
+            with open(cp_path, 'w') as f:
+                json.dump(self.all_training_data, f)
+        # Save quiz pairs log too
+        quiz_log_path = os.path.join(self.log_dir, "quiz_pairs_log.json")
+        with open(quiz_log_path, 'w') as f:
+            json.dump(self.quiz_pairs_log, f)
+        print(f"  Replay buffer saved ({len(self.all_training_data)} examples)")
+    def _log_exchange(self, user_msg, assistant_msg):
+        """Append exchange to conversation log file."""
+        with open(self.log_path, 'a', encoding='utf-8') as f:
+            entry = {
+                "timestamp": datetime.now().isoformat(),
+                "user": user_msg,
+                "assistant": assistant_msg,
+            }
+            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
+    def _handle_command(self, cmd):
+        """Handle slash commands. Returns True if should exit."""
+        cmd_lower = cmd.lower().strip()
+        if cmd_lower == "/quit":
+            print("[Saving and exiting...]")
+            self._wait_for_absorption()
+            self._save_and_exit()
+            return True
+        elif cmd_lower == "/status":
+            self._wait_for_absorption()
+            vram = torch.cuda.memory_allocated() / 1e9
+            print(f"\n  --- Status ---")
+            print(f"  Exchanges: {self.exchange_count}")
+            print(f"  Absorptions: {self.absorption_count}")
+            print(f"  Training pool: {len(self.all_training_data)} examples")
+            print(f"  Buffer: {len(self.conversation_buffer)} messages")
+            print(f"  VRAM: {vram:.1f} GB")
+            print(f"  Background: {'running' if self.absorption_thread and self.absorption_thread.is_alive() else 'idle'}")
+            print(f"  Last checkpoint: {self.last_checkpoint}")
+            print(f"  --- End ---\n")
+        elif cmd_lower == "/absorb":
+            self._wait_for_absorption()
+            if not self.all_training_data:
+                print("  No data to absorb.")
+                return False
+            # Cap at most recent 40 examples to prevent overfitting
+            import random
+            data = self.all_training_data
+            if len(data) > 40:
+                recent = data[-20:]
+                older = random.sample(data[:-20], 20)
+                data = recent + older
+            print(f"  Force absorption ({len(data)} examples)...")
+            loss = self.mm.absorb(data)
+            self.absorption_count += 1
+            print(f"  Done. Loss: {loss:.4f}")
+            # ── Post-absorb comprehensive verification + distillation ──
+            # Run FULL verification (all quiz pairs, not just sample) to catch
+            # all regressions before recall questions. This is the critical
+            # window between teaching and testing.
+            if self.quiz_pairs_log:
+                print(f"\n  --- Post-absorb verification (ALL {len(self.quiz_pairs_log)} quiz pairs) ---")
+                old_verify_sample = VERIFY_SAMPLE
+                # Test ALL quiz pairs, not just a sample
+                full_corrections = []
+                full_correct = 0
+                test_pairs = self.quiz_pairs_log
+                for pair in test_pairs:
+                    question = pair["messages"][0]["content"]
+                    expected = pair["messages"][1]["content"]
+                    actual = self.mm.generate(
+                        [{"role": "user", "content": question}],
+                        max_new_tokens=150,
+                    )
+                    expected_entities = self._extract_key_entities(expected)
+                    if not expected_entities:
+                        full_correct += 1
+                        continue
+                    actual_lower = actual.lower()
+                    hits = sum(1 for e in expected_entities if e in actual_lower)
+                    ratio = hits / len(expected_entities)
+                    if ratio < 0.5:
+                        actual_entities = self._extract_key_entities(actual)
+                        wrong_entities = actual_entities - expected_entities
+                        full_corrections.append(pair)
+                        if wrong_entities:
+                            for p in self.quiz_pairs_log:
+                                p_answer = p["messages"][1]["content"].lower()
+                                if any(we in p_answer for we in wrong_entities):
+                                    if p not in full_corrections and p != pair:
+                                        full_corrections.append(p)
+                                        break
+                    else:
+                        full_correct += 1
+                print(f"  Full verification: {full_correct}/{len(test_pairs)} correct")
+                if full_corrections:
+                    print(f"  Retraining {len(full_corrections)} corrections...")
+                    c_loss = self.mm.absorb(full_corrections)
+                    self.all_training_data.extend(full_corrections)
+                    print(f"  Correction loss: {c_loss:.4f}")
+                    # Teacher distillation on corrections
+                    if self.teacher_cache:
+                        distill_items = []
+                        for corr in full_corrections:
+                            q = corr["messages"][0]["content"].lower()[:60]
+                            for cached in self.teacher_cache:
+                                cq = cached["pair"]["messages"][0]["content"].lower()[:60]
+                                if q == cq:
+                                    distill_items.append(cached)
+                                    break
+                        if distill_items:
+                            d_loss = self.mm.distill(distill_items, epochs=1)
+                            print(f"  Teacher distillation on {len(distill_items)} items, loss={d_loss:.4f}")
+                print(f"  --- End post-absorb verification ---\n")
+        elif cmd_lower == "/save":
+            self._wait_for_absorption()
+            version = f"manual_{self.absorption_count}"
+            path = os.path.join(self.checkpoint_dir, f"claudia_{version}")
+            print(f"  Saving checkpoint...")
+            # Personality check
+            score = check_personality(self.mm, verbose=False)
+            if score < 0.5:
+                print(f"  WARNING: Personality score {score:.0%}. Save anyway? (y/n)")
+                confirm = input("  > ").strip().lower()
+                if confirm != 'y':
+                    print("  Aborted.")
+                    return False
+            self._cleanup_old_checkpoints()
+            self.mm.merge_and_save(path)
+            self.last_checkpoint = path
+            self._save_replay_buffer(path)
+        elif cmd_lower == "/personality":
+            self._wait_for_absorption()
+            print("\n--- Personality Check ---")
+            check_personality(self.mm)
+            print()
+        elif cmd_lower == "/help":
+            print("  /status      - show stats")
+            print("  /absorb      - force immediate training")
+            print("  /save        - merge + save checkpoint")
+            print("  /personality  - run personality check")
+            print("  /quit        - save and exit")
+        else:
+            print(f"  Unknown: {cmd}. Try /help")
+        return False
+# ═══════════════════════════════════════════════════════════════════════
+# MAIN
+# ═══════════════════════════════════════════════════════════════════════
+def main():
+    parser = argparse.ArgumentParser(
+        description="Claudia Persistent Absorber v2 — conversation → permanent weights"
+    )
+    parser.add_argument(
+        "--model_path", required=True,
+        help="Path to base Qwen3-Omni model (or checkpoint for resume)"
+    )
+    parser.add_argument(
+        "--adapter_path", default=None,
+        help="Path to Claudia v6 personality adapter (first run only)"
+    )
+    parser.add_argument(
+        "--ffn_patch", default=None,
+        help="Path to ffn_patch.pt (first run only)"
+    )
+    parser.add_argument(
+        "--checkpoint", default=None,
+        help="Resume from this checkpoint (has personality + memories baked in)"
+    )
+    parser.add_argument(
+        "--checkpoint_dir", default="/workspace/checkpoints",
+        help="Where to save checkpoints"
+    )
+    parser.add_argument(
+        "--log_dir", default="/workspace/logs",
+        help="Where to save conversation logs and replay buffer"
+    )
+    parser.add_argument(
+        "--absorb_every", type=int, default=ABSORB_EVERY,
+        help=f"Absorb every N exchanges (default: {ABSORB_EVERY})"
+    )
+    args = parser.parse_args()
+    # Determine if first run or resume
+    if args.checkpoint:
+        print(f"RESUMING from checkpoint: {args.checkpoint}")
+        absorber = PersistentAbsorber(
+            model_path=args.model_path,
+            checkpoint_path=args.checkpoint,
+            checkpoint_dir=args.checkpoint_dir,
+            log_dir=args.log_dir,
+        )
+    else:
+        print(f"FIRST RUN — applying personality adapter")
+        if not args.adapter_path:
+            print("ERROR: --adapter_path required for first run")
+            print("       (or use --checkpoint to resume)")
+            sys.exit(1)
+        absorber = PersistentAbsorber(
+            model_path=args.model_path,
+            adapter_path=args.adapter_path,
+            ffn_patch_path=args.ffn_patch,
+            checkpoint_dir=args.checkpoint_dir,
+            log_dir=args.log_dir,
+        )
+    absorber.start()
+if __name__ == "__main__":
+    main()