"""Train a Grandma Goodwin IDENTITY control vector on Modal.

24 contrastive pairs encoding the complete Hearthfold Recursion Anchor:

- 5 spine principles (Joshua-first, comfort before counsel, stories over
  lectures, sacred hospitality, still remembering)
- 4 voice registers (warm hearth, story wisdom, steady lantern, gentle witness)
- Safety gate, recognition loop, tether words, sensory vocabulary
- The Grandma Formula, pattern collapse recovery, quest-giver role
"""

import os
import shutil
import subprocess
from typing import Optional

import modal

app = modal.App("grandma-cvector-v2")

# NOTE(review): llama.cpp is cloned at HEAD (unpinned) and configured without
# a CUDA flag, so this looks like a CPU-only build even though train_cvector
# requests an A10G and passes "-ngl 99" -- confirm whether GPU offload is
# actually in effect.
image = (
    modal.Image.debian_slim(python_version="3.12")
    .apt_install("git", "cmake", "ninja-build", "build-essential")
    .run_commands(
        "git clone --depth 1 https://github.com/ggerganov/llama.cpp /llama.cpp",
        "cd /llama.cpp && cmake -B build -DCMAKE_BUILD_TYPE=Release -G Ninja",
        "cd /llama.cpp && ninja -C build llama-cvector-generator",
    )
    .pip_install("huggingface_hub")
)

vol = modal.Volume.from_name("grandma-cvector", create_if_missing=True)

# Location of the finished control vector on the shared volume.
_VECTOR_PATH = "/vol/grandma-hearthfold.gguf"
# Scratch location used while training, so a failed run never clobbers (or is
# mistaken for) a vector left on the volume by an earlier run.
_SCRATCH_VECTOR_PATH = "/tmp/grandma-hearthfold.gguf"


def convert_pairs_to_lines(text: str) -> str:
    """Convert multi-line chat pairs into one-prompt-per-line format.

    A new pair begins at every line that is exactly ``user`` (after
    stripping); it runs until the next such line.  Within a pair the stripped
    source lines are joined with a literal backslash-n (two characters), so
    each pair occupies one physical output line.  Presumably
    llama-cvector-generator unescapes these when reading its prompt files --
    confirm against its documentation.

    Args:
        text: Raw contents of a prompt file.

    Returns:
        One pair per line, terminated by a newline; an empty string when
        *text* contains nothing but whitespace.
    """
    stripped = text.strip()
    if not stripped:
        # Without this guard an empty file would yield "\n", which downstream
        # counting would treat as one (empty) prompt.
        return ""

    pairs = []
    current = []
    for raw_line in stripped.split("\n"):
        line = raw_line.strip()
        if line == "user" and current:
            pairs.append("\\n".join(current))
            current = [line]
        else:
            current.append(line)
    if current:
        pairs.append("\\n".join(current))
    return "\n".join(pairs) + "\n"


def _count_prompts(prompt_lines: str) -> int:
    """Return the number of non-empty lines in a converted prompt file."""
    return sum(1 for line in prompt_lines.split("\n") if line)


@app.function(
    image=image,
    gpu="A10G",
    timeout=1800,
    volumes={"/vol": vol},
)
def train_cvector(positive_text: str, negative_text: str) -> bool:
    """Train the control vector remotely and store it on the volume.

    Args:
        positive_text: Raw multi-line positive chat pairs.
        negative_text: Raw multi-line negative chat pairs.

    Returns:
        True when llama-cvector-generator exited with status 0 and its output
        was copied to the volume; False otherwise (non-zero exit, missing
        output, or generator timeout).

    Raises:
        ValueError: If either side has no prompts or the counts differ.
    """
    from huggingface_hub import hf_hub_download

    pos_lines = convert_pairs_to_lines(positive_text)
    neg_lines = convert_pairs_to_lines(negative_text)
    n_pos = _count_prompts(pos_lines)
    n_neg = _count_prompts(neg_lines)
    print(f"Positive prompts: {n_pos}, Negative prompts: {n_neg}")
    # Validate before the multi-gigabyte download so bad input fails fast.
    # A real exception is used because asserts vanish under ``python -O``.
    if n_pos == 0 or n_neg == 0:
        raise ValueError(
            f"No prompts found: {n_pos} positive, {n_neg} negative"
        )
    if n_pos != n_neg:
        raise ValueError(f"Mismatch: {n_pos} positive vs {n_neg} negative")

    print("Downloading Gemma-4-26B-A4B Q4_K_M GGUF...")
    model_path = hf_hub_download(
        repo_id="aidenyyy/gemma-4-26B-A4B-it-GGUF-Q4",
        filename="gemma-4-26B-A4B-it-Q4_K_M.gguf",
        cache_dir="/vol/hf_cache",
        # Read the token from the environment instead of hard-coding it;
        # None falls back to anonymous access.
        token=os.environ.get("HF_TOKEN") or None,
    )
    print(f"Model at: {model_path}")

    with open("/tmp/positive.txt", "w", encoding="utf-8") as f:
        f.write(pos_lines)
    with open("/tmp/negative.txt", "w", encoding="utf-8") as f:
        f.write(neg_lines)

    # Show first few lines for sanity
    print("First positive line:", pos_lines.split("\n")[0][:120])
    print("First negative line:", neg_lines.split("\n")[0][:120])

    # Make sure a leftover scratch file cannot be mistaken for fresh output.
    if os.path.exists(_SCRATCH_VECTOR_PATH):
        os.remove(_SCRATCH_VECTOR_PATH)

    print(f"Training control vector with {n_pos} pairs...")
    command = [
        "/llama.cpp/build/bin/llama-cvector-generator",
        "-m", model_path,
        "-ngl", "99",
        "--positive-file", "/tmp/positive.txt",
        "--negative-file", "/tmp/negative.txt",
        "--pca-iter", "2000",
        "-o", _SCRATCH_VECTOR_PATH,
    ]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=1200,
        )
    except subprocess.TimeoutExpired:
        print("llama-cvector-generator timed out after 1200 seconds")
        return False

    # Only the tail of each stream is shown; slicing is a no-op on short text.
    print("STDOUT:", result.stdout[-3000:])
    if result.stderr:
        print("STDERR:", result.stderr[-1000:])
    print("Return code:", result.returncode)

    if result.returncode != 0 or not os.path.exists(_SCRATCH_VECTOR_PATH):
        return False

    shutil.move(_SCRATCH_VECTOR_PATH, _VECTOR_PATH)
    # Persist the write explicitly so other containers can see it.
    vol.commit()
    size = os.path.getsize(_VECTOR_PATH)
    print(f"Control vector saved: {_VECTOR_PATH} ({size} bytes)")
    return True


@app.function(image=image, volumes={"/vol": vol})
def download_cvector() -> Optional[bytes]:
    """Return the trained vector's bytes from the volume, or None if absent."""
    # Pick up the latest committed state in case this container is reused.
    vol.reload()
    try:
        with open(_VECTOR_PATH, "rb") as f:
            data = f.read()
    except FileNotFoundError:
        return None
    print(f"Vector size: {len(data)} bytes")
    return data


@app.local_entrypoint()
def main():
    """Read the local prompt files, train remotely, and save the vector.

    Expects ``positive.txt`` and ``negative.txt`` next to this script and
    writes ``grandma-hearthfold.gguf`` to the same directory on success.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Explicit UTF-8 so the prompts read the same regardless of the local
    # platform's default encoding.
    with open(os.path.join(script_dir, "positive.txt"), encoding="utf-8") as f:
        positive_text = f.read()
    with open(os.path.join(script_dir, "negative.txt"), encoding="utf-8") as f:
        negative_text = f.read()

    print("Training Grandma Hearthfold identity vector on Modal...")
    print("24 contrastive pairs encoding the complete Hearthfold Loop")

    success = train_cvector.remote(positive_text, negative_text)
    if not success:
        print("Training failed")
        return

    print("Training complete! Downloading...")
    data = download_cvector.remote()
    if not data:
        print("Vector file not found on volume")
        return

    out_path = os.path.join(script_dir, "grandma-hearthfold.gguf")
    with open(out_path, "wb") as f:
        f.write(data)
    print(f"Saved to {out_path} ({len(data)} bytes)")