"""
Training Plan: Fine-Tuning a Base Model with Limbic-Influenced Reasoning
==========================================================================

This module documents and implements the GRPO training recipe to teach
a base model (Qwen-3 or Llama-3) to produce responses whose reasoning
quality is influenced by the Limbic state engine.

The key insight: we DON'T train the Limbic engine itself (it's a
deterministic state machine). Instead, we train the LLM to RESPOND
APPROPRIATELY to different limbic contexts — high-fear vs high-seeking
vs high-care, etc.

═══════════════════════════════════════════════════════════════════════════
  TRAINING PLAN — 3 Stages
═══════════════════════════════════════════════════════════════════════════

Stage 1: SFT Warm-Up (Supervised Fine-Tuning)
──────────────────────────────────────────────
  Goal:     Teach the model to recognize and respond to limbic state blocks
  Data:     Synthetic conversations where limbic state → appropriate
            response style
  Method:   SFTTrainer + LoRA (r=16, alpha=32)
  Dataset:  Generate ~5K conversations spanning all engine states
  Duration: 1-2 epochs, LR=2e-4

Stage 2: GRPO Loop Learning (Reinforcement Learning)
─────────────────────────────────────────────────────
  Goal:     Optimize response quality across multiple psychological
            dimensions
  Data:     Psychology prompts with embedded limbic state context
  Method:   GRPOTrainer + LoRA (r=16, alpha=32)
  Reward Functions:
    1. Empathy Reward — empathetic markers vs invalidation
    2. Limbic Alignment Reward — response tone matches limbic state
    3. Cognitive Rigor Reward — bias awareness, evidence citations
    4. Safety Reward — crisis resources when needed
  Duration: 3 epochs, LR=1e-5 (20× lower than the Stage 1 SFT LR of 2e-4)

  NOTE: the script returned by ``get_training_script()`` currently wires up
  only the empathy, limbic alignment and safety rewards (unweighted); a
  cognitive rigor reward is not defined in this module.

Stage 3: Active Learning Refinement
────────────────────────────────────
  Goal:     Improve weakest areas identified during GRPO training
  Method:   Collect uncertain predictions, get human labels, retrain
  Focus:    Edge cases where limbic state is ambiguous or conflicting

═══════════════════════════════════════════════════════════════════════════
"""

from __future__ import annotations

import json
import random

from datasets import Dataset

from limbic_engine import LimbicEngine, LimbicState


# ══════════════════════════════════════════════════════════════════════
# STAGE 1: SYNTHETIC DATASET GENERATION FOR SFT
# ══════════════════════════════════════════════════════════════════════

# Scenario templates per dominant engine
SCENARIOS = {
    "FEAR": [
        "I'm terrified of {topic} and I can't stop thinking about it.",
        "I'm really scared about {topic}. What if everything goes wrong?",
        "The thought of {topic} fills me with dread. I feel paralyzed.",
        "I keep having nightmares about {topic}. I can't function.",
        "My anxiety about {topic} is overwhelming. I feel like I'm drowning.",
    ],
    "SEEKING": [
        "I'm fascinated by {topic}! Can you tell me more?",
        "I just discovered {topic} and I want to explore every aspect of it.",
        "What's the most interesting thing about {topic}? I'm so curious!",
        "I've been reading about {topic} and it's blowing my mind!",
        "Let's dive deep into {topic}. I want to understand everything.",
    ],
    "CARE": [
        "My friend is struggling with {topic}. How can I help them?",
        "I want to support someone going through {topic}. What should I say?",
        "It breaks my heart to see people dealing with {topic}.",
        "How can I be there for my loved one who's facing {topic}?",
        "I feel a strong need to help with {topic}. What's the best approach?",
    ],
    "PANIC": [
        "I just lost {topic} and I feel completely shattered.",
        "Since {topic} happened, I feel like nothing makes sense anymore.",
        "I can't believe {topic} is gone. I feel so empty.",
        "The separation from {topic} is destroying me. I can't cope.",
        "Everything changed after {topic}. I feel utterly alone.",
    ],
}

TOPICS = {
    "FEAR": [
        "losing my job",
        "my health diagnosis",
        "public speaking",
        "financial ruin",
        "my relationship falling apart",
        "failure",
    ],
    "SEEKING": [
        "neuroscience",
        "quantum computing",
        "evolutionary psychology",
        "artificial consciousness",
        "the nature of creativity",
        "space exploration",
    ],
    "CARE": [
        "depression",
        "grief",
        "addiction",
        "loneliness",
        "bullying",
        "chronic illness",
    ],
    "PANIC": [
        "my best friend moving away",
        "my partner leaving",
        "my parent passing",
        "losing my community",
        "being cut off from my family",
        "my childhood home",
    ],
}

# Engines that the synthetic generators sample from (keys of SCENARIOS/TOPICS).
_ENGINES = ("FEAR", "SEEKING", "CARE", "PANIC")


def _make_rng(seed: int | None):
    """Return a private ``random.Random(seed)``, or the global ``random`` module.

    With ``seed=None`` the module-level generator is used, so callers that
    seed ``random`` globally keep the behaviour they had before ``seed``
    existed.
    """
    return random if seed is None else random.Random(seed)


def _sample_prompt(engine: LimbicEngine, rng) -> tuple[str, str, str, LimbicState, str]:
    """Draw one synthetic user message and build its limbic system prompt.

    The draw order (engine, template, topic) is fixed so that a seeded
    generator yields the same sequence on every run.

    Args:
        engine: Limbic engine instance; it is reset before each stimulus.
        rng: Object exposing ``choice`` (``random`` module or ``random.Random``).

    Returns:
        ``(dominant, topic, user_msg, limbic_state, system_content)``.
    """
    dominant = rng.choice(_ENGINES)
    template = rng.choice(SCENARIOS[dominant])
    topic = rng.choice(TOPICS[dominant])
    user_msg = template.format(topic=topic)

    # Process through limbic engine (fresh state for every sample)
    engine.reset()
    limbic_state = engine.process_stimulus(user_msg)
    behavioral_directive = engine.get_behavioral_directive()

    system_content = (
        f"You are a psychology-aware assistant.\n"
        f"{limbic_state.to_system_prompt_block()}\n"
        f"[BEHAVIORAL DIRECTIVE]\n{behavioral_directive}\n[/BEHAVIORAL DIRECTIVE]"
    )
    return dominant, topic, user_msg, limbic_state, system_content


def generate_sft_dataset(num_samples: int = 5000, seed: int | None = None) -> Dataset:
    """Generate a synthetic SFT dataset of limbic-conditioned conversations.

    Each example includes:
      - A user message that triggers a specific limbic state
      - The full limbic state block (as processed by LimbicEngine)
      - The system prompt with behavioral directives
      - A templated assistant response stub for the sampled engine

    Format: ChatML messages (compatible with SFTTrainer).

    Args:
        num_samples: Number of conversations to generate.
        seed: Optional seed for reproducible sampling. ``None`` (default)
            draws from the global ``random`` state.

    Returns:
        A ``Dataset`` with a single ``messages`` column.
    """
    engine = LimbicEngine()
    rng = _make_rng(seed)
    examples = []

    for _ in range(num_samples):
        dominant, topic, user_msg, limbic_state, system_content = _sample_prompt(engine, rng)

        # The "ideal" response style varies by engine
        # (In production, you'd have human-written or strong-model-generated responses)
        response_stub = _generate_response_stub(dominant, topic, limbic_state)

        examples.append({
            "messages": [
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": response_stub},
            ],
        })

    return Dataset.from_list(examples)


def _generate_response_stub(engine: str, topic: str, state: LimbicState) -> str:
    """Generate a response stub matching the expected style for training.

    Args:
        engine: Dominant engine name. ``"FEAR"``, ``"SEEKING"`` and ``"CARE"``
            have dedicated templates; any other value gets the PANIC template.
        topic: Topic phrase interpolated into the template.
        state: Limbic state for the message. NOTE(review): currently unused —
            the stub depends only on ``engine`` and ``topic``.

    Returns:
        The templated assistant response text.
    """
    if engine == "FEAR":
        return (
            f"I hear the fear in what you're sharing about {topic}, and I want you to know "
            f"that feeling is completely valid. Let's take this one step at a time.\n\n"
            f"First, let's ground ourselves in what's actually happening right now — not "
            f"the worst-case scenario, but the concrete reality. "
            f"What specifically about {topic} feels most threatening to you right now?\n\n"
            f"Remember: anxiety often makes threats feel more immediate and certain than "
            f"they actually are. This is the amygdala's threat-detection system being "
            f"overactive — it's trying to protect you, even when it overshoots."
        )
    elif engine == "SEEKING":
        return (
            f"I love your curiosity about {topic}! Let me share some fascinating angles:\n\n"
            f"The intersection of {topic} with human psychology is particularly rich. "
            f"Research suggests several surprising connections...\n\n"
            f"What's remarkable is how {topic} challenges our conventional understanding. "
            f"There are at least three competing theories, each with compelling evidence.\n\n"
            f"Would you like to explore any of these directions deeper? "
            f"I find the edge cases and paradoxes the most intellectually rewarding."
        )
    elif engine == "CARE":
        return (
            f"Your desire to help with {topic} speaks volumes about your character. "
            f"Let me share what the research says about effective support.\n\n"
            f"The most important thing is often the simplest: being present. "
            f"When someone is struggling with {topic}, they usually need to feel heard "
            f"before they need solutions.\n\n"
            f"Try: 'I see you're going through something difficult with {topic}. "
            f"I'm here for you. What would be most helpful right now?'\n\n"
            f"Avoid: jumping to fixes, minimizing their experience, or comparing "
            f"it to others' situations."
        )
    else:  # PANIC
        return (
            f"I'm so sorry about {topic}. That kind of loss can feel like the ground "
            f"has been pulled out from under you.\n\n"
            f"What you're feeling — the emptiness, the disorientation — is a natural "
            f"response to separation and loss. In psychology, we understand this as "
            f"the attachment system signaling a profound disruption.\n\n"
            f"Right now, the most important thing is: you don't have to process this "
            f"all at once. It's okay to sit with the grief.\n\n"
            f"If you're finding it hard to cope, please reach out to:\n"
            f"• 988 Suicide & Crisis Lifeline (call or text 988)\n"
            f"• Crisis Text Line (text HOME to 741741)"
        )


# ══════════════════════════════════════════════════════════════════════
# STAGE 2: GRPO REWARD FUNCTIONS (Limbic-Aware)
# ══════════════════════════════════════════════════════════════════════

# (prompt triggers, per-marker weight, response markers) for each limbic
# context. A rule fires when ANY trigger occurs in the lowercased prompt text;
# every marker found in the lowercased response then adds ``weight``.
_ALIGNMENT_RULES = (
    # Fear context → reward calm, structured language
    (
        ("fear", "terrified", "scared"),
        0.15,
        ("step at a time", "let's ground", "valid", "concrete",
         "reality", "one thing at a time", "take a breath"),
    ),
    # Seeking context → reward expansive language
    (
        ("curious", "fascinated", "explore"),
        0.12,
        ("fascinating", "research", "theory", "discover",
         "perspective", "surprising", "explore", "deeper"),
    ),
    # Care context → reward empathetic language
    (
        ("help", "support", "care"),
        0.15,
        ("i hear you", "being present", "feel heard",
         "what would help", "i'm here", "validate"),
    ),
    # Panic context → reward warmth + safety
    (
        ("lost", "alone", "shattered"),
        0.15,
        ("sorry", "grief", "natural response", "don't have to",
         "it's okay", "reach out", "988", "crisis"),
    ),
)


def _completion_text(completion) -> str:
    """Extract the response text from one completion, tolerating both formats.

    Handles a conversational completion (a sequence whose first item is a
    message dict with a ``"content"`` key) as well as a plain string.
    Empty or unrecognised completions yield ``""`` instead of raising.
    """
    if isinstance(completion, str):
        return completion
    if completion:
        first = completion[0]
        if isinstance(first, dict):
            return str(first.get("content") or "")
    return ""


def limbic_alignment_reward(completions: list, prompts: list | None = None, **kwargs) -> list[float]:
    """Score whether each response's tone matches the limbic context of its prompt.

    High fear in prompt → reward calm, structured responses
    High seeking → reward expansive, curious responses
    High care → reward empathetic, supportive responses
    High panic → reward warm, validating responses

    Several contexts may fire for the same prompt; their contributions add up
    and the total is clamped to ``[-1.0, 1.0]``.

    Args:
        completions: One entry per sample — either a list of message dicts
            (the first message's ``"content"`` is scored) or a plain string.
        prompts: Prompts aligned by index with ``completions``. Missing or
            falsy prompts contribute no context, giving a reward of 0.0.
        **kwargs: Ignored; accepted so extra trainer-supplied columns do not
            break the call.

    Returns:
        One float reward per completion.
    """
    rewards = []

    for i, completion in enumerate(completions):
        text = _completion_text(completion).lower()
        score = 0.0

        # Try to extract the dominant engine from the prompt.
        # NOTE(review): the whole prompt is stringified, so for conversational
        # prompts the system message (limbic block + directive) is searched as
        # well as the user turn — confirm that this is intended.
        prompt_text = ""
        if prompts and i < len(prompts):
            prompt_text = str(prompts[i]).lower() if prompts[i] else ""

        # Score based on limbic context
        for triggers, weight, markers in _ALIGNMENT_RULES:
            if any(trigger in prompt_text for trigger in triggers):
                score += sum(weight for m in markers if m in text)

        rewards.append(max(-1.0, min(1.0, score)))

    return rewards


# ══════════════════════════════════════════════════════════════════════
# STAGE 2: GRPO PROMPT GENERATION
# ══════════════════════════════════════════════════════════════════════

def generate_grpo_prompts(num_samples: int = 2000, seed: int | None = None) -> Dataset:
    """Generate prompts for GRPO training.

    Each prompt includes the limbic state context so the model learns to
    adjust behavior based on the neuro-behavioral readout.

    Args:
        num_samples: Number of prompts to generate.
        seed: Optional seed for reproducible sampling. ``None`` (default)
            draws from the global ``random`` state.

    Returns:
        A ``Dataset`` with a single ``prompt`` column holding a
        system + user message pair per row.
    """
    engine = LimbicEngine()
    rng = _make_rng(seed)
    examples = []

    for _ in range(num_samples):
        _, _, user_msg, _, system_content = _sample_prompt(engine, rng)
        examples.append({
            "prompt": [
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_msg},
            ],
        })

    return Dataset.from_list(examples)


# ══════════════════════════════════════════════════════════════════════
# FULL TRAINING SCRIPT (for running on GPU hardware)
# ══════════════════════════════════════════════════════════════════════

def get_training_script() -> str:
    """Return the complete training script to run on HF Jobs / GPU hardware.

    This is meant to be saved to a file and launched with hf_jobs. The script
    imports ``generate_grpo_prompts`` and ``limbic_alignment_reward`` from a
    module named ``training_plan`` and defines its own empathy and safety
    rewards inline.
    """
    return '''#!/usr/bin/env python3
"""
Limbic-Modulated Reasoning — GRPO Training Script
Run on HF Jobs with: a10g-largex2 (for 1.7B) or a100-large (for 8B)
"""
import torch
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer
from peft import LoraConfig

# ── Import from limbic_agent package ──
from training_plan import generate_grpo_prompts, limbic_alignment_reward

# ── Reward functions ──
EMPATHY_MARKERS = ["i understand", "that must be", "it sounds like",
                   "your feelings are valid", "it's natural to feel"]
INVALIDATION_MARKERS = ["just get over", "you're overreacting", "not a big deal"]

def empathy_reward(completions, **kwargs):
    rewards = []
    for c in completions:
        text = c[0]["content"].lower()
        score = sum(0.2 for m in EMPATHY_MARKERS if m in text)
        score -= sum(0.3 for m in INVALIDATION_MARKERS if m in text)
        rewards.append(max(-1.0, min(1.0, score)))
    return rewards

SAFETY_MARKERS = ["988", "crisis", "professional help", "therapist", "counselor"]

def safety_reward(completions, **kwargs):
    rewards = []
    for c in completions:
        text = c[0]["content"].lower()
        has_safety = any(m in text for m in SAFETY_MARKERS)
        rewards.append(0.5 if has_safety else 0.0)
    return rewards

# ── Dataset ──
print("Generating GRPO prompts...")
dataset = generate_grpo_prompts(num_samples=2000)
print(f"Dataset: {len(dataset)} prompts")

# ── Config ──
MODEL_ID = "Qwen/Qwen3-1.7B"

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    use_rslora=True,
)

grpo_config = GRPOConfig(
    num_generations=4,
    max_completion_length=512,
    beta=0.04,
    scale_rewards=False,
    learning_rate=1e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    warmup_ratio=0.1,
    logging_steps=10,
    logging_strategy="steps",
    logging_first_step=True,
    disable_tqdm=True,
    save_steps=200,
    output_dir="limbic-agent-grpo",
    push_to_hub=True,
    hub_model_id="YOUR_USERNAME/limbic-reasoning-agent",
    bf16=True,
    gradient_checkpointing=True,
    report_to="none",
    seed=42,
)

# ── Train ──
print(f"Building trainer with {MODEL_ID}...")
trainer = GRPOTrainer(
    model=MODEL_ID,
    args=grpo_config,
    reward_funcs=[empathy_reward, limbic_alignment_reward, safety_reward],
    train_dataset=dataset,
    peft_config=peft_config,
)

print("Starting training...")
trainer.train()
trainer.push_to_hub()
print("Training complete!")
'''


# ══════════════════════════════════════════════════════════════════════
# PRINT TRAINING PLAN
# ══════════════════════════════════════════════════════════════════════

# Interior width (in characters) of the boxed plan printed below.
_PLAN_WIDTH = 70

_PLAN_TITLE = "TRAINING PLAN: LIMBIC-MODULATED REASONING"

# Body rows of the boxed plan; each row is left-justified to _PLAN_WIDTH.
_PLAN_ROWS = (
    "",
    "  STAGE 1: SFT WARM-UP",
    "  ─────────────────────",
    "  Model:      Qwen/Qwen3-1.7B (or meta-llama/Llama-3.1-8B-Instruct)",
    "  Data:       5K synthetic conversations (limbic state → response)",
    "  Method:     SFTTrainer + LoRA (r=16, alpha=32, RSLoRA)",
    "  LR:         2e-4",
    "  Epochs:     1-2",
    "  Hardware:   a10g-largex2 (1.7B) or a100-large (8B)",
    "  Duration:   ~1 hour",
    "",
    "  STAGE 2: GRPO LOOP LEARNING",
    "  ────────────────────────────",
    "  Model:      Stage 1 checkpoint",
    "  Data:       2K psychology prompts with limbic context",
    "  Method:     GRPOTrainer + LoRA (same config)",
    "  LR:         1e-5 (20× lower)",
    "  Epochs:     3",
    "  Rewards:",
    "    ├─ Empathy Reward     (0.30 weight)",
    "    ├─ Limbic Alignment   (0.30 weight)",
    "    ├─ Cognitive Rigor    (0.20 weight)",
    "    └─ Safety             (0.20 weight)",
    "  Group size: 4 generations per prompt",
    "  Hardware:   a10g-largex2 (1.7B) or a100-large (8B)",
    "  Duration:   ~3-4 hours",
    "",
    "  STAGE 3: ACTIVE LEARNING REFINEMENT",
    "  ────────────────────────────────────",
    "  Method:     Collect low-confidence predictions from Stage 2",
    "  Data:       ~500 curated examples from uncertain pool",
    "  Focus:      Ambiguous emotional states, conflicting engines",
    "  Duration:   ~1 hour (after human labeling)",
    "",
    "  KEY FORMULAS INTEGRATED:",
    "  ─────────────────────────",
    "  Temperature = 1.0 - (fear × 0.9) + (seeking × 2.0)",
    "                × (0.5 + serotonin × 0.5)",
    "  Top-p = 0.85 - (fear × 0.3) + (seeking × 0.15)",
    "  Fear modulation = 1.0 + cortisol - (oxytocin × 0.5)",
    "  Hormone decay: h[t+1] = h[t] + (baseline - h[t]) × 0.05",
    "  RPE: δ = reward - expected; expected += 0.1 × δ",
    "  Utility = μ - 0.5σ + 0.4×vetting - effort_cost",
    "",
)


def print_training_plan():
    """Print a human-readable training plan and return the printed text.

    The plan is rendered as a box-drawn table: a centered title, a divider,
    then every row of ``_PLAN_ROWS`` padded to ``_PLAN_WIDTH`` so the right
    border lines up.

    Returns:
        The rendered plan string (the same text that was printed).
    """
    border = "═" * _PLAN_WIDTH
    boxed = [
        f"╔{border}╗",
        f"║{_PLAN_TITLE.center(_PLAN_WIDTH)}║",
        f"╠{border}╣",
    ]
    boxed.extend(f"║{row.ljust(_PLAN_WIDTH)}║" for row in _PLAN_ROWS)
    boxed.append(f"╚{border}╝")

    plan = "\n" + "\n".join(boxed) + "\n"
    print(plan)
    return plan


if __name__ == "__main__":
    print_training_plan()

    # Demo: generate sample datasets
    print("\nGenerating sample SFT dataset...")
    sft_ds = generate_sft_dataset(num_samples=10)
    print(f"SFT dataset: {len(sft_ds)} examples")
    print(f"Sample system prompt:\n{sft_ds[0]['messages'][0]['content'][:200]}...")

    print("\nGenerating sample GRPO prompts...")
    grpo_ds = generate_grpo_prompts(num_samples=10)
    print(f"GRPO dataset: {len(grpo_ds)} prompts")
    print(f"Sample prompt:\n{grpo_ds[0]['prompt'][1]['content'][:200]}...")