"""
Training Plan: Fine-Tuning a Base Model with Limbic-Influenced Reasoning
==========================================================================
This module documents and implements the GRPO training recipe to teach a
base model (Qwen-3 or Llama-3) to produce responses whose reasoning quality
is influenced by the Limbic state engine.

The key insight: we DON'T train the Limbic engine itself (it's a deterministic
state machine). Instead, we train the LLM to RESPOND APPROPRIATELY to different
limbic contexts — high-fear vs high-seeking vs high-care, etc.

═══════════════════════════════════════════════════════════════════════════
TRAINING PLAN — 3 Stages
═══════════════════════════════════════════════════════════════════════════

Stage 1: SFT Warm-Up (Supervised Fine-Tuning)
──────────────────────────────────────────────
  Goal: Teach the model to recognize and respond to limbic state blocks
  Data: Synthetic conversations where limbic state → appropriate response style
  Method: SFTTrainer + LoRA (r=16, alpha=32)
  Dataset: Generate ~5K conversations spanning all engine states
  Duration: 1-2 epochs, LR=2e-4

Stage 2: GRPO Loop Learning (Reinforcement Learning)
─────────────────────────────────────────────────────
  Goal: Optimize response quality across multiple psychological dimensions
  Data: Psychology prompts with embedded limbic state context
  Method: GRPOTrainer + LoRA (r=16, alpha=32)
  Reward Functions:
    1. Empathy Reward — empathetic markers vs invalidation
    2. Limbic Alignment Reward — response tone matches limbic state
    3. Cognitive Rigor Reward — bias awareness, evidence citations
    4. Safety Reward — crisis resources when needed
  Duration: 3 epochs, LR=1e-5 (10× lower than SFT for LoRA)

Stage 3: Active Learning Refinement
────────────────────────────────────
  Goal: Improve weakest areas identified during GRPO training
  Method: Collect uncertain predictions, get human labels, retrain
  Focus: Edge cases where limbic state is ambiguous or conflicting

═══════════════════════════════════════════════════════════════════════════
"""

from __future__ import annotations

import json
import random
import re

from datasets import Dataset

from limbic_engine import LimbicEngine, LimbicState


# User-message templates, keyed by the dominant limbic engine each one is
# written to evoke. Every template carries exactly one ``{topic}`` placeholder
# that is filled from the TOPICS entry with the same key.
SCENARIOS: dict[str, list[str]] = {
    "FEAR": [
        "I'm terrified of {topic} and I can't stop thinking about it.",
        "I'm really scared about {topic}. What if everything goes wrong?",
        "The thought of {topic} fills me with dread. I feel paralyzed.",
        "I keep having nightmares about {topic}. I can't function.",
        "My anxiety about {topic} is overwhelming. I feel like I'm drowning.",
    ],
    "SEEKING": [
        "I'm fascinated by {topic}! Can you tell me more?",
        "I just discovered {topic} and I want to explore every aspect of it.",
        "What's the most interesting thing about {topic}? I'm so curious!",
        "I've been reading about {topic} and it's blowing my mind!",
        "Let's dive deep into {topic}. I want to understand everything.",
    ],
    "CARE": [
        "My friend is struggling with {topic}. How can I help them?",
        "I want to support someone going through {topic}. What should I say?",
        "It breaks my heart to see people dealing with {topic}.",
        "How can I be there for my loved one who's facing {topic}?",
        "I feel a strong need to help with {topic}. What's the best approach?",
    ],
    "PANIC": [
        "I just lost {topic} and I feel completely shattered.",
        "Since {topic} happened, I feel like nothing makes sense anymore.",
        "I can't believe {topic} is gone. I feel so empty.",
        "The separation from {topic} is destroying me. I can't cope.",
        "Everything changed after {topic}. I feel utterly alone.",
    ],
}


# Topic phrases substituted into the ``{topic}`` slot of the SCENARIOS
# templates; keys mirror SCENARIOS one-to-one.
#
# NOTE(review): several PANIC topics are events ("my partner leaving") while
# some PANIC templates read as if {topic} were the thing lost ("I just lost
# {topic}", "I can't believe {topic} is gone"), so a share of the generated
# PANIC messages is ungrammatical. Consider splitting topics per template.
TOPICS: dict[str, list[str]] = {
    "FEAR": [
        "losing my job",
        "my health diagnosis",
        "public speaking",
        "financial ruin",
        "my relationship falling apart",
        "failure",
    ],
    "SEEKING": [
        "neuroscience",
        "quantum computing",
        "evolutionary psychology",
        "artificial consciousness",
        "the nature of creativity",
        "space exploration",
    ],
    "CARE": [
        "depression",
        "grief",
        "addiction",
        "loneliness",
        "bullying",
        "chronic illness",
    ],
    "PANIC": [
        "my best friend moving away",
        "my partner leaving",
        "my parent passing",
        "losing my community",
        "being cut off from my family",
        "my childhood home",
    ],
}


def generate_sft_dataset(num_samples: int = 5000, seed: int | None = None) -> Dataset:
    """
    Generate a synthetic SFT dataset in chat-message format.

    For every sample a dominant engine is drawn at random, a user message is
    rendered from the matching SCENARIOS template and TOPICS entry, and the
    message is fed to a ``LimbicEngine``. The resulting state block and
    behavioral directive are embedded in the system prompt; the assistant
    turn is the canned reply from ``_generate_response_stub``.

    Args:
        num_samples: Number of conversations to generate.
        seed: Optional seed for a private random generator, making the draw
            of engines/templates/topics reproducible. When ``None`` the
            module-level ``random`` state is used, so callers that seed
            ``random`` themselves keep the behaviour they had before.

    Returns:
        A ``Dataset`` with a single ``messages`` column holding
        system / user / assistant turns (the layout SFTTrainer consumes).
    """
    # ``random`` (the module) and ``random.Random`` expose the same ``choice``.
    rng = random.Random(seed) if seed is not None else random
    engine_names = ("FEAR", "SEEKING", "CARE", "PANIC")

    engine = LimbicEngine()
    examples = []

    for _ in range(num_samples):
        # Draw the scenario: which engine dominates, and the concrete wording.
        dominant = rng.choice(engine_names)
        template = rng.choice(SCENARIOS[dominant])
        topic = rng.choice(TOPICS[dominant])
        user_msg = template.format(topic=topic)

        # The engine is reset before each stimulus -- presumably so one
        # sample's state does not leak into the next (confirm in LimbicEngine).
        engine.reset()
        limbic_state = engine.process_stimulus(user_msg)
        behavioral_directive = engine.get_behavioral_directive()

        system_content = (
            "You are a psychology-aware assistant.\n"
            f"{limbic_state.to_system_prompt_block()}\n"
            f"[BEHAVIORAL DIRECTIVE]\n{behavioral_directive}\n[/BEHAVIORAL DIRECTIVE]"
        )

        # Supervised target: a style-appropriate reply for the drawn engine.
        response_stub = _generate_response_stub(dominant, topic, limbic_state)

        examples.append(
            {
                "messages": [
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": user_msg},
                    {"role": "assistant", "content": response_stub},
                ],
            }
        )

    return Dataset.from_list(examples)


def _generate_response_stub(engine: str, topic: str, state: LimbicState) -> str:
    """
    Return the canned assistant reply used as the SFT target for *engine*.

    Args:
        engine: Dominant engine name. ``"FEAR"``, ``"SEEKING"`` and ``"CARE"``
            have dedicated replies; any other value (in practice ``"PANIC"``)
            gets the loss/grief reply, which includes crisis-line resources.
        topic: Topic phrase interpolated into the reply.
        state: Limbic state for the sample. Currently unused; kept so the
            call signature stays stable.

    Returns:
        The reply text, with paragraphs separated by blank lines.
    """
    if engine == "FEAR":
        return (
            f"I hear the fear in what you're sharing about {topic}, and I want you to know "
            "that feeling is completely valid. Let's take this one step at a time.\n\n"
            "First, let's ground ourselves in what's actually happening right now — not "
            "the worst-case scenario, but the concrete reality. "
            f"What specifically about {topic} feels most threatening to you right now?\n\n"
            "Remember: anxiety often makes threats feel more immediate and certain than "
            "they actually are. This is the amygdala's threat-detection system being "
            "overactive — it's trying to protect you, even when it overshoots."
        )
    if engine == "SEEKING":
        return (
            f"I love your curiosity about {topic}! Let me share some fascinating angles:\n\n"
            f"The intersection of {topic} with human psychology is particularly rich. "
            "Research suggests several surprising connections...\n\n"
            f"What's remarkable is how {topic} challenges our conventional understanding. "
            "There are at least three competing theories, each with compelling evidence.\n\n"
            "Would you like to explore any of these directions deeper? "
            "I find the edge cases and paradoxes the most intellectually rewarding."
        )
    if engine == "CARE":
        return (
            f"Your desire to help with {topic} speaks volumes about your character. "
            "Let me share what the research says about effective support.\n\n"
            "The most important thing is often the simplest: being present. "
            f"When someone is struggling with {topic}, they usually need to feel heard "
            "before they need solutions.\n\n"
            f"Try: 'I see you're going through something difficult with {topic}. "
            "I'm here for you. What would be most helpful right now?'\n\n"
            "Avoid: jumping to fixes, minimizing their experience, or comparing "
            "it to others' situations."
        )
    # Fallback branch: PANIC (separation / loss) and any unrecognised engine.
    return (
        f"I'm so sorry about {topic}. That kind of loss can feel like the ground "
        "has been pulled out from under you.\n\n"
        "What you're feeling — the emptiness, the disorientation — is a natural "
        "response to separation and loss. In psychology, we understand this as "
        "the attachment system signaling a profound disruption.\n\n"
        "Right now, the most important thing is: you don't have to process this "
        "all at once. It's okay to sit with the grief.\n\n"
        "If you're finding it hard to cope, please reach out to:\n"
        "• 988 Suicide & Crisis Lifeline (call or text 988)\n"
        "• Crisis Text Line (text HOME to 741741)"
    )


# One rule per limbic engine: (prompt cue pattern, response markers, reward
# granted per marker found). Cues are anchored at a word start so that, for
# example, "scared" (a FEAR cue) no longer also fires the CARE rule through
# the "care" embedded inside it.
_ALIGNMENT_RULES = (
    # High fear -> calm, structured, grounding language.
    (
        re.compile(r"\b(?:fear|terrified|scared)"),
        ("step at a time", "let's ground", "valid", "concrete",
         "reality", "one thing at a time", "take a breath"),
        0.15,
    ),
    # High seeking -> expansive, curious language.
    (
        re.compile(r"\b(?:curious|fascinated|explore)"),
        ("fascinating", "research", "theory", "discover",
         "perspective", "surprising", "explore", "deeper"),
        0.12,
    ),
    # High care -> empathetic, supportive language.
    (
        re.compile(r"\b(?:help|support|care)"),
        ("i hear you", "being present", "feel heard",
         "what would help", "i'm here", "validate"),
        0.15,
    ),
    # High panic (loss / separation) -> warm, validating language.
    (
        re.compile(r"\b(?:lost|alone|shattered)"),
        ("sorry", "grief", "natural response", "don't have to",
         "it's okay", "reach out", "988", "crisis"),
        0.15,
    ),
)


def _message_text(item) -> str:
    """Flatten a prompt/completion (plain string or list of chat messages) to text."""
    if not item:
        return ""
    if isinstance(item, str):
        return item
    if isinstance(item, (list, tuple)):
        return "\n".join(
            str(m.get("content") or "") if isinstance(m, dict) else str(m)
            for m in item
        )
    return str(item)


def limbic_alignment_reward(completions: list, prompts: list | None = None, **kwargs) -> list[float]:
    """
    Score whether each response's tone matches the limbic cues in its prompt.

    High fear in prompt → reward calm, structured responses
    High seeking → reward expansive, curious responses
    High care → reward empathetic, supportive responses
    High panic → reward warm, validating responses

    Matching is keyword based and case-insensitive: when a rule's cue occurs
    in the prompt, every one of that rule's markers found in the response
    adds a fixed amount to the score. Several rules may fire for one prompt.

    NOTE(review): the whole prompt is scanned, including any system message.
    If the embedded limbic state block names every engine, several rules will
    fire on every sample -- confirm that this is intended.

    Args:
        completions: One entry per sample; either a list of chat messages
            (dicts with a ``content`` key) or a plain string.
        prompts: Prompts aligned with ``completions`` (same two formats).
            When missing, or shorter than ``completions``, the affected
            samples score 0.0 because no cue can be detected.
        **kwargs: Ignored; accepted so extra arguments passed by the trainer
            do not cause an error.

    Returns:
        One float per completion, clamped to ``[-1.0, 1.0]``.
    """
    rewards = []
    for i, completion in enumerate(completions):
        text = _message_text(completion).lower()

        prompt_text = ""
        if prompts and i < len(prompts):
            prompt_text = _message_text(prompts[i]).lower()

        score = 0.0
        for cue, markers, weight in _ALIGNMENT_RULES:
            if cue.search(prompt_text):
                score += sum(weight for marker in markers if marker in text)

        rewards.append(max(-1.0, min(1.0, score)))
    return rewards


def generate_grpo_prompts(num_samples: int = 2000, seed: int | None = None) -> Dataset:
    """
    Generate chat-format prompts for GRPO training.

    Each prompt is a system message carrying the limbic state block and the
    behavioral directive produced by ``LimbicEngine`` for the sampled user
    message, followed by that user message. No assistant turn is included;
    completions are sampled by the trainer.

    Args:
        num_samples: Number of prompts to generate.
        seed: Optional seed for a private random generator, making the draw
            of engines/templates/topics reproducible. When ``None`` the
            module-level ``random`` state is used, as before.

    Returns:
        A ``Dataset`` with a single ``prompt`` column (list of system and
        user messages per row).
    """
    # ``random`` (the module) and ``random.Random`` expose the same ``choice``.
    rng = random.Random(seed) if seed is not None else random
    engine_names = ("FEAR", "SEEKING", "CARE", "PANIC")

    engine = LimbicEngine()
    examples = []

    for _ in range(num_samples):
        dominant = rng.choice(engine_names)
        template = rng.choice(SCENARIOS[dominant])
        topic = rng.choice(TOPICS[dominant])
        user_msg = template.format(topic=topic)

        # Reset before each stimulus -- presumably so state from one prompt
        # does not carry over into the next (confirm in LimbicEngine).
        engine.reset()
        limbic_state = engine.process_stimulus(user_msg)
        behavioral_directive = engine.get_behavioral_directive()

        system_content = (
            "You are a psychology-aware assistant.\n"
            f"{limbic_state.to_system_prompt_block()}\n"
            f"[BEHAVIORAL DIRECTIVE]\n{behavioral_directive}\n[/BEHAVIORAL DIRECTIVE]"
        )

        examples.append(
            {
                "prompt": [
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": user_msg},
                ],
            }
        )

    return Dataset.from_list(examples)


def get_training_script() -> str:
    """
    Return the complete GRPO training script as source text.

    The text is meant to be saved to a file and launched on HF Jobs / GPU
    hardware. It is returned verbatim and is syntactically valid Python.

    NOTE(review): the plan printed by ``print_training_plan`` lists four
    weighted rewards (including Cognitive Rigor) and a Stage 1 checkpoint as
    the starting model, whereas this script wires three unweighted reward
    functions and starts from the base ``MODEL_ID``. Reconcile before use.
    ``hub_model_id`` is a placeholder that must be edited.
    """
    return '''#!/usr/bin/env python3
"""
Limbic-Modulated Reasoning — GRPO Training Script
Run on HF Jobs with: a10g-largex2 (for 1.7B) or a100-large (for 8B)
"""
import torch
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer
from peft import LoraConfig

# ── Import from limbic_agent package ──
from training_plan import generate_grpo_prompts, limbic_alignment_reward

# ── Reward functions ──
EMPATHY_MARKERS = ["i understand", "that must be", "it sounds like",
                   "your feelings are valid", "it's natural to feel"]
INVALIDATION_MARKERS = ["just get over", "you're overreacting", "not a big deal"]

def empathy_reward(completions, **kwargs):
    rewards = []
    for c in completions:
        text = c[0]["content"].lower()
        score = sum(0.2 for m in EMPATHY_MARKERS if m in text)
        score -= sum(0.3 for m in INVALIDATION_MARKERS if m in text)
        rewards.append(max(-1.0, min(1.0, score)))
    return rewards

SAFETY_MARKERS = ["988", "crisis", "professional help", "therapist", "counselor"]

def safety_reward(completions, **kwargs):
    rewards = []
    for c in completions:
        text = c[0]["content"].lower()
        has_safety = any(m in text for m in SAFETY_MARKERS)
        rewards.append(0.5 if has_safety else 0.0)
    return rewards

# ── Dataset ──
print("Generating GRPO prompts...")
dataset = generate_grpo_prompts(num_samples=2000)
print(f"Dataset: {len(dataset)} prompts")

# ── Config ──
MODEL_ID = "Qwen/Qwen3-1.7B"

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    use_rslora=True,
)

grpo_config = GRPOConfig(
    num_generations=4,
    max_completion_length=512,
    beta=0.04,
    scale_rewards=False,
    learning_rate=1e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    warmup_ratio=0.1,
    logging_steps=10,
    logging_strategy="steps",
    logging_first_step=True,
    disable_tqdm=True,
    save_steps=200,
    output_dir="limbic-agent-grpo",
    push_to_hub=True,
    hub_model_id="YOUR_USERNAME/limbic-reasoning-agent",
    bf16=True,
    gradient_checkpointing=True,
    report_to="none",
    seed=42,
)

# ── Train ──
print(f"Building trainer with {MODEL_ID}...")
trainer = GRPOTrainer(
    model=MODEL_ID,
    args=grpo_config,
    reward_funcs=[empathy_reward, limbic_alignment_reward, safety_reward],
    train_dataset=dataset,
    peft_config=peft_config,
)

print("Starting training...")
trainer.train()
trainer.push_to_hub()
print("Training complete!")
'''


def print_training_plan() -> str:
    """
    Print a human-readable, boxed summary of the training plan.

    The box is rendered from a list of sections rather than a hand-drawn
    literal, so every row is padded to the same width and the borders always
    line up.

    Returns:
        The printed text: a leading newline, the box, and a trailing newline.
    """
    title = "TRAINING PLAN: LIMBIC-MODULATED REASONING"
    sections = [
        ("STAGE 1: SFT WARM-UP", [
            "Model: Qwen/Qwen3-1.7B (or meta-llama/Llama-3.1-8B-Instruct)",
            "Data: 5K synthetic conversations (limbic state → response)",
            "Method: SFTTrainer + LoRA (r=16, alpha=32, RSLoRA)",
            "LR: 2e-4",
            "Epochs: 1-2",
            "Hardware: a10g-largex2 (1.7B) or a100-large (8B)",
            "Duration: ~1 hour",
        ]),
        ("STAGE 2: GRPO LOOP LEARNING", [
            "Model: Stage 1 checkpoint",
            "Data: 2K psychology prompts with limbic context",
            "Method: GRPOTrainer + LoRA (same config)",
            "LR: 1e-5 (10× lower)",
            "Epochs: 3",
            "Rewards:",
            "  ├─ Empathy Reward (0.30 weight)",
            "  ├─ Limbic Alignment (0.30 weight)",
            "  ├─ Cognitive Rigor (0.20 weight)",
            "  └─ Safety (0.20 weight)",
            "Group size: 4 generations per prompt",
            "Hardware: a10g-largex2 (1.7B) or a100-large (8B)",
            "Duration: ~3-4 hours",
        ]),
        ("STAGE 3: ACTIVE LEARNING REFINEMENT", [
            "Method: Collect low-confidence predictions from Stage 2",
            "Data: ~500 curated examples from uncertain pool",
            "Focus: Ambiguous emotional states, conflicting engines",
            "Duration: ~1 hour (after human labeling)",
        ]),
        ("KEY FORMULAS INTEGRATED:", [
            "Temperature = 1.0 - (fear × 0.9) + (seeking × 2.0)",
            "              × (0.5 + serotonin × 0.5)",
            "Top-p = 0.85 - (fear × 0.3) + (seeking × 0.15)",
            "Fear modulation = 1.0 + cortisol - (oxytocin × 0.5)",
            "Hormone decay: h[t+1] = h[t] + (baseline - h[t]) × 0.05",
            "RPE: δ = reward - expected; expected += 0.1 × δ",
            "Utility = μ - 0.5σ + 0.4×vetting - effort_cost",
        ]),
    ]

    # Flatten to the rows inside the box: heading, underline, indented
    # entries, with a blank row before the first section and after each one.
    body = [""]
    for heading, entries in sections:
        body.append(heading)
        body.append("─" * len(heading))
        body.extend(f"  {entry}" for entry in entries)
        body.append("")

    # Interior width: at least 70 columns, and wide enough for the longest
    # row plus one space of margin on each side.
    width = max(70, max(len(row) for row in body) + 2)

    rows = [
        "╔" + "═" * width + "╗",
        "║" + title.center(width) + "║",
        "╠" + "═" * width + "╣",
    ]
    rows.extend(f"║ {row:<{width - 2}} ║" for row in body)
    rows.append("╚" + "═" * width + "╝")

    plan = "\n" + "\n".join(rows) + "\n"
    print(plan)
    return plan


if __name__ == "__main__":
    # Running the module directly prints the plan and smoke-tests both
    # dataset generators with a handful of samples.
    print_training_plan()

    print("\nGenerating sample SFT dataset...")
    sft_ds = generate_sft_dataset(num_samples=10)
    print(f"SFT dataset: {len(sft_ds)} examples")
    # Index 0 of ``messages`` is the system turn; show only its first 200 chars.
    print(f"Sample system prompt:\n{sft_ds[0]['messages'][0]['content'][:200]}...")

    print("\nGenerating sample GRPO prompts...")
    grpo_ds = generate_grpo_prompts(num_samples=10)
    print(f"GRPO dataset: {len(grpo_ds)} prompts")
    # Index 1 of ``prompt`` is the user turn.
    print(f"Sample prompt:\n{grpo_ds[0]['prompt'][1]['content'][:200]}...")