# Mahault — "Batch emotion + profile extraction into single LLM call per coaching turn" (e3562aa)
"""
CoachingAgent: main orchestrator for MindSphere coaching sessions.
Manages the full lifecycle:
calibration → visualization → planning → coaching → complete
Integrates the POMDP model, ToM particle filter, empathy planner,
and LLM layers. Post-calibration responses are routed through Mistral
LLM for natural, companion-style conversation — with belief context,
ToM predictions, and cognitive load inference injected dynamically.
Falls back to template-based responses when no API key is available.
"""
from __future__ import annotations

import logging
import re
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from .model import SphereModel, SKILL_FACTORS, SKILL_LEVEL_VALUES, ACTION_NAMES
from .inference import (
update_belief,
update_all_beliefs,
compute_efe_all_factors,
select_action,
compute_information_gain,
)
from .action_dispatcher import select_coaching_action
from .dependency_graph import DependencyGraph
from .utils import normalize, softmax
from ..tom.particle_filter import UserTypeFilter
from ..tom.empathy_planner import EmpathyPlanner
from ..content.question_bank import (
QUESTION_BANK,
CalibrationQuestion,
get_adaptive_question_order,
)
from ..content.interventions import (
get_gentle_push_pair,
get_interventions_for_skill,
Intervention,
)
from ..content.templates import (
WELCOME_MESSAGE, SPHERE_INTRO, PLAN_INTRO,
COACHING_PROBES, COACHING_EXERCISES,
)
from .emotional_state import (
EmotionEngine,
EmotionalPrediction,
EmotionalObservation,
PredictionError,
compute_belief_entropy,
)
from .learning import ModelLearner
from .user_profile import UserProfile
logger = logging.getLogger(__name__)
# Session lifecycle phases, in the order the module docstring describes:
# calibration → visualization → planning → (update) → coaching → complete
PHASE_CALIBRATION = "calibration"
PHASE_VISUALIZATION = "visualization"
PHASE_PLANNING = "planning"
PHASE_UPDATE = "update"
PHASE_COACHING = "coaching"
PHASE_COMPLETE = "complete"
# Human-readable labels for the skill factor keys used in belief dicts
SKILL_LABELS = {
    "focus": "Focus",
    "follow_through": "Follow-through",
    "social_courage": "Social Courage",
    "emotional_reg": "Emotional Regulation",
    "systems_thinking": "Systems Thinking",
    "self_trust": "Self-Trust",
    "task_clarity": "Task Clarity",
    "consistency": "Consistency",
}
class CoachingAgent:
"""
Main orchestrator for a MindSphere coaching session.
When a Mistral API key is available, all post-calibration responses
are generated via LLM with dynamic belief/ToM context injection.
Otherwise, falls back to template-based responses.
Usage:
agent = CoachingAgent()
result = agent.start_session() # returns welcome + first question
# Phase 1: Calibration loop
result = agent.step(user_answer_dict)
# ... repeat for ~10 questions ...
# Phase 2+: All responses routed through LLM
result = agent.step(user_choice_dict)
"""
def __init__(
    self,
    lambda_empathy: float = 0.5,
    n_particles: int = 50,
    beta: float = 4.0,
):
    """Create a fresh coaching-session orchestrator.

    Args:
        lambda_empathy: Forwarded to EmpathyPlanner as its empathy weight.
        n_particles: Particle count for the ToM UserTypeFilter.
        beta: Forwarded to EmpathyPlanner (presumably a softmax inverse
            temperature — confirm against EmpathyPlanner).
    """
    # Generative model, skill dependency graph, ToM filter, empathy planner
    self.model = SphereModel()
    self.dep_graph = DependencyGraph()
    self.tom = UserTypeFilter(n_particles=n_particles)
    self.empathy = EmpathyPlanner(lambda_empathy=lambda_empathy, beta=beta)
    # Belief state over skill factors + session phase bookkeeping
    self.beliefs: Dict[str, np.ndarray] = {}
    self.phase: str = PHASE_CALIBRATION
    self.timestep: int = 0
    self.asked_question_ids: List[str] = []
    self.history: List[Dict[str, Any]] = []
    self.current_question: Optional[CalibrationQuestion] = None
    self.current_intervention: Optional[Intervention] = None
    self.target_skill: Optional[str] = None
    # Conversation history for LLM context (role + content pairs)
    self.conversation_history: List[Dict[str, str]] = []
    # Tracks turns within each post-calibration phase
    self._viz_turns: int = 0
    self._planning_turns: int = 0
    self._coaching_turns: int = 0
    self._probes_asked: List[str] = []  # track which probing questions we've used
    self._exercises_given: List[str] = []  # track which exercises we've suggested
    self._accepted_interventions: List[Dict[str, Any]] = []
    # Cognitive load tracking: last N sentiment signals from _assess_cognitive_load
    self._recent_sentiments: List[str] = []
    self._max_calibration_questions = 10
    # POMDP parameter learning (Dirichlet concentration parameters)
    self.learner = ModelLearner(self.model)
    # Circumplex emotion engine (Pattisapu & Albarracin 2024)
    self.emotion = EmotionEngine()
    self._last_prediction: Optional[EmotionalPrediction] = None
    self._last_observation: Optional[EmotionalObservation] = None
    self._last_error: Optional[PredictionError] = None
    # Semantic user profile with Bayesian network causal model
    self.profile = UserProfile()
    # LLM generator + classifier — initialized lazily via the properties
    self._generator = None
    self._generator_initialized = False
    self._classifier = None
    self._classifier_initialized = False
@property
def generator(self):
    """Lazily build the LLM generator on first access; None when unavailable."""
    if self._generator_initialized:
        return self._generator
    self._generator_initialized = True
    try:
        from ..llm.client import MistralClient
        from ..llm.generator import CoachGenerator

        client = MistralClient()
        candidate = CoachGenerator(client=client)
        if candidate.is_available:
            self._generator = candidate
            logger.info(f"LLM generator available — using {client.base_url} ({client.model})")
        else:
            # Constructed but unusable (e.g. missing API key) — drop it.
            self._generator = None
            logger.warning("LLM generator created but not available (no API key?) — using template responses")
    except Exception as e:
        logger.warning(f"LLM generator init failed: {e} — using template responses")
        self._generator = None
    return self._generator
@property
def classifier(self):
    """Lazily build the LLM classifier on first access; None when unavailable."""
    if self._classifier_initialized:
        return self._classifier
    self._classifier_initialized = True
    try:
        from ..llm.client import MistralClient
        from ..llm.classifier import SphereClassifier

        client = MistralClient()
        if client.is_available:
            self._classifier = SphereClassifier(client=client)
            logger.info("LLM classifier available for emotion observations")
        else:
            # Leave self._classifier as None (set in __init__).
            logger.warning("LLM classifier not available (no API key?)")
    except Exception as e:
        logger.warning(f"LLM classifier init failed: {e}")
        self._classifier = None
    return self._classifier
def _llm_generate(self, user_message: str, cognitive_load: Optional[Dict] = None) -> str:
    """Generate a coach reply via the LLM; returns "" when unavailable.

    Callers fall back to template responses on an empty return. In stream
    mode the prepared context is stashed on self._stream_capture instead
    of calling the LLM, so step_stream() can stream it afterwards.

    Args:
        user_message: The user's text for this turn.
        cognitive_load: Pre-computed cognitive load dict (avoids calling
            _assess_cognitive_load twice, which duplicates sentiment tracking).
    """
    gen = self.generator
    if gen is None:
        logger.info(f"[LLM] Generator is None — using template fallback (phase={self.phase})")
        return ""
    ctx = self._prepare_llm_context(user_message, cognitive_load)
    if getattr(self, '_stream_mode', False):
        # Streaming path: defer generation; step_stream() consumes the capture.
        self._stream_capture = ctx
        logger.info(f"[LLM] Stream mode — context captured (phase={self.phase})")
        return ""
    logger.info(f"[LLM] Generating response (phase={self.phase}, msg_len={len(user_message)}, history_len={len(self.conversation_history)}, profile_facts={len(self.profile.facts)})")
    # ctx keys match CoachGenerator.generate's keyword parameters exactly.
    result = gen.generate(**ctx)
    if result:
        logger.info(f"[LLM] Got response ({len(result)} chars)")
    else:
        logger.warning(f"[LLM] Empty response — falling back to template (phase={self.phase})")
    return result
def _prepare_llm_context(self, user_message: str, cognitive_load: Optional[Dict] = None) -> Dict[str, Any]:
"""Prepare shared context dict for LLM generation (used by both sync and stream)."""
intervention_dict = None
if self.current_intervention:
intervention_dict = {
"description": self.current_intervention.description,
"target_skill": self.current_intervention.target_skill,
"duration_minutes": self.current_intervention.duration_minutes,
"difficulty": self.current_intervention.difficulty,
}
if cognitive_load is None:
cognitive_load = self._assess_cognitive_load(user_message)
user_state = self.get_inferred_user_state()
cognitive_load["inferred_emotions"] = user_state.get("recent_emotions", [])
cognitive_load["inferred_topics"] = user_state.get("recent_topics", [])
cognitive_load["engagement_level"] = user_state.get("engagement_level", "moderate")
current_emotion = self.emotion.get_current_emotion()
if current_emotion:
cognitive_load["circumplex_emotion"] = current_emotion.emotion_label()
cognitive_load["circumplex_valence"] = round(current_emotion.valence, 2)
cognitive_load["circumplex_arousal"] = round(current_emotion.arousal, 2)
if self._last_error:
cognitive_load["emotion_prediction_error"] = round(self._last_error.magnitude, 3)
if self._last_prediction and self._last_observation:
cognitive_load["predicted_emotion"] = self._last_prediction.predicted_emotion
cognitive_load["observed_emotion"] = self._last_observation.observed_emotion
return {
"user_message": user_message,
"conversation_history": self.conversation_history,
"phase": self.phase,
"belief_summary": self.get_belief_summary(),
"tom_summary": self.tom.get_user_type_summary(),
"cognitive_load": cognitive_load,
"target_skill": self.target_skill,
"current_intervention": intervention_dict,
"accepted_interventions": self._accepted_interventions,
"profile_section": self.profile.format_for_prompt(),
}
def _llm_generate_stream(self, user_message: str, cognitive_load: Optional[Dict] = None):
"""
Stream LLM response, yielding text chunks.
Returns a generator of string chunks. If LLM is unavailable, yields nothing.
"""
gen = self.generator
if gen is None:
return
ctx = self._prepare_llm_context(user_message, cognitive_load)
logger.info(f"[LLM] Streaming response (phase={self.phase}, msg_len={len(user_message)}, profile_facts={len(self.profile.facts)})")
yield from gen.generate_stream(
user_message=ctx["user_message"],
conversation_history=ctx["conversation_history"],
phase=ctx["phase"],
belief_summary=ctx["belief_summary"],
tom_summary=ctx["tom_summary"],
cognitive_load=ctx["cognitive_load"],
target_skill=ctx["target_skill"],
current_intervention=ctx["current_intervention"],
accepted_interventions=ctx["accepted_interventions"],
profile_section=ctx["profile_section"],
)
def _assess_cognitive_load(self, user_message: str = "", emotional_data: Optional[Dict] = None) -> Dict[str, Any]:
"""
Infer cognitive load from ToM dimensions + conversation signals + emotional state.
Uses:
- ToM overwhelm_threshold (low = easily overwhelmed)
- Recent sentiment signals from conversation
- Message length and engagement patterns
- Emotional valence beliefs and prediction error from Circumplex POMDP
"""
# ToM-based assessment
tom_type = self.tom.get_user_type_summary()
overwhelm_threshold = tom_type.get("overwhelm_threshold", 0.5)
autonomy = tom_type.get("autonomy_sensitivity", 0.5)
# Conversation-based signals
lower = user_message.lower()
signals = []
# Detect disengagement
disengaged_words = [
"bored", "boring", "meh", "whatever", "don't care",
"not interested", "idk", "dunno",
]
if any(w in lower for w in disengaged_words):
signals.append("disengaged")
# Detect overwhelm
overwhelm_words = [
"overwhelmed", "too much", "can't handle", "stressed",
"anxious", "burned out", "exhausted", "tired",
]
if any(w in lower for w in overwhelm_words):
signals.append("overwhelmed")
# Detect positive engagement
engaged_words = [
"interesting", "tell me more", "curious", "that makes sense",
"let's do it", "i want to", "excited", "ready",
]
if any(w in lower for w in engaged_words):
signals.append("engaged")
# Detect off-topic intent
offtopic_words = [
"by the way", "random question", "off topic", "unrelated",
"can you", "do you know", "what about", "have you heard",
"what's going on", "what do you think about",
]
if any(w in lower for w in offtopic_words):
signals.append("off_topic")
# Detect deflection / topic change
deflection_words = [
"don't want to talk about", "not want to talk",
"change the subject", "change topic", "something else",
"let's talk about", "can we talk about", "i want to talk about",
"anyway", "never mind", "forget it", "moving on",
"i'd rather", "not right now", "not now",
"but what about", "what about", "how about",
]
if any(w in lower for w in deflection_words):
signals.append("deflection")
# Short messages may indicate low engagement
if len(user_message.strip()) < 10 and user_message.strip():
signals.append("low_effort")
# === Emotional state integration (from Circumplex POMDP) ===
if emotional_data:
beliefs = emotional_data.get("emotional_beliefs", {})
valence_belief = beliefs.get("valence", {}).get("belief", [])
# Valence belief concentrated on negative states → emotional distress
if len(valence_belief) >= 5:
negative_mass = valence_belief[0] + valence_belief[1]
if negative_mass > 0.6:
signals.append("emotional_distress")
# High prediction error → our emotional model was wrong
error_data = emotional_data.get("error", {})
if error_data.get("magnitude", 0) > 0.5:
signals.append("emotional_surprise")
# Current valence strongly negative
current = emotional_data.get("current_emotion") or {}
if current.get("valence", 0) < -0.3:
signals.append("low_valence")
# Also check the observation directly (in case current_emotion is empty)
obs = emotional_data.get("observation") or {}
if obs.get("observed_valence", 0) < -0.3:
if "low_valence" not in signals:
signals.append("low_valence")
# Track recent sentiments
self._recent_sentiments.append(
"disengaged" if "disengaged" in signals
else "overwhelmed" if "overwhelmed" in signals
else "engaged" if "engaged" in signals
else "neutral"
)
# Keep last 5
self._recent_sentiments = self._recent_sentiments[-5:]
# Determine overall load level
recent_disengaged = sum(1 for s in self._recent_sentiments if s == "disengaged")
recent_overwhelmed = sum(1 for s in self._recent_sentiments if s == "overwhelmed")
if recent_overwhelmed >= 2 or (overwhelm_threshold < 0.3 and "overwhelmed" in signals):
load_level = "high"
coaching_readiness = "not_ready"
elif recent_disengaged >= 2 or "disengaged" in signals:
load_level = "low_engagement"
coaching_readiness = "not_ready"
elif "off_topic" in signals or "deflection" in signals:
load_level = "redirected"
coaching_readiness = "not_ready"
elif "emotional_distress" in signals or "low_valence" in signals:
load_level = "emotionally_vulnerable"
coaching_readiness = "not_ready"
elif "engaged" in signals:
load_level = "optimal"
coaching_readiness = "ready"
else:
load_level = "moderate"
coaching_readiness = "open"
return {
"level": load_level,
"coaching_readiness": coaching_readiness,
"signals": signals,
"overwhelm_threshold": overwhelm_threshold,
"autonomy_sensitivity": autonomy,
}
def _track_conversation(self, role: str, content: str) -> None:
"""Add a message to conversation history for LLM context."""
if not content:
return
# Warn on consecutive same-role duplicate messages
if (self.conversation_history
and self.conversation_history[-1]["role"] == role
and self.conversation_history[-1]["content"] == content):
logger.warning(f"[Track] Skipping duplicate {role} message: '{content[:60]}...'")
return
self.conversation_history.append({"role": role, "content": content})
# -------------------------------------------------------------------------
# PREDICT-OBSERVE-UPDATE — Circumplex Emotional Inference
# -------------------------------------------------------------------------
def _predict_emotion(self) -> EmotionalPrediction:
    """PREDICT the user's emotional state before reading their text.

    Following Pattisapu & Albarracin (2024):
      - Arousal = H[Q(s|o)] — posterior belief entropy over skill factors
      - Valence = U - EU — reward prediction error (felt cost vs acceptance)
    ToM reliability gates prediction confidence.
    """
    # Entropy of each skill-factor belief we currently hold.
    entropies = {
        skill: compute_belief_entropy(self.beliefs[skill])
        for skill in SKILL_FACTORS
        if skill in self.beliefs
    }
    # Defaults when there is no active intervention to react to.
    felt_cost, p_accept = 0.3, 0.5
    if self.current_intervention:
        tom_pred = self.tom.predict_response_gated(self.current_intervention.to_dict())
        felt_cost = tom_pred.get("predicted_felt_cost", 0.3)
        p_accept = tom_pred.get("p_accept", 0.5)
    prediction = self.emotion.predict(
        belief_entropies=entropies,
        tom_felt_cost=felt_cost,
        tom_p_accept=p_accept,
        reliability=self.tom.reliability,
    )
    self._last_prediction = prediction
    return prediction
def _observe_emotion(self, user_text: str) -> EmotionalObservation:
"""
OBSERVE the user's emotional state by classifying their text.
Uses LLM classification when available, falls back to heuristic.
The classification becomes a formal POMDP observation.
"""
classifier = self.classifier
if classifier is not None:
# Use LLM for accurate emotional observation
context = f"Phase: {self.phase}"
if self.target_skill:
context += f", Target skill: {self.target_skill}"
if self.current_intervention:
context += f", Current suggestion: {self.current_intervention.description}"
try:
result = classifier.classify_emotion(user_text, context=context)
except Exception:
result = classifier.classify_emotion_heuristic(user_text)
else:
# Heuristic fallback — no LLM available
result = self._classify_emotion_heuristic(user_text)
observation = self.emotion.observe(
valence_idx=result.get("valence_idx", 2),
arousal_idx=result.get("arousal_idx", 2),
raw_classification=result,
)
self._last_observation = observation
return observation
def _update_from_emotion_error(
self,
prediction: EmotionalPrediction,
observation: EmotionalObservation,
) -> PredictionError:
"""
UPDATE beliefs using the prediction error between predicted
and observed emotional state.
This is the core of the predict-observe-update loop:
1. Bayesian update of emotional beliefs (A-matrix in EmotionEngine)
2. Prediction error used to soft-update ToM particles
- Large errors → ToM was wrong → adjust user type particles
- Small errors → ToM is accurate → increase confidence
From the empathy project:
- Prediction error is the accuracy gate signal
- When ToM predicts "calm" but user is "angry" → ToM needs calibration
"""
error = self.emotion.update(prediction, observation)
self._last_error = error
# Use prediction error to soft-update ToM particles
# Large valence error → our model of what makes them feel good/bad is wrong
# Large arousal error → our model of their uncertainty/engagement is wrong
self._update_tom_from_emotion_error(error, observation)
return error
def _update_tom_from_emotion_error(
self,
error: PredictionError,
observation: EmotionalObservation,
) -> None:
"""
Use emotional prediction error to update ToM particle filter.
When our emotional prediction is wrong, it tells us something
about the user's type:
- We predicted calm but they're stressed → overwhelm_threshold lower than thought
- We predicted positive but they're frustrated → autonomy higher than thought
- We predicted engaged but they're bored → novelty_seeking higher than thought
The update strength scales with the magnitude of the error.
"""
if error.magnitude < 0.1:
# Small error — prediction was close, no update needed
return
# Scale update strength by error magnitude (capped at 0.2)
strength = min(error.magnitude * 0.15, 0.2)
# Determine which ToM dimensions to adjust based on the emotional observation
observed_emotion = observation.observed_emotion
valence_error = error.valence_error # positive = user happier than predicted
arousal_error = error.arousal_error # positive = user more aroused than predicted
# Dimension indices in particle space:
# 0: avoids_evaluation, 1: hates_long_tasks, 2: novelty_seeking
# 3: structure_preference, 4: external_validation, 5: autonomy_sensitivity
# 6: overwhelm_threshold
biases = []
# Negative valence surprise: user is more negative than predicted
if valence_error < -0.2:
biases.append((6, -1, strength)) # Lower overwhelm threshold
biases.append((0, 1, strength * 0.5)) # Higher evaluation avoidance
# Positive valence surprise: user is happier than predicted
if valence_error > 0.2:
biases.append((6, 1, strength * 0.5)) # Higher overwhelm threshold
# High arousal surprise: user is more activated than predicted
if arousal_error > 0.2:
if observation.observed_valence < 0:
# Negative + high arousal = angry/anxious → autonomy sensitivity
biases.append((5, 1, strength))
else:
# Positive + high arousal = excited → novelty seeking
biases.append((2, 1, strength * 0.5))
# Low arousal surprise: user is less activated than predicted
if arousal_error < -0.2:
if observation.observed_valence < 0:
# Negative + low arousal = bored/depressed → novelty seeking
biases.append((2, 1, strength))
biases.append((1, 1, strength * 0.5)) # hates long tasks
else:
# Positive + low arousal = calm/relaxed → structure preference
biases.append((3, 1, strength * 0.3))
# Apply biases to particles
for dim_idx, direction, bias_strength in biases:
for j in range(self.tom.n_particles):
particle_val = self.tom.particle_params[j, dim_idx]
if direction > 0:
likelihood = 0.5 + bias_strength * particle_val
else:
likelihood = 0.5 + bias_strength * (1.0 - particle_val)
self.tom.particle_weights[j] *= likelihood
# Renormalize
total = np.sum(self.tom.particle_weights)
if total > 0:
self.tom.particle_weights /= total
else:
self.tom.particle_weights = np.ones(self.tom.n_particles) / self.tom.n_particles
# Invalidate caches
self.tom._reliability_cache = None
self.tom._confidence_cache = None
def _classify_emotion_heuristic(self, user_text: str) -> Dict[str, Any]:
"""
Heuristic emotion classification — fallback when LLM is unavailable.
Uses keyword matching to estimate valence and arousal.
"""
lower = user_text.lower()
neg_words = [
"stressed", "anxious", "worried", "frustrated", "stuck",
"hopeless", "lost", "confused", "scared", "tired",
"burned out", "overwhelmed", "sad", "angry", "annoyed",
"hate", "terrible", "awful", "bad", "depressed",
"bored", "boring", "meh", "ugh", "down", "horrible",
"pointless", "miserable", "give up", "sucks", "worst",
"painful", "dread", "fail", "broken", "nothing works",
]
pos_words = [
"good", "great", "happy", "excited", "curious",
"interesting", "love", "amazing", "better", "hopeful",
"motivated", "ready", "clear", "peaceful", "calm",
"relaxed", "grateful", "proud", "confident", "wonderful",
"fantastic", "awesome", "brilliant", "beautiful",
"progress", "getting it", "starting to",
]
high_arousal = [
"!", "stressed", "anxious", "excited", "angry",
"can't believe", "urgent", "help", "panic", "scared",
"amazing", "overwhelmed", "furious", "thrilled",
"panicking", "intense", "?!", "!!",
]
low_arousal = [
"bored", "tired", "calm", "peaceful", "relaxed",
"meh", "whatever", "ok", "fine", "sleepy",
"hopeless", "pointless", "numb",
]
neg_c = sum(1 for w in neg_words if w in lower)
pos_c = sum(1 for w in pos_words if w in lower)
hi_c = sum(1 for w in high_arousal if w in lower)
lo_c = sum(1 for w in low_arousal if w in lower)
if len(user_text.strip()) < 10:
lo_c += 1
# Valence
if neg_c > pos_c + 1:
v, vi = "very_negative", 0
elif neg_c > pos_c:
v, vi = "negative", 1
elif pos_c > neg_c + 1:
v, vi = "very_positive", 4
elif pos_c > neg_c:
v, vi = "positive", 3
else:
v, vi = "neutral", 2
# Arousal
if hi_c > lo_c + 1:
a, ai = "very_high", 4
elif hi_c > lo_c:
a, ai = "high", 3
elif lo_c > hi_c + 1:
a, ai = "very_low", 0
elif lo_c > hi_c:
a, ai = "low", 1
else:
a, ai = "moderate", 2
# Emotion label
if vi >= 3 and ai >= 3:
emotion = "excited"
elif vi >= 3 and ai <= 1:
emotion = "relaxed"
elif vi <= 1 and ai >= 3:
emotion = "angry"
elif vi <= 1 and ai <= 1:
emotion = "depressed"
elif vi >= 3:
emotion = "happy"
elif vi <= 1:
emotion = "sad"
elif ai >= 3:
emotion = "alert"
else:
emotion = "calm"
return {
"valence": v, "arousal": a,
"valence_idx": vi, "arousal_idx": ai,
"primary_emotion": emotion,
"confidence": "medium", "emotional_cues": [],
}
def _run_emotional_inference(self, user_text: str) -> Dict[str, Any]:
"""
Run the full predict-observe-update loop for emotional inference.
Returns a dict with prediction, observation, error, and current state.
This is called each turn during post-calibration phases.
"""
# 1. PREDICT — before seeing the text
prediction = self._predict_emotion()
# 2. OBSERVE — classify the text
observation = self._observe_emotion(user_text)
# 3. UPDATE — compare and learn
error = self._update_from_emotion_error(prediction, observation)
return {
"prediction": prediction.to_dict(),
"observation": observation.to_dict(),
"error": error.to_dict(),
"current_emotion": self.emotion.get_current_emotion().to_dict()
if self.emotion.get_current_emotion() else None,
"emotional_beliefs": self.emotion.get_belief_state(),
}
def _run_batched_inference(self, user_text: str):
"""
Run emotion classification + profile extraction in a single LLM call.
Returns (emotional_data, profile_data) where profile_data is the raw
dict for store_extracted_data(), or None if batching wasn't possible.
Falls back to the original separate emotion-only call if needed.
"""
classifier = self.classifier
profile_data = None
if classifier is not None:
# Build context for the batched call
context = f"Phase: {self.phase}"
if self.target_skill:
context += f", Target skill: {self.target_skill}"
if self.current_intervention:
context += f", Current suggestion: {self.current_intervention.description}"
existing_summary = ""
if self.profile.facts:
existing_summary = "; ".join(
f.content for f in self.profile.facts[-5:]
)
try:
result = classifier.classify_emotion_and_extract_profile(
user_text, context=context, existing_summary=existing_summary,
)
emotion_result = result["emotion"]
profile_data = result["profile"]
# Process emotion through predict-observe-update loop
prediction = self._predict_emotion()
observation = self.emotion.observe(
valence_idx=emotion_result.get("valence_idx", 2),
arousal_idx=emotion_result.get("arousal_idx", 2),
raw_classification=emotion_result,
)
self._last_observation = observation
error = self._update_from_emotion_error(prediction, observation)
emotional_data = {
"prediction": prediction.to_dict(),
"observation": observation.to_dict(),
"error": error.to_dict(),
"current_emotion": self.emotion.get_current_emotion().to_dict()
if self.emotion.get_current_emotion() else None,
"emotional_beliefs": self.emotion.get_belief_state(),
}
logger.info(
"[Batched] emotion + profile in 1 LLM call "
f"(emotion={emotion_result.get('primary_emotion')}, "
f"profile_facts={len(profile_data.get('facts', []))})"
)
return emotional_data, profile_data
except Exception as e:
logger.warning(f"[Batched] Failed ({e}), falling back to separate calls")
# Fallback: original separate emotion inference
emotional_data = self._run_emotional_inference(user_text)
return emotional_data, None
def start_session(self) -> Dict[str, Any]:
    """Reset all session state and return the welcome message + first question.

    Fix: previously a restarted session kept stale coaching state from the
    prior run (`_coaching_turns`, `_probes_asked`, `_exercises_given`,
    `_accepted_interventions`, `current_intervention`, `target_skill`);
    these are now reset alongside the other per-session fields.
    """
    self.beliefs = self.model.get_initial_beliefs()
    self.phase = PHASE_CALIBRATION
    self.timestep = 0
    self.asked_question_ids = []
    self.history = []
    self.conversation_history = []
    self.tom.reset()
    self.emotion.reset()
    # Per-phase turn counters and coaching trackers.
    self._viz_turns = 0
    self._planning_turns = 0
    self._coaching_turns = 0
    self._probes_asked = []
    self._exercises_given = []
    self._accepted_interventions = []
    self.current_intervention = None
    self.target_skill = None
    self._recent_sentiments = []
    # Clear the emotional predict-observe-update trail.
    self._last_prediction = None
    self._last_observation = None
    self._last_error = None
    # Get first question (adaptive ordering).
    next_q = self._get_next_question()
    self.current_question = next_q
    # Track the welcome message in conversation history for LLM context.
    self._track_conversation("assistant", WELCOME_MESSAGE)
    return {
        "phase": self.phase,
        "message": WELCOME_MESSAGE,
        "question": self._format_question(next_q) if next_q else None,
        "is_complete": False,
    }
def step(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process one step of the coaching session, dispatching on the phase.

    Args:
        user_input: Dict with keys depending on phase:
            - calibration: {"answer": str, "answer_index": int (optional)}
            - planning/update: {"choice": "accept"|"too_hard"|"not_relevant"}
            - any phase: {"answer": str} for free-text chat

    Returns:
        Result dict with message, phase, question/intervention data, etc.
    """
    self.timestep += 1
    handlers = {
        PHASE_CALIBRATION: self._step_calibration,
        PHASE_VISUALIZATION: self._step_visualization,
        PHASE_PLANNING: self._step_planning,
        PHASE_UPDATE: self._step_update,
        PHASE_COACHING: self._step_coaching,
    }
    handler = handlers.get(self.phase)
    if handler is None:
        # Unknown / terminal phase — the session is over.
        return {"phase": PHASE_COMPLETE, "message": "Session complete.", "is_complete": True}
    return handler(user_input)
def step_stream(self, user_input: Dict[str, Any]):
    """
    Streaming version of step(). Yields SSE event dicts.

    Events:
        {"event": "metadata", "data": {phase, sphere_data, question, ...}}
        {"event": "token", "data": {"text": "chunk"}}   (repeated)
        {"event": "done", "data": {}}

    Mechanism: sets self._stream_mode so that _llm_generate() stashes its
    prepared context on self._stream_capture instead of calling the LLM;
    the captured context is then streamed chunk-by-chunk here, and the
    template message in the result is only used as a fallback.
    """
    self.timestep += 1
    # Calibration: no LLM call, yield result directly
    if self.phase == PHASE_CALIBRATION:
        result = self._step_calibration(user_input)
        yield {"event": "metadata", "data": result}
        yield {"event": "done", "data": {}}
        return
    # Enable stream capture: _llm_generate will save context instead of calling LLM
    self._stream_capture = None
    self._stream_mode = True
    try:
        if self.phase == PHASE_VISUALIZATION:
            result = self._step_visualization(user_input)
        elif self.phase == PHASE_PLANNING:
            result = self._step_planning(user_input)
        elif self.phase == PHASE_UPDATE:
            result = self._step_update(user_input)
        elif self.phase == PHASE_COACHING:
            result = self._step_coaching(user_input)
        else:
            result = {"phase": PHASE_COMPLETE, "message": "Session complete.", "is_complete": True}
    finally:
        # Always restore sync behavior, even if the phase handler raises
        self._stream_mode = False
    captured = self._stream_capture
    self._stream_capture = None
    # Yield metadata first (everything except the message text)
    template_msg = result.get("message", "")
    metadata = {k: v for k, v in result.items() if k != "message"}
    yield {"event": "metadata", "data": metadata}
    # Stream LLM response if context was captured during the phase handler
    if captured and self.generator:
        collected = []
        for chunk in self.generator.generate_stream(
            user_message=captured["user_message"],
            conversation_history=captured["conversation_history"],
            phase=captured["phase"],
            belief_summary=captured["belief_summary"],
            tom_summary=captured["tom_summary"],
            cognitive_load=captured["cognitive_load"],
            target_skill=captured["target_skill"],
            current_intervention=captured["current_intervention"],
            accepted_interventions=captured["accepted_interventions"],
            profile_section=captured["profile_section"],
        ):
            collected.append(chunk)
            yield {"event": "token", "data": {"text": chunk}}
        full_text = "".join(collected)
        if full_text:
            # Replace template fallback in conversation history with real LLM response
            for i in range(len(self.conversation_history) - 1, -1, -1):
                if self.conversation_history[i]["role"] == "assistant":
                    self.conversation_history[i]["content"] = full_text
                    break
        else:
            # LLM returned nothing — fall back to the template message
            if template_msg:
                yield {"event": "token", "data": {"text": template_msg}}
    else:
        # No LLM available — yield the template message in one piece
        if template_msg:
            yield {"event": "token", "data": {"text": template_msg}}
    yield {"event": "done", "data": {}}
# -------------------------------------------------------------------------
# CALIBRATION PHASE
# -------------------------------------------------------------------------
# Acknowledgment messages cycled during calibration
# Acknowledgment lines used between calibration questions (selection /
# cycling logic lives in _step_calibration).
_CALIBRATION_ACKS = [
    "Got it.",
    "Thanks for sharing that.",
    "Understood.",
    "Okay, noted.",
    "That's helpful to know.",
    "Interesting — thanks.",
    "Appreciate your honesty.",
    "Good to know.",
]
def _step_calibration(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """Process a calibration answer and return the next question or a transition.

    MC answers update the question's skill-factor belief through its
    A-matrix; free-text answers update multiple skill and friction beliefs
    from the classifier's signals. Every observation is also fed to the
    learner to refine the observation model, and substantive answers feed
    the profile extractor.

    Args:
        user_input: Turn payload; reads "answer", "answer_index", and
            "classified" (classifier output for free-text questions).

    Returns:
        Response dict with "phase", "message", "is_complete", and — while
        calibration continues — "question" and "progress".
    """
    if self.current_question is None:
        return self._transition_to_visualization()
    q = self.current_question
    # --- Handle MC questions ---
    if q.question_type == "mc":
        answer_idx = user_input.get("answer_index")
        if answer_idx is None:
            # No explicit index — try to match the text to an option
            answer_text = user_input.get("answer", "")
            answer_idx = self._match_mc_answer(answer_text, q.options)
        # If we still have no valid answer, gently re-ask the same question
        if answer_idx is None:
            return {
                "phase": PHASE_CALIBRATION,
                "message": "No worries — just pick whichever option fits best.",
                "question": self._format_question(q),
                "progress": len(self.asked_question_ids) / self._max_calibration_questions,
                "is_complete": False,
            }
        if q.a_matrix is not None:
            # Update belief for this skill factor
            self.beliefs[q.category] = update_belief(
                self.beliefs[q.category],
                answer_idx,
                q.a_matrix,
            )
            # Learn: refine the A-matrix from this observation
            self.learner.learn_from_observation(
                q.category, answer_idx, self.beliefs[q.category]
            )
    else:
        # Free text — update multiple factors via classification result
        classified = user_input.get("classified", {})
        skill_signals = classified.get("skill_signals", {})
        # Map classifier signal labels to observation indices (default: medium)
        signal_map = {"very_low": 0, "low": 1, "medium": 2, "high": 3, "very_high": 4}
        for skill, signal in skill_signals.items():
            if skill in self.beliefs and skill in self.model.A:
                obs_idx = signal_map.get(signal, 2)
                self.beliefs[skill] = update_belief(
                    self.beliefs[skill], obs_idx, self.model.A[skill]
                )
                # Learn: refine A-matrix
                self.learner.learn_from_observation(
                    skill, obs_idx, self.beliefs[skill]
                )
        # Update friction beliefs from classification
        overwhelm = classified.get("overwhelm_signal", "medium")
        overwhelm_map = {"low": 0, "medium": 1, "high": 2}
        if "overwhelm_sensitivity" in self.model.A:
            obs_idx = overwhelm_map.get(overwhelm, 1)
            self.beliefs["overwhelm_sensitivity"] = update_belief(
                self.beliefs["overwhelm_sensitivity"],
                obs_idx,
                self.model.A["overwhelm_sensitivity"],
            )
            # Learn: refine friction observation model
            self.learner.learn_from_observation(
                "overwhelm_sensitivity", obs_idx, self.beliefs["overwhelm_sensitivity"]
            )
    self.asked_question_ids.append(q.id)
    # Record in history
    self.history.append({
        "timestep": self.timestep,
        "phase": PHASE_CALIBRATION,
        "question_id": q.id,
        "user_input": user_input,
    })
    # Extract profile facts from calibration answers (especially free-text)
    cal_text = user_input.get("answer", "")
    if cal_text and len(cal_text) > 10:  # only extract from substantive answers
        self.profile.extract_and_store(
            user_text=cal_text,
            turn=self.timestep,
            classifier=self.classifier,
            context=f"Answering calibration Q: {q.question_text[:80]}",
        )
    user_text = user_input.get("answer", "")
    # Check if calibration is complete
    if len(self.asked_question_ids) >= self._max_calibration_questions:
        # Don't generate a separate ack — the sphere commentary will
        # acknowledge the last answer as part of its single LLM call.
        # Generating a separate ack causes two LLM responses to be
        # concatenated, producing the "double response" problem.
        self._track_conversation("user", user_text)
        return self._transition_to_visualization()
    # Get next question
    next_q = self._get_next_question()
    if next_q is None:
        self._track_conversation("user", user_text)
        return self._transition_to_visualization()
    self.current_question = next_q
    # Generate acknowledgment — LLM first, template fallback.
    # NOTE: generated BEFORE tracking the user message so the ack's LLM
    # call doesn't see the message twice (the generator adds it itself).
    ack = self._generate_calibration_ack(user_text, q)
    self._track_conversation("user", user_text)
    self._track_conversation("assistant", ack)
    return {
        "phase": PHASE_CALIBRATION,
        "message": ack,
        "question": self._format_question(next_q),
        "progress": len(self.asked_question_ids) / self._max_calibration_questions,
        "is_complete": False,
    }
def _generate_calibration_ack(self, user_text: str, question: CalibrationQuestion) -> str:
    """Produce a short, natural acknowledgment of a calibration answer.

    When a generator is available, a small LLM call (with the question
    text and recent conversation as context) produces a context-aware
    ack; on failure or when no generator exists, a template ack is cycled
    by answer index.
    """
    gen = self.generator
    if gen is None:
        logger.info("[LLM] No generator — using template calibration ack")
    else:
        logger.info(f"[LLM] Generating calibration ack for Q{len(self.asked_question_ids)} (user said: '{user_text[:50]}')")
        try:
            # Import here to avoid circular imports at module level
            from ..llm.generator import build_system_prompt

            system_prompt = build_system_prompt(
                phase="calibration",
                belief_summary=self.get_belief_summary(),
            )
            # Give the LLM context about the question that was asked
            context_lines = [f'[The user was asked: "{question.question_text}"]']
            if question.question_type == "mc" and question.options:
                context_lines.append(f'[Options were: {", ".join(question.options)}]')
            context_lines.append(
                f'[This is question {len(self.asked_question_ids)} of {self._max_calibration_questions}]'
            )
            # Recent conversation is included for continuity
            messages = [{"role": "system", "content": system_prompt}]
            messages.extend(
                {"role": m["role"], "content": m["content"]}
                for m in self.conversation_history[-4:]
            )
            messages.append({"role": "assistant", "content": "\n".join(context_lines)})
            messages.append({"role": "user", "content": user_text})
            response = gen.client.chat_completion(
                messages=messages,
                temperature=0.7,
                max_tokens=80,  # Keep acks short
                model_override=None,  # uses LLM_MODEL default
            )
            if response and response.strip():
                text = response.strip()
                logger.info(f"[LLM] Calibration ack: '{text[:60]}...'")
                return text
            logger.warning("[LLM] Empty calibration ack from LLM")
        except Exception as e:
            logger.warning(f"[LLM] Calibration ack failed: {e}")
    # Template fallback — cycle acks by how many questions were answered
    idx = (len(self.asked_question_ids) - 1) % len(self._CALIBRATION_ACKS)
    ack = self._CALIBRATION_ACKS[idx]
    logger.info(f"[LLM] Template fallback ack: '{ack}'")
    return ack
# -------------------------------------------------------------------------
# VISUALIZATION PHASE
# -------------------------------------------------------------------------
def _transition_to_visualization(self) -> Dict[str, Any]:
    """Enter the visualization phase and return personalized sphere commentary."""
    self.phase = PHASE_VISUALIZATION
    self._viz_turns = 0
    sphere_data = self.get_sphere_data()
    # A single LLM call produces both the acknowledgment of the final
    # calibration answer and the sphere commentary — avoids the
    # double-response problem.
    commentary = self._generate_sphere_commentary()
    self._track_conversation("assistant", commentary)
    payload = {
        "phase": PHASE_VISUALIZATION,
        "message": commentary,
        "sphere_data": sphere_data,
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
    return payload
def _step_visualization(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """Handle user messages during sphere discussion, transition when ready.

    Each turn: runs emotional inference, assesses cognitive load, updates
    the cognitive model, and records history. A "not_ready" cognitive-load
    readiness routes to companion mode; otherwise an explicit coaching
    request (or, after two turns, an EFE-selected propose action) triggers
    the transition to planning.
    """
    self._viz_turns += 1
    user_text = user_input.get("answer", "").strip()
    # Run emotional inference
    emotional_data = self._run_emotional_inference(user_text)
    # Assess cognitive load / intent (with emotional data)
    cog_load = self._assess_cognitive_load(user_text, emotional_data=emotional_data)
    # Update cognitive model
    self._update_user_model_from_text(user_text)
    # Record history
    self.history.append({
        "timestep": self.timestep,
        "phase": PHASE_VISUALIZATION,
        "user_input": user_input,
        "emotional_inference": emotional_data,
    })
    # === Companion gate: be present when user needs it ===
    if cog_load["coaching_readiness"] == "not_ready":
        logger.info(f"[Viz] Companion mode (signals={cog_load['signals']})")
        # _llm_generate runs BEFORE tracking the user message — the
        # generator adds user_message itself; tracking first would
        # duplicate it in the LLM context.
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        response = llm_response or self._respond_to_sphere_reaction(user_text)
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_VISUALIZATION,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "efe_info": {"selected_action": "companion_chat", "override": "not_coaching_ready"},
            "is_complete": False,
        }
    # === Decide action FIRST, then make one LLM call ===
    lower = user_text.lower()
    # Explicit coaching request overrides the EFE decision
    wants_coaching = any(w in lower for w in [
        "suggest", "what should", "help me", "what can i do",
        "let's work", "what next", "show me", "i want to improve",
        "what do you recommend", "first step",
    ])
    # Determine whether we're transitioning to planning
    should_transition = False
    efe_info = None
    if wants_coaching:
        should_transition = True
        efe_info = {"selected_action": "propose_intervention", "override": "explicit_request"}
    elif self._viz_turns >= 2:
        # After two discussion turns, let EFE-driven selection decide
        emotion_error_mag = emotional_data.get("error", {}).get("magnitude", 0.0)
        valence_belief = self.emotion.belief_valence
        action_idx, action_name, efe_info = select_coaching_action(
            beliefs=self.beliefs,
            model=self.model,
            phase="visualization",
            timestep=self.timestep,
            tom_reliability=self.tom.reliability,
            empathy_planner=self.empathy,
            tom_filter=self.tom,
            target_skill=self.target_skill,
            current_intervention=self.current_intervention,
            emotion_prediction_error=emotion_error_mag,
            emotion_valence_belief=valence_belief,
        )
        # Only transition when the proposal is both selected and confident
        propose_prob = efe_info["action_probabilities"].get("propose_intervention", 0)
        if action_name == "propose_intervention" and propose_prob > 0.45:
            should_transition = True
    if should_transition:
        # Transition to planning — _generate_plan_message makes the single LLM call
        self._track_conversation("user", user_text)
        result = self._transition_to_planning()
        result["efe_info"] = efe_info or {"selected_action": "propose_intervention"}
        self._track_conversation("assistant", result["message"])
        return result
    # Stay in visualization — one LLM call for conversational response
    llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
    self._track_conversation("user", user_text)
    response = llm_response or self._respond_to_sphere_reaction(user_text)
    self._track_conversation("assistant", response)
    if efe_info is None:
        efe_info = {"selected_action": "ask_free_text", "phase": "visualization"}
    return {
        "phase": PHASE_VISUALIZATION,
        "message": response,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "efe_info": efe_info,
        "is_complete": False,
    }
# -------------------------------------------------------------------------
# PLANNING PHASE
# -------------------------------------------------------------------------
def _transition_to_planning(self) -> Dict[str, Any]:
    """Enter the planning phase and propose a first intervention.

    The target skill is picked by blending the expected free energy of
    proposing for each top impact-ranked candidate with the ToM-predicted
    felt cost of its gentlest intervention; the gentle/push variant with
    the higher predicted acceptance is then proposed.
    """
    self.phase = PHASE_PLANNING
    self._planning_turns = 0
    # Rank candidate skills by modeled impact
    skill_beliefs = {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
    impact_ranking = self.dep_graph.compute_impact_ranking(skill_beliefs)
    candidate_skills = (
        [s for s, _ in impact_ranking[:3]] if impact_ranking else SKILL_FACTORS[:1]
    )
    # Score each candidate with empathy-blended EFE; lowest wins
    best_skill = candidate_skills[0]
    best_score = float("inf")
    for candidate in candidate_skills:
        efe = compute_efe_all_factors(
            self.beliefs, self.model, 3,  # action = propose_intervention
            relevant_factors=[candidate],
            lambda_epist=0.5,
        )
        gentle_options = get_interventions_for_skill(candidate, "gentle")
        if gentle_options:
            # Blend with empathy: predict felt cost for gentle intervention
            prediction = self.tom.predict_response_gated(gentle_options[0].to_dict())
            blended = self.empathy.compute_blended_efe(
                efe, prediction["predicted_felt_cost"], self.tom.reliability
            )
        else:
            blended = efe
        if blended < best_score:
            best_score = blended
            best_skill = candidate
    self.target_skill = best_skill
    # Gentle/push pair for the counterfactual comparison
    gentle, push = get_gentle_push_pair(self.target_skill)
    counterfactual = self.empathy.compute_counterfactual(
        gentle.to_dict(), push.to_dict(), self.tom
    )
    # Recommend whichever variant the ToM filter predicts is more acceptable
    gentle_pred = self.tom.predict_response_gated(gentle.to_dict())
    push_pred = self.tom.predict_response_gated(push.to_dict())
    self.current_intervention = (
        gentle if gentle_pred["p_accept"] >= push_pred["p_accept"] else push
    )
    # Personalized planning message
    plan_message = self._generate_plan_message()
    self._track_conversation("assistant", plan_message)
    return {
        "phase": PHASE_PLANNING,
        "message": plan_message,
        "intervention": {
            "description": self.current_intervention.description,
            "target_skill": self.current_intervention.target_skill,
            "duration_minutes": self.current_intervention.duration_minutes,
            "difficulty": self.current_intervention.difficulty,
        },
        "counterfactual": counterfactual,
        "target_skill": self.target_skill,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
def _step_planning(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """User responds to proposed intervention — either a choice or free text.

    Choice clicks are delegated to _process_choice; free text runs
    emotional inference and the companion gate before a conversational
    planning reply.
    """
    self._planning_turns += 1
    choice = user_input.get("choice")
    user_text = user_input.get("answer", "").strip()
    # If they clicked a choice button, handle it
    if choice:
        self._track_conversation("user", choice)
        result = self._process_choice(choice)
        self._track_conversation("assistant", result.get("message", ""))
        return result
    # === EMOTIONAL INFERENCE (runs every turn) ===
    emotional_data = self._run_emotional_inference(user_text)
    cog_load = self._assess_cognitive_load(user_text, emotional_data=emotional_data)
    # Update cognitive model
    self._update_user_model_from_text(user_text)
    # === Companion gate: be present when user needs it ===
    if cog_load["coaching_readiness"] == "not_ready":
        logger.info(f"[Planning] Companion mode (signals={cog_load['signals']})")
        # _llm_generate runs BEFORE tracking the user message — the
        # generator adds user_message itself; tracking first duplicates it
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        response = llm_response or self._generate_companion_response(user_text)
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_PLANNING,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": {"selected_action": "companion_chat", "override": "not_coaching_ready"},
            "is_complete": False,
        }
    # Otherwise it's free text — have a conversation
    # NOTE: _respond_to_planning_chat may call _llm_generate,
    # so track user message AFTER to avoid duplicate user messages
    result = self._respond_to_planning_chat(user_text)
    result["emotional_state"] = emotional_data
    self._track_conversation("user", user_text)
    self._track_conversation("assistant", result.get("message", ""))
    return result
# -------------------------------------------------------------------------
# UPDATE PHASE
# -------------------------------------------------------------------------
def _step_update(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """Process subsequent user choices or free-text in the update loop.

    Mirrors _step_planning: choices go to _process_choice, free text runs
    emotional inference and the companion gate before an update-phase
    conversational reply.
    """
    choice = user_input.get("choice")
    user_text = user_input.get("answer", "").strip()
    if choice:
        self._track_conversation("user", choice)
        result = self._process_choice(choice)
        self._track_conversation("assistant", result.get("message", ""))
        return result
    # === EMOTIONAL INFERENCE (runs every turn) ===
    emotional_data = self._run_emotional_inference(user_text)
    cog_load = self._assess_cognitive_load(user_text, emotional_data=emotional_data)
    # === Companion gate: be present when user needs it ===
    if cog_load["coaching_readiness"] == "not_ready":
        logger.info(f"[Update] Companion mode (signals={cog_load['signals']})")
        # _llm_generate runs BEFORE tracking the user message — the
        # generator adds user_message itself; tracking first duplicates it
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        response = llm_response or self._generate_companion_response(user_text)
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_UPDATE,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": {"selected_action": "companion_chat", "override": "not_coaching_ready"},
            "is_complete": False,
        }
    # Free text during update phase
    # NOTE: _respond_to_update_chat calls _llm_generate,
    # so track user message AFTER to avoid duplicate user messages
    result = self._respond_to_update_chat(user_text)
    result["emotional_state"] = emotional_data
    self._track_conversation("user", user_text)
    self._track_conversation("assistant", result.get("message", ""))
    return result
def _process_choice(self, choice: str) -> Dict[str, Any]:
    """Process a user choice and update beliefs.

    Maps the choice to an observation index, updates the ToM particle
    filter and the overwhelm-sensitivity belief (plus observation- and
    transition-model learning), then either transitions to coaching
    ("accept") or proposes an alternative (any other choice).
    """
    self.phase = PHASE_UPDATE
    # Map choice to observation index.
    # NOTE(review): an unrecognized choice string falls back to index 0
    # ("accept") for the belief/ToM updates, yet is routed to
    # _propose_alternative below — confirm this asymmetry is intended.
    choice_map = {"accept": 0, "too_hard": 1, "not_relevant": 2}
    choice_idx = choice_map.get(choice, 0)
    # Update ToM particle filter (mild default intervention when none
    # is currently proposed)
    intervention_dict = (
        self.current_intervention.to_dict()
        if self.current_intervention
        else {"difficulty": 0.3, "duration_minutes": 5}
    )
    tom_stats = self.tom.update_weights(choice_idx, intervention_dict)
    # Update friction beliefs
    if "overwhelm_sensitivity" in self.model.A:
        # Snapshot pre-update belief — learn_from_transition needs both sides
        belief_before = self.beliefs["overwhelm_sensitivity"].copy()
        self.beliefs["overwhelm_sensitivity"] = update_belief(
            self.beliefs["overwhelm_sensitivity"],
            choice_idx,
            self.model.A["overwhelm_sensitivity"],
        )
        # Learn: refine friction observation model + transition model
        self.learner.learn_from_observation(
            "overwhelm_sensitivity", choice_idx, self.beliefs["overwhelm_sensitivity"]
        )
        self.learner.learn_from_transition(
            "overwhelm_sensitivity", choice_idx, belief_before, self.beliefs["overwhelm_sensitivity"]
        )
    # Record history
    self.history.append({
        "timestep": self.timestep,
        "phase": PHASE_UPDATE,
        "choice": choice,
        "tom_stats": tom_stats,
    })
    if choice == "accept":
        # Track the accepted intervention
        if self.current_intervention:
            self._accepted_interventions.append(self.current_intervention.to_dict())
        # Generate personalized encouragement and transition to coaching
        message = self._generate_acceptance_message()
        return self._transition_to_coaching(message, tom_stats)
    else:
        # User rejected — adapt and propose alternative
        return self._propose_alternative(choice, tom_stats)
def _propose_alternative(
    self, rejection_reason: str, tom_stats: Dict
) -> Dict[str, Any]:
    """Propose an adjusted intervention after rejection, using EFE+ToM to select.

    Candidate (skill, intervention) pairs are scored by blending the
    expected free energy of proposing for the skill with the ToM-predicted
    felt cost of the intervention; the lowest blended score wins. When the
    user said the current skill is "not_relevant", that skill is excluded
    from the candidates.

    Args:
        rejection_reason: "too_hard" or "not_relevant".
        tom_stats: Particle-filter stats from the rejection update, passed
            through in the response.

    Returns:
        Update-phase response dict with the new "intervention",
        "counterfactual", and belief/ToM summaries.
    """
    if self.target_skill is None:
        self.target_skill = SKILL_FACTORS[0]
    # Use EFE to find the best alternative across candidate (skill, intervention) pairs
    impact_ranking = self.dep_graph.compute_impact_ranking(
        {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
    )
    candidates = []
    for skill, _ in (impact_ranking[:4] if impact_ranking else [(SKILL_FACTORS[0], 0)]):
        if rejection_reason == "not_relevant" and skill == self.target_skill:
            continue  # skip the skill the user just rejected as irrelevant
        # Always consider gentle interventions — the user just pushed back
        ivs = get_interventions_for_skill(skill, "gentle")
        # EFE depends on the skill, not the specific intervention — compute once
        G = compute_efe_all_factors(
            self.beliefs, self.model, 3,
            relevant_factors=[skill], lambda_epist=0.5,
        )
        for iv in ivs[:2]:  # consider up to 2 interventions per skill
            pred = self.tom.predict_response_gated(iv.to_dict())
            G_social = self.empathy.compute_blended_efe(
                G, pred["predicted_felt_cost"], self.tom.reliability
            )
            candidates.append((skill, iv, G_social))
    if candidates:
        candidates.sort(key=lambda x: x[2])
        best_skill, best_iv, _ = candidates[0]
        self.target_skill = best_skill
        self.current_intervention = best_iv
    else:
        # Fallback when no candidates scored: for "too_hard", drop to the
        # gentlest intervention for the same skill. For "not_relevant" the
        # current intervention is deliberately left unchanged (preserved
        # original behavior).
        if rejection_reason == "too_hard":
            ivs = get_interventions_for_skill(self.target_skill, "gentle")
            if ivs:
                self.current_intervention = ivs[0]
    # Compute new counterfactual for the (possibly new) target skill
    gentle, push = get_gentle_push_pair(self.target_skill)
    counterfactual = self.empathy.compute_counterfactual(
        gentle.to_dict(), push.to_dict(), self.tom
    )
    # Generate personalized adaptation message
    adaptation_message = self._generate_adaptation_message(rejection_reason)
    return {
        "phase": PHASE_UPDATE,
        "message": adaptation_message,
        "intervention": {
            "description": self.current_intervention.description if self.current_intervention else "",
            "target_skill": self.target_skill,
            "duration_minutes": self.current_intervention.duration_minutes if self.current_intervention else 2,
            "difficulty": self.current_intervention.difficulty if self.current_intervention else 0.1,
        },
        "counterfactual": counterfactual,
        "tom_stats": tom_stats,
        "user_type_summary": self.tom.get_user_type_summary(),
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
# -------------------------------------------------------------------------
# COACHING PHASE
# -------------------------------------------------------------------------
def _transition_to_coaching(self, acceptance_message: str, tom_stats: Optional[Dict] = None) -> Dict[str, Any]:
    """Transition to ongoing coaching after intervention acceptance.

    Args:
        acceptance_message: Encouragement text already generated for the
            accepted intervention; a coaching follow-up is appended to it.
        tom_stats: Optional particle-filter stats to include in the result.
            (Annotation fixed: was `Dict = None`, which mistyped the default.)

    Returns:
        Coaching-phase response dict; includes "tom_stats" when provided.
    """
    self.phase = PHASE_COACHING
    self._coaching_turns = 0
    # Follow up with a probing question about the target skill when one
    # is still available; otherwise use a generic deepening prompt.
    probe = self._get_next_probe()
    if probe:
        coaching_message = acceptance_message + "\n\n" + probe
    else:
        coaching_message = acceptance_message + (
            "\n\nNow that you have a concrete step, let's dig a bit deeper. "
            "Tell me more about what's going on for you right now."
        )
    result = {
        "phase": PHASE_COACHING,
        "message": coaching_message,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
    if tom_stats:
        result["tom_stats"] = tom_stats
    return result
def _step_coaching(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """Handle the ongoing coaching conversation using EFE-driven action selection.

    Order per turn: batched emotion+profile inference, cognitive-model
    update, history record; then keyword overrides (stop / more action),
    the companion gate, and finally EFE-selected action dispatch
    (propose / end / safety check / counterfactual / reframe / free text).
    """
    self._coaching_turns += 1
    user_text = user_input.get("answer", "").strip()
    choice = user_input.get("choice")
    # === BATCHED INFERENCE: emotion + profile in one LLM call ===
    emotional_data, profile_extracted = self._run_batched_inference(user_text)
    # Update the cognitive model from this message
    self._update_user_model_from_text(user_text, profile_data=profile_extracted)
    # Record history with emotional data
    self.history.append({
        "timestep": self.timestep,
        "phase": PHASE_COACHING,
        "user_input": user_input,
        "emotional_inference": emotional_data,
    })
    # === Keyword overrides: explicit stop always honored ===
    lower = user_text.lower()
    wants_to_stop = any(w in lower for w in [
        "done", "that's enough", "let's stop", "end session",
        "i'm good", "goodbye", "wrap up", "finish",
    ])
    if wants_to_stop:
        self._track_conversation("user", user_text)
        result = self._end_session()
        result["efe_info"] = {"selected_action": "end_session", "override": "explicit_request"}
        self._track_conversation("assistant", result.get("message", ""))
        return result
    # === Override: explicit request for more action ===
    wants_more_action = any(w in lower for w in [
        "another step", "next step", "what else",
        "give me something", "another exercise", "what now",
    ])
    cog_load = self._assess_cognitive_load(user_text, emotional_data=emotional_data)
    # Honor the request only when the user is coaching-ready
    if wants_more_action and cog_load["coaching_readiness"] != "not_ready":
        self._track_conversation("user", user_text)
        result = self._propose_next_coaching_step(cognitive_load=cog_load)
        result["efe_info"] = {"selected_action": "propose_intervention", "override": "explicit_request"}
        result["emotional_state"] = emotional_data
        self._track_conversation("assistant", result.get("message", ""))
        return result
    # === Companion mode: respect off-topic and deflection ===
    if cog_load["coaching_readiness"] == "not_ready":
        # User is off-topic, deflecting, or disengaged — be a companion
        logger.info(f"[Coaching] Companion mode triggered (signals={cog_load['signals']}, readiness={cog_load['coaching_readiness']})")
        # NOTE: _llm_generate MUST be called BEFORE _track_conversation
        # because the generator also appends user_message to the messages list.
        # Tracking first would cause the user message to appear twice.
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        response = llm_response or self._generate_companion_response(user_text)
        logger.info(f"[Coaching] Companion response (llm={'yes' if llm_response else 'template'}, len={len(response)})")
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_COACHING,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": {"selected_action": "companion_chat", "override": "not_coaching_ready"},
            "is_complete": False,
        }
    # === EFE-driven action selection ===
    emotion_error_mag = emotional_data.get("error", {}).get("magnitude", 0.0)
    valence_belief = self.emotion.belief_valence
    action_idx, action_name, efe_info = select_coaching_action(
        beliefs=self.beliefs,
        model=self.model,
        phase="coaching",
        timestep=self.timestep,
        tom_reliability=self.tom.reliability,
        empathy_planner=self.empathy,
        tom_filter=self.tom,
        target_skill=self.target_skill,
        current_intervention=self.current_intervention,
        emotion_prediction_error=emotion_error_mag,
        emotion_valence_belief=valence_belief,
    )
    # Dispatch based on EFE-selected action
    logger.info(f"[Coaching] EFE selected: {action_name} (probs={efe_info.get('action_probabilities', {})})")
    if action_name == "propose_intervention":
        self._track_conversation("user", user_text)
        result = self._propose_next_coaching_step(cognitive_load=cog_load)
        result["efe_info"] = efe_info
        result["emotional_state"] = emotional_data
        self._track_conversation("assistant", result.get("message", ""))
        return result
    elif action_name == "end_session":
        self._track_conversation("user", user_text)
        result = self._end_session()
        result["efe_info"] = efe_info
        result["emotional_state"] = emotional_data
        self._track_conversation("assistant", result.get("message", ""))
        return result
    elif action_name == "safety_check":
        self._track_conversation("user", user_text)
        # Fixed check-in text — no LLM call for the safety check
        response = (
            "I want to check in — how are you feeling about all this? "
            "Sometimes coaching conversations bring up a lot, and I want to make sure "
            "we're going at the right pace for you."
        )
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_COACHING,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": efe_info,
            "is_complete": False,
        }
    elif action_name == "show_counterfactual":
        self._track_conversation("user", user_text)
        # Show counterfactual comparison for the current target skill
        gentle, push = get_gentle_push_pair(self.target_skill or SKILL_FACTORS[0])
        cf = self.empathy.compute_counterfactual(
            gentle.to_dict(), push.to_dict(), self.tom
        )
        cf_text = self.empathy.format_counterfactual_text(cf)
        llm_response = self._llm_generate(
            f"[SYSTEM: Present this counterfactual naturally: {cf_text}]",
            cognitive_load=cog_load,
        )
        response = llm_response or (
            f"Here's what my model predicts for two approaches:\n\n{cf_text}\n\n"
            "What feels more realistic for you?"
        )
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_COACHING,
            "message": response,
            "counterfactual": cf,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": efe_info,
            "is_complete": False,
        }
    elif action_name == "reframe":
        # Reframing: use LLM with reframe context
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        if not llm_response:
            # Template reframe
            probe = self._get_next_probe()
            llm_response = (
                "Let me offer a different way to look at this. "
                "What you're describing isn't a flaw — it's information about "
                "where the friction is. "
            )
            if probe:
                llm_response += probe
        self._track_conversation("assistant", llm_response)
        return {
            "phase": PHASE_COACHING,
            "message": llm_response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": efe_info,
            "is_complete": False,
        }
    else:
        # Default: ask_free_text / adjust_difficulty — conversational response
        llm_response = self._llm_generate(user_text, cognitive_load=cog_load)
        self._track_conversation("user", user_text)
        response = llm_response or self._generate_coaching_response(user_text)
        self._track_conversation("assistant", response)
        return {
            "phase": PHASE_COACHING,
            "message": response,
            "sphere_data": self.get_sphere_data(),
            "belief_summary": self.get_belief_summary(),
            "emotional_state": emotional_data,
            "efe_info": efe_info,
            "is_complete": False,
        }
def _get_next_probe(self) -> Optional[str]:
    """Return the next unasked probing question, preferring the target skill.

    Each probe is handed out at most once per session (tracked in
    self._probes_asked). When the target skill's probes are exhausted,
    the remaining skills are tried in score order; None when all are used.
    """
    def take_unasked(skill_name: str) -> Optional[str]:
        # First probe for this skill that hasn't been asked yet
        for candidate in COACHING_PROBES.get(skill_name, []):
            if candidate not in self._probes_asked:
                self._probes_asked.append(candidate)
                return candidate
        return None

    probe = take_unasked(self.target_skill or "focus")
    if probe is not None:
        return probe
    # Fall back to a different skill's probe
    for skill_name, _ in self._get_skill_scores_sorted():
        probe = take_unasked(skill_name)
        if probe is not None:
            return probe
    return None
def _get_next_exercise(self) -> Optional[str]:
    """Return the next unused coaching exercise, preferring the target skill.

    Each exercise is handed out at most once per session (tracked in
    self._exercises_given). Falls back to other skills in score order;
    None when every exercise has been used.
    """
    def take_unused(skill_name: str) -> Optional[str]:
        # First exercise for this skill that hasn't been given yet
        for candidate in COACHING_EXERCISES.get(skill_name, []):
            if candidate not in self._exercises_given:
                self._exercises_given.append(candidate)
                return candidate
        return None

    exercise = take_unused(self.target_skill or "focus")
    if exercise is not None:
        return exercise
    # Fall back to a different skill
    for skill_name, _ in self._get_skill_scores_sorted():
        exercise = take_unused(skill_name)
        if exercise is not None:
            return exercise
    return None
def _propose_next_coaching_step(self, cognitive_load: Optional[Dict] = None) -> Dict[str, Any]:
    """Propose the next coaching step via LLM (EFE already decided to propose).

    Picks the next unused exercise, preferring a skill different from the
    current target, and asks the LLM to present it conversationally
    (template fallback). When exercises are exhausted, falls back to a
    probing question or a closing reflection prompt.
    """
    exercise = self._get_next_exercise()
    if not exercise:
        # Exhausted exercises — probe instead
        probe = self._get_next_probe()
        message = probe or (
            "We've covered a lot of ground. What feels like the most important thing "
            "you're taking away from this conversation?"
        )
    else:
        # Prefer a skill other than the current target; default to the first
        sorted_skills = self._get_skill_scores_sorted()
        next_skill = next(
            (name for name, _ in sorted_skills if name != self.target_skill),
            sorted_skills[0][0],
        )
        skill_label = self._label(next_skill)
        score = self.model.get_skill_score(self.beliefs.get(next_skill, np.ones(5) / 5))
        # Ask the LLM to weave the exercise into the conversation
        system_hint = (
            f"[SYSTEM: Propose this intervention naturally: "
            f"Target skill: {skill_label} ({score:.0f}/100). "
            f"Suggestion: \"{exercise}\". "
            f"Weave it into the conversation — don't just announce it. "
            f"Keep it to 2-3 sentences.]"
        )
        llm_response = self._llm_generate(system_hint, cognitive_load=cognitive_load)
        message = llm_response or (
            f"Here's something to try — this one targets your {skill_label}: "
            f"{exercise} How does that land?"
        )
    return {
        "phase": PHASE_COACHING,
        "message": message,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
def _generate_coaching_response(self, user_text: str) -> str:
    """Generate a coaching response that probes deeper or offers insight.

    Routes on keyword detection over the lowercased message in priority
    order: emotional content, reflective insight, direct questions, work
    topics, personal topics; the default branch rotates between an
    exercise (every third coaching turn) and a probing question.

    Args:
        user_text: The user's latest free-text message.

    Returns:
        A fully templated coaching reply (no LLM involved on this path).
    """
    lower = user_text.lower()
    # Detect what the user is talking about
    # NOTE(review): these are substring checks, so short tokens can match
    # inside longer words (e.g. "off" branch elsewhere); confirm intent.
    is_emotional = any(w in lower for w in [
        "stressed", "anxious", "worried", "frustrated", "stuck",
        "hopeless", "lost", "confused", "scared", "tired",
        "burned out", "overwhelmed", "depressed", "sad",
    ])
    is_reflective = any(w in lower for w in [
        "i think", "i realize", "i notice", "interesting",
        "never thought", "makes sense", "you're right",
    ])
    is_asking = any(w in lower for w in [
        "why", "how", "what should", "what do", "can you",
        "tell me", "explain",
    ])
    mentions_work = any(w in lower for w in [
        "work", "job", "career", "boss", "colleague", "deadline",
        "project", "meeting",
    ])
    mentions_personal = any(w in lower for w in [
        "family", "relationship", "friend", "partner", "home",
        "health", "sleep", "exercise",
    ])
    if is_emotional:
        # Empathize first, then probe gently
        probe = self._get_next_probe()
        response = (
            "I hear that, and I don't want to brush past it. What you're feeling "
            "is information — it tells us something about where the friction is. "
        )
        if probe:
            response += f"Let me ask you this: {probe}"
        else:
            response += "What do you think is the root of that feeling?"
        return response
    elif is_reflective:
        # Reinforce the insight and build on it
        response = (
            "That's a really useful observation. The ability to see your own patterns "
            "is exactly what makes change possible. "
        )
        exercise = self._get_next_exercise()
        if exercise:
            response += (
                f"Building on that insight, here's something you could try: {exercise}"
            )
        else:
            probe = self._get_next_probe()
            if probe:
                response += f"Let's go deeper. {probe}"
        return response
    elif is_asking:
        # Answer based on the sphere data and ToM
        sorted_skills = self._get_skill_scores_sorted()
        weakest = sorted_skills[0]
        strongest = sorted_skills[-1]
        return (
            f"Based on your patterns, your biggest opportunity is in "
            f"{self._label(weakest[0])} ({round(weakest[1])}/100). But your "
            f"{self._label(strongest[0])} ({round(strongest[1])}/100) shows you already "
            f"have real capacity. The question isn't whether you can change — you can. "
            f"It's about finding the right entry point and making it small enough to actually stick. "
            f"What specifically would you like to know more about?"
        )
    elif mentions_work:
        # Probe into the work context
        probe = self._get_next_probe()
        response = (
            "Work is often where these patterns show up most clearly. "
            "The stakes are higher, the structure is external, and the pressure is real. "
        )
        if probe:
            response += probe
        else:
            response += (
                "Can you tell me about a specific situation at work where you felt stuck or frustrated?"
            )
        return response
    elif mentions_personal:
        # Acknowledge and explore
        response = (
            "The personal side matters a lot — these patterns don't stay at work. "
            "They show up in relationships, health, everything. "
        )
        probe = self._get_next_probe()
        if probe:
            response += probe
        else:
            response += "What's the connection you're seeing between this and your personal life?"
        return response
    else:
        # General coaching response — probe or exercise
        if self._coaching_turns % 3 == 0:
            # Every third turn, offer an exercise
            exercise = self._get_next_exercise()
            if exercise:
                return (
                    f"Thanks for sharing that. Here's something concrete you can do with that: "
                    f"{exercise}\n\nBut I also want to understand you better. "
                    f"What would change in your life if you got this right?"
                )
        # Default: ask a probing question
        probe = self._get_next_probe()
        if probe:
            return (
                f"That helps me understand where you're coming from. "
                f"Let me dig a bit deeper: {probe}"
            )
        return (
            "I appreciate you sharing that. Let's keep exploring — "
            "what feels like the most important thing right now? Is there something "
            "specific you'd like to work on, or should I suggest another step?"
        )
def _generate_companion_response(self, user_text: str) -> str:
"""Template fallback when user is off-topic and LLM is unavailable.
Instead of forcing coaching, acknowledges what the user said and
follows their lead — companion first, coach second.
"""
lower = user_text.lower()
# Detect explicit deflection from coaching
deflecting = any(w in lower for w in [
"don't want to talk about", "not want to talk",
"something else", "never mind", "forget it",
"not right now", "not now", "moving on",
])
if deflecting:
return (
"No pressure at all — we can talk about whatever you want. "
"What's on your mind?"
)
# Detect overwhelm / disengagement
overwhelmed = any(w in lower for w in [
"overwhelmed", "too much", "can't handle", "exhausted",
])
if overwhelmed:
return (
"Hey, let's slow down. We don't have to push through anything right now. "
"What would feel good to talk about instead?"
)
# User brought up a non-coaching topic — engage with it
return (
"I'm happy to chat about that! "
"We can always come back to the other stuff whenever — or not. "
"Tell me more."
)
def _end_session(self) -> Dict[str, Any]:
    """End the coaching session with a summary.

    Tries an LLM-generated wrap-up seeded with the strongest/weakest
    skills and (if present) the last committed step plus its predicted
    follow-through; assembles a template summary when the LLM yields
    nothing.

    Returns:
        Final response payload with phase PHASE_COMPLETE and
        is_complete True.
    """
    sorted_skills = self._get_skill_scores_sorted()
    weakest = self._label(sorted_skills[0][0])
    strongest = self._label(sorted_skills[-1][0])
    # Try LLM for a natural wrap-up
    llm_context = (
        f"[SYSTEM: The user wants to end the session. "
        f"Strongest area: {strongest}. Growth area: {weakest}. "
    )
    if self._accepted_interventions:
        last_step = self._accepted_interventions[-1].get("description", "")
        pred = self.tom.predict_response_gated(self._accepted_interventions[-1])
        p_complete = pred.get("p_accept", 0.5)
        llm_context += (
            f"They committed to: \"{last_step}\" "
            f"(predicted {round(p_complete * 100)}% follow-through). "
        )
    llm_context += "Wrap up warmly and briefly. Reference things from the conversation.]"
    llm_response = self._llm_generate(llm_context)
    if not llm_response:
        # Template fallback
        parts = ["Let's wrap up. Here's what I'm taking away from our conversation:"]
        parts.append(
            f"Your biggest strength is {strongest} — that's your foundation. "
            f"Your biggest growth area is {weakest}."
        )
        if self._accepted_interventions:
            last_step = self._accepted_interventions[-1].get("description", "")
            pred = self.tom.predict_response_gated(self._accepted_interventions[-1])
            p_complete = pred.get("p_accept", 0.5)
            parts.append(
                f"Your committed step: \"{last_step}\" — "
                f"I predict a {round(p_complete * 100)}% chance you'll follow through."
            )
        parts.append(
            "The fact that you showed up and went through this process tells me something important "
            "about you. You're not just thinking about change — you're doing something about it. "
            "Come back anytime to check in."
        )
        llm_response = " ".join(parts)
    return {
        "phase": PHASE_COMPLETE,
        "message": llm_response,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "user_type_summary": self.tom.get_user_type_summary(),
        "is_complete": True,
    }
# -------------------------------------------------------------------------
# CONVERSATIONAL RESPONSE GENERATION
# -------------------------------------------------------------------------
def _get_skill_scores_sorted(self) -> List[Tuple[str, float]]:
    """Return (skill, score) pairs ordered from weakest to strongest."""
    score_map = self.model.get_all_skill_scores(self.beliefs)
    pairs = list(score_map.items())
    pairs.sort(key=lambda pair: pair[1])
    return pairs
def _label(self, skill: str) -> str:
    """Human-readable skill name (falls back to title-cased key)."""
    try:
        return SKILL_LABELS[skill]
    except KeyError:
        return skill.replace("_", " ").title()
def _generate_sphere_commentary(self) -> str:
    """Generate personalized commentary about the user's sphere.

    LLM-first path: builds a skill-score + bottleneck summary, a
    phase-specific system prompt, and the last six conversation turns,
    then asks the generator for one cohesive response. Any generator
    failure or empty reply degrades to an assembled template.
    """
    sorted_skills = self._get_skill_scores_sorted()
    weakest = sorted_skills[:2]
    strongest = sorted_skills[-2:]
    bottlenecks = self.dep_graph.find_bottlenecks(
        {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
    )
    # Try LLM first — give it the full sphere data to work with
    gen = self.generator
    logger.info(f"[LLM] Generating sphere commentary (generator={'available' if gen else 'None'})")
    if gen is not None:
        try:
            from ..llm.generator import build_system_prompt
            # Build a rich data summary for the LLM
            skill_lines = []
            for skill_name, score in sorted_skills:
                skill_lines.append(f" - {self._label(skill_name)}: {round(score)}/100")
            sphere_context = "Here are the user's skill scores:\n" + "\n".join(skill_lines)
            if bottlenecks:
                # Only the top bottleneck (and at most two blocked skills)
                # is surfaced, to keep the prompt focused.
                bn = bottlenecks[0]
                blocker = self._label(bn["blocker"])
                blocked_names = [self._label(b) for b in bn["blocked"][:2]]
                sphere_context += (
                    f"\n\nKey insight: {blocker} is a bottleneck — "
                    f"it's holding back {' and '.join(blocked_names)}."
                )
            system_prompt = build_system_prompt(
                phase="sphere_commentary",
                belief_summary=self.get_belief_summary(),
                tom_summary=self.tom.get_user_type_summary(),
            )
            messages = [{"role": "system", "content": system_prompt}]
            # Include conversation history from calibration
            for msg in self.conversation_history[-6:]:
                messages.append({"role": msg["role"], "content": msg["content"]})
            messages.append({
                "role": "user",
                "content": (
                    f"[SYSTEM: First briefly acknowledge their last answer (1 sentence), then share "
                    f"what the sphere shows — like a perceptive friend noticing patterns, not a report. "
                    f"This should be ONE cohesive response, not two separate parts. "
                    f"Data for reference (don't recite it all): {sphere_context}]"
                ),
            })
            response = gen.client.chat_completion(
                messages=messages,
                temperature=0.7,
                max_tokens=300,
                model_override=None,  # uses LLM_MODEL default
            )
            if response and response.strip():
                logger.info(f"[LLM] Sphere commentary generated ({len(response)} chars)")
                return response.strip()
            logger.warning("[LLM] Empty sphere commentary from LLM")
        except Exception as e:
            # Best-effort: any generator/network failure drops through to
            # the template path rather than breaking the session.
            logger.warning(f"[LLM] Sphere commentary failed: {e}")
    logger.info("[LLM] Using template sphere commentary")
    # Template fallback
    parts = [
        "Here's your MindSphere — a snapshot of your patterns across eight areas."
    ]
    s1_name, s1_score = strongest[-1]
    s2_name, s2_score = strongest[-2]
    parts.append(
        f"Your strongest areas are {self._label(s1_name)} ({round(s1_score)}/100) "
        f"and {self._label(s2_name)} ({round(s2_score)}/100) — that's real foundation to build on."
    )
    w1_name, w1_score = weakest[0]
    w2_name, w2_score = weakest[1]
    parts.append(
        f"The biggest dents are in {self._label(w1_name)} ({round(w1_score)}/100) "
        f"and {self._label(w2_name)} ({round(w2_score)}/100)."
    )
    if bottlenecks:
        bn = bottlenecks[0]
        blocker = self._label(bn["blocker"])
        blocked_names = [self._label(b) for b in bn["blocked"][:2]]
        parts.append(
            f"Interestingly, {blocker} is acting as a bottleneck — "
            f"it's holding back your {' and '.join(blocked_names)}. "
            f"That means improving it would have a ripple effect."
        )
    parts.append(
        "Does this match how you see yourself? I'm curious what stands out to you."
    )
    return " ".join(parts)
def _respond_to_sphere_reaction(self, user_text: str) -> str:
    """Respond to the user's reaction to their sphere.

    Classifies the reply by keyword (agreement, disagreement, "why?",
    stress, focus — in that priority order) and answers with a matching
    template; the generic branch steers toward the weakest skill.
    """
    lower = user_text.lower()
    # Detect sentiment/intent
    agrees = any(w in lower for w in [
        "yes", "yeah", "accurate", "right", "true", "makes sense",
        "spot on", "correct", "agree", "that's me", "sounds right",
    ])
    # NOTE(review): these are substring checks — "no" and "off" also match
    # inside words like "know"/"office"; confirm that's acceptable.
    disagrees = any(w in lower for w in [
        "no", "don't think", "disagree", "wrong", "not really",
        "doesn't seem", "inaccurate", "off", "surprised",
    ])
    asks_why = any(w in lower for w in [
        "why", "how come", "explain", "what does", "what do you mean",
    ])
    mentions_stress = any(w in lower for w in [
        "stress", "anxious", "overwhelm", "burned", "tired",
        "exhausted", "struggling",
    ])
    mentions_focus = any(w in lower for w in [
        "focus", "distract", "attention", "concentrate",
    ])
    sorted_skills = self._get_skill_scores_sorted()
    weakest_name = self._label(sorted_skills[0][0])
    weakest_score = round(sorted_skills[0][1])
    if agrees:
        return (
            f"Good — that self-awareness is genuinely useful. The fact that you can see "
            f"these patterns clearly means you're already ahead of where most people start. "
            f"Let me show you what I think would make the biggest difference right now."
        )
    elif disagrees:
        # Acknowledge and show openness to being wrong
        return (
            f"That's important feedback — my model is built from your answers, but you know "
            f"yourself better than ten questions can capture. Which part feels off? "
            f"I can adjust my understanding as we talk."
        )
    elif asks_why:
        bottlenecks = self.dep_graph.find_bottlenecks(
            {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
        )
        if bottlenecks:
            bn = bottlenecks[0]
            blocker = self._label(bn["blocker"])
            blocked = [self._label(b) for b in bn["blocked"][:2]]
            return (
                f"The scores come from how you answered the calibration questions — "
                f"each answer shifts my estimate of where you sit on each skill. "
                f"The dependency analysis shows that {blocker} is a leverage point because "
                f"it feeds into {' and '.join(blocked)}. "
                f"Think of it like a supply chain — a bottleneck upstream affects everything downstream."
            )
        return (
            f"The scores come from your calibration answers — each one shifts my estimate "
            f"of where you sit across these eight dimensions. The dents show where "
            f"there's the most room for movement."
        )
    elif mentions_stress:
        return (
            f"Stress is real, and it touches a lot of these dimensions — especially "
            f"Emotional Regulation and Focus. The good news is that working on even one "
            f"of these can take pressure off the others. You mentioned being stressed before, "
            f"and I factored that into my model of you. Let me suggest something small "
            f"that might help."
        )
    elif mentions_focus:
        # Falls back to the prior D["focus"] when no belief exists yet.
        focus_score = round(self.model.get_skill_score(self.beliefs.get("focus", self.model.D["focus"])))
        return (
            f"Focus came in at {focus_score}/100 — and from what you told me, that tracks. "
            f"The interesting thing is that focus isn't just about willpower. It often connects "
            f"to other patterns — like how clear your tasks are, or how you handle interruptions. "
            f"Let me show you what I think would help most."
        )
    else:
        # Generic thoughtful response
        return (
            f"Thanks for sharing that. Based on everything you've told me, "
            f"I think the most impactful place to start is {weakest_name} — "
            f"it's currently at {weakest_score}/100, and improving it would "
            f"unlock progress in other areas too. Want me to suggest a concrete first step?"
        )
def _generate_plan_message(self) -> str:
    """Generate personalized planning message for the first intervention.

    Uses the ToM acceptance prediction to calibrate tone and the
    dependency graph to explain leverage. LLM first; template fallback.
    Returns the generic PLAN_INTRO when no intervention/target is set.
    """
    if not self.current_intervention or not self.target_skill:
        return PLAN_INTRO
    skill_label = self._label(self.target_skill)
    # Default belief: prior from D, or a uniform 5-level distribution.
    score = round(self.model.get_skill_score(
        self.beliefs.get(self.target_skill, self.model.D.get(self.target_skill, np.array([0.2]*5)))
    ))
    # Check ToM predictions to calibrate tone
    pred = self.tom.predict_response_gated(self.current_intervention.to_dict())
    p_accept = pred.get("p_accept", 0.5)
    # Check if there's a dependency explanation
    bottlenecks = self.dep_graph.find_bottlenecks(
        {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
    )
    # Try LLM first
    llm_response = self._llm_generate(
        f"[SYSTEM: Propose this intervention naturally: "
        f"Target skill: {skill_label} ({score}/100). "
        f"Suggestion: \"{self.current_intervention.description}\" "
        f"({self.current_intervention.duration_minutes} min, "
        f"difficulty {self.current_intervention.difficulty}). "
        f"Predicted acceptance: {round(p_accept * 100)}%. "
        f"Weave it into conversation naturally.]"
    )
    if llm_response:
        return llm_response
    # Template fallback
    parts = []
    if bottlenecks and bottlenecks[0]["blocker"] == self.target_skill:
        blocked = [self._label(b) for b in bottlenecks[0]["blocked"][:2]]
        parts.append(
            f"I'm starting with {skill_label} because it's your biggest leverage point right now — "
            f"at {score}/100, it's holding back your {' and '.join(blocked)}."
        )
    else:
        parts.append(
            f"I'm starting with {skill_label} ({score}/100) because "
            f"I think it's where a small change would make the biggest difference."
        )
    parts.append(
        f'Here\'s what I have in mind: "{self.current_intervention.description}"'
    )
    # Adjust framing by predicted acceptance: reassure when low (<0.4),
    # affirm when high (>0.7), say nothing extra in between.
    if p_accept < 0.4:
        parts.append(
            "I know this might feel like a stretch — and that's okay. "
            "It's designed to be small enough that you can try it without committing to anything big."
        )
    elif p_accept > 0.7:
        parts.append(
            "I think this is right in your sweet spot — challenging enough to matter, "
            "small enough to actually happen."
        )
    parts.append(
        "What do you think? You can also tell me if it feels too much or not relevant, "
        "and I'll adjust."
    )
    return " ".join(parts)
def _respond_to_planning_chat(self, user_text: str) -> Dict[str, Any]:
    """Respond to free-text during the planning phase.

    Implicit accept/reject phrasings are mapped onto explicit choices;
    anything else goes to the LLM, with a template fallback.
    """
    text = user_text.lower()

    # Ordered: acceptance takes priority over rejection, which takes
    # priority over relevance complaints.
    choice_markers = [
        ("accept", (
            "sure", "ok", "okay", "sounds good", "let's do it", "i'll try",
            "let's go", "yes", "yeah", "i can do that", "worth a shot",
        )),
        ("too_hard", (
            "too hard", "too much", "can't", "won't work", "not for me",
            "impossible", "no way", "overwhelming",
        )),
        ("not_relevant", (
            "not relevant", "doesn't apply", "not my issue", "wrong area",
            "not the problem",
        )),
    ]
    for choice, markers in choice_markers:
        if any(marker in text for marker in markers):
            return self._process_choice(choice)

    # Everything else (questions, off-topic, general chat) — LLM first.
    message = self._llm_generate(user_text)
    if not message:
        # Template fallback
        question_markers = ("why", "how", "what if", "explain", "tell me more")
        if any(marker in text for marker in question_markers):
            message = self._explain_intervention()
        else:
            message = self._respond_general_chat(user_text, PHASE_PLANNING)
    return {
        "phase": PHASE_PLANNING,
        "message": message,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
def _respond_to_update_chat(self, user_text: str) -> Dict[str, Any]:
    """Respond to free-text during the update phase.

    First folds the message into the cognitive user model, then maps
    implicit accept/reject phrasings onto explicit choices; anything
    else is answered by the LLM with a template fallback.
    """
    text = user_text.lower()
    # Update cognitive model before interpreting the reply.
    self._update_user_model_from_text(user_text)

    accept_markers = ("sure", "ok", "okay", "sounds good", "i'll try", "yes", "yeah")
    reject_markers = ("too hard", "too much", "can't", "no")
    offtarget_markers = ("not relevant", "doesn't apply", "wrong area")

    if any(marker in text for marker in accept_markers):
        return self._process_choice("accept")
    if any(marker in text for marker in reject_markers):
        return self._process_choice("too_hard")
    if any(marker in text for marker in offtarget_markers):
        return self._process_choice("not_relevant")

    # Try LLM for natural conversation
    message = self._llm_generate(user_text) or self._respond_general_chat(user_text, PHASE_UPDATE)
    return {
        "phase": PHASE_UPDATE,
        "message": message,
        "sphere_data": self.get_sphere_data(),
        "belief_summary": self.get_belief_summary(),
        "is_complete": False,
    }
def _explain_intervention(self) -> str:
    """Explain why the current intervention was chosen.

    Combines the target skill's score, any bottleneck insight from the
    dependency graph, and the ToM follow-through prediction into a
    short rationale; generic line when no intervention/target is set.
    """
    if not self.current_intervention or not self.target_skill:
        return "I chose this based on where I think a small change would make the biggest impact."
    skill_label = self._label(self.target_skill)
    # Default belief: prior from D, or a uniform 5-level distribution.
    score = round(self.model.get_skill_score(
        self.beliefs.get(self.target_skill, self.model.D.get(self.target_skill, np.array([0.2]*5)))
    ))
    # Check dependency
    bottlenecks = self.dep_graph.find_bottlenecks(
        {k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS}
    )
    parts = [
        f"{skill_label} is at {score}/100 in your sphere."
    ]
    if bottlenecks and bottlenecks[0]["blocker"] == self.target_skill:
        blocked = [self._label(b) for b in bottlenecks[0]["blocked"][:2]]
        parts.append(
            f"It's also a bottleneck — it's limiting your {' and '.join(blocked)}. "
            f"So improving it has a multiplier effect."
        )
    # ToM insight
    pred = self.tom.predict_response_gated(self.current_intervention.to_dict())
    p_accept = pred.get("p_accept", 0.5)
    parts.append(
        f"I picked this specific step because my model predicts about a "
        f"{round(p_accept * 100)}% chance you'll actually follow through on it — "
        f"and that matters more than ambition."
    )
    parts.append("Does that make sense?")
    return " ".join(parts)
def _generate_acceptance_message(self) -> str:
    """Generate encouragement when user accepts an intervention.

    LLM first; the template fallback adapts framing to the
    intervention's duration and cites the predicted follow-through.

    Returns:
        Encouragement message (generic line when no intervention is set).
    """
    if not self.current_intervention:
        return "Great choice. Let's see how it goes."
    # Fix: dropped the unused `skill_label` local that the original
    # computed from self.current_intervention.target_skill but never used.
    duration = self.current_intervention.duration_minutes
    pred = self.tom.predict_response_gated(self.current_intervention.to_dict())
    p_complete = pred.get("p_accept", 0.6)
    # Try LLM first
    llm_response = self._llm_generate(
        f"[SYSTEM: The user just accepted this step: "
        f"\"{self.current_intervention.description}\" "
        f"({round(duration)} min). "
        f"Predicted follow-through: {round(p_complete * 100)}%. "
        f"Encourage them naturally and transition into coaching conversation.]"
    )
    if llm_response:
        return llm_response
    # Template fallback
    parts = [f"Good. {self.current_intervention.description}"]
    if duration <= 5:
        parts.append(
            f"It's only {round(duration)} minutes — the point isn't to change everything, "
            f"it's to prove to yourself that you can shift the pattern."
        )
    else:
        parts.append(
            f"Block out {round(duration)} minutes for this. "
            f"You don't need to do it perfectly, just do it."
        )
    parts.append(
        f"Based on what I know about you, I predict a {round(p_complete * 100)}% chance "
        f"you'll follow through. Let's check in next time and see how it went."
    )
    return " ".join(parts)
def _generate_adaptation_message(self, rejection_reason: str) -> str:
    """Generate personalized message when adapting after rejection.

    Args:
        rejection_reason: "too_hard" or "not_relevant"; any other value
            gets a generic retry line.

    Returns:
        LLM-generated acknowledgement when available, otherwise a
        reason-specific template.
    """
    # Try LLM first
    if self.current_intervention:
        llm_response = self._llm_generate(
            f"[SYSTEM: The user rejected the previous suggestion as '{rejection_reason}'. "
            f"New suggestion: \"{self.current_intervention.description}\" "
            f"({round(self.current_intervention.duration_minutes)} min). "
            f"Acknowledge their feedback warmly and present the alternative naturally.]"
        )
        if llm_response:
            return llm_response
    # Template fallback
    if rejection_reason == "too_hard":
        if self.current_intervention:
            return (
                f"I hear you — that was too much. That's useful information for me. "
                f"Let me try something smaller: \"{self.current_intervention.description}\" "
                f"This should take about {round(self.current_intervention.duration_minutes)} minutes. "
                f"How does that feel?"
            )
        return (
            "I hear you — that was too much. Let me find something smaller "
            "that still moves the needle."
        )
    elif rejection_reason == "not_relevant":
        skill_label = self._label(self.target_skill) if self.target_skill else "a different area"
        if self.current_intervention:
            return (
                f"Fair enough — let me look at {skill_label} instead. "
                f"Here's what I have in mind: \"{self.current_intervention.description}\" "
                f"Does this connect better to what you're actually dealing with?"
            )
        return (
            f"Fair enough. Let me find something that connects better to what "
            f"actually matters to you."
        )
    return "Let me try a different approach."
# -------------------------------------------------------------------------
# COGNITIVE MODELING — continuous user model updates from conversation
# -------------------------------------------------------------------------
def _update_user_model_from_text(self, user_text: str, profile_data: Optional[Dict[str, Any]] = None) -> None:
    """
    Update the cognitive model of the user from conversational signals.

    This goes beyond explicit choices — it infers emotional state, interests,
    engagement patterns, and updates the ToM particle filter accordingly.
    The agent is always building a richer picture of who this person is.
    Also extracts structured profile facts and updates the Bayesian network.

    Args:
        user_text: The user's latest free-text message.
        profile_data: Pre-extracted profile dict from batched LLM call.
            If provided (and it contains "facts"), skips the separate
            LLM extraction call.
    """
    # Extract profile facts, causal links, and progress signals
    if profile_data is not None and profile_data.get("facts"):
        # Use pre-extracted data from batched call (no extra LLM call)
        extraction = self.profile.store_extracted_data(
            data=profile_data,
            turn=self.timestep,
        )
    else:
        # Fallback: separate LLM call for profile extraction
        extraction = self.profile.extract_and_store(
            user_text=user_text,
            turn=self.timestep,
            classifier=self.classifier,
            context=self._get_recent_context(),
        )
    # Apply profile signals and progress to POMDP skill beliefs
    self._apply_skill_signals(extraction)
    lower = user_text.lower()
    # --- Infer emotional state ---
    # Keyword → emotion lookup; substring matching on the lowercased text.
    emotional_signals = {
        "stressed": ["stressed", "stress", "anxious", "anxiety", "worried"],
        "frustrated": ["frustrated", "annoying", "annoyed", "ugh", "hate"],
        "sad": ["sad", "depressed", "hopeless", "lonely", "lost"],
        "excited": ["excited", "pumped", "can't wait", "stoked"],
        "calm": ["calm", "peaceful", "relaxed", "good"],
        "bored": ["bored", "boring", "meh", "whatever"],
        "overwhelmed": ["overwhelmed", "too much", "drowning", "buried"],
        "curious": ["curious", "wonder", "interesting", "tell me"],
    }
    detected_emotions = []
    for emotion, keywords in emotional_signals.items():
        if any(w in lower for w in keywords):
            detected_emotions.append(emotion)
    # --- Infer topics of interest ---
    topic_signals = {
        "work": ["work", "job", "career", "boss", "colleague", "office", "meeting", "deadline", "project"],
        "relationships": ["partner", "friend", "family", "relationship", "dating", "marriage"],
        "health": ["health", "exercise", "gym", "diet", "sleep", "energy", "body"],
        "creativity": ["creative", "art", "music", "writing", "design", "ideas"],
        "learning": ["learn", "study", "book", "course", "skill", "education"],
        "finances": ["money", "budget", "savings", "debt", "financial", "income"],
        "identity": ["who am i", "purpose", "meaning", "values", "identity", "authentic"],
    }
    detected_topics = []
    for topic, keywords in topic_signals.items():
        if any(w in lower for w in keywords):
            detected_topics.append(topic)
    # --- Update particle filter from conversational signals ---
    # The particle filter normally only updates from explicit choices.
    # Here we do soft updates based on inferred signals to expand the model.
    if detected_emotions:
        self._soft_update_tom_from_emotions(detected_emotions)
    # --- Store inferred state in history ---
    # Only log a history entry when something was actually detected.
    if detected_emotions or detected_topics:
        self.history.append({
            "timestep": self.timestep,
            "type": "cognitive_model_update",
            "emotions": detected_emotions,
            "topics": detected_topics,
            "user_text_snippet": user_text[:100],
        })
def _apply_skill_signals(self, extraction: Dict[str, Any]) -> None:
    """
    Apply profile and progress signals to update POMDP skill beliefs.

    This closes the feedback loop: facts extracted from conversation
    (breakup, progress reports, challenges) actually shift the skill
    beliefs, so the model evolves throughout the session.

    Two sources of signals:
    1. Profile Bayesian network — inferred states affect skill beliefs
       (e.g., breakup → emotional_stress → emotional_reg impaired)
    2. Progress signals — user reports improvement or regression
       (e.g., "I've been focusing better" → focus belief shifts up)

    Args:
        extraction: Extraction result dict; only its "progress_signals"
            list (entries with skill/direction/magnitude/evidence keys)
            is read here.
    """
    # --- 1. Apply Bayesian network skill impacts ---
    bn_impacts = self.profile.bayes_net.get_skill_impacts()
    for skill, impact in bn_impacts.items():
        if skill not in self.beliefs or skill not in SKILL_FACTORS:
            continue
        if abs(impact) < 0.03:
            continue  # Too small to matter
        belief = self.beliefs[skill]
        n_levels = len(belief)
        # Shift belief: positive impact → shift toward higher levels,
        # negative impact → shift toward lower levels
        shift_strength = min(abs(impact), 0.3)  # Cap the shift
        # NOTE(review): the hand-built targets index [-1], [-2], [1] and
        # [n_levels // 2] — assumes beliefs have 5 levels (as the uniform
        # np.ones(5)/5 defaults elsewhere suggest); confirm for other sizes.
        if impact > 0:
            # Shift probability mass toward higher skill levels
            target = np.zeros(n_levels)
            target[-1] = 0.5  # Weight toward high
            target[-2] = 0.3
            target[n_levels // 2] = 0.2
        else:
            # Shift probability mass toward lower skill levels
            target = np.zeros(n_levels)
            target[0] = 0.5  # Weight toward low
            target[1] = 0.3
            target[n_levels // 2] = 0.2
        target = normalize(target)
        # Soft blend: belief = (1 - alpha) * belief + alpha * target
        alpha = shift_strength * 0.15  # Gentle: max ~4.5% blend per message
        self.beliefs[skill] = normalize(
            (1.0 - alpha) * belief + alpha * target
        )
    # --- 2. Apply progress signals (user-reported improvement/regression) ---
    progress_signals = extraction.get("progress_signals", [])
    for signal in progress_signals:
        skill = signal.get("skill")
        direction = signal.get("direction")
        magnitude = signal.get("magnitude", 0.2)
        if skill not in self.beliefs or skill not in SKILL_FACTORS:
            continue
        belief = self.beliefs[skill]
        n_levels = len(belief)
        # Progress signals are stronger than passive BN impacts —
        # user is explicitly reporting behavioral change
        shift_strength = min(magnitude, 0.5)
        if direction == "improvement":
            # Shift toward higher levels
            target = np.zeros(n_levels)
            for i in range(n_levels):
                target[i] = (i + 1) / n_levels  # Linear ramp up
        else:
            # Shift toward lower levels (any non-"improvement" direction
            # is treated as regression)
            target = np.zeros(n_levels)
            for i in range(n_levels):
                target[i] = (n_levels - i) / n_levels  # Linear ramp down
        target = normalize(target)
        # Stronger blend for explicit progress reports
        alpha = shift_strength * 0.25  # Up to 12.5% blend
        self.beliefs[skill] = normalize(
            (1.0 - alpha) * belief + alpha * target
        )
        logger.info(
            f"[Learning] Skill '{skill}' belief shifted ({direction}, "
            f"magnitude={magnitude:.2f}): {signal.get('evidence', '')}"
        )
def _soft_update_tom_from_emotions(self, emotions: List[str]) -> None:
"""
Soft-update the ToM particle filter based on emotional signals.
Instead of a hard Bayesian update (which requires explicit choice observations),
this applies a gentle bias to the particle weights based on what emotions
tell us about the user's type dimensions.
For example: "overwhelmed" suggests low overwhelm_threshold (dimension 6).
"bored" with structured tasks suggests high novelty_seeking (dimension 2).
"""
# Map emotions to particle dimension biases
# Each entry: (dimension_index, direction, strength)
# direction > 0 means "this emotion suggests HIGH value on that dimension"
# direction < 0 means "this emotion suggests LOW value"
emotion_dim_map = {
"overwhelmed": [(6, -1, 0.15)], # low overwhelm_threshold
"stressed": [(6, -1, 0.10)], # low overwhelm_threshold
"bored": [(2, 1, 0.10)], # high novelty_seeking
"frustrated": [(5, 1, 0.08)], # high autonomy_sensitivity
"excited": [(2, 1, 0.05), (6, 1, 0.05)], # novelty + high threshold
"curious": [(2, 1, 0.05)], # novelty_seeking
}
for emotion in emotions:
biases = emotion_dim_map.get(emotion, [])
for dim_idx, direction, strength in biases:
# Apply soft weight update: particles that match the signal get upweighted
for j in range(self.tom.n_particles):
particle_val = self.tom.particle_params[j, dim_idx]
if direction > 0:
# Upweight particles with high value on this dimension
likelihood = 0.5 + strength * particle_val
else:
# Upweight particles with low value on this dimension
likelihood = 0.5 + strength * (1.0 - particle_val)
self.tom.particle_weights[j] *= likelihood
# Renormalize
total = np.sum(self.tom.particle_weights)
if total > 0:
self.tom.particle_weights /= total
else:
self.tom.particle_weights = np.ones(self.tom.n_particles) / self.tom.n_particles
# Invalidate reliability cache
self.tom._reliability_cache = None
self.tom._confidence_cache = None
def _get_recent_context(self) -> str:
"""Get recent conversation context for profile extraction."""
if not self.conversation_history:
return ""
recent = self.conversation_history[-4:]
return " | ".join(f"{m['role']}: {m['content'][:80]}" for m in recent)
def get_inferred_user_state(self) -> Dict[str, Any]:
    """
    Get the current inferred cognitive/emotional state of the user.
    Aggregates recent conversation signals into a summary the LLM can use.
    """
    # Collect emotion/topic signals from the last 10 history entries.
    emotions: List[str] = []
    topics: List[str] = []
    for entry in self.history[-10:]:
        if entry.get("type") != "cognitive_model_update":
            continue
        emotions.extend(entry.get("emotions", []))
        topics.extend(entry.get("topics", []))
    # Most-recent-first, de-duplicated, capped at three each.
    top_emotions = list(dict.fromkeys(reversed(emotions)))[:3]
    top_topics = list(dict.fromkeys(reversed(topics)))[:3]

    # Assess overall engagement from the sentiment window (last 3).
    engagement = "moderate"
    if self._recent_sentiments:
        window = self._recent_sentiments[-3:]
        if all(s == "engaged" for s in window):
            engagement = "high"
        elif any(s == "disengaged" for s in window):
            engagement = "low"
        elif any(s == "overwhelmed" for s in window):
            engagement = "strained"

    in_coaching = self.phase == PHASE_COACHING
    return {
        "recent_emotions": top_emotions,
        "recent_topics": top_topics,
        "engagement_level": engagement,
        "turns_in_phase": self._coaching_turns if in_coaching else 0,
        "tom_type": self.tom.get_user_type_summary(),
        "tom_reliability": self.tom.reliability,
    }
def _respond_general_chat(self, user_text: str, phase: str) -> str:
"""Generate a response to general chat that relates back to coaching."""
lower = user_text.lower()
# Detect emotional content
is_emotional = any(w in lower for w in [
"stressed", "anxious", "worried", "frustrated", "stuck",
"hopeless", "lost", "confused", "scared", "tired",
"burned out", "overwhelmed",
])
is_positive = any(w in lower for w in [
"better", "good", "happy", "excited", "motivated",
"hopeful", "ready", "clear",
])
if is_emotional:
return (
f"I hear that. And I want you to know — what you're feeling makes sense "
f"given what I'm seeing in your patterns. This isn't about fixing something "
f"broken. It's about finding the smallest adjustment that creates the most relief. "
f"That's what I'm trying to do here."
)
elif is_positive:
return (
f"That energy is worth channeling. Based on your sphere, the thing that would "
f"compound most right now is working on your {self._label(self.target_skill or 'focus')}. "
f"Want to commit to the step I suggested?"
)
else:
# Acknowledge and gently steer back
if phase == PHASE_PLANNING:
return (
f"Thanks for sharing that — it helps me understand you better. "
f"Coming back to the step I suggested: does it feel doable, "
f"or would you like me to adjust it?"
)
return (
f"I appreciate you telling me that. It gives me more context for "
f"how to help. What would be most useful for you right now?"
)
# -------------------------------------------------------------------------
# DATA ACCESSORS
# -------------------------------------------------------------------------
def get_sphere_data(self) -> Dict[str, Any]:
    """Assemble radar-chart payload: skill scores, bottlenecks, and edges."""
    # Bottleneck detection only looks at skill-factor beliefs.
    skill_only = {
        name: belief
        for name, belief in self.beliefs.items()
        if name in SKILL_FACTORS
    }
    return {
        "categories": self.model.get_all_skill_scores(self.beliefs),
        "bottlenecks": self.dep_graph.find_bottlenecks(skill_only),
        "dependency_edges": self.dep_graph.get_all_edges(),
    }
def get_belief_summary(self) -> Dict[str, Any]:
    """Build a human-readable snapshot of all beliefs and meta-level state.

    Includes per-skill score/uncertainty, the most likely level for each
    preference and friction factor, ToM reliability and user type, the
    learning summary, and the semantic profile.
    """
    summary: Dict[str, Any] = {}
    # Skill factors: point score plus a std-dev-based uncertainty.
    for skill in SKILL_FACTORS:
        belief = self.beliefs.get(skill)
        if belief is None:
            continue
        summary[skill] = {
            "score": round(self.model.get_skill_score(belief), 1),
            "uncertainty": round(float(np.std(belief * SKILL_LEVEL_VALUES)), 1),
        }
    # Preference and friction factors share the same MAP-summary shape.
    factor_maps = (
        self.model.spec.preference_factors,
        self.model.spec.friction_factors,
    )
    for factor_map in factor_maps:
        for factor, levels in factor_map.items():
            if factor not in self.beliefs:
                continue
            dist = self.beliefs[factor]
            summary[factor] = {
                "inferred": levels[int(np.argmax(dist))],
                "confidence": float(np.max(dist)),
            }
    # Meta-level state.
    summary["tom_reliability"] = self.tom.reliability
    summary["user_type"] = self.tom.get_user_type_summary()
    summary["learning"] = self.learner.get_learning_summary()
    summary["profile"] = self.profile.get_summary()
    return summary
def get_profile_data(self) -> Dict[str, Any]:
    """Get detailed profile data for the visualization panel.

    Returns a dict with:
        skill_beliefs:    raw 5-level probability vectors per skill
        skill_scores:     rounded point estimates per skill
        score_deltas:     change vs. the previous call's snapshot
        emotional_state:  Circumplex POMDP beliefs + trajectory
        tom_profile:      ToM user-type dimensions + reliability
        dependency_graph: skill nodes (with bottleneck flags) + edges
        profile_facts:    semantic facts + Bayesian-network view

    Note: each call updates the internal score snapshot used for delta
    computation, so consecutive calls report deltas between calls.
    """
    skill_beliefs, skill_scores = self._skill_belief_snapshot()
    score_deltas = self._compute_score_deltas(skill_scores)
    return {
        "skill_beliefs": skill_beliefs,
        "skill_scores": skill_scores,
        "score_deltas": score_deltas,
        "emotional_state": self._emotional_state_payload(),
        "tom_profile": {
            "dimensions": self.tom.get_user_type_summary(),
            "reliability": round(self.tom.reliability, 3),
        },
        "dependency_graph": self._dependency_graph_payload(skill_scores),
        "profile_facts": self._profile_facts_payload(),
    }

def _skill_belief_snapshot(self) -> Tuple[Dict[str, List[float]], Dict[str, float]]:
    """Raw belief distributions and rounded point scores for each skill."""
    skill_beliefs: Dict[str, List[float]] = {}
    skill_scores: Dict[str, float] = {}
    for skill in SKILL_FACTORS:
        if skill in self.beliefs:
            skill_beliefs[skill] = self.beliefs[skill].tolist()
            skill_scores[skill] = round(
                self.model.get_skill_score(self.beliefs[skill]), 1
            )
    return skill_beliefs, skill_scores

def _emotional_state_payload(self) -> Dict[str, Any]:
    """Current emotional belief state from the Circumplex POMDP."""
    state: Dict[str, Any] = {
        "valence_belief": self.emotion.belief_valence.tolist(),
        "arousal_belief": self.emotion.belief_arousal.tolist(),
    }
    trajectory = self.emotion.get_emotional_trajectory()
    if trajectory.get("current"):
        state["current"] = trajectory["current"]
    else:
        # Trajectory has no current entry yet; fall back to the live estimate.
        current = self.emotion.get_current_emotion()
        state["current"] = current.to_dict() if current else None
    state["trajectory"] = trajectory.get("states", [])
    state["avg_prediction_error"] = trajectory.get("avg_prediction_error", 0.0)
    return state

def _dependency_graph_payload(self, skill_scores: Dict[str, float]) -> Dict[str, Any]:
    """Skill nodes (score + bottleneck flag) and dependency edges."""
    skill_belief_dict = {
        k: v for k, v in self.beliefs.items() if k in SKILL_FACTORS
    }
    bottlenecks = self.dep_graph.find_bottlenecks(skill_belief_dict)
    bottleneck_ids = {b["blocker"] for b in bottlenecks}
    return {
        "nodes": [
            {
                "id": skill,
                # Default 50 for skills with no belief yet (neutral midpoint).
                "score": skill_scores.get(skill, 50),
                "is_bottleneck": skill in bottleneck_ids,
            }
            for skill in SKILL_FACTORS
        ],
        "edges": self.dep_graph.get_all_edges(),
    }

def _profile_facts_payload(self) -> Dict[str, Any]:
    """Semantic profile facts plus the Bayesian network as a node list."""
    profile_summary = self.profile.get_summary()
    bayes_net = profile_summary.get("bayes_net", {})
    # Bayes net nodes arrive as {id: node_dict}; flatten to a list with
    # the id folded into each node for the frontend.
    nodes = [
        {**node_data, "id": node_id}
        for node_id, node_data in bayes_net.get("nodes", {}).items()
    ]
    return {
        "facts": profile_summary.get("facts", []),
        "bayes_net": {
            "nodes": nodes,
            "edges": bayes_net.get("edges", []),
            "skill_impacts": bayes_net.get("skill_impacts", {}),
        },
    }

def _compute_score_deltas(self, skill_scores: Dict[str, float]) -> Dict[str, float]:
    """Per-skill change vs. the previous snapshot; stores the new snapshot."""
    previous = getattr(self, "_prev_skill_scores", None)
    deltas: Dict[str, float] = {}
    if previous:
        for skill, score in skill_scores.items():
            # Skills absent from the previous snapshot report a zero delta.
            deltas[skill] = round(score - previous.get(skill, score), 1)
    self._prev_skill_scores = dict(skill_scores)
    return deltas
# -------------------------------------------------------------------------
# HELPERS
# -------------------------------------------------------------------------
def _get_next_question(self) -> Optional[CalibrationQuestion]:
    """Pick the next calibration question via adaptive ordering, or None."""
    remaining = get_adaptive_question_order(self.beliefs, self.asked_question_ids)
    if not remaining:
        return None
    return remaining[0]
def _format_question(self, q: CalibrationQuestion) -> Dict[str, Any]:
"""Format a question for the frontend."""
result = {
"id": q.id,
"category": q.category,
"question_text": q.question_text,
"question_type": q.question_type,
}
if q.question_type == "mc":
result["options"] = q.options
return result
def _match_mc_answer(self, answer_text: str, options: List[str]) -> Optional[int]:
"""Simple text matching for MC answers. Returns None if no match."""
answer_lower = answer_text.strip().lower()
if not answer_lower:
return None
# Check for exact index (e.g., "0", "1", "2", "3")
if answer_lower.isdigit():
idx = int(answer_lower)
if 0 <= idx < len(options):
return idx
# Check for letter (a, b, c, d)
letter_map = {"a": 0, "b": 1, "c": 2, "d": 3}
if answer_lower in letter_map:
idx = letter_map[answer_lower]
if idx < len(options):
return idx
# Require substantial overlap for substring match (>50% of option text)
for i, opt in enumerate(options):
opt_lower = opt.lower()
if opt_lower == answer_lower:
return i
# Only match if the user's text contains most of the option
if len(answer_lower) > 10 and opt_lower in answer_lower:
return i
return None # No confident match — treat as conversational
def set_empathy_dial(self, lambda_value: float) -> None:
    """Set the empathy dial, clamped to [0, 1] (0=challenging, 1=gentle)."""
    clamped = max(0.0, min(1.0, lambda_value))
    self.empathy.lambda_empathy = clamped