| """ | |
| EFE-driven action selection dispatcher. | |
| Bridges the POMDP inference (select_action, compute_efe_all_factors) to | |
| actual agent behavior. Handles: | |
| - Valid action masks per phase | |
| - Dynamic lambda_epist scheduling (epistemic β pragmatic balance) | |
| - Empathy blending for intervention-type actions | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from typing import Dict, List, Optional, Tuple | |
| import numpy as np | |
| from .model import SphereModel, ACTION_NAMES, SKILL_FACTORS | |
| from .inference import compute_efe_all_factors, select_action | |
| from .utils import entropy | |
| logger = logging.getLogger(__name__) | |
# ── Action indices (convenience aliases) ─────────────────────────────────────

A_ASK_MC = 0
A_ASK_FREE = 1
A_SHOW_SPHERE = 2
A_PROPOSE = 3
A_REFRAME = 4
A_ADJUST = 5
A_COUNTERFACTUAL = 6
A_SAFETY = 7
A_END = 8

# ── Valid action masks per phase ─────────────────────────────────────────────

VALID_ACTIONS: Dict[str, List[int]] = {
    "visualization": [A_ASK_FREE, A_SHOW_SPHERE, A_PROPOSE],
    "planning": [A_ASK_FREE, A_PROPOSE, A_REFRAME, A_COUNTERFACTUAL, A_SAFETY, A_END],
    "coaching": [A_ASK_FREE, A_PROPOSE, A_REFRAME, A_ADJUST, A_COUNTERFACTUAL, A_SAFETY, A_END],
    "update": [A_ASK_FREE, A_PROPOSE, A_REFRAME, A_ADJUST, A_COUNTERFACTUAL, A_SAFETY],
}
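
# Example lookups (the "intro" phase below is hypothetical; the
# [A_ASK_FREE, A_PROPOSE] fallback mirrors the default used in
# select_coaching_action further down):
#   VALID_ACTIONS["coaching"]                            → [1, 3, 4, 5, 6, 7, 8]
#   VALID_ACTIONS.get("intro", [A_ASK_FREE, A_PROPOSE])  → [1, 3]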
# ── Dynamic lambda_epist ─────────────────────────────────────────────────────

def compute_lambda_epist(
    phase: str,
    timestep: int,
    tom_reliability: float,
    beliefs: Optional[Dict[str, np.ndarray]] = None,
    emotion_prediction_error: float = 0.0,
) -> float:
    """
    Compute the epistemic drive weight, balancing exploration vs. exploitation.

    Starts high (explore to learn about the user) and decays toward pragmatic
    behavior (propose interventions). Boosted when ToM reliability is low,
    beliefs are uncertain, or emotional prediction error is high (i.e. the
    model is wrong about the user's emotional state).
    """
    # Phase-dependent base value
    phase_base = {
        "visualization": 2.0,
        "planning": 0.8,
        "coaching": 0.5,
        "update": 0.6,
    }.get(phase, 1.0)

    # Temporal decay: linear from 1.0 → 0.3 over the first 30 timesteps
    temporal_factor = max(0.3, 1.0 - 0.7 * min(timestep / 30.0, 1.0))

    # Uncertainty boost: average belief entropy / max possible entropy
    uncertainty_factor = 1.0
    if beliefs:
        skill_beliefs = [beliefs[s] for s in SKILL_FACTORS if s in beliefs]
        if skill_beliefs:
            max_ent = np.log(len(skill_beliefs[0]))  # log(5) for 5 levels
            avg_ent = float(np.mean([entropy(b) for b in skill_beliefs]))
            uncertainty_factor = 1.0 + 0.5 * (avg_ent / max(max_ent, 1e-12))

    # Reliability discount: boost the epistemic drive when ToM is unreliable
    reliability_factor = 1.0 + max(0.0, 0.5 - tom_reliability)

    # Emotion prediction error boost: when our emotional model is wrong,
    # increase exploration (ask questions, don't push interventions)
    emotion_factor = 1.0
    if emotion_prediction_error > 0.2:
        emotion_factor = 1.0 + 0.8 * min(emotion_prediction_error, 1.0)

    return phase_base * temporal_factor * uncertainty_factor * reliability_factor * emotion_factor
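
# A worked example of the schedule above (the arithmetic is exact for these
# inputs; the scenarios themselves are illustrative). Factors multiply as
# phase_base * temporal * uncertainty * reliability * emotion:
#   compute_lambda_epist("visualization", timestep=0, tom_reliability=1.0)
#     → 2.0 * 1.0 * 1.0 * 1.0 * 1.0 = 2.0    (early session: explore)
#   compute_lambda_epist("coaching", timestep=30, tom_reliability=0.9)
#     → 0.5 * 0.3 * 1.0 * 1.0 * 1.0 = 0.15   (late session: exploit)
#   compute_lambda_epist("coaching", timestep=30, tom_reliability=0.2,
#                        emotion_prediction_error=0.5)
#     → 0.5 * 0.3 * 1.0 * 1.3 * 1.4 = 0.273  (unreliable ToM and emotional
#                                             surprise partially restore it)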
# ── Main action selector ─────────────────────────────────────────────────────

def select_coaching_action(
    beliefs: Dict[str, np.ndarray],
    model: SphereModel,
    phase: str,
    timestep: int,
    tom_reliability: float,
    empathy_planner=None,
    tom_filter=None,
    target_skill: Optional[str] = None,
    current_intervention=None,
    beta: float = 4.0,
    emotion_prediction_error: float = 0.0,
    emotion_valence_belief: Optional[np.ndarray] = None,
) -> Tuple[int, str, Dict]:
    """
    Full EFE-driven action selection with empathy blending.

    Args:
        beliefs: Current beliefs per factor
        model: The POMDP model
        phase: Current phase (visualization, planning, coaching, update)
        timestep: Current session timestep
        tom_reliability: ToM particle filter reliability in [0, 1]
        empathy_planner: EmpathyPlanner for G_social blending (optional)
        tom_filter: UserTypeFilter for user predictions (optional)
        target_skill: Currently targeted skill (optional)
        current_intervention: Current intervention dict (optional)
        beta: Inverse temperature for the softmax
        emotion_prediction_error: Magnitude of emotional prediction error [0, ~1.4]
        emotion_valence_belief: 5-element belief over valence states (optional)

    Returns:
        (action_idx, action_name, efe_info_dict)
    """
    valid = VALID_ACTIONS.get(phase, [A_ASK_FREE, A_PROPOSE])
    lambda_epist = compute_lambda_epist(
        phase, timestep, tom_reliability, beliefs,
        emotion_prediction_error=emotion_prediction_error,
    )

    # Determine which factors are relevant
    relevant_factors = None
    if target_skill and target_skill in SKILL_FACTORS:
        relevant_factors = [target_skill]
    # If no target, use all skill factors (the default in select_action)

    # Step 1: Compute the raw system EFE and select
    action_idx, action_probs, efe_values = select_action(
        beliefs=beliefs,
        model=model,
        valid_actions=valid,
        lambda_epist=lambda_epist,
        beta=beta,
        relevant_factors=relevant_factors,
    )
    # Step 2: Blend with empathy for intervention-type actions.
    # Only applies when we have an empathy planner + ToM + an intervention context.
    intervention_actions = {A_PROPOSE, A_REFRAME, A_ADJUST}
    if empathy_planner is not None and tom_filter is not None:
        # Compute G_social for each valid action
        blended_values = np.zeros(len(valid), dtype=np.float64)
        for i, v in enumerate(valid):
            if v in intervention_actions and current_intervention is not None:
                # Normalize the intervention into a plain dict for the ToM filter
                if hasattr(current_intervention, "to_dict"):
                    iv_dict = current_intervention.to_dict()
                elif isinstance(current_intervention, dict):
                    iv_dict = current_intervention
                else:
                    # Minimal fallback when the intervention has no structured form
                    iv_dict = {"difficulty": 0.3, "duration_minutes": 5}
                # Get the ToM prediction for this intervention
                prediction = tom_filter.predict_response_gated(iv_dict)
                blended_values[i] = empathy_planner.compute_blended_efe(
                    system_efe=efe_values[i],
                    user_felt_cost=prediction["predicted_felt_cost"],
                    reliability=tom_reliability,
                )
            else:
                # Non-intervention actions: use the system EFE directly
                blended_values[i] = efe_values[i]
        # Re-select based on the blended values
        q_values = -blended_values
        blended_probs = softmax(q_values, temperature=1.0 / max(beta, 0.01))
        best_idx = int(np.argmax(blended_probs))
        action_idx = valid[best_idx]
        action_probs = blended_probs
        efe_values = blended_values
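        # NOTE: the exact blend is defined in EmpathyPlanner, not here. Judging
        # only from the argument names, a plausible reliability-gated form (an
        # assumption, not confirmed by this module) would be:
        #   G_blended = (1 - w * r) * G_system + (w * r) * felt_cost
        # with r = tom_reliability, so unreliable ToM predictions barely
        # perturb the system EFE.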
    # Step 3: Emotional valence penalty on intervention actions.
    # When the user is in a negative emotional state, penalize pushing
    # coaching interventions → prefer softer actions (ask, safety_check).
    valence_negative_mass = None
    if emotion_valence_belief is not None and len(emotion_valence_belief) >= 5:
        valence_negative_mass = float(emotion_valence_belief[0] + emotion_valence_belief[1])
        if valence_negative_mass > 0.4:
            penalty = 0.5 * valence_negative_mass
            for i, v in enumerate(valid):
                if v in intervention_actions:
                    efe_values[i] += penalty  # Higher G = worse action
            # Re-select with the penalty applied
            q_values = -efe_values
            penalized_probs = softmax(q_values, temperature=1.0 / max(beta, 0.01))
            best_idx = int(np.argmax(penalized_probs))
            action_idx = valid[best_idx]
            action_probs = penalized_probs
            logger.info(
                f"[EFE] Emotional valence penalty applied (neg_mass={valence_negative_mass:.2f}, "
                f"penalty={penalty:.2f})"
            )
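    # Worked example of the penalty above: with neg_mass = 0.6, the penalty is
    # 0.5 * 0.6 = 0.3, added to the EFE of every intervention-type action in
    # `valid`, shifting probability mass toward ask_free / safety_check under
    # the same softmax re-selection.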
    action_name = ACTION_NAMES[action_idx]
    info = {
        "selected_action": action_name,
        "action_probabilities": {
            ACTION_NAMES[v]: round(float(p), 4)
            for v, p in zip(valid, action_probs)
        },
        "efe_values": {
            ACTION_NAMES[v]: round(float(e), 4)
            for v, e in zip(valid, efe_values)
        },
        "lambda_epist": round(lambda_epist, 3),
        "phase": phase,
        "emotion_prediction_error": round(emotion_prediction_error, 3),
        "valence_negative_mass": (
            round(valence_negative_mass, 3) if valence_negative_mass is not None else None
        ),
    }
    logger.info(
        f"[EFE] Phase={phase} → {action_name} "
        f"(λ_epist={lambda_epist:.2f}, probs={info['action_probabilities']})"
    )
    return action_idx, action_name, info
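
# Example call (a sketch with hypothetical objects: SphereModel construction
# and belief initialization are defined elsewhere in the package):
#
#   model = SphereModel(...)  # assumed constructor
#   beliefs = {s: np.full(5, 0.2) for s in SKILL_FACTORS}  # uniform over 5 levels
#   idx, name, info = select_coaching_action(
#       beliefs=beliefs,
#       model=model,
#       phase="coaching",
#       timestep=4,
#       tom_reliability=0.7,
#   )
#   logger.info("chose %s (λ_epist=%s)", name, info["lambda_epist"])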