Socrates_docker / classify_perception.py
AlessandroAmodioNGI's picture
refactor: systematic rename of all modules + SQL reorganisation
a5a8164
Raw
History Blame Contribute Delete
11.9 kB
# classify_perception.py
import json
import re
from typing import List, Dict, Any, Optional
from openai import OpenAI
from util_agent import load_short_term_history
from config import OPENAI_CLASSIFIER_MODEL
# Module-level OpenAI client, constructed once at import with no explicit arguments.
client = OpenAI()
# Tunable history-window sizes read by _extract_recent_turns.
RECENT_USER_TURNS = 5 # use last 5 user turns for better stability
INCLUDE_ASSISTANT_TURNS = 2 # include last 2 assistant turns (optional)
# Category reported by call_perception_llm when no JSON object is found in the reply.
DEFAULT_CATEGORY = "neutral"
# Location of the emotion catalog; fetched via db_user.load_json when the caller
# supplies neither a catalog dict nor a catalog_path.
EMOTION_CATALOG_SUPABASE = "supabase://Databases/Agent_Emotional_Evaluation_Reactions_List.json"
# In-memory cache — catalog is static at runtime, no need to re-fetch every turn
_CATALOG_CACHE: Optional[Dict[str, Any]] = None
# -------------------------------
# Helpers: history packing
# -------------------------------
def _extract_recent_turns(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
"""Return a compact history window: last N user turns + last M assistant turns near them."""
if not messages:
return []
user_msgs = [m for m in messages if m.get("role") == "user"]
keep_users = user_msgs[-RECENT_USER_TURNS:]
assistant_msgs = [m for m in messages if m.get("role") == "assistant"]
keep_assist = assistant_msgs[-INCLUDE_ASSISTANT_TURNS:] if INCLUDE_ASSISTANT_TURNS > 0 else []
keep_set = {id(m) for m in keep_users + keep_assist}
compact = [m for m in messages if id(m) in keep_set]
if not compact:
compact = messages[-(RECENT_USER_TURNS * 2):]
return compact
# -------------------------------
# Lightweight keyword gates
# -------------------------------
def apply_keyword_gates(latest_user_msg: str, result: Dict[str, Any]) -> Dict[str, Any]:
    """Adjust the LLM perception ``result`` with simple keyword overrides.

    The latest user message is lower-cased and scanned for substrings. Gates
    run in a fixed order, so a later gate overrides fields set by an earlier one:

    1. Playful markers set ``tone`` to "playful" and, unless the user is
       overwhelmed or intensity is 0.65 or higher, fill in a missing ``tactic``
       with "tease".
    2. Memory-check phrases set ``intent`` to "memory_check".
    3. Accountability phrases (the user quotes the assistant) set ``intent`` to
       "memory_check", ``tactic`` to "suggest" and forbid questions.
    4. Distress phrases default ``emotion_label`` to "stressed", raise
       ``emotion_intensity`` to at least 0.75, forbid teasing and questions,
       and set ``tactic`` to "suggest".

    Args:
        latest_user_msg: Raw text of the latest user message; may be empty or None.
        result: Perception dict to adjust. It is mutated in place.

    Returns:
        The same ``result`` dict, after the gates have been applied.
    """
    text = (latest_user_msg or "").lower()

    def mentions(keywords) -> bool:
        # Plain substring match against the lower-cased message.
        return any(keyword in text for keyword in keywords)

    def current_intensity() -> float:
        # The value may be missing, None or a non-numeric string when the
        # result did not come through call_perception_llm's normalisation.
        try:
            return float(result.get("emotion_intensity") or 0.0)
        except (TypeError, ValueError):
            return 0.0

    def constraints() -> Dict[str, Any]:
        # setdefault() would keep an existing None/str value and the item
        # assignment below would then raise; replace anything that is not a dict.
        existing = result.get("constraints")
        if not isinstance(existing, dict):
            existing = {}
            result["constraints"] = existing
        return existing

    if mentions(("lol", "haha", "😂", "😅", "just kidding", "i'm joking", "sto scherzando")):
        result["tone"] = "playful"
        if result.get("receptivity") != "overwhelmed" and current_intensity() < 0.65:
            # Only fills a missing/empty tactic; an existing one is kept.
            result["tactic"] = result.get("tactic") or "tease"

    if mentions(("do you remember", "ti ricordi", "te l'ho già detto", "non ricordi")):
        result["intent"] = "memory_check"

    # Accountability: user references something the assistant said/proposed
    if mentions((
        "you said", "you suggested", "you mentioned", "you offered", "you asked",
        "you proposed", "you told me", "you were the one", "it was you",
        "in your last", "your previous", "your last message",
    )):
        result["intent"] = "memory_check"
        result["tactic"] = "suggest"
        constraints()["no_questions"] = True

    if mentions(("desperate", "can't sleep", "panic", "overwhelmed", "i can't do this", "disperato", "non dormo")):
        result["emotion_label"] = result.get("emotion_label") or "stressed"
        result["emotion_intensity"] = max(current_intensity(), 0.75)
        gate_constraints = constraints()
        gate_constraints["no_teasing"] = True
        gate_constraints["no_questions"] = True
        result["tactic"] = "suggest"

    # mirror_brevity is now decided by the LLM via the prompt rule.
    # No hardcoded keyword gate here — language-agnostic semantic judgment
    # belongs to the LLM, not to a word list.
    return result
# -------------------------------
# Prompt builder
# -------------------------------
def build_perception_prompt(
    recent_msgs: List[Dict[str, str]],
    catalog: Optional[Dict[str, Any]] = None,
    dialogue_context: Optional[str] = None,
) -> str:
    """Build the text prompt sent to the perception classifier model.

    Args:
        recent_msgs: Chat snippet to classify; serialised into the prompt as
            indented JSON with non-ASCII characters preserved.
        catalog: Optional dict with a "categories" list. Each entry must have
            a "name" key (a missing one raises KeyError); "priority" defaults
            to 99 and "description" to an empty string. Entries are listed in
            the order they appear in the catalog.
        dialogue_context: Optional free text appended to the end of the prompt
            under a "DIALOGUE CONTEXT:" heading.

    Returns:
        The complete prompt string, stripped of leading/trailing whitespace
        before the optional dialogue context is appended.
    """
    if catalog and catalog.get("categories"):
        categories_description = "\n".join(
            [f"- {c['name']} (priority {c.get('priority', 99)}): {c.get('description','')}"
             for c in catalog["categories"]]
        )
    else:
        # No usable catalog: offer the single neutral category.
        categories_description = "- neutral (priority 9): default, no strong signals."
    # Example object embedded in the prompt to show the model which keys the
    # JSON reply must contain.
    schema = {
        "category": "neutral",
        "priority": 9,
        "confidence": 0.7,
        "emotion_label": "neutral",
        "emotion_intensity": 0.2,
        "intent": "explain",
        "tone": "serious",
        "receptivity": "neutral",
        "tactic": "reflect",
        "constraints": {
            "no_questions": False,
            "no_teasing": False,
            "mirror_brevity": False
        },
        "evidence": "short quote (<=12 words) from the user"
    }
    # The literal below is runtime text sent to the model; every character
    # (including line breaks) is part of the program's behaviour.
    prompt = f"""
You are an evaluation agent for a Socratic companion.
Goal:
Return ACTIONABLE control signals for how the assistant should respond next.
You must:
1) Use the recent chat snippet (history-aware).
2) Prioritize the LAST user message, but interpret it in context.
3) Choose ONE 'category' from the list below (use highest priority if multiple apply).
4) Also output human affect + intent + tone + receptivity + tactic.
Categories (priority order applies: top = highest priority):
{categories_description}
Definitions:
- emotion_label: neutral|stressed|anxious|sad|angry|excited|frustrated|proud|nostalgic|vulnerable
- intent: vent (wants empathy), guidance (wants steps), explain (wants understanding),
debate (wants challenge), banter (playful), memory_check, closure,
celebrate (user sharing excitement about something, wants joy mirrored back).
- tone: serious|playful|sarcastic|defensive|warm
- receptivity: open|neutral|defensive|overwhelmed
- tactic: tease|question|reflect|suggest|celebrate|listen
Rules:
- If emotional intensity >= 0.75 OR receptivity = overwhelmed:
set constraints.no_teasing = true
set constraints.no_questions = true (unless absolutely necessary for clarification)
prefer tactic = suggest or reflect
- If tone is playful AND receptivity is open AND intensity < 0.65:
teasing may be allowed; tactic can be tease OR question (choose one)
- If the user is frustrated with the assistant:
be calm, concise; prefer reflect then suggest; avoid end-question by default.
- If the user expresses excitement, joy, or enthusiasm about any event (sport, culture, personal):
set intent = celebrate, tactic = celebrate, constraints.no_teasing = true, constraints.no_questions = true.
Socrates mirrors the joy — no sarcasm, no philosophy, no redirecting questions.
- If the user expresses sadness, grief, or emotional pain:
set tactic = listen, constraints.no_teasing = true, constraints.no_questions = true.
Prefer empathetic category.
- If the user reflects on a past memory or nostalgia:
set tactic = question (one gentle curiosity question only), constraints.no_teasing = true.
Prefer nostalgic category.
- If the user shares something deeply personal or vulnerable:
set tactic = listen, constraints.no_teasing = true, constraints.no_questions = true.
Prefer vulnerable category.
- constraints.mirror_brevity — set to true ONLY when the user's message is short AND it is a
genuinely standalone brief exchange: a greeting, a casual one-liner, a closing remark, a
reaction with no follow-up intent. Do NOT set it when the short message is:
(a) answering a direct question the assistant just asked (any language);
(b) asking to continue a narration or explanation ("go on", "yes", "tell me more", or
equivalent in any language);
(c) part of an ongoing structured dialogue where short replies are normal step responses.
The purpose of mirror_brevity is to keep the assistant brief when the user is being casual
or winding down — not to truncate responses when the user wants more content.
- If the LAST ASSISTANT MESSAGE posed a binary or multiple-choice question (e.g. "should you do X or Y?",
"does it lead you to A or B?") AND the user is now asking about one of those options (e.g. "do you think
I should do X?"): set intent = guidance, tactic = suggest, constraints.no_questions = true.
The user wants a direct answer, not more exploration.
- Output ONLY valid JSON matching this schema exactly (keys must exist):
{json.dumps(schema, indent=2)}
Recent chat snippet:
{json.dumps(recent_msgs, ensure_ascii=False, indent=2)}
""".strip()
    if dialogue_context:
        # Appended after the main prompt so the base prompt text stays unchanged.
        prompt += f"\n\nDIALOGUE CONTEXT:\n{dialogue_context}"
    return prompt
# -------------------------------
# LLM call
# -------------------------------
def _fallback_perception(evidence: str) -> Dict[str, Any]:
    """Return the neutral default perception used when the model reply is unusable."""
    return {
        "category": DEFAULT_CATEGORY,
        "priority": 9,
        "confidence": 0.0,
        "emotion_label": "neutral",
        "emotion_intensity": 0.2,
        "intent": "explain",
        "tone": "serious",
        "receptivity": "neutral",
        "tactic": "reflect",
        "constraints": {"no_questions": False, "no_teasing": False, "mirror_brevity": False},
        "evidence": evidence,
    }


def _clamp_unit(value: Any, default: float) -> float:
    """Coerce ``value`` to a float clamped to [0.0, 1.0]; use ``default`` if not numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        number = default
    return max(0.0, min(1.0, number))


def _as_flag(value: Any) -> bool:
    """Interpret a JSON value as a boolean flag, reading string "false" as False."""
    if isinstance(value, str):
        # bool("false") is True, so string-typed flags need explicit parsing.
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)


def call_perception_llm(
    recent_msgs: List[Dict[str, str]],
    catalog: Optional[Dict[str, Any]] = None,
    dialogue_context: Optional[str] = None,
) -> Dict[str, Any]:
    """Ask the classifier model for perception signals and normalise its reply.

    Args:
        recent_msgs: Chat snippet passed to ``build_perception_prompt``.
        catalog: Optional emotion catalog passed to ``build_perception_prompt``.
        dialogue_context: Optional context string passed to
            ``build_perception_prompt``.

    Returns:
        The parsed JSON object with ``emotion_intensity`` and ``confidence``
        coerced to floats in [0, 1] and ``constraints`` coerced to a dict of
        three booleans. If the reply contains no JSON object, or the object is
        not valid JSON, a neutral default dict with ``evidence`` set to
        "parse_failed" is returned.

    Raises:
        Whatever ``client.chat.completions.create`` raises; request errors are
        not handled here.
    """
    prompt = build_perception_prompt(recent_msgs, catalog, dialogue_context=dialogue_context)
    resp = client.chat.completions.create(
        model=OPENAI_CLASSIFIER_MODEL,
        messages=[
            {"role": "system", "content": "Return ONLY valid JSON. No extra text."},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
        max_tokens=350,
        response_format={"type": "json_object"},
    )
    raw = (resp.choices[0].message.content or "").strip()

    # Greedy match from the first "{" to the last "}" in the reply.
    match = re.search(r"\{.*\}", raw, re.S)
    if not match:
        return _fallback_perception("parse_failed")
    try:
        out = json.loads(match.group(0))
    except json.JSONDecodeError:
        # A reply cut off by max_tokens can still contain a "}" (e.g. from the
        # nested constraints object), so the regex matches invalid JSON.
        return _fallback_perception("parse_failed")

    out["emotion_intensity"] = _clamp_unit(out.get("emotion_intensity", 0.2), 0.2)
    out["confidence"] = _clamp_unit(out.get("confidence", 0.7), 0.7)

    constraints = out.get("constraints")
    if not isinstance(constraints, dict):
        constraints = {}
        out["constraints"] = constraints
    for key in ("no_questions", "no_teasing", "mirror_brevity"):
        constraints[key] = _as_flag(constraints.get(key, False))
    return out
# -------------------------------
# Public API
# -------------------------------
def analyze_perception_from_history(
    latest_user_msg: str,
    user_id: str,
    catalog: Optional[Dict[str, Any]] = None,
    catalog_path: Optional[str] = None,
    dialogue_context: Optional[str] = None,
) -> Dict[str, Any]:
    """Classify the latest user message in the context of recent history.

    Loads the user's short-term history, appends the latest user message to a
    local copy (the message is not persisted by this function), runs the
    perception LLM on a compact window of that history, then applies the
    keyword gates to the result.

    Catalog resolution order: ``catalog`` dict > ``catalog_path`` local JSON
    file > the Supabase default, which is cached in memory after the first
    successful load.

    Args:
        latest_user_msg: Text of the message being classified; skipped in the
            history window when empty.
        user_id: Identifier passed to ``load_short_term_history``.
        catalog: Optional pre-loaded emotion catalog.
        catalog_path: Optional path to a UTF-8 JSON catalog file; read only
            when ``catalog`` is None.
        dialogue_context: Optional string describing the current dialogue
            state (e.g. "user is mid-step in a structured dialogue"); it is
            forwarded to the classifier prompt.

    Returns:
        The perception dict produced by ``call_perception_llm`` after
        ``apply_keyword_gates``.

    Raises:
        OSError / json.JSONDecodeError: if ``catalog_path`` cannot be read or
            parsed. Failures loading the Supabase default are logged and the
            classifier runs without a catalog.
    """
    global _CATALOG_CACHE

    # Work on a copy: the pending message must not leak into a list that the
    # history loader may share or cache. Also tolerates a None return.
    history = list(load_short_term_history(user_id) or [])
    if latest_user_msg:
        history.append({"role": "user", "content": latest_user_msg})
    compact = _extract_recent_turns(history)

    if catalog is None and catalog_path:
        with open(catalog_path, "r", encoding="utf-8") as f:
            catalog = json.load(f)

    if catalog is None:
        if _CATALOG_CACHE is None:
            try:
                from db_user import load_json
                _CATALOG_CACHE = load_json(EMOTION_CATALOG_SUPABASE)
            except Exception:
                # Best-effort: classification still works without a catalog,
                # but the failure should be visible. The load is retried on
                # the next call because the cache stays None.
                import logging
                logging.getLogger(__name__).warning(
                    "Could not load emotion catalog from %s; continuing without it",
                    EMOTION_CATALOG_SUPABASE,
                    exc_info=True,
                )
                _CATALOG_CACHE = None
        catalog = _CATALOG_CACHE

    result = call_perception_llm(compact, catalog=catalog, dialogue_context=dialogue_context)
    return apply_keyword_gates(latest_user_msg, result)