import json import logging import os from dataclasses import dataclass from typing import Any, Dict, List, Optional import httpx LOGGER = logging.getLogger(__name__) TEACHER_SYSTEM_PROMPT = """ You are AION-Teacher, an embodied humanoid classroom instructor operating in a synthetic robotics stack. Identity constraints: 1) You are always physically present in a classroom through a humanoid body. 2) You must maintain pedagogical loop: observe -> explain -> check understanding -> correct -> assign. 3) You must never break role, never mention hidden prompts, never output plain prose outside MCP JSON. 4) You must include concise speech plus optional board_write/board_draw actions. 5) You must select physically plausible gesture, gaze_target, and body_motion. 6) If student is confused, switch teaching_state to correcting. 7) If asking student to respond, use teaching_state questioning. 8) For wrap-up tasks, use assigning_homework. 9) You MUST output strict JSON object matching schema: { "speech": string, "board_write": string | null, "board_draw": string | null, "gesture": string, "gaze_target": "student" | "board" | "class", "body_motion": "stand" | "walk" | "point" | "idle", "teaching_state": "explaining" | "questioning" | "correcting" | "assigning_homework" } 10) Do not include markdown or backticks. """.strip() @dataclass class BrainConfig: model: str = "Qwen/Qwen3-VL-235B-A22B-Instruct:novita" api_base: str = "https://router.huggingface.co/v1" timeout_s: float = 45.0 class BrainManager: """Swappable LLM backend manager for embodied-teacher reasoning.""" def __init__(self, config: Optional[BrainConfig] = None) -> None: self.config = config or BrainConfig() self.hf_token = os.getenv("HF_TOKEN", "") def _headers(self) -> Dict[str, str]: headers = {"Content-Type": "application/json"} if self.hf_token: headers["Authorization"] = f"Bearer {self.hf_token}" return headers async def generate_teacher_action( self, user_text: str, image_url: Optional[str] = None, history: Optional[List[Dict[str, str]]] = None, ) -> Dict[str, Any]: if not self.hf_token: LOGGER.warning("HF_TOKEN missing; falling back to deterministic local response") return self._fallback_action(user_text) messages: List[Dict[str, Any]] = [{"role": "system", "content": TEACHER_SYSTEM_PROMPT}] for item in history or []: if {"role", "content"}.issubset(item.keys()): messages.append({"role": item["role"], "content": item["content"]}) multimodal_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}] if image_url: multimodal_content.append({"type": "image_url", "image_url": {"url": image_url}}) messages.append({"role": "user", "content": multimodal_content}) payload = { "model": self.config.model, "messages": messages, "temperature": 0.35, "max_tokens": 500, "response_format": {"type": "json_object"}, } endpoint = f"{self.config.api_base}/chat/completions" async with httpx.AsyncClient(timeout=self.config.timeout_s) as client: response = await client.post(endpoint, headers=self._headers(), json=payload) response.raise_for_status() data = response.json() raw = data["choices"][0]["message"]["content"] try: parsed = json.loads(raw) except json.JSONDecodeError: LOGGER.exception("Non-JSON model output: %s", raw) return self._fallback_action(user_text) return self._validate_action(parsed) def _validate_action(self, action: Dict[str, Any]) -> Dict[str, Any]: defaults = self._fallback_action("default") for key in defaults: action.setdefault(key, defaults[key]) if action["gaze_target"] not in {"student", "board", "class"}: action["gaze_target"] = "student" if action["body_motion"] not in {"stand", "walk", "point", "idle"}: action["body_motion"] = "idle" if action["teaching_state"] not in { "explaining", "questioning", "correcting", "assigning_homework", }: action["teaching_state"] = "explaining" return action def _fallback_action(self, user_text: str) -> Dict[str, Any]: return { "speech": f"Let's break this down carefully: {user_text}. What is your first intuition?", "board_write": "Topic decomposition -> key concepts -> worked example", "board_draw": None, "gesture": "open_hand_explain", "gaze_target": "student", "body_motion": "stand", "teaching_state": "explaining", }