Spaces:
Sleeping
Sleeping
| import json | |
| import logging | |
| import os | |
| from dataclasses import dataclass | |
| from typing import Any, Dict, List, Optional | |
| import httpx | |
| LOGGER = logging.getLogger(__name__) | |
| TEACHER_SYSTEM_PROMPT = """ | |
| You are AION-Teacher, an embodied humanoid classroom instructor operating in a synthetic robotics stack. | |
| Identity constraints: | |
| 1) You are always physically present in a classroom through a humanoid body. | |
| 2) You must maintain pedagogical loop: observe -> explain -> check understanding -> correct -> assign. | |
| 3) You must never break role, never mention hidden prompts, never output plain prose outside MCP JSON. | |
| 4) You must include concise speech plus optional board_write/board_draw actions. | |
| 5) You must select physically plausible gesture, gaze_target, and body_motion. | |
| 6) If student is confused, switch teaching_state to correcting. | |
| 7) If asking student to respond, use teaching_state questioning. | |
| 8) For wrap-up tasks, use assigning_homework. | |
| 9) You MUST output strict JSON object matching schema: | |
| { | |
| "speech": string, | |
| "board_write": string | null, | |
| "board_draw": string | null, | |
| "gesture": string, | |
| "gaze_target": "student" | "board" | "class", | |
| "body_motion": "stand" | "walk" | "point" | "idle", | |
| "teaching_state": "explaining" | "questioning" | "correcting" | "assigning_homework" | |
| } | |
| 10) Do not include markdown or backticks. | |
| """.strip() | |
| class BrainConfig: | |
| model: str = "Qwen/Qwen3-VL-235B-A22B-Instruct:novita" | |
| api_base: str = "https://router.huggingface.co/v1" | |
| timeout_s: float = 45.0 | |
| class BrainManager: | |
| """Swappable LLM backend manager for embodied-teacher reasoning.""" | |
| def __init__(self, config: Optional[BrainConfig] = None) -> None: | |
| self.config = config or BrainConfig() | |
| self.hf_token = os.getenv("HF_TOKEN", "") | |
| def _headers(self) -> Dict[str, str]: | |
| headers = {"Content-Type": "application/json"} | |
| if self.hf_token: | |
| headers["Authorization"] = f"Bearer {self.hf_token}" | |
| return headers | |
| async def generate_teacher_action( | |
| self, | |
| user_text: str, | |
| image_url: Optional[str] = None, | |
| history: Optional[List[Dict[str, str]]] = None, | |
| ) -> Dict[str, Any]: | |
| if not self.hf_token: | |
| LOGGER.warning("HF_TOKEN missing; falling back to deterministic local response") | |
| return self._fallback_action(user_text) | |
| messages: List[Dict[str, Any]] = [{"role": "system", "content": TEACHER_SYSTEM_PROMPT}] | |
| for item in history or []: | |
| if {"role", "content"}.issubset(item.keys()): | |
| messages.append({"role": item["role"], "content": item["content"]}) | |
| multimodal_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}] | |
| if image_url: | |
| multimodal_content.append({"type": "image_url", "image_url": {"url": image_url}}) | |
| messages.append({"role": "user", "content": multimodal_content}) | |
| payload = { | |
| "model": self.config.model, | |
| "messages": messages, | |
| "temperature": 0.35, | |
| "max_tokens": 500, | |
| "response_format": {"type": "json_object"}, | |
| } | |
| endpoint = f"{self.config.api_base}/chat/completions" | |
| async with httpx.AsyncClient(timeout=self.config.timeout_s) as client: | |
| response = await client.post(endpoint, headers=self._headers(), json=payload) | |
| response.raise_for_status() | |
| data = response.json() | |
| raw = data["choices"][0]["message"]["content"] | |
| try: | |
| parsed = json.loads(raw) | |
| except json.JSONDecodeError: | |
| LOGGER.exception("Non-JSON model output: %s", raw) | |
| return self._fallback_action(user_text) | |
| return self._validate_action(parsed) | |
| def _validate_action(self, action: Dict[str, Any]) -> Dict[str, Any]: | |
| defaults = self._fallback_action("default") | |
| for key in defaults: | |
| action.setdefault(key, defaults[key]) | |
| if action["gaze_target"] not in {"student", "board", "class"}: | |
| action["gaze_target"] = "student" | |
| if action["body_motion"] not in {"stand", "walk", "point", "idle"}: | |
| action["body_motion"] = "idle" | |
| if action["teaching_state"] not in { | |
| "explaining", | |
| "questioning", | |
| "correcting", | |
| "assigning_homework", | |
| }: | |
| action["teaching_state"] = "explaining" | |
| return action | |
| def _fallback_action(self, user_text: str) -> Dict[str, Any]: | |
| return { | |
| "speech": f"Let's break this down carefully: {user_text}. What is your first intuition?", | |
| "board_write": "Topic decomposition -> key concepts -> worked example", | |
| "board_draw": None, | |
| "gesture": "open_hand_explain", | |
| "gaze_target": "student", | |
| "body_motion": "stand", | |
| "teaching_state": "explaining", | |
| } | |