""" Execution intent detection. Decides whether a user prompt needs a real E2B sandbox or is just chat. Strategy: 1. Fast keyword/regex heuristics (free, deterministic) 2. If ambiguous → fall back to LLM classification (cheap) Output: ExecutionIntent dataclass """ from __future__ import annotations import json import logging import re from dataclasses import dataclass from typing import List, Optional from . import llm_router logger = logging.getLogger(__name__) @dataclass class ExecutionIntent: needs_sandbox: bool reason: str confidence: float # 0..1 # ---------------------------------------------------------------------------- # Heuristic rules # ---------------------------------------------------------------------------- # Strong execution signals _EXEC_PATTERNS = [ r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)", r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)", r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b", r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)", r"\b(install|pip install|npm install|apt install)\b", r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+", r"```(python|bash|sh|shell|javascript|js|node)\b", r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)", r"\b(proof\.txt|test\.py|script\.py|main\.py)\b", r"\bdebug\s+(this|my)\b", r"\bbuild\s+(an?\s+)?(app|website|api|server|script)", ] # Strong chat-only signals _CHAT_PATTERNS = [ r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b", r"^\s*(thanks|thank you|thx|ty)\b", r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$", r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)", r"\b(define|definition\s+of)\b", r"\btell me about\b", r"\bdifference between\b", ] _EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS] _CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS] def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]: """Return strong-signal intent, or None if ambiguous.""" p = (prompt or "").strip() if not p: return ExecutionIntent(False, "empty prompt", 1.0) exec_hits = sum(1 for r in _EXEC_RE if r.search(p)) chat_hits = sum(1 for r in _CHAT_RE if r.search(p)) # Triple-backtick code block always implies execution intent if "```" in p and exec_hits == 0: # bare code block without verb → still likely wants execution if re.search(r"```(python|bash|sh|shell|js|node)", p, re.IGNORECASE): return ExecutionIntent(True, "code block detected", 0.85) if exec_hits >= 1 and chat_hits == 0: return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9) if chat_hits >= 1 and exec_hits == 0: return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9) if exec_hits == 0 and chat_hits == 0: # Very short prompts are usually chat if len(p) < 30: return ExecutionIntent(False, "short prompt, likely chat", 0.7) return None # ambiguous → ask LLM # Mixed signals → ask LLM return None # ---------------------------------------------------------------------------- # LLM fallback classifier # ---------------------------------------------------------------------------- _CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message requires running real code or shell commands in a sandbox computer. Reply ONLY with strict JSON, no prose: {"needs_sandbox": true|false, "reason": ""} Rules: - needs_sandbox = true when the user wants to run code, execute shell commands, create/modify files, install packages, debug a running program, or otherwise observe real execution results. - needs_sandbox = false when the user asks for explanations, greetings, brainstorming, advice, or static code review with no run request. """ async def llm_detect(prompt: str) -> ExecutionIntent: messages = [ {"role": "system", "content": _CLASSIFIER_SYSTEM}, {"role": "user", "content": prompt[:2000]}, ] try: result = await llm_router.complete(messages, temperature=0.0, max_tokens=120) text = result["content"].strip() # Tolerate models that wrap JSON in code fences text = re.sub(r"^```(?:json)?|```$", "", text.strip(), flags=re.MULTILINE).strip() obj = json.loads(text) return ExecutionIntent( needs_sandbox=bool(obj.get("needs_sandbox", False)), reason=str(obj.get("reason", "llm classifier"))[:200], confidence=0.75, ) except Exception as e: logger.warning("LLM intent classifier failed: %s", e) # Conservative default → no sandbox return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3) async def detect(prompt: str) -> ExecutionIntent: """Top-level: heuristic first, LLM fallback.""" h = heuristic_detect(prompt) if h is not None: return h return await llm_detect(prompt)