Spaces:
Sleeping
Sleeping
| """ | |
| Execution intent detection. | |
| Decides whether a user prompt needs a real E2B sandbox or is just chat. | |
| Strategy: | |
| 1. Fast keyword/regex heuristics (free, deterministic) | |
| 2. If ambiguous → fall back to LLM classification (cheap) | |
| Output: ExecutionIntent dataclass | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import re | |
| from dataclasses import dataclass | |
| from typing import List, Optional | |
| from . import llm_router | |
| logger = logging.getLogger(__name__) | |
@dataclass
class ExecutionIntent:
    """Outcome of deciding whether a prompt needs a sandbox.

    Declared as a dataclass so it can be built positionally or by keyword,
    e.g. ``ExecutionIntent(False, "empty prompt", 1.0)``.
    """

    # True when the prompt should be executed in a sandbox, False for chat.
    needs_sandbox: bool
    # Short human-readable explanation of how the decision was reached.
    reason: str
    # How sure the detector is about the decision.
    confidence: float  # 0..1
# ----------------------------------------------------------------------------
# Heuristic rules
# ----------------------------------------------------------------------------

# Regexes whose match counts as a strong "execute something" signal.
_EXEC_PATTERNS = [
    # Explicit run / execute requests.
    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
    # Creating or writing files and directories.
    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
    # Package installation.
    r"\b(install|pip install|npm install|apt install)\b",
    # A shell command name followed by whitespace.
    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
    # Fenced code block tagged with a runnable language.
    r"```(python|bash|sh|shell|javascript|js|node)\b",
    # Timestamp / current time mentioned before a file-writing word.
    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
    # Specific script / proof file names.
    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
    # Debug and build requests.
    r"\bdebug\s+(this|my)\b",
    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
]

# Regexes whose match counts as a strong "just chat" signal.
_CHAT_PATTERNS = [
    # Greetings and thanks at the start of a line.
    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
    r"^\s*(thanks|thank you|thx|ty)\b",
    # Simple wh-questions that end the line with a question mark.
    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
    # "explain ..." unless a run/execute/build verb follows later on the line.
    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
    # Requests for definitions, descriptions or comparisons.
    r"\b(define|definition\s+of)\b",
    r"\btell me about\b",
    r"\bdifference between\b",
]


def _compile_all(patterns: List[str]) -> List[re.Pattern]:
    """Compile *patterns* case-insensitively with per-line ``^`` / ``$`` anchors."""
    flags = re.IGNORECASE | re.MULTILINE
    return [re.compile(pattern, flags) for pattern in patterns]


_EXEC_RE = _compile_all(_EXEC_PATTERNS)
_CHAT_RE = _compile_all(_CHAT_PATTERNS)
# Fence tags for runnable languages that the \b-anchored fence pattern in
# _EXEC_PATTERNS does not match (e.g. "```python3"). The negative lookahead
# stops a tag from prefix-matching a longer alphabetic tag, so a "```json" or
# "```jsx" fence is no longer mistaken for "```js".
_BARE_FENCE_RE = re.compile(
    r"```(python|bash|sh|shell|js|node)(?![a-z])", re.IGNORECASE
)


def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
    """Classify *prompt* from regex signals alone.

    Args:
        prompt: Raw user message. ``None`` and whitespace-only input are
            treated as an empty prompt.

    Returns:
        An ``ExecutionIntent`` when the signals point one way, or ``None``
        when they are mixed, or absent on a prompt of 30+ characters, so the
        caller can fall back to another classifier.
    """
    text = (prompt or "").strip()
    if not text:
        return ExecutionIntent(False, "empty prompt", 1.0)

    exec_hits = sum(1 for rx in _EXEC_RE if rx.search(text))
    chat_hits = sum(1 for rx in _CHAT_RE if rx.search(text))

    # A runnable-language fence that no execution pattern caught still counts
    # as execution intent; this is checked before chat signals are considered.
    if exec_hits == 0 and _BARE_FENCE_RE.search(text):
        return ExecutionIntent(True, "code block detected", 0.85)

    if exec_hits >= 1 and chat_hits == 0:
        return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
    if chat_hits >= 1 and exec_hits == 0:
        return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)

    if exec_hits == 0 and chat_hits == 0 and len(text) < 30:
        # Very short prompts with no signal are usually chat.
        return ExecutionIntent(False, "short prompt, likely chat", 0.7)

    # No signal on a longer prompt, or mixed signals → ambiguous.
    return None
# ----------------------------------------------------------------------------
# LLM fallback classifier
# ----------------------------------------------------------------------------
# System prompt sent with every classification request. It instructs the model
# to answer with a JSON object holding "needs_sandbox" and "reason", which are
# the two keys llm_detect reads from the reply.
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
requires running real code or shell commands in a sandbox computer.
Reply ONLY with strict JSON, no prose:
{"needs_sandbox": true|false, "reason": "<short explanation>"}
Rules:
- needs_sandbox = true when the user wants to run code, execute shell commands,
create/modify files, install packages, debug a running program, or otherwise
observe real execution results.
- needs_sandbox = false when the user asks for explanations, greetings,
brainstorming, advice, or static code review with no run request.
"""
def _coerce_flag(value: object) -> bool:
    """Interpret a decoded JSON value as a boolean flag.

    Strings are judged by their content, because ``bool("false")`` is ``True``
    and a model may quote the value instead of emitting a JSON boolean.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "1"}
    return bool(value)


async def llm_detect(prompt: str) -> ExecutionIntent:
    """Classify *prompt* by asking the LLM for a JSON verdict.

    Args:
        prompt: User message; only its first 2000 characters are sent.
            ``None`` is treated as an empty message.

    Returns:
        An ``ExecutionIntent`` with confidence 0.75 built from the model's
        reply. If the call or the parsing fails for any reason, the failure is
        logged and a no-sandbox intent with confidence 0.3 is returned instead
        of raising.
    """
    messages = [
        {"role": "system", "content": _CLASSIFIER_SYSTEM},
        {"role": "user", "content": (prompt or "")[:2000]},
    ]
    try:
        # The result is used as a mapping with a "content" string.
        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
        text = result["content"].strip()
        # Tolerate models that wrap JSON in code fences
        text = re.sub(r"^```(?:json)?|```$", "", text, flags=re.MULTILINE).strip()
        obj = json.loads(text)
        return ExecutionIntent(
            needs_sandbox=_coerce_flag(obj.get("needs_sandbox", False)),
            reason=str(obj.get("reason", "llm classifier"))[:200],
            confidence=0.75,
        )
    except Exception as e:
        # Best-effort boundary: any failure (transport, bad JSON, non-object
        # reply) degrades to the conservative default instead of propagating.
        logger.warning("LLM intent classifier failed: %s", e)
        # Conservative default → no sandbox
        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
async def detect(prompt: str) -> ExecutionIntent:
    """Return the intent for *prompt*, trying regex heuristics before the LLM.

    The LLM classifier is awaited only when the heuristics return ``None``.
    """
    verdict = heuristic_detect(prompt)
    return verdict if verdict is not None else await llm_detect(prompt)