# Web-page header captured with this file: Spaces: Sleeping · File size: 5,101 Bytes · 46258b3
"""
Execution intent detection.
Decides whether a user prompt needs a real E2B sandbox or is just chat.
Strategy:
1. Fast keyword/regex heuristics (free, deterministic)
2. If ambiguous → fall back to LLM classification (cheap)
Output: ExecutionIntent dataclass
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass
from typing import List, Optional
from . import llm_router
logger = logging.getLogger(__name__)
@dataclass
class ExecutionIntent:
    """Verdict on whether a user prompt needs a sandbox or is plain chat.

    Instances are returned by the heuristic and by the LLM classifier in
    this module.
    """

    # True when the prompt should be handled in a sandbox, False for chat.
    needs_sandbox: bool
    # Short human-readable explanation of how the verdict was reached.
    reason: str
    # How sure the detector is about the verdict.
    confidence: float # 0..1
# ----------------------------------------------------------------------------
# Heuristic rules
# ----------------------------------------------------------------------------
# Regex sources whose match suggests the user wants something executed.
_EXEC_PATTERNS = [
    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
    r"\b(install|pip install|npm install|apt install)\b",
    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
    r"```(python|bash|sh|shell|javascript|js|node)\b",
    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
    r"\bdebug\s+(this|my)\b",
    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
]

# Regex sources whose match suggests plain conversation.
_CHAT_PATTERNS = [
    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
    r"^\s*(thanks|thank you|thx|ty)\b",
    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
    r"\b(define|definition\s+of)\b",
    r"\btell me about\b",
    r"\bdifference between\b",
]


def _compile_all(sources: List[str]) -> List[re.Pattern]:
    """Compile every source case-insensitively, with ^/$ anchoring per line."""
    flags = re.IGNORECASE | re.MULTILINE
    return [re.compile(source, flags) for source in sources]


# Compiled once at import time so each detection call only runs searches.
_EXEC_RE = _compile_all(_EXEC_PATTERNS)
_CHAT_RE = _compile_all(_CHAT_PATTERNS)
def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
    """Classify *prompt* with the regex rules alone.

    Returns an ``ExecutionIntent`` when the rules give a clear answer, and
    ``None`` when they do not (no match on a longer prompt, or both
    execution and chat patterns matched).
    """
    text = (prompt or "").strip()
    if not text:
        return ExecutionIntent(False, "empty prompt", 1.0)

    # Count how many rules of each family fire at least once.
    n_exec = sum(bool(rx.search(text)) for rx in _EXEC_RE)
    n_chat = sum(bool(rx.search(text)) for rx in _CHAT_RE)

    # A language-tagged fence that slipped past the execution rules still
    # counts as an execution request, whatever the chat rules say.
    if (
        n_exec == 0
        and "```" in text
        and re.search(r"```(python|bash|sh|shell|js|node)", text, re.IGNORECASE)
    ):
        return ExecutionIntent(True, "code block detected", 0.85)

    # Both families matched: the signals conflict, so defer to the caller.
    if n_exec and n_chat:
        return None

    if n_exec:
        return ExecutionIntent(True, f"matched {n_exec} execution pattern(s)", 0.9)

    if n_chat:
        return ExecutionIntent(False, f"matched {n_chat} chat pattern(s)", 0.9)

    # Nothing matched: very short prompts are treated as chat, the rest
    # are left undecided.
    if len(text) < 30:
        return ExecutionIntent(False, "short prompt, likely chat", 0.7)
    return None
# ----------------------------------------------------------------------------
# LLM fallback classifier
# ----------------------------------------------------------------------------
# System prompt for the fallback classifier. It asks the model to answer with
# a bare JSON object holding "needs_sandbox" and "reason", which are the two
# keys llm_detect reads from the parsed reply.
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
requires running real code or shell commands in a sandbox computer.
Reply ONLY with strict JSON, no prose:
{"needs_sandbox": true|false, "reason": "<short explanation>"}
Rules:
- needs_sandbox = true when the user wants to run code, execute shell commands,
create/modify files, install packages, debug a running program, or otherwise
observe real execution results.
- needs_sandbox = false when the user asks for explanations, greetings,
brainstorming, advice, or static code review with no run request.
"""
async def llm_detect(prompt: str) -> ExecutionIntent:
    """Classify *prompt* by asking the LLM for a JSON verdict.

    Sends the classifier system prompt plus the first 2000 characters of
    *prompt* to ``llm_router.complete`` and parses the reply as a JSON
    object with ``needs_sandbox`` and ``reason`` keys.

    Returns:
        An ``ExecutionIntent`` with confidence 0.75 when the reply parses.
        If the call or the parsing fails, the failure is logged and a
        no-sandbox intent with confidence 0.3 is returned instead of raising.
    """
    messages = [
        {"role": "system", "content": _CLASSIFIER_SYSTEM},
        # Cap the prompt length; a missing prompt is sent as an empty string,
        # mirroring how heuristic_detect tolerates None.
        {"role": "user", "content": (prompt or "")[:2000]},
    ]
    try:
        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
        text = result["content"].strip()
        # Tolerate models that wrap JSON in code fences
        text = re.sub(r"^```(?:json)?|```$", "", text, flags=re.MULTILINE).strip()
        obj = json.loads(text)

        flag = obj.get("needs_sandbox", False)
        if isinstance(flag, str):
            # A quoted "false" is a non-empty string and therefore truthy, so
            # bool() would turn it into True. Compare the text instead; any
            # unrecognised string falls back to the conservative False.
            needs_sandbox = flag.strip().lower() in ("true", "yes", "1")
        else:
            needs_sandbox = bool(flag)

        return ExecutionIntent(
            needs_sandbox=needs_sandbox,
            reason=str(obj.get("reason", "llm classifier"))[:200],
            confidence=0.75,
        )
    except Exception as e:
        # Deliberate best-effort boundary: router errors, invalid JSON and
        # unexpected reply shapes all end up here.
        logger.warning("LLM intent classifier failed: %s", e)
        # Conservative default → no sandbox
        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
async def detect(prompt: str) -> ExecutionIntent:
    """Classify *prompt*, trying the regex heuristics before the LLM.

    The LLM classifier is awaited only when ``heuristic_detect`` returns
    ``None``.
    """
    verdict = heuristic_detect(prompt)
    if verdict is None:
        return await llm_detect(prompt)
    return verdict
|