openhands-backend / intent.py
PYAE1994's picture
Phase 1 backend deploy
46258b3 verified
"""
Execution intent detection.
Decides whether a user prompt needs a real E2B sandbox or is just chat.
Strategy:
1. Fast keyword/regex heuristics (free, deterministic)
2. If ambiguous → fall back to LLM classification (cheap)
Output: ExecutionIntent dataclass
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass
from typing import List, Optional
from . import llm_router
logger = logging.getLogger(__name__)
@dataclass
class ExecutionIntent:
    """Verdict on whether a prompt needs a sandbox, with a short justification."""

    # True when the prompt should be routed to a real sandbox, False for chat.
    needs_sandbox: bool
    # Short human-readable explanation of how the verdict was reached.
    reason: str
    # Confidence in the verdict, in the range 0..1.
    confidence: float
# ----------------------------------------------------------------------------
# Heuristic rules
# ----------------------------------------------------------------------------
def _compile_all(sources):
    """Compile each regex source case-insensitively and in multi-line mode."""
    return [re.compile(src, re.IGNORECASE | re.MULTILINE) for src in sources]


# Regex sources that count as a strong "the user wants something executed" signal.
_EXEC_PATTERNS = [
    # Explicit run / execute verbs followed by something runnable.
    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
    # File-system and package-management requests.
    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
    r"\b(install|pip install|npm install|apt install)\b",
    # Common shell commands followed by whitespace.
    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
    # Fenced code block tagged with a runnable language.
    r"```(python|bash|sh|shell|javascript|js|node)\b",
    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
    # Well-known file names.
    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
    r"\bdebug\s+(this|my)\b",
    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
]

# Regex sources that count as a strong "this is just conversation" signal.
_CHAT_PATTERNS = [
    # Greetings and thanks at the start of a line.
    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
    r"^\s*(thanks|thank you|thx|ty)\b",
    # Single-line factual question ending in a question mark.
    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
    # "explain ..." unless run/execute/build follows on the same line.
    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
    r"\b(define|definition\s+of)\b",
    r"\btell me about\b",
    r"\bdifference between\b",
]

# Compiled once at import time so each detection call only runs searches.
_EXEC_RE = _compile_all(_EXEC_PATTERNS)
_CHAT_RE = _compile_all(_CHAT_PATTERNS)
def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
    """Classify a prompt using regex heuristics only.

    Counts how many execution patterns and how many chat patterns match, and
    returns a verdict only when the signal is one-sided.

    Args:
        prompt: Raw user message. ``None`` or whitespace-only input is treated
            as an empty prompt.

    Returns:
        An ``ExecutionIntent`` when the heuristics are decisive, or ``None``
        when the prompt is ambiguous (mixed signals, or no signal at all on a
        prompt of 30 characters or more) so the caller can ask the LLM.
    """
    p = (prompt or "").strip()
    if not p:
        return ExecutionIntent(False, "empty prompt", 1.0)

    exec_hits = sum(1 for r in _EXEC_RE if r.search(p))
    chat_hits = sum(1 for r in _CHAT_RE if r.search(p))

    # Safety net: a fenced block tagged with a runnable language counts as an
    # execution request even when no execution pattern matched. The trailing
    # \b is required: without it "```json" (or "```jsx") prefix-matches "js"
    # and a plain data snippet would be sent to the sandbox.
    if exec_hits == 0 and "```" in p:
        if re.search(
            r"```(python|bash|sh|shell|javascript|js|node)\b", p, re.IGNORECASE
        ):
            return ExecutionIntent(True, "code block detected", 0.85)

    if exec_hits >= 1 and chat_hits == 0:
        return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
    if chat_hits >= 1 and exec_hits == 0:
        return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)

    # No signal either way: very short prompts are usually chat.
    if exec_hits == 0 and chat_hits == 0 and len(p) < 30:
        return ExecutionIntent(False, "short prompt, likely chat", 0.7)

    # Mixed signals, or a longer prompt with no signal -> ask the LLM.
    return None
# ----------------------------------------------------------------------------
# LLM fallback classifier
# ----------------------------------------------------------------------------
# System prompt for the LLM fallback classifier. It asks the model to answer
# with a single JSON object carrying the "needs_sandbox" and "reason" keys that
# llm_detect() reads back. The text is sent verbatim, so editing it changes
# classifier behavior.
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
requires running real code or shell commands in a sandbox computer.
Reply ONLY with strict JSON, no prose:
{"needs_sandbox": true|false, "reason": "<short explanation>"}
Rules:
- needs_sandbox = true when the user wants to run code, execute shell commands,
create/modify files, install packages, debug a running program, or otherwise
observe real execution results.
- needs_sandbox = false when the user asks for explanations, greetings,
brainstorming, advice, or static code review with no run request.
"""
def _coerce_flag(value: object) -> bool:
    """Interpret a decoded JSON value as a boolean flag.

    ``bool("false")`` is ``True`` in Python, so a model that quotes its
    booleans must be compared against the literal ``"true"`` instead.
    """
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value)


async def llm_detect(prompt: str) -> ExecutionIntent:
    """Classify a prompt by asking the LLM for a JSON verdict.

    Sends the classifier system prompt plus the first 2000 characters of
    ``prompt`` to ``llm_router.complete`` and parses the reply's ``"content"``
    as a JSON object with ``needs_sandbox`` and ``reason`` keys.

    Args:
        prompt: Raw user message; ``None`` is treated as an empty string.

    Returns:
        An ``ExecutionIntent`` with confidence 0.75 when the reply parses, or
        a conservative no-sandbox intent with confidence 0.3 when the call or
        the parsing fails. This function does not propagate ``Exception``.
    """
    messages = [
        {"role": "system", "content": _CLASSIFIER_SYSTEM},
        {"role": "user", "content": (prompt or "")[:2000]},
    ]
    try:
        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
        text = result["content"].strip()
        # Tolerate models that wrap JSON in code fences (any letter case).
        text = re.sub(
            r"^```(?:json)?|```$", "", text, flags=re.MULTILINE | re.IGNORECASE
        ).strip()
        # Tolerate prose around the object: keep only the outermost {...} span.
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            text = text[start : end + 1]
        obj = json.loads(text)
        if not isinstance(obj, dict):
            raise ValueError(f"expected a JSON object, got {type(obj).__name__}")
        return ExecutionIntent(
            needs_sandbox=_coerce_flag(obj.get("needs_sandbox", False)),
            reason=str(obj.get("reason", "llm classifier"))[:200],
            confidence=0.75,
        )
    except Exception as e:
        # Deliberately broad: any router or parsing failure degrades to the
        # conservative default below instead of failing the request.
        logger.warning("LLM intent classifier failed: %s", e)
        # Conservative default -> no sandbox
        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
async def detect(prompt: str) -> ExecutionIntent:
    """Resolve the execution intent of ``prompt``.

    Tries the regex heuristics first and only awaits the LLM classifier when
    the heuristics return ``None`` (ambiguous prompt).
    """
    quick_verdict = heuristic_detect(prompt)
    if quick_verdict is None:
        return await llm_detect(prompt)
    return quick_verdict