Spaces:

PYAE1994
/

openhands-backend

Sleeping

App Files Files Community

openhands-backend / intent.py

PYAE1994

Phase 1 backend deploy

46258b3 verified 17 days ago

raw

history blame contribute delete

5.1 kB

	"""
	Execution intent detection.

	Decides whether a user prompt needs a real E2B sandbox or is just chat.

	Strategy:
	1. Fast keyword/regex heuristics (free, deterministic)
	2. If ambiguous → fall back to LLM classification (cheap)

	Output: ExecutionIntent dataclass
	"""

	from __future__ import annotations

	import json
	import logging
	import re
	from dataclasses import dataclass
	from typing import List, Optional

	from . import llm_router

	logger = logging.getLogger(__name__)


	@dataclass
	class ExecutionIntent:
	    """Verdict on whether a user prompt needs a sandbox or is plain chat."""

	    # True when the prompt should be handled with real code/shell execution.
	    needs_sandbox: bool
	    # Short human-readable explanation of how the verdict was reached.
	    reason: str
	    # Confidence in the verdict, on a 0..1 scale.
	    confidence: float


	# ----------------------------------------------------------------------------
	# Heuristic rules
	# ----------------------------------------------------------------------------

	# Strong execution signals: each regex, when found anywhere in the prompt,
	# counts as one vote for "needs a sandbox".
	_EXEC_PATTERNS = [
	    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
	    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
	    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
	    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
	    r"\b(install|pip install|npm install|apt install)\b",
	    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
	    r"```(python|bash|sh|shell|javascript|js|node)\b",
	    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
	    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
	    r"\bdebug\s+(this|my)\b",
	    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
	]

	# Strong chat-only signals: each regex counts as one vote for "just chat".
	_CHAT_PATTERNS = [
	    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
	    r"^\s*(thanks|thank you|thx|ty)\b",
	    # A whole line that is a simple question: "<wh-word> is/are/do/does ... ?"
	    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
	    # "explain ..." only counts as chat when no execution verb follows it.
	    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
	    r"\b(define|definition\s+of)\b",
	    r"\btell me about\b",
	    r"\bdifference between\b",
	]

	# MULTILINE lets the ^/$ anchors above match on any line of the prompt.
	_EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS]
	_CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS]


	# Opening code fence tagged with a runnable language.
	# NOTE(review): there is no trailing \b here, so tags that merely start with
	# one of these names (e.g. ```json, ```python3) also match. Confirm intended.
	_FENCED_CODE_RE = re.compile(r"```(python|bash|sh|shell|js|node)", re.IGNORECASE)


	def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
	    """Classify a prompt with regex heuristics only.

	    Counts how many execution patterns and chat patterns match the stripped
	    prompt and turns the two counts into a verdict.

	    Args:
	        prompt: Raw user prompt. ``None`` or an empty string is treated as an
	            empty prompt.

	    Returns:
	        An ``ExecutionIntent`` when the signal is unambiguous, or ``None``
	        when the caller should fall back to another classifier (no signal on
	        a prompt of 30+ characters, or mixed execution/chat signals).
	    """
	    text = (prompt or "").strip()
	    if not text:
	        return ExecutionIntent(False, "empty prompt", 1.0)

	    exec_hits = sum(1 for rx in _EXEC_RE if rx.search(text))
	    chat_hits = sum(1 for rx in _CHAT_RE if rx.search(text))

	    if exec_hits == 0:
	        # A language-tagged code fence with no other execution signal still
	        # likely means the user wants it run; this outranks chat matches.
	        if "```" in text and _FENCED_CODE_RE.search(text):
	            return ExecutionIntent(True, "code block detected", 0.85)
	        if chat_hits >= 1:
	            return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)
	        # No signal either way: very short prompts are usually chat.
	        if len(text) < 30:
	            return ExecutionIntent(False, "short prompt, likely chat", 0.7)
	        return None  # ambiguous → ask LLM

	    if chat_hits == 0:
	        return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)

	    # Mixed signals → ask LLM
	    return None


	# ----------------------------------------------------------------------------
	# LLM fallback classifier
	# ----------------------------------------------------------------------------

	# System prompt for the LLM fallback classifier. It asks for a bare JSON
	# object with exactly the keys that llm_detect() reads back.
	_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
	requires running real code or shell commands in a sandbox computer.

	Reply ONLY with strict JSON, no prose:
	{"needs_sandbox": true|false, "reason": "<short explanation>"}

	Rules:
	- needs_sandbox = true when the user wants to run code, execute shell commands,
	create/modify files, install packages, debug a running program, or otherwise
	observe real execution results.
	- needs_sandbox = false when the user asks for explanations, greetings,
	brainstorming, advice, or static code review with no run request.
	"""


	def _parse_classifier_reply(raw: str) -> tuple[bool, str]:
	    """Extract ``(needs_sandbox, reason)`` from the classifier's raw reply.

	    Args:
	        raw: Text returned by the model; expected to be a JSON object,
	            optionally wrapped in a code fence or surrounded by prose.

	    Returns:
	        The sandbox flag and a reason string capped at 200 characters.

	    Raises:
	        ValueError: If no JSON object can be decoded from ``raw``
	            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
	    """
	    # Tolerate models that wrap JSON in code fences
	    text = re.sub(
	        r"^```(?:json)?|```$", "", raw.strip(), flags=re.IGNORECASE | re.MULTILINE
	    ).strip()
	    try:
	        obj = json.loads(text)
	    except json.JSONDecodeError:
	        # Some models add prose around the object; salvage the outermost {...}.
	        embedded = re.search(r"\{.*\}", text, re.DOTALL)
	        if embedded is None:
	            raise
	        obj = json.loads(embedded.group(0))
	    if not isinstance(obj, dict):
	        raise ValueError("classifier reply is not a JSON object")

	    flag = obj.get("needs_sandbox", False)
	    if isinstance(flag, str):
	        # bool("false") is True, so string answers must be compared by value.
	        flag = flag.strip().lower() in ("true", "yes", "1")
	    return bool(flag), str(obj.get("reason", "llm classifier"))[:200]


	async def llm_detect(prompt: str) -> ExecutionIntent:
	    """Classify a prompt by asking the LLM router for a JSON verdict.

	    Sends the classifier system prompt plus the first 2000 characters of
	    ``prompt`` to ``llm_router.complete`` and parses the ``"content"`` field
	    of the result.

	    Args:
	        prompt: Raw user prompt; ``None`` is treated as an empty string.

	    Returns:
	        An ``ExecutionIntent`` with confidence 0.75 on success. If the call
	        or the parsing fails for any reason, a no-sandbox intent with
	        confidence 0.3 whose reason carries the error text.
	    """
	    messages = [
	        {"role": "system", "content": _CLASSIFIER_SYSTEM},
	        {"role": "user", "content": (prompt or "")[:2000]},
	    ]
	    try:
	        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
	        needs_sandbox, reason = _parse_classifier_reply(result["content"])
	    except Exception as e:
	        # Broad on purpose: classification is best-effort and must never
	        # break the request, whatever the router or the parser raises.
	        logger.warning("LLM intent classifier failed: %s", e)
	        # Conservative default → no sandbox
	        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
	    return ExecutionIntent(needs_sandbox=needs_sandbox, reason=reason, confidence=0.75)


	async def detect(prompt: str) -> ExecutionIntent:
	    """Classify a prompt: regex heuristics first, LLM classifier as fallback.

	    The LLM is awaited only when the heuristic pass returns ``None``.
	    """
	    verdict = heuristic_detect(prompt)
	    return verdict if verdict is not None else await llm_detect(prompt)