Spaces:

PYAE1994
/

openhands-backend

Sleeping

File size: 5,101 Bytes

46258b3

"""
Execution intent detection.

Decides whether a user prompt needs a real E2B sandbox or is just chat.

Strategy:
  1. Fast keyword/regex heuristics (free, deterministic)
  2. If ambiguous → fall back to LLM classification (cheap)

Output: ExecutionIntent dataclass
"""

from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass
from typing import List, Optional

from . import llm_router

logger = logging.getLogger(__name__)


@dataclass
class ExecutionIntent:
    """Verdict produced by intent detection for a single user prompt."""

    # True when the prompt should be handled in a sandbox, False for plain chat.
    needs_sandbox: bool
    # Short human-readable explanation of how the verdict was reached.
    reason: str
    # Certainty of the verdict, expressed on a 0..1 scale.
    confidence: float


# ----------------------------------------------------------------------------
# Heuristic rules
# ----------------------------------------------------------------------------

def _compile_patterns(sources):
    """Compile every regex source case-insensitively with per-line ^/$ anchors."""
    flags = re.IGNORECASE | re.MULTILINE
    return [re.compile(source, flags) for source in sources]


# Regex sources whose presence strongly suggests the user wants something
# executed: run/execute verbs, file creation, package installs, shell
# commands, fenced code in a runnable language, well-known script names.
# NOTE(review): the bare shell-command rule also matches ordinary words that
# are followed by whitespace (e.g. "cat "); confirm this is acceptable.
_EXEC_PATTERNS = [
    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
    r"\b(install|pip install|npm install|apt install)\b",
    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
    r"```(python|bash|sh|shell|javascript|js|node)\b",
    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
    r"\bdebug\s+(this|my)\b",
    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
]

# Regex sources whose presence strongly suggests plain conversation:
# greetings, thanks, simple questions, requests for explanations/definitions.
_CHAT_PATTERNS = [
    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
    r"^\s*(thanks|thank you|thx|ty)\b",
    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
    r"\b(define|definition\s+of)\b",
    r"\btell me about\b",
    r"\bdifference between\b",
]

# Compiled once at import time so each detection call only runs searches.
_EXEC_RE = _compile_patterns(_EXEC_PATTERNS)
_CHAT_RE = _compile_patterns(_CHAT_PATTERNS)


# Fence info-strings naming a runnable language that the main execution
# patterns may miss (e.g. "python3", "nodejs"). The trailing \b is essential:
# without it "```json" would match through the "js" alternative and a pure
# data block would be reported as execution intent.
_RUNNABLE_FENCE_RE = re.compile(
    r"```(python[\d.]*|bash|sh|shell|javascript|js|node(?:js)?)\b",
    re.IGNORECASE,
)


def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
    """Classify a prompt with regex heuristics only.

    Args:
        prompt: Raw user message. ``None`` or whitespace-only input is
            treated as an empty prompt.

    Returns:
        An ``ExecutionIntent`` when the signals are unambiguous, or ``None``
        when the prompt is ambiguous (no signal on a longer prompt, or both
        execution and chat patterns matched) and another classifier should
        decide.
    """
    text = (prompt or "").strip()
    if not text:
        return ExecutionIntent(False, "empty prompt", 1.0)

    exec_hits = sum(1 for rx in _EXEC_RE if rx.search(text))
    chat_hits = sum(1 for rx in _CHAT_RE if rx.search(text))

    # A fenced block tagged with a runnable language counts as execution
    # intent even when no execution pattern matched. As before, this check
    # runs ahead of (and independently from) the chat-pattern count.
    if exec_hits == 0 and _RUNNABLE_FENCE_RE.search(text):
        return ExecutionIntent(True, "code block detected", 0.85)

    if exec_hits >= 1 and chat_hits == 0:
        return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
    if chat_hits >= 1 and exec_hits == 0:
        return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)

    # No signal at all: very short prompts are usually chat.
    if exec_hits == 0 and chat_hits == 0 and len(text) < 30:
        return ExecutionIntent(False, "short prompt, likely chat", 0.7)

    # Either no signal on a longer prompt or mixed signals: ambiguous.
    return None


# ----------------------------------------------------------------------------
# LLM fallback classifier
# ----------------------------------------------------------------------------

# System prompt sent by llm_detect as the first message. It instructs the
# model to answer with a strict JSON object carrying "needs_sandbox" and
# "reason" keys, which llm_detect then parses with json.loads.
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
requires running real code or shell commands in a sandbox computer.

Reply ONLY with strict JSON, no prose:
{"needs_sandbox": true|false, "reason": "<short explanation>"}

Rules:
- needs_sandbox = true  when the user wants to run code, execute shell commands,
  create/modify files, install packages, debug a running program, or otherwise
  observe real execution results.
- needs_sandbox = false when the user asks for explanations, greetings,
  brainstorming, advice, or static code review with no run request.
"""


def _coerce_flag(value: object) -> bool:
    """Interpret the classifier's ``needs_sandbox`` value as a boolean.

    Models sometimes emit the flag as a JSON string; ``bool("false")`` would
    be ``True``, so string values are compared against explicit truthy words.
    """
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "1"}
    return bool(value)


async def llm_detect(prompt: str) -> ExecutionIntent:
    """Classify a prompt by asking the LLM router for a JSON verdict.

    Args:
        prompt: Raw user message; only the first 2000 characters are sent.

    Returns:
        An ``ExecutionIntent`` with confidence 0.75 built from the model's
        JSON reply. If the call or the parsing fails for any reason, a
        conservative no-sandbox intent with confidence 0.3 is returned
        instead of raising.
    """
    messages = [
        {"role": "system", "content": _CLASSIFIER_SYSTEM},
        {"role": "user", "content": (prompt or "")[:2000]},
    ]
    try:
        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
        text = result["content"].strip()
        # Tolerate models that wrap JSON in code fences
        text = re.sub(r"^```(?:json)?|```$", "", text, flags=re.MULTILINE).strip()
        obj = json.loads(text)
        if not isinstance(obj, dict):
            raise ValueError("classifier reply is not a JSON object")
        return ExecutionIntent(
            needs_sandbox=_coerce_flag(obj.get("needs_sandbox", False)),
            reason=str(obj.get("reason", "llm classifier"))[:200],
            confidence=0.75,
        )
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, missing
        # key, bad JSON) degrades to "no sandbox" rather than crashing.
        logger.warning("LLM intent classifier failed: %s", e)
        # Cap the reason length the same way as on the success path.
        return ExecutionIntent(False, f"llm fallback failed ({e})"[:200], 0.3)


async def detect(prompt: str) -> ExecutionIntent:
    """Detect execution intent: try the heuristics, else defer to the LLM.

    The LLM classifier is only awaited when the heuristic pass returns
    ``None``, i.e. could not decide on its own.
    """
    verdict = heuristic_detect(prompt)
    return verdict if verdict is not None else await llm_detect(prompt)