File size: 5,101 Bytes
46258b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
Execution intent detection.

Decides whether a user prompt needs a real E2B sandbox or is just chat.

Strategy:
  1. Fast keyword/regex heuristics (free, deterministic)
  2. If ambiguous → fall back to LLM classification (cheap)

Output: ExecutionIntent dataclass
"""

from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass
from typing import List, Optional

from . import llm_router

logger = logging.getLogger(__name__)


@dataclass
class ExecutionIntent:
    """Result of classifying a prompt as sandbox-worthy or chat-only."""

    # True when the prompt was judged to need a sandbox, False for plain chat.
    needs_sandbox: bool
    # Short human-readable explanation of how the decision was reached.
    reason: str
    # How sure the detector is about the decision.
    confidence: float  # 0..1


# ----------------------------------------------------------------------------
# Heuristic rules
# ----------------------------------------------------------------------------

# Strong execution signals: regex sources that, when found anywhere in the
# prompt, count as evidence that something should actually be run.
_EXEC_PATTERNS = [
    # "run" / "execute" + optional determiner + a code-ish noun
    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
    # requests to create or write files, folders or scripts
    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
    # package installation
    r"\b(install|pip install|npm install|apt install)\b",
    # common shell command names followed by whitespace
    # NOTE(review): also matches everyday words such as "cat " or "cd " in
    # ordinary prose — confirm this false-positive rate is acceptable.
    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
    # fenced code block whose language tag is a runnable language
    r"```(python|bash|sh|shell|javascript|js|node)\b",
    # timestamp/time wording followed later on the same line by a file action
    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
    # specific file names treated as execution hints
    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
    # "debug this" / "debug my"
    r"\bdebug\s+(this|my)\b",
    # "build a/an <app|website|api|server|script>"
    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
]

# Strong chat-only signals: regex sources that indicate conversation or a
# request for explanation rather than execution.
_CHAT_PATTERNS = [
    # greetings at the start of a line
    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
    # thanks at the start of a line
    r"^\s*(thanks|thank you|thx|ty)\b",
    # a line that opens with a question word + is/are/do/does and ends in "?"
    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
    # "explain ..." unless run/execute/build appears later on the same line
    # ("." does not cross newlines because DOTALL is not set)
    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
    # definition requests
    r"\b(define|definition\s+of)\b",
    r"\btell me about\b",
    r"\bdifference between\b",
]

# Compile once at import time. IGNORECASE makes matching case-insensitive;
# MULTILINE lets "^" and "$" anchor at every line of a multi-line prompt.
_EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS]
_CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS]


def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
    """Classify a prompt using only the precompiled regex heuristics.

    Args:
        prompt: Raw user text; ``None`` or whitespace-only input is treated
            as an empty prompt.

    Returns:
        An ``ExecutionIntent`` when the signals point one way, or ``None``
        when they conflict or are absent on a prompt of 30+ characters, in
        which case the caller is expected to consult the LLM classifier.
    """
    text = (prompt or "").strip()
    if not text:
        return ExecutionIntent(False, "empty prompt", 1.0)

    # Count how many patterns of each family fire at least once.
    exec_hits = len([rx for rx in _EXEC_RE if rx.search(text)])
    chat_hits = len([rx for rx in _CHAT_RE if rx.search(text)])
    has_exec = exec_hits >= 1
    has_chat = chat_hits >= 1

    # Fallback for fenced code blocks that no execution pattern caught. This
    # check has no trailing word boundary, so it also accepts tags such as
    # "python3". It takes precedence over any chat-pattern hits.
    if not has_exec and "```" in text:
        fenced = re.search(r"```(python|bash|sh|shell|js|node)", text, re.IGNORECASE)
        if fenced:
            return ExecutionIntent(True, "code block detected", 0.85)

    # Mixed signals → ask LLM
    if has_exec and has_chat:
        return None
    if has_exec:
        return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
    if has_chat:
        return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)

    # No signal at all: very short prompts are usually chat, anything longer
    # is ambiguous → ask LLM.
    if len(text) < 30:
        return ExecutionIntent(False, "short prompt, likely chat", 0.7)
    return None


# ----------------------------------------------------------------------------
# LLM fallback classifier
# ----------------------------------------------------------------------------

# System prompt for the LLM fallback classifier. It instructs the model to
# answer with a bare JSON object carrying "needs_sandbox" and "reason" keys,
# which are the keys llm_detect() reads from the parsed reply.
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
requires running real code or shell commands in a sandbox computer.

Reply ONLY with strict JSON, no prose:
{"needs_sandbox": true|false, "reason": "<short explanation>"}

Rules:
- needs_sandbox = true  when the user wants to run code, execute shell commands,
  create/modify files, install packages, debug a running program, or otherwise
  observe real execution results.
- needs_sandbox = false when the user asks for explanations, greetings,
  brainstorming, advice, or static code review with no run request.
"""


def _intent_from_reply(text: str) -> ExecutionIntent:
    """Parse the classifier's raw reply text into an ``ExecutionIntent``.

    Args:
        text: The model's reply, expected to contain one JSON object with
            ``needs_sandbox`` and ``reason`` keys.

    Returns:
        An ``ExecutionIntent`` with a fixed confidence of 0.75.

    Raises:
        ValueError: If no JSON object can be located or decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Tolerate models that wrap JSON in code fences
    cleaned = re.sub(r"^```(?:json)?|```$", "", text.strip(), flags=re.MULTILINE).strip()

    # Tolerate stray prose around the object: keep only the outermost {...}.
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if start == -1 or end < start:
        raise ValueError("classifier reply contains no JSON object")
    obj = json.loads(cleaned[start : end + 1])

    flag = obj.get("needs_sandbox", False)
    if isinstance(flag, str):
        # bool("false") is True, so a quoted boolean must be compared as text
        # or it would silently flip the decision.
        flag = flag.strip().lower() in {"true", "yes", "1"}

    return ExecutionIntent(
        needs_sandbox=bool(flag),
        reason=str(obj.get("reason", "llm classifier"))[:200],
        confidence=0.75,
    )


async def llm_detect(prompt: str) -> ExecutionIntent:
    """Classify a prompt by asking the LLM router for a JSON verdict.

    Args:
        prompt: Raw user text. ``None`` is treated as an empty string, and
            only the first 2000 characters are sent to the model.

    Returns:
        The parsed ``ExecutionIntent`` (confidence 0.75) on success. On any
        failure (router error, missing content, unparsable reply) a
        conservative "no sandbox" intent with confidence 0.3 is returned
        instead of raising.
    """
    messages = [
        {"role": "system", "content": _CLASSIFIER_SYSTEM},
        # Guard against None so the slice cannot raise outside the try block.
        {"role": "user", "content": (prompt or "")[:2000]},
    ]
    try:
        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
        # Assumes the router returns a mapping with a string under "content";
        # anything else raises here and is handled by the fallback below.
        return _intent_from_reply(result["content"])
    except Exception as e:
        # Deliberate best-effort boundary: classification must never crash
        # the caller, whatever the router or the model does.
        logger.warning("LLM intent classifier failed: %s", e)
        # Conservative default → no sandbox
        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)


async def detect(prompt: str) -> ExecutionIntent:
    """Resolve the execution intent of a prompt.

    The regex heuristics are tried first; the LLM classifier is awaited only
    when they return ``None`` (no clear verdict).
    """
    verdict = heuristic_detect(prompt)
    return verdict if verdict is not None else await llm_detect(prompt)