prometheus04

microagent: align runtime SYSTEM_PROMPT with training data (multi-line bash blocks) — removes scaffold drift between SFT and inference

0cb9627 verified 17 days ago

raw

history blame contribute delete

17.1 kB

	"""
	MicroAgent — Harbor BaseAgent designed for 4B-class models on Terminal-Bench 2.0.

	Why this exists:
	Terminus-2 (Harbor's reference agent) is excellent for frontier models but its
	system prompt + tool schemas + accumulating history routinely consume 20-25K
	tokens of overhead per turn. A 4B model's effective reasoning context is
	much narrower than its theoretical window — it needs signal density, not volume.

	Design pillars:
	1. ~280 token system prompt, single bash tool (no JSON schema bloat).
	2. XML action format native to Hunyuan's chat template.
	3. Thinking is always on: the served model is an always-thinking variant, so no per-turn thinking toggle is sent (see 'Note on thinking' below).
	4. Sliding-window history (last 5 turn records, see MAX_HISTORY_TURNS) with permanent task pinning.
	5. Head+tail truncation of observations (~1200 chars total).
	6. Free initial state probe (pwd, ls, README) folded into turn 1's user msg.
	7. Loop and parse-failure detection with corrective injections.
	8. Heuristic per-command timeouts (30s default, 120s for builds/installs).

	Usage:
	# Serve your model
	vllm serve your-username/your-finetuned-model \
	--host 0.0.0.0 --port 8000 \
	--served-model-name microagent-model \
	--max-model-len 16384 \
	--enable-prefix-caching

	# Run TB2 with this agent
	export MICROAGENT_BASE_URL=http://localhost:8000/v1
	export MICROAGENT_API_KEY=EMPTY
	harbor run -d terminal-bench@2.0 \
	--agent-import-path microagent:MicroAgent \
	--model hosted_vllm/microagent-model \
	-k 5 \
	--jobs-dir ./jobs

	Environment variables:
	MICROAGENT_BASE_URL OpenAI-compatible endpoint (default localhost:8000/v1)
	MICROAGENT_API_KEY API key for endpoint (default "EMPTY" for vLLM)
	MICROAGENT_MAX_TURNS Override max turns (default 20)
	MICROAGENT_TEMPERATURE Sampling temperature (default 0.1)

	Note on thinking:
	Qwen3-4B-Thinking-2507 is an always-thinking model — it emits native
	<think>...</think> tokens by default and does NOT accept the
	`enable_thinking` chat-template kwarg (that's the Instruct variant).
	Our SFT data was already in <think>...</think><bash>...</bash> format,
	so the model produces our format natively. We pass no thinking kwarg.
	"""

	from __future__ import annotations

	import asyncio
	import os
	import re
	from dataclasses import dataclass
	from typing import Any

	from openai import AsyncOpenAI

	from harbor.agents.base import BaseAgent
	from harbor.environments.base import BaseEnvironment
	from harbor.models.agent.context import AgentContext


	# --------------------------------------------------------------------------
	# Configuration
	# --------------------------------------------------------------------------

	# Loop limits.
	MAX_TURNS_DEFAULT = 20      # turn budget when MICROAGENT_MAX_TURNS is unset
	MAX_PARSE_FAILURES = 3      # consecutive unparseable replies before the run is aborted
	MAX_REPEATED_COMMANDS = 3   # identical back-to-back repeats before a loop hint is injected

	# Context shaping.
	MAX_HISTORY_TURNS = 5       # turn records kept verbatim in context
	OBS_HEAD_CHARS = 600        # characters kept from the start of a long observation
	OBS_TAIL_CHARS = 600        # characters kept from the end of a long observation

	# Per-command timeouts, in seconds.
	DEFAULT_CMD_TIMEOUT = 30
	LONG_CMD_TIMEOUT = 120

	# Command prefixes that get LONG_CMD_TIMEOUT (builds, installs, downloads, tests).
	# Entries are matched with str.startswith, so a trailing space (e.g. "make ")
	# keeps the prefix from matching longer program names.
	LONG_CMD_PREFIXES = (
	    "pip install", "pip3 install", "python -m pip install", "python3 -m pip install",
	    "apt-get install", "apt install", "apt-get update", "apt update",
	    "make ", "cmake ", "cargo build", "cargo test", "cargo install",
	    "npm install", "yarn install", "pnpm install",
	    "go build", "go test", "go mod download", "go install",
	    "mvn ", "gradle ",
	    "pytest", "python -m pytest",
	    "wget ", "curl -O", "curl -L",
	    "git clone", "git submodule",
	    "docker build", "docker pull", "docker run",
	)

	# Completion budget requested from the model on every turn.
	MAX_TOKENS_PER_TURN = 1536


	# System message sent verbatim as the first message of every model call.
	# It defines the two reply shapes the parser understands:
	#   <think>...</think><bash>...</bash>      run shell commands
	#   <think>...</think><finish>...</finish>  declare the task complete
	# NOTE(review): the module docstring says the model was fine-tuned on this
	# format — treat any wording change here as a behavior change and confirm
	# against the training data before editing.
	SYSTEM_PROMPT = """You are a terminal agent. You complete tasks by running bash commands in a Linux container.

	Respond in EXACTLY this format every turn:
	<think>brief reasoning, max 2 sentences</think>
	<bash>shell commands (one or more lines)</bash>

	When the task is fully complete and verified, respond instead with:
	<think>brief verification reasoning</think>
	<finish>one-line summary of what you did</finish>

	Rules:
	- One bash block per turn. Multiple lines run sequentially in the same shell.
	- Multi-line files: cat <<'EOF' > path/to/file ... EOF
	- Output is truncated (head + tail). Lines starting with [exit N] mean non-zero exit.
	- Analyze errors before retrying. Never repeat a failed command unchanged.
	- You have a hard turn limit. Use turns efficiently."""


	# --------------------------------------------------------------------------
	# Data structures
	# --------------------------------------------------------------------------

	@dataclass
	class TurnRecord:
	    """One entry of the agent's conversation history."""

	    # The bash command that was run, or a parenthesised label such as
	    # "(format reminder)" / "(loop detector)" for turns injected by the agent.
	    command: str
	    # Text replayed to the model as the user message that follows the command.
	    observation: str
	    # Exit status recorded for the command (0 for injected turns).
	    exit_code: int
	    # Text extracted from the model's <think> block; kept for logging only,
	    # it is not replayed into the prompt.
	    thinking: str = ""


	@dataclass
	class ParsedAction:
	    """Result of parsing one model reply."""

	    kind: str  # "bash" | "finish" | "invalid"
	    payload: str = ""   # command text (bash) or summary (finish); empty when invalid
	    thinking: str = ""  # stripped contents of the first <think> block, if any


	# --------------------------------------------------------------------------
	# Parser
	# --------------------------------------------------------------------------

	# All patterns are non-greedy and DOTALL so a tag body may span several lines.
	_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
	_BASH_RE = re.compile(r"<bash>(.*?)</bash>", re.DOTALL)
	_FINISH_RE = re.compile(r"<finish>(.*?)</finish>", re.DOTALL)
	# Markdown code fence, optionally tagged bash/sh: ```bash\n...\n```
	_FENCE_RE = re.compile(r"```(?:bash|sh)?\s*\n(.*?)```", re.DOTALL)

	# Extraction order matters: an explicit <finish> wins over <bash>, and the
	# markdown fence is only a fallback when neither tag yields a payload.
	_ACTION_PATTERNS = (
	    ("finish", _FINISH_RE),
	    ("bash", _BASH_RE),
	    ("bash", _FENCE_RE),
	)


	def parse_response(text: str) -> ParsedAction:
	    """Extract the action from a model response.

	    Args:
	        text: Raw assistant message content; may be empty.

	    Returns:
	        A ``ParsedAction`` whose ``kind`` is ``"finish"`` or ``"bash"`` when a
	        non-blank payload was found (surrounding whitespace stripped), and
	        ``"invalid"`` otherwise. ``thinking`` carries the first ``<think>``
	        block whenever one is present, including on invalid replies.
	    """
	    if not text:
	        return ParsedAction(kind="invalid")

	    think = _THINK_RE.search(text)
	    thinking = think.group(1).strip() if think else ""

	    for kind, pattern in _ACTION_PATTERNS:
	        match = pattern.search(text)
	        payload = match.group(1).strip() if match else ""
	        if payload:
	            return ParsedAction(kind=kind, payload=payload, thinking=thinking)

	    return ParsedAction(kind="invalid", thinking=thinking)


	# --------------------------------------------------------------------------
	# Helpers
	# --------------------------------------------------------------------------

	def truncate_output(text: str | None, head: int = OBS_HEAD_CHARS, tail: int = OBS_TAIL_CHARS) -> str:
	    """Shorten long output to its head and tail.

	    Errors usually live at the tail; context at the head.

	    Args:
	        text: Output to shorten. ``None`` is reported as ``"(no output)"``.
	        head: Number of leading characters to keep.
	        tail: Number of trailing characters to keep.

	    Returns:
	        ``text`` unchanged when it is at most ``head + tail + 50`` characters
	        long (the slack avoids replacing a few characters with a longer
	        marker); otherwise the head, a marker stating how many characters
	        were dropped, and the tail.
	    """
	    if text is None:
	        return "(no output)"
	    if len(text) <= head + tail + 50:
	        return text
	    cut = len(text) - head - tail
	    # text[-0:] is the whole string, so a zero tail must be handled explicitly.
	    tail_part = text[-tail:] if tail > 0 else ""
	    return f"{text[:head]}\n[... truncated {cut} chars ...]\n{tail_part}"


	def format_observation(output: str, exit_code: int) -> str:
	    """Build the observation text shown to the model for one command.

	    Empty output is replaced by ``"(no output)"``, long output is truncated,
	    and a non-zero exit status is announced on a leading ``[exit N]`` line.
	    """
	    body = truncate_output(output or "(no output)")
	    if exit_code == 0:
	        return body
	    return f"[exit {exit_code}]\n" + body


	def pick_timeout(command: str) -> int:
	    """Choose an execution timeout (seconds) for a bash block.

	    The block is split on newlines and on ``&&``, ``||`` and ``;`` so that a
	    slow command anywhere in a multi-line or chained block (for example
	    ``cd repo && pip install -r requirements.txt``) is recognised, not only
	    one at the very start. The split is purely textual: separators inside
	    quotes or heredocs can produce false positives, which only ever raise
	    the timeout.

	    Args:
	        command: The shell text about to be executed.

	    Returns:
	        ``LONG_CMD_TIMEOUT`` if any segment starts with one of
	        ``LONG_CMD_PREFIXES``, otherwise ``DEFAULT_CMD_TIMEOUT``.
	    """
	    segments = re.split(r"\n|&&|\|\||;", command)
	    # str.startswith accepts a tuple of candidate prefixes.
	    if any(segment.lstrip().startswith(LONG_CMD_PREFIXES) for segment in segments):
	        return LONG_CMD_TIMEOUT
	    return DEFAULT_CMD_TIMEOUT


	def extract_exec_result(result: Any) -> tuple[str, int]:
	    """Defensively pull combined output and exit status from an exec result.

	    Attributes are read with ``getattr`` so the function works on any result
	    object regardless of its concrete type.

	    Args:
	        result: Object returned by the environment's ``exec`` call.

	    Returns:
	        ``(output, exit_code)``. ``output`` is stdout followed by stderr
	        (newline-joined and stripped when both are present). ``exit_code``
	        is the first non-``None`` value among the ``exit_code``,
	        ``returncode`` and ``return_code`` attributes, or 0 if none is set.
	    """
	    stdout = getattr(result, "stdout", "") or ""
	    stderr = getattr(result, "stderr", "") or ""

	    # Result types disagree on the attribute name for the exit status;
	    # probe the known spellings instead of silently reporting success.
	    exit_code = 0
	    for attr in ("exit_code", "returncode", "return_code"):
	        value = getattr(result, attr, None)
	        if value is not None:
	            exit_code = value
	            break

	    output = stdout
	    if stderr:
	        output = (output + "\n" + stderr).strip() if output else stderr
	    return output, int(exit_code)


	# --------------------------------------------------------------------------
	# Agent
	# --------------------------------------------------------------------------

	class MicroAgent(BaseAgent):
	    """Minimal-overhead agent designed for 4B-class models on Terminal-Bench 2.0.

	    Each turn the agent sends the system prompt, the pinned task with an
	    initial-state probe, and a sliding window of recent turns to an
	    OpenAI-compatible chat endpoint. It parses one ``<bash>`` or
	    ``<finish>`` action from the reply and executes bash actions in the
	    environment. Every turn is appended to ``turns.jsonl`` in the logs dir.
	    """

	    SUPPORTS_ATIF = False  # populate trajectory once you wire ATIF schema

	    # Labels stored in TurnRecord.command for turns injected by the agent
	    # itself. They are matched exactly, so a genuine subshell command such as
	    # "(cd build && make)" is never mistaken for an injected turn.
	    _FORMAT_REMINDER = "(format reminder)"
	    _LOOP_DETECTOR = "(loop detector)"
	    _SYNTHETIC_COMMANDS = frozenset({_FORMAT_REMINDER, _LOOP_DETECTOR})

	    @staticmethod
	    def name() -> str:
	        """Return the agent's registry name."""
	        return "microagent"

	    def version(self) -> str | None:
	        """Return the agent version string."""
	        return "0.1.0"

	    async def setup(self, environment: BaseEnvironment) -> None:
	        """No-op: this is an external agent, nothing is installed in the container."""
	        return None

	    async def run(
	        self,
	        instruction: str,
	        environment: BaseEnvironment,
	        context: AgentContext,
	    ) -> None:
	        """Work on ``instruction`` inside ``environment`` until done or out of turns.

	        Args:
	            instruction: Task description; pinned at the top of every prompt.
	            environment: Environment whose ``exec`` runs the bash commands.
	            context: Agent context supplied by the caller (not used here).

	        The run ends when the model emits ``<finish>``, the turn budget is
	        exhausted, the model call fails, or ``MAX_PARSE_FAILURES``
	        consecutive replies cannot be parsed.
	        """
	        client = AsyncOpenAI(
	            base_url=os.environ.get("MICROAGENT_BASE_URL", "http://localhost:8000/v1"),
	            api_key=os.environ.get("MICROAGENT_API_KEY", "EMPTY"),
	        )
	        try:
	            await self._run_turns(client, instruction, environment)
	        finally:
	            # Release the client's HTTP connections on every exit path.
	            await client.close()

	    async def _run_turns(
	        self,
	        client: AsyncOpenAI,
	        instruction: str,
	        environment: BaseEnvironment,
	    ) -> None:
	        """Drive the model/environment loop for one task."""
	        max_turns = int(os.environ.get("MICROAGENT_MAX_TURNS", MAX_TURNS_DEFAULT))
	        temperature = float(os.environ.get("MICROAGENT_TEMPERATURE", "0.1"))

	        # Harbor passes "provider/name"; vLLM serves the bare name.
	        model_id = (self.model_name or "").split("/", 1)[-1] or self.model_name

	        # Free orientation — saves a model call on turn 1.
	        initial_state = await self._probe_initial_state(environment)

	        history: list[TurnRecord] = []
	        parse_failures = 0
	        last_command: str | None = None
	        repeat_streak = 0
	        turn_idx = 0

	        while turn_idx < max_turns:
	            messages = self._build_messages(instruction, initial_state, history)

	            try:
	                resp = await client.chat.completions.create(
	                    model=model_id,
	                    messages=messages,
	                    max_tokens=MAX_TOKENS_PER_TURN,
	                    temperature=temperature,
	                )
	                raw = resp.choices[0].message.content or ""
	            except Exception as e:
	                self.logger.error(f"Model call failed at turn {turn_idx}: {e}")
	                return

	            action = parse_response(raw)

	            # ---- Parse failure path -----------------------------------------
	            if action.kind == "invalid":
	                parse_failures += 1
	                self.logger.warning(
	                    f"Parse failure {parse_failures}/{MAX_PARSE_FAILURES} at turn {turn_idx}"
	                )
	                self._log_turn(turn_idx, "(parse-fail)", "non-parseable response", 0,
	                               thinking=action.thinking, note=f"parse_fail {parse_failures}")
	                if parse_failures >= MAX_PARSE_FAILURES:
	                    self.logger.error("Max parse failures — terminating.")
	                    return
	                # Inject a corrective observation; do not consume a turn slot.
	                history.append(TurnRecord(
	                    command=self._FORMAT_REMINDER,
	                    observation=(
	                        "Your last response was not parseable. Reply in EXACTLY one of:\n"
	                        " <think>...</think><bash>cmd</bash>\n"
	                        " <think>...</think><finish>summary</finish>"
	                    ),
	                    exit_code=0,
	                ))
	                continue

	            # Any parseable reply resets the streak: only consecutive failures abort.
	            parse_failures = 0

	            # ---- Finish path ------------------------------------------------
	            if action.kind == "finish":
	                self.logger.info(f"MicroAgent finished at turn {turn_idx}: {action.payload}")
	                self._log_turn(turn_idx, "(finish)", action.payload, 0,
	                               thinking=action.thinking, note="finish")
	                return

	            # ---- Bash path --------------------------------------------------
	            command = action.payload

	            # Loop detection: count identical back-to-back commands.
	            if command == last_command:
	                repeat_streak += 1
	            else:
	                repeat_streak = 0
	            last_command = command

	            if repeat_streak >= MAX_REPEATED_COMMANDS:
	                # The repeated command is NOT executed; the hint replaces it
	                # and the turn is still charged against the budget.
	                self.logger.warning(f"Loop detected at turn {turn_idx} — injecting hint.")
	                self._log_turn(turn_idx, command, "loop-detected", -1,
	                               thinking=action.thinking, note="loop_detected")
	                history.append(TurnRecord(
	                    command=self._LOOP_DETECTOR,
	                    observation=(
	                        "You have repeated this exact command. It is not making progress. "
	                        "Try a different approach: inspect with ls/cat/grep, "
	                        "or read any error message carefully."
	                    ),
	                    exit_code=0,
	                ))
	                repeat_streak = 0
	                last_command = None
	                turn_idx += 1
	                continue

	            # Execute
	            timeout = pick_timeout(command)
	            try:
	                # NOTE(review): confirm that BaseEnvironment.exec names its
	                # timeout keyword `timeout`; a mismatch would surface below
	                # as an "(execution error: ...)" observation on every turn.
	                result = await environment.exec(command, timeout=timeout)
	                output, exit_code = extract_exec_result(result)
	            except asyncio.TimeoutError:
	                output = f"(command timed out after {timeout}s)"
	                exit_code = 124
	            except Exception as e:
	                # Report the failure to the model instead of aborting the run.
	                output = f"(execution error: {e})"
	                exit_code = 1

	            obs = format_observation(output, exit_code)
	            history.append(TurnRecord(
	                command=command,
	                observation=obs,
	                exit_code=exit_code,
	                thinking=action.thinking,
	            ))
	            self._log_turn(turn_idx, command, obs, exit_code, thinking=action.thinking)
	            turn_idx += 1

	        self.logger.info("MicroAgent terminated (max turns reached).")

	    # ----------------------------------------------------------------------
	    # Internal helpers
	    # ----------------------------------------------------------------------

	    def _log_turn(
	        self,
	        turn_idx: int,
	        command: str,
	        observation: str,
	        exit_code: int,
	        thinking: str = "",
	        note: str = "",
	    ) -> None:
	        """Append one turn to ``turns.jsonl`` in the logs dir. Never raises.

	        The observation is capped at 2000 characters and the thinking text at
	        500. Any failure while writing is logged at debug level and swallowed
	        so that logging can never abort a run.
	        """
	        import json
	        import time
	        try:
	            self.logs_dir.mkdir(parents=True, exist_ok=True)
	            record = {
	                "turn": turn_idx,
	                "ts": time.time(),
	                "command": command,
	                "exit_code": exit_code,
	                "observation": (observation or "")[:2000],
	                "thinking": (thinking or "")[:500],
	                "note": note,
	            }
	            with (self.logs_dir / "turns.jsonl").open("a", encoding="utf-8") as f:
	                f.write(json.dumps(record, ensure_ascii=False) + "\n")
	        except Exception as e:
	            self.logger.debug(f"turn log write failed: {e}")

	    async def _probe_initial_state(self, env: BaseEnvironment) -> str:
	        """Collect a cheap snapshot of the container — saves a model call on turn 1.

	        Runs ``pwd`` and ``ls -la``, then prints the first 800 bytes of the
	        first README/instructions file found. Each probe is best-effort: a
	        failing probe is skipped rather than failing the run.

	        Returns:
	            The collected sections joined by blank lines (may be empty).
	        """
	        chunks: list[str] = []
	        for label, cmd in [("pwd", "pwd"), ("ls -la", "ls -la")]:
	            try:
	                r = await env.exec(cmd, timeout=10)
	                out, _ = extract_exec_result(r)
	                chunks.append(f"$ {label}\n{truncate_output(out, 300, 200)}")
	            except Exception:
	                continue
	        # README / instructions detection
	        try:
	            readme_cmd = (
	                "for f in README.md README.txt readme.md INSTRUCTIONS.md TASK.md task.md; do "
	                " [ -f \"$f\" ] && echo '--- '\"$f\"' ---' && head -c 800 \"$f\" && break; "
	                "done"
	            )
	            r = await env.exec(readme_cmd, timeout=10)
	            out, _ = extract_exec_result(r)
	            if out.strip():
	                chunks.append(out.strip())
	        except Exception:
	            pass
	        return "\n\n".join(chunks)

	    def _build_messages(
	        self,
	        instruction: str,
	        initial_state: str,
	        history: list[TurnRecord],
	    ) -> list[dict]:
	        """Assemble the chat messages for the next model call.

	        Layout: system prompt, the pinned task with the initial-state probe,
	        an elision note when older turns were dropped, then the last
	        ``MAX_HISTORY_TURNS`` history records.

	        Args:
	            instruction: Task text; always included in full.
	            initial_state: Output of the initial probe.
	            history: All turn records so far, oldest first.

	        Returns:
	            A list of ``{"role", "content"}`` dicts ready for the chat API.
	        """
	        msgs: list[dict] = [{"role": "system", "content": SYSTEM_PROMPT}]

	        # Pinned task — always present, never truncated.
	        first_user = f"TASK:\n{instruction.strip()}\n\nInitial state:\n{initial_state}".rstrip()
	        msgs.append({"role": "user", "content": first_user})

	        # Sliding window over recent turn pairs
	        recent = history[-MAX_HISTORY_TURNS:]
	        dropped = len(history) - len(recent)
	        if dropped > 0:
	            msgs.append({
	                "role": "user",
	                "content": f"[{dropped} earlier turn(s) elided to save context]"
	            })

	        for rec in recent:
	            # Injected turns (loop hint, format reminder) have no assistant
	            # side. Real turns replay the command without its <think> text
	            # to save tokens.
	            if rec.command not in self._SYNTHETIC_COMMANDS:
	                msgs.append({"role": "assistant", "content": f"<bash>{rec.command}</bash>"})
	            msgs.append({"role": "user", "content": rec.observation})
	        return msgs