OpenSOC v1

bb6a031 about 1 month ago

4.31 kB

	"""Shared prompt formatter for the defender role.

	The same formatter is used by:
	* SFT dataset generation (`make_sft_dataset.py`)
	* GRPO rollouts (`train_grpo.ipynb`)
	* Evaluation harness (`eval/eval.py`)

	Keeping the prompt template centralized prevents the classic train-eval
	mismatch where a model regresses because the inference prompt differs
	slightly from the training prompt.

	The defender response format is intentionally machine-parseable:

	Action: <one of dismiss|monitor|quarantine_host|block_ip|escalate>
	CitedLog: <log_id>
	Rationale: <one short sentence>

	The trainer rewards format compliance via `parse_defender_response`'s
	`format_ok` flag (used as an auxiliary GRPO reward in the notebook).
	"""

	from __future__ import annotations

	import json
	import re
	from dataclasses import dataclass
	from typing import Dict, Optional

	from schema import Alert, Event, TriageAction


	# System message placed first in the chat built by `to_chat_messages`.
	# It is assembled from three paragraphs separated by blank lines: the
	# role/task framing, the allowed action vocabulary, and the response template.
	SYSTEM_PROMPT = "\n\n".join(
	    (
	        "You are an autonomous SOC tier-1 triage analyst. "
	        "You will receive a single SIEM alert and the small log window that triggered it. "
	        "Your job is to choose exactly ONE triage action and cite the log_id that drove your decision.",
	        "Allowed actions (highest cost last): dismiss, monitor, quarantine_host, block_ip, escalate.",
	        "\n".join(
	            (
	                "Always respond in this exact format:",
	                "Action: <action>",
	                "CitedLog: <log_id>",
	                "Rationale: <one short sentence>",
	            )
	        ),
	    )
	)


	def _event_to_line(e: Event) -> str:
	    """Format one event as a single SIEM-style log line.

	    The line has the shape
	    ``[<timestamp>] <log_id> src=<source> type=<event_type> k=v ...``.
	    Fields whose value is ``None`` or the empty string are left out; string
	    values are JSON-quoted and every other value is interpolated as-is.
	    """
	    rendered_pairs = []
	    for key, value in (e.fields or {}).items():
	        if value in (None, ""):
	            continue
	        shown = json.dumps(value) if isinstance(value, str) else value
	        rendered_pairs.append(f"{key}={shown}")

	    header = f"[{e.timestamp}] {e.log_id} src={e.source} type={e.event_type.value}"
	    # rstrip drops the dangling separator when no field survives the filter.
	    return f"{header} {' '.join(rendered_pairs)}".rstrip()


	def render_defender_prompt(alert: Alert, events: list[Event]) -> str:
	    """Render the user message for a defender turn.

	    Args:
	        alert: The alert being triaged. Its ``alert_id``, ``severity``,
	            ``category.value``, ``host``, ``user`` and ``summary`` are read.
	        events: The log window shown to the model; each event is rendered
	            with `_event_to_line`, one per line, in the given order.

	    Returns:
	        A multi-line string: alert header, host/user line, summary, the log
	        window, and the closing instruction ``Triage this alert.``
	    """
	    log_lines = "\n".join(_event_to_line(e) for e in events)
	    # Plain "|" separators. The previous "\|" was an invalid escape sequence
	    # that leaked a literal backslash into the prompt text.
	    return (
	        f"Alert {alert.alert_id} | severity={alert.severity} | category={alert.category.value}\n"
	        f"Host: {alert.host} | User: {alert.user}\n"
	        f"Summary: {alert.summary}\n\n"
	        f"Log window:\n{log_lines}\n\n"
	        "Triage this alert."
	    )


	def render_defender_target(action: TriageAction, cited_log_id: str, rationale: str) -> str:
	    """Build the gold three-line defender response used as the SFT target.

	    The lines are, in order, ``Action: <action.value>``,
	    ``CitedLog: <cited_log_id>`` and ``Rationale: <rationale>``, joined by
	    newlines with no trailing newline.
	    """
	    response_lines = (
	        f"Action: {action.value}",
	        f"CitedLog: {cited_log_id}",
	        f"Rationale: {rationale}",
	    )
	    return "\n".join(response_lines)


	@dataclass
	class ParsedDefenderResponse:
	    """Structured result of parsing one defender model output.

	    Produced by `parse_defender_response`; fields that could not be
	    extracted are left at their "missing" value (``None`` or ``""``).
	    """

	    # Recognized triage action, or None when absent or not a valid TriageAction.
	    action: Optional[TriageAction]
	    # Log id captured from the CitedLog line, or None when that line is absent.
	    cited_log_id: Optional[str]
	    # Stripped rationale text; empty string when no rationale was found.
	    rationale: str
	    # True only when action, cited_log_id and rationale were all extracted.
	    format_ok: bool


	# Field extractors for the three-line defender response. Whitespace around
	# the colon (and between "Cited" and "Log") is optional, so the canonical
	# "Action: x" / "CitedLog: y" / "Rationale: z" layout matches as well as
	# looser spellings such as "cited log : y". Matching is case-insensitive.
	_ACTION_RE = re.compile(r"action\s*:\s*([a-z_]+)", re.IGNORECASE)
	_CITE_RE = re.compile(r"cited\s*log\s*:\s*([A-Za-z0-9\-_]+)", re.IGNORECASE)
	# DOTALL lets the rationale capture run to the end of the text, newlines included.
	_RATIONALE_RE = re.compile(r"rationale\s*:\s*(.+)", re.IGNORECASE | re.DOTALL)


	def parse_defender_response(text: str) -> ParsedDefenderResponse:
	    """Extract action, cited log id and rationale from a defender output.

	    Parsing is best-effort: each field is searched for independently and a
	    missing field is reported as ``None`` (action, cited log) or ``""``
	    (rationale) rather than raising. An action name that is not a valid
	    `TriageAction` value is treated as missing.

	    ``format_ok`` is True only when all three fields were extracted and the
	    action is a recognized `TriageAction`; GRPO rollouts can use it as a
	    small +0.05 format-compliance bonus.
	    """
	    found_action = _ACTION_RE.search(text)
	    found_cite = _CITE_RE.search(text)
	    found_rationale = _RATIONALE_RE.search(text)

	    parsed_action: Optional[TriageAction] = None
	    if found_action is not None:
	        token = found_action.group(1).lower()
	        try:
	            parsed_action = TriageAction(token)
	        except ValueError:
	            # The captured word is not one of the enum's values.
	            parsed_action = None

	    cited_id = None if found_cite is None else found_cite.group(1)
	    rationale_text = "" if found_rationale is None else found_rationale.group(1).strip()

	    return ParsedDefenderResponse(
	        action=parsed_action,
	        cited_log_id=cited_id,
	        rationale=rationale_text,
	        format_ok=bool(parsed_action and cited_id and rationale_text),
	    )


	def to_chat_messages(alert: Alert, events: list[Event]) -> list[Dict[str, str]]:
	    """Build the two-message chat for one defender turn.

	    Returns a list holding the system message (``SYSTEM_PROMPT``) followed by
	    the user message rendered by `render_defender_prompt` for this alert and
	    log window. Each message is a dict with ``role`` and ``content`` keys.
	    """
	    system_message = {"role": "system", "content": SYSTEM_PROMPT}
	    user_message = {"role": "user", "content": render_defender_prompt(alert, events)}
	    return [system_message, user_message]


	# Public API of this module: the names exported by a star-import.
	# Underscore-prefixed helpers and the compiled regexes are deliberately absent.
	__all__ = [
	"SYSTEM_PROMPT",
	"render_defender_prompt",
	"render_defender_target",
	"parse_defender_response",
	"to_chat_messages",
	"ParsedDefenderResponse",
	]