open-range / src /open_range /validator /realism_review.py
Aaron Brown
Production all-in-one container with real service execution
49d1c75
"""Check 10: Realism review — LLM advisory on scenario plausibility.
Uses LiteLLM to review task briefings for leakage and overall realism.
Always ``advisory=True``: can trigger a retry but never overrides a
mechanical pass.
The LLM never sees flag values or golden-path commands — only summaries
and briefings.
"""
from __future__ import annotations
import json
import logging
import os
from typing import Any
from open_range.builder.prompts import REALISM_REVIEW_PROMPT
from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec
logger = logging.getLogger(__name__)
class RealismReviewCheck:
    """LLM-based realism review. Always advisory.

    Sends a redacted summary of a snapshot to an LLM through LiteLLM and
    turns the model's JSON verdict into a ``CheckResult``. Every result this
    class produces has ``advisory=True``. When LiteLLM is missing or the LLM
    call/parsing fails, the check reports ``passed=True`` with an explanatory
    note instead of raising.
    """

    def __init__(self, model: str | None = None) -> None:
        """Select the reviewer model.

        Args:
            model: LiteLLM model identifier. When falsy, the
                ``OPENRANGE_VALIDATOR_MODEL`` environment variable is used,
                falling back to ``"azure/gpt-5.2-codex"``.
        """
        self.model = model or os.environ.get(
            "OPENRANGE_VALIDATOR_MODEL",
            "azure/gpt-5.2-codex",
        )

    @staticmethod
    def _normalize_issues(raw: Any) -> list[Any]:
        """Coerce the reviewer's ``issues`` value into a list.

        The value comes straight from LLM-produced JSON, so it may be
        missing/``null``, a single string, or some other scalar instead of
        the expected array. ``None`` becomes ``[]``, lists and tuples are
        copied into a list, and anything else is wrapped in a one-element
        list so a lone string is not later split into characters.
        """
        if raw is None:
            return []
        if isinstance(raw, (list, tuple)):
            return list(raw)
        return [raw]

    async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
        """Ask the LLM whether the snapshot's briefings look realistic.

        Args:
            snapshot: Source of the briefings, vulnerability types/hosts,
                topology and golden-path length that go into the summary.
            containers: Not used by this check.

        Returns:
            A ``CheckResult`` named ``"realism_review"`` with
            ``advisory=True``. ``passed`` mirrors the reviewer's ``"pass"``
            field; on a failed review ``error`` holds the issues joined with
            ``"; "``. If LiteLLM is unavailable or the call fails, the result
            is ``passed=True`` with a note in ``details``.
        """
        try:
            import litellm  # noqa: F811
        except ImportError:
            return CheckResult(
                name="realism_review",
                passed=True,
                advisory=True,
                details={"note": "litellm not installed — skipping advisory review"},
            )

        # Build a redacted summary — never expose flag values or golden-path
        # commands to the reviewer LLM (only the golden path's length is sent).
        tier = snapshot.topology.get("tier", 1)
        summary = {
            "task_briefings": {
                "red_briefing": snapshot.task.red_briefing,
                "blue_briefing": snapshot.task.blue_briefing,
            },
            "vuln_types": [v.type for v in snapshot.truth_graph.vulns],
            "vuln_hosts": [v.host for v in snapshot.truth_graph.vulns],
            "topology_hosts": snapshot.topology.get("hosts", []),
            "golden_path_length": len(snapshot.golden_path),
            "tier": tier,
        }

        try:
            kwargs: dict[str, Any] = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": REALISM_REVIEW_PROMPT},
                    {"role": "user", "content": json.dumps(summary)},
                ],
                "response_format": {"type": "json_object"},
            }
            # Codex models don't support temperature
            if "codex" not in self.model.lower():
                kwargs["temperature"] = 0.0

            response = await litellm.acompletion(**kwargs)
            review = json.loads(response.choices[0].message.content)
            if not isinstance(review, dict):
                # Valid JSON but not an object: treat as a failed LLM call
                # rather than relying on an incidental AttributeError.
                raise ValueError(
                    f"expected a JSON object, got {type(review).__name__}"
                )
            passed = bool(review.get("pass", False))
            # Normalise here, inside the guarded section, so a malformed
            # "issues" value can never raise while building the result below.
            issues = self._normalize_issues(review.get("issues"))
        except Exception as exc:  # noqa: BLE001
            # LLM failure should not block validation — degrade gracefully.
            logger.warning("Realism review LLM call failed: %s", exc)
            return CheckResult(
                name="realism_review",
                passed=True,
                advisory=True,
                details={"note": f"LLM review failed ({exc}) — skipping"},
            )

        return CheckResult(
            name="realism_review",
            passed=passed,
            advisory=True,
            details={"issues": issues, "model": self.model},
            error="" if passed else "; ".join(str(i) for i in issues),
        )