Spaces:
Runtime error
Runtime error
| """Check 10: Realism review — LLM advisory on scenario plausibility. | |
| Uses LiteLLM to review task briefings for leakage and overall realism. | |
| Always ``advisory=True``: can trigger a retry but never overrides a | |
| mechanical pass. | |
| The LLM never sees flag values or golden-path commands — only summaries | |
| and briefings. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import os | |
| from typing import Any | |
| from open_range.builder.prompts import REALISM_REVIEW_PROMPT | |
| from open_range.protocols import CheckResult, ContainerSet, SnapshotSpec | |
| logger = logging.getLogger(__name__) | |
class RealismReviewCheck:
    """LLM-based realism review. Always advisory.

    Sends a redacted summary of the snapshot (briefings, vulnerability types
    and hosts, topology hosts, golden-path length, tier) to an LLM through
    LiteLLM and converts the JSON verdict into a ``CheckResult``. Every
    result returned by this check carries ``advisory=True``.
    """

    def __init__(self, model: str | None = None) -> None:
        """Select the reviewer model.

        Args:
            model: LiteLLM model identifier. When falsy, the
                ``OPENRANGE_VALIDATOR_MODEL`` environment variable is used,
                falling back to ``"azure/gpt-5.2-codex"``.
        """
        self.model = model or os.environ.get(
            "OPENRANGE_VALIDATOR_MODEL",
            "azure/gpt-5.2-codex",
        )

    @staticmethod
    def _normalise_issues(raw: Any) -> list[Any]:
        """Coerce the reviewer's ``issues`` field into a list.

        The LLM output is untrusted JSON: ``issues`` may be missing, ``null``,
        a bare string, or some other scalar. Returning a list in every case
        keeps the later ``"; ".join(...)`` from raising on ``None`` and from
        splitting a single string into individual characters.
        """
        if raw is None:
            return []
        if isinstance(raw, (list, tuple)):
            return list(raw)
        return [raw]

    async def check(self, snapshot: SnapshotSpec, containers: ContainerSet) -> CheckResult:
        """Ask the LLM to review the snapshot and report its verdict.

        Args:
            snapshot: Snapshot whose briefings, vulnerabilities, topology and
                golden-path length are summarised for the reviewer.
            containers: Not used by this check.

        Returns:
            A ``CheckResult`` named ``"realism_review"`` with
            ``advisory=True``. ``passed`` mirrors the reviewer's ``"pass"``
            field; when LiteLLM is not importable or the call/parsing fails,
            the result is a pass with an explanatory note in ``details``.
        """
        try:
            import litellm  # noqa: F811
        except ImportError:
            # NOTE(review): the "β" in the runtime strings of this method
            # looks like a mis-encoded dash — confirm against the upstream
            # file before changing it.
            return CheckResult(
                name="realism_review",
                passed=True,
                advisory=True,
                details={"note": "litellm not installed β skipping advisory review"},
            )

        # Build a redacted summary — never expose flag values or golden-path
        # commands to the reviewer LLM (only the golden path's length is sent).
        tier = snapshot.topology.get("tier", 1)
        summary = {
            "task_briefings": {
                "red_briefing": snapshot.task.red_briefing,
                "blue_briefing": snapshot.task.blue_briefing,
            },
            "vuln_types": [v.type for v in snapshot.truth_graph.vulns],
            "vuln_hosts": [v.host for v in snapshot.truth_graph.vulns],
            "topology_hosts": snapshot.topology.get("hosts", []),
            "golden_path_length": len(snapshot.golden_path),
            "tier": tier,
        }

        try:
            kwargs: dict[str, Any] = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": REALISM_REVIEW_PROMPT},
                    {"role": "user", "content": json.dumps(summary)},
                ],
                "response_format": {"type": "json_object"},
            }
            # Codex models don't support temperature
            if "codex" not in self.model.lower():
                kwargs["temperature"] = 0.0

            response = await litellm.acompletion(**kwargs)
            review = json.loads(response.choices[0].message.content)
            passed = bool(review.get("pass", False))
            # Normalise inside the guarded section so a malformed "issues"
            # value can never escape as an unhandled exception further down.
            issues = self._normalise_issues(review.get("issues"))
        except Exception as exc:  # noqa: BLE001
            # LLM failure should not block validation — degrade gracefully.
            logger.warning("Realism review LLM call failed: %s", exc)
            return CheckResult(
                name="realism_review",
                passed=True,
                advisory=True,
                details={"note": f"LLM review failed ({exc}) β skipping"},
            )

        return CheckResult(
            name="realism_review",
            passed=passed,
            advisory=True,
            details={"issues": issues, "model": self.model},
            error="" if passed else "; ".join(str(i) for i in issues),
        )