cicd-debugger-env / env /hidden_tests.py
Lishika's picture
finally added all
ae94737
from __future__ import annotations
from typing import Any
from env.graders.deterministic import DeterministicGrader
class HiddenTestRunner:
    """Evaluates whether a fix generalizes across deterministic CI variants.

    A "variant" is the original config text with one set of literal
    substring replacements applied (for example swapping the runner OS or
    bumping a tool version). The same replacement set is applied to both the
    candidate fix and the expected config before they are handed to the
    grader, so a fix is rewarded for matching the reference under every
    variant.
    """

    def __init__(self, grader: DeterministicGrader | None = None, pass_threshold: float = 0.65):
        """Store the grader and the minimum score that counts as a pass.

        Args:
            grader: Object exposing ``grade(fixed, expected, metadata)``.
                A fresh ``DeterministicGrader`` is created when omitted.
            pass_threshold: A variant passes when its grade is greater than
                or equal to this value.
        """
        self.grader = grader or DeterministicGrader()
        self.pass_threshold = pass_threshold

    def generate_variants(self, config_text: str) -> list[str]:
        """Return the distinct variants of ``config_text`` in generation order.

        Replacement sets that do not match anything reproduce the base text,
        so duplicates are dropped; the first occurrence of each text wins.
        A falsy ``config_text`` is treated as the empty string.
        """
        base = config_text or ""
        # dict.fromkeys keeps first-seen order while removing duplicates.
        return list(
            dict.fromkeys(
                self._apply_replacements(base, replacements)
                for replacements in self._variant_replacement_sets()
            )
        )

    def evaluate_fix(
        self,
        fixed_config: str,
        task: dict[str, Any] | None = None,
        expected_config: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> float:
        """Return the fraction of variants on which the fix passes grading.

        Args:
            fixed_config: Candidate config text produced by the fix.
            task: Optional mapping; its ``"expected_config"`` entry is used
                as the reference when ``expected_config`` is not given.
            expected_config: Reference config text. Takes precedence over
                the value found in ``task`` when truthy.
            metadata: Passed through unchanged to ``grader.grade``.

        Returns:
            ``passed / total`` rounded to four decimals, or ``0.0`` when the
            candidate or the reference is blank or there are no variants.

        Note:
            Every replacement set is scored, including sets that leave both
            texts unchanged; unlike ``generate_variants`` no de-duplication
            is performed here.
        """
        fixed_config = fixed_config or ""
        task = task or {}
        metadata = metadata or {}

        if expected_config:
            expected = expected_config
        else:
            task_expected = task.get("expected_config")
            # An explicit None must count as "no reference"; str(None) would
            # otherwise yield the literal text "None" and be graded against.
            expected = "" if task_expected is None else str(task_expected)

        if not fixed_config.strip() or not expected.strip():
            return 0.0

        total = 0
        passed = 0
        for replacements in self._variant_replacement_sets():
            fixed_variant = self._apply_replacements(fixed_config, replacements)
            expected_variant = self._apply_replacements(expected, replacements)
            score = self.grader.grade(fixed_variant, expected_variant, metadata)
            total += 1
            if score >= self.pass_threshold:
                passed += 1

        # Guards against an overriding subclass that supplies no variants.
        if total == 0:
            return 0.0
        return round(passed / total, 4)

    def _variant_replacement_sets(self) -> list[tuple[tuple[str, str], ...]]:
        """Return the replacement sets, one per variant.

        Each set is a tuple of ``(old, new)`` literal substring pairs. The
        leading empty tuple stands for the unmodified base config.
        """
        return [
            tuple(),
            (("ubuntu-latest", "windows-latest"),),
            (("windows-latest", "ubuntu-latest"),),
            (("node-version: 16", "node-version: 18"),),
            (("node-version: \"16\"", "node-version: \"18\""),),
            (("python-version: \"3.10\"", "python-version: \"3.12\""),),
            (("NODE_ENV=production", "NODE_ENV=development"),),
        ]

    def _apply_replacements(self, text: str, replacements: tuple[tuple[str, str], ...]) -> str:
        """Apply each ``(old, new)`` pair to ``text`` in order and return it.

        Replacements are sequential, so a later pair sees the output of the
        earlier ones.
        """
        output = text
        for old, new in replacements:
            output = output.replace(old, new)
        return output