Spaces:
Sleeping
Sleeping
File size: 8,159 Bytes
a5c1fa0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 | # server/fault_injection.py
"""
Dynamic environment perturbation system.
Injects controlled faults into repo variants to test agent robustness:
- Misleading comments on correct lines
- Red herring files that look buggy but aren't
- Flaky test markers (intermittent failures)
- Missing/extra imports
This separates "can the agent solve ideal problems" from
"can the agent handle real-world messy codebases."
"""
import os
import random
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field
@dataclass
class FaultConfig:
"""Configuration for which faults to inject."""
misleading_comments: bool = False # Add "BUG:" comments on correct lines
red_herring_files: bool = False # Add irrelevant files that look buggy
missing_imports: bool = False # Remove an import (agent must add it back)
noisy_docstrings: bool = False # Add misleading docstrings
enabled: bool = False # Master switch
@classmethod
def none(cls) -> "FaultConfig":
return cls(enabled=False)
@classmethod
def light(cls) -> "FaultConfig":
return cls(
misleading_comments=True,
red_herring_files=False,
missing_imports=False,
noisy_docstrings=True,
enabled=True,
)
@classmethod
def heavy(cls) -> "FaultConfig":
return cls(
misleading_comments=True,
red_herring_files=True,
missing_imports=True,
noisy_docstrings=True,
enabled=True,
)
# Templates for misleading comments
MISLEADING_COMMENTS = [
"# BUG: this line looks wrong but is actually correct",
"# TODO: fix this — seems like a potential issue",
"# HACK: temporary workaround, needs refactoring",
"# NOTE: this was recently changed and might be broken",
"# WARNING: edge case not handled here",
]
# Red herring file content
RED_HERRING_TEMPLATE = '''"""Utility module for {domain}."""
def {func_name}(data):
"""Process {domain} data."""
# BUG: this looks wrong but this file is not relevant to the failing tests
if not data:
return None
result = []
for item in data:
# TODO: this logic seems off — investigate
processed = str(item).upper() # Intentionally "suspicious" looking
result.append(processed)
return result
def {func_name2}(value, threshold=0):
"""Check {domain} threshold."""
# FIXME: comparison might be wrong
return value >= threshold # Actually correct
'''
RED_HERRING_VARIANTS = [
{"domain": "logging", "func_name": "process_logs", "func_name2": "check_log_level"},
{"domain": "metrics", "func_name": "aggregate_metrics", "func_name2": "is_above_threshold"},
{"domain": "config", "func_name": "parse_config", "func_name2": "validate_setting"},
]
@dataclass
class InjectionReport:
"""Report of what faults were injected."""
faults_injected: List[str] = field(default_factory=list)
files_modified: List[str] = field(default_factory=list)
files_added: List[str] = field(default_factory=list)
difficulty_multiplier: float = 1.0
def to_dict(self) -> dict:
return {
"faults_injected": self.faults_injected,
"files_modified": self.files_modified,
"files_added": self.files_added,
"difficulty_multiplier": self.difficulty_multiplier,
}
class FaultInjector:
"""
Injects controlled faults into a working repo directory.
Usage:
injector = FaultInjector(config=FaultConfig.light())
report = injector.inject(working_dir="/tmp/openenv_task1_variant_1_xxx/")
"""
def __init__(self, config: FaultConfig = None):
self.config = config or FaultConfig.none()
def inject(self, working_dir: str, meta: Dict[str, Any] = None) -> InjectionReport:
"""Apply all configured faults to the repo working directory."""
if not self.config.enabled:
return InjectionReport()
report = InjectionReport()
meta = meta or {}
if self.config.misleading_comments:
self._inject_misleading_comments(working_dir, meta, report)
if self.config.red_herring_files:
self._inject_red_herring_files(working_dir, report)
if self.config.noisy_docstrings:
self._inject_noisy_docstrings(working_dir, meta, report)
# Calculate difficulty multiplier
report.difficulty_multiplier = 1.0 + (len(report.faults_injected) * 0.1)
return report
def _inject_misleading_comments(self, working_dir: str, meta: Dict, report: InjectionReport):
"""Add misleading BUG/TODO comments to correct lines in source files."""
bug_files = set(meta.get("bug_files", []) + meta.get("files_to_implement", []))
for root, dirs, files in os.walk(working_dir):
dirs[:] = [d for d in dirs if d not in ("__pycache__", ".git", "tests")]
for fname in files:
if not fname.endswith(".py"):
continue
fpath = os.path.join(root, fname)
rel_path = os.path.relpath(fpath, working_dir)
# Only inject into files that are NOT the buggy ones
if rel_path in bug_files:
continue
try:
with open(fpath, "r") as f:
lines = f.readlines()
if len(lines) < 3:
continue
# Insert a misleading comment at a random line
comment = random.choice(MISLEADING_COMMENTS)
insert_line = random.randint(1, max(1, len(lines) - 1))
indent = " " if lines[insert_line - 1].startswith(" ") else ""
lines.insert(insert_line, f"{indent}{comment}\n")
with open(fpath, "w") as f:
f.writelines(lines)
report.faults_injected.append(f"misleading_comment:{rel_path}:{insert_line}")
report.files_modified.append(rel_path)
except Exception:
continue
def _inject_red_herring_files(self, working_dir: str, report: InjectionReport):
"""Add irrelevant files that look like they contain bugs."""
variant = random.choice(RED_HERRING_VARIANTS)
content = RED_HERRING_TEMPLATE.format(**variant)
src_dir = os.path.join(working_dir, "src")
if not os.path.exists(src_dir):
os.makedirs(src_dir, exist_ok=True)
filename = f"{variant['domain']}_utils.py"
filepath = os.path.join(src_dir, filename)
rel_path = f"src/{filename}"
try:
with open(filepath, "w") as f:
f.write(content)
report.faults_injected.append(f"red_herring_file:{rel_path}")
report.files_added.append(rel_path)
except Exception:
pass
def _inject_noisy_docstrings(self, working_dir: str, meta: Dict, report: InjectionReport):
"""Add misleading docstrings to confuse agent understanding."""
bug_files = meta.get("bug_files", [])
for bug_file in bug_files:
fpath = os.path.join(working_dir, bug_file)
if not os.path.exists(fpath):
continue
try:
with open(fpath, "r") as f:
content = f.read()
# Add a misleading module-level comment
noise = (
"# NOTE: All functions in this module have been thoroughly tested\n"
"# and verified to be correct as of the last code review.\n"
"# Do NOT modify without approval from the team lead.\n\n"
)
content = noise + content
with open(fpath, "w") as f:
f.write(content)
report.faults_injected.append(f"noisy_docstring:{bug_file}")
report.files_modified.append(bug_file)
except Exception:
continue
|