TestForge / agent.py
perceptron01's picture
Upload 7 files
4c69128 verified
Raw
History Blame Contribute Delete
4.03 kB
"""Agent orchestration for TestForge."""
from __future__ import annotations
import shutil
import tempfile
from dataclasses import dataclass
from pathlib import Path
from analyzer import Analysis, analyze
from generator import GeneratedSuite, deterministic_inputs, generate_suite, write_suite
from inject import reset_sample
from model_suggest import suggest_extra_inputs
from mutator import MUTATIONS, Mutation, MutationScore, mutation_score
from patch import write_patch
from runner import RunResult, run_pytest
# Directory that contains this module; the paths below are anchored to it.
ROOT = Path(__file__).resolve().parents[0]
# The `samples` directory that sits next to this module.
SAMPLES_ROOT = ROOT.joinpath("samples")
# The `legacy_repo` sample inside the samples directory.
LEGACY_ROOT = SAMPLES_ROOT.joinpath("legacy_repo")
# Parent directory under which per-run working directories are created.
RUNS_ROOT = ROOT.joinpath("generated", "runs")
@dataclass(frozen=True)
class ForgeArtifacts:
    """Immutable record of everything a single `forge_legacy_repo` run produced."""

    # Working directory the run operated in.
    run_dir: Path
    # Result of analyzing the sample package.
    analysis: Analysis
    # The generated test suite.
    suite: GeneratedSuite
    # Result of running the generated suite with pytest.
    green: RunResult
    # Mutation-testing score for the generated suite.
    mutation: MutationScore
    # Location of the patch file written at the end of the run.
    patch_path: Path
    # Human-readable progress messages, in the order they were recorded.
    logs: tuple[str, ...]
def forge_legacy_repo(
    max_cases_per_function: int = 4,
    mutations: tuple[Mutation, ...] = MUTATIONS,
    use_model: bool = False,
) -> ForgeArtifacts:
    """Run the full TestForge pipeline against the bundled sample.

    The steps are: create a fresh run directory, reset the sample into it,
    analyze the `legacy_repo` package, generate and write a test suite, run
    it with pytest, compute a mutation score, and write a patch file. A
    progress message is recorded for each step.

    With `use_model=False` (the default) no model is consulted. With
    `use_model=True`, `model_suggest.suggest_extra_inputs` is asked for up to
    two extra argument tuples for every analyzed function that takes at least
    one argument; whatever it returns is passed to `generate_suite` as
    `extra_inputs`.

    Returns a `ForgeArtifacts` holding the run directory, the intermediate
    results and the recorded log lines.
    """
    workspace = _fresh_run_dir()
    samples_dir = workspace / "samples"
    log_lines: list[str] = ["Target: bundled legacy_repo with zero tests"]

    # Stage the sample inside the run directory and inspect it.
    reset_sample(SAMPLES_ROOT, workspace)
    report = analyze(samples_dir / "legacy_repo", package="legacy_repo")
    log_lines.append(f"Analyzed {len(report.functions)} public functions")
    log_lines.extend(
        f" - {func.module}.{func.qualname}({', '.join(func.parameters)})"
        for func in report.functions
    )

    # Optional model assist: collect extra inputs keyed by "module.qualname".
    proposed: dict[str, list[tuple]] = {}
    if use_model:
        log_lines.append("Model assist: asking small coder model for extra edge cases")
        for func in report.functions:
            if func.arity != 0:
                baseline = deterministic_inputs(func, max_cases=max_cases_per_function)
                candidates = suggest_extra_inputs(func, baseline, max_new=2)
                if not candidates:
                    continue
                proposed[f"{func.module}.{func.qualname}"] = candidates
                log_lines.append(
                    f" +{len(candidates)} model-proposed case(s) for {func.qualname}"
                )
        if not proposed:
            log_lines.append(
                " no extra cases proposed (model/GPU unavailable or nothing new found)"
            )

    # Generate the suite and write it into the run directory.
    suite = generate_suite(
        report,
        max_cases_per_function=max_cases_per_function,
        extra_inputs=proposed,
    )
    write_suite(suite, workspace)
    log_lines.append(
        f"Captured {suite.assertion_count} behaviors into {len(suite.files)} test files"
    )

    # Run the generated suite; coverage may be absent (reported as None).
    suite_run = run_pytest(workspace, package_parent=samples_dir)
    log_lines.append(f"Suite run: {suite_run.summary}")
    coverage_line = (
        "Coverage: pytest-cov unavailable locally; suite still ran green"
        if suite_run.coverage is None
        else f"Coverage: 0% -> {suite_run.coverage:.0f}%"
    )
    log_lines.append(coverage_line)

    # Score the suite against the requested mutations.
    score = mutation_score(samples_dir, suite, workspace / "mutants", mutations=mutations)
    log_lines.append(f"Mutation score: {score.headline()} ({score.percent:.1f}%)")
    log_lines.extend(f" survived: {mutant.label}" for mutant in score.survived)

    patch_file = write_patch(suite, workspace / "testforge.patch")
    log_lines.append(f"Patch ready: {patch_file.name}")

    return ForgeArtifacts(
        run_dir=workspace,
        analysis=report,
        suite=suite,
        green=suite_run,
        mutation=score,
        patch_path=patch_file,
        logs=tuple(log_lines),
    )
def _fresh_run_dir(root: Path | None = None) -> Path:
    """Create and return a new, empty, uniquely named run directory.

    `root` is the parent directory for the run and defaults to `RUNS_ROOT`;
    it is created (including missing parents) if it does not exist yet. The
    returned directory is named `testforge_<random suffix>` and is created
    by `tempfile.mkdtemp`, so it is accessible only by the creating user.
    """
    parent = RUNS_ROOT if root is None else root
    parent.mkdir(parents=True, exist_ok=True)
    # mkdtemp creates the directory atomically under a unique name, so it is
    # guaranteed to exist and be empty. Removing and re-creating it would be
    # redundant and would give up the reserved name for a moment.
    return Path(tempfile.mkdtemp(prefix="testforge_", dir=parent))