from __future__ import annotations from pathlib import Path import gradio as gr from agent import forge_legacy_repo from inject import reset_sample, run_injected_suite LAST_RUN: dict[str, Path] = {} CSS = """ :root { --tf-ink: #171717; --tf-muted: #595959; --tf-line: #d6d3d1; --tf-panel: #fafaf9; --tf-accent: #0f766e; --tf-warn: #b45309; } .gradio-container { font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; color: var(--tf-ink); } .tf-title h1 { font-size: 34px; line-height: 1.1; letter-spacing: 0; margin-bottom: 4px; } .tf-title p { color: var(--tf-muted); margin-top: 0; } .tf-metric { border: 1px solid var(--tf-line); background: var(--tf-panel); border-radius: 8px; padding: 14px; } """ def forge(use_model: bool, max_cases: int): artifacts = forge_legacy_repo(max_cases_per_function=max_cases, use_model=use_model) LAST_RUN["run_dir"] = artifacts.run_dir log = "\n".join(artifacts.logs) preview = _suite_preview(artifacts.suite.files) result_md = _result_markdown(artifacts) return log, preview, result_md, str(artifacts.patch_path), gr.update(interactive=True) def inject_regression(): run_dir = LAST_RUN.get("run_dir") if run_dir is None: return "Forge tests first.", "No run yet." reset_sample(Path(__file__).resolve().parent / "samples", run_dir) result = run_injected_suite(run_dir) status = "caught" if not result.run.ok else "survived" summary = ( f"Injected: {result.mutation_label}\n" f"Result: {result.run.summary} ({status})\n\n" f"{_first_failure(result.run.stdout)}" ) return summary, result.run.stdout[-4000:] def _suite_preview(files: dict[str, str]) -> str: parts: list[str] = [] for name, content in sorted(files.items()): parts.append(f"# {name}\n{content}") return "\n\n".join(parts) def _result_markdown(artifacts) -> str: survived = artifacts.mutation.survived survived_text = "\n".join(f"- {item.label}" for item in survived) or "- None" coverage = "not installed locally" if artifacts.green.coverage is None else f"{artifacts.green.coverage:.0f}%" return f""" ### Forge Result **Suite:** {artifacts.green.summary} **Coverage:** 0% -> {coverage} **Mutation score:** {artifacts.mutation.headline()} ({artifacts.mutation.percent:.1f}%) **Surviving mutants** {survived_text} """ def _first_failure(output: str) -> str: lines = output.splitlines() for index, line in enumerate(lines): if line.startswith("FAILED ") or "E AssertionError" in line: return "\n".join(lines[max(0, index - 4) : index + 10]) return "\n".join(lines[-20:]) with gr.Blocks(title="TestForge Characterization Lab") as demo: gr.HTML( """
Freeze what legacy Python code does today. Refactor without fear.