Spaces:

build-small-hackathon
/

TestForge

Running on Zero

App Files Files Community

TestForge / app.py

perceptron01

Upload 7 files

4c69128 verified 12 days ago

Raw

History Blame Contribute Delete

4.3 kB

	from __future__ import annotations

	from pathlib import Path

	import gradio as gr

	from agent import forge_legacy_repo
	from inject import reset_sample, run_injected_suite


	# Most recent forge run, stored under the single key "run_dir".
	# Written by forge() and read back by inject_regression().
	# NOTE(review): this is module-level state, so it is presumably shared by every
	# browser session served by this process — confirm whether per-session
	# gr.State is needed.
	LAST_RUN: dict[str, Path] = {}


	# Custom stylesheet handed to demo.launch(css=...). Declares the --tf-* colour
	# tokens and rules for .gradio-container, .tf-title and .tf-metric.
	CSS = """
	:root {
	--tf-ink: #171717;
	--tf-muted: #595959;
	--tf-line: #d6d3d1;
	--tf-panel: #fafaf9;
	--tf-accent: #0f766e;
	--tf-warn: #b45309;
	}
	.gradio-container {
	font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
	color: var(--tf-ink);
	}
	.tf-title h1 {
	font-size: 34px;
	line-height: 1.1;
	letter-spacing: 0;
	margin-bottom: 4px;
	}
	.tf-title p {
	color: var(--tf-muted);
	margin-top: 0;
	}
	.tf-metric {
	border: 1px solid var(--tf-line);
	background: var(--tf-panel);
	border-radius: 8px;
	padding: 14px;
	}
	"""


	def forge(use_model: bool, max_cases: int):
	    """Run the forge pipeline and return the values for the five UI outputs.

	    Args:
	        use_model: Forwarded to ``forge_legacy_repo`` as ``use_model``.
	        max_cases: Forwarded to ``forge_legacy_repo`` as
	            ``max_cases_per_function``.

	    Returns:
	        A 5-tuple: the joined agent log, the generated-suite preview, the
	        result Markdown, the patch path as a string, and a ``gr.update`` that
	        makes the receiving component interactive.
	    """
	    run = forge_legacy_repo(max_cases_per_function=max_cases, use_model=use_model)
	    # Remember where this run lives so inject_regression() can reuse it.
	    LAST_RUN["run_dir"] = run.run_dir
	    return (
	        "\n".join(run.logs),
	        _suite_preview(run.suite.files),
	        _result_markdown(run),
	        str(run.patch_path),
	        gr.update(interactive=True),
	    )


	def inject_regression():
	    """Re-run the forged suite against an injected mutation and report on it.

	    Returns:
	        A ``(summary, pytest_output)`` pair. When no forge run has been
	        recorded in ``LAST_RUN`` a fixed pair of placeholder messages is
	        returned instead. ``pytest_output`` is the last 4000 characters of the
	        run's stdout.
	    """
	    run_dir = LAST_RUN.get("run_dir")
	    if run_dir is None:
	        return "Forge tests first.", "No run yet."

	    samples_dir = Path(__file__).resolve().parent / "samples"
	    reset_sample(samples_dir, run_dir)

	    outcome = run_injected_suite(run_dir)
	    suite_run = outcome.run
	    # A run that is not ok means the suite noticed the injected change.
	    verdict = "survived" if suite_run.ok else "caught"

	    header = f"Injected: {outcome.mutation_label}\nResult: {suite_run.summary} ({verdict})\n\n"
	    report = header + f"{_first_failure(suite_run.stdout)}"
	    return report, suite_run.stdout[-4000:]


	def _suite_preview(files: dict[str, str]) -> str:
	parts: list[str] = []
	for name, content in sorted(files.items()):
	parts.append(f"# {name}\n{content}")
	return "\n\n".join(parts)


	def _result_markdown(artifacts) -> str:
	survived = artifacts.mutation.survived
	survived_text = "\n".join(f"- {item.label}" for item in survived) or "- None"
	coverage = "not installed locally" if artifacts.green.coverage is None else f"{artifacts.green.coverage:.0f}%"
	return f"""
	### Forge Result

	Suite: {artifacts.green.summary}
	Coverage: 0% -> {coverage}
	Mutation score: {artifacts.mutation.headline()} ({artifacts.mutation.percent:.1f}%)

	Surviving mutants
	{survived_text}
	"""


	def _first_failure(output: str) -> str:
	lines = output.splitlines()
	for index, line in enumerate(lines):
	if line.startswith("FAILED ") or "E AssertionError" in line:
	return "\n".join(lines[max(0, index - 4) : index + 10])
	return "\n".join(lines[-20:])


	# Build the Gradio UI and wire the two buttons to their handlers.
	# NOTE(review): indentation is not visible in this view, so which components
	# sit inside each gr.Row() below should be confirmed against the real file.
	with gr.Blocks(title="TestForge Characterization Lab") as demo:
	# Title banner; the tf-title class is styled by the CSS constant.
	gr.HTML(
	"""
	<div class="tf-title">
	<h1>TestForge -- Characterization Lab</h1>
	<p>Freeze what legacy Python code does today. Refactor without fear.</p>
	</div>
	"""
	)
	# Run controls.
	with gr.Row():
	# Single-choice, non-interactive dropdown; it is not passed as an input to
	# either click handler below.
	target = gr.Dropdown(
	["bundled legacy_repo"],
	value="bundled legacy_repo",
	label="Target",
	interactive=False,
	)
	use_model = gr.Checkbox(False, label="Use small coder model")
	max_cases = gr.Slider(2, 4, value=4, step=1, label="Cases per function")
	forge_btn = gr.Button("Forge Tests", variant="primary")

	# Outputs of a forge run: agent log and rendered result summary.
	with gr.Row():
	log = gr.Textbox(label="Agent log", lines=14)
	results = gr.Markdown(label="Results")

	tests_preview = gr.Code(label="Generated tests", language="python", lines=18)

	with gr.Row():
	# Starts disabled; forge() returns gr.update(interactive=True) for it.
	inject_btn = gr.Button("Inject a regression", interactive=False)
	patch_file = gr.File(label="Download PR patch")

	# Outputs of the regression-injection step.
	with gr.Row():
	inject_summary = gr.Textbox(label="Inject result", lines=8)
	inject_output = gr.Textbox(label="Pytest output", lines=8)

	# forge() returns five values, mapped in order onto these five outputs.
	forge_btn.click(
	forge,
	inputs=[use_model, max_cases],
	outputs=[log, tests_preview, results, patch_file, inject_btn],
	)
	# inject_regression() takes no inputs and returns (summary, pytest output).
	inject_btn.click(inject_regression, outputs=[inject_summary, inject_output])


	if __name__ == "__main__":
	    # Launch only when run as a script; importing this module just builds `demo`.
	    # The custom stylesheet is supplied here, at launch time.
	    demo.launch(show_error=True, css=CSS, ssr_mode=False)