# NOTE(review): removed stray UI/export residue ("Spaces:" / "Sleeping" x2) that
# preceded the module and was not part of the source.
from __future__ import annotations

import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
class Sandbox:
    """Isolated working copy of one task's repository, plus agent tooling.

    The sandbox materializes the repo into a temporary directory — either by
    git-cloning/fetching a remote URL or by copying a bundled local fixture —
    and exposes bounded read/search/test primitives that are safe to drive
    from an agent loop (output truncated, path traversal blocked, timeouts
    enforced).
    """

    # Output caps so tool results stay bounded for the caller's context.
    _READ_LIMIT = 4000
    _OUTPUT_LIMIT = 2000
    # Directories never worth descending into when listing the repo.
    _SKIP_DIRS = ("node_modules", "__pycache__", ".git", "venv", ".tox")

    def __init__(self, task: dict):
        # Task row describing the episode (expects keys like repo_url, sha,
        # category — presumably strings; confirm against the dataset loader).
        self.task = task
        # Root of the temporary checkout; None until setup() succeeds.
        self.tmpdir: str | None = None
        # Shallow file listing built once during setup().
        self.file_tree: list[str] = []

    def setup(self) -> None:
        """Prepare a working copy of the repository for the episode.

        Raises:
            RuntimeError: wrapping any underlying failure; the partially
                created sandbox is cleaned up before raising.
        """
        self.tmpdir = tempfile.mkdtemp(prefix="flakysleuth_")
        repo_url = str(self.task.get("repo_url", "")).strip()
        sha = str(self.task.get("sha", "")).strip()
        try:
            if repo_url.startswith("fixture://"):
                self._copy_fixture_repo(repo_url)
            else:
                self._clone_repo(repo_url, sha)
            self.file_tree = self._build_file_tree()
        except Exception as exc:
            self.cleanup()
            raise RuntimeError(f"Sandbox setup failed: {exc}") from exc

    def read_file(self, relative_path: str) -> str | None:
        """Read a file relative to sandbox root. Returns None when not found/unsafe.

        The result is truncated to the first 4000 characters so a huge file
        cannot blow up the caller's context.
        """
        if not self.tmpdir:
            return None
        root = os.path.abspath(self.tmpdir)
        full_path = os.path.abspath(os.path.join(root, relative_path))
        # Path traversal guard: the resolved target must stay inside root.
        # commonpath raises ValueError for paths on different drives (Windows)
        # or mixed absolute/relative inputs — treat that like an unsafe path
        # instead of letting it propagate.
        try:
            if os.path.commonpath([root, full_path]) != root:
                return None
        except ValueError:
            return None
        if not os.path.isfile(full_path):
            return None
        try:
            with open(full_path, "r", encoding="utf-8", errors="replace") as handle:
                return handle.read()[: self._READ_LIMIT]
        except Exception:
            # Best-effort read: any I/O failure is reported as "not readable".
            return None

    def grep(self, pattern: str) -> str:
        """Search .py files in repo, preferring ripgrep and falling back to grep.

        Returns matching lines (truncated), a "no matches" message, or a
        human-readable error string — this method never raises.
        """
        if not self.tmpdir:
            return "ERROR: Sandbox not initialized"
        # "-e <pattern>" keeps patterns that start with "-" from being parsed
        # as command-line flags by either tool.
        candidate_commands = [
            ["rg", "-n", "--glob", "*.py", "-e", pattern, "."],
            ["grep", "-RIn", "--include=*.py", "-e", pattern, "."],
        ]
        result = None
        for cmd in candidate_commands:
            try:
                result = subprocess.run(
                    cmd,
                    cwd=self.tmpdir,
                    capture_output=True,
                    text=True,
                    timeout=10,
                )
                break
            except FileNotFoundError:
                # Tool not installed in this runtime; try the next one.
                continue
            except subprocess.TimeoutExpired:
                return "Search timed out"
            except Exception as exc:
                return f"Search error: {exc}"
        if result is None:
            return (
                "Search error: neither 'rg' (ripgrep) nor 'grep' is installed in the "
                "runtime."
            )
        output = (result.stdout + result.stderr).strip()[: self._OUTPUT_LIMIT]
        return output if output else f"No matches found for: {pattern}"

    def run_test(self, pytest_test_name: str) -> str:
        """Run a single pytest target for non-order-dependent categories.

        Order-dependent flakes need a particular execution order that a lone
        test invocation cannot reproduce, so those categories get a message
        steering the agent toward static analysis instead.
        """
        if not self.tmpdir:
            return "ERROR: Sandbox not initialized"
        category = str(self.task.get("category", "")).strip()
        if category in ("OD", "OD-Brit", "OD-Vic"):
            return (
                "Test execution skipped for order-dependent tests. "
                "Use read_file and search_code for static analysis. "
                "Look for shared state, missing cleanup, or global mutations."
            )
        try:
            result = subprocess.run(
                [
                    # Same interpreter as this process — a bare "python" may
                    # resolve to a different installation on PATH.
                    sys.executable,
                    "-m",
                    "pytest",
                    pytest_test_name,
                    "--tb=short",
                    "-x",
                    # Per-test timeout; assumes pytest-timeout is available in
                    # the target environment — TODO confirm.
                    "--timeout=30",
                    "-q",
                ],
                cwd=self.tmpdir,
                capture_output=True,
                text=True,
                timeout=60,
            )
            output = (result.stdout + result.stderr).strip()[: self._OUTPUT_LIMIT]
            return output or "Test completed with no output"
        except subprocess.TimeoutExpired:
            return "Test execution timed out (>60s)"
        except Exception as exc:
            return f"Test execution error: {exc}"

    def cleanup(self) -> None:
        """Delete the temporary checkout and reset cached state (idempotent)."""
        if self.tmpdir and os.path.exists(self.tmpdir):
            shutil.rmtree(self.tmpdir, ignore_errors=True)
        self.tmpdir = None
        self.file_tree = []

    def _clone_repo(self, repo_url: str, sha: str) -> None:
        """Materialize the repo at `sha` (exact fetch) or via shallow clone.

        Raises:
            ValueError: when repo_url is empty.
            RuntimeError: when any git step fails (message includes git output).
        """
        if not repo_url:
            raise ValueError("Missing repo_url")
        assert self.tmpdir is not None
        sha = (sha or "").strip()
        # Robust path: fetch the exact commit directly — works even when the
        # commit is not reachable from any branch head in shallow history.
        # "nan" guards against a missing value stringified by the dataset.
        if sha and sha.lower() != "nan":
            self._git(["git", "init", self.tmpdir], 20, "git init failed")
            self._git(
                ["git", "-C", self.tmpdir, "remote", "add", "origin", repo_url],
                15,
                "git remote add failed",
            )
            self._git(
                ["git", "-C", self.tmpdir, "fetch", "--depth=1", "origin", sha],
                120,
                "git fetch exact sha failed",
            )
            self._git(
                ["git", "-C", self.tmpdir, "checkout", "--detach", "FETCH_HEAD"],
                30,
                "git checkout fetched sha failed",
            )
            return
        # Fallback for rows without a SHA: recent history is good enough.
        self._git(
            ["git", "clone", "--depth=50", repo_url, self.tmpdir],
            120,
            "git clone failed",
        )

    @staticmethod
    def _git(cmd: list[str], timeout: int, error_prefix: str) -> None:
        """Run one git command; raise RuntimeError with its output on failure."""
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        if proc.returncode != 0:
            detail = proc.stderr.strip() or proc.stdout.strip()
            raise RuntimeError(f"{error_prefix}: {detail}")

    def _copy_fixture_repo(self, repo_url: str) -> None:
        """Copy a bundled fixture repo ("fixture://<name>") into the sandbox.

        Raises:
            ValueError: when the fixture name is missing.
            FileNotFoundError: when the fixture directory does not exist.
        """
        fixture_name = repo_url.replace("fixture://", "", 1).strip("/")
        if not fixture_name:
            raise ValueError("Fixture name missing in repo_url")
        # Fixtures live at <project>/dataset/fixtures/<name> relative to this
        # module's parent package.
        fixture_dir = (
            Path(__file__).resolve().parent.parent
            / "dataset"
            / "fixtures"
            / fixture_name
        )
        if not fixture_dir.exists():
            raise FileNotFoundError(f"Fixture repo not found: {fixture_dir}")
        assert self.tmpdir is not None
        shutil.copytree(fixture_dir, self.tmpdir, dirs_exist_ok=True)

    def _build_file_tree(self) -> list[str]:
        """List up to 100 repo-relative files at depth <= 2.

        Hidden and vendored directories are pruned in place so os.walk never
        descends into them; the walk stops as soon as the cap is reached.
        """
        assert self.tmpdir is not None
        result: list[str] = []
        for root, dirs, files in os.walk(self.tmpdir):
            dirs[:] = [
                d
                for d in dirs
                if not d.startswith(".") and d not in self._SKIP_DIRS
            ]
            # Depth relative to the sandbox root (0 == root itself). relpath
            # avoids the substring bug of `root.replace(self.tmpdir, "")`,
            # which miscounts if the tmpdir string recurs in a path component.
            rel_root = os.path.relpath(root, self.tmpdir)
            depth = 0 if rel_root == "." else rel_root.count(os.sep) + 1
            if depth > 2:
                continue
            for file_name in files:
                result.append(
                    file_name
                    if rel_root == "."
                    else os.path.join(rel_root, file_name)
                )
                if len(result) >= 100:
                    return result
        return result