Spaces:

SouravNath
/

repomind-api

Running

App Files Files Community

repomind-api / swe_bench /loader.py

SouravNath

fix: auto-load .env in llm_client; add SWEBenchLoader class to loader

84fad73 3 days ago

raw

history blame contribute delete

7.63 kB

	"""
	swe_bench/loader.py
	───────────────────
	Load and iterate over SWE-bench Lite instances.

	SWE-bench Lite: 300 real GitHub issues from popular Python repositories,
	each with a verified patch that makes all tests pass.

	Schema per instance:
	instance_id : str — unique identifier e.g. "django__django-12345"
	repo : str — "owner/repo"
	base_commit : str — SHA of the commit where the bug exists
	problem_statement : str — the GitHub issue text
	patch : str — gold unified diff (the correct fix)
	test_patch : str — tests that were added / modified to verify the fix
	PASS_TO_PASS : list — tests that must still pass
	FAIL_TO_PASS : list — tests that must now pass (previously failing)
	"""
	from __future__ import annotations

	import json
	import logging
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Iterator

	logger = logging.getLogger(__name__)


	@dataclass
	class SWEInstance:
	    """A single SWE-bench problem instance.

	    Field names follow the SWE-bench schema, except that the dataset's
	    ``FAIL_TO_PASS`` / ``PASS_TO_PASS`` columns are stored here as the
	    already-parsed lists ``fail_to_pass`` / ``pass_to_pass``.
	    """

	    instance_id: str
	    repo: str  # "owner/repo"
	    base_commit: str
	    problem_statement: str
	    patch: str  # gold patch — used only for evaluation
	    test_patch: str  # tests that verify the fix
	    fail_to_pass: list[str]  # tests that must now pass
	    pass_to_pass: list[str]  # regression tests that must still pass
	    created_at: str = ""
	    version: str = ""
	    environment_setup_commit: str = ""

	    @property
	    def repo_name(self) -> str:
	        """e.g. 'django__django' from 'django/django'."""
	        return self.repo.replace("/", "__")

	    @property
	    def org(self) -> str:
	        """Text before the first '/' in ``repo`` (the whole string if no '/')."""
	        return self.repo.split("/")[0]

	    @property
	    def project(self) -> str:
	        """Second '/'-separated component of ``repo``, or '' if there is none.

	        ``repo`` may be empty or slash-less (``_dict_to_instance`` defaults it
	        to ''), so a missing component yields '' instead of raising IndexError.
	        """
	        parts = self.repo.split("/")
	        return parts[1] if len(parts) > 1 else ""


	def load_swebench_lite(
	    dataset_name: str = "princeton-nlp/SWE-bench_Lite",
	    split: str = "test",
	    max_instances: int | None = None,
	    instance_ids: list[str] | None = None,
	    cache_dir: Path | None = None,
	) -> list[SWEInstance]:
	    """
	    Load SWE-bench Lite from a local JSON cache or from HuggingFace.

	    Args:
	        dataset_name: HuggingFace dataset identifier.
	        split: Dataset split — 'test' (300 issues) or 'dev' (23 issues).
	        max_instances: Limit for quick debugging (None = all). Negative
	            values are treated as 0 rather than slicing from the end.
	        instance_ids: Filter to specific instance IDs. None or an empty
	            list disables the filter.
	        cache_dir: Local cache directory; saves downloaded data as JSON.
	            NOTE: the cache file name depends on ``split`` only, not on
	            ``dataset_name``.

	    Returns:
	        List of SWEInstance objects (ID filter applied before the limit).

	    Raises:
	        ImportError: If a download is needed and 'datasets' is not installed.
	    """
	    cache_path: Path | None = None
	    if cache_dir is not None:
	        cache_dir = Path(cache_dir)
	        cache_dir.mkdir(parents=True, exist_ok=True)
	        cache_path = cache_dir / f"swebench_lite_{split}.json"

	    # None means "nothing usable loaded yet"; an empty cached list is still valid.
	    instances: list[SWEInstance] | None = None

	    # ── Try local cache first ─────────────────────────────────────────────
	    if cache_path is not None and cache_path.exists():
	        logger.info("Loading SWE-bench Lite from local cache: %s", cache_path)
	        try:
	            raw = json.loads(cache_path.read_text(encoding="utf-8"))
	        except ValueError as exc:
	            # JSONDecodeError and UnicodeDecodeError are both ValueErrors: a
	            # truncated or corrupt cache must not break every later call.
	            logger.warning(
	                "Ignoring unreadable cache %s (%s); downloading again", cache_path, exc
	            )
	        else:
	            if isinstance(raw, list) and all(isinstance(r, dict) for r in raw):
	                instances = [_dict_to_instance(r) for r in raw]
	            else:
	                logger.warning(
	                    "Ignoring malformed cache %s; downloading again", cache_path
	                )

	    # ── Fall back to HuggingFace ──────────────────────────────────────────
	    if instances is None:
	        logger.info("Downloading SWE-bench Lite from HuggingFace: %s", dataset_name)
	        try:
	            from datasets import load_dataset  # type: ignore
	        except ImportError as exc:
	            raise ImportError(
	                "Install 'datasets': pip install datasets"
	            ) from exc

	        ds = load_dataset(dataset_name, split=split)
	        instances = [_dict_to_instance(dict(row)) for row in ds]

	        if cache_path is not None:
	            logger.info("Saving to cache: %s", cache_path)
	            # Write to a sibling temp file and rename it into place so an
	            # interrupted write never leaves a half-written cache behind.
	            tmp_path = cache_path.with_name(cache_path.name + ".tmp")
	            tmp_path.write_text(
	                json.dumps([_instance_to_dict(i) for i in instances], indent=2),
	                encoding="utf-8",
	            )
	            tmp_path.replace(cache_path)

	    # ── Apply filters ─────────────────────────────────────────────────────
	    if instance_ids:
	        id_set = set(instance_ids)
	        instances = [i for i in instances if i.instance_id in id_set]
	        logger.info("Filtered to %d instances by ID", len(instances))

	    if max_instances is not None:
	        # Clamp so a negative limit cannot silently drop items from the end.
	        instances = instances[: max(max_instances, 0)]

	    logger.info("Loaded %d SWE-bench Lite instances (split=%s)", len(instances), split)
	    return instances


	def iter_instances(
	    dataset_name: str = "princeton-nlp/SWE-bench_Lite",
	    split: str = "test",
	    cache_dir: Path | None = None,
	) -> Iterator[SWEInstance]:
	    """Yield the instances of a split one at a time.

	    Generator wrapper around load_swebench_lite(): nothing is loaded until
	    the first item is requested, at which point the whole split is loaded
	    and then handed out item by item.
	    """
	    loaded = load_swebench_lite(
	        dataset_name=dataset_name,
	        split=split,
	        cache_dir=cache_dir,
	    )
	    yield from loaded


	# ── Private helpers ───────────────────────────────────────────────────────────

	def _dict_to_instance(row: dict) -> SWEInstance:
	    """Build an SWEInstance from a raw row / cached record.

	    Missing text fields default to ''; the FAIL_TO_PASS / PASS_TO_PASS
	    entries are run through _parse_list() and default to an empty list.
	    """
	    text_fields = (
	        "instance_id",
	        "repo",
	        "base_commit",
	        "problem_statement",
	        "patch",
	        "test_patch",
	        "created_at",
	        "version",
	        "environment_setup_commit",
	    )
	    kwargs = {name: row.get(name, "") for name in text_fields}
	    kwargs["fail_to_pass"] = _parse_list(row.get("FAIL_TO_PASS", "[]"))
	    kwargs["pass_to_pass"] = _parse_list(row.get("PASS_TO_PASS", "[]"))
	    return SWEInstance(**kwargs)


	def _instance_to_dict(instance: SWEInstance) -> dict:
	    """Serialize an instance to a plain dict using the dataset's key names.

	    The two test lists are stored as JSON-encoded strings under the
	    upper-case keys FAIL_TO_PASS / PASS_TO_PASS; all other values are
	    copied over unchanged.
	    """
	    leading = (
	        "instance_id",
	        "repo",
	        "base_commit",
	        "problem_statement",
	        "patch",
	        "test_patch",
	    )
	    trailing = ("created_at", "version", "environment_setup_commit")

	    record = {name: getattr(instance, name) for name in leading}
	    record["FAIL_TO_PASS"] = json.dumps(instance.fail_to_pass)
	    record["PASS_TO_PASS"] = json.dumps(instance.pass_to_pass)
	    for name in trailing:
	        record[name] = getattr(instance, name)
	    return record


	def _parse_list(value: str | list) -> list[str]:
	    """Return *value* as a list.

	    A list is handed back as-is (same object). Anything else is decoded
	    as JSON; if decoding fails, or the decoded value is not a list, an
	    empty list is returned.
	    """
	    if isinstance(value, list):
	        return value

	    try:
	        decoded = json.loads(value)
	    except (json.JSONDecodeError, TypeError):
	        return []

	    if not isinstance(decoded, list):
	        return []
	    return decoded


	# ── Convenience class (used by experiments/benchmark.py) ─────────────────────

	class SWEBenchLoader:
	    """
	    Object-style front end to load_swebench_lite() for the benchmark harness.

	    The dataset name and cache directory are fixed at construction time;
	    each load() call chooses the split and optional filters.

	    Usage:
	        loader = SWEBenchLoader()
	        instances = loader.load(split="test", max_instances=10)
	    """

	    def __init__(
	        self,
	        dataset_name: str = "princeton-nlp/SWE-bench_Lite",
	        cache_dir: Path | None = Path(".cache/swebench"),
	    ):
	        # Stored verbatim and forwarded to load_swebench_lite() on every load().
	        self.cache_dir = cache_dir
	        self.dataset_name = dataset_name

	    def load(
	        self,
	        split: str = "test",
	        max_instances: int | None = None,
	        instance_ids: list[str] | None = None,
	    ) -> list[dict]:
	        """
	        Load instances and return them as plain dicts (benchmark-friendly format).

	        Each dict is the _instance_to_dict() form of one instance, which
	        includes instance_id, repo, base_commit, problem_statement,
	        FAIL_TO_PASS, PASS_TO_PASS and patch.
	        """
	        options = {
	            "dataset_name": self.dataset_name,
	            "split": split,
	            "max_instances": max_instances,
	            "instance_ids": instance_ids,
	            "cache_dir": self.cache_dir,
	        }
	        loaded = load_swebench_lite(**options)
	        return list(map(_instance_to_dict, loaded))