Spaces:

JetBrains-Research
/

ml4se-evals-visualization

Running

App Files Files Community

ml4se-evals-visualization / adapters /__init__.py

egor-bogomolov

Add 13 new benchmark datasets (batches 6-8)

9f85fac 6 days ago

raw

history blame contribute delete

2.93 kB

	"""
	Dataset adapters for the ML4SE Benchmark Viewer.

	Each adapter normalises a different benchmark dataset into a common API shape
	so the Flask routes and templates can handle them uniformly.

	The REGISTRY dict maps slug strings (used in URLs) to adapter instances.
	"""

	from __future__ import annotations

	from typing import Any

	# ---------------------------------------------------------------------------
	# Helper function stubs – injected at runtime by app.py via _set_helpers()
	# ---------------------------------------------------------------------------

	# Placeholders for the injected helpers. All three start out as None and are
	# rebound by _set_helpers() to the callables it receives.
	_highlight_code = _code_offset = _extract_test_classes = None


	def _set_helpers(highlight_code_fn, code_offset_fn, extract_test_classes_fn):
	    """Install the helper callables on this package and on its adapter submodules.

	    Intended to be called once by app.py. The three arguments are stored in
	    this module's ``_highlight_code``, ``_code_offset`` and
	    ``_extract_test_classes`` globals, and the same three names are then set
	    as attributes on each adapter submodule.

	    Args:
	        highlight_code_fn: Stored under the name ``_highlight_code``.
	        code_offset_fn: Stored under the name ``_code_offset``.
	        extract_test_classes_fn: Stored under the name ``_extract_test_classes``.
	    """
	    global _highlight_code, _code_offset, _extract_test_classes
	    _highlight_code, _code_offset, _extract_test_classes = (
	        highlight_code_fn,
	        code_offset_fn,
	        extract_test_classes_fn,
	    )

	    # Imported at call time rather than at module top.
	    # NOTE(review): presumably this avoids a circular import between the
	    # package and its submodules - confirm before hoisting.
	    from adapters import (
	        additional,
	        code_editing,
	        code_generation,
	        code_reasoning,
	        long_code_arena,
	        vulnerability,
	    )

	    # Attribute name -> helper, mirrored onto every submodule below so the
	    # adapters defined there can use the helpers.
	    injected = {
	        "_highlight_code": highlight_code_fn,
	        "_code_offset": code_offset_fn,
	        "_extract_test_classes": extract_test_classes_fn,
	    }
	    submodules = (
	        code_generation,
	        code_editing,
	        code_reasoning,
	        vulnerability,
	        long_code_arena,
	        additional,
	    )
	    for submodule in submodules:
	        for attr_name, helper in injected.items():
	            setattr(submodule, attr_name, helper)


	# ---------------------------------------------------------------------------
	# Registry
	# ---------------------------------------------------------------------------

	# Maps slug strings (used in URLs) to adapter instances. Starts out empty;
	# presumably filled by register_hf_datasets() - confirm in adapters/registration.py.
	# The annotation names DatasetAdapter before the class is defined further down;
	# that is safe because `from __future__ import annotations` leaves annotations
	# unevaluated.
	REGISTRY: dict[str, DatasetAdapter] = {}


	# ---------------------------------------------------------------------------
	# Base class
	# ---------------------------------------------------------------------------


	class DatasetAdapter:
	    """Base class for dataset adapters.

	    Defines the common interface through which adapters are accessed.
	    ``problem_count``, ``get_problem_summary`` and ``get_problem_detail``
	    have no base implementation and raise ``NotImplementedError`` until a
	    subclass overrides them; ``get_ground_truth`` has a default that
	    reports ground truth as unavailable.
	    """

	    # Slug identifying the adapter; REGISTRY is keyed by slug strings used in URLs.
	    slug: str = ""
	    # NOTE(review): presumably the human-readable dataset name - confirm against templates.
	    display_name: str = ""
	    # NOTE(review): presumably signals that get_ground_truth() is overridden - confirm.
	    has_ground_truth: bool = False
	    # NOTE(review): meaning of "tasks" is not visible in this file - confirm against callers.
	    has_tasks: bool = False
	    total_count: int | None = None  # original size before sampling (None = not sampled)

	    def problem_count(self) -> int:
	        """Return the number of problems; subclasses must override.

	        Raises:
	            NotImplementedError: Always, in this base implementation.
	        """
	        raise NotImplementedError

	    def get_problem_summary(self, idx: int) -> dict[str, Any]:
	        """Return a summary dict for problem ``idx``; subclasses must override.

	        Raises:
	            NotImplementedError: Always, in this base implementation.
	        """
	        raise NotImplementedError

	    def get_problem_detail(self, idx: int) -> dict[str, Any]:
	        """Return a detail dict for problem ``idx``; subclasses must override.

	        Raises:
	            NotImplementedError: Always, in this base implementation.
	        """
	        raise NotImplementedError

	    def get_ground_truth(self, idx: int, input_idx: int) -> dict[str, Any]:
	        """Return the default "unavailable" payload.

	        Both arguments are ignored by this base implementation, which
	        always returns the same ``status``/``message`` dict.
	        """
	        return {
	            "status": "unavailable",
	            "message": "Ground truth not available for this dataset",
	        }


	# ---------------------------------------------------------------------------
	# Re-export registration entry point
	# ---------------------------------------------------------------------------

	from adapters.registration import register_hf_datasets # noqa: E402, F401

	# Names exported by `from adapters import *`. `_set_helpers` is listed
	# explicitly, so it is exported despite its leading underscore.
	__all__ = [
	"REGISTRY",
	"DatasetAdapter",
	"_set_helpers",
	"register_hf_datasets",
	]