# Page header captured together with the source (not part of the module):
#   egor-bogomolov's picture
#   Add 13 new benchmark datasets (batches 6-8)
#   9f85fac
"""
Dataset adapters for the ML4SE Benchmark Viewer.
Each adapter normalises a different benchmark dataset into a common API shape
so the Flask routes and templates can handle them uniformly.
The REGISTRY dict maps slug strings (used in URLs) to adapter instances.
"""
from __future__ import annotations
from typing import Any
# ---------------------------------------------------------------------------
# Helper function stubs – injected at runtime by app.py via _set_helpers()
# ---------------------------------------------------------------------------
# Placeholders for the three helper callables.  They stay ``None`` until
# _set_helpers() rebinds them with the functions supplied by the caller.
_highlight_code = _code_offset = _extract_test_classes = None
def _set_helpers(highlight_code_fn, code_offset_fn, extract_test_classes_fn):
    """Install the helper callables on this module and on every adapter submodule.

    Meant to be called once by app.py.  The three arguments are stored in the
    module-level names ``_highlight_code``, ``_code_offset`` and
    ``_extract_test_classes``, and the same three attributes are then set on
    each adapter submodule so adapter code can use the helpers.

    Args:
        highlight_code_fn: Stored as ``_highlight_code``.
        code_offset_fn: Stored as ``_code_offset``.
        extract_test_classes_fn: Stored as ``_extract_test_classes``.
    """
    global _highlight_code, _code_offset, _extract_test_classes
    _highlight_code, _code_offset, _extract_test_classes = (
        highlight_code_fn,
        code_offset_fn,
        extract_test_classes_fn,
    )

    # Function-scope import: the submodules are only needed at injection time.
    # NOTE(review): presumably also avoids a circular import at package load
    # time -- confirm against the submodules' own imports.
    from adapters import (
        additional,
        code_editing,
        code_generation,
        code_reasoning,
        long_code_arena,
        vulnerability,
    )

    # Attribute name -> helper, in the order the attributes are assigned.
    helpers = {
        "_highlight_code": highlight_code_fn,
        "_code_offset": code_offset_fn,
        "_extract_test_classes": extract_test_classes_fn,
    }
    targets = (
        code_generation,
        code_editing,
        code_reasoning,
        vulnerability,
        long_code_arena,
        additional,
    )
    for submodule in targets:
        for attr_name, helper in helpers.items():
            setattr(submodule, attr_name, helper)
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
# Maps slug strings (used in URLs) to adapter instances.  Starts empty here;
# nothing in this module adds entries to it.
REGISTRY: dict[str, DatasetAdapter] = {}
# ---------------------------------------------------------------------------
# Base class
# ---------------------------------------------------------------------------
class DatasetAdapter:
    """Base class defining the common API shape shared by all dataset adapters.

    Subclasses are expected to override the class attributes below and to
    implement ``problem_count``, ``get_problem_summary`` and
    ``get_problem_detail``; the base versions of those three raise
    ``NotImplementedError``.  ``get_ground_truth`` has a working default that
    reports ground truth as unavailable.
    """

    # Slug string identifying the dataset (slugs are used in URLs).
    slug: str = ""
    # Human-readable dataset name.
    display_name: str = ""
    # NOTE(review): presumably signals whether get_ground_truth() is
    # overridden with real data -- confirm against the routes/templates.
    has_ground_truth: bool = False
    # NOTE(review): exact meaning is not visible in this module -- confirm.
    has_tasks: bool = False
    # Original size before sampling (None = not sampled).
    total_count: int | None = None

    def problem_count(self) -> int:
        """Return the number of problems; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()

    def get_problem_summary(self, idx: int) -> dict[str, Any]:
        """Return a summary dict for problem ``idx``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()

    def get_problem_detail(self, idx: int) -> dict[str, Any]:
        """Return a detail dict for problem ``idx``; must be overridden.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()

    def get_ground_truth(self, idx: int, input_idx: int) -> dict[str, Any]:
        """Return the ground truth for one input of problem ``idx``.

        The base implementation ignores both arguments and returns a fresh
        dict with ``status`` set to ``"unavailable"`` and an explanatory
        ``message``.
        """
        unavailable: dict[str, Any] = {
            "status": "unavailable",
            "message": "Ground truth not available for this dataset",
        }
        return unavailable
# ---------------------------------------------------------------------------
# Re-export registration entry point
# ---------------------------------------------------------------------------
# Imported at the bottom of the module (hence the E402 suppression) and only
# for re-export (hence F401).
# NOTE(review): presumably adapters.registration needs REGISTRY and
# DatasetAdapter to be defined before it is imported -- confirm.
from adapters.registration import register_hf_datasets # noqa: E402, F401
# Names exposed by ``from <this module> import *``.  ``_set_helpers`` is listed
# explicitly because a star-import would otherwise skip underscore-prefixed
# names.
__all__ = [
    "REGISTRY",
    "DatasetAdapter",
    "_set_helpers",
    "register_hf_datasets",
]