"""
Dataset adapters for the ML4SE Benchmark Viewer.
Each adapter normalises a different benchmark dataset into a common API shape
so the Flask routes and templates can handle them uniformly.
The REGISTRY dict maps slug strings (used in URLs) to adapter instances.
"""
from __future__ import annotations
from typing import Any
# ---------------------------------------------------------------------------
# Helper function stubs – injected at runtime by app.py via _set_helpers()
# ---------------------------------------------------------------------------
# Module-level helper slots. They remain None until app.py calls
# _set_helpers(); adapters must not use them before injection.
_highlight_code = None
_code_offset = None
_extract_test_classes = None
def _set_helpers(highlight_code_fn, code_offset_fn, extract_test_classes_fn):
    """Inject the shared helper callables supplied by app.py.

    Stores the three helpers both at package level and on every adapter
    submodule so adapters can call them directly. Intended to be called
    exactly once, by app.py, at startup.
    """
    global _highlight_code, _code_offset, _extract_test_classes
    _highlight_code = highlight_code_fn
    _code_offset = code_offset_fn
    _extract_test_classes = extract_test_classes_fn

    # Imported here rather than at module top level, presumably to avoid a
    # circular import between this package and its submodules.
    from adapters import (
        additional,
        code_editing,
        code_generation,
        code_reasoning,
        long_code_arena,
        vulnerability,
    )

    helpers = {
        "_highlight_code": highlight_code_fn,
        "_code_offset": code_offset_fn,
        "_extract_test_classes": extract_test_classes_fn,
    }
    submodules = (
        code_generation,
        code_editing,
        code_reasoning,
        vulnerability,
        long_code_arena,
        additional,
    )
    # Propagate each helper onto every adapter submodule.
    for submodule in submodules:
        for attr_name, helper_fn in helpers.items():
            setattr(submodule, attr_name, helper_fn)
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
REGISTRY: dict[str, DatasetAdapter] = {}
# ---------------------------------------------------------------------------
# Base class
# ---------------------------------------------------------------------------
class DatasetAdapter:
    """Abstract base class for benchmark dataset adapters.

    Subclasses normalise one dataset into the common shape consumed by
    the Flask routes and templates; the class attributes describe the
    dataset's identity and capabilities.
    """

    slug: str = ""                    # URL slug identifying the dataset
    display_name: str = ""            # human-readable dataset name
    has_ground_truth: bool = False    # whether get_ground_truth yields data
    has_tasks: bool = False
    # Original size before sampling; None means the dataset was not sampled.
    total_count: int | None = None

    def problem_count(self) -> int:
        """Return the number of problems exposed by this adapter."""
        raise NotImplementedError

    def get_problem_summary(self, idx: int) -> dict[str, Any]:
        """Return a summary dict for the problem at index *idx*."""
        raise NotImplementedError

    def get_problem_detail(self, idx: int) -> dict[str, Any]:
        """Return the full detail dict for the problem at index *idx*."""
        raise NotImplementedError

    def get_ground_truth(self, idx: int, input_idx: int) -> dict[str, Any]:
        """Return ground-truth info; the default reports it as unavailable."""
        return {"status": "unavailable", "message": "Ground truth not available for this dataset"}
# ---------------------------------------------------------------------------
# Re-export registration entry point
# ---------------------------------------------------------------------------
from adapters.registration import register_hf_datasets # noqa: E402, F401
# Public API of the adapters package.
__all__ = [
    "REGISTRY",
    "DatasetAdapter",
    "_set_helpers",
    "register_hf_datasets",
]