File size: 2,928 Bytes
9a8a9c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f85fac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a8a9c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
Dataset adapters for the ML4SE Benchmark Viewer.

Each adapter normalises a different benchmark dataset into a common API shape
so the Flask routes and templates can handle them uniformly.

The REGISTRY dict maps slug strings (used in URLs) to adapter instances.
"""

from __future__ import annotations

from typing import Any

# ---------------------------------------------------------------------------
# Helper function stubs – injected at runtime by app.py via _set_helpers()
# ---------------------------------------------------------------------------

# Placeholders for the injected helpers: all three are None until
# _set_helpers() rebinds them.
_highlight_code = _code_offset = _extract_test_classes = None


def _set_helpers(highlight_code_fn, code_offset_fn, extract_test_classes_fn):
    """Install the helper callables that app.py injects at runtime.

    The three functions are bound to this module's private globals and then
    mirrored onto every adapter submodule under the same attribute names.

    Args:
        highlight_code_fn: stored as ``_highlight_code``.
        code_offset_fn: stored as ``_code_offset``.
        extract_test_classes_fn: stored as ``_extract_test_classes``.
    """
    global _highlight_code, _code_offset, _extract_test_classes

    _highlight_code, _code_offset, _extract_test_classes = (
        highlight_code_fn,
        code_offset_fn,
        extract_test_classes_fn,
    )

    # Imported inside the function rather than at module top -- presumably to
    # avoid a circular import with the submodules; confirm before moving.
    from adapters import (
        additional,
        code_editing,
        code_generation,
        code_reasoning,
        long_code_arena,
        vulnerability,
    )

    # (attribute name, helper) pairs copied onto each submodule, in this order.
    injected = (
        ("_highlight_code", highlight_code_fn),
        ("_code_offset", code_offset_fn),
        ("_extract_test_classes", extract_test_classes_fn),
    )
    submodules = (
        code_generation,
        code_editing,
        code_reasoning,
        vulnerability,
        long_code_arena,
        additional,
    )
    for submodule in submodules:
        for attr_name, helper in injected:
            setattr(submodule, attr_name, helper)


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

# Maps URL slug -> adapter instance (see the module docstring). It starts
# empty and no entries are added in the code visible here -- presumably
# register_hf_datasets() fills it in; TODO confirm.
# The annotation names DatasetAdapter before the class is defined further
# down; that is fine because `from __future__ import annotations` keeps
# annotations unevaluated.
REGISTRY: dict[str, DatasetAdapter] = {}


# ---------------------------------------------------------------------------
# Base class
# ---------------------------------------------------------------------------


class DatasetAdapter:
    """Base interface shared by all benchmark dataset adapters.

    The class attributes carry per-dataset metadata with neutral defaults.
    ``problem_count``, ``get_problem_summary`` and ``get_problem_detail``
    raise ``NotImplementedError`` here and are meant to be overridden;
    ``get_ground_truth`` has a working fallback that reports the data as
    unavailable.
    """

    # Identifier for the dataset; per the module docstring, slugs are used in URLs.
    slug: str = ""
    # Presumably the human-readable label shown in the UI -- confirm with templates.
    display_name: str = ""
    # False by default, matching the "unavailable" fallback of get_ground_truth().
    has_ground_truth: bool = False
    has_tasks: bool = False
    # Original size before sampling (None = not sampled).
    total_count: int | None = None

    def problem_count(self) -> int:
        """Return the number of problems; must be overridden."""
        raise NotImplementedError()

    def get_problem_summary(self, idx: int) -> dict[str, Any]:
        """Return the summary dict for problem ``idx``; must be overridden."""
        raise NotImplementedError()

    def get_problem_detail(self, idx: int) -> dict[str, Any]:
        """Return the detail dict for problem ``idx``; must be overridden."""
        raise NotImplementedError()

    def get_ground_truth(self, idx: int, input_idx: int) -> dict[str, Any]:
        """Return ground truth for one input of a problem.

        This base version ignores both indices and always returns a fresh
        dict with ``status`` set to ``"unavailable"`` plus an explanatory
        ``message``.
        """
        return dict(
            status="unavailable",
            message="Ground truth not available for this dataset",
        )


# ---------------------------------------------------------------------------
# Re-export registration entry point
# ---------------------------------------------------------------------------

from adapters.registration import register_hf_datasets  # noqa: E402, F401

# Names exported by a star-import of this module. `_set_helpers` is listed
# explicitly because underscore-prefixed names are otherwise skipped by
# `import *`.
__all__ = [
    "REGISTRY",
    "DatasetAdapter",
    "_set_helpers",
    "register_hf_datasets",
]