File size: 2,614 Bytes
f440f03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

import pytest

from maris_core.data.validator import DatasetValidationError, validate_dataset_dir

# Directory treated as the repository root: ``parents[2]`` is two levels above
# the directory that contains this file. The tests below look for ``eval-data``
# and ``core-python/scripts`` underneath it.
REPO_ROOT = Path(__file__).resolve().parents[2]


def test_validate_dataset_dir_accepts_repo_eval_dataset() -> None:
    """Validate the checked-in ``eval-data`` directory with the ``eval`` profile.

    The returned summary must report the exact file, record and duplicate
    counts, and the per-category record counts, pinned below.
    """
    expected_counts = {
        "conversation": 2,
        "code": 2,
        "image": 2,
        "music": 2,
        "video": 2,
        "autonomous": 2,
    }

    report = validate_dataset_dir(REPO_ROOT / "eval-data", profile="eval")

    assert report.files_checked == 6
    assert report.total_records == 12
    assert report.duplicate_count == 0
    assert report.counts_by_category == expected_counts


def test_validate_dataset_dir_rejects_missing_eval_fields(tmp_path: Path) -> None:
    """Expect a validation error when an eval record omits eval-only fields.

    A single conversation record is written to a temporary dataset directory
    without ``task_id``, ``scoring_hints``, ``reference_answer`` and
    ``acceptance_criteria``. Validation with the ``eval`` profile must raise
    ``DatasetValidationError``, and each omitted field name must appear in at
    least one of the reported issues.
    """
    # Record deliberately lacks the four fields asserted on at the end.
    incomplete_record = {
        "timestamp": "2026-04-19T00:00:00Z",
        "type": "conversation",
        "session_id": "eval-conv-broken",
        "user": "Kas nogāja greizi?",
        "assistant": "Nav skaidrs.",
        "language": "lv",
        "source": "test-eval",
        "benchmark_version": "maris-evals-v1",
        "suite": "regression",
        "difficulty": "medium",
        "evaluation_mode": "reference-review",
        "risk_level": "medium",
        "expected_behavior": ["Paskaidro kritumu."],
    }

    dataset_dir = tmp_path / "eval-data"
    category_dir = dataset_dir / "conversation"
    category_dir.mkdir(parents=True)

    # One JSON document per line, terminated by a newline.
    jsonl_line = json.dumps(incomplete_record) + "\n"
    (category_dir / "sample.jsonl").write_text(jsonl_line, encoding="utf-8")

    with pytest.raises(DatasetValidationError) as exc_info:
        validate_dataset_dir(dataset_dir, profile="eval")

    issues = exc_info.value.issues
    for missing_field in (
        "task_id",
        "scoring_hints",
        "reference_answer",
        "acceptance_criteria",
    ):
        assert any(missing_field in issue for issue in issues)


def test_validate_datasets_cli_accepts_eval_profile_for_repo_dataset() -> None:
    """Run the ``validate_datasets.py`` script against the repository eval data.

    The script is launched with the current interpreter, ``--profile eval`` and
    the ``eval-data`` directory. It must exit with status 0 and print the
    success message on stdout.
    """
    cli_script = REPO_ROOT.joinpath("core-python", "scripts", "validate_datasets.py")
    eval_data_dir = REPO_ROOT / "eval-data"
    command = [sys.executable, str(cli_script), "--profile", "eval", str(eval_data_dir)]

    # check=False: the exit status is asserted explicitly below.
    completed = subprocess.run(command, check=False, capture_output=True, text=True)  # noqa: S603

    assert completed.returncode == 0
    assert "Dataset validācija veiksmīga" in completed.stdout