# maris-ai-master/core-python/tests/test_dataset_validator_profiles.py
# MarisUK's picture
# Maris AI model sync
# f440f03 verified
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
import pytest
from maris_core.data.validator import DatasetValidationError, validate_dataset_dir
# Directory two levels above the one containing this file. The tests below
# resolve `eval-data/` and `core-python/scripts/` relative to it.
REPO_ROOT = Path(__file__).resolve().parents[2]
def test_validate_dataset_dir_accepts_repo_eval_dataset() -> None:
    """Validate the repository's ``eval-data`` directory with the ``eval`` profile.

    The returned summary must report six checked files, twelve records,
    no duplicates, and two records in each of the six categories.
    """
    expected_per_category = {
        "conversation": 2,
        "code": 2,
        "image": 2,
        "music": 2,
        "video": 2,
        "autonomous": 2,
    }
    eval_dir = REPO_ROOT / "eval-data"

    report = validate_dataset_dir(eval_dir, profile="eval")

    assert report.files_checked == 6
    assert report.total_records == 12
    assert report.duplicate_count == 0
    assert report.counts_by_category == expected_per_category
def test_validate_dataset_dir_rejects_missing_eval_fields(tmp_path: Path) -> None:
    """Check that an incomplete eval record is rejected by the ``eval`` profile.

    A single conversation record is written without ``task_id``,
    ``scoring_hints``, ``reference_answer`` and ``acceptance_criteria``.
    Validation must raise ``DatasetValidationError`` and its ``issues`` must
    mention each of those field names.
    """
    incomplete_record = {
        "timestamp": "2026-04-19T00:00:00Z",
        "type": "conversation",
        "session_id": "eval-conv-broken",
        "user": "Kas nogāja greizi?",
        "assistant": "Nav skaidrs.",
        "language": "lv",
        "source": "test-eval",
        "benchmark_version": "maris-evals-v1",
        "suite": "regression",
        "difficulty": "medium",
        "evaluation_mode": "reference-review",
        "risk_level": "medium",
        "expected_behavior": ["Paskaidro kritumu."],
    }

    # Lay out <tmp>/eval-data/conversation/sample.jsonl with one JSONL line.
    dataset_dir = tmp_path / "eval-data"
    sample_path = dataset_dir / "conversation" / "sample.jsonl"
    sample_path.parent.mkdir(parents=True)
    sample_path.write_text(json.dumps(incomplete_record) + "\n", encoding="utf-8")

    with pytest.raises(DatasetValidationError) as caught:
        validate_dataset_dir(dataset_dir, profile="eval")

    reported = caught.value.issues
    for missing_field in (
        "task_id",
        "scoring_hints",
        "reference_answer",
        "acceptance_criteria",
    ):
        assert any(missing_field in issue for issue in reported)
def test_validate_datasets_cli_accepts_eval_profile_for_repo_dataset() -> None:
    """Run ``validate_datasets.py --profile eval`` against the repo's ``eval-data``.

    The script is launched with the current interpreter. The test expects
    exit code 0 and the success message on stdout. The captured stdout and
    stderr are attached to both assertions so that a failing run shows the
    CLI's own output instead of a bare ``assert 1 == 0``.
    """
    script_path = REPO_ROOT / "core-python" / "scripts" / "validate_datasets.py"
    result = subprocess.run(  # noqa: S603
        [
            sys.executable,
            str(script_path),
            "--profile",
            "eval",
            str(REPO_ROOT / "eval-data"),
        ],
        # check=False: the exit code is asserted explicitly below.
        check=False,
        capture_output=True,
        text=True,
    )

    diagnostics = f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    assert result.returncode == 0, diagnostics
    assert "Dataset validācija veiksmīga" in result.stdout, diagnostics