"""Tests for dataset directory validation under the ``eval`` profile."""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

import pytest

from maris_core.data.validator import DatasetValidationError, validate_dataset_dir

# Base directory for the "eval-data" and "core-python" paths used below;
# presumably the repository root, three directory levels above this file.
REPO_ROOT = Path(__file__).resolve().parents[2]


def test_validate_dataset_dir_accepts_repo_eval_dataset() -> None:
    """The checked-in ``eval-data`` directory validates with the expected totals."""
    expected_counts = {
        "conversation": 2,
        "code": 2,
        "image": 2,
        "music": 2,
        "video": 2,
        "autonomous": 2,
    }

    report = validate_dataset_dir(REPO_ROOT / "eval-data", profile="eval")

    assert report.files_checked == 6
    assert report.total_records == 12
    assert report.duplicate_count == 0
    assert report.counts_by_category == expected_counts


def test_validate_dataset_dir_rejects_missing_eval_fields(tmp_path: Path) -> None:
    """A record without the eval-only fields fails validation and each field is reported."""
    eval_root = tmp_path / "eval-data"
    category_dir = eval_root / "conversation"
    category_dir.mkdir(parents=True)

    # Deliberately omits task_id, scoring_hints, reference_answer and
    # acceptance_criteria, which the assertions below expect to be reported.
    incomplete_record = {
        "timestamp": "2026-04-19T00:00:00Z",
        "type": "conversation",
        "session_id": "eval-conv-broken",
        "user": "Kas nogāja greizi?",
        "assistant": "Nav skaidrs.",
        "language": "lv",
        "source": "test-eval",
        "benchmark_version": "maris-evals-v1",
        "suite": "regression",
        "difficulty": "medium",
        "evaluation_mode": "reference-review",
        "risk_level": "medium",
        "expected_behavior": ["Paskaidro kritumu."],
    }
    jsonl_line = json.dumps(incomplete_record) + "\n"
    sample_file = category_dir / "sample.jsonl"
    sample_file.write_text(jsonl_line, encoding="utf-8")

    with pytest.raises(DatasetValidationError) as raised:
        validate_dataset_dir(eval_root, profile="eval")

    reported = raised.value.issues
    # Every omitted field name must appear in at least one reported issue.
    for missing_field in (
        "task_id",
        "scoring_hints",
        "reference_answer",
        "acceptance_criteria",
    ):
        assert any(missing_field in issue for issue in reported)


def test_validate_datasets_cli_accepts_eval_profile_for_repo_dataset() -> None:
    """The ``validate_datasets.py`` script exits with 0 for the repo eval dataset."""
    cli_script = REPO_ROOT.joinpath("core-python", "scripts", "validate_datasets.py")
    command = [
        sys.executable,
        str(cli_script),
        "--profile",
        "eval",
        str(REPO_ROOT / "eval-data"),
    ]

    # check=False so the exit code is asserted explicitly instead of raising.
    completed = subprocess.run(  # noqa: S603
        command,
        check=False,
        capture_output=True,
        text=True,
    )

    assert completed.returncode == 0
    assert "Dataset validācija veiksmīga" in completed.stdout