| from __future__ import annotations |
|
|
| import json |
| import subprocess |
| import sys |
| from pathlib import Path |
|
|
| import pytest |
|
|
| from maris_core.data.validator import DatasetValidationError, validate_dataset_dir |
|
|
| REPO_ROOT = Path(__file__).resolve().parents[2] |
|
|
|
|
def test_validate_dataset_dir_accepts_repo_eval_dataset() -> None:
    """Validate the repository's ``eval-data`` directory with the ``eval`` profile.

    The returned summary must report 6 checked files, 12 records in total,
    no duplicates, and exactly two records for each of the six categories.
    """
    expected_per_category = dict.fromkeys(
        ("conversation", "code", "image", "music", "video", "autonomous"),
        2,
    )

    report = validate_dataset_dir(REPO_ROOT / "eval-data", profile="eval")

    assert report.files_checked == 6
    assert report.total_records == 12
    assert report.duplicate_count == 0
    assert report.counts_by_category == expected_per_category
|
|
|
|
def test_validate_dataset_dir_rejects_missing_eval_fields(tmp_path: Path) -> None:
    """Check that an eval record lacking required fields is rejected.

    A single conversation record is written to ``conversation/sample.jsonl``
    without ``task_id``, ``scoring_hints``, ``reference_answer`` and
    ``acceptance_criteria``. Validation with the ``eval`` profile must raise
    ``DatasetValidationError``, and each of those field names must appear in
    at least one of the reported issues.
    """
    incomplete_record = {
        "timestamp": "2026-04-19T00:00:00Z",
        "type": "conversation",
        "session_id": "eval-conv-broken",
        "user": "Kas nogāja greizi?",
        "assistant": "Nav skaidrs.",
        "language": "lv",
        "source": "test-eval",
        "benchmark_version": "maris-evals-v1",
        "suite": "regression",
        "difficulty": "medium",
        "evaluation_mode": "reference-review",
        "risk_level": "medium",
        "expected_behavior": ["Paskaidro kritumu."],
    }

    dataset_dir = tmp_path / "eval-data"
    conversation_dir = dataset_dir / "conversation"
    conversation_dir.mkdir(parents=True)

    # One JSON object per line, terminated by a newline (JSONL).
    sample_file = conversation_dir / "sample.jsonl"
    sample_file.write_text(json.dumps(incomplete_record) + "\n", encoding="utf-8")

    with pytest.raises(DatasetValidationError) as exc_info:
        validate_dataset_dir(dataset_dir, profile="eval")

    issues = exc_info.value.issues
    for missing_field in (
        "task_id",
        "scoring_hints",
        "reference_answer",
        "acceptance_criteria",
    ):
        assert any(missing_field in issue for issue in issues)
|
|
|
|
def test_validate_datasets_cli_accepts_eval_profile_for_repo_dataset() -> None:
    """Run ``validate_datasets.py --profile eval`` against the repo's ``eval-data``.

    The script is executed with the current interpreter in a child process.
    It must exit with status 0 and print the success message on stdout.
    """
    script_path = REPO_ROOT / "core-python" / "scripts" / "validate_datasets.py"
    command = [
        sys.executable,
        str(script_path),
        "--profile",
        "eval",
        str(REPO_ROOT / "eval-data"),
    ]

    # check=False: the exit status is asserted explicitly below.
    result = subprocess.run(
        command,
        check=False,
        capture_output=True,
        text=True,
    )

    # The child's output is captured into `result`, so it would otherwise be
    # invisible when the exit-code assertion fails; attach it to the message.
    assert result.returncode == 0, (
        f"validate_datasets.py exited with status {result.returncode}\n"
        f"--- stdout ---\n{result.stdout}\n"
        f"--- stderr ---\n{result.stderr}"
    )
    assert "Dataset validācija veiksmīga" in result.stdout
|
|