# Source path: maris-ai-master/core-python/tests/test_human_training.py
# Hub page header: MarisUK - "Maris AI model sync" - commit f440f03 (verified)
"""Tests human-in-the-loop training artifact staging helpers."""
from __future__ import annotations
from pathlib import Path
import pytest
from maris_core.training.human_training import (
HumanTrainingRequest,
build_human_training_launch_spec,
load_human_training_manifest,
publish_human_training_artifacts,
stage_human_training_artifacts,
)
def test_human_training_request_requires_input_signal() -> None:
    """Expect ``ValueError`` when every input-signal list on the request is empty."""
    signal_fields = (
        "profile_facts",
        "profile_preferences",
        "response_instructions",
        "conversation_examples",
        "preference_pairs",
        "eval_examples",
    )
    # One fresh list per field, so the request receives six independent
    # empty arguments in the listed order.
    empty_signals = {field_name: [] for field_name in signal_fields}

    with pytest.raises(ValueError):
        HumanTrainingRequest(
            dataset_repo="example-user/memory-dataset",
            model_repo="example-user/custom-model",
            **empty_signals,
        )
def test_stage_human_training_artifacts_builds_staging_manifest(tmp_path: Path) -> None:
    """Stage a fully populated request, reload its manifest, and build a launch spec.

    The test checks three things in sequence:

    1. The manifest returned by staging carries the expected type, readiness
       flags, input counts, duplicate accounting, and artifact entries.
    2. The manifest can be loaded again from ``tmp_path`` by its ``run_id``.
    3. The launch spec built from the reloaded manifest echoes the model id,
       output sub-directory, and continuation checkpoint given on the request.
    """
    hub_model_id = "example-user/custom-model"
    output_subdir = "runs/self-profile"
    continue_model_path = "runs/checkpoints/latest"

    # The same exchange is submitted twice on purpose: the assertions below
    # expect two counted inputs and at least one removed duplicate.
    repeated_exchange = {
        "user": "Kas ir mana valodas preference?",
        "assistant": "Tu dod priekšroku latviešu valodai.",
    }
    # Independent copies, so the request receives two distinct dict objects.
    conversation_examples = [dict(repeated_exchange), dict(repeated_exchange)]

    preference_pair = {
        "prompt": "Apraksti manu profilu.",
        "chosen": "Tu vēlies īsas atbildes latviski.",
        "rejected": "Es neko nezinu par tavām preferencēm.",
        "confidence": 0.9,
    }
    eval_example = {
        "prompt": "Kā tu atbildēsi turpmāk?",
        "completion": "Īsi, strukturēti un latviski.",
    }

    request = HumanTrainingRequest(
        dataset_repo="example-user/memory-dataset",
        hub_model_id=hub_model_id,
        output_subdir=output_subdir,
        continue_model_path=continue_model_path,
        profile_facts=["Man patīk īsas tehniskas atbildes."],
        profile_preferences=["Atbildi latviski."],
        response_instructions=["Ja iespējams, dod strukturētu kopsavilkumu."],
        conversation_examples=conversation_examples,
        preference_pairs=[preference_pair],
        eval_examples=[eval_example],
    )

    staged = stage_human_training_artifacts(request, persistent_dir=str(tmp_path))

    # Manifest header and readiness flags.
    assert staged["artifact_type"] == "human-training-manifest"
    for readiness_flag in ("ready_for_review", "ready_for_training"):
        assert staged[readiness_flag] is True

    # Input accounting and duplicate reporting.
    assert staged["input_summary"]["conversation_examples"] == 2
    assert staged["quality_report"]["duplicates_removed"] >= 1

    # One artifact entry per dataset kind.
    artifacts = staged["artifacts"]
    for artifact_name in ("train_dataset", "preference_dataset", "eval_dataset"):
        assert artifact_name in artifacts

    # Round-trip: the manifest must be loadable again by its run id.
    run_id = staged["run_id"]
    reloaded = load_human_training_manifest(str(tmp_path), run_id)
    assert reloaded["run_id"] == run_id

    # The launch spec must carry the request's training targets through.
    spec = build_human_training_launch_spec(reloaded)
    assert spec.hub_model_id == hub_model_id
    assert spec.output_subdir == output_subdir
    assert spec.continue_model_path == continue_model_path
def test_publish_human_training_artifacts_uploads_all_repo_files(tmp_path: Path) -> None:
    """Publish staged artifacts through a recording stub and inspect the uploads.

    A stand-in ``save_file`` callable records the target of each call instead
    of uploading. The test then checks that publishing returned a truthy
    result, that every recorded call targeted the request's dataset repo with
    repo type ``"dataset"``, and that at least one path starts with
    ``data/human-training/``.
    """
    request = HumanTrainingRequest(
        dataset_repo="example-user/memory-dataset",
        model_repo="example-user/custom-model",
        profile_facts=["Man patīk konkrēti tehniski skaidrojumi."],
    )
    manifest = stage_human_training_artifacts(request, persistent_dir=str(tmp_path))

    # Each entry is (repo_id, repo_type, path_in_repo) for one save call.
    uploaded: list[tuple[str, str, str]] = []

    def fake_save_file(**kwargs):
        """Record the upload target and return a success-shaped payload."""
        repo_id = kwargs["repo_id"]
        repo_type = kwargs["repo_type"]
        path_in_repo = kwargs["path_in_repo"]
        uploaded.append((repo_id, repo_type, path_in_repo))
        return {"saved": True, "path": path_in_repo}

    published = publish_human_training_artifacts(manifest, save_file=fake_save_file)

    assert published
    assert all(repo_id == "example-user/memory-dataset" for repo_id, _, _ in uploaded)
    assert all(repo_type == "dataset" for _, repo_type, _ in uploaded)
    assert any(path.startswith("data/human-training/") for _, _, path in uploaded)
def test_human_training_request_keeps_model_repo_compatibility_alias() -> None:
    """A value passed as ``model_repo`` must read back under both attribute names."""
    target_repo = "example-user/custom-model"

    request = HumanTrainingRequest(
        dataset_repo="example-user/memory-dataset",
        model_repo=target_repo,
        profile_facts=["Svarīga ir profesionāla latviešu valoda."],
    )

    # ``hub_model_id`` is checked first, then the ``model_repo`` alias.
    for attribute in ("hub_model_id", "model_repo"):
        assert getattr(request, attribute) == target_repo