# Source: maris-ai-master/core-python/tests/test_training_pipeline.py
# Uploaded by MarisUK; commit "Maris AI model sync" (f440f03, verified).
"""Tests for training pipeline configuration and data preparation."""
from __future__ import annotations
import asyncio
import importlib.util
import json
import os
import re
import subprocess
import sys
import types
from importlib.metadata import PackageNotFoundError
from pathlib import Path
from typing import Any
import pytest
from maris_core.data.preprocessing import record_to_training_text
from maris_core.training.config import (
AVAILABLE_TRAINING_BASE_MODELS,
DEFAULT_TRAINING_BASE_MODEL,
list_training_base_models,
load_training_config,
)
from maris_core.training.hf_compat import (
MARIS_COMPATIBILITY_ARTIFACT_NAME,
apply_maris_compatibility_identity,
write_maris_compatibility_artifact,
)
from maris_core.training.preferences import load_preference_dataset
from maris_core.training.train import (
_build_benchmark_gate_artifact,
_build_distributed_training_argument_overrides,
_ensure_runtime_home_dir,
_filter_preference_examples_for_branch,
_filter_records_for_branch,
_run_post_training_benchmark,
build_branch_training_configs,
evaluate_with_config,
train,
train_branch_suite,
)
# Case-insensitive, whole-word match for the names of non-Maris AI vendors and
# model families. Used below to scan generated artifacts for foreign branding.
FOREIGN_AI_NAME_RE = re.compile(
r"(?i)\b(?:anthropic|chatgpt|claude|deepseek|gemini|llama|mistral|openai|qwen|TinyLlama)\b"
)
# Case-insensitive match for "<organisation>/<repo>" style model identifiers
# whose organisation part is one of the listed non-Maris owners.
FOREIGN_MODEL_REPO_RE = re.compile(
r"(?i)\b(?:deepseek-ai|meta-llama|mistralai|openai|qwen|TinyLlama)/[A-Za-z0-9][\w.-]*\b"
)
def _assert_output_dir_uses_only_maris_identity(output_dir: Path) -> None:
    """Assert that text artifacts under ``output_dir`` carry no foreign AI branding.

    Walks ``output_dir`` recursively in sorted order and scans every regular
    file with a ``.json``, ``.jinja``, ``.md`` or ``.txt`` suffix. The file named
    ``MARIS_COMPATIBILITY_ARTIFACT_NAME`` is skipped. Each scanned file is read
    as UTF-8 and must match neither ``FOREIGN_MODEL_REPO_RE`` nor
    ``FOREIGN_AI_NAME_RE``; the offending path is used as the assertion message.
    """
    scanned_suffixes = {".json", ".jinja", ".md", ".txt"}
    for candidate in sorted(output_dir.rglob("*")):
        should_scan = (
            candidate.is_file()
            and candidate.name != MARIS_COMPATIBILITY_ARTIFACT_NAME
            and candidate.suffix.lower() in scanned_suffixes
        )
        if not should_scan:
            continue
        text = candidate.read_text(encoding="utf-8")
        assert FOREIGN_MODEL_REPO_RE.search(text) is None, candidate
        assert FOREIGN_AI_NAME_RE.search(text) is None, candidate
def test_record_to_training_text_formats_conversation_and_generation() -> None:
    """Check the rendered text for a chat record and for a prompt-with-metadata record."""
    chat_text = record_to_training_text({"user": "Sveiki", "assistant": "Čau!"})
    prompt_text = record_to_training_text(
        {"prompt": "Uzzīmē kaķi", "metadata": {"style": "anime"}}
    )

    # The chat rendering must carry the user marker and both turns.
    for fragment in ("<|user|>", "Sveiki", "Čau!"):
        assert fragment in chat_text

    # The generation rendering must carry the prompt and the JSON-style metadata.
    for fragment in ("Uzzīmē kaķi", '"style": "anime"'):
        assert fragment in prompt_text
def test_record_to_training_text_formats_structured_coder_record() -> None:
    """Check that a structured bug-fix record renders its section headings and code fence."""
    coder_record = {
        "prompt": "Salabo retry helperi.",
        "target_file": "core-python/maris_core/retries.py",
        "buggy_code": "def retry(count):\n return count / 0",
        "tests": ["assert retry(1) == 1", "assert retry(3) == 3"],
        "edge_cases": ["0 mēģinājumi", "negatīvs skaits"],
        "metadata": {"language": "python", "task": "bugfix"},
        "completion": "```python\ndef retry(count: int) -> int:\n return max(count, 0)\n```",
    }

    rendered = record_to_training_text(coder_record)

    expected_fragments = (
        "Mērķa fails",
        "Esošais vai kļūdainais kods",
        "Robežgadījumi",
        "```python",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_load_training_config_reads_json_and_env_overrides(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that JSON file values, env variables and explicit overrides all reach the config."""
    file_values = {
        "model_name": "repo/from-json",
        "branch_name": "coder",
        "num_epochs": 7,
        "report_to": ["tensorboard"],
    }
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps(file_values), encoding="utf-8")

    monkeypatch.setenv("HF_TRAIN_BATCH_SIZE", "3")
    monkeypatch.setenv("HF_TRAIN_ADAPTER_TYPE", "lora")

    loaded = load_training_config(str(config_file), overrides={"learning_rate": 1e-4})

    expected_attributes = {
        "model_name": "repo/from-json",
        "branch_name": "coder",
        "num_epochs": 7,
        "per_device_train_batch_size": 3,
        "learning_rate": 1e-4,
        "adapter_type": "lora",
        "report_to": ["tensorboard"],
        "text_model_id": "MarisUK/maris-ai-text",
        "image_model_id": "MarisUK/maris-ai-image",
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(loaded, attribute) == expected
def test_load_training_config_reads_distributed_runtime_overrides(monkeypatch) -> None:
    """Check that the ``HF_TRAIN_*`` distributed-launch env variables populate the config."""
    distributed_env = {
        "HF_TRAIN_DISTRIBUTED_STRATEGY": "deepspeed",
        "HF_TRAIN_DISTRIBUTED_CONFIG_PATH": "huggingface/deepspeed-zero3.json",
        "HF_TRAIN_NUM_PROCESSES": "8",
        "HF_TRAIN_NUM_MACHINES": "2",
        "HF_TRAIN_MACHINE_RANK": "1",
        "HF_TRAIN_MAIN_PROCESS_IP": "10.0.0.10",
        "HF_TRAIN_MAIN_PROCESS_PORT": "29510",
    }
    for variable, value in distributed_env.items():
        monkeypatch.setenv(variable, value)

    loaded = load_training_config()

    assert loaded.distributed_strategy == "deepspeed"
    assert loaded.distributed_config_path == "huggingface/deepspeed-zero3.json"
    # No HF_TRAIN_USE_ACCELERATE is set here, yet the test expects the flag to be on.
    assert loaded.use_accelerate is True
    expected_topology = {
        "num_processes": 8,
        "num_machines": 2,
        "machine_rank": 1,
        "main_process_ip": "10.0.0.10",
        "main_process_port": 29510,
    }
    for attribute, expected in expected_topology.items():
        assert getattr(loaded, attribute) == expected
def test_load_training_config_reads_gradient_checkpointing_use_reentrant_override(
    monkeypatch,
) -> None:
    """Check that the env value ``"false"`` is parsed into a real ``False`` on the config."""
    monkeypatch.setenv("HF_TRAIN_GRADIENT_CHECKPOINTING_USE_REENTRANT", "false")

    use_reentrant = load_training_config().gradient_checkpointing_use_reentrant

    assert use_reentrant is False
def test_load_training_config_reads_runtime_model_repo_overrides(monkeypatch) -> None:
    """Check that each ``*_MODEL`` env variable overrides the matching ``*_model_id`` field."""
    # (environment variable, config attribute, repository id)
    repo_overrides = (
        ("TEXT_MODEL", "text_model_id", "MarisUK/custom-text"),
        ("IMAGE_MODEL", "image_model_id", "MarisUK/custom-image"),
        ("MUSIC_MODEL", "music_model_id", "MarisUK/custom-music"),
        ("TTS_MODEL", "tts_model_id", "MarisUK/custom-tts"),
        ("STT_MODEL", "stt_model_id", "MarisUK/custom-stt"),
        ("VIDEO_MODEL", "video_model_id", "MarisUK/custom-video"),
    )
    for variable, _attribute, repo_id in repo_overrides:
        monkeypatch.setenv(variable, repo_id)

    loaded = load_training_config()

    for _variable, attribute, repo_id in repo_overrides:
        assert getattr(loaded, attribute) == repo_id
def test_load_training_config_rejects_conflicting_precision_modes(
    tmp_path: Path,
) -> None:
    """Check that enabling both ``fp16`` and ``bf16`` raises ``ValueError``.

    The error message must contain ``"fp16 un bf16"``.
    """
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"fp16": True, "bf16": True}),
        encoding="utf-8",
    )
    # pytest.raises fails the test when nothing is raised, replacing the manual
    # try/except/else; ``match`` is a regex search over the message.
    with pytest.raises(ValueError, match="fp16 un bf16"):
        load_training_config(str(config_path))
def test_load_training_config_resolves_model_preset(
    tmp_path: Path,
) -> None:
    """Check that the ``coding`` preset resolves to its catalogued base model name."""
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps({"model_preset": "coding"}), encoding="utf-8")

    loaded = load_training_config(str(config_file))

    catalogued_name = AVAILABLE_TRAINING_BASE_MODELS["coding"]["model_name"]
    assert loaded.model_preset == "coding"
    assert loaded.model_name == catalogued_name
def test_load_training_config_resolves_extra_model_preset(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that a preset declared via ``MARIS_TRAIN_EXTRA_MODELS`` can be selected."""
    extra_models = {"qwen-32b": "Qwen/Qwen2.5-32B-Instruct"}
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", json.dumps(extra_models))

    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps({"model_preset": "qwen-32b"}), encoding="utf-8")

    loaded = load_training_config(str(config_file))

    assert loaded.model_preset == "qwen-32b"
    assert loaded.model_name == "Qwen/Qwen2.5-32B-Instruct"
def test_load_training_config_rejects_unknown_model_preset(
    tmp_path: Path,
) -> None:
    """Check that an unknown ``model_preset`` raises ``ValueError``.

    The message must mention both ``"model_preset"`` and ``"balanced"``.
    """
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"model_preset": "unknown"}),
        encoding="utf-8",
    )
    # pytest.raises fails the test when nothing is raised, replacing the manual
    # try/except/else with an explicit AssertionError.
    with pytest.raises(ValueError) as excinfo:
        load_training_config(str(config_path))

    message = str(excinfo.value)
    assert "model_preset" in message
    assert "balanced" in message
def test_load_training_config_rejects_non_maris_hub_model_id(
    tmp_path: Path,
) -> None:
    """Check that a non-Maris ``hub_model_id`` raises ``RuntimeError``.

    The error message must contain ``"Maris AI modeli"``.
    """
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"hub_model_id": "someone-else/not-maris"}),
        encoding="utf-8",
    )
    # pytest.raises fails the test when nothing is raised; ``match`` is a regex
    # search over the exception message.
    with pytest.raises(RuntimeError, match="Maris AI modeli"):
        load_training_config(str(config_path))
def test_load_training_config_rejects_non_maris_dataset_repo(
    tmp_path: Path,
) -> None:
    """Check that a non-Maris ``dataset_repo`` raises ``RuntimeError``.

    The error message must contain ``"dataset repozitorijs"``.
    """
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"dataset_repo": "someone-else/not-maris-memory"}),
        encoding="utf-8",
    )
    # pytest.raises fails the test when nothing is raised; ``match`` is a regex
    # search over the exception message.
    with pytest.raises(RuntimeError, match="dataset repozitorijs"):
        load_training_config(str(config_path))
def test_load_training_config_reads_optional_eval_dataset_repo(
    tmp_path: Path,
) -> None:
    """Check that ``eval_dataset_repo`` from the JSON file is exposed on the config."""
    config_file = tmp_path / "training.json"
    file_values = {"eval_dataset_repo": "MarisUK/maris-ai-evals"}
    config_file.write_text(json.dumps(file_values), encoding="utf-8")

    loaded = load_training_config(str(config_file))

    assert loaded.eval_dataset_repo == "MarisUK/maris-ai-evals"
def test_load_training_config_reads_explicit_training_and_eval_dataset_repo_lists(
    tmp_path: Path,
) -> None:
    """Check that explicit ``dataset_repos`` / ``eval_dataset_repos`` lists load unchanged."""
    training_repos = [
        "MarisUK/maris-ai-memory",
        "MarisUK/maris-ai-lv-memory",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    evaluation_repos = [
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    file_values = {
        "dataset_repo": "MarisUK/maris-ai-memory",
        "dataset_repos": training_repos,
        "eval_dataset_repo": "MarisUK/maris-ai-evals",
        "eval_dataset_repos": evaluation_repos,
    }
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps(file_values), encoding="utf-8")

    loaded = load_training_config(str(config_file))

    assert loaded.dataset_repos == training_repos
    assert loaded.eval_dataset_repos == evaluation_repos
def test_load_training_config_reads_benchmark_and_preference_paths(
    tmp_path: Path,
) -> None:
    """Check that benchmark, feedback, preference and per-branch settings load from JSON."""
    file_values = {
        "benchmark_dataset_path": "/tmp/benchmarks/release.json",
        "benchmark_name": "release-gate",
        "benchmark_levels": ["ci", "release"],
        "benchmark_min_overall": 0.75,
        "benchmark_gate_enabled": True,
        "benchmark_feedback_auto_discover": False,
        "benchmark_feedback_path": "/tmp/benchmarks/previous.json",
        "benchmark_feedback_boost_scale": 2.5,
        "benchmark_feedback_max_multiplier": 1.8,
        "preference_dataset_path": "/tmp/preferences.json",
        "branch_benchmark_targets": {"master": {"overall": 0.8, "reasoning": 0.78}},
        "branch_benchmark_names": {
            "master": "memory-quality",
            "coder": "coder-release-quality",
        },
        "branch_benchmark_dataset_paths": {
            "coder": "/tmp/benchmarks/coder-release.json",
            "planner": "/tmp/benchmarks/planner-release.json",
        },
        "branch_preference_dataset_paths": {
            "coder": "/tmp/preferences/coder-preferences.json"
        },
        "branch_dataset_filter_rules": {
            "planner": {"include_record_types": ["autonomous"], "allow_unlabeled": False}
        },
        "source_weight_map": {"production": 1.5, "synthetic": 1.0, "noisy": 0.6},
    }
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps(file_values), encoding="utf-8")

    loaded = load_training_config(str(config_file))

    # Flat settings.
    assert loaded.benchmark_dataset_path == "/tmp/benchmarks/release.json"
    assert loaded.benchmark_name == "release-gate"
    assert loaded.benchmark_levels == ["ci", "release"]
    assert loaded.benchmark_min_overall == 0.75
    assert loaded.benchmark_gate_enabled is True
    assert loaded.benchmark_feedback_auto_discover is False
    assert loaded.benchmark_feedback_path == "/tmp/benchmarks/previous.json"
    assert loaded.benchmark_feedback_boost_scale == 2.5
    assert loaded.benchmark_feedback_max_multiplier == 1.8
    assert loaded.preference_dataset_path == "/tmp/preferences.json"

    # Per-branch mappings.
    assert loaded.branch_benchmark_targets["master"]["reasoning"] == 0.78
    benchmark_names = loaded.branch_benchmark_names
    assert benchmark_names["master"] == "memory-quality"
    assert benchmark_names["coder"] == "coder-release-quality"
    benchmark_paths = loaded.branch_benchmark_dataset_paths
    assert benchmark_paths["coder"] == "/tmp/benchmarks/coder-release.json"
    assert benchmark_paths["planner"] == "/tmp/benchmarks/planner-release.json"
    preference_paths = loaded.branch_preference_dataset_paths
    assert preference_paths["coder"] == "/tmp/preferences/coder-preferences.json"
    planner_rules = loaded.branch_dataset_filter_rules["planner"]
    assert planner_rules["include_record_types"] == ["autonomous"]
    assert loaded.source_weight_map["production"] == 1.5
def test_load_training_config_default_coder_targets_include_execution_gate() -> None:
    """Check the default per-branch benchmark targets, names and bundled dataset paths."""
    defaults = load_training_config()

    targets = defaults.branch_benchmark_targets
    assert targets["coder"]["execution"] == 0.7
    assert targets["master"]["memory_retrieval_pass_rate"] == 0.8
    assert defaults.branch_benchmark_names["master"] == "memory-quality"

    benchmark_paths = defaults.branch_benchmark_dataset_paths
    master_suffix = "core-python/evals/master_memory_benchmark.json"
    assert benchmark_paths["master"].endswith(master_suffix)
    coder_suffix = "core-python/evals/coder_release_benchmark.json"
    assert benchmark_paths["coder"].endswith(coder_suffix)

    preference_suffix = "core-python/evals/coder_preference_dataset.json"
    assert defaults.branch_preference_dataset_paths["coder"].endswith(preference_suffix)
def test_apply_branch_runtime_defaults_prefers_master_memory_suite() -> None:
    """Check that the ``master`` branch resolves to the memory benchmark suite.

    The config starts with an empty dataset path and the name ``chat-quality``;
    after ``_apply_branch_runtime_defaults`` both must point at the memory suite.
    """
    import maris_core.training.train as train_module

    overrides = {
        "branch_name": "master",
        "benchmark_dataset_path": "",
        "benchmark_name": "chat-quality",
        "benchmark_gate_enabled": True,
    }
    initial = load_training_config(overrides=overrides)

    effective = train_module._apply_branch_runtime_defaults(initial)

    assert effective.benchmark_name == "memory-quality"
    expected_suffix = "core-python/evals/master_memory_benchmark.json"
    assert effective.benchmark_dataset_path.endswith(expected_suffix)
def test_build_benchmark_gate_artifact_uses_world_class_defaults_and_blocks_regressions() -> None:
    """Check the default gate targets and that a non-zero regression count fails the gate."""
    coder_config = load_training_config(
        overrides={"branch_name": "coder", "benchmark_gate_enabled": True}
    )
    score_manifest = {
        "overall": 0.8,
        "coding": 0.81,
        "reasoning": 0.76,
        "execution": 0.74,
        "grounding": 0.78,
        "safety": 0.93,
        "judge_overall": 0.78,
        "judge_task_completion": 0.77,
        "judge_instruction_following": 0.79,
        "judge_safety": 0.95,
        "judge_regression_risk": 0.8,
    }
    benchmark_report = {
        "benchmark_name": "release-gate",
        "score_manifest": score_manifest,
        "success_rate": 0.88,
        "production_like_cases": 3,
        "production_like_pass_rate": 0.8,
        "execution_cases": 4,
        "grounding_cases": 3,
    }

    gate = _build_benchmark_gate_artifact(
        coder_config,
        benchmark_report,
        regression_report={"regression_count": 2},
    )

    targets = gate["targets"]
    assert targets["success_rate"] == 0.85
    assert targets["production_like_pass_rate"] == 0.75
    assert targets["judge_overall"] == 0.72
    assert gate["passed"] is False
    regression_failure = gate["failed_metrics"]["regression_count"]
    assert regression_failure["required"] == 0.0
    assert regression_failure["actual"] == 2.0
def test_build_benchmark_gate_artifact_uses_stricter_execution_threshold() -> None:
    """Check that an execution score of 0.6 fails the coder gate, whose target is 0.7."""
    coder_config = load_training_config(
        overrides={"branch_name": "coder", "benchmark_gate_enabled": True}
    )
    benchmark_report = {
        "benchmark_name": "release-gate",
        "score_manifest": {
            "overall": 0.8,
            "coding": 0.82,
            "reasoning": 0.76,
            "execution": 0.6,
            "grounding": 0.78,
            "safety": 0.94,
        },
        "execution_cases": 4,
    }

    gate = _build_benchmark_gate_artifact(coder_config, benchmark_report)

    assert gate["passed"] is False
    assert gate["targets"]["execution"] == 0.7
    assert gate["failed_metrics"]["execution"]["actual"] == 0.6
def test_load_training_config_reads_category_weight_map(tmp_path: Path) -> None:
    """Check that ``category_weight_map`` entries from the JSON file reach the config."""
    file_values = {"category_weight_map": {"coding": 1.3, "grounding": 1.2}}
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps(file_values), encoding="utf-8")

    weights = load_training_config(str(config_file)).category_weight_map

    assert weights["coding"] == 1.3
    assert weights["grounding"] == 1.2
def test_load_training_config_reads_continue_training_settings(monkeypatch) -> None:
    """Check that the continue-training env variables populate the matching config fields."""
    continue_env = {
        "HF_TRAIN_CONTINUE_FROM_LATEST": "true",
        "HF_TRAIN_CONTINUE_MODEL_PATH": "/tmp/maris-last-good",
    }
    for variable, value in continue_env.items():
        monkeypatch.setenv(variable, value)

    loaded = load_training_config()

    assert loaded.continue_from_latest_artifact is True
    assert loaded.continue_model_path == "/tmp/maris-last-good"
def test_list_training_base_models_returns_copy() -> None:
    """Check that mutating the returned catalogue leaves the module-level catalogue intact."""
    catalogue = list_training_base_models()
    catalogue["balanced"]["model_name"] = "modified"

    canonical_name = AVAILABLE_TRAINING_BASE_MODELS["balanced"]["model_name"]
    assert canonical_name == DEFAULT_TRAINING_BASE_MODEL
def test_list_training_base_models_ignores_invalid_extra_models_json(monkeypatch) -> None:
    """Check that malformed ``MARIS_TRAIN_EXTRA_MODELS`` still yields the four built-in presets."""
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", "{not valid json")

    catalogue = list_training_base_models()

    built_in_presets = {"balanced", "reasoning", "coding", "lightweight"}
    assert built_in_presets <= set(catalogue)
def test_list_training_base_models_accepts_owner_name_fallback_syntax(monkeypatch) -> None:
    """Check the comma-separated syntax: a bare ``owner/name`` entry and an ``alias=owner/name`` entry."""
    raw_extra_models = "Qwen/Qwen3-Coder-480B-A35B-Instruct, coder-7b=Qwen/Qwen2.5-7B-Instruct"
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", raw_extra_models)

    catalogue = list_training_base_models()

    # The bare entry is expected under a lower-cased, dash-joined key.
    derived_entry = catalogue["qwen-qwen3-coder-480b-a35b-instruct"]
    assert derived_entry["model_name"] == "Qwen/Qwen3-Coder-480B-A35B-Instruct"
    assert catalogue["coder-7b"]["model_name"] == "Qwen/Qwen2.5-7B-Instruct"
def test_list_training_base_models_accepts_string_shorthand(monkeypatch) -> None:
    """Check that a JSON ``{alias: repo}`` string entry yields a model name and a label."""
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", '{"qwen-880b":"Qwen/Qwen3-880B-Instruct"}')

    shorthand_entry = list_training_base_models()["qwen-880b"]

    assert shorthand_entry["model_name"] == "Qwen/Qwen3-880B-Instruct"
    assert shorthand_entry["label"] == "Qwen 880B"
def test_load_training_config_prefers_explicit_model_name_over_preset(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that ``HF_TRAIN_BASE_MODEL`` beats both the file preset and the env preset.

    The resolved ``model_preset`` is expected to be the empty string.
    """
    config_file = tmp_path / "training.json"
    config_file.write_text(json.dumps({"model_preset": "coding"}), encoding="utf-8")

    for variable, value in (
        ("HF_TRAIN_BASE_MODEL", "custom/model"),
        ("HF_TRAIN_MODEL_PRESET", "reasoning"),
    ):
        monkeypatch.setenv(variable, value)

    loaded = load_training_config(str(config_file))

    assert loaded.model_name == "custom/model"
    assert loaded.model_preset == ""
def test_huggingface_train_script_resolves_relative_config_from_repo_root(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train.sh`` against a stub ``python3`` and inspect the launch.

    The stub is placed first on ``PATH`` and writes its working directory and
    argv to the file named by ``TRAIN_SH_LOG``. The test expects the script to
    run ``train_model.py`` from ``core-python`` with the relative
    ``HF_TRAINING_CONFIG_PATH`` expanded against the repository root.
    """
    ancestors = Path(__file__).resolve().parents
    repo_root = next(
        candidate
        for candidate in ancestors
        if (candidate / "huggingface" / "train.sh").is_file()
    )
    core_python_dir = repo_root / "core-python"
    huggingface_dir = repo_root / "huggingface"

    # Stub interpreter: records how it was invoked instead of training.
    stub_source_lines = [
        f"#!{sys.executable}",
        "import json",
        "import os",
        "import sys",
        "from pathlib import Path",
        "",
        "Path(os.environ['TRAIN_SH_LOG']).write_text(",
        " json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False),",
        " encoding='utf-8',",
        ")",
    ]
    stub_python = tmp_path / "python3"
    stub_python.write_text("\n".join(stub_source_lines), encoding="utf-8")
    stub_python.chmod(0o755)
    invocation_log = tmp_path / "train-invocation.json"

    inherited_path = os.environ.get("PATH", "")
    if inherited_path:
        patched_path = f"{tmp_path}{os.pathsep}{inherited_path}"
    else:
        patched_path = str(tmp_path)
    monkeypatch.setenv("PATH", patched_path)
    monkeypatch.setenv("HF_TRAINING_CONFIG_PATH", "huggingface/training-config.json")
    monkeypatch.setenv("TRAIN_SH_LOG", str(invocation_log))

    subprocess.run(
        ["bash", str(huggingface_dir / "train.sh")],
        check=True,
        cwd=repo_root,
    )

    recorded = json.loads(invocation_log.read_text(encoding="utf-8"))
    recorded_argv = recorded["argv"]
    assert recorded["cwd"] == str(core_python_dir)
    assert recorded_argv[0] == str(core_python_dir / "scripts" / "train_model.py")
    assert recorded_argv[1:3] == [
        "--config",
        str(huggingface_dir / "training-config.json"),
    ]
def test_huggingface_train_hf_script_uses_persistent_paths_and_uploads_model(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-hf.sh`` against a stub ``python3`` and inspect both launches.

    The stub appends one JSON line (cwd and argv) per invocation to the file
    named by ``TRAIN_HF_LOG``. Exactly two invocations are expected:
    ``train_model.py`` with the HF-jobs config and the forwarded preset, then
    ``export_to_hf.py`` pointed at ``<HF_PERSISTENT_DIR>/maris-ai-master``.
    """
    ancestors = Path(__file__).resolve().parents
    repo_root = next(
        candidate
        for candidate in ancestors
        if (candidate / "huggingface" / "train-hf.sh").is_file()
    )
    scripts_dir = repo_root / "core-python" / "scripts"
    huggingface_dir = repo_root / "huggingface"
    persistent_dir = tmp_path / "persistent"

    # Stub interpreter: appends a record of each invocation to the log file.
    stub_source_lines = [
        f"#!{sys.executable}",
        "import json",
        "import os",
        "import sys",
        "from pathlib import Path",
        "",
        "log_path = Path(os.environ['TRAIN_HF_LOG'])",
        "with log_path.open('a', encoding='utf-8') as handle:",
        " handle.write(",
        " json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
        " )",
    ]
    stub_python = tmp_path / "python3"
    stub_python.write_text("\n".join(stub_source_lines), encoding="utf-8")
    stub_python.chmod(0o755)
    invocation_log = tmp_path / "train-hf-invocations.jsonl"

    inherited_path = os.environ.get("PATH", "")
    if inherited_path:
        patched_path = f"{tmp_path}{os.pathsep}{inherited_path}"
    else:
        patched_path = str(tmp_path)
    monkeypatch.setenv("PATH", patched_path)
    monkeypatch.setenv("HF_PERSISTENT_DIR", str(persistent_dir))
    monkeypatch.setenv("TRAIN_HF_LOG", str(invocation_log))
    # Clear variables inherited from the caller's environment.
    for variable in ("HF_TRAIN_OUTPUT_DIR", "HF_LOCAL_MODEL_DIR", "HF_TRAIN_PUSH_TO_HUB"):
        monkeypatch.delenv(variable, raising=False)

    subprocess.run(
        ["bash", str(huggingface_dir / "train-hf.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    log_lines = invocation_log.read_text(encoding="utf-8").splitlines()
    recorded = [json.loads(entry) for entry in log_lines if entry.strip()]
    assert len(recorded) == 2

    train_call = recorded[0]
    assert train_call["cwd"] == str(repo_root / "core-python")
    assert train_call["argv"][0] == str(scripts_dir / "train_model.py")
    assert train_call["argv"][1:5] == [
        "--config",
        str(huggingface_dir / "training-config.hf-jobs.json"),
        "--model-preset",
        "coding",
    ]

    export_argv = recorded[1]["argv"]
    assert export_argv[0] == str(scripts_dir / "export_to_hf.py")
    assert export_argv[1:3] == [
        "--model-path",
        str(persistent_dir / "maris-ai-master"),
    ]
def test_huggingface_train_hf_script_enables_accelerate_on_gpu_space(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-hf.sh`` with a stub ``nvidia-smi`` that reports one GPU.

    A stub ``python3`` logs each invocation to the file named by
    ``TRAIN_HF_GPU_LOG``. The first invocation is expected to be
    ``-m accelerate.commands.launch`` with the GPU accelerate config, one
    process, and ``train_model.py``; the second must be ``export_to_hf.py``.
    """
    ancestors = Path(__file__).resolve().parents
    repo_root = next(
        candidate
        for candidate in ancestors
        if (candidate / "huggingface" / "train-hf.sh").is_file()
    )
    scripts_dir = repo_root / "core-python" / "scripts"
    huggingface_dir = repo_root / "huggingface"
    persistent_dir = tmp_path / "persistent-gpu"

    # Stub interpreter: appends a record of each invocation to the log file.
    stub_source_lines = [
        f"#!{sys.executable}",
        "import json",
        "import os",
        "import sys",
        "from pathlib import Path",
        "",
        "log_path = Path(os.environ['TRAIN_HF_GPU_LOG'])",
        "with log_path.open('a', encoding='utf-8') as handle:",
        " handle.write(",
        " json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
        " )",
    ]
    stub_python = tmp_path / "python3"
    stub_python.write_text("\n".join(stub_source_lines), encoding="utf-8")
    stub_python.chmod(0o755)

    # Stub nvidia-smi: prints a single fake GPU line.
    stub_nvidia_smi = tmp_path / "nvidia-smi"
    stub_nvidia_smi.write_text(
        "#!/usr/bin/env bash\necho 'GPU 0: Fake GPU'\n", encoding="utf-8"
    )
    stub_nvidia_smi.chmod(0o755)
    invocation_log = tmp_path / "train-hf-gpu-invocations.jsonl"

    inherited_path = os.environ.get("PATH", "")
    if inherited_path:
        patched_path = f"{tmp_path}{os.pathsep}{inherited_path}"
    else:
        patched_path = str(tmp_path)
    monkeypatch.setenv("PATH", patched_path)
    monkeypatch.setenv("HF_PERSISTENT_DIR", str(persistent_dir))
    monkeypatch.setenv("TRAIN_HF_GPU_LOG", str(invocation_log))
    for variable in ("HF_TRAIN_USE_ACCELERATE", "HF_TRAIN_NUM_PROCESSES"):
        monkeypatch.delenv(variable, raising=False)

    subprocess.run(
        ["bash", str(huggingface_dir / "train-hf.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    log_lines = invocation_log.read_text(encoding="utf-8").splitlines()
    recorded = [json.loads(entry) for entry in log_lines if entry.strip()]

    launch_argv = recorded[0]["argv"]
    assert launch_argv[0:2] == ["-m", "accelerate.commands.launch"]
    assert "--config_file" in launch_argv
    assert str(huggingface_dir / "accelerate-gpu-config.yaml") in launch_argv
    assert "--num_processes" in launch_argv
    process_count_position = launch_argv.index("--num_processes") + 1
    assert launch_argv[process_count_position] == "1"
    assert str(scripts_dir / "train_model.py") in launch_argv
    assert recorded[1]["argv"][0] == str(scripts_dir / "export_to_hf.py")
def test_huggingface_train_job_script_uses_accelerate_for_distributed_launch(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-job.sh`` with the deepspeed strategy and a fake GPU.

    A stub ``python3`` logs each invocation to the file named by
    ``TRAIN_JOB_LOG``. The first invocation is expected to be
    ``-m accelerate.commands.launch`` carrying the GPU accelerate config, the
    HF-jobs training config and ``train_model.py``; the second must be
    ``export_to_hf.py``.
    """
    ancestors = Path(__file__).resolve().parents
    repo_root = next(
        candidate
        for candidate in ancestors
        if (candidate / "huggingface" / "train-job.sh").is_file()
    )
    scripts_dir = repo_root / "core-python" / "scripts"
    huggingface_dir = repo_root / "huggingface"

    # Stub interpreter: appends a record of each invocation to the log file.
    stub_source_lines = [
        f"#!{sys.executable}",
        "import json",
        "import os",
        "import sys",
        "from pathlib import Path",
        "",
        "log_path = Path(os.environ['TRAIN_JOB_LOG'])",
        "with log_path.open('a', encoding='utf-8') as handle:",
        " handle.write(",
        " json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
        " )",
    ]
    stub_python = tmp_path / "python3"
    stub_python.write_text("\n".join(stub_source_lines), encoding="utf-8")
    stub_python.chmod(0o755)

    # Stub nvidia-smi: prints a single fake GPU line.
    stub_nvidia_smi = tmp_path / "nvidia-smi"
    stub_nvidia_smi.write_text(
        "#!/usr/bin/env bash\necho 'GPU 0: Fake GPU'\n", encoding="utf-8"
    )
    stub_nvidia_smi.chmod(0o755)
    invocation_log = tmp_path / "train-job-invocations.jsonl"

    inherited_path = os.environ.get("PATH", "")
    if inherited_path:
        patched_path = f"{tmp_path}{os.pathsep}{inherited_path}"
    else:
        patched_path = str(tmp_path)
    monkeypatch.setenv("PATH", patched_path)
    monkeypatch.setenv("HF_JOB_WORK_DIR", str(tmp_path / "job-work"))
    monkeypatch.setenv("TRAIN_JOB_LOG", str(invocation_log))
    monkeypatch.setenv("HF_TRAIN_DISTRIBUTED_STRATEGY", "deepspeed")
    for variable in ("HF_TRAIN_USE_ACCELERATE", "HF_TRAIN_NUM_PROCESSES"):
        monkeypatch.delenv(variable, raising=False)

    subprocess.run(
        ["bash", str(huggingface_dir / "train-job.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    log_lines = invocation_log.read_text(encoding="utf-8").splitlines()
    recorded = [json.loads(entry) for entry in log_lines if entry.strip()]

    launch_argv = recorded[0]["argv"]
    assert launch_argv[0:2] == ["-m", "accelerate.commands.launch"]
    assert str(huggingface_dir / "accelerate-gpu-config.yaml") in launch_argv
    assert str(huggingface_dir / "training-config.hf-jobs.json") in launch_argv
    assert str(scripts_dir / "train_model.py") in launch_argv
    assert recorded[1]["argv"][0] == str(scripts_dir / "export_to_hf.py")
def test_configure_tokenizer_expands_large_model_context_window() -> None:
    """Check that ``_configure_tokenizer`` fills the pad token from EOS and raises the length cap.

    The stub tokenizer starts with no pad token and ``model_max_length=4096``;
    the config asks for ``max_seq_length=65536``.
    """
    import maris_core.training.train as train_module

    config = load_training_config(
        overrides={
            "model_name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
            "max_seq_length": 65536,
        }
    )
    stub_tokenizer = types.SimpleNamespace(
        pad_token=None,
        pad_token_id=None,
        eos_token="<eos>",
        eos_token_id=7,
        model_max_length=4096,
    )

    train_module._configure_tokenizer(stub_tokenizer, config)

    observed = (
        stub_tokenizer.pad_token,
        stub_tokenizer.pad_token_id,
        stub_tokenizer.model_max_length,
    )
    assert observed == ("<eos>", 7, 65536)
def test_load_tokenizer_forces_remote_snapshot_restore(monkeypatch) -> None:
    """Check that ``_load_tokenizer`` opens the compat path once with ``allow_remote_snapshot=True``."""
    import maris_core.training.train as train_module

    snapshot_flags: list[bool | None] = []

    class FakeTokenizer:
        """Stand-in for ``transformers.AutoTokenizer`` that always loads successfully."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            # The arguments are irrelevant for this stub.
            return cls()

    class CompatPath:
        """Context manager stub that records the ``allow_remote_snapshot`` flag it receives."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return "/tmp/fake-model"

        def __exit__(self, exc_type, exc, tb) -> None:
            return None

    fake_transformers = types.SimpleNamespace(AutoTokenizer=FakeTokenizer)
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", CompatPath)

    model_id = "MarisUK/maris-ai-master"
    config = load_training_config(overrides={"model_name": model_id})
    loaded = train_module._load_tokenizer(model_id, config)

    assert isinstance(loaded, FakeTokenizer)
    assert snapshot_flags == [True]
def test_load_tokenizer_falls_back_to_explicit_slow_class(monkeypatch, tmp_path) -> None:
    """Check the fallback chain: auto (fast) -> auto (slow) -> explicit slow tokenizer class.

    The fake ``AutoTokenizer`` always raises. The model directory's config
    files name ``Qwen2TokenizerFast`` and map ``AutoTokenizer`` to
    ``Qwen2Tokenizer``, which the fake ``transformers`` module exposes as the
    slow class that finally succeeds.
    """
    import maris_core.training.train as train_module

    snapshot_flags: list[bool | None] = []
    load_attempts: list[tuple[str, Any]] = []

    model_dir = tmp_path / "trained-model"
    model_dir.mkdir(parents=True, exist_ok=True)
    tokenizer_config = {"tokenizer_class": "Qwen2TokenizerFast"}
    (model_dir / "tokenizer_config.json").write_text(
        json.dumps(tokenizer_config), encoding="utf-8"
    )
    model_config = {
        "tokenizer_class": "Qwen2TokenizerFast",
        "auto_map": {"AutoTokenizer": ["Qwen2Tokenizer", "Qwen2TokenizerFast"]},
    }
    (model_dir / "config.json").write_text(json.dumps(model_config), encoding="utf-8")

    class FakeAutoTokenizer:
        """Auto tokenizer stub that fails for both the fast and the slow request."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            load_attempts.append(("auto", kwargs.get("use_fast")))
            # A missing ``use_fast`` counts as a fast request.
            if kwargs.get("use_fast", True):
                failure = (
                    "Couldn't instantiate the backend tokenizer from one of the available paths."
                )
            else:
                failure = "tokenizer config still points to a fast tokenizer class"
            raise ValueError(failure)

    class FakeSlowTokenizer:
        """Explicit slow tokenizer stub; must be called without ``use_fast``."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            load_attempts.append(("slow", kwargs.get("use_fast")))
            assert model_name == str(model_dir)
            assert "use_fast" not in kwargs
            return cls()

    class CompatPath:
        """Context manager stub that yields ``model_dir`` and records the snapshot flag."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return str(model_dir)

        def __exit__(self, exc_type, exc, tb) -> None:
            return None

    fake_transformers = types.SimpleNamespace(
        AutoTokenizer=FakeAutoTokenizer,
        Qwen2Tokenizer=FakeSlowTokenizer,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", CompatPath)

    model_id = "MarisUK/maris-ai-master"
    config = load_training_config(overrides={"model_name": model_id})
    loaded = train_module._load_tokenizer(model_id, config)

    assert isinstance(loaded, FakeSlowTokenizer)
    assert snapshot_flags == [True]
    assert load_attempts == [("auto", True), ("auto", False), ("slow", None)]
def test_load_tokenizer_retries_after_installing_missing_backends(monkeypatch, tmp_path) -> None:
    """Check that a missing-backend error triggers one install attempt and a successful retry.

    The fake ``AutoTokenizer`` raises the sentencepiece/tiktoken error until the
    patched ``_install_missing_tokenizer_backends`` flips its ``retry_ready`` flag.
    """
    import maris_core.training.train as train_module

    snapshot_flags: list[bool | None] = []
    load_attempts: list[tuple[str, Any]] = []
    install_calls: list[bool] = []

    model_dir = tmp_path / "trained-model"
    model_dir.mkdir(parents=True, exist_ok=True)

    class FakeAutoTokenizer:
        """Auto tokenizer stub that only loads once ``retry_ready`` has been set."""

        retry_ready = False

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            load_attempts.append(("auto", kwargs.get("use_fast")))
            if not cls.retry_ready:
                raise ValueError(
                    "You need to have sentencepiece or tiktoken installed to convert a slow tokenizer to a fast one."
                )
            return cls()

    class CompatPath:
        """Context manager stub that yields ``model_dir`` and records the snapshot flag."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return str(model_dir)

        def __exit__(self, exc_type, exc, tb) -> None:
            pass

    def fake_install_missing_tokenizer_backends() -> bool:
        # Pretend the install worked and let the next load succeed.
        install_calls.append(True)
        FakeAutoTokenizer.retry_ready = True
        return True

    fake_transformers = types.SimpleNamespace(AutoTokenizer=FakeAutoTokenizer)
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", CompatPath)
    monkeypatch.setattr(
        train_module,
        "_install_missing_tokenizer_backends",
        fake_install_missing_tokenizer_backends,
    )

    model_id = "MarisUK/maris-ai-master"
    config = load_training_config(overrides={"model_name": model_id})
    loaded = train_module._load_tokenizer(model_id, config)

    assert isinstance(loaded, FakeAutoTokenizer)
    assert snapshot_flags == [True]
    assert install_calls == [True]
    assert load_attempts == [("auto", True), ("auto", False), ("auto", True)]
def test_install_missing_tokenizer_backends_only_installs_missing_packages(monkeypatch) -> None:
    """Check that only ``sentencepiece`` is pip-installed when ``tiktoken`` is already importable."""
    import maris_core.training.train as train_module

    pip_commands: list[list[str]] = []
    importable_modules = {"tiktoken"}

    def fake_find_spec(name: str):
        # Any non-None object stands in for a found module spec.
        if name in importable_modules:
            return object()
        return None

    def fake_run(command, **kwargs):
        # Record the command instead of spawning a process.
        pip_commands.append(command)
        return types.SimpleNamespace(stdout="")

    monkeypatch.setattr(train_module.importlib.util, "find_spec", fake_find_spec)
    monkeypatch.setattr(train_module.importlib, "invalidate_caches", lambda: None)
    monkeypatch.setattr(train_module.subprocess, "run", fake_run)

    outcome = train_module._install_missing_tokenizer_backends()

    assert outcome is True
    expected_command = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--no-cache-dir",
        "sentencepiece",
    ]
    assert pip_commands == [expected_command]
def test_install_missing_tokenizer_backends_is_noop_when_backends_exist(monkeypatch) -> None:
    """Check that nothing is installed, and ``False`` is returned, when every backend is found."""
    import maris_core.training.train as train_module

    def fail_if_pip_runs(*args, **kwargs):
        # Any subprocess launch here means the helper tried to install something.
        raise AssertionError("pip should not run")

    monkeypatch.setattr(train_module.importlib.util, "find_spec", lambda name: object())
    monkeypatch.setattr(train_module.subprocess, "run", fail_if_pip_runs)

    outcome = train_module._install_missing_tokenizer_backends()

    assert outcome is False
def test_prepare_training_model_passes_use_reentrant_override(monkeypatch) -> None:
    """Check that ``use_reentrant=False`` is forwarded to ``gradient_checkpointing_enable``.

    Also checks that the model config receives the tokenizer's pad token id and
    that ``use_cache`` is switched off.
    """
    import maris_core.training.train as train_module

    class FakeModel:
        """Model stub that stores the kwargs passed to ``gradient_checkpointing_enable``."""

        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)
            self.gradient_checkpointing_kwargs = None

        def gradient_checkpointing_enable(self, *, gradient_checkpointing_kwargs=None):
            self.gradient_checkpointing_kwargs = gradient_checkpointing_kwargs

    stub_model = FakeModel()
    stub_tokenizer = types.SimpleNamespace(pad_token_id=7)
    checkpointing_config = load_training_config(
        overrides={
            "gradient_checkpointing": True,
            "gradient_checkpointing_use_reentrant": False,
        }
    )
    # Skip real model loading and adapter wrapping.
    monkeypatch.setattr(train_module, "_load_model", lambda model_name, config: stub_model)
    monkeypatch.setattr(train_module, "_apply_peft_adapter", lambda model, config: model)

    result = train_module._prepare_training_model(
        "MarisUK/maris-ai-master", stub_tokenizer, checkpointing_config
    )

    assert result is stub_model
    assert stub_model.config.pad_token_id == 7
    assert stub_model.config.use_cache is False
    assert stub_model.gradient_checkpointing_kwargs == {"use_reentrant": False}
def test_prepare_training_model_falls_back_when_runtime_rejects_use_reentrant(
    monkeypatch, caplog
) -> None:
    """Checkpointing is still enabled when the model's method takes no kwargs.

    The fake ``gradient_checkpointing_enable`` accepts no arguments at all; the
    test expects checkpointing to be turned on regardless and a warning about
    the ignored ``gradient_checkpointing_use_reentrant`` setting to be logged.
    """
    import maris_core.training.train as train_module

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)
            self.gradient_checkpointing_enabled = False

        def gradient_checkpointing_enable(self):
            self.gradient_checkpointing_enabled = True

    fake_model = FakeModel()
    fake_tokenizer = types.SimpleNamespace(pad_token_id=7)
    checkpointing_overrides = {
        "gradient_checkpointing": True,
        "gradient_checkpointing_use_reentrant": False,
    }
    training_config = load_training_config(overrides=checkpointing_overrides)

    def fake_load_model(model_name, config):
        return fake_model

    def passthrough_peft_adapter(model, config):
        return model

    monkeypatch.setattr(train_module, "_load_model", fake_load_model)
    monkeypatch.setattr(train_module, "_apply_peft_adapter", passthrough_peft_adapter)

    with caplog.at_level("WARNING"):
        result = train_module._prepare_training_model(
            "MarisUK/maris-ai-master", fake_tokenizer, training_config
        )

    expected_warning = "Ignoring explicit gradient_checkpointing_use_reentrant=False"
    assert result is fake_model
    assert fake_model.gradient_checkpointing_enabled is True
    assert expected_warning in caplog.text
def test_train_auto_enables_deepspeed_for_giant_long_context_model(
    tmp_path: Path, monkeypatch
) -> None:
    """Check that train() hands a DeepSpeed ZeRO-3 config to TrainingArguments.

    train() is called with a 480B model name and ``max_seq_length=65536`` while
    ``distributed_strategy="none"`` and ``use_accelerate=False``. The assertions
    expect the captured TrainingArguments kwargs to contain a ``deepspeed`` value
    ending in ``huggingface/deepspeed-zero3.json`` and
    ``ddp_find_unused_parameters`` set to ``False``.

    ``transformers`` and ``peft`` are replaced in ``sys.modules`` by in-memory
    fakes, so no real model or tokenizer is loaded.
    """

    class FakeDataset:
        # In-memory stand-in providing train_test_split/map/len over a list of dicts.
        def __init__(self, items):
            self.items = list(items)
            # Columns are taken from the first record only.
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split regardless of the requested size: first record vs. the rest.
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            # Column-oriented batch; records lacking a column contribute None.
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch if batched else self.items[0])
            # Row count is inferred from the first output column.
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    # Serve a two-record dataset instead of loading one.
    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Čau!"},
                    {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
                ]
            )
        },
    )
    # The JSON loader stub just echoes its arguments back as a dict.
    monkeypatch.setattr(
        "maris_core.training.train._load_json_object",
        lambda path_value, *, label: {"config_path": path_value, "label": label},
    )
    # Runtime package requirement checks become no-ops.
    monkeypatch.setattr(
        "maris_core.training.train._require_runtime_package", lambda *args, **kwargs: None
    )

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 0
        model_max_length = 4096

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, max_length, padding):
            del truncation, padding
            # One short sequence (at most four tokens) per input text.
            return {
                "input_ids": [[1] * min(max_length, 4) for _ in texts],
                "attention_mask": [[1] * min(max_length, 4) for _ in texts],
            }

        def save_pretrained(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def gradient_checkpointing_enable(self):
            self.gradient_checkpointing_enabled = True

    class FakeTrainingArguments:
        # Stores the constructor kwargs so the test can inspect them afterwards.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Most recently constructed trainer, exposed for the assertions below.
        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, train_dataset, eval_dataset, data_collator
            self.args = args
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.1}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    # Swap the third-party libraries for the fakes; monkeypatch undoes this at teardown.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
            BitsAndBytesConfig=lambda **kwargs: kwargs,
        ),
    )
    monkeypatch.setitem(
        sys.modules,
        "peft",
        types.SimpleNamespace(
            LoraConfig=lambda **kwargs: kwargs,
            TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
            get_peft_model=lambda model, peft_config: model,
            prepare_model_for_kbit_training=lambda model, use_gradient_checkpointing: model,
        ),
    )
    train(
        output_dir=str(tmp_path / "giant-long-context"),
        model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        max_seq_length=65536,
        distributed_strategy="none",
        use_accelerate=False,
    )
    assert FakeTrainer.last_instance is not None
    # DeepSpeed is expected even though distributed_strategy was passed as "none".
    assert FakeTrainer.last_instance.args.kwargs["deepspeed"].endswith(
        "huggingface/deepspeed-zero3.json"
    )
    assert FakeTrainer.last_instance.args.kwargs["ddp_find_unused_parameters"] is False
def test_train_uses_eval_split_and_writes_metrics(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """End-to-end check of train() against in-memory fakes.

    The test asserts three groups of outcomes:

    * the returned metrics and the train/eval datasets handed to the trainer;
    * the set of JSON/Markdown artifacts written to the output directory;
    * that model, tokenizer, adapter and chat-template files, which the fakes
      deliberately write with third-party model names, end up carrying only
      the Maris identity.
    """

    class FakeDataset:
        # In-memory stand-in providing train_test_split/map/len over a list of dicts.
        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            # Hold out exactly the last record, but always keep one for training.
            split_index = max(1, len(self.items) - 1)
            return {
                "train": FakeDataset(self.items[:split_index]),
                "test": FakeDataset(self.items[split_index:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            # Row count is inferred from the first output column.
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    fake_dataset = {
        "train": FakeDataset(
            [
                {"user": "Sveiki", "assistant": "Labdien"},
                {"user": "Kā iet?", "assistant": "Labi"},
            ]
        )
    }
    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", lambda _: fake_dataset)

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            # Pin the tokenization arguments this test expects to receive.
            assert truncation is True
            assert max_length == 256
            assert padding is False
            return {
                "input_ids": [[1, 2, 3] for _ in texts],
                "attention_mask": [[1, 1, 1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            # Tokenizer files are seeded with third-party names on purpose; the
            # assertions at the end of the test expect them to have been replaced.
            Path(output_dir, "tokenizer.json").write_text(
                json.dumps(
                    {
                        "model": {
                            "type": "BPE",
                            "unk_token": "Qwen/Qwen2.5-7B-Instruct",
                        },
                        "added_tokens": [
                            {"content": "Claude"},
                            {"content": "DeepSeek"},
                        ],
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "tokenizer_config.json").write_text(
                json.dumps(
                    {
                        "name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "auto_map": {"AutoTokenizer": ["qwen2.Qwen2Tokenizer", None]},
                        "chat_template": "You are Qwen, a helpful assistant for Qwen/Qwen2.5-7B-Instruct.",
                        "init_kwargs": {
                            "chat_template": "Respond like TinyLlama and DeepSeek.",
                        },
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "chat_template.jinja").write_text(
                "System: meta-llama/Llama-3.2-3B-Instruct and Claude must answer here.",
                encoding="utf-8",
            )

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        # Stores the constructor kwargs so the test can inspect them afterwards.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.25}

    class FakeTrainer:
        # Most recently constructed trainer, exposed for the assertions below.
        last_instance = None

        def __init__(
            self,
            *,
            model,
            args,
            train_dataset,
            eval_dataset=None,
            data_collator=None,
        ):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.5}

        def save_model(self, output_dir):
            # Model and adapter configs are written with Qwen-specific identity
            # fields; the test later expects Maris values in their place.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")
            Path(output_dir, "config.json").write_text(
                json.dumps(
                    {
                        "_name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "model_type": "qwen2",
                        "architectures": ["Qwen2ForCausalLM"],
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "auto_map": {
                            "AutoConfig": "qwen2.configuration_qwen2.Qwen2Config",
                            "AutoModelForCausalLM": "qwen2.modeling_qwen2.Qwen2ForCausalLM",
                        },
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "adapter_config.json").write_text(
                json.dumps(
                    {
                        "base_model_name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "base_model_class": "Qwen2ForCausalLM",
                        "parent_library": "transformers.models.qwen2.modeling_qwen2",
                        "auto_mapping": {
                            "base_model_class": "Qwen2ForCausalLM",
                            "parent_library": "transformers.models.qwen2.modeling_qwen2",
                        },
                        "description": "Adapter derived from Qwen and Llama.",
                    }
                ),
                encoding="utf-8",
            )

        def push_to_hub(self, **kwargs):
            self.push_kwargs = kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    # Swap the real library for the fakes; monkeypatch undoes this at teardown.
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    # A single human-reviewed preference pair stored on disk.
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(
        json.dumps(
            [
                {
                    "prompt": "Kurš variants ir labāks?",
                    "chosen": "Variants A",
                    "rejected": "Variants B",
                    "source": "human_review",
                    "tags": ["quality"],
                }
            ]
        ),
        encoding="utf-8",
    )

    async def fake_benchmark(config, *, model_path):
        # Canned benchmark manifest returned in place of a real benchmark run.
        assert model_path.endswith("trained-model")
        return {
            "artifact_type": "chat-benchmark-manifest",
            "benchmark_name": config.benchmark_name,
            "branch": config.branch_name,
            "model": config.hub_model_id,
            "score_manifest": {
                "overall": 0.81,
                "reasoning": 0.8,
                "factuality": 0.79,
                "latvian_quality": 0.86,
                "coding": 0.74,
                "long_context": 0.75,
                "helpfulness": 0.83,
            },
        }

    monkeypatch.setattr("maris_core.training.train._run_post_training_benchmark", fake_benchmark)
    output_dir = tmp_path / "trained-model"
    metrics = train(
        output_dir=str(output_dir),
        max_seq_length=256,
        benchmark_dataset_path=str(tmp_path / "benchmarks.json"),
        preference_dataset_path=str(preference_dataset_path),
    )

    # Returned metrics and the datasets/arguments the trainer received.
    assert metrics["train_loss"] == 0.25
    assert metrics["eval_loss"] == 0.5
    assert metrics["perplexity"] > 1.0
    assert FakeTrainer.last_instance is not None
    assert len(FakeTrainer.last_instance.train_dataset) == 1
    assert len(FakeTrainer.last_instance.eval_dataset) == 1
    assert FakeTrainer.last_instance.args.kwargs["evaluation_strategy"] == "steps"

    # Every expected artifact must exist in the output directory.
    assert (output_dir / "training-config.json").is_file()
    assert (output_dir / "training-metrics.json").is_file()
    assert (output_dir / "maris-metadata.json").is_file()
    assert (output_dir / "training-provenance.json").is_file()
    assert (output_dir / "README.md").is_file()
    assert (output_dir / "benchmark-manifest.json").is_file()
    assert (output_dir / "benchmark-release-gate.json").is_file()
    assert (output_dir / "benchmark-history.json").is_file()
    assert (output_dir / "benchmark-regression-report.json").is_file()
    assert (output_dir / "benchmark-feedback.json").is_file()
    assert (output_dir / "preference-summary.json").is_file()
    assert (output_dir / "human-eval-summary.json").is_file()
    assert (output_dir / "blind-side-by-side-eval.json").is_file()

    # Load each artifact once for the content assertions that follow.
    training_config = json.loads((output_dir / "training-config.json").read_text(encoding="utf-8"))
    training_metrics = json.loads(
        (output_dir / "training-metrics.json").read_text(encoding="utf-8")
    )
    benchmark_manifest = json.loads(
        (output_dir / "benchmark-manifest.json").read_text(encoding="utf-8")
    )
    benchmark_gate = json.loads(
        (output_dir / "benchmark-release-gate.json").read_text(encoding="utf-8")
    )
    benchmark_history = json.loads(
        (output_dir / "benchmark-history.json").read_text(encoding="utf-8")
    )
    benchmark_regression = json.loads(
        (output_dir / "benchmark-regression-report.json").read_text(encoding="utf-8")
    )
    benchmark_feedback = json.loads(
        (output_dir / "benchmark-feedback.json").read_text(encoding="utf-8")
    )
    preference_summary = json.loads(
        (output_dir / "preference-summary.json").read_text(encoding="utf-8")
    )
    human_eval_summary = json.loads(
        (output_dir / "human-eval-summary.json").read_text(encoding="utf-8")
    )
    blind_side_by_side = json.loads(
        (output_dir / "blind-side-by-side-eval.json").read_text(encoding="utf-8")
    )
    training_provenance = json.loads(
        (output_dir / "training-provenance.json").read_text(encoding="utf-8")
    )
    saved_model_config = json.loads((output_dir / "config.json").read_text(encoding="utf-8"))
    saved_tokenizer_config = json.loads(
        (output_dir / "tokenizer_config.json").read_text(encoding="utf-8")
    )
    saved_tokenizer_json = json.loads((output_dir / "tokenizer.json").read_text(encoding="utf-8"))
    saved_adapter_config = json.loads(
        (output_dir / "adapter_config.json").read_text(encoding="utf-8")
    )
    compatibility_manifest = json.loads(
        (output_dir / MARIS_COMPATIBILITY_ARTIFACT_NAME).read_text(encoding="utf-8")
    )
    saved_chat_template = (output_dir / "chat_template.jinja").read_text(encoding="utf-8")

    # Training config, metrics and provenance carry the Maris identity.
    assert training_config["maris_origin"] == "Maris AI"
    assert training_config["maris_model_id"] == "MarisUK/maris-ai-master"
    assert "model_name" not in training_config
    assert training_metrics["maris_origin"] == "Maris AI"
    assert training_metrics["artifact_type"] == "training-metrics"
    assert training_metrics["dataset_repo"] == "MarisUK/maris-ai-memory"
    assert training_metrics["benchmark_regressions"] == 0.0
    assert training_provenance["maris_origin"] == "Maris AI"
    assert training_provenance["train_examples"] == 1
    assert training_provenance["eval_examples"] == 1
    assert training_provenance["base_model_name"] == "Maris AI"
    assert training_provenance["base_model_lineage"] == "Maris AI"
    model_card = (output_dir / "README.md").read_text(encoding="utf-8")
    assert "Maris AI Model" in model_card
    assert "Qwen/" not in model_card
    assert "TinyLlama/" not in model_card

    # The third-party identity written by the fakes must have been replaced.
    assert saved_model_config["_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_model_config["model_type"] == "maris"
    assert saved_model_config["architectures"] == ["MarisCompatibleCausalLM"]
    assert saved_model_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert saved_tokenizer_config["name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_tokenizer_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert saved_tokenizer_json["model"]["unk_token"] == "MarisUK/maris-ai-master"
    assert saved_tokenizer_json["added_tokens"][0]["content"] == "Maris AI"
    assert saved_tokenizer_json["added_tokens"][1]["content"] == "Maris AI"
    assert "Maris AI" in saved_tokenizer_config["chat_template"]
    assert "Qwen" not in saved_tokenizer_config["chat_template"]
    assert "Maris AI" in saved_tokenizer_config["init_kwargs"]["chat_template"]
    assert saved_adapter_config["base_model_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_adapter_config["base_model_class"] == "MarisCompatibleCausalLM"
    assert saved_adapter_config["parent_library"] == "maris.compat"
    assert "Qwen" not in saved_adapter_config["description"]
    assert "Llama" not in saved_adapter_config["description"]
    assert compatibility_manifest["artifact_type"] == "maris-hf-compatibility"
    assert compatibility_manifest["maris_model_id"] == "MarisUK/maris-ai-master"
    assert "config.json" in compatibility_manifest["artifacts"]
    assert "tokenizer_config.json" in compatibility_manifest["artifacts"]
    assert "adapter_config.json" in compatibility_manifest["artifacts"]
    assert "meta-llama/" not in saved_chat_template
    assert "Claude" not in saved_chat_template
    assert "Maris AI" in saved_chat_template

    # Benchmark, scoring-dashboard and preference artifacts.
    assert metrics["perplexity"] < 1000
    assert metrics["benchmark_overall"] == 0.81
    assert metrics["benchmark_gate_passed"] == 1.0
    assert benchmark_manifest["artifact_type"] == "chat-benchmark-manifest"
    assert benchmark_gate["artifact_type"] == "benchmark-release-gate"
    assert benchmark_history["artifact_type"] == "chat-benchmark-history"
    assert benchmark_history["run_count"] == 1
    assert benchmark_regression["artifact_type"] == "chat-benchmark-regression-report"
    assert benchmark_regression["status"] == "no-baseline"
    assert benchmark_feedback["artifact_type"] == "benchmark-feedback-reweighting"
    assert training_metrics["scoring_dashboard"]["train"]["sources"]["unknown"]["records"] == 1
    assert training_metrics["scoring_dashboard"]["train"]["categories"]["general"]["records"] == 1
    assert training_metrics["scoring_dashboard_train_sources_unknown_records"] == 1.0
    assert training_metrics["scoring_dashboard_train_categories_general_records"] == 1.0
    # Regex sweep over the text artifacts in the output directory.
    _assert_output_dir_uses_only_maris_identity(output_dir)
    assert benchmark_gate["passed"] is True
    assert preference_summary["artifact_type"] == "preference-dataset-summary"
    assert benchmark_manifest["score_manifest"]["pairwise_win_rate"] == 1.0
    assert human_eval_summary["artifact_type"] == "human-eval-summary"
    assert blind_side_by_side["artifact_type"] == "blind-side-by-side-eval-set"
def test_train_pushes_to_hub_when_enabled(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that train() calls ``Trainer.push_to_hub`` when the env flag is set.

    ``HF_TRAIN_PUSH_TO_HUB`` is set to ``"true"``; the fake trainer records the
    kwargs of its ``push_to_hub`` call and the test expects exactly the
    ``"Maris AI training sync (master)"`` commit message.
    """

    class FakeDataset:
        # In-memory stand-in providing train_test_split/map/len over a list of dicts.
        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split: first record trains, the rest evaluate.
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            # Row count is inferred from the first output column.
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setenv("HF_TRAIN_PUSH_TO_HUB", "true")
    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kā iet?", "assistant": "Labi"},
                ]
            )
        },
    )

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            # Always a single encoded row, independent of how many texts came in.
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        # Most recently constructed trainer, exposed for the assertions below.
        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            # Stays None unless push_to_hub is actually invoked.
            self.push_kwargs = None
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

        def push_to_hub(self, **kwargs):
            self.push_kwargs = kwargs

    # Swap the real library for the fakes; monkeypatch undoes this at teardown.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )
    train(output_dir=str(tmp_path / "push-model"), max_seq_length=256)
    assert FakeTrainer.last_instance is not None
    assert FakeTrainer.last_instance.push_kwargs == {
        "commit_message": "Maris AI training sync (master)"
    }
def test_train_prefers_existing_local_artifact_when_continue_mode_enabled(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that continue mode loads tokenizer and model from the output directory.

    The output directory is pre-populated with a ``config.json`` and a
    ``training-config.json`` whose model-source fingerprint is built from
    ``DEFAULT_TRAINING_BASE_MODEL``. With ``continue_from_latest_artifact=True``
    both fake ``from_pretrained`` calls are expected to receive that directory.
    """

    class FakeDataset:
        # In-memory stand-in providing train_test_split/map/len over a list of dicts.
        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split: first record trains, the rest evaluate.
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del desc, remove_columns
            assert batched is True
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            # Row count is inferred from the first output column.
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    # Seed an existing artifact in the directory that train() will write to.
    output_dir = tmp_path / "continued-model"
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "config.json").write_text("{}", encoding="utf-8")
    import maris_core.training.train as train_module

    # Record a fingerprint derived from the default base model.
    (output_dir / "training-config.json").write_text(
        json.dumps(
            {
                train_module.MODEL_SOURCE_FINGERPRINT_KEY: train_module._build_model_source_fingerprint(
                    DEFAULT_TRAINING_BASE_MODEL
                )
            }
        ),
        encoding="utf-8",
    )
    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kā iet?", "assistant": "Labi"},
                ]
            )
        },
    )
    # Filled in by the fake from_pretrained methods with the source they were given.
    captured_paths: dict[str, str] = {}

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            captured_paths["tokenizer"] = model_name
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            # Always a single encoded row, independent of how many texts came in.
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del kwargs
            captured_paths["model"] = model_name
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    # Swap the real library for the fakes; monkeypatch undoes this at teardown.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )
    train(
        output_dir=str(output_dir),
        continue_from_latest_artifact=True,
        max_seq_length=256,
    )
    # Both loaders must have been pointed at the local artifact directory.
    assert captured_paths["tokenizer"] == str(output_dir)
    assert captured_paths["model"] == str(output_dir)
def test_train_does_not_auto_resume_from_incompatible_output_artifact(
    tmp_path: Path, monkeypatch
) -> None:
    """A mismatching fingerprint in the output dir must not hijack the model source.

    The existing artifact records a fingerprint built for a different model
    than the one requested, so the resolved training source is expected to be
    the requested model name even with ``continue_from_latest_artifact`` on.
    """
    import maris_core.training.train as train_module

    requested_model = "Qwen/Qwen2.5-1.5B-Instruct"

    artifact_dir = tmp_path / "incompatible-output"
    artifact_dir.mkdir(parents=True, exist_ok=True)
    artifact_dir.joinpath("config.json").write_text("{}", encoding="utf-8")

    # Fingerprint recorded for a model other than the requested one.
    foreign_fingerprint = train_module._build_model_source_fingerprint(
        "meta-llama/Llama-3.2-3B-Instruct"
    )
    recorded_config = {train_module.MODEL_SOURCE_FINGERPRINT_KEY: foreign_fingerprint}
    artifact_dir.joinpath("training-config.json").write_text(
        json.dumps(recorded_config), encoding="utf-8"
    )

    overrides = {
        "output_dir": str(artifact_dir),
        "model_name": requested_model,
        "continue_from_latest_artifact": True,
    }
    training_config = load_training_config(overrides=overrides)

    resolved_source = train_module._resolve_training_model_source(training_config)
    assert resolved_source == requested_model
def test_train_restores_maris_artifacts_after_push_to_hub(tmp_path: Path, monkeypatch) -> None:
    """Check that Maris identity survives a hub push that rewrites the artifacts.

    The fake ``Trainer.push_to_hub`` overwrites README, model config, tokenizer
    files and chat template in the output directory with third-party model
    names. After train() returns, the test expects those files to carry the
    Maris identity again and expects the fake ``HfApi`` to have seen one
    ``create_repo`` call followed by one ``upload_folder`` call.
    """

    class FakeDataset:
        # In-memory stand-in providing train_test_split/map/len over a list of dicts.
        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split: first record trains, the rest evaluate.
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            # Row count is inferred from the first output column.
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setenv("HF_TRAIN_PUSH_TO_HUB", "true")
    monkeypatch.setenv("HF_TOKEN", "token")
    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kā iet?", "assistant": "Labi"},
                ]
            )
        },
    )

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            # Always a single encoded row, independent of how many texts came in.
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            Path(output_dir, "tokenizer_config.json").write_text(
                json.dumps(
                    {
                        "name_or_path": "MarisUK/maris-ai-master",
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "chat_template": "You are Qwen and Claude in one assistant.",
                    }
                ),
                encoding="utf-8",
            )

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text(
                json.dumps(
                    {
                        "_name_or_path": "MarisUK/maris-ai-master",
                        "model_type": "qwen2",
                        "architectures": ["Qwen2ForCausalLM"],
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "adapter_config.json").write_text(
                json.dumps(
                    {
                        "base_model_name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                        "base_model_class": "Qwen2ForCausalLM",
                        "parent_library": "transformers.models.qwen2.modeling_qwen2",
                        "description": "Adapter built from DeepSeek and Mistral.",
                    }
                ),
                encoding="utf-8",
            )

        def push_to_hub(self, **kwargs):
            del kwargs
            # Simulate a push that clobbers local artifacts with third-party
            # identities; the output directory comes from the TrainingArguments
            # kwargs this trainer was constructed with.
            output_dir = Path(self.kwargs["args"].kwargs["output_dir"])
            Path(output_dir, "README.md").write_text(
                "\n".join(
                    (
                        "---",
                        "library_name: transformers",
                        "datasets:",
                        "- generator",
                        "---",
                        "# master",
                    )
                )
                + "\n",
                encoding="utf-8",
            )
            Path(output_dir, "config.json").write_text(
                json.dumps(
                    {
                        "_name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                        "model_type": "qwen2",
                        "architectures": ["Qwen2ForCausalLM"],
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "tokenizer_config.json").write_text(
                json.dumps(
                    {
                        "name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "chat_template": "Use meta-llama/Llama-3.2-3B-Instruct with Gemini.",
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "tokenizer.json").write_text(
                json.dumps(
                    {
                        "model": {"type": "BPE", "unk_token": "DeepSeek-Coder"},
                        "added_tokens": [{"content": "Anthropic"}],
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "chat_template.jinja").write_text(
                "System prompt from Anthropic Claude and OpenAI ChatGPT.",
                encoding="utf-8",
            )

    # Entries are either {"create_repo": <kwargs dict>} or the raw upload_folder
    # kwargs, so the values are not uniformly strings.
    upload_calls: list[dict[str, Any]] = []

    class FakeHfApi:
        def __init__(self, token=None):
            self.token = token

        def create_repo(self, **kwargs):
            upload_calls.append({"create_repo": kwargs})

        def upload_folder(self, **kwargs):
            upload_calls.append(kwargs)

    # Swap the real libraries for the fakes; monkeypatch undoes this at teardown.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )
    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))
    output_dir = tmp_path / "push-model"
    train(output_dir=str(output_dir), max_seq_length=256)

    def _read_text(name: str) -> str:
        return (output_dir / name).read_text(encoding="utf-8")

    def _read_json(name: str) -> Any:
        return json.loads(_read_text(name))

    # Each artifact is read once, just before its own group of assertions.
    model_card = _read_text("README.md")
    assert "Maris AI Model" in model_card
    assert "generated_from_trainer" not in model_card

    saved_model_config = _read_json("config.json")
    assert saved_model_config["_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_model_config["model_type"] == "maris"
    assert saved_model_config["architectures"] == ["MarisCompatibleCausalLM"]

    saved_tokenizer_config = _read_json("tokenizer_config.json")
    assert saved_tokenizer_config["name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_tokenizer_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert "Maris AI" in saved_tokenizer_config["chat_template"]

    saved_tokenizer_json = _read_json("tokenizer.json")
    assert saved_tokenizer_json["model"]["unk_token"] == "Maris AI"
    assert saved_tokenizer_json["added_tokens"][0]["content"] == "Maris AI"

    saved_adapter_config = _read_json("adapter_config.json")
    assert saved_adapter_config["base_model_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_adapter_config["base_model_class"] == "MarisCompatibleCausalLM"
    assert "DeepSeek" not in saved_adapter_config["description"]

    saved_chat_template = _read_text("chat_template.jinja")
    assert "Anthropic" not in saved_chat_template
    assert "ChatGPT" not in saved_chat_template
    assert "Maris AI" in saved_chat_template

    assert (output_dir / MARIS_COMPATIBILITY_ARTIFACT_NAME).is_file()
    # Regex sweep over the text artifacts in the output directory.
    _assert_output_dir_uses_only_maris_identity(output_dir)
    assert upload_calls == [
        {
            "create_repo": {
                "repo_id": "MarisUK/maris-ai-master",
                "repo_type": "model",
                "exist_ok": True,
            }
        },
        {
            "folder_path": str(output_dir),
            "repo_id": "MarisUK/maris-ai-master",
            "repo_type": "model",
            "commit_message": "Maris AI artifact sync (master)",
        },
    ]
def test_export_model_creates_repo_before_upload(tmp_path: Path, monkeypatch) -> None:
    """``export_model`` must construct HfApi, create the repo, then upload the folder.

    The export script is loaded straight from ``scripts/export_to_hf.py`` and
    ``huggingface_hub`` is replaced by a fake that logs every call in order.
    """
    repo_id = "MarisUK/maris-ai-master"

    # Import the script by file path under the module name "export_to_hf".
    export_script = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", export_script)
    assert module_spec is not None
    assert module_spec.loader is not None
    export_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(export_module)

    model_dir = tmp_path / "model"
    model_dir.mkdir()
    (model_dir / "config.json").write_text("{}", encoding="utf-8")
    monkeypatch.setenv("HF_TOKEN", "token")

    calls: list[dict[str, object]] = []

    class FakeHfApi:
        def __init__(self, token=None):
            calls.append({"init": token})

        def create_repo(self, **kwargs):
            calls.append({"create_repo": kwargs})

        def upload_folder(self, **kwargs):
            calls.append({"upload_folder": kwargs})

    fake_hub = types.SimpleNamespace(HfApi=FakeHfApi)
    monkeypatch.setitem(sys.modules, "huggingface_hub", fake_hub)

    export_module.export_model(str(model_dir), repo_id)

    expected_create = {
        "repo_id": "MarisUK/maris-ai-master",
        "repo_type": "model",
        "exist_ok": True,
    }
    expected_upload = {
        "folder_path": str(model_dir),
        "repo_id": "MarisUK/maris-ai-master",
        "repo_type": "model",
        "commit_message": "Maris AI model export",
    }
    assert calls == [
        {"init": "token"},
        {"create_repo": expected_create},
        {"upload_folder": expected_upload},
    ]
def test_export_model_publishes_branch_suite_to_runtime_repos(tmp_path: Path, monkeypatch) -> None:
    """Check that a suite directory with a manifest is exported once per listed branch."""
    exporter_file = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", exporter_file)
    assert module_spec is not None and module_spec.loader is not None
    exporter = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(exporter)

    suite_dir = tmp_path / "suite"
    suite_dir.mkdir()
    branch_dirs = {name: suite_dir / name for name in ("master", "coder", "image", "tts")}
    for branch_dir in branch_dirs.values():
        branch_dir.mkdir()
        (branch_dir / "config.json").write_text("{}", encoding="utf-8")

    # The manifest maps every branch name to its output directory.
    manifest = {
        "branches": {name: {"output_dir": str(path)} for name, path in branch_dirs.items()}
    }
    (suite_dir / "branch-suite.json").write_text(json.dumps(manifest), encoding="utf-8")
    monkeypatch.setenv("HF_TOKEN", "token")

    recorded: list[dict[str, object]] = []

    class FakeHfApi:
        """Hub client double that appends every call to ``recorded``."""

        def __init__(self, token=None):
            recorded.append({"init": token})

        def create_repo(self, **kwargs):
            recorded.append({"create_repo": kwargs})

        def upload_folder(self, **kwargs):
            recorded.append({"upload_folder": kwargs})

    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))
    exporter.export_model(str(suite_dir), "MarisUK/maris-ai-master")

    def expected_publish(folder: Path, repo_id: str, commit_message: str) -> list[dict[str, object]]:
        """Return the create-then-upload call pair expected for one repository."""
        return [
            {
                "create_repo": {
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "exist_ok": True,
                }
            },
            {
                "upload_folder": {
                    "folder_path": str(folder),
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "commit_message": commit_message,
                }
            },
        ]

    assert recorded == [
        {"init": "token"},
        *expected_publish(suite_dir, "MarisUK/maris-ai-master", "Maris AI model export"),
        *expected_publish(
            suite_dir / "master", "MarisUK/maris-ai-text", "Maris AI model export (master)"
        ),
        *expected_publish(
            suite_dir / "coder", "MarisUK/maris-ai-codex", "Maris AI model export (coder)"
        ),
        *expected_publish(
            suite_dir / "image", "MarisUK/maris-ai-image", "Maris AI model export (image)"
        ),
        *expected_publish(
            suite_dir / "tts", "MarisUK/maris-tts-runtime", "Maris AI model export (tts)"
        ),
    ]
def test_export_model_discovers_fallback_branch_dirs_without_manifest(
    tmp_path: Path, monkeypatch
) -> None:
    """Check that branch sub-directories are exported even when no manifest file exists."""
    exporter_file = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", exporter_file)
    assert module_spec is not None and module_spec.loader is not None
    exporter = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(exporter)

    suite_dir = tmp_path / "suite"
    suite_dir.mkdir()
    (suite_dir / "config.json").write_text("{}", encoding="utf-8")
    # No branch-suite.json is written; only the branch folders exist.
    for branch_dir in (suite_dir / "master", suite_dir / "coder"):
        branch_dir.mkdir()
        (branch_dir / "config.json").write_text("{}", encoding="utf-8")
    monkeypatch.setenv("HF_TOKEN", "token")

    recorded: list[dict[str, object]] = []

    class FakeHfApi:
        """Hub client double that appends every call to ``recorded``."""

        def __init__(self, token=None):
            recorded.append({"init": token})

        def create_repo(self, **kwargs):
            recorded.append({"create_repo": kwargs})

        def upload_folder(self, **kwargs):
            recorded.append({"upload_folder": kwargs})

    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))
    exporter.export_model(str(suite_dir), "MarisUK/maris-ai-master")

    def expected_publish(folder: Path, repo_id: str, commit_message: str) -> list[dict[str, object]]:
        """Return the create-then-upload call pair expected for one repository."""
        return [
            {
                "create_repo": {
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "exist_ok": True,
                }
            },
            {
                "upload_folder": {
                    "folder_path": str(folder),
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "commit_message": commit_message,
                }
            },
        ]

    assert recorded == [
        {"init": "token"},
        *expected_publish(suite_dir, "MarisUK/maris-ai-master", "Maris AI model export"),
        *expected_publish(
            suite_dir / "master", "MarisUK/maris-ai-text", "Maris AI model export (master)"
        ),
        *expected_publish(
            suite_dir / "coder", "MarisUK/maris-ai-codex", "Maris AI model export (coder)"
        ),
    ]
def test_train_filters_unsupported_training_arguments(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that ``train`` only passes keyword arguments the arguments class accepts.

    ``StrictTrainingArguments`` has a closed keyword-only signature, so any extra
    keyword would raise ``TypeError`` when the arguments object is constructed.
    """

    class FakeDataset:
        """In-memory dataset double exposing the methods this test needs."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            mapped = fn(columns)
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        # A fresh two-record train split is built on every call.
        return {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kā iet?", "assistant": "Labi"},
                ]
            )
        }

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    class FakeTokenizer:
        """Tokenizer double that checks the tokenization keyword arguments."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            assert truncation is True
            assert max_length == 256
            assert padding is False
            return {
                "input_ids": [[1, 2, 3] for _ in texts],
                "attention_mask": [[1, 1, 1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            Path(output_dir, "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class StrictTrainingArguments:
        """Arguments double with a closed, keyword-only signature."""

        def __init__(
            self,
            *,
            output_dir,
            num_train_epochs,
            learning_rate,
            per_device_train_batch_size,
            per_device_eval_batch_size,
            gradient_accumulation_steps,
            warmup_ratio,
            weight_decay,
            logging_steps,
            save_steps,
            eval_steps,
            save_total_limit,
            lr_scheduler_type,
            seed,
            fp16,
            bf16,
            report_to,
            save_safetensors,
            remove_unused_columns,
            eval_strategy,
            load_best_model_at_end,
            metric_for_best_model,
            greater_is_better,
        ):
            # Keep everything that was accepted so the test can inspect it later.
            self.kwargs = {
                "output_dir": output_dir,
                "num_train_epochs": num_train_epochs,
                "learning_rate": learning_rate,
                "per_device_train_batch_size": per_device_train_batch_size,
                "per_device_eval_batch_size": per_device_eval_batch_size,
                "gradient_accumulation_steps": gradient_accumulation_steps,
                "warmup_ratio": warmup_ratio,
                "weight_decay": weight_decay,
                "logging_steps": logging_steps,
                "save_steps": save_steps,
                "eval_steps": eval_steps,
                "save_total_limit": save_total_limit,
                "lr_scheduler_type": lr_scheduler_type,
                "seed": seed,
                "fp16": fp16,
                "bf16": bf16,
                "report_to": report_to,
                "save_safetensors": save_safetensors,
                "remove_unused_columns": remove_unused_columns,
                "eval_strategy": eval_strategy,
                "load_best_model_at_end": load_best_model_at_end,
                "metric_for_best_model": metric_for_best_model,
                "greater_is_better": greater_is_better,
            }

    class FakeTrainResult:
        metrics = {"train_loss": 0.25}

    class FakeTrainer:
        """Trainer double that remembers its most recent instance."""

        last_instance = None

        def __init__(
            self,
            *,
            model,
            args,
            train_dataset,
            eval_dataset=None,
            data_collator=None,
        ):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.5}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=StrictTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    metrics = train(output_dir=str(tmp_path / "trained-model"), max_seq_length=256)

    assert metrics["eval_loss"] == 0.5
    trainer = FakeTrainer.last_instance
    assert trainer is not None
    accepted_kwargs = trainer.args.kwargs
    assert "overwrite_output_dir" not in accepted_kwargs
    assert accepted_kwargs["eval_strategy"] == "steps"
def test_build_branch_training_configs_creates_branch_output_dirs() -> None:
    """Check the per-branch configs derived from one base training config."""
    base_config = load_training_config(
        overrides={
            "output_dir": "/tmp/maris-branch",
            "eval_dataset_repo": "MarisUK/maris-ai-evals",
        }
    )
    configs = build_branch_training_configs(base_config)

    def config_for(branch: str):
        """Return the first generated config whose branch name matches."""
        return next(config for config in configs if config.branch_name == branch)

    branch_names = {config.branch_name for config in configs}
    assert {"master", "coder", "planner", "image", "music", "tts", "stt", "video"} == branch_names

    coder_config = config_for("coder")
    assert coder_config.output_dir.endswith("/coder")
    assert coder_config.eval_dataset_repo == "MarisUK/maris-ai-evals"
    assert coder_config.benchmark_gate_enabled is True
    assert coder_config.benchmark_min_overall >= 0.76
    assert coder_config.benchmark_dataset_path.endswith(
        "core-python/evals/coder_release_benchmark.json"
    )
    assert coder_config.preference_dataset_path.endswith(
        "core-python/evals/coder_preference_dataset.json"
    )
    assert coder_config.quality_min_text_chars >= 18
    coder_weights = coder_config.category_weight_map
    assert coder_weights["coding"] >= 1.35
    assert coder_weights["grounding"] >= 1.25

    planner_config = config_for("planner")
    assert planner_config.benchmark_gate_enabled is True
    assert planner_config.benchmark_min_overall >= 0.76
    assert planner_config.benchmark_dataset_path.endswith(
        "core-python/evals/planner_release_benchmark.json"
    )

    master_config = config_for("master")
    assert master_config.hub_model_id == "MarisUK/maris-ai-text"
    assert master_config.benchmark_gate_enabled is True
    assert master_config.quality_min_text_chars >= 12

    image_config = config_for("image")
    assert image_config.adapter_type == "specialist_model"
    assert image_config.hub_model_id == "MarisUK/maris-ai-image"
def test_train_branch_suite_writes_external_manifests_for_specialists(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check the statuses and manifest files produced by ``train_branch_suite``."""
    suite_root = tmp_path / "branches"
    base_config = load_training_config(overrides={"output_dir": str(suite_root)})

    def fake_train_with_config(branch_config):
        # Replaces real training with a metric derived from the branch name.
        return {"branch_len": float(len(branch_config.branch_name))}

    monkeypatch.setattr("maris_core.training.train.train_with_config", fake_train_with_config)

    results = train_branch_suite(base_config)
    suite_manifest_path = suite_root / "branch-suite.json"
    branch_suite = json.loads(suite_manifest_path.read_text(encoding="utf-8"))

    master_result = results["master"]
    assert master_result["status"] == "trained"
    assert master_result["maris_origin"] == "Maris AI"
    assert results["coder"]["status"] == "trained"
    assert results["image"]["status"] == "external_specialist"
    assert results["tts"]["status"] == "external_specialist"

    assert (suite_root / "image" / "branch-config.json").is_file()
    assert (suite_root / "tts" / "branch-config.json").is_file()
    assert suite_manifest_path.is_file()

    assert branch_suite["artifact_type"] == "branch-suite"
    assert branch_suite["maris_origin"] == "Maris AI"
    assert branch_suite["dataset_repo"] == "MarisUK/maris-ai-memory"
    suite_branches = branch_suite["branches"]
    assert suite_branches["image"]["maris_origin"] == "Maris AI"
    assert suite_branches["stt"]["maris_model_id"] == "MarisUK/maris-stt-runtime"
def test_post_training_benchmark_results_use_maris_model_id(tmp_path: Path, monkeypatch) -> None:
    """Check that benchmark results report the configured hub model id as the model."""
    trained_model_id = "MarisUK/maris-ai-master-trained"
    benchmark_path = tmp_path / "benchmark.json"
    benchmark_cases = [{"name": "identity", "message": "Kas tu esi?", "expected_terms": ["Maris"]}]
    benchmark_path.write_text(json.dumps(benchmark_cases), encoding="utf-8")
    config = load_training_config(
        overrides={
            "benchmark_dataset_path": str(benchmark_path),
            "benchmark_levels": ["ci"],
            "hub_model_id": trained_model_id,
        }
    )

    class FakePipeline:
        pass

    def fake_pipeline(*args, **kwargs):
        del args, kwargs
        return FakePipeline()

    async def fake_run_chat_benchmark_with_responder(cases, *, responder, concurrency):
        del concurrency
        # Only the first case is sent through the responder.
        reply = await responder(cases[0])
        outcome = types.SimpleNamespace(model=reply["model"], response=reply["response"])
        return [outcome]

    def fake_build_chat_benchmark_manifest(results, *, benchmark_name, branch, model):
        first = results[0]
        return {
            "benchmark_name": benchmark_name,
            "branch": branch,
            "model": model,
            "results": [{"model": first.model, "response": first.response}],
        }

    def fake_call_generation_pipeline(*args, **kwargs):
        return [{"generated_text": "Es esmu Maris AI."}]

    monkeypatch.setitem(sys.modules, "transformers", types.SimpleNamespace(pipeline=fake_pipeline))
    monkeypatch.setattr(
        "maris_core.training.train.run_chat_benchmark_with_responder",
        fake_run_chat_benchmark_with_responder,
    )
    monkeypatch.setattr(
        "maris_core.training.train.call_generation_pipeline",
        fake_call_generation_pipeline,
    )
    monkeypatch.setattr(
        "maris_core.training.train.build_chat_benchmark_manifest",
        fake_build_chat_benchmark_manifest,
    )

    benchmark_run = _run_post_training_benchmark(
        config, model_path=str(tmp_path / "trained-model")
    )
    payload = asyncio.run(benchmark_run)

    expected_result = {
        "model": trained_model_id,
        "response": "Es esmu Maris AI.",
    }
    assert payload == {
        "benchmark_name": config.benchmark_name,
        "branch": config.branch_name,
        "model": trained_model_id,
        "results": [expected_result],
    }
def test_post_training_benchmark_filters_cases_by_branch(tmp_path: Path, monkeypatch) -> None:
    """Check that only cases for the configured branch reach the benchmark runner."""
    master_case = {"name": "master-case", "message": "Sveiki", "branches": ["master"], "level": "ci"}
    coder_case = {
        "name": "coder-case",
        "message": "Uzraksti Python helperi",
        "profile": "coder",
        "branches": ["coder"],
        "level": "ci",
    }
    benchmark_path = tmp_path / "benchmark.json"
    benchmark_path.write_text(json.dumps([master_case, coder_case]), encoding="utf-8")
    config = load_training_config(
        overrides={
            "branch_name": "coder",
            "benchmark_dataset_path": str(benchmark_path),
            "benchmark_levels": ["ci"],
        }
    )

    class FakePipeline:
        pass

    captured_case_names: list[str] = []

    def fake_pipeline(*args, **kwargs):
        del args, kwargs
        return FakePipeline()

    async def fake_run_chat_benchmark_with_responder(cases, *, responder, concurrency):
        del responder, concurrency
        # Record which cases were handed to the runner; produce no results.
        for case in cases:
            captured_case_names.append(case.name)
        return []

    def fake_build_chat_benchmark_manifest(results, *, benchmark_name, branch, model):
        return {
            "benchmark_name": benchmark_name,
            "branch": branch,
            "model": model,
            "results": results,
        }

    monkeypatch.setitem(sys.modules, "transformers", types.SimpleNamespace(pipeline=fake_pipeline))
    monkeypatch.setattr(
        "maris_core.training.train.run_chat_benchmark_with_responder",
        fake_run_chat_benchmark_with_responder,
    )
    monkeypatch.setattr(
        "maris_core.training.train.build_chat_benchmark_manifest",
        fake_build_chat_benchmark_manifest,
    )

    asyncio.run(_run_post_training_benchmark(config, model_path=str(tmp_path / "trained-model")))

    assert captured_case_names == ["coder-case"]
def test_filter_records_for_branch_keeps_coder_specific_mix() -> None:
    """Check that the coder branch keeps the code record and drops the other two."""
    conversation_record = {
        "type": "conversation",
        "user": "Sveiki",
        "assistant": "Čau",
    }
    code_record = {
        "type": "code",
        "prompt": "Salabo parseri",
        "metadata": {"language": "python", "task": "bugfix", "project_area": "core-python"},
    }
    autonomous_record = {
        "type": "autonomous",
        "prompt": "Investigate CI",
        "metadata": {"workflow": "ci-triage", "project_area": "operations"},
    }

    filtered, report = _filter_records_for_branch(
        [conversation_record, code_record, autonomous_record],
        branch_name="coder",
        split_name="train",
    )

    assert len(filtered) == 1
    assert filtered[0]["type"] == "code"
    assert report.kept_records == 1
    assert report.dropped_records == 2
def test_filter_records_for_branch_keeps_master_general_mix() -> None:
    """Check that the master branch keeps only the plain conversation record."""
    mixed_records = [
        {"type": "conversation", "user": "Sveiki", "assistant": "Čau"},
        {"type": "code", "prompt": "Uzraksti helperi", "profile": "coder"},
        {"type": "autonomous", "prompt": "Plan sprint", "branch": "planner"},
    ]

    filtered, report = _filter_records_for_branch(
        mixed_records,
        branch_name="master",
        split_name="train",
    )

    kept_types = [record["type"] for record in filtered]
    assert kept_types == ["conversation"]
    assert report.kept_records == 1
    assert report.dropped_records == 2
def test_filter_records_for_branch_uses_custom_rule_map() -> None:
    """Check that an explicit rule map decides which record types the coder branch keeps."""
    mixed_records = [
        {"type": "conversation", "user": "Sveiki", "assistant": "Čau"},
        {"type": "code", "prompt": "Uzraksti helperi", "profile": "coder"},
    ]
    # The custom rules include only conversation records for the coder branch.
    custom_rules = {
        "coder": {
            "include_record_types": ["conversation"],
            "exclude_explicit_branches": ["planner"],
        }
    }

    filtered, report = _filter_records_for_branch(
        mixed_records,
        branch_name="coder",
        split_name="train",
        branch_filter_rules=custom_rules,
    )

    kept_types = [record["type"] for record in filtered]
    assert kept_types == ["conversation"]
    assert report.kept_records == 1
    assert report.dropped_records == 1
def test_filter_preference_examples_for_branch_keeps_coder_examples_only() -> None:
    """Check that filtering for the ``coder`` branch yields only coder-branch examples.

    The examples come from ``evals/coder_preference_dataset.json``. The result
    must be non-empty, and every kept example must carry the branch ``coder``
    (compared case-insensitively; a missing branch is treated as empty).
    """
    # Resolve the dataset relative to this test file instead of an absolute CI
    # checkout path, so the test also runs outside that one runner layout.
    examples_path = (
        Path(__file__).resolve().parents[1] / "evals" / "coder_preference_dataset.json"
    )
    examples = load_preference_dataset(examples_path)
    filtered = _filter_preference_examples_for_branch(
        examples,
        branch_name="coder",
    )
    assert filtered
    assert all((example.branch or "").lower() == "coder" for example in filtered)
def test_filter_preference_examples_for_branch_uses_custom_rule_map() -> None:
    """Check that a custom rule map controls which preference examples a branch keeps.

    The ``planner`` branch is given a rule that includes only the ``repo-level``
    task type. At least one example must survive, and every survivor must have
    that task type.
    """
    # Resolve the dataset relative to this test file instead of an absolute CI
    # checkout path, so the test also runs outside that one runner layout.
    examples_path = (
        Path(__file__).resolve().parents[1] / "evals" / "coder_preference_dataset.json"
    )
    examples = load_preference_dataset(examples_path)
    filtered = _filter_preference_examples_for_branch(
        examples,
        branch_name="planner",
        branch_filter_rules={
            "planner": {
                "include_task_types": ["repo-level"],
            }
        },
    )
    assert len(filtered) >= 1
    assert all(example.task_type == "repo-level" for example in filtered)
def test_train_uses_external_eval_dataset_when_configured(tmp_path: Path, monkeypatch) -> None:
    """Check that ``train`` loads the configured eval repo after the training repo."""
    dataset_calls: list[str] = []

    class FakeSplit(list):
        """List-backed split double with a single ``text`` column."""

        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            mapped = function({"text": [row["text"] for row in self]})
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: column[position] for name, column in mapped.items()})
            return FakeSplit(rebuilt)

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Everything except the last row goes to train (at least one row).
            cut = max(1, len(self) - 1)
            return {"train": FakeSplit(self[:cut]), "test": FakeSplit(self[cut:])}

    def fake_load_hf_dataset(repo_id: str):
        dataset_calls.append(repo_id)
        if repo_id == "MarisUK/maris-ai-memory":
            train_rows = [{"text": "train-1"}, {"text": "train-2"}]
            return {"train": FakeSplit(train_rows)}
        if repo_id == "MarisUK/maris-ai-evals":
            eval_rows = [{"text": "eval-1"}]
            return {"train": FakeSplit(eval_rows)}
        raise AssertionError(f"Unexpected repo id: {repo_id}")

    class FakeTokenizer:
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            return {
                "input_ids": [[position + 1] for position, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        config = types.SimpleNamespace(pad_token_id=None)

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        """Trainer double that remembers its most recent instance."""

        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        dataset_repos=["MarisUK/maris-ai-memory"],
        eval_dataset_repo="MarisUK/maris-ai-evals",
        eval_dataset_repos=["MarisUK/maris-ai-evals"],
    )

    assert metrics["eval_loss"] == 0.2
    assert dataset_calls == ["MarisUK/maris-ai-memory", "MarisUK/maris-ai-evals"]
    trainer = FakeTrainer.last_instance
    assert trainer is not None
    assert len(trainer.train_dataset) == 1
    assert len(trainer.eval_dataset) == 1
def test_train_merges_multiple_dataset_repos_for_training_and_eval(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that ``train`` loads every training repo, then every eval repo, in order."""
    dataset_calls: list[str] = []

    class FakeSplit(list):
        """List-backed split double with a single ``text`` column."""

        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            mapped = function({"text": [row["text"] for row in self]})
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: column[position] for name, column in mapped.items()})
            return FakeSplit(rebuilt)

    # Each repository provides one train row and one validation row.
    repo_rows = {
        "MarisUK/maris-ai-memory": {
            "train": [{"text": "memory-train"}],
            "validation": [{"text": "memory-val"}],
        },
        "MarisUK/maris-ai-lv-memory": {
            "train": [{"text": "lv-train"}],
            "validation": [{"text": "lv-val"}],
        },
        "MarisUK/maris-ai-evals": {
            "train": [{"text": "eval-train"}],
            "validation": [{"text": "eval-val"}],
        },
        "MarisUK/maris-ai-benchmark": {
            "train": [{"text": "bench-train"}],
            "validation": [{"text": "bench-val"}],
        },
    }

    def fake_load_hf_dataset(repo_id: str):
        dataset_calls.append(repo_id)
        if repo_id not in repo_rows:
            raise AssertionError(f"Unexpected repo id: {repo_id}")
        splits = {}
        for split_name, records in repo_rows[repo_id].items():
            splits[split_name] = FakeSplit(list(records))
        return splits

    class FakeTokenizer:
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            return {
                "input_ids": [[position + 1] for position, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        config = types.SimpleNamespace(pad_token_id=None)

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        """Trainer double that remembers its most recent instance."""

        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    training_repos = [
        "MarisUK/maris-ai-memory",
        "MarisUK/maris-ai-lv-memory",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    evaluation_repos = [
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    # Copies are handed to ``train`` so the expected lists below stay untouched.
    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        dataset_repo="MarisUK/maris-ai-memory",
        dataset_repos=list(training_repos),
        eval_dataset_repo="MarisUK/maris-ai-evals",
        eval_dataset_repos=list(evaluation_repos),
    )

    assert metrics["eval_loss"] == 0.2
    assert dataset_calls == training_repos + evaluation_repos
    trainer = FakeTrainer.last_instance
    assert trainer is not None
    assert len(trainer.train_dataset) == 4
    assert len(trainer.eval_dataset) == 2
def test_evaluate_with_config_prefers_external_eval_dataset(tmp_path: Path, monkeypatch) -> None:
    """Check evaluation against an external eval repo and the benchmark artifacts it writes.

    The fake tokenizer and model assert that they are loaded from a directory
    other than the trained-model directory, and that the files there carry the
    original ``qwen2`` class names. After evaluation the trained-model directory
    must still expose the Maris identity values.
    """
    dataset_calls: list[str] = []
    trained_model_dir = tmp_path / "trained-model"
    trained_model_dir.mkdir(parents=True, exist_ok=True)

    model_config_payload = {
        "_name_or_path": "MarisUK/maris-ai-master",
        "model_type": "qwen2",
        "architectures": ["Qwen2ForCausalLM"],
        "tokenizer_class": "Qwen2TokenizerFast",
        "auto_map": {"AutoModelForCausalLM": "qwen2.modeling_qwen2.Qwen2ForCausalLM"},
    }
    (trained_model_dir / "config.json").write_text(
        json.dumps(model_config_payload), encoding="utf-8"
    )
    tokenizer_config_payload = {
        "name_or_path": "MarisUK/maris-ai-master",
        "tokenizer_class": "Qwen2TokenizerFast",
    }
    (trained_model_dir / "tokenizer_config.json").write_text(
        json.dumps(tokenizer_config_payload), encoding="utf-8"
    )
    write_maris_compatibility_artifact(
        trained_model_dir,
        maris_model_id="MarisUK/maris-ai-master",
    )
    apply_maris_compatibility_identity(trained_model_dir)

    class FakeSplit(list):
        """List-backed split double with a single ``text`` column."""

        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            mapped = function({"text": [row["text"] for row in self]})
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: column[position] for name, column in mapped.items()})
            return FakeSplit(rebuilt)

    def fake_load_hf_dataset(repo_id: str):
        dataset_calls.append(repo_id)
        if repo_id != "MarisUK/maris-ai-evals":
            raise AssertionError(f"Unexpected repo id: {repo_id}")
        return {"train": FakeSplit([{"text": "eval-1"}, {"text": "eval-2"}])}

    class FakeTokenizer:
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            loaded_dir = Path(model_name)
            assert loaded_dir != trained_model_dir
            tokenizer_config_file = loaded_dir.joinpath("tokenizer_config.json")
            tokenizer_config = json.loads(tokenizer_config_file.read_text(encoding="utf-8"))
            assert tokenizer_config["tokenizer_class"] == "Qwen2TokenizerFast"
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            return {
                "input_ids": [[position + 1] for position, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

    class FakeModel:
        @classmethod
        def from_pretrained(cls, model_name):
            loaded_dir = Path(model_name)
            assert loaded_dir != trained_model_dir
            model_config_file = loaded_dir.joinpath("config.json")
            model_config = json.loads(model_config_file.read_text(encoding="utf-8"))
            assert model_config["model_type"] == "qwen2"
            assert model_config["architectures"] == ["Qwen2ForCausalLM"]
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        def __init__(self, *, model, args, eval_dataset=None, data_collator=None):
            del model, args, data_collator
            self.eval_dataset = eval_dataset

        def evaluate(self):
            return {"eval_loss": 0.3}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    config = load_training_config(
        overrides={
            "output_dir": str(trained_model_dir),
            "eval_dataset_repo": "MarisUK/maris-ai-evals",
            "eval_dataset_repos": ["MarisUK/maris-ai-evals"],
            "benchmark_dataset_path": str(tmp_path / "benchmark.json"),
            "benchmark_levels": ["ci"],
        }
    )

    async def fake_benchmark(config, *, model_path):
        assert model_path.endswith("trained-model")
        score_manifest = {
            "overall": 0.79,
            "reasoning": 0.76,
            "factuality": 0.75,
            "helpfulness": 0.8,
            "execution": 0.75,
        }
        return {
            "artifact_type": "chat-benchmark-manifest",
            "benchmark_name": config.benchmark_name,
            "branch": config.branch_name,
            "model": config.hub_model_id,
            "generated_at": "2026-04-16T00:00:00Z",
            "score_manifest": score_manifest,
            "category_scores": {"coding": 0.74},
            "execution_language_pass_rates": {"python": 1.0},
            "execution_language_scores": {"python": 0.74},
            "category_execution_pass_rates": {"coding": 1.0},
        }

    monkeypatch.setattr("maris_core.training.train._run_post_training_benchmark", fake_benchmark)

    metrics = evaluate_with_config(config, model_path=str(trained_model_dir))

    assert metrics["eval_loss"] == 0.3
    assert metrics["eval_examples"] == 2.0
    assert metrics["benchmark_overall"] == 0.79
    assert metrics["benchmark_gate_passed"] == 1.0
    assert metrics["benchmark_regressions"] == 0.0
    assert dataset_calls == ["MarisUK/maris-ai-evals"]

    assert (trained_model_dir / "benchmark-manifest.json").is_file()
    assert (trained_model_dir / "benchmark-history.json").is_file()
    assert (trained_model_dir / "benchmark-regression-report.json").is_file()

    # The files in the trained-model directory must still show the Maris identity.
    final_model_config = json.loads(
        (trained_model_dir / "config.json").read_text(encoding="utf-8")
    )
    assert final_model_config["model_type"] == "maris"
    final_tokenizer_config = json.loads(
        (trained_model_dir / "tokenizer_config.json").read_text(encoding="utf-8")
    )
    assert final_tokenizer_config["tokenizer_class"] == "MarisCompatibleTokenizer"
def test_load_training_config_reads_peft_and_preference_optimization_settings(
    tmp_path: Path,
) -> None:
    """Check that PEFT/QLoRA and preference-optimization keys load from a JSON config file."""
    settings = {
        "adapter_type": "qlora",
        "lora_r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.15,
        "lora_bias": "all",
        "peft_target_modules": ["q_proj", "v_proj"],
        "qlora_quant_type": "fp4",
        "qlora_use_double_quant": False,
        "qlora_compute_dtype": "bfloat16",
        "preference_dataset_path": "/tmp/preferences.json",
        "preference_optimization": "dpo",
        "preference_beta": 0.25,
        "preference_max_prompt_length": 256,
        "preference_max_length": 768,
        "preference_reference_model": "MarisUK/maris-ai-master",
    }
    config_path = tmp_path / "training.json"
    config_path.write_text(json.dumps(settings), encoding="utf-8")

    config = load_training_config(str(config_path))

    # ``preference_dataset_path`` is written but not checked here, and the
    # boolean flag is verified by identity below rather than by equality.
    not_compared_by_equality = {"preference_dataset_path", "qlora_use_double_quant"}
    for field_name, expected_value in settings.items():
        if field_name in not_compared_by_equality:
            continue
        assert getattr(config, field_name) == expected_value, field_name
    assert config.qlora_use_double_quant is False
def test_train_runs_qlora_and_dpo_preference_stage(tmp_path: Path, monkeypatch) -> None:
    """Drive ``train`` through a QLoRA run followed by a DPO preference stage.

    ``transformers``, ``peft`` and ``trl`` are swapped in ``sys.modules`` for
    recording fakes, so the assertions check the keyword arguments handed to
    those libraries and the metrics that ``train`` returns.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split: the first row trains, everything after it evaluates.
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            # Pivot the rows into one list per column before calling ``fn``.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            mapped = fn(columns)
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )

    # A single preference pair is enough to exercise the DPO stage.
    preference_rows = [
        {
            "prompt": "Atbildi korekti",
            "chosen": "Šī ir labākā atbilde.",
            "rejected": "Nē.",
            "source": "human_review",
        }
    ]
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(json.dumps(preference_rows), encoding="utf-8")

    # Call logs filled in by the fakes below.
    model_load_calls: list[dict[str, object]] = []
    bnb_calls: list[dict[str, object]] = []
    lora_config_calls: list[dict[str, object]] = []

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del truncation, max_length, padding
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeBitsAndBytesConfig:
        """Records the quantization keyword arguments it is built with."""

        def __init__(self, **kwargs):
            bnb_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeModel:
        """Model double; ``from_pretrained`` logs every load request."""

        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)
            self.prepared_for_kbit = False
            self.peft_config = None
            self.trainable_parameters_printed = False

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            model_load_calls.append({"model_name": model_name, "kwargs": kwargs})
            return cls()

        def print_trainable_parameters(self):
            self.trainable_parameters_printed = True

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Supervised trainer double returning canned train/eval metrics."""

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del data_collator
            self.model = model
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "adapter_config.json").write_text("{}", encoding="utf-8")
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            BitsAndBytesConfig=FakeBitsAndBytesConfig,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    class FakeLoraConfig:
        """Records the LoRA keyword arguments it is built with."""

        def __init__(self, **kwargs):
            lora_config_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeAutoPeftModelForCausalLM:
        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            # The "auto_peft" marker lets the test tell adapter reloads apart
            # from plain base-model loads.
            model_load_calls.append(
                {"model_name": model_name, "kwargs": kwargs, "auto_peft": True}
            )
            return FakeModel()

    def fake_prepare_model_for_kbit_training(model, use_gradient_checkpointing):
        model.prepared_for_kbit = use_gradient_checkpointing
        return model

    def fake_get_peft_model(model, peft_config):
        model.peft_config = peft_config
        return model

    monkeypatch.setitem(
        sys.modules,
        "peft",
        types.SimpleNamespace(
            AutoPeftModelForCausalLM=FakeAutoPeftModelForCausalLM,
            LoraConfig=FakeLoraConfig,
            TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
            get_peft_model=fake_get_peft_model,
            prepare_model_for_kbit_training=fake_prepare_model_for_kbit_training,
        ),
    )

    class FakeDPOConfig:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeDPOTrainer:
        """DPO trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            FakeDPOTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"loss": 0.12})

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "adapter_config.json").write_text("{}", encoding="utf-8")
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "trl",
        types.SimpleNamespace(DPOConfig=FakeDPOConfig, DPOTrainer=FakeDPOTrainer),
    )

    output_dir = tmp_path / "trained-model"
    metrics = train(
        output_dir=str(output_dir),
        max_seq_length=256,
        adapter_type="qlora",
        qlora_compute_dtype="float16",
        qlora_quant_type="nf4",
        qlora_use_double_quant=True,
        lora_r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        peft_target_modules=["q_proj", "v_proj"],
        preference_dataset_path=str(preference_dataset_path),
        preference_optimization="dpo",
        preference_beta=0.2,
        preference_max_prompt_length=128,
        preference_max_length=512,
    )

    # Metrics from both the supervised and the preference stage are reported.
    assert metrics["train_loss"] == 0.2
    assert metrics["preference_loss"] == 0.12
    assert metrics["preference_examples"] == 1.0
    assert metrics["preference_stage"] == 1.0

    # Quantization and LoRA settings reached the (fake) libraries unchanged.
    first_bnb_call = bnb_calls[0]
    assert first_bnb_call["load_in_4bit"] is True
    assert first_bnb_call["bnb_4bit_quant_type"] == "nf4"
    first_lora_call = lora_config_calls[0]
    assert first_lora_call["r"] == 8
    assert first_lora_call["lora_alpha"] == 16
    assert first_lora_call["target_modules"] == ["q_proj", "v_proj"]

    # The DPO trainer was built with a reference model and the single pair.
    dpo_trainer = FakeDPOTrainer.last_instance
    assert dpo_trainer is not None
    assert "ref_model" in dpo_trainer.kwargs
    assert len(dpo_trainer.kwargs["train_dataset"]) == 1
    assert any(call.get("auto_peft") for call in model_load_calls)
def test_train_runs_orpo_preference_stage_without_reference_model(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``train`` with an ORPO preference stage and check no ``ref_model`` is passed.

    ``transformers`` and ``trl`` are replaced in ``sys.modules`` with fakes; the
    ORPO trainer fake keeps its constructor keyword arguments for inspection.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Fixed split: the first row trains, everything after it evaluates.
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            # Pivot the rows into one list per column before calling ``fn``.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            mapped = fn(columns)
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kā iet?", "assistant": "Labi"},
                ]
            )
        },
    )

    preference_rows = [
        {
            "prompt": "Atbildi pieklājīgi",
            "chosen": "Protams, palīdzēšu.",
            "rejected": "Nē.",
            "source": "human_review",
        }
    ]
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(json.dumps(preference_rows), encoding="utf-8")

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del truncation, max_length, padding
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        """Supervised trainer double returning canned train/eval metrics."""

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, args, train_dataset, eval_dataset, data_collator

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.11})

        def evaluate(self):
            return {"eval_loss": 0.22}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    class FakeORPOConfig:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeORPOTrainer:
        """ORPO trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            FakeORPOTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"loss": 0.07})

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_trl = types.SimpleNamespace(ORPOConfig=FakeORPOConfig, ORPOTrainer=FakeORPOTrainer)
    monkeypatch.setitem(sys.modules, "trl", fake_trl)

    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        preference_dataset_path=str(preference_dataset_path),
        preference_optimization="orpo",
    )

    assert metrics["preference_loss"] == 0.07
    orpo_trainer = FakeORPOTrainer.last_instance
    assert orpo_trainer is not None
    # Unlike the DPO path, ORPO must be constructed without a reference model.
    assert "ref_model" not in orpo_trainer.kwargs
def test_train_retries_tokenizer_with_slow_backend(tmp_path: Path, monkeypatch) -> None:
    """Check that ``train`` retries tokenizer loading with ``use_fast=False``.

    The fake tokenizer raises ``ValueError`` whenever it is asked for the fast
    backend (``use_fast`` absent or truthy) and records each attempt; the test
    expects exactly one fast attempt followed by one slow attempt.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )

    # One entry per ``from_pretrained`` call: True for fast, False for slow.
    tokenizer_fast_attempts: list[bool] = []

    class FakeTokenizer:
        """Tokenizer double whose fast backend is always unavailable."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name
            tokenizer_fast_attempts.append(bool(kwargs.get("use_fast", True)))
            if kwargs.get("use_fast", True):
                raise ValueError("fast tokenizer unavailable")
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            # Create the directory first, as the sibling tokenizer fakes do, so
            # this fake does not rely on the order in which ``train`` saves
            # the model and the tokenizer.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None
        use_cache = True

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        """Trainer double returning canned train/eval metrics."""

        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    train(
        output_dir=str(tmp_path / "slow-tokenizer"), model_name="custom/model", max_seq_length=256
    )

    assert tokenizer_fast_attempts == [True, False]
def test_train_auto_switches_giant_models_to_resource_saver_mode(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check the settings ``train`` applies for a very large model name.

    The test requests ``adapter_type="full"`` and sets larger batch sizes via
    ``HF_TRAIN_*`` environment variables, then asserts that the model is loaded
    4-bit with ``device_map="auto"``, that the trainer receives batch size 1
    with 16 accumulation steps, and that ``training-config.json`` records
    ``adapter_type`` as ``"qlora"``.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )

    # Environment values the assertions below expect to be overridden.
    monkeypatch.setenv("HF_TRAIN_BATCH_SIZE", "4")
    monkeypatch.setenv("HF_TRAIN_EVAL_BATCH_SIZE", "2")
    monkeypatch.setenv("HF_TRAIN_GRADIENT_ACCUMULATION_STEPS", "4")

    # Call logs filled in by the fakes below.
    model_load_calls: list[dict[str, object]] = []
    bnb_calls: list[dict[str, object]] = []

    class FakeTokenizer:
        """Tokenizer double that always returns a single encoded row."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            # Create the directory first, as the sibling tokenizer fakes do, so
            # this fake does not rely on the order in which ``train`` saves
            # the model and the tokenizer.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeBitsAndBytesConfig:
        """Records the quantization keyword arguments it is built with."""

        def __init__(self, **kwargs):
            bnb_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeModel:
        """Model double; ``from_pretrained`` logs every load request."""

        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            model_load_calls.append({"model_name": model_name, "kwargs": kwargs})
            return cls()

        def print_trainable_parameters(self):
            return None

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "adapter_config.json").write_text("{}", encoding="utf-8")
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            BitsAndBytesConfig=FakeBitsAndBytesConfig,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    class FakeLoraConfig:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    def fake_prepare_model_for_kbit_training(model, use_gradient_checkpointing):
        del use_gradient_checkpointing
        return model

    def fake_get_peft_model(model, peft_config):
        del peft_config
        return model

    monkeypatch.setitem(
        sys.modules,
        "peft",
        types.SimpleNamespace(
            LoraConfig=FakeLoraConfig,
            TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
            get_peft_model=fake_get_peft_model,
            prepare_model_for_kbit_training=fake_prepare_model_for_kbit_training,
        ),
    )

    output_dir = tmp_path / "giant-model"
    train(
        output_dir=str(output_dir),
        model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        adapter_type="full",
        max_seq_length=256,
    )

    # The base model is loaded quantized and sharded automatically.
    assert bnb_calls[0]["load_in_4bit"] is True
    assert model_load_calls[0]["model_name"] == "Qwen/Qwen3-Coder-480B-A35B-Instruct"
    assert model_load_calls[0]["kwargs"]["device_map"] == "auto"
    assert model_load_calls[0]["kwargs"]["low_cpu_mem_usage"] is True
    assert "quantization_config" in model_load_calls[0]["kwargs"]

    # Batch sizes from the environment are replaced by the expected values.
    assert FakeTrainer.last_instance is not None
    assert FakeTrainer.last_instance.args.kwargs["per_device_train_batch_size"] == 1
    assert FakeTrainer.last_instance.args.kwargs["per_device_eval_batch_size"] == 1
    assert FakeTrainer.last_instance.args.kwargs["gradient_accumulation_steps"] == 16

    # The persisted config reports QLoRA even though "full" was requested.
    training_config = json.loads((output_dir / "training-config.json").read_text(encoding="utf-8"))
    assert training_config["adapter_type"] == "qlora"
def test_train_disables_pin_memory_and_tqdm_in_non_interactive_environment(
    tmp_path: Path, monkeypatch
) -> None:
    """Check trainer arguments when stderr is not a TTY and no accelerator exists.

    ``torch`` is faked to report neither CUDA nor MPS, and ``stderr.isatty()``
    returns ``False``. The test then asserts that pin-memory is off, tqdm is
    disabled and first-step logging is on in the ``TrainingArguments`` kwargs.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched maps receive the column dict, unbatched ones the first row.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Čau!"},
                    {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
                ]
            )
        },
    )

    import maris_core.training.train as train_module

    # Non-interactive stderr. NOTE(review): ``train_module.sys`` is presumably
    # the stdlib ``sys`` module, which would make this patch process-wide for
    # the duration of the test - confirm.
    non_tty_stderr = types.SimpleNamespace(isatty=lambda: False)
    monkeypatch.setattr(train_module.sys, "stderr", non_tty_stderr)

    # Minimal torch double reporting that no accelerator is available.
    no_accelerator = types.SimpleNamespace(is_available=lambda: False)
    fake_torch = types.SimpleNamespace(
        cuda=types.SimpleNamespace(is_available=lambda: False),
        backends=types.SimpleNamespace(mps=no_accelerator),
    )
    monkeypatch.setitem(sys.modules, "torch", fake_torch)

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token_id = 0
        eos_token_id = 1
        pad_token = "<pad>"
        eos_token = "</s>"

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # Accept a bare string as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(output_dir=str(tmp_path / "cpu-runtime"), model_name="custom/model", max_seq_length=256)

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["dataloader_pin_memory"] is False
    assert training_kwargs["disable_tqdm"] is True
    assert training_kwargs["logging_first_step"] is True
def test_train_enables_bf16_by_default_when_cuda_supports_it(tmp_path: Path, monkeypatch) -> None:
    """Check that bf16 is on and fp16 off when the fake CUDA reports bf16 support.

    ``torch`` is faked so that ``cuda.is_available()`` and
    ``cuda.is_bf16_supported()`` both return ``True``; the assertions read the
    precision flags from the ``TrainingArguments`` kwargs.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched maps receive the column dict, unbatched ones the first row.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Čau!"},
                    {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
                ]
            )
        },
    )

    import maris_core.training.train as train_module

    # Interactive stderr for this scenario.
    tty_stderr = types.SimpleNamespace(isatty=lambda: True)
    monkeypatch.setattr(train_module.sys, "stderr", tty_stderr)

    # torch double: CUDA present with bf16 support, MPS absent.
    fake_cuda = types.SimpleNamespace(
        is_available=lambda: True,
        is_bf16_supported=lambda: True,
    )
    fake_mps = types.SimpleNamespace(is_available=lambda: False)
    fake_torch = types.SimpleNamespace(
        cuda=fake_cuda,
        backends=types.SimpleNamespace(mps=fake_mps),
    )
    monkeypatch.setitem(sys.modules, "torch", fake_torch)

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token_id = 0
        eos_token_id = 1
        pad_token = "<pad>"
        eos_token = "</s>"

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # Accept a bare string as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(output_dir=str(tmp_path / "cuda-runtime"), model_name="custom/model", max_seq_length=256)

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["bf16"] is True
    assert training_kwargs["fp16"] is False
def test_train_uses_fsdp_training_arguments_when_requested(tmp_path: Path, monkeypatch) -> None:
    """Check the FSDP-related ``TrainingArguments`` built for ``distributed_strategy="fsdp"``.

    A small FSDP JSON config is written to ``tmp_path`` and passed to ``train``
    together with a layer class to wrap; the assertions read the resulting
    ``fsdp``, ``fsdp_config`` and ``ddp_find_unused_parameters`` kwargs.
    """

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched maps receive the column dict, unbatched ones the first row.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Čau!"},
                    {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
                ]
            )
        },
    )

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token_id = 0
        eos_token_id = 1
        pad_token = "<pad>"
        eos_token = "</s>"

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # Accept a bare string as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    # User-supplied FSDP settings that the assertions expect to be preserved.
    user_fsdp_settings = {"activation_checkpointing": False, "limit_all_gathers": False}
    fsdp_config_path = tmp_path / "fsdp-config.json"
    fsdp_config_path.write_text(json.dumps(user_fsdp_settings), encoding="utf-8")

    train(
        output_dir=str(tmp_path / "fsdp-runtime"),
        model_name="custom/model",
        max_seq_length=256,
        distributed_strategy="fsdp",
        distributed_config_path=str(fsdp_config_path),
        fsdp_transformer_layer_cls_to_wrap=["Qwen2DecoderLayer"],
    )

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["fsdp"] == "full_shard auto_wrap"
    effective_fsdp_config = training_kwargs["fsdp_config"]
    assert effective_fsdp_config["activation_checkpointing"] is False
    assert effective_fsdp_config["limit_all_gathers"] is False
    assert effective_fsdp_config["min_num_params"] == 100_000_000
    assert effective_fsdp_config["transformer_layer_cls_to_wrap"] == ["Qwen2DecoderLayer"]
    assert training_kwargs["ddp_find_unused_parameters"] is False
def test_train_uses_deepspeed_training_arguments_when_requested(
    tmp_path: Path, monkeypatch
) -> None:
    """Check the ``TrainingArguments`` built for ``distributed_strategy="deepspeed"``.

    The installed-package lookup is stubbed to return a version string, a
    DeepSpeed JSON config is written to ``tmp_path``, and the assertions verify
    that its path is passed through as the ``deepspeed`` kwarg.
    """
    # Stub the package-version lookup so it reports a version.
    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        lambda package_name: "0.18.9",
    )

    class FakeDataset:
        """List-backed dataset double with the split/map surface used here."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched maps receive the column dict, unbatched ones the first row.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rows = [
                {name: mapped[name][position] for name in mapped}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Čau!"},
                    {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
                ]
            )
        },
    )

    class FakeTokenizer:
        """Tokenizer double that emits the same three tokens for every text."""

        pad_token_id = 0
        eos_token_id = 1
        pad_token = "<pad>"
        eos_token = "</s>"

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # Accept a bare string as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer double that remembers the most recent instance."""

        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    deepspeed_settings = {"zero_optimization": {"stage": 3}}
    deepspeed_config_path = tmp_path / "deepspeed.json"
    deepspeed_config_path.write_text(json.dumps(deepspeed_settings), encoding="utf-8")

    train(
        output_dir=str(tmp_path / "deepspeed-runtime"),
        model_name="custom/model",
        max_seq_length=256,
        distributed_strategy="deepspeed",
        distributed_config_path=str(deepspeed_config_path),
    )

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    # The config file path itself, not its parsed contents, is forwarded.
    assert training_kwargs["deepspeed"] == str(deepspeed_config_path)
    assert training_kwargs["ddp_find_unused_parameters"] is False
def test_deepspeed_training_arguments_raise_clear_error_when_dependency_missing(
    tmp_path: Path, monkeypatch
) -> None:
    """Expect an ``ImportError`` when the package lookup raises ``PackageNotFoundError``.

    The installed-package lookup is replaced with a stub that always raises
    ``PackageNotFoundError``; building the distributed overrides for a
    DeepSpeed config must then fail with the install hint in its message.
    """
    deepspeed_settings = {"zero_optimization": {"stage": 3}}
    deepspeed_config_path = tmp_path / "deepspeed.json"
    deepspeed_config_path.write_text(json.dumps(deepspeed_settings), encoding="utf-8")

    overrides = {
        "distributed_strategy": "deepspeed",
        "distributed_config_path": str(deepspeed_config_path),
    }
    config = load_training_config(overrides=overrides)

    def _raise_missing_package(package_name: str) -> None:
        # Simulates a missing distribution in the package-metadata lookup.
        raise PackageNotFoundError(package_name)

    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        _raise_missing_package,
    )

    expected_message = "DeepSpeed režīms nepieciešams instalēt 'deepspeed'"
    with pytest.raises(ImportError, match=expected_message):
        _build_distributed_training_argument_overrides(config)
def test_deepspeed_training_arguments_raise_clear_error_when_metadata_lookup_stops(
    tmp_path: Path, monkeypatch
) -> None:
    """Expect an ``ImportError`` when the package lookup raises ``StopIteration``.

    Same scenario as a missing package, except the stubbed lookup fails with
    ``StopIteration``; the same install hint is expected in the error message.
    """
    deepspeed_settings = {"zero_optimization": {"stage": 3}}
    deepspeed_config_path = tmp_path / "deepspeed.json"
    deepspeed_config_path.write_text(json.dumps(deepspeed_settings), encoding="utf-8")

    overrides = {
        "distributed_strategy": "deepspeed",
        "distributed_config_path": str(deepspeed_config_path),
    }
    config = load_training_config(overrides=overrides)

    def _raise_stop_iteration(package_name: str) -> None:
        # Simulates the metadata lookup ending with a bare StopIteration.
        raise StopIteration(package_name)

    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        _raise_stop_iteration,
    )

    expected_message = "DeepSpeed režīms nepieciešams instalēt 'deepspeed'"
    with pytest.raises(ImportError, match=expected_message):
        _build_distributed_training_argument_overrides(config)
def test_train_model_cli_exits_cleanly_when_runtime_dependency_missing(monkeypatch, capsys) -> None:
    """Expect exit code 2 and no traceback when ``train_with_config`` raises ``ImportError``.

    ``scripts/train_model.py`` is loaded from its file path and its ``main`` is
    run with a stubbed training module whose ``train_with_config`` raises.
    """
    # Load the CLI script as a module directly from its file location.
    script_path = Path(__file__).resolve().parent.parent / "scripts" / "train_model.py"
    spec = importlib.util.spec_from_file_location("train_model", script_path)
    assert spec is not None
    assert spec.loader is not None
    train_model_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(train_model_module)

    missing_dependency_message = (
        "DeepSpeed režīms nepieciešams instalēt 'deepspeed' Python pakotni."
    )

    def _raise_missing_dependency(_config: object) -> dict[str, object]:
        raise ImportError(missing_dependency_message)

    # Config loading is irrelevant here; any object will do.
    monkeypatch.setattr(
        train_model_module, "load_training_config", lambda *args, **kwargs: object()
    )
    stub_training_module = types.SimpleNamespace(
        train_branch_suite=lambda _config: {},
        train_with_config=_raise_missing_dependency,
    )
    monkeypatch.setitem(sys.modules, "maris_core.training.train", stub_training_module)
    monkeypatch.setattr(sys, "argv", [str(script_path)])

    with pytest.raises(SystemExit) as exc_info:
        train_model_module.main()

    assert exc_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert missing_dependency_message in stderr_text
    assert "Traceback" not in stderr_text
def test_train_model_cli_exits_cleanly_for_branch_suite_dependency_missing(
    monkeypatch, capsys
) -> None:
    """Check the CLI exits with code 2 when branch-suite training raises ``ImportError``.

    ``scripts/train_model.py`` is loaded from its file path; its
    ``load_training_config`` and ``replace`` attributes are stubbed, and
    ``maris_core.training.train`` in ``sys.modules`` is swapped for a namespace
    whose ``train_branch_suite`` raises ``ImportError``. Invoked with
    ``--all-branches``, ``main()`` must raise ``SystemExit(2)``, print the
    error message to stderr and emit no traceback text.
    """
    error_message = "DeepSpeed režīms nepieciešams instalēt 'deepspeed' Python pakotni."
    cli_path = Path(__file__).resolve().parents[1] / "scripts" / "train_model.py"

    # Load the script as a standalone module so its attributes can be patched.
    module_spec = importlib.util.spec_from_file_location("train_model", cli_path)
    assert module_spec is not None and module_spec.loader is not None
    cli_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(cli_module)

    def _fail_with_missing_dependency(_config: object) -> dict[str, object]:
        raise ImportError(error_message)

    fake_train_module = types.SimpleNamespace(
        train_branch_suite=_fail_with_missing_dependency,
        train_with_config=lambda _config: {},
    )
    monkeypatch.setattr(
        cli_module, "load_training_config", lambda *args, **kwargs: object()
    )
    # The stubbed config is a bare object, so ``replace`` just hands it back.
    monkeypatch.setattr(cli_module, "replace", lambda config, **kwargs: config)
    monkeypatch.setitem(sys.modules, "maris_core.training.train", fake_train_module)
    monkeypatch.setattr(sys, "argv", [str(cli_path), "--all-branches"])

    with pytest.raises(SystemExit) as exit_info:
        cli_module.main()

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert error_message in stderr_text
    assert "Traceback" not in stderr_text
def test_ensure_runtime_home_dir_sets_temp_home_when_missing(tmp_path: Path, monkeypatch) -> None:
    """Check that a temp-dir home and synthetic user names are set when none exist.

    ``HOME``, ``USER``, ``LOGNAME`` and ``USERNAME`` are removed from the
    environment, ``tempfile.gettempdir`` is redirected to ``tmp_path`` and
    ``os.getuid`` is patched to return ``1000``. The function must return
    ``<tmp_path>/maris-home-1000``, export it as ``HOME``, set the three user
    variables to ``maris-1000`` and create the directory.
    """
    for name in ("HOME", "USER", "LOGNAME", "USERNAME"):
        # ``delenv(..., raising=False)`` records nothing when the variable is
        # already absent, so values written by the function under test would
        # leak into later tests. Setting a placeholder first makes monkeypatch
        # remember the original state (set or unset) and restore it on teardown.
        monkeypatch.setenv(name, "placeholder")
        monkeypatch.delenv(name)
    monkeypatch.setattr("maris_core.training.train.tempfile.gettempdir", lambda: str(tmp_path))
    monkeypatch.setattr("maris_core.training.train.os.getuid", lambda: 1000)

    resolved = _ensure_runtime_home_dir()

    expected = tmp_path / "maris-home-1000"
    assert resolved == str(expected)
    assert os.environ["HOME"] == str(expected)
    assert os.environ["USER"] == "maris-1000"
    assert os.environ["LOGNAME"] == "maris-1000"
    assert os.environ["USERNAME"] == "maris-1000"
    assert expected.is_dir()
def test_ensure_runtime_home_dir_keeps_existing_home_and_user(monkeypatch) -> None:
    """Check that already-populated home and user variables are left untouched.

    All four variables are preset; the function must return the preset
    ``HOME`` value and leave every variable exactly as it was.
    """
    preset_env = {
        "HOME": "/existing/home",
        "USER": "existing-user",
        "LOGNAME": "existing-user",
        "USERNAME": "existing-user",
    }
    for name, value in preset_env.items():
        monkeypatch.setenv(name, value)

    resolved = _ensure_runtime_home_dir()

    assert resolved == "/existing/home"
    for name, value in preset_env.items():
        assert os.environ[name] == value
def test_ensure_runtime_home_dir_uses_unknown_suffix_when_getuid_fails(
    tmp_path: Path, monkeypatch
) -> None:
    """Check the ``unknown`` suffix is used when the uid lookup raises ``OSError``.

    ``HOME`` is set to a whitespace-only value, the user variables are removed,
    ``tempfile.gettempdir`` is redirected to ``tmp_path`` and ``os.getuid`` is
    patched to raise ``OSError``. The function must return
    ``<tmp_path>/maris-home-unknown``, export it as ``HOME``, set the three
    user variables to ``maris-unknown`` and create the directory.
    """
    monkeypatch.setenv("HOME", " ")
    for name in ("USER", "LOGNAME", "USERNAME"):
        # ``delenv(..., raising=False)`` records nothing when the variable is
        # already absent, so values written by the function under test would
        # leak into later tests. Setting a placeholder first makes monkeypatch
        # remember the original state (set or unset) and restore it on teardown.
        monkeypatch.setenv(name, "placeholder")
        monkeypatch.delenv(name)
    monkeypatch.setattr("maris_core.training.train.tempfile.gettempdir", lambda: str(tmp_path))

    def _raise_os_error() -> int:
        raise OSError("uid not available")

    monkeypatch.setattr("maris_core.training.train.os.getuid", _raise_os_error)

    resolved = _ensure_runtime_home_dir()

    expected = tmp_path / "maris-home-unknown"
    assert resolved == str(expected)
    assert os.environ["HOME"] == str(expected)
    assert os.environ["USER"] == "maris-unknown"
    assert os.environ["LOGNAME"] == "maris-unknown"
    assert os.environ["USERNAME"] == "maris-unknown"
    assert expected.is_dir()