"""Tests training pipeline konfigurācijai un datu sagatavošanai."""

from __future__ import annotations

import asyncio
import importlib.util
import json
import os
import re
import subprocess
import sys
import types
from importlib.metadata import PackageNotFoundError
from pathlib import Path
from typing import Any

import pytest

from maris_core.data.preprocessing import record_to_training_text
from maris_core.training.config import (
    AVAILABLE_TRAINING_BASE_MODELS,
    DEFAULT_TRAINING_BASE_MODEL,
    list_training_base_models,
    load_training_config,
)
from maris_core.training.hf_compat import (
    MARIS_COMPATIBILITY_ARTIFACT_NAME,
    apply_maris_compatibility_identity,
    write_maris_compatibility_artifact,
)
from maris_core.training.preferences import load_preference_dataset
from maris_core.training.train import (
    _build_benchmark_gate_artifact,
    _build_distributed_training_argument_overrides,
    _ensure_runtime_home_dir,
    _filter_preference_examples_for_branch,
    _filter_records_for_branch,
    _run_post_training_benchmark,
    build_branch_training_configs,
    evaluate_with_config,
    train,
    train_branch_suite,
)

# Case-insensitive pattern matching the listed third-party AI vendor/model names
# when they appear as whole words.
FOREIGN_AI_NAME_RE = re.compile(
    r"(?i)\b(?:anthropic|chatgpt|claude|deepseek|gemini|llama|mistral|openai|qwen|TinyLlama)\b"
)
# Case-insensitive pattern matching "<owner>/<repo>" identifiers whose owner is
# one of the listed third-party organisations.
FOREIGN_MODEL_REPO_RE = re.compile(
    r"(?i)\b(?:deepseek-ai|meta-llama|mistralai|openai|qwen|TinyLlama)/[A-Za-z0-9][\w.-]*\b"
)


def _assert_output_dir_uses_only_maris_identity(output_dir: Path) -> None:
    """Assert that text artifacts below ``output_dir`` mention no foreign AI identity.

    Every regular file found recursively under ``output_dir`` is inspected when its
    suffix is one of ``.json``, ``.jinja``, ``.md`` or ``.txt``; the compatibility
    artifact file is exempt. The offending path is used as the assertion message.
    """
    text_suffixes = {".json", ".jinja", ".md", ".txt"}
    for candidate in sorted(output_dir.rglob("*")):
        should_inspect = (
            candidate.is_file()
            and candidate.name != MARIS_COMPATIBILITY_ARTIFACT_NAME
            and candidate.suffix.lower() in text_suffixes
        )
        if not should_inspect:
            continue
        text = candidate.read_text(encoding="utf-8")
        assert FOREIGN_MODEL_REPO_RE.search(text) is None, candidate
        assert FOREIGN_AI_NAME_RE.search(text) is None, candidate


def test_record_to_training_text_formats_conversation_and_generation() -> None:
    """Chat records and prompt-only records both keep their key fields in the rendered text."""
    chat_text = record_to_training_text({"user": "Sveiki", "assistant": "Čau!"})
    prompt_text = record_to_training_text(
        {"prompt": "Uzzīmē kaķi", "metadata": {"style": "anime"}}
    )

    for fragment in ("<|user|>", "Sveiki", "Čau!"):
        assert fragment in chat_text
    for fragment in ("Uzzīmē kaķi", '"style": "anime"'):
        assert fragment in prompt_text


def test_record_to_training_text_formats_structured_coder_record() -> None:
    """A structured coder record is rendered with its labelled sections and code fence."""
    coder_record = {
        "prompt": "Salabo retry helperi.",
        "target_file": "core-python/maris_core/retries.py",
        "buggy_code": "def retry(count):\n    return count / 0",
        "tests": ["assert retry(1) == 1", "assert retry(3) == 3"],
        "edge_cases": ["0 mēģinājumi", "negatīvs skaits"],
        "metadata": {"language": "python", "task": "bugfix"},
        "completion": "```python\ndef retry(count: int) -> int:\n    return max(count, 0)\n```",
    }

    rendered = record_to_training_text(coder_record)

    expected_fragments = (
        "Mērķa fails",
        "Esošais vai kļūdainais kods",
        "Robežgadījumi",
        "```python",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_load_training_config_reads_json_and_env_overrides(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Values from the JSON file, the environment and explicit overrides all reach the config."""
    file_settings = {
        "model_name": "repo/from-json",
        "branch_name": "coder",
        "num_epochs": 7,
        "report_to": ["tensorboard"],
    }
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps(file_settings), encoding="utf-8")

    for env_name, env_value in (
        ("HF_TRAIN_BATCH_SIZE", "3"),
        ("HF_TRAIN_ADAPTER_TYPE", "lora"),
    ):
        monkeypatch.setenv(env_name, env_value)
    loaded = load_training_config(str(settings_file), overrides={"learning_rate": 1e-4})

    expected_attributes = {
        "model_name": "repo/from-json",
        "branch_name": "coder",
        "num_epochs": 7,
        "per_device_train_batch_size": 3,
        "learning_rate": 1e-4,
        "adapter_type": "lora",
        "report_to": ["tensorboard"],
        "text_model_id": "MarisUK/maris-ai-text",
        "image_model_id": "MarisUK/maris-ai-image",
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(loaded, attribute) == expected, attribute


def test_load_training_config_reads_distributed_runtime_overrides(monkeypatch) -> None:
    """Distributed-launch environment variables are parsed into typed config fields."""
    distributed_env = {
        "HF_TRAIN_DISTRIBUTED_STRATEGY": "deepspeed",
        "HF_TRAIN_DISTRIBUTED_CONFIG_PATH": "huggingface/deepspeed-zero3.json",
        "HF_TRAIN_NUM_PROCESSES": "8",
        "HF_TRAIN_NUM_MACHINES": "2",
        "HF_TRAIN_MACHINE_RANK": "1",
        "HF_TRAIN_MAIN_PROCESS_IP": "10.0.0.10",
        "HF_TRAIN_MAIN_PROCESS_PORT": "29510",
    }
    for env_name, env_value in distributed_env.items():
        monkeypatch.setenv(env_name, env_value)

    loaded = load_training_config()

    assert loaded.distributed_strategy == "deepspeed"
    assert loaded.distributed_config_path == "huggingface/deepspeed-zero3.json"
    assert loaded.use_accelerate is True
    expected_launch_fields = {
        "num_processes": 8,
        "num_machines": 2,
        "machine_rank": 1,
        "main_process_ip": "10.0.0.10",
        "main_process_port": 29510,
    }
    for attribute, expected in expected_launch_fields.items():
        assert getattr(loaded, attribute) == expected, attribute


def test_load_training_config_reads_gradient_checkpointing_use_reentrant_override(
    monkeypatch,
) -> None:
    """The string "false" in the environment becomes a real ``False`` on the config."""
    monkeypatch.setenv("HF_TRAIN_GRADIENT_CHECKPOINTING_USE_REENTRANT", "false")

    use_reentrant = load_training_config().gradient_checkpointing_use_reentrant

    assert use_reentrant is False


def test_load_training_config_reads_runtime_model_repo_overrides(monkeypatch) -> None:
    """Each ``*_MODEL`` environment variable overrides the matching ``*_model_id`` field."""
    # (environment variable, config attribute, repository id) per modality.
    modality_overrides = (
        ("TEXT_MODEL", "text_model_id", "MarisUK/custom-text"),
        ("IMAGE_MODEL", "image_model_id", "MarisUK/custom-image"),
        ("MUSIC_MODEL", "music_model_id", "MarisUK/custom-music"),
        ("TTS_MODEL", "tts_model_id", "MarisUK/custom-tts"),
        ("STT_MODEL", "stt_model_id", "MarisUK/custom-stt"),
        ("VIDEO_MODEL", "video_model_id", "MarisUK/custom-video"),
    )
    for env_name, _, repo_id in modality_overrides:
        monkeypatch.setenv(env_name, repo_id)

    loaded = load_training_config()

    for _, attribute, repo_id in modality_overrides:
        assert getattr(loaded, attribute) == repo_id, attribute


def test_load_training_config_rejects_conflicting_precision_modes(
    tmp_path: Path,
) -> None:
    """Enabling ``fp16`` and ``bf16`` together must raise a ``ValueError`` naming both modes."""
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"fp16": True, "bf16": True}),
        encoding="utf-8",
    )

    # pytest.raises fails the test when nothing is raised and checks the message via re.search.
    with pytest.raises(ValueError, match="fp16 un bf16"):
        load_training_config(str(config_path))


def test_load_training_config_resolves_model_preset(
    tmp_path: Path,
) -> None:
    """A built-in preset name is kept and expanded to the preset's base model name."""
    preset_file = tmp_path / "training.json"
    preset_file.write_text(json.dumps({"model_preset": "coding"}), encoding="utf-8")

    resolved = load_training_config(str(preset_file))

    expected_model = AVAILABLE_TRAINING_BASE_MODELS["coding"]["model_name"]
    assert resolved.model_preset == "coding"
    assert resolved.model_name == expected_model


def test_load_training_config_resolves_extra_model_preset(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """A preset declared through ``MARIS_TRAIN_EXTRA_MODELS`` can be selected from the JSON file."""
    extra_models = {"qwen-32b": "Qwen/Qwen2.5-32B-Instruct"}
    preset_file = tmp_path / "training.json"
    preset_file.write_text(json.dumps({"model_preset": "qwen-32b"}), encoding="utf-8")
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", json.dumps(extra_models))

    resolved = load_training_config(str(preset_file))

    assert resolved.model_preset == "qwen-32b"
    assert resolved.model_name == "Qwen/Qwen2.5-32B-Instruct"


def test_load_training_config_rejects_unknown_model_preset(
    tmp_path: Path,
) -> None:
    """An unknown preset raises ``ValueError`` whose message names the field and a valid preset."""
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"model_preset": "unknown"}),
        encoding="utf-8",
    )

    # pytest.raises fails the test by itself when no ValueError is raised.
    with pytest.raises(ValueError) as excinfo:
        load_training_config(str(config_path))

    message = str(excinfo.value)
    assert "model_preset" in message
    assert "balanced" in message


def test_load_training_config_rejects_non_maris_hub_model_id(
    tmp_path: Path,
) -> None:
    """A ``hub_model_id`` outside the Maris namespace must raise ``RuntimeError``."""
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"hub_model_id": "someone-else/not-maris"}),
        encoding="utf-8",
    )

    # pytest.raises fails the test when nothing is raised and checks the message via re.search.
    with pytest.raises(RuntimeError, match="Maris AI modeli"):
        load_training_config(str(config_path))


def test_load_training_config_rejects_non_maris_dataset_repo(
    tmp_path: Path,
) -> None:
    """A ``dataset_repo`` outside the Maris namespace must raise ``RuntimeError``."""
    config_path = tmp_path / "training.json"
    config_path.write_text(
        json.dumps({"dataset_repo": "someone-else/not-maris-memory"}),
        encoding="utf-8",
    )

    # pytest.raises fails the test when nothing is raised and checks the message via re.search.
    with pytest.raises(RuntimeError, match="dataset repozitorijs"):
        load_training_config(str(config_path))


def test_load_training_config_reads_optional_eval_dataset_repo(
    tmp_path: Path,
) -> None:
    """The optional ``eval_dataset_repo`` key is carried over from the JSON file."""
    eval_repo = "MarisUK/maris-ai-evals"
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps({"eval_dataset_repo": eval_repo}), encoding="utf-8")

    loaded = load_training_config(str(settings_file))

    assert loaded.eval_dataset_repo == "MarisUK/maris-ai-evals"


def test_load_training_config_reads_explicit_training_and_eval_dataset_repo_lists(
    tmp_path: Path,
) -> None:
    """Explicit ``dataset_repos`` / ``eval_dataset_repos`` lists are loaded in file order."""
    training_repos = [
        "MarisUK/maris-ai-memory",
        "MarisUK/maris-ai-lv-memory",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    eval_repos = [
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    file_settings = {
        "dataset_repo": "MarisUK/maris-ai-memory",
        "dataset_repos": training_repos,
        "eval_dataset_repo": "MarisUK/maris-ai-evals",
        "eval_dataset_repos": eval_repos,
    }
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps(file_settings), encoding="utf-8")

    loaded = load_training_config(str(settings_file))

    assert loaded.dataset_repos == [
        "MarisUK/maris-ai-memory",
        "MarisUK/maris-ai-lv-memory",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    assert loaded.eval_dataset_repos == [
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]


def test_load_training_config_reads_benchmark_and_preference_paths(
    tmp_path: Path,
) -> None:
    """Benchmark, feedback, preference and per-branch settings are loaded from the JSON file."""
    file_settings = {
        "benchmark_dataset_path": "/tmp/benchmarks/release.json",
        "benchmark_name": "release-gate",
        "benchmark_levels": ["ci", "release"],
        "benchmark_min_overall": 0.75,
        "benchmark_gate_enabled": True,
        "benchmark_feedback_auto_discover": False,
        "benchmark_feedback_path": "/tmp/benchmarks/previous.json",
        "benchmark_feedback_boost_scale": 2.5,
        "benchmark_feedback_max_multiplier": 1.8,
        "preference_dataset_path": "/tmp/preferences.json",
        "branch_benchmark_targets": {"master": {"overall": 0.8, "reasoning": 0.78}},
        "branch_benchmark_names": {
            "master": "memory-quality",
            "coder": "coder-release-quality",
        },
        "branch_benchmark_dataset_paths": {
            "coder": "/tmp/benchmarks/coder-release.json",
            "planner": "/tmp/benchmarks/planner-release.json",
        },
        "branch_preference_dataset_paths": {
            "coder": "/tmp/preferences/coder-preferences.json"
        },
        "branch_dataset_filter_rules": {
            "planner": {"include_record_types": ["autonomous"], "allow_unlabeled": False}
        },
        "source_weight_map": {"production": 1.5, "synthetic": 1.0, "noisy": 0.6},
    }
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps(file_settings), encoding="utf-8")

    loaded = load_training_config(str(settings_file))

    # Top-level scalar and list settings.
    assert loaded.benchmark_dataset_path == "/tmp/benchmarks/release.json"
    assert loaded.benchmark_name == "release-gate"
    assert loaded.benchmark_levels == ["ci", "release"]
    assert loaded.benchmark_min_overall == 0.75
    assert loaded.benchmark_gate_enabled is True
    assert loaded.benchmark_feedback_auto_discover is False
    assert loaded.benchmark_feedback_path == "/tmp/benchmarks/previous.json"
    assert loaded.benchmark_feedback_boost_scale == 2.5
    assert loaded.benchmark_feedback_max_multiplier == 1.8
    assert loaded.preference_dataset_path == "/tmp/preferences.json"

    # Per-branch mappings.
    assert loaded.branch_benchmark_targets["master"]["reasoning"] == 0.78
    benchmark_names = loaded.branch_benchmark_names
    assert benchmark_names["master"] == "memory-quality"
    assert benchmark_names["coder"] == "coder-release-quality"
    benchmark_paths = loaded.branch_benchmark_dataset_paths
    assert benchmark_paths["coder"] == "/tmp/benchmarks/coder-release.json"
    assert benchmark_paths["planner"] == "/tmp/benchmarks/planner-release.json"
    preference_paths = loaded.branch_preference_dataset_paths
    assert preference_paths["coder"] == "/tmp/preferences/coder-preferences.json"
    planner_rules = loaded.branch_dataset_filter_rules["planner"]
    assert planner_rules["include_record_types"] == ["autonomous"]
    assert loaded.source_weight_map["production"] == 1.5


def test_load_training_config_default_coder_targets_include_execution_gate() -> None:
    """The default config carries the expected branch targets, names and bundled eval paths."""
    defaults = load_training_config()

    targets = defaults.branch_benchmark_targets
    assert targets["coder"]["execution"] == 0.7
    assert targets["master"]["memory_retrieval_pass_rate"] == 0.8
    assert defaults.branch_benchmark_names["master"] == "memory-quality"

    master_benchmark = defaults.branch_benchmark_dataset_paths["master"]
    assert master_benchmark.endswith("core-python/evals/master_memory_benchmark.json")
    coder_benchmark = defaults.branch_benchmark_dataset_paths["coder"]
    assert coder_benchmark.endswith("core-python/evals/coder_release_benchmark.json")
    coder_preferences = defaults.branch_preference_dataset_paths["coder"]
    assert coder_preferences.endswith("core-python/evals/coder_preference_dataset.json")


def test_apply_branch_runtime_defaults_prefers_master_memory_suite() -> None:
    """For the master branch the runtime defaults select the memory benchmark name and dataset."""
    import maris_core.training.train as train_module

    master_overrides = {
        "branch_name": "master",
        "benchmark_dataset_path": "",
        "benchmark_name": "chat-quality",
        "benchmark_gate_enabled": True,
    }
    base_config = load_training_config(overrides=master_overrides)

    effective = train_module._apply_branch_runtime_defaults(base_config)

    assert effective.benchmark_name == "memory-quality"
    dataset_path = effective.benchmark_dataset_path
    assert dataset_path.endswith("core-python/evals/master_memory_benchmark.json")


def test_build_benchmark_gate_artifact_uses_world_class_defaults_and_blocks_regressions() -> None:
    """A report with regressions fails the gate; the default targets appear in the artifact."""
    gate_config = load_training_config(
        overrides={"branch_name": "coder", "benchmark_gate_enabled": True}
    )
    score_manifest = {
        "overall": 0.8,
        "coding": 0.81,
        "reasoning": 0.76,
        "execution": 0.74,
        "grounding": 0.78,
        "safety": 0.93,
        "judge_overall": 0.78,
        "judge_task_completion": 0.77,
        "judge_instruction_following": 0.79,
        "judge_safety": 0.95,
        "judge_regression_risk": 0.8,
    }
    benchmark_report = {
        "benchmark_name": "release-gate",
        "score_manifest": score_manifest,
        "success_rate": 0.88,
        "production_like_cases": 3,
        "production_like_pass_rate": 0.8,
        "execution_cases": 4,
        "grounding_cases": 3,
    }

    artifact = _build_benchmark_gate_artifact(
        gate_config,
        benchmark_report,
        regression_report={"regression_count": 2},
    )

    targets = artifact["targets"]
    assert targets["success_rate"] == 0.85
    assert targets["production_like_pass_rate"] == 0.75
    assert targets["judge_overall"] == 0.72
    assert artifact["passed"] is False
    regression_failure = artifact["failed_metrics"]["regression_count"]
    assert regression_failure["required"] == 0.0
    assert regression_failure["actual"] == 2.0


def test_build_benchmark_gate_artifact_uses_stricter_execution_threshold() -> None:
    """An execution score of 0.6 misses the coder target of 0.7 and fails the gate."""
    gate_config = load_training_config(
        overrides={"branch_name": "coder", "benchmark_gate_enabled": True}
    )
    score_manifest = {
        "overall": 0.8,
        "coding": 0.82,
        "reasoning": 0.76,
        "execution": 0.6,
        "grounding": 0.78,
        "safety": 0.94,
    }
    benchmark_report = {
        "benchmark_name": "release-gate",
        "score_manifest": score_manifest,
        "execution_cases": 4,
    }

    artifact = _build_benchmark_gate_artifact(gate_config, benchmark_report)

    assert artifact["passed"] is False
    assert artifact["targets"]["execution"] == 0.7
    execution_failure = artifact["failed_metrics"]["execution"]
    assert execution_failure["actual"] == 0.6


def test_load_training_config_reads_category_weight_map(tmp_path: Path) -> None:
    """Per-category weights from the JSON file are available on the loaded config."""
    file_settings = {"category_weight_map": {"coding": 1.3, "grounding": 1.2}}
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps(file_settings), encoding="utf-8")

    weights = load_training_config(str(settings_file)).category_weight_map

    assert weights["coding"] == 1.3
    assert weights["grounding"] == 1.2


def test_load_training_config_reads_continue_training_settings(monkeypatch) -> None:
    """Continue-training environment variables populate the matching config fields."""
    for env_name, env_value in (
        ("HF_TRAIN_CONTINUE_FROM_LATEST", "true"),
        ("HF_TRAIN_CONTINUE_MODEL_PATH", "/tmp/maris-last-good"),
    ):
        monkeypatch.setenv(env_name, env_value)

    loaded = load_training_config()

    assert loaded.continue_from_latest_artifact is True
    assert loaded.continue_model_path == "/tmp/maris-last-good"


def test_list_training_base_models_returns_copy() -> None:
    """Mutating the returned listing must not leak into the module-level registry."""
    listing = list_training_base_models()
    balanced_entry = listing["balanced"]
    balanced_entry["model_name"] = "modified"

    registry_name = AVAILABLE_TRAINING_BASE_MODELS["balanced"]["model_name"]
    assert registry_name == DEFAULT_TRAINING_BASE_MODEL


def test_list_training_base_models_ignores_invalid_extra_models_json(monkeypatch) -> None:
    """Malformed ``MARIS_TRAIN_EXTRA_MODELS`` content still yields the built-in presets."""
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", "{not valid json")

    listing = list_training_base_models()

    builtin_presets = {"balanced", "reasoning", "coding", "lightweight"}
    assert all(preset in listing for preset in builtin_presets)


def test_list_training_base_models_accepts_owner_name_fallback_syntax(monkeypatch) -> None:
    """Comma-separated entries work both as bare ``owner/name`` and as ``alias=owner/name``."""
    extra_models_spec = "Qwen/Qwen3-Coder-480B-A35B-Instruct, coder-7b=Qwen/Qwen2.5-7B-Instruct"
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", extra_models_spec)

    listing = list_training_base_models()

    derived_entry = listing["qwen-qwen3-coder-480b-a35b-instruct"]
    assert derived_entry["model_name"] == "Qwen/Qwen3-Coder-480B-A35B-Instruct"
    aliased_entry = listing["coder-7b"]
    assert aliased_entry["model_name"] == "Qwen/Qwen2.5-7B-Instruct"


def test_list_training_base_models_accepts_string_shorthand(monkeypatch) -> None:
    """A JSON ``alias -> repo`` string mapping yields an entry with model name and label."""
    shorthand_json = '{"qwen-880b":"Qwen/Qwen3-880B-Instruct"}'
    monkeypatch.setenv("MARIS_TRAIN_EXTRA_MODELS", shorthand_json)

    entry = list_training_base_models()["qwen-880b"]

    assert entry["model_name"] == "Qwen/Qwen3-880B-Instruct"
    assert entry["label"] == "Qwen 880B"


def test_load_training_config_prefers_explicit_model_name_over_preset(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """An explicit base model from the environment wins and the preset ends up empty."""
    settings_file = tmp_path / "training.json"
    settings_file.write_text(json.dumps({"model_preset": "coding"}), encoding="utf-8")
    for env_name, env_value in (
        ("HF_TRAIN_BASE_MODEL", "custom/model"),
        ("HF_TRAIN_MODEL_PRESET", "reasoning"),
    ):
        monkeypatch.setenv(env_name, env_value)

    loaded = load_training_config(str(settings_file))

    assert loaded.model_name == "custom/model"
    assert loaded.model_preset == ""


def test_huggingface_train_script_resolves_relative_config_from_repo_root(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train.sh`` with a stub ``python3`` and check what it is invoked with.

    The stub records its working directory and arguments to a JSON file. The test
    then asserts that the relative ``HF_TRAINING_CONFIG_PATH`` arrives as a path
    under the repository root and that the script runs from ``core-python``.
    """
    # Nearest ancestor of this test file that contains huggingface/train.sh.
    repo_root = next(
        parent
        for parent in Path(__file__).resolve().parents
        if (parent / "huggingface" / "train.sh").is_file()
    )
    fake_python = tmp_path / "python3"
    invocation_log = tmp_path / "train-invocation.json"
    # Stub interpreter: executed by the real interpreter via its shebang, it only
    # dumps cwd and argv to the file named by TRAIN_SH_LOG.
    fake_python.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json",
                "import os",
                "import sys",
                "from pathlib import Path",
                "",
                "Path(os.environ['TRAIN_SH_LOG']).write_text(",
                "    json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False),",
                "    encoding='utf-8',",
                ")",
            ]
        ),
        encoding="utf-8",
    )
    fake_python.chmod(0o755)
    # Put the stub directory first on PATH so a `python3` lookup finds the stub.
    existing_path = os.environ.get("PATH", "")
    monkeypatch.setenv(
        "PATH",
        f"{tmp_path}{os.pathsep}{existing_path}" if existing_path else str(tmp_path),
    )
    monkeypatch.setenv("HF_TRAINING_CONFIG_PATH", "huggingface/training-config.json")
    monkeypatch.setenv("TRAIN_SH_LOG", str(invocation_log))

    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "train.sh")],
        check=True,
        cwd=repo_root,
    )

    logged = json.loads(invocation_log.read_text(encoding="utf-8"))
    assert logged["cwd"] == str(repo_root / "core-python")
    assert logged["argv"][0] == str(repo_root / "core-python" / "scripts" / "train_model.py")
    assert logged["argv"][1:3] == [
        "--config",
        str(repo_root / "huggingface" / "training-config.json"),
    ]


def test_huggingface_train_hf_script_uses_persistent_paths_and_uploads_model(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-hf.sh`` with a stub ``python3`` and inspect both invocations.

    The stub appends one JSON line per invocation. The test expects exactly two:
    first the training script with the hf-jobs config and the forwarded preset,
    then the export script pointed at a model path under ``HF_PERSISTENT_DIR``.
    """
    # Nearest ancestor of this test file that contains huggingface/train-hf.sh.
    repo_root = next(
        parent
        for parent in Path(__file__).resolve().parents
        if (parent / "huggingface" / "train-hf.sh").is_file()
    )
    persistent_dir = tmp_path / "persistent"
    fake_python = tmp_path / "python3"
    invocation_log = tmp_path / "train-hf-invocations.jsonl"
    # Stub interpreter: appends cwd and argv as one JSON line to the file named by TRAIN_HF_LOG.
    fake_python.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json",
                "import os",
                "import sys",
                "from pathlib import Path",
                "",
                "log_path = Path(os.environ['TRAIN_HF_LOG'])",
                "with log_path.open('a', encoding='utf-8') as handle:",
                "    handle.write(",
                "        json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
                "    )",
            ]
        ),
        encoding="utf-8",
    )
    fake_python.chmod(0o755)
    # Put the stub directory first on PATH so a `python3` lookup finds the stub.
    existing_path = os.environ.get("PATH", "")
    monkeypatch.setenv(
        "PATH",
        f"{tmp_path}{os.pathsep}{existing_path}" if existing_path else str(tmp_path),
    )
    monkeypatch.setenv("HF_PERSISTENT_DIR", str(persistent_dir))
    monkeypatch.setenv("TRAIN_HF_LOG", str(invocation_log))
    # Clear these variables so values inherited from the caller's environment are not seen by the script.
    monkeypatch.delenv("HF_TRAIN_OUTPUT_DIR", raising=False)
    monkeypatch.delenv("HF_LOCAL_MODEL_DIR", raising=False)
    monkeypatch.delenv("HF_TRAIN_PUSH_TO_HUB", raising=False)

    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "train-hf.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    # One JSON object per stub invocation, in call order.
    logged = [
        json.loads(line)
        for line in invocation_log.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
    assert len(logged) == 2
    assert logged[0]["cwd"] == str(repo_root / "core-python")
    assert logged[0]["argv"][0] == str(repo_root / "core-python" / "scripts" / "train_model.py")
    assert logged[0]["argv"][1:5] == [
        "--config",
        str(repo_root / "huggingface" / "training-config.hf-jobs.json"),
        "--model-preset",
        "coding",
    ]
    assert logged[1]["argv"][0] == str(repo_root / "core-python" / "scripts" / "export_to_hf.py")
    assert logged[1]["argv"][1:3] == [
        "--model-path",
        str(persistent_dir / "maris-ai-master"),
    ]


def test_huggingface_train_hf_script_enables_accelerate_on_gpu_space(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-hf.sh`` with stub ``python3`` and ``nvidia-smi`` executables.

    With a stub ``nvidia-smi`` on PATH that reports one GPU, the first logged
    invocation is expected to go through ``accelerate.commands.launch`` with the
    GPU config file and ``--num_processes 1``; the second is the export script.
    """
    # Nearest ancestor of this test file that contains huggingface/train-hf.sh.
    repo_root = next(
        parent
        for parent in Path(__file__).resolve().parents
        if (parent / "huggingface" / "train-hf.sh").is_file()
    )
    persistent_dir = tmp_path / "persistent-gpu"
    fake_python = tmp_path / "python3"
    fake_nvidia_smi = tmp_path / "nvidia-smi"
    invocation_log = tmp_path / "train-hf-gpu-invocations.jsonl"
    # Stub interpreter: appends cwd and argv as one JSON line to the file named by TRAIN_HF_GPU_LOG.
    fake_python.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json",
                "import os",
                "import sys",
                "from pathlib import Path",
                "",
                "log_path = Path(os.environ['TRAIN_HF_GPU_LOG'])",
                "with log_path.open('a', encoding='utf-8') as handle:",
                "    handle.write(",
                "        json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
                "    )",
            ]
        ),
        encoding="utf-8",
    )
    fake_python.chmod(0o755)
    # Stub nvidia-smi that prints a single fake GPU line.
    fake_nvidia_smi.write_text("#!/usr/bin/env bash\necho 'GPU 0: Fake GPU'\n", encoding="utf-8")
    fake_nvidia_smi.chmod(0o755)
    # Put the stub directory first on PATH so both stubs shadow any real executables.
    existing_path = os.environ.get("PATH", "")
    monkeypatch.setenv(
        "PATH",
        f"{tmp_path}{os.pathsep}{existing_path}" if existing_path else str(tmp_path),
    )
    monkeypatch.setenv("HF_PERSISTENT_DIR", str(persistent_dir))
    monkeypatch.setenv("TRAIN_HF_GPU_LOG", str(invocation_log))
    # Clear these variables so values inherited from the caller's environment are not seen by the script.
    monkeypatch.delenv("HF_TRAIN_USE_ACCELERATE", raising=False)
    monkeypatch.delenv("HF_TRAIN_NUM_PROCESSES", raising=False)

    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "train-hf.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    # One JSON object per stub invocation, in call order.
    logged = [
        json.loads(line)
        for line in invocation_log.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]

    assert logged[0]["argv"][0:2] == ["-m", "accelerate.commands.launch"]
    assert "--config_file" in logged[0]["argv"]
    assert str(repo_root / "huggingface" / "accelerate-gpu-config.yaml") in logged[0]["argv"]
    assert "--num_processes" in logged[0]["argv"]
    # The value following --num_processes must be "1".
    assert logged[0]["argv"][logged[0]["argv"].index("--num_processes") + 1] == "1"
    assert str(repo_root / "core-python" / "scripts" / "train_model.py") in logged[0]["argv"]
    assert logged[1]["argv"][0] == str(repo_root / "core-python" / "scripts" / "export_to_hf.py")

def test_huggingface_train_job_script_uses_accelerate_for_distributed_launch(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``huggingface/train-job.sh`` with stub executables and a deepspeed strategy set.

    The first logged invocation is expected to go through
    ``accelerate.commands.launch`` and to mention the GPU accelerate config, the
    hf-jobs training config and the training script; the second is the export script.
    """
    # Nearest ancestor of this test file that contains huggingface/train-job.sh.
    repo_root = next(
        parent
        for parent in Path(__file__).resolve().parents
        if (parent / "huggingface" / "train-job.sh").is_file()
    )
    fake_python = tmp_path / "python3"
    fake_nvidia_smi = tmp_path / "nvidia-smi"
    invocation_log = tmp_path / "train-job-invocations.jsonl"
    # Stub interpreter: appends cwd and argv as one JSON line to the file named by TRAIN_JOB_LOG.
    fake_python.write_text(
        "\n".join(
            [
                f"#!{sys.executable}",
                "import json",
                "import os",
                "import sys",
                "from pathlib import Path",
                "",
                "log_path = Path(os.environ['TRAIN_JOB_LOG'])",
                "with log_path.open('a', encoding='utf-8') as handle:",
                "    handle.write(",
                "        json.dumps({'cwd': os.getcwd(), 'argv': sys.argv[1:]}, ensure_ascii=False) + '\\n'",
                "    )",
            ]
        ),
        encoding="utf-8",
    )
    fake_python.chmod(0o755)
    # Stub nvidia-smi that prints a single fake GPU line.
    fake_nvidia_smi.write_text("#!/usr/bin/env bash\necho 'GPU 0: Fake GPU'\n", encoding="utf-8")
    fake_nvidia_smi.chmod(0o755)
    # Put the stub directory first on PATH so both stubs shadow any real executables.
    existing_path = os.environ.get("PATH", "")
    monkeypatch.setenv(
        "PATH",
        f"{tmp_path}{os.pathsep}{existing_path}" if existing_path else str(tmp_path),
    )
    monkeypatch.setenv("HF_JOB_WORK_DIR", str(tmp_path / "job-work"))
    monkeypatch.setenv("TRAIN_JOB_LOG", str(invocation_log))
    monkeypatch.setenv("HF_TRAIN_DISTRIBUTED_STRATEGY", "deepspeed")
    # Clear these variables so values inherited from the caller's environment are not seen by the script.
    monkeypatch.delenv("HF_TRAIN_USE_ACCELERATE", raising=False)
    monkeypatch.delenv("HF_TRAIN_NUM_PROCESSES", raising=False)

    subprocess.run(
        ["bash", str(repo_root / "huggingface" / "train-job.sh"), "--model-preset", "coding"],
        check=True,
        cwd=repo_root,
    )

    # One JSON object per stub invocation, in call order.
    logged = [
        json.loads(line)
        for line in invocation_log.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]

    assert logged[0]["argv"][0:2] == ["-m", "accelerate.commands.launch"]
    assert str(repo_root / "huggingface" / "accelerate-gpu-config.yaml") in logged[0]["argv"]
    assert str(repo_root / "huggingface" / "training-config.hf-jobs.json") in logged[0]["argv"]
    assert str(repo_root / "core-python" / "scripts" / "train_model.py") in logged[0]["argv"]
    assert logged[1]["argv"][0] == str(repo_root / "core-python" / "scripts" / "export_to_hf.py")


def test_configure_tokenizer_expands_large_model_context_window() -> None:
    """``_configure_tokenizer`` fills the pad token from EOS and raises ``model_max_length``."""
    import maris_core.training.train as train_module

    long_context_config = load_training_config(
        overrides={
            "model_name": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
            "max_seq_length": 65536,
        }
    )
    # Minimal stand-in exposing only the attributes checked below.
    stub_tokenizer = types.SimpleNamespace(
        pad_token=None,
        pad_token_id=None,
        eos_token="<eos>",
        eos_token_id=7,
        model_max_length=4096,
    )

    train_module._configure_tokenizer(stub_tokenizer, long_context_config)

    assert stub_tokenizer.pad_token == "<eos>"
    assert stub_tokenizer.pad_token_id == 7
    assert stub_tokenizer.model_max_length == 65536


def test_load_tokenizer_forces_remote_snapshot_restore(monkeypatch) -> None:
    """``_load_tokenizer`` opens the compatibility path with ``allow_remote_snapshot=True``."""
    import maris_core.training.train as train_module

    # Every value passed as ``allow_remote_snapshot`` to the stub context manager.
    snapshot_flags: list[bool | None] = []

    class StubTokenizer:
        """Tokenizer stand-in whose ``from_pretrained`` ignores its inputs."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class RecordingCompatPath:
        """Context manager stand-in that records the snapshot flag and yields a fixed path."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            del model_name
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return "/tmp/fake-model"

        def __exit__(self, exc_type, exc, tb) -> None:
            del exc_type, exc, tb
            return None

    stub_transformers = types.SimpleNamespace(AutoTokenizer=StubTokenizer)
    monkeypatch.setitem(sys.modules, "transformers", stub_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", RecordingCompatPath)

    master_config = load_training_config(overrides={"model_name": "MarisUK/maris-ai-master"})
    loaded_tokenizer = train_module._load_tokenizer("MarisUK/maris-ai-master", master_config)

    assert isinstance(loaded_tokenizer, StubTokenizer)
    assert snapshot_flags == [True]


def test_load_tokenizer_falls_back_to_explicit_slow_class(monkeypatch, tmp_path) -> None:
    """Check the fallback to an explicit slow tokenizer class.

    ``AutoTokenizer`` is stubbed to fail for both ``use_fast`` settings, so
    ``_load_tokenizer`` is expected to finish by calling the stub module's
    ``Qwen2Tokenizer`` on the local model directory without ``use_fast``.
    """
    import maris_core.training.train as train_module

    model_id = "MarisUK/maris-ai-master"
    snapshot_flags: list[bool | None] = []
    attempts: list[tuple[str, Any]] = []

    # Local model directory whose metadata names only the fast tokenizer class.
    model_dir = tmp_path / "trained-model"
    model_dir.mkdir(parents=True, exist_ok=True)
    tokenizer_metadata = {"tokenizer_class": "Qwen2TokenizerFast"}
    model_metadata = {
        "tokenizer_class": "Qwen2TokenizerFast",
        "auto_map": {"AutoTokenizer": ["Qwen2Tokenizer", "Qwen2TokenizerFast"]},
    }
    (model_dir / "tokenizer_config.json").write_text(
        json.dumps(tokenizer_metadata), encoding="utf-8"
    )
    (model_dir / "config.json").write_text(json.dumps(model_metadata), encoding="utf-8")

    class FailingAutoTokenizer:
        """Auto loader that rejects both the fast and the slow request."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name
            attempts.append(("auto", kwargs.get("use_fast")))
            if kwargs.get("use_fast", True):
                message = (
                    "Couldn't instantiate the backend tokenizer from one of the available paths."
                )
            else:
                message = "tokenizer config still points to a fast tokenizer class"
            raise ValueError(message)

    class SlowTokenizer:
        """Explicit slow class that must be loaded from the local directory."""

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            attempts.append(("slow", kwargs.get("use_fast")))
            assert model_name == str(model_dir)
            assert "use_fast" not in kwargs
            return cls()

    class RecordingCompatPath:
        """Context-manager stub yielding ``model_dir`` and recording its flag."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            del model_name
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return str(model_dir)

        def __exit__(self, exc_type, exc, tb) -> None:
            del exc_type, exc, tb
            return None

    fake_transformers = types.SimpleNamespace(
        AutoTokenizer=FailingAutoTokenizer,
        Qwen2Tokenizer=SlowTokenizer,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", RecordingCompatPath)

    training_config = load_training_config(overrides={"model_name": model_id})
    loaded = train_module._load_tokenizer(model_id, training_config)

    assert isinstance(loaded, SlowTokenizer)
    assert snapshot_flags == [True]
    # Fast auto load, slow auto load, then the explicit slow class.
    expected_attempts = [("auto", True), ("auto", False), ("slow", None)]
    assert attempts == expected_attempts


def test_load_tokenizer_retries_after_installing_missing_backends(monkeypatch, tmp_path) -> None:
    """Check that a missing-backend failure triggers one install and a retry.

    The stub auto loader raises the "sentencepiece or tiktoken" error until the
    patched ``_install_missing_tokenizer_backends`` flips ``retry_ready``; the
    next load attempt must then succeed.
    """
    import maris_core.training.train as train_module

    model_id = "MarisUK/maris-ai-master"
    snapshot_flags: list[bool | None] = []
    attempts: list[tuple[str, Any]] = []
    installer_calls: list[bool] = []

    model_dir = tmp_path / "trained-model"
    model_dir.mkdir(parents=True, exist_ok=True)

    class BackendGatedAutoTokenizer:
        """Auto loader that only works once the fake installer has run."""

        retry_ready = False

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name
            attempts.append(("auto", kwargs.get("use_fast")))
            if not cls.retry_ready:
                raise ValueError(
                    "You need to have sentencepiece or tiktoken installed to convert a slow tokenizer to a fast one."
                )
            return cls()

    class RecordingCompatPath:
        """Context-manager stub yielding ``model_dir`` and recording its flag."""

        def __init__(self, model_name: str, *, allow_remote_snapshot: bool | None = None):
            del model_name
            snapshot_flags.append(allow_remote_snapshot)

        def __enter__(self) -> str:
            return str(model_dir)

        def __exit__(self, exc_type, exc, tb) -> None:
            del exc_type, exc, tb

    def fake_install_missing_tokenizer_backends() -> bool:
        # Record the call and unblock the stub loader for the retry.
        installer_calls.append(True)
        BackendGatedAutoTokenizer.retry_ready = True
        return True

    fake_transformers = types.SimpleNamespace(AutoTokenizer=BackendGatedAutoTokenizer)
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setattr(train_module, "maris_hf_compatible_path", RecordingCompatPath)
    monkeypatch.setattr(
        train_module,
        "_install_missing_tokenizer_backends",
        fake_install_missing_tokenizer_backends,
    )

    training_config = load_training_config(overrides={"model_name": model_id})
    loaded = train_module._load_tokenizer(model_id, training_config)

    assert isinstance(loaded, BackendGatedAutoTokenizer)
    assert snapshot_flags == [True]
    assert installer_calls == [True]
    # Fast attempt, slow attempt, then the fast retry after installation.
    expected_attempts = [("auto", True), ("auto", False), ("auto", True)]
    assert attempts == expected_attempts


def test_install_missing_tokenizer_backends_only_installs_missing_packages(monkeypatch) -> None:
    """Only the backend that ``find_spec`` cannot locate should be pip-installed.

    ``tiktoken`` is reported as present, so the single recorded command must
    install ``sentencepiece`` alone.
    """
    import maris_core.training.train as train_module

    present_modules = {"tiktoken"}
    recorded_commands: list[list[str]] = []

    def fake_find_spec(name: str):
        if name in present_modules:
            return object()
        return None

    def fake_run(command, **kwargs):
        # Capture the command instead of spawning pip.
        del kwargs
        recorded_commands.append(command)
        return types.SimpleNamespace(stdout="")

    def fake_invalidate_caches():
        return None

    monkeypatch.setattr(train_module.importlib.util, "find_spec", fake_find_spec)
    monkeypatch.setattr(train_module.importlib, "invalidate_caches", fake_invalidate_caches)
    monkeypatch.setattr(train_module.subprocess, "run", fake_run)

    result = train_module._install_missing_tokenizer_backends()

    expected_command = [sys.executable, "-m", "pip", "install", "--no-cache-dir", "sentencepiece"]
    assert result is True
    assert recorded_commands == [expected_command]


def test_install_missing_tokenizer_backends_is_noop_when_backends_exist(monkeypatch) -> None:
    """When every backend resolves, the installer returns ``False`` and never runs pip."""
    import maris_core.training.train as train_module

    def always_found(name):
        # Every module lookup reports an available spec.
        del name
        return object()

    def fail_if_pip_runs(*args, **kwargs):
        # Any subprocess invocation is a test failure.
        del args, kwargs
        raise AssertionError("pip should not run")

    monkeypatch.setattr(train_module.importlib.util, "find_spec", always_found)
    monkeypatch.setattr(train_module.subprocess, "run", fail_if_pip_runs)

    result = train_module._install_missing_tokenizer_backends()

    assert result is False


def test_prepare_training_model_passes_use_reentrant_override(monkeypatch) -> None:
    """The configured ``use_reentrant`` flag must reach ``gradient_checkpointing_enable``.

    Also checks that the prepared model gets the tokenizer's pad token id and
    has ``use_cache`` switched off.
    """
    import maris_core.training.train as train_module

    class CheckpointingModel:
        """Model stub that records the kwargs passed when checkpointing is enabled."""

        def __init__(self):
            self.gradient_checkpointing_kwargs = None
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        def gradient_checkpointing_enable(self, *, gradient_checkpointing_kwargs=None):
            self.gradient_checkpointing_kwargs = gradient_checkpointing_kwargs

    fake_model = CheckpointingModel()
    fake_tokenizer = types.SimpleNamespace(pad_token_id=7)
    overrides = {
        "gradient_checkpointing": True,
        "gradient_checkpointing_use_reentrant": False,
    }
    training_config = load_training_config(overrides=overrides)

    def fake_load_model(model_name, config):
        del model_name, config
        return fake_model

    def passthrough_adapter(model, config):
        del config
        return model

    monkeypatch.setattr(train_module, "_load_model", fake_load_model)
    monkeypatch.setattr(train_module, "_apply_peft_adapter", passthrough_adapter)

    result = train_module._prepare_training_model(
        "MarisUK/maris-ai-master", fake_tokenizer, training_config
    )

    assert result is fake_model
    assert fake_model.config.pad_token_id == 7
    assert fake_model.config.use_cache is False
    assert fake_model.gradient_checkpointing_kwargs == {"use_reentrant": False}


def test_prepare_training_model_falls_back_when_runtime_rejects_use_reentrant(
    monkeypatch, caplog
) -> None:
    """Checkpointing must still be enabled when the model API takes no kwargs.

    The stub's ``gradient_checkpointing_enable`` accepts no arguments; the test
    expects the call to succeed anyway and a warning about the ignored
    ``use_reentrant`` setting to be logged.
    """
    import maris_core.training.train as train_module

    class LegacyCheckpointingModel:
        """Model stub whose checkpointing hook has no keyword parameters."""

        def __init__(self):
            self.gradient_checkpointing_enabled = False
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        def gradient_checkpointing_enable(self):
            self.gradient_checkpointing_enabled = True

    fake_model = LegacyCheckpointingModel()
    fake_tokenizer = types.SimpleNamespace(pad_token_id=7)
    overrides = {
        "gradient_checkpointing": True,
        "gradient_checkpointing_use_reentrant": False,
    }
    training_config = load_training_config(overrides=overrides)

    def fake_load_model(model_name, config):
        del model_name, config
        return fake_model

    def passthrough_adapter(model, config):
        del config
        return model

    monkeypatch.setattr(train_module, "_load_model", fake_load_model)
    monkeypatch.setattr(train_module, "_apply_peft_adapter", passthrough_adapter)

    with caplog.at_level("WARNING"):
        result = train_module._prepare_training_model(
            "MarisUK/maris-ai-master", fake_tokenizer, training_config
        )

    assert result is fake_model
    assert fake_model.gradient_checkpointing_enabled is True
    expected_warning = "Ignoring explicit gradient_checkpointing_use_reentrant=False"
    assert expected_warning in caplog.text


def test_train_auto_enables_deepspeed_for_giant_long_context_model(
    tmp_path: Path, monkeypatch
) -> None:
    """Run ``train`` against stubbed ML libraries and inspect the trainer arguments.

    With a 65536-token sequence length on the Qwen3-Coder-480B model name, the
    ``TrainingArguments`` handed to the trainer must reference the ZeRO-3
    DeepSpeed config and disable ``ddp_find_unused_parameters``.
    """

    def _echo_kwargs(**kwargs):
        # Stand-in for config/collator constructors: hand back the kwargs.
        return kwargs

    class FakeDataset:
        """In-memory dataset exposing the split/map/len calls the test relies on."""

        def __init__(self, items):
            self.items = list(items)
            first_row = self.items[0] if self.items else {}
            self.column_names = list(first_row.keys())

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # First item trains, the remainder evaluates.
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            if batched:
                payload = {
                    key: [item.get(key) for item in self.items] for key in self.column_names
                }
            else:
                payload = self.items[0]
            transformed = fn(payload)
            if not transformed:
                return FakeDataset([])
            # Turn the column-oriented result back into a list of row dicts.
            row_count = len(next(iter(transformed.values())))
            rows = [
                {key: transformed[key][position] for key in transformed}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Čau!"},
            {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
        ]
        return {"train": FakeDataset(records)}

    def fake_load_json_object(path_value, *, label):
        return {"config_path": path_value, "label": label}

    def accept_any_runtime_package(*args, **kwargs):
        del args, kwargs
        return None

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    monkeypatch.setattr("maris_core.training.train._load_json_object", fake_load_json_object)
    monkeypatch.setattr(
        "maris_core.training.train._require_runtime_package", accept_any_runtime_package
    )

    class FakeTokenizer:
        """Tokenizer stub emitting fixed-width token rows."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 0
        model_max_length = 4096

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, max_length, padding):
            del truncation, padding
            # Rows are at most four tokens wide.
            width = min(max_length, 4)
            return {
                "input_ids": [[1] * width for _ in texts],
                "attention_mask": [[1] * width for _ in texts],
            }

        def save_pretrained(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)

    class FakeModel:
        """Model stub with a minimal config and a checkpointing hook."""

        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def gradient_checkpointing_enable(self):
            self.gradient_checkpointing_enabled = True

    class FakeTrainingArguments:
        """Stores the keyword arguments so the test can inspect them."""

        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        """Trainer stub that remembers the most recently created instance."""

        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, train_dataset, eval_dataset, data_collator
            self.args = args
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.1}

        def save_model(self, output_dir):
            target = Path(output_dir)
            target.mkdir(parents=True, exist_ok=True)
            (target / "config.json").write_text("{}", encoding="utf-8")

    def fake_get_peft_model(model, peft_config):
        del peft_config
        return model

    def fake_prepare_model_for_kbit_training(model, use_gradient_checkpointing):
        del use_gradient_checkpointing
        return model

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=_echo_kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
        BitsAndBytesConfig=_echo_kwargs,
    )
    fake_peft = types.SimpleNamespace(
        LoraConfig=_echo_kwargs,
        TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
        get_peft_model=fake_get_peft_model,
        prepare_model_for_kbit_training=fake_prepare_model_for_kbit_training,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setitem(sys.modules, "peft", fake_peft)

    train(
        output_dir=str(tmp_path / "giant-long-context"),
        model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        max_seq_length=65536,
        distributed_strategy="none",
        use_accelerate=False,
    )

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    trainer_kwargs = trainer.args.kwargs
    assert trainer_kwargs["deepspeed"].endswith("huggingface/deepspeed-zero3.json")
    assert trainer_kwargs["ddp_find_unused_parameters"] is False


def test_train_uses_eval_split_and_writes_metrics(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Exercise ``train`` end to end against fake ``transformers`` objects.

    The fakes write tokenizer, model and adapter files seeded with third-party
    model names. The test then checks the returned metrics, the train/eval
    split given to the trainer, the artifact files present in the output
    directory, and that the saved files carry the Maris identity instead of
    the seeded names.
    """

    class FakeDataset:
        # In-memory dataset offering the split/map/len calls used by this test.
        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            # Hold out the last item for eval while keeping at least one training item.
            split_index = max(1, len(self.items) - 1)
            return {
                "train": FakeDataset(self.items[:split_index]),
                "test": FakeDataset(self.items[split_index:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            # Build a column-oriented batch, apply fn, then convert back to rows.
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    # Two records, so the split above yields one train and one eval example.
    fake_dataset = {
        "train": FakeDataset(
            [
                {"user": "Sveiki", "assistant": "Labdien"},
                {"user": "Kā iet?", "assistant": "Labi"},
            ]
        )
    }

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", lambda _: fake_dataset)

    class FakeTokenizer:
        # Tokenizer stub: no pad token initially, fixed three-token encodings.
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            # Pin the tokenization arguments expected for max_seq_length=256.
            assert truncation is True
            assert max_length == 256
            assert padding is False
            return {
                "input_ids": [[1, 2, 3] for _ in texts],
                "attention_mask": [[1, 1, 1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            # Writes tokenizer files that deliberately mention third-party model
            # names; the assertions at the end of the test expect them replaced.
            # NOTE(review): does not create output_dir itself — presumably it
            # already exists when this is called; confirm against train().
            Path(output_dir, "tokenizer.json").write_text(
                json.dumps(
                    {
                        "model": {
                            "type": "BPE",
                            "unk_token": "Qwen/Qwen2.5-7B-Instruct",
                        },
                        "added_tokens": [
                            {"content": "Claude"},
                            {"content": "DeepSeek"},
                        ],
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "tokenizer_config.json").write_text(
                json.dumps(
                    {
                        "name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "auto_map": {"AutoTokenizer": ["qwen2.Qwen2Tokenizer", None]},
                        "chat_template": "You are Qwen, a helpful assistant for Qwen/Qwen2.5-7B-Instruct.",
                        "init_kwargs": {
                            "chat_template": "Respond like TinyLlama and DeepSeek.",
                        },
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "chat_template.jinja").write_text(
                "System: meta-llama/Llama-3.2-3B-Instruct and Claude must answer here.",
                encoding="utf-8",
            )

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        # Model stub sharing a single config object at class level.
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        # Stores the keyword arguments so the test can inspect them.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.25}

    class FakeTrainer:
        # Trainer stub that remembers the most recently constructed instance.
        last_instance = None

        def __init__(
            self,
            *,
            model,
            args,
            train_dataset,
            eval_dataset=None,
            data_collator=None,
        ):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.5}

        def save_model(self, output_dir):
            # Creates the output directory and writes model/adapter configs
            # seeded with base-model identifiers for the identity checks below.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")
            Path(output_dir, "config.json").write_text(
                json.dumps(
                    {
                        "_name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "model_type": "qwen2",
                        "architectures": ["Qwen2ForCausalLM"],
                        "tokenizer_class": "Qwen2TokenizerFast",
                        "auto_map": {
                            "AutoConfig": "qwen2.configuration_qwen2.Qwen2Config",
                            "AutoModelForCausalLM": "qwen2.modeling_qwen2.Qwen2ForCausalLM",
                        },
                    }
                ),
                encoding="utf-8",
            )
            Path(output_dir, "adapter_config.json").write_text(
                json.dumps(
                    {
                        "base_model_name_or_path": DEFAULT_TRAINING_BASE_MODEL,
                        "base_model_class": "Qwen2ForCausalLM",
                        "parent_library": "transformers.models.qwen2.modeling_qwen2",
                        "auto_mapping": {
                            "base_model_class": "Qwen2ForCausalLM",
                            "parent_library": "transformers.models.qwen2.modeling_qwen2",
                        },
                        "description": "Adapter derived from Qwen and Llama.",
                    }
                ),
                encoding="utf-8",
            )

        def push_to_hub(self, **kwargs):
            self.push_kwargs = kwargs

    # Replace the real transformers module with the fakes for this test only.
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    # A single-pair preference dataset passed to train() below.
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(
        json.dumps(
            [
                {
                    "prompt": "Kurš variants ir labāks?",
                    "chosen": "Variants A",
                    "rejected": "Variants B",
                    "source": "human_review",
                    "tags": ["quality"],
                }
            ]
        ),
        encoding="utf-8",
    )

    async def fake_benchmark(config, *, model_path):
        # Stand-in for the post-training benchmark: returns a fixed score manifest.
        assert model_path.endswith("trained-model")
        return {
            "artifact_type": "chat-benchmark-manifest",
            "benchmark_name": config.benchmark_name,
            "branch": config.branch_name,
            "model": config.hub_model_id,
            "score_manifest": {
                "overall": 0.81,
                "reasoning": 0.8,
                "factuality": 0.79,
                "latvian_quality": 0.86,
                "coding": 0.74,
                "long_context": 0.75,
                "helpfulness": 0.83,
            },
        }

    monkeypatch.setattr("maris_core.training.train._run_post_training_benchmark", fake_benchmark)

    output_dir = tmp_path / "trained-model"
    metrics = train(
        output_dir=str(output_dir),
        max_seq_length=256,
        benchmark_dataset_path=str(tmp_path / "benchmarks.json"),
        preference_dataset_path=str(preference_dataset_path),
    )

    # Returned metrics and the datasets/arguments handed to the trainer.
    assert metrics["train_loss"] == 0.25
    assert metrics["eval_loss"] == 0.5
    assert metrics["perplexity"] > 1.0
    assert FakeTrainer.last_instance is not None
    assert len(FakeTrainer.last_instance.train_dataset) == 1
    assert len(FakeTrainer.last_instance.eval_dataset) == 1
    assert FakeTrainer.last_instance.args.kwargs["evaluation_strategy"] == "steps"
    # Every expected artifact file must exist in the output directory.
    assert (output_dir / "training-config.json").is_file()
    assert (output_dir / "training-metrics.json").is_file()
    assert (output_dir / "maris-metadata.json").is_file()
    assert (output_dir / "training-provenance.json").is_file()
    assert (output_dir / "README.md").is_file()
    assert (output_dir / "benchmark-manifest.json").is_file()
    assert (output_dir / "benchmark-release-gate.json").is_file()
    assert (output_dir / "benchmark-history.json").is_file()
    assert (output_dir / "benchmark-regression-report.json").is_file()
    assert (output_dir / "benchmark-feedback.json").is_file()
    assert (output_dir / "preference-summary.json").is_file()
    assert (output_dir / "human-eval-summary.json").is_file()
    assert (output_dir / "blind-side-by-side-eval.json").is_file()
    # Load the written artifacts for the content checks below.
    training_config = json.loads((output_dir / "training-config.json").read_text(encoding="utf-8"))
    training_metrics = json.loads(
        (output_dir / "training-metrics.json").read_text(encoding="utf-8")
    )
    benchmark_manifest = json.loads(
        (output_dir / "benchmark-manifest.json").read_text(encoding="utf-8")
    )
    benchmark_gate = json.loads(
        (output_dir / "benchmark-release-gate.json").read_text(encoding="utf-8")
    )
    benchmark_history = json.loads(
        (output_dir / "benchmark-history.json").read_text(encoding="utf-8")
    )
    benchmark_regression = json.loads(
        (output_dir / "benchmark-regression-report.json").read_text(encoding="utf-8")
    )
    benchmark_feedback = json.loads(
        (output_dir / "benchmark-feedback.json").read_text(encoding="utf-8")
    )
    preference_summary = json.loads(
        (output_dir / "preference-summary.json").read_text(encoding="utf-8")
    )
    human_eval_summary = json.loads(
        (output_dir / "human-eval-summary.json").read_text(encoding="utf-8")
    )
    blind_side_by_side = json.loads(
        (output_dir / "blind-side-by-side-eval.json").read_text(encoding="utf-8")
    )
    training_provenance = json.loads(
        (output_dir / "training-provenance.json").read_text(encoding="utf-8")
    )
    saved_model_config = json.loads((output_dir / "config.json").read_text(encoding="utf-8"))
    saved_tokenizer_config = json.loads(
        (output_dir / "tokenizer_config.json").read_text(encoding="utf-8")
    )
    saved_tokenizer_json = json.loads((output_dir / "tokenizer.json").read_text(encoding="utf-8"))
    saved_adapter_config = json.loads(
        (output_dir / "adapter_config.json").read_text(encoding="utf-8")
    )
    compatibility_manifest = json.loads(
        (output_dir / MARIS_COMPATIBILITY_ARTIFACT_NAME).read_text(encoding="utf-8")
    )
    saved_chat_template = (output_dir / "chat_template.jinja").read_text(encoding="utf-8")
    # Training config, metrics and provenance must be labelled with the Maris identity.
    assert training_config["maris_origin"] == "Maris AI"
    assert training_config["maris_model_id"] == "MarisUK/maris-ai-master"
    assert "model_name" not in training_config
    assert training_metrics["maris_origin"] == "Maris AI"
    assert training_metrics["artifact_type"] == "training-metrics"
    assert training_metrics["dataset_repo"] == "MarisUK/maris-ai-memory"
    assert training_metrics["benchmark_regressions"] == 0.0
    assert training_provenance["maris_origin"] == "Maris AI"
    assert training_provenance["train_examples"] == 1
    assert training_provenance["eval_examples"] == 1
    assert training_provenance["base_model_name"] == "Maris AI"
    assert training_provenance["base_model_lineage"] == "Maris AI"
    model_card = (output_dir / "README.md").read_text(encoding="utf-8")
    assert "Maris AI Model" in model_card
    assert "Qwen/" not in model_card
    assert "TinyLlama/" not in model_card
    # The model/tokenizer/adapter files seeded by the fakes must have been rewritten.
    assert saved_model_config["_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_model_config["model_type"] == "maris"
    assert saved_model_config["architectures"] == ["MarisCompatibleCausalLM"]
    assert saved_model_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert saved_tokenizer_config["name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_tokenizer_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert saved_tokenizer_json["model"]["unk_token"] == "MarisUK/maris-ai-master"
    assert saved_tokenizer_json["added_tokens"][0]["content"] == "Maris AI"
    assert saved_tokenizer_json["added_tokens"][1]["content"] == "Maris AI"
    assert "Maris AI" in saved_tokenizer_config["chat_template"]
    assert "Qwen" not in saved_tokenizer_config["chat_template"]
    assert "Maris AI" in saved_tokenizer_config["init_kwargs"]["chat_template"]
    assert saved_adapter_config["base_model_name_or_path"] == "MarisUK/maris-ai-master"
    assert saved_adapter_config["base_model_class"] == "MarisCompatibleCausalLM"
    assert saved_adapter_config["parent_library"] == "maris.compat"
    assert "Qwen" not in saved_adapter_config["description"]
    assert "Llama" not in saved_adapter_config["description"]
    assert compatibility_manifest["artifact_type"] == "maris-hf-compatibility"
    assert compatibility_manifest["maris_model_id"] == "MarisUK/maris-ai-master"
    assert "config.json" in compatibility_manifest["artifacts"]
    assert "tokenizer_config.json" in compatibility_manifest["artifacts"]
    assert "adapter_config.json" in compatibility_manifest["artifacts"]
    assert "meta-llama/" not in saved_chat_template
    assert "Claude" not in saved_chat_template
    assert "Maris AI" in saved_chat_template
    # Benchmark scores from fake_benchmark must flow into metrics and artifacts.
    assert metrics["perplexity"] < 1000
    assert metrics["benchmark_overall"] == 0.81
    assert metrics["benchmark_gate_passed"] == 1.0
    assert benchmark_manifest["artifact_type"] == "chat-benchmark-manifest"
    assert benchmark_gate["artifact_type"] == "benchmark-release-gate"
    assert benchmark_history["artifact_type"] == "chat-benchmark-history"
    assert benchmark_history["run_count"] == 1
    assert benchmark_regression["artifact_type"] == "chat-benchmark-regression-report"
    assert benchmark_regression["status"] == "no-baseline"
    assert benchmark_feedback["artifact_type"] == "benchmark-feedback-reweighting"
    assert training_metrics["scoring_dashboard"]["train"]["sources"]["unknown"]["records"] == 1
    assert training_metrics["scoring_dashboard"]["train"]["categories"]["general"]["records"] == 1
    assert training_metrics["scoring_dashboard_train_sources_unknown_records"] == 1.0
    assert training_metrics["scoring_dashboard_train_categories_general_records"] == 1.0
    # Scan the text artifacts in the output directory for foreign model names.
    _assert_output_dir_uses_only_maris_identity(output_dir)
    assert benchmark_gate["passed"] is True
    assert preference_summary["artifact_type"] == "preference-dataset-summary"
    assert benchmark_manifest["score_manifest"]["pairwise_win_rate"] == 1.0
    assert human_eval_summary["artifact_type"] == "human-eval-summary"
    assert blind_side_by_side["artifact_type"] == "blind-side-by-side-eval-set"


def test_train_pushes_to_hub_when_enabled(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """With ``HF_TRAIN_PUSH_TO_HUB=true`` the trainer's ``push_to_hub`` must be called.

    The test runs ``train`` against fake ``transformers`` objects and checks
    the exact keyword arguments recorded by the trainer stub.
    """

    class FakeDataset:
        """In-memory dataset exposing the split/map/len calls the test relies on."""

        def __init__(self, items):
            self.items = list(items)
            first_row = self.items[0] if self.items else {}
            self.column_names = list(first_row.keys())

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # First item trains, the remainder evaluates.
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            columns = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(columns)
            if not transformed:
                return FakeDataset([])
            # Turn the column-oriented result back into a list of row dicts.
            row_count = len(next(iter(transformed.values())))
            rows = [
                {key: transformed[key][position] for key in transformed}
                for position in range(row_count)
            ]
            return FakeDataset(rows)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Labdien"},
            {"user": "Kā iet?", "assistant": "Labi"},
        ]
        return {"train": FakeDataset(records)}

    monkeypatch.setenv("HF_TRAIN_PUSH_TO_HUB", "true")
    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    class FakeTokenizer:
        """Tokenizer stub returning a single fixed encoding."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            config_file = Path(output_dir, "tokenizer_config.json")
            config_file.write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        """Model stub sharing a single config object at class level."""

        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        """Stores the keyword arguments it was built with."""

        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        """Trainer stub that records ``push_to_hub`` keyword arguments."""

        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            self.push_kwargs = None
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            target = Path(output_dir)
            target.mkdir(parents=True, exist_ok=True)
            (target / "config.json").write_text("{}", encoding="utf-8")

        def push_to_hub(self, **kwargs):
            self.push_kwargs = kwargs

    def echo_collator_kwargs(**kwargs):
        # Stand-in for the data collator constructor: hand back the kwargs.
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=echo_collator_kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(output_dir=str(tmp_path / "push-model"), max_seq_length=256)

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    expected_push_kwargs = {"commit_message": "Maris AI training sync (master)"}
    assert trainer.push_kwargs == expected_push_kwargs


def test_train_prefers_existing_local_artifact_when_continue_mode_enabled(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that continue mode loads tokenizer and model from the existing output dir.

    The output directory is seeded with a ``config.json`` and a
    ``training-config.json`` whose model-source fingerprint is built from
    ``DEFAULT_TRAINING_BASE_MODEL``. ``train`` is then run with
    ``continue_from_latest_artifact=True`` against stubbed ``transformers``
    classes that record the path they were asked to load from.
    """
    import maris_core.training.train as train_module

    # Filled in by the fake ``from_pretrained`` hooks below.
    loaded_from: dict[str, str] = {}

    class FakeDataset:
        """List-backed stand-in exposing only the dataset methods used here."""

        def __init__(self, items):
            self.items = list(items)
            first_row = self.items[0] if self.items else {}
            self.column_names = list(first_row.keys())

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Deterministic split: first row trains, the remainder evaluates.
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del desc, remove_columns
            assert batched is True
            # Pivot rows into a column-oriented batch before calling ``fn``.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            outputs = fn(columns)
            row_total = len(next(iter(outputs.values()))) if outputs else 0
            rebuilt = []
            for position in range(row_total):
                rebuilt.append({name: outputs[name][position] for name in outputs})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    class FakeTokenizer:
        """Tokenizer stub that remembers where it was loaded from."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            loaded_from["tokenizer"] = model_name
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return dict(input_ids=[[1, 2, 3]], attention_mask=[[1, 1, 1]])

        def save_pretrained(self, output_dir):
            (Path(output_dir) / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        """Model stub that remembers where it was loaded from."""

        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del kwargs
            loaded_from["model"] = model_name
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        """Trainer stub returning fixed metrics and writing a minimal artifact."""

        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            target = Path(output_dir)
            target.mkdir(parents=True, exist_ok=True)
            (target / "config.json").write_text("{}", encoding="utf-8")

    def fake_load_hf_dataset(_):
        rows = [
            {"user": "Sveiki", "assistant": "Labdien"},
            {"user": "Kā iet?", "assistant": "Labi"},
        ]
        return {"train": FakeDataset(rows)}

    def passthrough_collator(**kwargs):
        return kwargs

    # Seed an artifact whose recorded fingerprint matches the default base model.
    output_dir = tmp_path / "continued-model"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_dir.joinpath("config.json").write_text("{}", encoding="utf-8")
    matching_fingerprint = train_module._build_model_source_fingerprint(
        DEFAULT_TRAINING_BASE_MODEL
    )
    output_dir.joinpath("training-config.json").write_text(
        json.dumps({train_module.MODEL_SOURCE_FINGERPRINT_KEY: matching_fingerprint}),
        encoding="utf-8",
    )

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=passthrough_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(
        output_dir=str(output_dir),
        continue_from_latest_artifact=True,
        max_seq_length=256,
    )

    expected_source = str(output_dir)
    assert loaded_from["tokenizer"] == expected_source
    assert loaded_from["model"] == expected_source


def test_train_does_not_auto_resume_from_incompatible_output_artifact(
    tmp_path: Path, monkeypatch
) -> None:
    """Check that a fingerprint mismatch keeps the configured model as training source.

    The output directory holds a ``training-config.json`` whose model-source
    fingerprint was built for a different model than the one requested via the
    overrides, while ``continue_from_latest_artifact`` is enabled.
    """
    import maris_core.training.train as train_module

    requested_model = "Qwen/Qwen2.5-1.5B-Instruct"

    artifact_dir = tmp_path / "incompatible-output"
    artifact_dir.mkdir(parents=True, exist_ok=True)
    artifact_dir.joinpath("config.json").write_text("{}", encoding="utf-8")

    # Fingerprint deliberately built from a model other than the requested one.
    foreign_fingerprint = train_module._build_model_source_fingerprint(
        "meta-llama/Llama-3.2-3B-Instruct"
    )
    stored_payload = {train_module.MODEL_SOURCE_FINGERPRINT_KEY: foreign_fingerprint}
    artifact_dir.joinpath("training-config.json").write_text(
        json.dumps(stored_payload),
        encoding="utf-8",
    )

    overrides = {
        "output_dir": str(artifact_dir),
        "model_name": requested_model,
        "continue_from_latest_artifact": True,
    }
    config = load_training_config(overrides=overrides)

    resolved_source = train_module._resolve_training_model_source(config)
    assert resolved_source == "Qwen/Qwen2.5-1.5B-Instruct"


def test_train_restores_maris_artifacts_after_push_to_hub(tmp_path: Path, monkeypatch) -> None:
    """Check that artifacts carry Maris identity after the trainer's hub push.

    The fake trainer's ``push_to_hub`` overwrites README, config, tokenizer and
    chat-template files in the output directory with foreign model names. After
    ``train`` returns, the test expects those files to contain Maris identity
    values again and expects the fake ``HfApi`` to have recorded one
    ``create_repo`` call followed by one ``upload_folder`` call.
    """

    def write_json(path, payload):
        # Shared by the fakes below to drop JSON artifacts on disk.
        Path(path).write_text(json.dumps(payload), encoding="utf-8")

    class FakeDataset:
        """List-backed stand-in exposing only the dataset methods used here."""

        def __init__(self, items):
            self.items = list(items)
            first_row = self.items[0] if self.items else {}
            self.column_names = list(first_row.keys())

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            # Pivot rows into a column-oriented batch before calling ``fn``.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            outputs = fn(columns)
            row_total = len(next(iter(outputs.values()))) if outputs else 0
            rebuilt = []
            for position in range(row_total):
                rebuilt.append({name: outputs[name][position] for name in outputs})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        rows = [
            {"user": "Sveiki", "assistant": "Labdien"},
            {"user": "Kā iet?", "assistant": "Labi"},
        ]
        return {"train": FakeDataset(rows)}

    class FakeTokenizer:
        """Tokenizer stub whose saved config contains foreign names."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return dict(input_ids=[[1, 2, 3]], attention_mask=[[1, 1, 1]])

        def save_pretrained(self, output_dir):
            write_json(
                Path(output_dir, "tokenizer_config.json"),
                {
                    "name_or_path": "MarisUK/maris-ai-master",
                    "tokenizer_class": "Qwen2TokenizerFast",
                    "chat_template": "You are Qwen and Claude in one assistant.",
                },
            )

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        """Trainer stub that writes foreign-branded files on save and on push."""

        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            target = Path(output_dir)
            target.mkdir(parents=True, exist_ok=True)
            write_json(
                target / "config.json",
                {
                    "_name_or_path": "MarisUK/maris-ai-master",
                    "model_type": "qwen2",
                    "architectures": ["Qwen2ForCausalLM"],
                },
            )
            write_json(
                target / "adapter_config.json",
                {
                    "base_model_name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                    "base_model_class": "Qwen2ForCausalLM",
                    "parent_library": "transformers.models.qwen2.modeling_qwen2",
                    "description": "Adapter built from DeepSeek and Mistral.",
                },
            )

        def push_to_hub(self, **kwargs):
            del kwargs
            # The push overwrites files in the directory given to the arguments object.
            pushed_dir = Path(self.kwargs["args"].kwargs["output_dir"])
            readme_lines = [
                "---",
                "library_name: transformers",
                "datasets:",
                "- generator",
                "---",
                "# master",
            ]
            (pushed_dir / "README.md").write_text(
                "".join(f"{line}\n" for line in readme_lines),
                encoding="utf-8",
            )
            write_json(
                pushed_dir / "config.json",
                {
                    "_name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                    "model_type": "qwen2",
                    "architectures": ["Qwen2ForCausalLM"],
                },
            )
            write_json(
                pushed_dir / "tokenizer_config.json",
                {
                    "name_or_path": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
                    "tokenizer_class": "Qwen2TokenizerFast",
                    "chat_template": "Use meta-llama/Llama-3.2-3B-Instruct with Gemini.",
                },
            )
            write_json(
                pushed_dir / "tokenizer.json",
                {
                    "model": {"type": "BPE", "unk_token": "DeepSeek-Coder"},
                    "added_tokens": [{"content": "Anthropic"}],
                },
            )
            (pushed_dir / "chat_template.jinja").write_text(
                "System prompt from Anthropic Claude and OpenAI ChatGPT.",
                encoding="utf-8",
            )

    upload_calls: list[dict[str, Any]] = []

    class FakeHfApi:
        """Hub client stub recording repo creation and folder uploads in order."""

        def __init__(self, token=None):
            self.token = token

        def create_repo(self, **kwargs):
            upload_calls.append({"create_repo": kwargs})

        def upload_folder(self, **kwargs):
            upload_calls.append(kwargs)

    def passthrough_collator(**kwargs):
        return kwargs

    monkeypatch.setenv("HF_TRAIN_PUSH_TO_HUB", "true")
    monkeypatch.setenv("HF_TOKEN", "token")
    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=passthrough_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)
    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))

    output_dir = tmp_path / "push-model"
    train(output_dir=str(output_dir), max_seq_length=256)

    def read_text(file_name):
        return (output_dir / file_name).read_text(encoding="utf-8")

    def read_json(file_name):
        return json.loads(read_text(file_name))

    readme_text = read_text("README.md")
    assert "Maris AI Model" in readme_text
    assert "generated_from_trainer" not in readme_text

    model_config = read_json("config.json")
    assert model_config["_name_or_path"] == "MarisUK/maris-ai-master"
    assert model_config["model_type"] == "maris"
    assert model_config["architectures"] == ["MarisCompatibleCausalLM"]

    tokenizer_config = read_json("tokenizer_config.json")
    assert tokenizer_config["name_or_path"] == "MarisUK/maris-ai-master"
    assert tokenizer_config["tokenizer_class"] == "MarisCompatibleTokenizer"
    assert "Maris AI" in tokenizer_config["chat_template"]

    tokenizer_payload = read_json("tokenizer.json")
    assert tokenizer_payload["model"]["unk_token"] == "Maris AI"
    assert tokenizer_payload["added_tokens"][0]["content"] == "Maris AI"

    adapter_config = read_json("adapter_config.json")
    assert adapter_config["base_model_name_or_path"] == "MarisUK/maris-ai-master"
    assert adapter_config["base_model_class"] == "MarisCompatibleCausalLM"
    assert "DeepSeek" not in adapter_config["description"]

    chat_template = read_text("chat_template.jinja")
    assert "Anthropic" not in chat_template
    assert "ChatGPT" not in chat_template
    assert "Maris AI" in chat_template

    assert (output_dir / MARIS_COMPATIBILITY_ARTIFACT_NAME).is_file()
    _assert_output_dir_uses_only_maris_identity(output_dir)

    expected_create = {
        "create_repo": {
            "repo_id": "MarisUK/maris-ai-master",
            "repo_type": "model",
            "exist_ok": True,
        }
    }
    expected_upload = {
        "folder_path": str(output_dir),
        "repo_id": "MarisUK/maris-ai-master",
        "repo_type": "model",
        "commit_message": "Maris AI artifact sync (master)",
    }
    assert upload_calls == [expected_create, expected_upload]


def test_export_model_creates_repo_before_upload(tmp_path: Path, monkeypatch) -> None:
    """Check that exporting a single model dir creates the repo, then uploads the folder.

    The export script is loaded straight from ``scripts/export_to_hf.py`` and run
    against a fake ``huggingface_hub.HfApi`` that records every call in order.
    """
    export_script = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", export_script)
    assert module_spec is not None
    assert module_spec.loader is not None
    export_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(export_module)

    model_dir = tmp_path / "model"
    model_dir.mkdir()
    (model_dir / "config.json").write_text("{}", encoding="utf-8")
    monkeypatch.setenv("HF_TOKEN", "token")

    calls: list[dict[str, object]] = []

    def record(event, payload):
        calls.append({event: payload})

    class FakeHfApi:
        """Hub client stub that logs construction, repo creation and uploads."""

        def __init__(self, token=None):
            record("init", token)

        def create_repo(self, **kwargs):
            record("create_repo", kwargs)

        def upload_folder(self, **kwargs):
            record("upload_folder", kwargs)

    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))

    export_module.export_model(str(model_dir), "MarisUK/maris-ai-master")

    expected_create = {
        "repo_id": "MarisUK/maris-ai-master",
        "repo_type": "model",
        "exist_ok": True,
    }
    expected_upload = {
        "folder_path": str(model_dir),
        "repo_id": "MarisUK/maris-ai-master",
        "repo_type": "model",
        "commit_message": "Maris AI model export",
    }
    assert calls == [
        {"init": "token"},
        {"create_repo": expected_create},
        {"upload_folder": expected_upload},
    ]


def test_export_model_publishes_branch_suite_to_runtime_repos(tmp_path: Path, monkeypatch) -> None:
    """Check that a suite dir with a manifest is exported root first, then per branch.

    The suite directory contains ``master``, ``coder``, ``image`` and ``tts``
    sub-directories plus a ``branch-suite.json`` listing their output dirs. The
    recorded hub calls must show the suite root going to the requested repo and
    each branch directory going to its own runtime repo, in manifest order.
    """
    export_script = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", export_script)
    assert module_spec is not None
    assert module_spec.loader is not None
    export_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(export_module)

    # Branch name -> runtime repo the export is expected to publish it to.
    branch_repos = (
        ("master", "MarisUK/maris-ai-text"),
        ("coder", "MarisUK/maris-ai-codex"),
        ("image", "MarisUK/maris-ai-image"),
        ("tts", "MarisUK/maris-tts-runtime"),
    )

    suite_dir = tmp_path / "suite"
    suite_dir.mkdir()
    manifest_branches = {}
    for branch_name, _repo in branch_repos:
        branch_dir = suite_dir / branch_name
        branch_dir.mkdir()
        (branch_dir / "config.json").write_text("{}", encoding="utf-8")
        manifest_branches[branch_name] = {"output_dir": str(branch_dir)}
    (suite_dir / "branch-suite.json").write_text(
        json.dumps({"branches": manifest_branches}),
        encoding="utf-8",
    )
    monkeypatch.setenv("HF_TOKEN", "token")

    calls: list[dict[str, object]] = []

    def record(event, payload):
        calls.append({event: payload})

    class FakeHfApi:
        """Hub client stub that logs construction, repo creation and uploads."""

        def __init__(self, token=None):
            record("init", token)

        def create_repo(self, **kwargs):
            record("create_repo", kwargs)

        def upload_folder(self, **kwargs):
            record("upload_folder", kwargs)

    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))

    export_module.export_model(str(suite_dir), "MarisUK/maris-ai-master")

    def expected_publish(repo_id, folder, message):
        # One publish is a repo creation immediately followed by the folder upload.
        return [
            {
                "create_repo": {
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "exist_ok": True,
                }
            },
            {
                "upload_folder": {
                    "folder_path": str(folder),
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "commit_message": message,
                }
            },
        ]

    expected_calls: list[dict[str, object]] = [{"init": "token"}]
    expected_calls.extend(
        expected_publish("MarisUK/maris-ai-master", suite_dir, "Maris AI model export")
    )
    for branch_name, repo_id in branch_repos:
        expected_calls.extend(
            expected_publish(
                repo_id,
                suite_dir / branch_name,
                f"Maris AI model export ({branch_name})",
            )
        )

    assert calls == expected_calls


def test_export_model_discovers_fallback_branch_dirs_without_manifest(
    tmp_path: Path, monkeypatch
) -> None:
    """Check that branch dirs are still exported when no ``branch-suite.json`` exists.

    The suite directory holds its own ``config.json`` plus ``master`` and
    ``coder`` sub-directories, each with a ``config.json``. The recorded hub
    calls must show the suite root being published first, followed by the
    ``master`` and ``coder`` directories to their runtime repos.
    """
    export_script = Path(__file__).resolve().parents[1] / "scripts" / "export_to_hf.py"
    module_spec = importlib.util.spec_from_file_location("export_to_hf", export_script)
    assert module_spec is not None
    assert module_spec.loader is not None
    export_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(export_module)

    # Branch name -> runtime repo the export is expected to publish it to.
    branch_repos = (
        ("master", "MarisUK/maris-ai-text"),
        ("coder", "MarisUK/maris-ai-codex"),
    )

    suite_dir = tmp_path / "suite"
    suite_dir.mkdir()
    (suite_dir / "config.json").write_text("{}", encoding="utf-8")
    for branch_name, _repo in branch_repos:
        branch_dir = suite_dir / branch_name
        branch_dir.mkdir()
        (branch_dir / "config.json").write_text("{}", encoding="utf-8")
    monkeypatch.setenv("HF_TOKEN", "token")

    calls: list[dict[str, object]] = []

    def record(event, payload):
        calls.append({event: payload})

    class FakeHfApi:
        """Hub client stub that logs construction, repo creation and uploads."""

        def __init__(self, token=None):
            record("init", token)

        def create_repo(self, **kwargs):
            record("create_repo", kwargs)

        def upload_folder(self, **kwargs):
            record("upload_folder", kwargs)

    monkeypatch.setitem(sys.modules, "huggingface_hub", types.SimpleNamespace(HfApi=FakeHfApi))

    export_module.export_model(str(suite_dir), "MarisUK/maris-ai-master")

    def expected_publish(repo_id, folder, message):
        # One publish is a repo creation immediately followed by the folder upload.
        return [
            {
                "create_repo": {
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "exist_ok": True,
                }
            },
            {
                "upload_folder": {
                    "folder_path": str(folder),
                    "repo_id": repo_id,
                    "repo_type": "model",
                    "commit_message": message,
                }
            },
        ]

    expected_calls: list[dict[str, object]] = [{"init": "token"}]
    expected_calls.extend(
        expected_publish("MarisUK/maris-ai-master", suite_dir, "Maris AI model export")
    )
    for branch_name, repo_id in branch_repos:
        expected_calls.extend(
            expected_publish(
                repo_id,
                suite_dir / branch_name,
                f"Maris AI model export ({branch_name})",
            )
        )

    assert calls == expected_calls


def test_train_filters_unsupported_training_arguments(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check that ``train`` works with an arguments class that has a closed signature.

    ``StrictTrainingArguments`` accepts a fixed set of keyword-only parameters
    and no ``**kwargs``. The test then checks that ``overwrite_output_dir`` is
    absent from the captured arguments and that ``eval_strategy`` is ``"steps"``.
    """

    class FakeDataset:
        """List-backed stand-in exposing only the dataset methods used here."""

        def __init__(self, items):
            self.items = list(items)
            first_row = self.items[0] if self.items else {}
            self.column_names = list(first_row.keys())

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            assert batched is True
            del remove_columns, desc
            # Pivot rows into a column-oriented batch before calling ``fn``.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            outputs = fn(columns)
            row_total = len(next(iter(outputs.values()))) if outputs else 0
            rebuilt = []
            for position in range(row_total):
                rebuilt.append({name: outputs[name][position] for name in outputs})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        rows = [
            {"user": "Sveiki", "assistant": "Labdien"},
            {"user": "Kā iet?", "assistant": "Labi"},
        ]
        return {"train": FakeDataset(rows)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    class FakeTokenizer:
        """Tokenizer stub that also validates the tokenization keyword arguments."""

        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            assert truncation is True
            assert max_length == 256
            assert padding is False
            # One fixed encoding per input text.
            token_rows = [[1, 2, 3] for _ in texts]
            mask_rows = [[1, 1, 1] for _ in texts]
            return {"input_ids": token_rows, "attention_mask": mask_rows}

        def save_pretrained(self, output_dir):
            (Path(output_dir) / "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name):
            assert model_name == DEFAULT_TRAINING_BASE_MODEL
            return cls()

    class StrictTrainingArguments:
        """Arguments stub with a closed keyword-only signature (no ``**kwargs``)."""

        def __init__(
            self,
            *,
            output_dir,
            num_train_epochs,
            learning_rate,
            per_device_train_batch_size,
            per_device_eval_batch_size,
            gradient_accumulation_steps,
            warmup_ratio,
            weight_decay,
            logging_steps,
            save_steps,
            eval_steps,
            save_total_limit,
            lr_scheduler_type,
            seed,
            fp16,
            bf16,
            report_to,
            save_safetensors,
            remove_unused_columns,
            eval_strategy,
            load_best_model_at_end,
            metric_for_best_model,
            greater_is_better,
        ):
            # Mirror every accepted argument so the test can inspect what arrived.
            self.kwargs = dict(
                output_dir=output_dir,
                num_train_epochs=num_train_epochs,
                learning_rate=learning_rate,
                per_device_train_batch_size=per_device_train_batch_size,
                per_device_eval_batch_size=per_device_eval_batch_size,
                gradient_accumulation_steps=gradient_accumulation_steps,
                warmup_ratio=warmup_ratio,
                weight_decay=weight_decay,
                logging_steps=logging_steps,
                save_steps=save_steps,
                eval_steps=eval_steps,
                save_total_limit=save_total_limit,
                lr_scheduler_type=lr_scheduler_type,
                seed=seed,
                fp16=fp16,
                bf16=bf16,
                report_to=report_to,
                save_safetensors=save_safetensors,
                remove_unused_columns=remove_unused_columns,
                eval_strategy=eval_strategy,
                load_best_model_at_end=load_best_model_at_end,
                metric_for_best_model=metric_for_best_model,
                greater_is_better=greater_is_better,
            )

    class FakeTrainResult:
        metrics = {"train_loss": 0.25}

    class FakeTrainer:
        """Trainer stub that keeps a handle on the most recently built instance."""

        last_instance = None

        def __init__(
            self,
            *,
            model,
            args,
            train_dataset,
            eval_dataset=None,
            data_collator=None,
        ):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.5}

        def save_model(self, output_dir):
            target = Path(output_dir)
            target.mkdir(parents=True, exist_ok=True)
            (target / "model.bin").write_text("ok", encoding="utf-8")

    def passthrough_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=passthrough_collator,
        Trainer=FakeTrainer,
        TrainingArguments=StrictTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    metrics = train(output_dir=str(tmp_path / "trained-model"), max_seq_length=256)

    assert metrics["eval_loss"] == 0.5
    assert FakeTrainer.last_instance is not None
    received_arguments = FakeTrainer.last_instance.args.kwargs
    assert "overwrite_output_dir" not in received_arguments
    assert received_arguments["eval_strategy"] == "steps"


def test_build_branch_training_configs_creates_branch_output_dirs() -> None:
    """Check the per-branch configs derived from one base training config.

    Covers the set of branch names, and selected fields of the ``coder``,
    ``planner``, ``master`` and ``image`` branch configs.
    """
    base_overrides = {
        "output_dir": "/tmp/maris-branch",
        "eval_dataset_repo": "MarisUK/maris-ai-evals",
    }
    configs = build_branch_training_configs(load_training_config(overrides=base_overrides))

    def config_for(branch_name):
        # First config carrying the requested branch name.
        return next(candidate for candidate in configs if candidate.branch_name == branch_name)

    expected_branches = {"master", "coder", "planner", "image", "music", "tts", "stt", "video"}
    branch_names = {candidate.branch_name for candidate in configs}
    assert expected_branches == branch_names

    coder_config = config_for("coder")
    assert coder_config.output_dir.endswith("/coder")
    assert coder_config.eval_dataset_repo == "MarisUK/maris-ai-evals"
    assert coder_config.benchmark_gate_enabled is True
    assert coder_config.benchmark_min_overall >= 0.76
    assert coder_config.benchmark_dataset_path.endswith(
        "core-python/evals/coder_release_benchmark.json"
    )
    assert coder_config.preference_dataset_path.endswith(
        "core-python/evals/coder_preference_dataset.json"
    )
    assert coder_config.quality_min_text_chars >= 18
    coder_weights = coder_config.category_weight_map
    assert coder_weights["coding"] >= 1.35
    assert coder_weights["grounding"] >= 1.25

    planner_config = config_for("planner")
    assert planner_config.benchmark_gate_enabled is True
    assert planner_config.benchmark_min_overall >= 0.76
    assert planner_config.benchmark_dataset_path.endswith(
        "core-python/evals/planner_release_benchmark.json"
    )

    master_config = config_for("master")
    assert master_config.hub_model_id == "MarisUK/maris-ai-text"
    assert master_config.benchmark_gate_enabled is True
    assert master_config.quality_min_text_chars >= 12

    image_config = config_for("image")
    assert image_config.adapter_type == "specialist_model"
    assert image_config.hub_model_id == "MarisUK/maris-ai-image"


def test_train_branch_suite_writes_external_manifests_for_specialists(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Check the results and on-disk manifests produced by ``train_branch_suite``.

    ``train_with_config`` is replaced by a stub, so only the suite orchestration
    is exercised: per-branch statuses, the ``branch-config.json`` files for the
    ``image`` and ``tts`` branches, and the contents of ``branch-suite.json``.
    """
    suite_root = tmp_path / "branches"
    base_config = load_training_config(overrides={"output_dir": str(suite_root)})

    def fake_train_with_config(branch_config):
        return {"branch_len": float(len(branch_config.branch_name))}

    monkeypatch.setattr("maris_core.training.train.train_with_config", fake_train_with_config)

    results = train_branch_suite(base_config)

    manifest_path = suite_root / "branch-suite.json"
    branch_suite = json.loads(manifest_path.read_text(encoding="utf-8"))

    assert results["master"]["status"] == "trained"
    assert results["master"]["maris_origin"] == "Maris AI"
    remaining_statuses = (
        ("coder", "trained"),
        ("image", "external_specialist"),
        ("tts", "external_specialist"),
    )
    for branch_name, status in remaining_statuses:
        assert results[branch_name]["status"] == status

    for specialist in ("image", "tts"):
        assert (suite_root / specialist / "branch-config.json").is_file()
    assert manifest_path.is_file()

    assert branch_suite["artifact_type"] == "branch-suite"
    assert branch_suite["maris_origin"] == "Maris AI"
    assert branch_suite["dataset_repo"] == "MarisUK/maris-ai-memory"
    suite_branches = branch_suite["branches"]
    assert suite_branches["image"]["maris_origin"] == "Maris AI"
    assert suite_branches["stt"]["maris_model_id"] == "MarisUK/maris-stt-runtime"


def test_post_training_benchmark_results_use_maris_model_id(tmp_path: Path, monkeypatch) -> None:
    """The benchmark payload reports the configured hub model id, per result and overall.

    The generation pipeline, the benchmark runner and the manifest builder are all
    replaced by fakes, so the payload reflects only what
    ``_run_post_training_benchmark`` passes between them.
    """
    maris_model_id = "MarisUK/maris-ai-master-trained"
    benchmark_cases = [{"name": "identity", "message": "Kas tu esi?", "expected_terms": ["Maris"]}]
    benchmark_path = tmp_path / "benchmark.json"
    benchmark_path.write_text(json.dumps(benchmark_cases), encoding="utf-8")

    config = load_training_config(
        overrides={
            "benchmark_dataset_path": str(benchmark_path),
            "benchmark_levels": ["ci"],
            "hub_model_id": maris_model_id,
        }
    )

    class FakePipeline:
        pass

    def fake_pipeline(*_args, **_kwargs):
        return FakePipeline()

    async def fake_run_chat_benchmark_with_responder(cases, *, responder, concurrency):
        del concurrency
        # Only the first case is answered; its reply becomes the single result.
        reply = await responder(cases[0])
        return [types.SimpleNamespace(model=reply["model"], response=reply["response"])]

    def fake_build_chat_benchmark_manifest(results, *, benchmark_name, branch, model):
        first_result = results[0]
        return {
            "benchmark_name": benchmark_name,
            "branch": branch,
            "model": model,
            "results": [{"model": first_result.model, "response": first_result.response}],
        }

    def fake_call_generation_pipeline(*_args, **_kwargs):
        return [{"generated_text": "Es esmu Maris AI."}]

    monkeypatch.setitem(sys.modules, "transformers", types.SimpleNamespace(pipeline=fake_pipeline))
    patched_attributes = (
        (
            "maris_core.training.train.run_chat_benchmark_with_responder",
            fake_run_chat_benchmark_with_responder,
        ),
        (
            "maris_core.training.train.call_generation_pipeline",
            fake_call_generation_pipeline,
        ),
        (
            "maris_core.training.train.build_chat_benchmark_manifest",
            fake_build_chat_benchmark_manifest,
        ),
    )
    for target, replacement in patched_attributes:
        monkeypatch.setattr(target, replacement)

    model_dir = str(tmp_path / "trained-model")
    payload = asyncio.run(_run_post_training_benchmark(config, model_path=model_dir))

    expected_payload = {
        "benchmark_name": config.benchmark_name,
        "branch": config.branch_name,
        "model": maris_model_id,
        "results": [
            {
                "model": maris_model_id,
                "response": "Es esmu Maris AI.",
            }
        ],
    }
    assert payload == expected_payload


def test_post_training_benchmark_filters_cases_by_branch(tmp_path: Path, monkeypatch) -> None:
    """Only the case tagged for the configured branch is handed to the benchmark runner.

    The benchmark file holds one ``master`` case and one ``coder`` case; with
    ``branch_name="coder"`` the fake runner must see just ``coder-case``.
    """
    master_case = {
        "name": "master-case",
        "message": "Sveiki",
        "branches": ["master"],
        "level": "ci",
    }
    coder_case = {
        "name": "coder-case",
        "message": "Uzraksti Python helperi",
        "profile": "coder",
        "branches": ["coder"],
        "level": "ci",
    }
    benchmark_path = tmp_path / "benchmark.json"
    benchmark_path.write_text(json.dumps([master_case, coder_case]), encoding="utf-8")

    config = load_training_config(
        overrides={
            "branch_name": "coder",
            "benchmark_dataset_path": str(benchmark_path),
            "benchmark_levels": ["ci"],
        }
    )

    # Names of the cases the fake runner receives, in arrival order.
    seen_case_names: list[str] = []

    class FakePipeline:
        pass

    def fake_pipeline(*_args, **_kwargs):
        return FakePipeline()

    async def fake_run_chat_benchmark_with_responder(cases, *, responder, concurrency):
        del responder, concurrency
        for case in cases:
            seen_case_names.append(case.name)
        return []

    def fake_build_chat_benchmark_manifest(results, *, benchmark_name, branch, model):
        manifest = {"benchmark_name": benchmark_name, "branch": branch}
        manifest["model"] = model
        manifest["results"] = results
        return manifest

    monkeypatch.setitem(sys.modules, "transformers", types.SimpleNamespace(pipeline=fake_pipeline))
    patched_attributes = (
        (
            "maris_core.training.train.run_chat_benchmark_with_responder",
            fake_run_chat_benchmark_with_responder,
        ),
        (
            "maris_core.training.train.build_chat_benchmark_manifest",
            fake_build_chat_benchmark_manifest,
        ),
    )
    for target, replacement in patched_attributes:
        monkeypatch.setattr(target, replacement)

    model_dir = str(tmp_path / "trained-model")
    asyncio.run(_run_post_training_benchmark(config, model_path=model_dir))

    assert seen_case_names == ["coder-case"]


def test_filter_records_for_branch_keeps_coder_specific_mix() -> None:
    """With default rules, the coder branch keeps the code record and drops the other two."""
    conversation_record = {"type": "conversation", "user": "Sveiki", "assistant": "Čau"}
    code_record = {
        "type": "code",
        "prompt": "Salabo parseri",
        "metadata": {"language": "python", "task": "bugfix", "project_area": "core-python"},
    }
    autonomous_record = {
        "type": "autonomous",
        "prompt": "Investigate CI",
        "metadata": {"workflow": "ci-triage", "project_area": "operations"},
    }
    mixed_records = [conversation_record, code_record, autonomous_record]

    kept, report = _filter_records_for_branch(mixed_records, branch_name="coder", split_name="train")

    assert len(kept) == 1
    assert kept[0]["type"] == "code"
    assert report.kept_records == 1
    assert report.dropped_records == 2


def test_filter_records_for_branch_keeps_master_general_mix() -> None:
    """With default rules, the master branch keeps only the plain conversation record."""
    mixed_records = [
        {"type": "conversation", "user": "Sveiki", "assistant": "Čau"},
        {"type": "code", "prompt": "Uzraksti helperi", "profile": "coder"},
        {"type": "autonomous", "prompt": "Plan sprint", "branch": "planner"},
    ]

    kept, report = _filter_records_for_branch(mixed_records, branch_name="master", split_name="train")

    kept_types = [kept_record["type"] for kept_record in kept]
    assert kept_types == ["conversation"]
    assert report.kept_records == 1
    assert report.dropped_records == 2


def test_filter_records_for_branch_uses_custom_rule_map() -> None:
    """A caller-supplied rule map decides what the coder branch keeps.

    The custom rules include only ``conversation`` records for ``coder``, so the
    code record is the one that gets dropped.
    """
    custom_rules = {
        "coder": {
            "include_record_types": ["conversation"],
            "exclude_explicit_branches": ["planner"],
        }
    }
    mixed_records = [
        {"type": "conversation", "user": "Sveiki", "assistant": "Čau"},
        {"type": "code", "prompt": "Uzraksti helperi", "profile": "coder"},
    ]

    kept, report = _filter_records_for_branch(
        mixed_records,
        branch_name="coder",
        split_name="train",
        branch_filter_rules=custom_rules,
    )

    kept_types = [kept_record["type"] for kept_record in kept]
    assert kept_types == ["conversation"]
    assert report.kept_records == 1
    assert report.dropped_records == 1


def test_filter_preference_examples_for_branch_keeps_coder_examples_only() -> None:
    """Coder-branch filtering keeps a non-empty set of coder-tagged preference examples.

    The dataset is located by walking up from this test file until an ancestor
    directory containing ``core-python/evals/coder_preference_dataset.json`` is
    found, so the test does not depend on one particular checkout location. The
    absolute CI-runner path used previously remains as the fallback.
    """
    dataset_relpath = Path("core-python") / "evals" / "coder_preference_dataset.json"
    examples_path = next(
        (
            ancestor / dataset_relpath
            for ancestor in Path(__file__).resolve().parents
            if (ancestor / dataset_relpath).is_file()
        ),
        # Fallback: the absolute path this test originally hard-coded.
        Path("/home/runner/work/Maris-MI/Maris-MI") / dataset_relpath,
    )
    examples = load_preference_dataset(examples_path)

    filtered = _filter_preference_examples_for_branch(
        examples,
        branch_name="coder",
    )

    # An empty result would make the ``all(...)`` check below pass vacuously.
    assert filtered
    assert all((example.branch or "").lower() == "coder" for example in filtered)


def test_filter_preference_examples_for_branch_uses_custom_rule_map() -> None:
    """A custom rule map restricts planner-branch preference examples by task type.

    The dataset is located by walking up from this test file until an ancestor
    directory containing ``core-python/evals/coder_preference_dataset.json`` is
    found, so the test does not depend on one particular checkout location. The
    absolute CI-runner path used previously remains as the fallback.
    """
    dataset_relpath = Path("core-python") / "evals" / "coder_preference_dataset.json"
    examples_path = next(
        (
            ancestor / dataset_relpath
            for ancestor in Path(__file__).resolve().parents
            if (ancestor / dataset_relpath).is_file()
        ),
        # Fallback: the absolute path this test originally hard-coded.
        Path("/home/runner/work/Maris-MI/Maris-MI") / dataset_relpath,
    )
    examples = load_preference_dataset(examples_path)

    filtered = _filter_preference_examples_for_branch(
        examples,
        branch_name="planner",
        branch_filter_rules={
            "planner": {
                "include_task_types": ["repo-level"],
            }
        },
    )

    # At least one example must survive so the ``all(...)`` check is meaningful.
    assert len(filtered) >= 1
    assert all(example.task_type == "repo-level" for example in filtered)


def test_train_uses_external_eval_dataset_when_configured(tmp_path: Path, monkeypatch) -> None:
    """Run ``train`` against fakes with a dedicated eval repo configured.

    The dataset loader and the ``transformers`` module are replaced by in-test
    fakes. The test then checks the returned eval loss, the order in which repos
    were loaded (training repo first, eval repo second) and the sizes of the
    datasets handed to the fake trainer.
    """
    # Repo ids passed to the fake loader, in call order.
    dataset_calls: list[str] = []

    class FakeSplit(list):
        # List-backed stand-in for a dataset split; every row is a dict with a "text" key.
        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            # Call ``function`` once with a column-oriented batch of all rows, then
            # rebuild a list of row dicts from the columns it returns.
            batch = {"text": [item["text"] for item in self]}
            mapped = function(batch)
            size = len(next(iter(mapped.values()))) if mapped else 0
            return FakeSplit(
                [{key: value[index] for key, value in mapped.items()} for index in range(size)]
            )

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Deterministic split: everything except the last row goes to "train"
            # (at least one row), the remainder to "test".
            midpoint = max(1, len(self) - 1)
            return {"train": FakeSplit(self[:midpoint]), "test": FakeSplit(self[midpoint:])}

    def fake_load_hf_dataset(repo_id: str):
        # Record the request and serve canned rows; any other repo id fails the test.
        dataset_calls.append(repo_id)
        if repo_id == "MarisUK/maris-ai-memory":
            return {"train": FakeSplit([{"text": "train-1"}, {"text": "train-2"}])}
        if repo_id == "MarisUK/maris-ai-evals":
            return {"train": FakeSplit([{"text": "eval-1"}])}
        raise AssertionError(f"Unexpected repo id: {repo_id}")

    class FakeTokenizer:
        # Starts without a pad token but with an EOS token/id.
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            # One single-token sequence per input text; the token id is the 1-based position.
            return {
                "input_ids": [[index + 1] for index, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            # Write a placeholder tokenizer file into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        config = types.SimpleNamespace(pad_token_id=None)

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

    class FakeTrainingArguments:
        # Stores whatever keyword arguments it is constructed with.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        # Most recently constructed instance, so the test can inspect its datasets.
        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            # Write a placeholder model file into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    # Replace the ``transformers`` entry in ``sys.modules`` with a namespace exposing the fakes.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        dataset_repos=["MarisUK/maris-ai-memory"],
        eval_dataset_repo="MarisUK/maris-ai-evals",
        eval_dataset_repos=["MarisUK/maris-ai-evals"],
    )

    assert metrics["eval_loss"] == 0.2
    # Each repo must be loaded exactly once: training repo first, eval repo second.
    assert dataset_calls == ["MarisUK/maris-ai-memory", "MarisUK/maris-ai-evals"]
    assert FakeTrainer.last_instance is not None
    # NOTE(review): the training repo serves two rows but only one is expected to reach
    # the trainer; the reduction happens inside ``train`` and is not visible here.
    assert len(FakeTrainer.last_instance.train_dataset) == 1
    assert len(FakeTrainer.last_instance.eval_dataset) == 1


def test_train_merges_multiple_dataset_repos_for_training_and_eval(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Run ``train`` with four training repos and two eval repos, all served by fakes.

    The test checks the returned eval loss, that the loader is called for every
    training repo and then for every eval repo (in the configured order), and
    that the fake trainer receives four training rows and two eval rows.
    """
    # Repo ids passed to the fake loader, in call order.
    dataset_calls: list[str] = []

    class FakeSplit(list):
        # List-backed stand-in for a dataset split; every row is a dict with a "text" key.
        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            # Call ``function`` once with a column-oriented batch of all rows, then
            # rebuild a list of row dicts from the columns it returns.
            batch = {"text": [item["text"] for item in self]}
            mapped = function(batch)
            size = len(next(iter(mapped.values()))) if mapped else 0
            return FakeSplit(
                [{key: value[index] for key, value in mapped.items()} for index in range(size)]
            )

    # Canned content per repo: one "train" row and one "validation" row each.
    repo_rows = {
        "MarisUK/maris-ai-memory": {
            "train": [{"text": "memory-train"}],
            "validation": [{"text": "memory-val"}],
        },
        "MarisUK/maris-ai-lv-memory": {
            "train": [{"text": "lv-train"}],
            "validation": [{"text": "lv-val"}],
        },
        "MarisUK/maris-ai-evals": {
            "train": [{"text": "eval-train"}],
            "validation": [{"text": "eval-val"}],
        },
        "MarisUK/maris-ai-benchmark": {
            "train": [{"text": "bench-train"}],
            "validation": [{"text": "bench-val"}],
        },
    }

    def fake_load_hf_dataset(repo_id: str):
        # Record the request; unknown repo ids fail the test immediately.
        dataset_calls.append(repo_id)
        if repo_id not in repo_rows:
            raise AssertionError(f"Unexpected repo id: {repo_id}")
        payload = repo_rows[repo_id]
        # Fresh FakeSplit objects per call, built from copies of the canned row lists.
        return {split_name: FakeSplit(list(records)) for split_name, records in payload.items()}

    class FakeTokenizer:
        # Starts without a pad token but with an EOS token/id.
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            # One single-token sequence per input text; the token id is the 1-based position.
            return {
                "input_ids": [[index + 1] for index, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            # Write a placeholder tokenizer file into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        config = types.SimpleNamespace(pad_token_id=None)

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

    class FakeTrainingArguments:
        # Stores whatever keyword arguments it is constructed with.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        # Most recently constructed instance, so the test can inspect its datasets.
        last_instance = None

        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, data_collator
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset
            FakeTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.1})

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            # Write a placeholder model file into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "model.bin").write_text("ok", encoding="utf-8")

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    # Replace the ``transformers`` entry in ``sys.modules`` with a namespace exposing the fakes.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        dataset_repo="MarisUK/maris-ai-memory",
        dataset_repos=[
            "MarisUK/maris-ai-memory",
            "MarisUK/maris-ai-lv-memory",
            "MarisUK/maris-ai-evals",
            "MarisUK/maris-ai-benchmark",
        ],
        eval_dataset_repo="MarisUK/maris-ai-evals",
        eval_dataset_repos=[
            "MarisUK/maris-ai-evals",
            "MarisUK/maris-ai-benchmark",
        ],
    )

    assert metrics["eval_loss"] == 0.2
    # The four training repos are loaded first, followed by the two eval repos; repos
    # that appear in both lists are expected to be loaded twice.
    assert dataset_calls == [
        "MarisUK/maris-ai-memory",
        "MarisUK/maris-ai-lv-memory",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
        "MarisUK/maris-ai-evals",
        "MarisUK/maris-ai-benchmark",
    ]
    assert FakeTrainer.last_instance is not None
    # NOTE(review): the counts match one row per training repo and one per eval repo;
    # which split of each repo is used is decided inside ``train`` and not visible here.
    assert len(FakeTrainer.last_instance.train_dataset) == 4
    assert len(FakeTrainer.last_instance.eval_dataset) == 2


def test_evaluate_with_config_prefers_external_eval_dataset(tmp_path: Path, monkeypatch) -> None:
    """Evaluate a prepared model directory against the external eval repo using fakes.

    The test writes model and tokenizer configs, applies the Maris compatibility
    helpers to the directory, and then runs ``evaluate_with_config`` with the
    dataset loader, ``transformers`` and the post-training benchmark all faked.
    It checks the returned metrics, that only the eval repo was loaded, that the
    benchmark artifacts exist, and that the on-disk configs in the model
    directory carry the Maris identity afterwards.
    """
    # Repo ids passed to the fake loader, in call order.
    dataset_calls: list[str] = []
    trained_model_dir = tmp_path / "trained-model"
    trained_model_dir.mkdir(parents=True, exist_ok=True)
    # Start from configs that carry qwen2 model/tokenizer details.
    (trained_model_dir / "config.json").write_text(
        json.dumps(
            {
                "_name_or_path": "MarisUK/maris-ai-master",
                "model_type": "qwen2",
                "architectures": ["Qwen2ForCausalLM"],
                "tokenizer_class": "Qwen2TokenizerFast",
                "auto_map": {"AutoModelForCausalLM": "qwen2.modeling_qwen2.Qwen2ForCausalLM"},
            }
        ),
        encoding="utf-8",
    )
    (trained_model_dir / "tokenizer_config.json").write_text(
        json.dumps(
            {
                "name_or_path": "MarisUK/maris-ai-master",
                "tokenizer_class": "Qwen2TokenizerFast",
            }
        ),
        encoding="utf-8",
    )
    # Order matters: the compatibility artifact is written before the identity is applied.
    write_maris_compatibility_artifact(
        trained_model_dir,
        maris_model_id="MarisUK/maris-ai-master",
    )
    apply_maris_compatibility_identity(trained_model_dir)

    class FakeSplit(list):
        # List-backed stand-in for a dataset split; every row is a dict with a "text" key.
        column_names = ["text"]

        def map(self, function, **kwargs):
            del kwargs
            # Call ``function`` once with a column-oriented batch of all rows, then
            # rebuild a list of row dicts from the columns it returns.
            batch = {"text": [item["text"] for item in self]}
            mapped = function(batch)
            size = len(next(iter(mapped.values()))) if mapped else 0
            return FakeSplit(
                [{key: value[index] for key, value in mapped.items()} for index in range(size)]
            )

    def fake_load_hf_dataset(repo_id: str):
        # Only the eval repo may be requested; anything else fails the test.
        dataset_calls.append(repo_id)
        if repo_id == "MarisUK/maris-ai-evals":
            return {"train": FakeSplit([{"text": "eval-1"}, {"text": "eval-2"}])}
        raise AssertionError(f"Unexpected repo id: {repo_id}")

    class FakeTokenizer:
        pad_token = None
        eos_token = "<eos>"
        pad_token_id = None
        eos_token_id = 7

        @classmethod
        def from_pretrained(cls, model_name):
            # The loader must be pointed at a directory other than the trained model
            # directory, and that directory must hold the qwen2 tokenizer class.
            loaded_dir = Path(model_name)
            assert loaded_dir != trained_model_dir
            tokenizer_config = json.loads(
                loaded_dir.joinpath("tokenizer_config.json").read_text(encoding="utf-8")
            )
            assert tokenizer_config["tokenizer_class"] == "Qwen2TokenizerFast"
            return cls()

        def __call__(self, texts, **kwargs):
            del kwargs
            # One single-token sequence per input text; the token id is the 1-based position.
            return {
                "input_ids": [[index + 1] for index, _ in enumerate(texts)],
                "attention_mask": [[1] for _ in texts],
            }

    class FakeModel:
        @classmethod
        def from_pretrained(cls, model_name):
            # Same expectation as for the tokenizer: a different directory whose
            # config.json holds the qwen2 model type and architecture.
            loaded_dir = Path(model_name)
            assert loaded_dir != trained_model_dir
            model_config = json.loads(
                loaded_dir.joinpath("config.json").read_text(encoding="utf-8")
            )
            assert model_config["model_type"] == "qwen2"
            assert model_config["architectures"] == ["Qwen2ForCausalLM"]
            return cls()

    class FakeTrainingArguments:
        # Stores whatever keyword arguments it is constructed with.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        # Evaluation-only trainer: it accepts no ``train_dataset`` argument.
        def __init__(self, *, model, args, eval_dataset=None, data_collator=None):
            del model, args, data_collator
            self.eval_dataset = eval_dataset

        def evaluate(self):
            return {"eval_loss": 0.3}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    # Replace the ``transformers`` entry in ``sys.modules`` with a namespace exposing the fakes.
    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    # The benchmark file is never created in this test; the benchmark run itself is
    # replaced by ``fake_benchmark`` below.
    config = load_training_config(
        overrides={
            "output_dir": str(trained_model_dir),
            "eval_dataset_repo": "MarisUK/maris-ai-evals",
            "eval_dataset_repos": ["MarisUK/maris-ai-evals"],
            "benchmark_dataset_path": str(tmp_path / "benchmark.json"),
            "benchmark_levels": ["ci"],
        }
    )

    async def fake_benchmark(config, *, model_path):
        # Return a fixed benchmark manifest; the model path must end in "trained-model".
        assert model_path.endswith("trained-model")
        return {
            "artifact_type": "chat-benchmark-manifest",
            "benchmark_name": config.benchmark_name,
            "branch": config.branch_name,
            "model": config.hub_model_id,
            "generated_at": "2026-04-16T00:00:00Z",
            "score_manifest": {
                "overall": 0.79,
                "reasoning": 0.76,
                "factuality": 0.75,
                "helpfulness": 0.8,
                "execution": 0.75,
            },
            "category_scores": {"coding": 0.74},
            "execution_language_pass_rates": {"python": 1.0},
            "execution_language_scores": {"python": 0.74},
            "category_execution_pass_rates": {"coding": 1.0},
        }

    monkeypatch.setattr("maris_core.training.train._run_post_training_benchmark", fake_benchmark)

    metrics = evaluate_with_config(config, model_path=str(trained_model_dir))

    assert metrics["eval_loss"] == 0.3
    assert metrics["eval_examples"] == 2.0
    assert metrics["benchmark_overall"] == 0.79
    assert metrics["benchmark_gate_passed"] == 1.0
    assert metrics["benchmark_regressions"] == 0.0
    assert dataset_calls == ["MarisUK/maris-ai-evals"]
    # Benchmark artifacts are expected inside the trained model directory.
    assert (trained_model_dir / "benchmark-manifest.json").is_file()
    assert (trained_model_dir / "benchmark-history.json").is_file()
    assert (trained_model_dir / "benchmark-regression-report.json").is_file()
    # After evaluation, the configs in the model directory must show the Maris identity
    # rather than the qwen2 values written at the start of the test.
    assert (
        json.loads((trained_model_dir / "config.json").read_text(encoding="utf-8"))["model_type"]
        == "maris"
    )
    assert (
        json.loads((trained_model_dir / "tokenizer_config.json").read_text(encoding="utf-8"))[
            "tokenizer_class"
        ]
        == "MarisCompatibleTokenizer"
    )


def test_load_training_config_reads_peft_and_preference_optimization_settings(
    tmp_path: Path,
) -> None:
    """PEFT/QLoRA and preference-optimization settings written to JSON load unchanged.

    Every written setting except ``preference_dataset_path`` is compared with the
    attribute of the same name on the loaded config.
    """
    written_settings = {
        "adapter_type": "qlora",
        "lora_r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.15,
        "lora_bias": "all",
        "peft_target_modules": ["q_proj", "v_proj"],
        "qlora_quant_type": "fp4",
        "qlora_use_double_quant": False,
        "qlora_compute_dtype": "bfloat16",
        "preference_dataset_path": "/tmp/preferences.json",
        "preference_optimization": "dpo",
        "preference_beta": 0.25,
        "preference_max_prompt_length": 256,
        "preference_max_length": 768,
        "preference_reference_model": "MarisUK/maris-ai-master",
    }
    config_path = tmp_path / "training.json"
    config_path.write_text(json.dumps(written_settings), encoding="utf-8")

    config = load_training_config(str(config_path))

    for field_name, expected in written_settings.items():
        if field_name == "preference_dataset_path":
            # Written to the file but deliberately not checked on the loaded config.
            continue
        loaded = getattr(config, field_name)
        if isinstance(expected, bool):
            # Boolean settings must round-trip as the exact singleton, not a truthy/falsy value.
            assert loaded is expected, field_name
        else:
            assert loaded == expected, field_name


def test_train_runs_qlora_and_dpo_preference_stage(tmp_path: Path, monkeypatch) -> None:
    """Run ``train`` with ``adapter_type="qlora"`` and a DPO preference dataset, all faked.

    ``transformers``, ``peft`` and ``trl`` are replaced in ``sys.modules`` by
    namespaces of in-test fakes, and the dataset loader is stubbed. The test
    checks the returned training and preference metrics, the arguments captured
    by the fake quantization and LoRA configs, and what the fake DPO trainer
    was constructed with.
    """

    class FakeDataset:
        # Minimal dataset stand-in wrapping a list of row dicts.
        def __init__(self, items):
            self.items = list(items)
            # Column names come from the first row; an empty dataset has none.
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            del test_size, seed
            # Deterministic split: the first row is "train", everything after it is "test".
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            # Callers must request batched mapping.
            assert batched is True
            del remove_columns, desc
            # Call ``fn`` once with a column-oriented batch of all rows, then rebuild
            # row dicts from the columns it returns.
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    # Any repo id yields the same two-row conversation dataset.
    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )

    # A single preference example on disk for the preference stage.
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(
        json.dumps(
            [
                {
                    "prompt": "Atbildi korekti",
                    "chosen": "Šī ir labākā atbilde.",
                    "rejected": "Nē.",
                    "source": "human_review",
                }
            ]
        ),
        encoding="utf-8",
    )

    # Call logs filled by the fakes below and inspected by the final assertions.
    model_load_calls: list[dict[str, object]] = []
    bnb_calls: list[dict[str, object]] = []
    lora_config_calls: list[dict[str, object]] = []

    class FakeTokenizer:
        # Starts without a pad token but with an EOS token/id.
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name):
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            # The three keyword-only arguments are required, so callers must pass them.
            del truncation, max_length, padding
            return {
                "input_ids": [[1, 2, 3] for _ in texts],
                "attention_mask": [[1, 1, 1] for _ in texts],
            }

        def save_pretrained(self, output_dir):
            # Write a placeholder tokenizer config into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeBitsAndBytesConfig:
        # Logs its constructor arguments in ``bnb_calls``.
        def __init__(self, **kwargs):
            bnb_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)
            # Flags set by the fake peft helpers / ``print_trainable_parameters``.
            self.prepared_for_kbit = False
            self.peft_config = None
            self.trainable_parameters_printed = False

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            model_load_calls.append({"model_name": model_name, "kwargs": kwargs})
            return cls()

        def print_trainable_parameters(self):
            self.trainable_parameters_printed = True

    class FakeTrainingArguments:
        # Stores whatever keyword arguments it is constructed with.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del data_collator
            self.model = model
            self.args = args
            self.train_dataset = train_dataset
            self.eval_dataset = eval_dataset

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            # Write placeholder adapter and model configs into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "adapter_config.json").write_text("{}", encoding="utf-8")
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        BitsAndBytesConfig=FakeBitsAndBytesConfig,
        DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    class FakeLoraConfig:
        # Logs its constructor arguments in ``lora_config_calls``.
        def __init__(self, **kwargs):
            lora_config_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeAutoPeftModelForCausalLM:
        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            # Loads through this class are tagged with ``auto_peft`` in the shared call log.
            model_load_calls.append({"model_name": model_name, "kwargs": kwargs, "auto_peft": True})
            return FakeModel()

    def fake_prepare_model_for_kbit_training(model, use_gradient_checkpointing):
        # Record the flag on the model and hand the same model back.
        model.prepared_for_kbit = use_gradient_checkpointing
        return model

    def fake_get_peft_model(model, peft_config):
        # Attach the config to the model and hand the same model back.
        model.peft_config = peft_config
        return model

    monkeypatch.setitem(
        sys.modules,
        "peft",
        types.SimpleNamespace(
            AutoPeftModelForCausalLM=FakeAutoPeftModelForCausalLM,
            LoraConfig=FakeLoraConfig,
            TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
            get_peft_model=fake_get_peft_model,
            prepare_model_for_kbit_training=fake_prepare_model_for_kbit_training,
        ),
    )

    class FakeDPOConfig:
        # Stores whatever keyword arguments it is constructed with.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeDPOTrainer:
        # Most recently constructed instance, so the test can inspect its kwargs.
        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            FakeDPOTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"loss": 0.12})

        def save_model(self, output_dir):
            # Write placeholder adapter and model configs into ``output_dir``.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "adapter_config.json").write_text("{}", encoding="utf-8")
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "trl",
        types.SimpleNamespace(DPOConfig=FakeDPOConfig, DPOTrainer=FakeDPOTrainer),
    )

    output_dir = tmp_path / "trained-model"
    metrics = train(
        output_dir=str(output_dir),
        max_seq_length=256,
        adapter_type="qlora",
        qlora_compute_dtype="float16",
        qlora_quant_type="nf4",
        qlora_use_double_quant=True,
        lora_r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        peft_target_modules=["q_proj", "v_proj"],
        preference_dataset_path=str(preference_dataset_path),
        preference_optimization="dpo",
        preference_beta=0.2,
        preference_max_prompt_length=128,
        preference_max_length=512,
    )

    # Metrics from the fake supervised trainer and the fake DPO trainer.
    assert metrics["train_loss"] == 0.2
    assert metrics["preference_loss"] == 0.12
    assert metrics["preference_examples"] == 1.0
    assert metrics["preference_stage"] == 1.0
    # The first quantization config must request 4-bit loading with the configured quant type.
    assert bnb_calls[0]["load_in_4bit"] is True
    assert bnb_calls[0]["bnb_4bit_quant_type"] == "nf4"
    # The first LoRA config must carry the rank, alpha and target modules passed to ``train``.
    assert lora_config_calls[0]["r"] == 8
    assert lora_config_calls[0]["lora_alpha"] == 16
    assert lora_config_calls[0]["target_modules"] == ["q_proj", "v_proj"]
    assert FakeDPOTrainer.last_instance is not None
    # The DPO trainer must have been given a ``ref_model`` keyword and the single example.
    assert "ref_model" in FakeDPOTrainer.last_instance.kwargs
    assert len(FakeDPOTrainer.last_instance.kwargs["train_dataset"]) == 1
    # At least one model load must have gone through the fake AutoPeft loader.
    assert any(call.get("auto_peft") for call in model_load_calls)


def test_train_runs_orpo_preference_stage_without_reference_model(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Verify that the ORPO preference stage is run without a reference model.

    ``transformers`` and ``trl`` are swapped for in-memory fakes, so the test
    only observes the metrics ``train`` returns and the keyword arguments it
    hands to the ORPO trainer.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            if self.items:
                self.column_names = list(self.items[0].keys())
            else:
                self.column_names = []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            # ``fn`` always receives a column-oriented batch; rows are rebuilt afterwards.
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            mapped = fn(columns)
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        conversations = [
            {"user": "Sveiki", "assistant": "Labdien"},
            {"user": "Kā iet?", "assistant": "Labi"},
        ]
        return {"train": FakeDataset(conversations)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    # A single preference pair is enough to drive the preference stage.
    preference_records = [
        {
            "prompt": "Atbildi pieklājīgi",
            "chosen": "Protams, palīdzēšu.",
            "rejected": "Nē.",
            "source": "human_review",
        }
    ]
    preference_dataset_path = tmp_path / "preferences.json"
    preference_dataset_path.write_text(json.dumps(preference_records), encoding="utf-8")

    class FakeTokenizer:
        eos_token = "<eos>"
        eos_token_id = 99
        pad_token = None
        pad_token_id = None

        @classmethod
        def from_pretrained(cls, model_name):
            del model_name
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del truncation, max_length, padding
            input_ids = [[1, 2, 3] for _ in texts]
            attention_mask = [[1, 1, 1] for _ in texts]
            return {"input_ids": input_ids, "attention_mask": attention_mask}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainer:
        # The explicit keyword-only signature rejects any other Trainer keyword.
        def __init__(self, *, model, args, train_dataset, eval_dataset=None, data_collator=None):
            del model, args, train_dataset, eval_dataset, data_collator

        def train(self):
            return types.SimpleNamespace(metrics={"train_loss": 0.11})

        def evaluate(self):
            return {"eval_loss": 0.22}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    def fake_data_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=fake_data_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    class FakeORPOConfig:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeORPOTrainer:
        # Remembers the most recent instance so its kwargs can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            FakeORPOTrainer.last_instance = self

        def train(self):
            return types.SimpleNamespace(metrics={"loss": 0.07})

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    fake_trl = types.SimpleNamespace(ORPOConfig=FakeORPOConfig, ORPOTrainer=FakeORPOTrainer)
    monkeypatch.setitem(sys.modules, "trl", fake_trl)

    metrics = train(
        output_dir=str(tmp_path / "trained-model"),
        preference_dataset_path=str(preference_dataset_path),
        preference_optimization="orpo",
    )

    assert metrics["preference_loss"] == 0.07
    orpo_trainer = FakeORPOTrainer.last_instance
    assert orpo_trainer is not None
    assert "ref_model" not in orpo_trainer.kwargs


def test_train_retries_tokenizer_with_slow_backend(tmp_path: Path, monkeypatch) -> None:
    """Verify that ``train`` retries tokenizer loading with ``use_fast=False``.

    The fake tokenizer raises ``ValueError`` whenever ``use_fast`` is truthy or
    absent and records every attempt, so the expected sequence is one fast
    attempt followed by one slow attempt.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            # ``fn`` always receives a column-oriented batch; rows are rebuilt afterwards.
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )

    # One entry per ``from_pretrained`` call: True for a fast attempt, False for slow.
    tokenizer_fast_attempts: list[bool] = []

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name
            wants_fast = kwargs.get("use_fast", True)
            tokenizer_fast_attempts.append(bool(wants_fast))
            if wants_fast:
                raise ValueError("fast tokenizer unavailable")
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            # Create the directory first so this fake does not rely on the
            # model having been saved to ``output_dir`` beforehand.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModelConfig:
        pad_token_id = None
        use_cache = True

    class FakeModel:
        config = FakeModelConfig()

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.1}

    class FakeTrainer:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.2}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    train(
        output_dir=str(tmp_path / "slow-tokenizer"), model_name="custom/model", max_seq_length=256
    )

    assert tokenizer_fast_attempts == [True, False]


def test_train_auto_switches_giant_models_to_resource_saver_mode(
    tmp_path: Path,
    monkeypatch,
) -> None:
    """Verify the resource-saving overrides applied to a very large model.

    ``train`` is called with a 480B model name and ``adapter_type="full"``
    while the environment requests batch sizes 4/2 and 4 accumulation steps.
    The test expects a 4-bit quantized load with ``device_map="auto"``, batch
    sizes of 1, 16 accumulation steps, and ``adapter_type == "qlora"`` in the
    written ``training-config.json``.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            self.column_names = list(self.items[0].keys()) if self.items else []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            return {
                "train": FakeDataset(self.items[:1]),
                "test": FakeDataset(self.items[1:]),
            }

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            # ``fn`` always receives a column-oriented batch; rows are rebuilt afterwards.
            batch = {key: [item.get(key) for item in self.items] for key in self.column_names}
            transformed = fn(batch)
            size = len(next(iter(transformed.values()))) if transformed else 0
            return FakeDataset(
                [{key: transformed[key][index] for key in transformed} for index in range(size)]
            )

        def __len__(self):
            return len(self.items)

    monkeypatch.setattr(
        "maris_core.training.train.load_hf_dataset",
        lambda _: {
            "train": FakeDataset(
                [
                    {"user": "Sveiki", "assistant": "Labdien"},
                    {"user": "Kas jauns?", "assistant": "Viss kārtībā"},
                ]
            )
        },
    )
    # Values requested through the environment; the assertions below expect
    # different effective values in the training arguments.
    monkeypatch.setenv("HF_TRAIN_BATCH_SIZE", "4")
    monkeypatch.setenv("HF_TRAIN_EVAL_BATCH_SIZE", "2")
    monkeypatch.setenv("HF_TRAIN_GRADIENT_ACCUMULATION_STEPS", "4")

    # Recorders filled by the fakes so the load/quantization arguments can be inspected.
    model_load_calls: list[dict[str, object]] = []
    bnb_calls: list[dict[str, object]] = []

    class FakeTokenizer:
        pad_token = None
        pad_token_id = None
        eos_token = "<eos>"
        eos_token_id = 99

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, *, truncation, max_length, padding):
            del texts, truncation, max_length, padding
            return {"input_ids": [[1, 2, 3]], "attention_mask": [[1, 1, 1]]}

        def save_pretrained(self, output_dir):
            # Create the directory first so this fake does not rely on the
            # model having been saved to ``output_dir`` beforehand.
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeBitsAndBytesConfig:
        def __init__(self, **kwargs):
            bnb_calls.append(kwargs)
            self.kwargs = kwargs

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            model_load_calls.append({"model_name": model_name, "kwargs": kwargs})
            return cls()

        def print_trainable_parameters(self):
            return None

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Remembers the most recent instance so its arguments can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.kwargs = kwargs
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            Path(output_dir).mkdir(parents=True, exist_ok=True)
            Path(output_dir, "adapter_config.json").write_text("{}", encoding="utf-8")
            Path(output_dir, "config.json").write_text("{}", encoding="utf-8")

    monkeypatch.setitem(
        sys.modules,
        "transformers",
        types.SimpleNamespace(
            AutoModelForCausalLM=FakeModel,
            AutoTokenizer=FakeTokenizer,
            BitsAndBytesConfig=FakeBitsAndBytesConfig,
            DataCollatorForLanguageModeling=lambda **kwargs: kwargs,
            Trainer=FakeTrainer,
            TrainingArguments=FakeTrainingArguments,
        ),
    )

    class FakeLoraConfig:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    def fake_prepare_model_for_kbit_training(model, use_gradient_checkpointing):
        del use_gradient_checkpointing
        return model

    def fake_get_peft_model(model, peft_config):
        del peft_config
        return model

    monkeypatch.setitem(
        sys.modules,
        "peft",
        types.SimpleNamespace(
            LoraConfig=FakeLoraConfig,
            TaskType=types.SimpleNamespace(CAUSAL_LM="CAUSAL_LM"),
            get_peft_model=fake_get_peft_model,
            prepare_model_for_kbit_training=fake_prepare_model_for_kbit_training,
        ),
    )

    output_dir = tmp_path / "giant-model"
    train(
        output_dir=str(output_dir),
        model_name="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        adapter_type="full",
        max_seq_length=256,
    )

    assert bnb_calls[0]["load_in_4bit"] is True
    assert model_load_calls[0]["model_name"] == "Qwen/Qwen3-Coder-480B-A35B-Instruct"
    assert model_load_calls[0]["kwargs"]["device_map"] == "auto"
    assert model_load_calls[0]["kwargs"]["low_cpu_mem_usage"] is True
    assert "quantization_config" in model_load_calls[0]["kwargs"]
    assert FakeTrainer.last_instance is not None
    assert FakeTrainer.last_instance.args.kwargs["per_device_train_batch_size"] == 1
    assert FakeTrainer.last_instance.args.kwargs["per_device_eval_batch_size"] == 1
    assert FakeTrainer.last_instance.args.kwargs["gradient_accumulation_steps"] == 16
    # The requested "full" adapter type is expected to be persisted as "qlora".
    training_config = json.loads((output_dir / "training-config.json").read_text(encoding="utf-8"))
    assert training_config["adapter_type"] == "qlora"


def test_train_disables_pin_memory_and_tqdm_in_non_interactive_environment(
    tmp_path: Path, monkeypatch
) -> None:
    """Verify the training arguments chosen without a TTY and without an accelerator.

    ``stderr.isatty()`` is forced to ``False`` and the fake ``torch`` reports
    neither CUDA nor MPS. The test expects ``dataloader_pin_memory`` to be off,
    ``disable_tqdm`` to be on and ``logging_first_step`` to be on.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            if self.items:
                self.column_names = list(self.items[0].keys())
            else:
                self.column_names = []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched calls get the column view; otherwise only the first row is passed.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Čau!"},
            {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
        ]
        return {"train": FakeDataset(records)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    import maris_core.training.train as train_module

    # Replace stderr with an object whose only capability is reporting "not a TTY".
    non_tty_stderr = types.SimpleNamespace(isatty=lambda: False)
    monkeypatch.setattr(train_module.sys, "stderr", non_tty_stderr)

    # Fake torch that reports no CUDA device and no MPS backend.
    fake_torch = types.SimpleNamespace(
        cuda=types.SimpleNamespace(is_available=lambda: False),
        backends=types.SimpleNamespace(mps=types.SimpleNamespace(is_available=lambda: False)),
    )
    monkeypatch.setitem(sys.modules, "torch", fake_torch)

    class FakeTokenizer:
        pad_token = "<pad>"
        pad_token_id = 0
        eos_token = "</s>"
        eos_token_id = 1

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # A lone string is treated as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            input_ids = [[1, 2, 3] for _ in texts]
            attention_mask = [[1, 1, 1] for _ in texts]
            return {"input_ids": input_ids, "attention_mask": attention_mask}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Remembers the most recent instance so its arguments can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    def fake_data_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=fake_data_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(output_dir=str(tmp_path / "cpu-runtime"), model_name="custom/model", max_seq_length=256)

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["dataloader_pin_memory"] is False
    assert training_kwargs["disable_tqdm"] is True
    assert training_kwargs["logging_first_step"] is True


def test_train_enables_bf16_by_default_when_cuda_supports_it(tmp_path: Path, monkeypatch) -> None:
    """Verify that bf16 is selected and fp16 is not when CUDA reports bf16 support.

    The fake ``torch`` advertises an available CUDA device with bf16 support and
    no MPS backend; the resulting training arguments are then inspected.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            if self.items:
                self.column_names = list(self.items[0].keys())
            else:
                self.column_names = []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched calls get the column view; otherwise only the first row is passed.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Čau!"},
            {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
        ]
        return {"train": FakeDataset(records)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)
    import maris_core.training.train as train_module

    # stderr reports an interactive terminal in this scenario.
    tty_stderr = types.SimpleNamespace(isatty=lambda: True)
    monkeypatch.setattr(train_module.sys, "stderr", tty_stderr)

    # Fake torch: CUDA is available and supports bf16; MPS is unavailable.
    fake_cuda = types.SimpleNamespace(
        is_available=lambda: True,
        is_bf16_supported=lambda: True,
    )
    fake_torch = types.SimpleNamespace(
        cuda=fake_cuda,
        backends=types.SimpleNamespace(mps=types.SimpleNamespace(is_available=lambda: False)),
    )
    monkeypatch.setitem(sys.modules, "torch", fake_torch)

    class FakeTokenizer:
        pad_token = "<pad>"
        pad_token_id = 0
        eos_token = "</s>"
        eos_token_id = 1

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # A lone string is treated as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            input_ids = [[1, 2, 3] for _ in texts]
            attention_mask = [[1, 1, 1] for _ in texts]
            return {"input_ids": input_ids, "attention_mask": attention_mask}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Remembers the most recent instance so its arguments can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    def fake_data_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=fake_data_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    train(output_dir=str(tmp_path / "cuda-runtime"), model_name="custom/model", max_seq_length=256)

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["bf16"] is True
    assert training_kwargs["fp16"] is False


def test_train_uses_fsdp_training_arguments_when_requested(tmp_path: Path, monkeypatch) -> None:
    """Verify the FSDP-related training arguments produced for ``distributed_strategy="fsdp"``.

    A JSON FSDP config with two flags set to ``False`` is passed together with
    one layer class to wrap. The test expects those values inside
    ``fsdp_config``, a ``min_num_params`` of 100_000_000, the
    ``"full_shard auto_wrap"`` mode and ``ddp_find_unused_parameters=False``.
    """

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            if self.items:
                self.column_names = list(self.items[0].keys())
            else:
                self.column_names = []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched calls get the column view; otherwise only the first row is passed.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Čau!"},
            {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
        ]
        return {"train": FakeDataset(records)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    class FakeTokenizer:
        pad_token = "<pad>"
        pad_token_id = 0
        eos_token = "</s>"
        eos_token_id = 1

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # A lone string is treated as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            input_ids = [[1, 2, 3] for _ in texts]
            attention_mask = [[1, 1, 1] for _ in texts]
            return {"input_ids": input_ids, "attention_mask": attention_mask}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Remembers the most recent instance so its arguments can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    def fake_data_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=fake_data_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    # Both flags are explicitly False so the assertions can tell they were carried over.
    fsdp_file_settings = {"activation_checkpointing": False, "limit_all_gathers": False}
    fsdp_config_path = tmp_path / "fsdp-config.json"
    fsdp_config_path.write_text(json.dumps(fsdp_file_settings), encoding="utf-8")

    train(
        output_dir=str(tmp_path / "fsdp-runtime"),
        model_name="custom/model",
        max_seq_length=256,
        distributed_strategy="fsdp",
        distributed_config_path=str(fsdp_config_path),
        fsdp_transformer_layer_cls_to_wrap=["Qwen2DecoderLayer"],
    )

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["fsdp"] == "full_shard auto_wrap"
    fsdp_config = training_kwargs["fsdp_config"]
    assert fsdp_config["activation_checkpointing"] is False
    assert fsdp_config["limit_all_gathers"] is False
    assert fsdp_config["min_num_params"] == 100_000_000
    assert fsdp_config["transformer_layer_cls_to_wrap"] == ["Qwen2DecoderLayer"]
    assert training_kwargs["ddp_find_unused_parameters"] is False


def test_train_uses_deepspeed_training_arguments_when_requested(
    tmp_path: Path, monkeypatch
) -> None:
    """Verify the training arguments produced for ``distributed_strategy="deepspeed"``.

    The package version lookup is patched to report ``"0.18.9"``. The test
    expects the ``deepspeed`` argument to equal the config file path and
    ``ddp_find_unused_parameters`` to be ``False``.
    """

    def fake_installed_version(package_name):
        return "0.18.9"

    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        fake_installed_version,
    )

    class FakeDataset:
        """List-backed dataset offering the split/map calls used below."""

        def __init__(self, items):
            self.items = list(items)
            if self.items:
                self.column_names = list(self.items[0].keys())
            else:
                self.column_names = []

        def train_test_split(self, *, test_size, seed):
            # Split sizes are fixed: the first row trains, the rest evaluates.
            del test_size, seed
            head, tail = self.items[:1], self.items[1:]
            return {"train": FakeDataset(head), "test": FakeDataset(tail)}

        def map(self, fn, *, batched, remove_columns, desc):
            del remove_columns, desc
            columns = {
                name: [row.get(name) for row in self.items] for name in self.column_names
            }
            # Batched calls get the column view; otherwise only the first row is passed.
            mapped = fn(columns if batched else self.items[0])
            row_count = len(next(iter(mapped.values()))) if mapped else 0
            rebuilt = []
            for position in range(row_count):
                rebuilt.append({name: mapped[name][position] for name in mapped})
            return FakeDataset(rebuilt)

        def __len__(self):
            return len(self.items)

    def fake_load_hf_dataset(_):
        records = [
            {"user": "Sveiki", "assistant": "Čau!"},
            {"prompt": "Uzraksti plānu", "completion": "Gatavs."},
        ]
        return {"train": FakeDataset(records)}

    monkeypatch.setattr("maris_core.training.train.load_hf_dataset", fake_load_hf_dataset)

    class FakeTokenizer:
        pad_token = "<pad>"
        pad_token_id = 0
        eos_token = "</s>"
        eos_token_id = 1

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

        def __call__(self, texts, truncation, padding, max_length):
            del truncation, padding, max_length
            # A lone string is treated as a one-element batch.
            if isinstance(texts, str):
                texts = [texts]
            input_ids = [[1, 2, 3] for _ in texts]
            attention_mask = [[1, 1, 1] for _ in texts]
            return {"input_ids": input_ids, "attention_mask": attention_mask}

        def save_pretrained(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "tokenizer_config.json").write_text("{}", encoding="utf-8")

    class FakeModel:
        def __init__(self):
            self.config = types.SimpleNamespace(pad_token_id=None, use_cache=True)

        @classmethod
        def from_pretrained(cls, model_name, **kwargs):
            del model_name, kwargs
            return cls()

    class FakeTrainingArguments:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    class FakeTrainResult:
        metrics = {"train_loss": 0.2}

    class FakeTrainer:
        # Remembers the most recent instance so its arguments can be inspected.
        last_instance = None

        def __init__(self, **kwargs):
            self.args = kwargs["args"]
            FakeTrainer.last_instance = self

        def train(self):
            return FakeTrainResult()

        def evaluate(self):
            return {"eval_loss": 0.4}

        def save_model(self, output_dir):
            target_dir = Path(output_dir)
            target_dir.mkdir(parents=True, exist_ok=True)
            (target_dir / "config.json").write_text("{}", encoding="utf-8")

    def fake_data_collator(**kwargs):
        return kwargs

    fake_transformers = types.SimpleNamespace(
        AutoModelForCausalLM=FakeModel,
        AutoTokenizer=FakeTokenizer,
        DataCollatorForLanguageModeling=fake_data_collator,
        Trainer=FakeTrainer,
        TrainingArguments=FakeTrainingArguments,
    )
    monkeypatch.setitem(sys.modules, "transformers", fake_transformers)

    zero_stage_three = {"zero_optimization": {"stage": 3}}
    deepspeed_config_path = tmp_path / "deepspeed.json"
    deepspeed_config_path.write_text(json.dumps(zero_stage_three), encoding="utf-8")

    train(
        output_dir=str(tmp_path / "deepspeed-runtime"),
        model_name="custom/model",
        max_seq_length=256,
        distributed_strategy="deepspeed",
        distributed_config_path=str(deepspeed_config_path),
    )

    trainer = FakeTrainer.last_instance
    assert trainer is not None
    training_kwargs = trainer.args.kwargs
    assert training_kwargs["deepspeed"] == str(deepspeed_config_path)
    assert training_kwargs["ddp_find_unused_parameters"] is False


def test_deepspeed_training_arguments_raise_clear_error_when_dependency_missing(
    tmp_path: Path, monkeypatch
) -> None:
    """A missing ``deepspeed`` package must be reported as a readable ImportError.

    The package-version lookup used by ``maris_core.training.train`` is stubbed
    to raise ``PackageNotFoundError``; building the distributed training
    argument overrides for a deepspeed config is then expected to raise
    ``ImportError`` carrying the install hint.
    """
    # Minimal ZeRO stage-3 config file for the deepspeed strategy to point at.
    zero_config_json = json.dumps({"zero_optimization": {"stage": 3}})
    ds_config_file = tmp_path / "deepspeed.json"
    ds_config_file.write_text(zero_config_json, encoding="utf-8")

    deepspeed_overrides = {
        "distributed_strategy": "deepspeed",
        "distributed_config_path": str(ds_config_file),
    }
    training_config = load_training_config(overrides=deepspeed_overrides)

    def _package_not_installed(package_name: str) -> None:
        # Mirrors what importlib.metadata raises for an uninstalled distribution.
        raise PackageNotFoundError(package_name)

    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        _package_not_installed,
    )

    install_hint = "DeepSpeed režīms nepieciešams instalēt 'deepspeed'"
    with pytest.raises(ImportError, match=install_hint):
        _build_distributed_training_argument_overrides(training_config)


def test_deepspeed_training_arguments_raise_clear_error_when_metadata_lookup_stops(
    tmp_path: Path, monkeypatch
) -> None:
    """A ``StopIteration`` from the version lookup must also become an ImportError.

    The package-version lookup used by ``maris_core.training.train`` is stubbed
    to raise ``StopIteration`` instead of ``PackageNotFoundError``; the same
    install hint is expected in the resulting ``ImportError``.
    """
    # Minimal ZeRO stage-3 config file for the deepspeed strategy to point at.
    zero_config_json = json.dumps({"zero_optimization": {"stage": 3}})
    ds_config_file = tmp_path / "deepspeed.json"
    ds_config_file.write_text(zero_config_json, encoding="utf-8")

    deepspeed_overrides = {
        "distributed_strategy": "deepspeed",
        "distributed_config_path": str(ds_config_file),
    }
    training_config = load_training_config(overrides=deepspeed_overrides)

    def _lookup_stops(package_name: str) -> None:
        # Simulates a metadata backend that signals "not found" via StopIteration.
        raise StopIteration(package_name)

    monkeypatch.setattr(
        "maris_core.training.train.get_installed_package_version",
        _lookup_stops,
    )

    install_hint = "DeepSpeed režīms nepieciešams instalēt 'deepspeed'"
    with pytest.raises(ImportError, match=install_hint):
        _build_distributed_training_argument_overrides(training_config)


def test_train_model_cli_exits_cleanly_when_runtime_dependency_missing(monkeypatch, capsys) -> None:
    """The CLI must turn an ImportError from single-run training into exit code 2.

    ``scripts/train_model.py`` is loaded from its file path, its config loader
    is stubbed, and ``maris_core.training.train`` is replaced in ``sys.modules``
    by a fake whose ``train_with_config`` raises ``ImportError``. ``main()`` is
    expected to exit with code 2, print the message to stderr and show no
    traceback.
    """
    cli_path = Path(__file__).resolve().parents[1] / "scripts" / "train_model.py"
    module_spec = importlib.util.spec_from_file_location("train_model", cli_path)
    assert module_spec is not None and module_spec.loader is not None
    cli_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(cli_module)

    dependency_message = "DeepSpeed režīms nepieciešams instalēt 'deepspeed' Python pakotni."

    def _fail_with_import_error(_config: object) -> dict[str, object]:
        raise ImportError(dependency_message)

    # Only the single-run entry point fails; the branch-suite one stays harmless.
    fake_train_module = types.SimpleNamespace(
        train_branch_suite=lambda _config: {},
        train_with_config=_fail_with_import_error,
    )

    monkeypatch.setattr(cli_module, "load_training_config", lambda *args, **kwargs: object())
    monkeypatch.setitem(sys.modules, "maris_core.training.train", fake_train_module)
    monkeypatch.setattr(sys, "argv", [str(cli_path)])

    with pytest.raises(SystemExit) as exit_info:
        cli_module.main()

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert dependency_message in stderr_text
    assert "Traceback" not in stderr_text


def test_train_model_cli_exits_cleanly_for_branch_suite_dependency_missing(
    monkeypatch, capsys
) -> None:
    """The CLI must turn an ImportError from branch-suite training into exit code 2.

    Same setup as the single-run variant, but the CLI is invoked with
    ``--all-branches`` and the fake ``train_branch_suite`` is the one that
    raises ``ImportError``. The module-level ``replace`` helper of the script
    is stubbed to return the config unchanged, because the stub config is a
    plain ``object()``.
    """
    cli_path = Path(__file__).resolve().parents[1] / "scripts" / "train_model.py"
    module_spec = importlib.util.spec_from_file_location("train_model", cli_path)
    assert module_spec is not None and module_spec.loader is not None
    cli_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(cli_module)

    dependency_message = "DeepSpeed režīms nepieciešams instalēt 'deepspeed' Python pakotni."

    def _fail_with_import_error(_config: object) -> dict[str, object]:
        raise ImportError(dependency_message)

    # Only the branch-suite entry point fails; the single-run one stays harmless.
    fake_train_module = types.SimpleNamespace(
        train_branch_suite=_fail_with_import_error,
        train_with_config=lambda _config: {},
    )

    monkeypatch.setattr(cli_module, "load_training_config", lambda *args, **kwargs: object())
    monkeypatch.setattr(cli_module, "replace", lambda config, **kwargs: config)
    monkeypatch.setitem(sys.modules, "maris_core.training.train", fake_train_module)
    monkeypatch.setattr(sys, "argv", [str(cli_path), "--all-branches"])

    with pytest.raises(SystemExit) as exit_info:
        cli_module.main()

    assert exit_info.value.code == 2
    stderr_text = capsys.readouterr().err
    assert dependency_message in stderr_text
    assert "Traceback" not in stderr_text


def test_ensure_runtime_home_dir_sets_temp_home_when_missing(tmp_path: Path, monkeypatch) -> None:
    """With no HOME/USER variables set, a per-uid temp home must be created.

    The temp directory and ``os.getuid`` are stubbed, so the expected home is
    ``<tmp_path>/maris-home-1000`` and the expected user name is
    ``maris-1000`` for ``USER``, ``LOGNAME`` and ``USERNAME``.
    """
    for env_name in ("HOME", "USER", "LOGNAME", "USERNAME"):
        # ``delenv(..., raising=False)`` records nothing when the variable is
        # already absent, so the value the function under test writes into
        # ``os.environ`` would leak into later tests. Registering the name via
        # ``setenv`` first makes monkeypatch restore the true pre-test state
        # (including "unset") on teardown.
        monkeypatch.setenv(env_name, "pre-test-placeholder")
        monkeypatch.delenv(env_name)
    monkeypatch.setattr("maris_core.training.train.tempfile.gettempdir", lambda: str(tmp_path))
    monkeypatch.setattr("maris_core.training.train.os.getuid", lambda: 1000)

    resolved = _ensure_runtime_home_dir()

    expected = tmp_path / "maris-home-1000"
    assert resolved == str(expected)
    assert os.environ["HOME"] == str(expected)
    assert os.environ["USER"] == "maris-1000"
    assert os.environ["LOGNAME"] == "maris-1000"
    assert os.environ["USERNAME"] == "maris-1000"
    assert expected.is_dir()


def test_ensure_runtime_home_dir_keeps_existing_home_and_user(monkeypatch) -> None:
    """Already-populated HOME/USER variables must be returned and left untouched."""
    preset_env = {
        "HOME": "/existing/home",
        "USER": "existing-user",
        "LOGNAME": "existing-user",
        "USERNAME": "existing-user",
    }
    for env_name, env_value in preset_env.items():
        monkeypatch.setenv(env_name, env_value)

    home_dir = _ensure_runtime_home_dir()

    assert home_dir == "/existing/home"
    # Every preset variable must still hold exactly the value set above.
    for env_name, env_value in preset_env.items():
        assert os.environ[env_name] == env_value


def test_ensure_runtime_home_dir_uses_unknown_suffix_when_getuid_fails(
    tmp_path: Path, monkeypatch
) -> None:
    """A whitespace-only HOME plus a failing ``os.getuid`` must yield the ``unknown`` suffix.

    The temp directory is stubbed to ``tmp_path`` and ``os.getuid`` raises
    ``OSError``, so the expected home is ``<tmp_path>/maris-home-unknown`` and
    the expected user name is ``maris-unknown``.
    """
    monkeypatch.setenv("HOME", "   ")
    for env_name in ("USER", "LOGNAME", "USERNAME"):
        # ``delenv(..., raising=False)`` records nothing when the variable is
        # already absent, so the value the function under test writes into
        # ``os.environ`` would leak into later tests. Registering the name via
        # ``setenv`` first makes monkeypatch restore the true pre-test state
        # (including "unset") on teardown.
        monkeypatch.setenv(env_name, "pre-test-placeholder")
        monkeypatch.delenv(env_name)
    monkeypatch.setattr("maris_core.training.train.tempfile.gettempdir", lambda: str(tmp_path))

    def _raise_os_error() -> int:
        raise OSError("uid not available")

    monkeypatch.setattr("maris_core.training.train.os.getuid", _raise_os_error)

    resolved = _ensure_runtime_home_dir()

    expected = tmp_path / "maris-home-unknown"
    assert resolved == str(expected)
    assert os.environ["HOME"] == str(expected)
    assert os.environ["USER"] == "maris-unknown"
    assert os.environ["LOGNAME"] == "maris-unknown"
    assert os.environ["USERNAME"] == "maris-unknown"
    assert expected.is_dir()