| """Tests Maris treniņu UI palīgfunkcijām.""" |
|
|
| from __future__ import annotations |
|
|
| from pathlib import Path |
|
|
| import pytest |
|
|
| from maris_core.training.space_ui import ( |
| SpaceTrainingRequest, |
| build_space_training_command, |
| build_space_training_env, |
| has_completed_training_artifacts, |
| list_space_model_choices, |
| parse_training_progress, |
| read_log_since, |
| resolve_optional_persistent_path, |
| resolve_output_dir, |
| ) |
|
|
|
|
def test_space_training_request_rejects_invalid_repo_id() -> None:
    """Building a request with the dataset repo "invalid repo" must raise ValueError."""
    malformed_repo_id = "invalid repo"

    with pytest.raises(ValueError):
        SpaceTrainingRequest(dataset_repo=malformed_repo_id)
|
|
|
|
def test_space_training_request_rejects_non_maris_repo_ids() -> None:
    """Both the dataset repo and the model repo under "someone-else/" must raise ValueError."""
    rejected_arguments = (
        {"dataset_repo": "someone-else/not-maris-memory"},
        {"model_repo": "someone-else/not-maris-model"},
    )

    # Each keyword set is checked on its own so that one rejection cannot mask the other.
    for keyword_arguments in rejected_arguments:
        with pytest.raises(ValueError):
            SpaceTrainingRequest(**keyword_arguments)
|
|
|
|
def test_resolve_output_dir_keeps_path_inside_persistent_root(tmp_path: Path) -> None:
    """A relative sub-directory resolves to the matching path below the persistent root."""
    resolved = resolve_output_dir(str(tmp_path), "runs/session-1")
    expected = tmp_path.joinpath("runs", "session-1")

    assert resolved == expected
|
|
|
|
def test_resolve_output_dir_rejects_escape_attempt(tmp_path: Path) -> None:
    """The sub-directory "../escape" must be rejected with ValueError."""
    persistent_root = str(tmp_path)

    with pytest.raises(ValueError):
        resolve_output_dir(persistent_root, "../escape")
|
|
|
|
def test_build_space_training_command_prefers_custom_model_name() -> None:
    """With both a preset and a custom model name set, the command carries ``--model-name``."""
    script_path = "/tmp/train-hf.sh"
    custom_model = "Qwen/Qwen2.5-1.5B-Instruct"
    request = SpaceTrainingRequest(model_preset="coding", model_name=custom_model)

    expected_command = ["bash", script_path, "--model-name", custom_model]

    assert build_space_training_command(script_path, request) == expected_command
|
|
|
|
def test_space_training_request_accepts_custom_model_without_preset() -> None:
    """An empty preset stays empty when a custom model name is supplied."""
    custom_model = "meta-llama/Llama-3.2-3B-Instruct"

    request = SpaceTrainingRequest(model_preset="", model_name=custom_model)

    assert (request.model_preset, request.model_name) == ("", custom_model)
|
|
|
|
def test_build_space_training_env_uses_preset_and_persistent_storage(tmp_path: Path) -> None:
    """Check the environment built for a "coding" preset request with hub push disabled.

    The base environment is empty, so every variable checked here has to come
    from ``build_space_training_env`` itself.
    """
    persistent_root = str(tmp_path)
    output_dir = str(tmp_path / "runs" / "coder")
    config_path = "huggingface/training-config.json"
    request = SpaceTrainingRequest(
        model_preset="coding",
        hub_model_id="MarisUK/maris-ai-lv",
        output_subdir="runs/coder",
        continue_model_path="runs/checkpoints",
        push_to_hub=False,
    )

    env = build_space_training_env({}, request, persistent_root)

    expected_env = {
        "HF_PERSISTENT_DIR": persistent_root,
        "HF_TRAIN_OUTPUT_DIR": output_dir,
        "HF_LOCAL_MODEL_DIR": output_dir,
        "HF_MODEL_REPO": "MarisUK/maris-ai-lv",
        "HF_TRAIN_MODEL_PRESET": "coding",
        "HF_TRAINING_CONFIG_PATH": config_path,
        "MARIS_TRAIN_CONFIG_PATH": config_path,
        "HF_TRAIN_PUSH_TO_HUB": "false",
        "HF_TRAIN_CONTINUE_FROM_LATEST": "true",
        "HF_TRAIN_CONTINUE_MODEL_PATH": str(tmp_path / "runs" / "checkpoints"),
        "HF_TRAIN_DISTRIBUTED_STRATEGY": "none",
        "MARIS_TRAIN_DISTRIBUTED_STRATEGY": "none",
        "PYTHONUNBUFFERED": "1",
    }
    # The variable name is the assertion message so a failure identifies the entry.
    for name, expected in expected_env.items():
        assert env[name] == expected, name
|
|
|
|
def test_build_space_training_env_clears_inherited_distributed_overrides(tmp_path: Path) -> None:
    """Distributed settings present in the base environment must not survive.

    Both strategy variables are expected to read "none" and both distributed
    config path variables are expected to be absent from the result.
    """
    request = SpaceTrainingRequest(model_preset="balanced")
    inherited_env = {
        "HF_TRAIN_DISTRIBUTED_STRATEGY": "deepspeed",
        "MARIS_TRAIN_DISTRIBUTED_STRATEGY": "fsdp",
        "HF_TRAIN_DISTRIBUTED_CONFIG_PATH": "/tmp/deepspeed.json",
        "MARIS_TRAIN_DISTRIBUTED_CONFIG_PATH": "/tmp/fsdp.json",
    }

    env = build_space_training_env(inherited_env, request, str(tmp_path))

    for strategy_key in ("HF_TRAIN_DISTRIBUTED_STRATEGY", "MARIS_TRAIN_DISTRIBUTED_STRATEGY"):
        assert env[strategy_key] == "none"
    for config_key in ("HF_TRAIN_DISTRIBUTED_CONFIG_PATH", "MARIS_TRAIN_DISTRIBUTED_CONFIG_PATH"):
        assert config_key not in env
|
|
|
|
def test_build_space_training_env_allows_explicit_space_config_override(tmp_path: Path) -> None:
    """``MARIS_SPACE_TRAIN_CONFIG_PATH`` in the base environment sets both config path variables."""
    custom_config = "huggingface/custom-space-config.json"
    base_env = {"MARIS_SPACE_TRAIN_CONFIG_PATH": custom_config}
    request = SpaceTrainingRequest(model_preset="balanced")

    env = build_space_training_env(base_env, request, str(tmp_path))

    for config_key in ("HF_TRAINING_CONFIG_PATH", "MARIS_TRAIN_CONFIG_PATH"):
        assert env[config_key] == custom_config
|
|
|
|
def test_has_completed_training_artifacts_detects_finished_space_run(tmp_path: Path) -> None:
    """The check is False for an empty run directory and True once ``training-metrics.json`` exists."""
    run_dir = tmp_path / "runs" / "demo"
    run_dir.mkdir(parents=True)
    metrics_file = run_dir / "training-metrics.json"

    # Nothing has been written yet, so the run must not count as completed.
    assert has_completed_training_artifacts(run_dir) is False

    metrics_file.write_text("{}", encoding="utf-8")

    assert has_completed_training_artifacts(run_dir) is True
|
|
|
|
def test_list_space_model_choices_exposes_presets() -> None:
    """The four named presets must all be present among the listed model choices."""
    available_choices = list_space_model_choices()
    required_presets = {"balanced", "reasoning", "coding", "lightweight"}

    assert required_presets.issubset(available_choices)
|
|
|
|
def test_list_space_model_choices_can_include_large_external_models(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A preset supplied as JSON in ``MARIS_TRAIN_EXTRA_MODELS`` appears among the choices.

    Args:
        monkeypatch: pytest fixture used to set the environment variable for
            the duration of this test only.
    """
    monkeypatch.setenv(
        "MARIS_TRAIN_EXTRA_MODELS",
        (
            '{"qwen-880b":{"model_name":"Qwen/Qwen3-880B-Instruct",'
            '"label":"Qwen ultra preset",'
            '"description":"Large external preset for giant-model experiments."}}'
        ),
    )

    choices = list_space_model_choices()

    assert choices["qwen-880b"]["model_name"] == "Qwen/Qwen3-880B-Instruct"
    assert choices["qwen-880b"]["label"] == "Qwen ultra preset"
|
|
|
|
def test_space_training_request_defaults_to_balanced_model_selection() -> None:
    """With neither a preset nor a model name given, the preset becomes "balanced"."""
    blank_request = SpaceTrainingRequest(model_preset="", model_name="")

    assert (blank_request.model_preset, blank_request.model_name) == ("balanced", "")
|
|
|
|
def test_space_training_request_accepts_separate_hub_model_id() -> None:
    """With an empty ``model_repo``, both ``hub_model_id`` and ``model_repo`` read the hub id."""
    hub_id = "MarisUK/maris-ai-lv"

    request = SpaceTrainingRequest(
        model_repo="",
        hub_model_id=hub_id,
        model_preset="",
        model_name="meta-llama/Llama-3.2-3B-Instruct",
    )

    assert request.hub_model_id == hub_id
    assert request.model_repo == hub_id
|
|
|
|
def test_resolve_optional_persistent_path_returns_none_for_empty_value(tmp_path: Path) -> None:
    """An empty path value resolves to ``None``."""
    resolved = resolve_optional_persistent_path(str(tmp_path), "")

    assert resolved is None
|
|
|
|
def test_read_log_since_reads_only_delta(tmp_path: Path) -> None:
    """Reading again from the returned offset yields no text and the same offset."""
    log_contents = "line-1\nline-2\n"
    log_file = tmp_path / "train.log"
    log_file.write_text(log_contents, encoding="utf-8")

    initial_text, initial_offset = read_log_since(log_file, 0)
    # The file is unchanged between the two reads, so the second read has nothing new.
    delta_text, delta_offset = read_log_since(log_file, initial_offset)

    assert initial_text == log_contents
    assert delta_text == ""
    assert delta_offset == initial_offset
|
|
|
|
def test_parse_training_progress_detects_epoch_and_loss() -> None:
    """Plain-text log output with an epoch line and a metrics dict yields training progress."""
    log_text = "Epoch 2/4\n{'loss': 0.125, 'epoch': 2.0}\n"

    progress = parse_training_progress(
        log_text,
        request={"num_epochs": 4},
        running=True,
        exit_code=None,
    )

    # Only a lower bound is pinned for the percentage.
    assert progress["percent"] >= 60
    expected_fields = {
        "stage": "training",
        "current_epoch": 2.0,
        "total_epochs": 4,
        "loss": 0.125,
    }
    for field, expected in expected_fields.items():
        assert progress[field] == expected, field
|
|
|
|
def test_parse_training_progress_reports_structured_preparing_stage() -> None:
    """A single structured ``prepare_model`` event is reported as the "preparing" stage at 20 percent."""
    event_line = '{"maris_training_event": true, "event": "prepare_model", "stage": "preparing", "label": "Ielādē tokenizeri un modeli"}\n'

    progress = parse_training_progress(
        event_line,
        request={"num_epochs": 3},
        running=True,
        exit_code=None,
    )

    expected_fields = {
        "stage": "preparing",
        "label": "Ielādē tokenizeri un modeli",
        "percent": 20,
        "events_detected": 1,
    }
    for field, expected in expected_fields.items():
        assert progress[field] == expected, field
|
|
|
|
def test_parse_training_progress_reports_completion() -> None:
    """A stopped process with exit code 0 is reported as "completed" at 100 percent."""
    finished_state = {"running": False, "exit_code": 0}

    progress = parse_training_progress(
        "Training complete\n",
        request={"num_epochs": 3},
        **finished_state,
    )

    assert (progress["stage"], progress["percent"]) == ("completed", 100)
|
|
|
|
def test_parse_training_progress_prefers_structured_events() -> None:
    """Reported values match the structured event even though a plain "Epoch 1/4" line follows it."""
    structured_event = '{"maris_training_event": true, "event": "log", "stage": "training", "label": "Trenē modeli · solis 12/40", "epoch": 1.5, "total_epochs": 4, "step": 12, "total_steps": 40, "loss": 0.2451, "eval_loss": 0.1987, "learning_rate": 0.0002, "eta_seconds": 180}'
    plain_epoch_line = "Epoch 1/4"
    log_text = structured_event + "\n" + plain_epoch_line

    progress = parse_training_progress(
        log_text,
        request={"num_epochs": 4},
        running=True,
        exit_code=None,
    )

    expected_fields = {
        "stage": "training",
        "label": "Trenē modeli · solis 12/40",
        "current_epoch": 1.5,
        "total_epochs": 4,
        "current_step": 12,
        "total_steps": 40,
        "loss": 0.2451,
        "eval_loss": 0.1987,
        "learning_rate": 0.0002,
        "eta_seconds": 180,
        "events_detected": 1,
    }
    for field, expected in expected_fields.items():
        assert progress[field] == expected, field
|
|