"""Tests for the Humigence CLI."""

import json
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pytest
from typer.testing import CliRunner

from humigence.cli import app

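# Exit-code conventions exercised throughout this suite:
#   0 = success, 2 = invalid or missing config, 5 = missing model artifacts.
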

@pytest.fixture
def runner():
    """Create a CLI runner for testing."""
    return CliRunner()


@pytest.fixture
def temp_config():
    """Create a temporary config file for testing."""
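    # Minimal but complete config covering every top-level section the CLI
    # reads: model, compute, data, train, eval, acceptance, and export.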
    config_data = {
        "project": "test_project",
        "model": {
            "repo": "Qwen/Qwen2.5-0.5B",
            "local_path": None,
            "use_flash_attn": True,
        },
        "compute": {"gpus": 1, "gpu_type": "RTX_4080_16GB"},
        "data": {
            "raw_path": "data/raw/test.jsonl",
            "processed_dir": "data/processed",
            "data_schema": "chat_messages",
            "max_seq_len": 1024,
            "packing": True,
            "split": {"train": 0.8, "val": 0.1, "test": 0.1},
            "template": "qwen_chat_basic_v1",
        },
        "train": {
            "precision_mode": "qlora_nf4",
            "lr": 0.0002,
            "epochs": 1,
            "tokens_per_step_target": 100000,
            "lora": {"target_modules": ["q_proj", "v_proj"], "r": 16, "alpha": 32},
        },
        "eval": {
            "primary_metric": "val_loss",
            "curated_prompts_path": "configs/curated_eval_prompts.jsonl",
            "temperature_low": 0.2,
            "temperature_high": 0.7,
        },
        "acceptance": {
            "min_val_improvement_pct": 1.0,
            "throughput_jitter_pct": 20.0,
            "curated_reasonable_threshold_pct": 70.0,
        },
        "export": {"artifacts_dir": "artifacts/humigence", "formats": ["peft_adapter"]},
    }
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        json.dump(config_data, f)
        config_path = Path(f.name)

    # Yield after the handle is closed so the CLI can reopen the file, then
    # clean up: delete=False means the temp file would otherwise be leaked.
    yield config_path
    config_path.unlink(missing_ok=True)


@pytest.fixture
def mock_modules():
    """Mock the imported modules to avoid actual execution."""
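    # Stub every heavy pipeline class the CLI imports so command wiring can be
    # exercised without loading models or touching a GPU.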
    with patch.multiple(
        "humigence.cli",
        TrainingPlanner=Mock(),
        DataPreprocessor=Mock(),
        QLoRATrainer=Mock(),
        ModelEvaluator=Mock(),
        ModelPacker=Mock(),
        ModelInferencer=Mock(),
        AcceptanceGates=Mock(),
    ):
        yield


class TestCLIHelp:
    """Test CLI help functionality."""

    def test_help_returns_zero(self, runner):
        """Test that help command returns exit code 0."""
        result = runner.invoke(app, ["--help"])
        assert result.exit_code == 0

    def test_help_shows_main_commands(self, runner):
        """Test that help shows all main commands."""
        result = runner.invoke(app, ["--help"])
        output = result.stdout

        for command in [
            "plan",
            "validate",
            "pipeline",
            "preprocess",
            "train",
            "eval",
            "pack",
            "infer",
            "model",
            "tokens",
            "config",
            "doctor",
            "version",
        ]:
            assert command in output


class TestCLIPlan:
    """Test the plan command."""

    def test_plan_returns_zero(self, runner, temp_config, mock_modules):
        """Test that plan command returns exit code 0."""
        with patch("humigence.cli.TrainingPlanner") as mock_planner:
            mock_plan = Mock()
            mock_planner.return_value.plan_training.return_value = mock_plan

            result = runner.invoke(app, ["plan", "--config", str(temp_config)])

            assert result.exit_code == 0

    def test_plan_does_not_train(self, runner, temp_config, mock_modules):
        """Test that plan command does not start training."""
        with patch("humigence.cli.TrainingPlanner") as mock_planner:
            mock_plan = Mock()
            mock_planner.return_value.plan_training.return_value = mock_plan

            result = runner.invoke(app, ["plan", "--config", str(temp_config)])

            mock_planner.return_value.plan_training.assert_called_once()
            assert result.exit_code == 0

    def test_plan_writes_training_plan_json(
        self, runner, temp_config, mock_modules, tmp_path
    ):
        """Test that plan writes training_plan.json."""
        with patch("humigence.cli.TrainingPlanner") as mock_planner:
            mock_plan = Mock()
            mock_planner.return_value.plan_training.return_value = mock_plan

            with patch("humigence.cli.Path.cwd", return_value=tmp_path):
                result = runner.invoke(app, ["plan", "--config", str(temp_config)])

            plan_file = tmp_path / "runs" / "test_project" / "training_plan.json"
            assert plan_file.exists()
            assert result.exit_code == 0


class TestCLITrain:
    """Test the train command."""

    def test_train_without_flag_returns_zero_and_warns(
        self, runner, temp_config, mock_modules
    ):
        """Test that train without --train returns 0 and prints a warning."""
        result = runner.invoke(app, ["train", "--config", str(temp_config)])
        assert result.exit_code == 0
        assert "Training is disabled by default" in result.stdout

    def test_train_with_flag_enables_training(self, runner, temp_config, mock_modules):
        """Test that train with --train flag enables training."""
        with patch("humigence.cli.QLoRATrainer") as mock_trainer:
            mock_trainer.return_value.train.return_value = None

            result = runner.invoke(
                app, ["train", "--config", str(temp_config), "--train"]
            )

            mock_trainer.return_value.train.assert_called_once()
            assert result.exit_code == 0


class TestCLIValidate:
    """Test the validate command."""

    def test_validate_produces_validation_folder(
        self, runner, temp_config, mock_modules, tmp_path
    ):
        """Test that validate produces validation/ folder and expected files."""
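        # Stub subprocess.run so any external commands validate invokes appear
        # to succeed without actually running.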
with patch("humigence.cli.subprocess.run") as mock_subprocess: |
|
|
mock_subprocess.return_value.returncode = 0 |
|
|
mock_subprocess.return_value.stdout = "test output" |
|
|
mock_subprocess.return_value.stderr = "" |
|
|
|
|
|
|
|
|
with patch("humigence.cli.DataPreprocessor") as mock_preprocessor: |
|
|
mock_report = Mock() |
|
|
mock_report.dict.return_value = {"status": "processed"} |
|
|
mock_report.samples = [{"text": "sample"}] |
|
|
mock_preprocessor.return_value.preprocess.return_value = mock_report |
|
|
|
|
|
|
|
|
with patch("humigence.cli.AcceptanceGates") as mock_gates: |
|
|
mock_result = Mock() |
|
|
mock_result.passed = True |
|
|
mock_result.dict.return_value = {"passed": True} |
|
|
mock_gates.return_value.evaluate_training_run.return_value = ( |
|
|
mock_result |
|
|
) |
|
|
|
|
|
|
|
|
with patch("humigence.cli.Path.cwd", return_value=tmp_path): |
|
|
result = runner.invoke( |
|
|
app, ["validate", "--config", str(temp_config)] |
|
|
) |
|
|
|
|
|
|
|
|
validation_dir = tmp_path / "validation" |
|
|
assert validation_dir.exists() |
|
|
assert (validation_dir / "env.txt").exists() |
|
|
assert (validation_dir / "data_report.json").exists() |
|
|
assert result.exit_code == 0 |
|
|
|
|
|
|
|
|

class TestCLIPipeline:
    """Test the pipeline command."""

    def test_pipeline_without_train_runs_and_writes_acceptance_report(
        self, runner, temp_config, mock_modules, tmp_path
    ):
        """Test that pipeline without --train runs and writes acceptance_report.json."""
        with patch.multiple(
            "humigence.cli",
            DataPreprocessor=Mock(),
            ModelEvaluator=Mock(),
            ModelPacker=Mock(),
            AcceptanceGates=Mock(),
        ):
            mock_report = Mock()
            mock_report.dict.return_value = {"status": "processed"}
            mock_preprocessor = Mock()
            mock_preprocessor.preprocess.return_value = mock_report
            # Give the mock a class named after the real collaborator without
            # mutating shared state: assigning to __class__.__name__ after
            # `__class__ = Mock` would rename the Mock class globally.
            mock_preprocessor.__class__ = type("DataPreprocessor", (Mock,), {})

            mock_result = Mock()
            mock_result.passed = True
            mock_result.dict.return_value = {"passed": True}
            mock_gates = Mock()
            mock_gates.evaluate_training_run.return_value = mock_result
            mock_gates.__class__ = type("AcceptanceGates", (Mock,), {})

            with patch(
                "humigence.cli.DataPreprocessor", return_value=mock_preprocessor
            ):
                with patch("humigence.cli.AcceptanceGates", return_value=mock_gates):
                    with patch("humigence.cli.Path.cwd", return_value=tmp_path):
                        result = runner.invoke(
                            app, ["pipeline", "--config", str(temp_config)]
                        )

                    validation_dir = tmp_path / "validation"
                    assert validation_dir.exists()
                    assert (validation_dir / "acceptance_report.json").exists()
                    assert result.exit_code == 0


class TestCLIModel:
    """Test the model command."""

    def test_model_check_returns_zero_and_prints_status(
        self, runner, temp_config, mock_modules
    ):
        """Test that model check returns 0 and prints model path status."""
        with patch("humigence.cli.Path") as mock_path:
            # Pretend the model directory exists and holds a single 1 KiB file.
            mock_file = Mock()
            mock_file.stat.return_value.st_size = 1024
            mock_path.return_value.exists.return_value = True
            mock_path.return_value.rglob.return_value = [mock_file]

            result = runner.invoke(
                app, ["model", "check", "--config", str(temp_config)]
            )

            assert result.exit_code == 0
            assert "Model found at" in result.stdout


class TestCLIConfig:
    """Test the config command."""

    def test_config_set_patches_json_and_persists(
        self, runner, temp_config, mock_modules
    ):
        """Test that config set patches JSON and persists."""
        with open(temp_config) as f:
            original_data = json.load(f)

        result = runner.invoke(
            app,
            [
                "config",
                "set",
                "train.precision_mode",
                "lora_fp16",
                "--config",
                str(temp_config),
            ],
        )

        assert result.exit_code == 0

        with open(temp_config) as f:
            updated_data = json.load(f)

        assert updated_data["train"]["precision_mode"] == "lora_fp16"
        assert (
            original_data["train"]["precision_mode"]
            != updated_data["train"]["precision_mode"]
        )


class TestCLIInfer:
    """Test the infer command."""

    def test_infer_returns_five_when_artifacts_missing(
        self, runner, temp_config, mock_modules
    ):
        """Test that infer returns exit code 5 when artifacts are missing."""
        result = runner.invoke(
            app, ["infer", "--prompt", "hi", "--config", str(temp_config)]
        )

        assert result.exit_code == 5
        assert "Model artifacts not found" in result.stdout


class TestCLIVersion:
    """Test the version command."""

    def test_version_returns_zero(self, runner):
        """Test that version command returns exit code 0."""
        result = runner.invoke(app, ["version"])
        assert result.exit_code == 0

    def test_version_shows_humigence_version(self, runner):
        """Test that version shows Humigence version."""
        result = runner.invoke(app, ["version"])
        assert "Humigence v" in result.stdout


class TestCLIDoctor:
    """Test the doctor command."""

    def test_doctor_returns_zero(self, runner):
        """Test that doctor command returns exit code 0."""
        result = runner.invoke(app, ["doctor"])
        assert result.exit_code == 0

    def test_doctor_runs_diagnostics(self, runner):
        """Test that doctor runs environment diagnostics."""
        result = runner.invoke(app, ["doctor"])
        assert "Environment Diagnostics" in result.stdout


class TestCLIExitCodes:
    """Test CLI exit codes."""

    def test_bad_config_exits_with_code_two(self, runner):
        """Test that a bad config exits with code 2."""
        result = runner.invoke(app, ["plan", "--config", "nonexistent.json"])
        assert result.exit_code == 2

    def test_missing_artifacts_exits_with_code_five(
        self, runner, temp_config, mock_modules
    ):
        """Test that missing artifacts exit with code 5."""
        result = runner.invoke(
            app, ["infer", "--prompt", "hi", "--config", str(temp_config)]
        )
        assert result.exit_code == 5


class TestCLIEnvironmentVariables:
    """Test CLI environment variable handling."""

    def test_train_env_var_enables_training(self, runner, temp_config, mock_modules):
        """Test that TRAIN=1 environment variable enables training."""
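        # patch.dict sets TRAIN=1 for the duration of the block and restores
        # os.environ afterwards.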
with patch.dict("os.environ", {"TRAIN": "1"}): |
|
|
with patch("humigence.cli.QLoRATrainer") as mock_trainer: |
|
|
mock_trainer.return_value.train.return_value = None |
|
|
|
|
|
result = runner.invoke(app, ["train", "--config", str(temp_config)]) |
|
|
|
|
|
|
|
|
mock_trainer.return_value.train.assert_called_once() |
|
|
assert result.exit_code == 0 |
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
pytest.main([__file__]) |
|
|
|