from pathlib import Path
import sys
from types import ModuleType

from hackathon_advisor.data import DEFAULT_EMBEDDING_MODEL_FILE, DEFAULT_EMBEDDING_MODEL_REPO
from hackathon_advisor.llama_embedding import (
    DEFAULT_N_CTX,
    LlamaCppEmbedder,
    SubprocessLlamaCppEmbedder,
    create_llama_cpp_embedder,
)


def _simulate_macos_minicpm_backend(monkeypatch) -> None:
    """Select the MiniCPM transformers backend and report the platform as Darwin."""
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "minicpm-transformers")
    monkeypatch.setattr("hackathon_advisor.llama_embedding.platform.system", lambda: "Darwin")


def test_llama_embedder_uses_q8_defaults_and_configured_context(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """A default ``LlamaCppEmbedder`` fetches the default model and sizes its context.

    ``huggingface_hub`` and ``llama_cpp`` are swapped for in-memory stubs that
    record how they were called, so no download or native library is involved.
    """
    gguf_file = tmp_path / "embedding.gguf"
    gguf_file.write_bytes(b"gguf")
    recorded: dict = {}

    def fake_hf_hub_download(repo_id: str, filename: str, repo_type: str) -> str:
        recorded["download"] = {
            "repo_id": repo_id,
            "filename": filename,
            "repo_type": repo_type,
        }
        return str(gguf_file)

    class FakeLlama:
        def __init__(self, **kwargs) -> None:
            recorded["llama_kwargs"] = kwargs

        def embed(self, text: str, normalize: bool) -> list[float]:
            recorded["embed"] = {"text": text, "normalize": normalize}
            return [1.0, 0.0]

    # Assemble both stub modules, then install them in sys.modules.
    hub_stub = ModuleType("huggingface_hub")
    hub_stub.hf_hub_download = fake_hf_hub_download

    llama_stub = ModuleType("llama_cpp")
    llama_stub.LLAMA_POOLING_TYPE_MEAN = 1
    llama_stub.Llama = FakeLlama

    monkeypatch.setitem(sys.modules, "huggingface_hub", hub_stub)
    monkeypatch.setitem(sys.modules, "llama_cpp", llama_stub)

    result = LlamaCppEmbedder().embed("private archive")

    assert result == [1.0, 0.0]
    assert recorded["download"] == {
        "repo_id": DEFAULT_EMBEDDING_MODEL_REPO,
        "filename": DEFAULT_EMBEDDING_MODEL_FILE,
        "repo_type": "model",
    }
    # The context, batch and micro-batch sizes must all equal the default context.
    llama_kwargs = recorded["llama_kwargs"]
    for size_key in ("n_ctx", "n_batch", "n_ubatch"):
        assert llama_kwargs[size_key] == DEFAULT_N_CTX
    assert recorded["embed"] == {"text": "private archive", "normalize": True}


def test_create_llama_embedder_accepts_explicit_batch(monkeypatch) -> None:
    """``ADVISOR_EMBEDDING_BATCH`` sets ``n_batch`` on the created embedder."""
    monkeypatch.setenv("ADVISOR_EMBEDDING_BATCH", "256")

    created = create_llama_cpp_embedder({"dimensions": 768})

    assert created.n_batch == 256


def test_create_llama_embedder_can_isolate_native_runtime(monkeypatch) -> None:
    """``ADVISOR_EMBEDDING_SUBPROCESS=1`` yields a subprocess-backed embedder."""
    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "1")

    created = create_llama_cpp_embedder({"dimensions": 768})

    assert isinstance(created, SubprocessLlamaCppEmbedder)
    created.close()


def test_create_llama_embedder_isolates_macos_minicpm_runtime(monkeypatch) -> None:
    """With no explicit override, Darwin plus the MiniCPM backend gives a subprocess embedder."""
    monkeypatch.delenv("ADVISOR_EMBEDDING_SUBPROCESS", raising=False)
    _simulate_macos_minicpm_backend(monkeypatch)

    created = create_llama_cpp_embedder({"dimensions": 768})

    assert isinstance(created, SubprocessLlamaCppEmbedder)
    created.close()


def test_create_llama_embedder_keeps_in_process_when_isolation_disabled(monkeypatch) -> None:
    """``ADVISOR_EMBEDDING_SUBPROCESS=0`` keeps the in-process embedder on Darwin with MiniCPM."""
    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "0")
    _simulate_macos_minicpm_backend(monkeypatch)

    created = create_llama_cpp_embedder({"dimensions": 768})

    assert isinstance(created, LlamaCppEmbedder)