Spaces:
Running on Zero
Running on Zero
| from pathlib import Path | |
| import sys | |
| from types import ModuleType | |
| from hackathon_advisor.data import DEFAULT_EMBEDDING_MODEL_FILE, DEFAULT_EMBEDDING_MODEL_REPO | |
| from hackathon_advisor.llama_embedding import ( | |
| DEFAULT_N_CTX, | |
| LlamaCppEmbedder, | |
| SubprocessLlamaCppEmbedder, | |
| create_llama_cpp_embedder, | |
| ) | |
def test_llama_embedder_uses_q8_defaults_and_configured_context(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """Exercise ``LlamaCppEmbedder().embed`` against stubbed native modules.

    Fake ``huggingface_hub`` and ``llama_cpp`` modules are registered in
    ``sys.modules`` so that no real download or model load takes place.  The
    test then checks that:

    * the download is requested for ``DEFAULT_EMBEDDING_MODEL_REPO`` /
      ``DEFAULT_EMBEDDING_MODEL_FILE`` with ``repo_type="model"``;
    * ``Llama`` is constructed with ``n_ctx``, ``n_batch`` and ``n_ubatch``
      all equal to ``DEFAULT_N_CTX``;
    * ``embed`` is invoked with the input text and ``normalize=True``, and
      its result is returned unchanged.
    """
    # A placeholder file stands in for the downloaded model.
    gguf_file = tmp_path / "embedding.gguf"
    gguf_file.write_bytes(b"gguf")

    # Everything the stubs observe is recorded here for later inspection.
    captured: dict = {}

    def fake_hf_hub_download(repo_id: str, filename: str, repo_type: str) -> str:
        captured["download"] = {
            "repo_id": repo_id,
            "filename": filename,
            "repo_type": repo_type,
        }
        return str(gguf_file)

    class FakeLlama:
        def __init__(self, **kwargs) -> None:
            captured["llama_kwargs"] = kwargs

        def embed(self, text: str, normalize: bool) -> list[float]:
            captured["embed"] = {"text": text, "normalize": normalize}
            return [1.0, 0.0]

    fake_hub = ModuleType("huggingface_hub")
    fake_hub.hf_hub_download = fake_hf_hub_download

    fake_llama_cpp = ModuleType("llama_cpp")
    fake_llama_cpp.LLAMA_POOLING_TYPE_MEAN = 1
    fake_llama_cpp.Llama = FakeLlama

    # Registered through monkeypatch so sys.modules is restored after the test.
    for module_name, stub in (
        ("huggingface_hub", fake_hub),
        ("llama_cpp", fake_llama_cpp),
    ):
        monkeypatch.setitem(sys.modules, module_name, stub)

    embedder = LlamaCppEmbedder()
    vector = embedder.embed("private archive")

    assert vector == [1.0, 0.0]

    expected_download = {
        "repo_id": DEFAULT_EMBEDDING_MODEL_REPO,
        "filename": DEFAULT_EMBEDDING_MODEL_FILE,
        "repo_type": "model",
    }
    assert captured["download"] == expected_download

    # Context, batch and micro-batch sizes are all expected to match.
    for size_key in ("n_ctx", "n_batch", "n_ubatch"):
        assert captured["llama_kwargs"][size_key] == DEFAULT_N_CTX

    assert captured["embed"] == {"text": "private archive", "normalize": True}
def test_create_llama_embedder_accepts_explicit_batch(monkeypatch) -> None:
    """Check that ``ADVISOR_EMBEDDING_BATCH=256`` yields ``n_batch == 256``.

    The environment variable is set through ``monkeypatch`` so it is undone
    once the test finishes.
    """
    monkeypatch.setenv("ADVISOR_EMBEDDING_BATCH", "256")

    params = {"dimensions": 768}
    built = create_llama_cpp_embedder(params)

    assert built.n_batch == 256
def test_create_llama_embedder_can_isolate_native_runtime(monkeypatch) -> None:
    """Check that ``ADVISOR_EMBEDDING_SUBPROCESS=1`` selects the subprocess embedder.

    The factory result must be a ``SubprocessLlamaCppEmbedder``; it is closed
    at the end of the test.
    """
    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "1")

    params = {"dimensions": 768}
    isolated = create_llama_cpp_embedder(params)

    assert isinstance(isolated, SubprocessLlamaCppEmbedder)
    isolated.close()
def test_create_llama_embedder_isolates_macos_minicpm_runtime(monkeypatch) -> None:
    """Check the subprocess embedder is chosen for MiniCPM on a Darwin platform.

    ``ADVISOR_EMBEDDING_SUBPROCESS`` is removed from the environment,
    ``ADVISOR_MODEL_BACKEND`` is set to ``minicpm-transformers`` and
    ``platform.system`` (as seen by ``hackathon_advisor.llama_embedding``) is
    stubbed to return ``"Darwin"``.  Under those conditions the factory must
    return a ``SubprocessLlamaCppEmbedder``.
    """

    def report_darwin() -> str:
        return "Darwin"

    # No explicit opt-in or opt-out for subprocess isolation.
    monkeypatch.delenv("ADVISOR_EMBEDDING_SUBPROCESS", raising=False)
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "minicpm-transformers")
    monkeypatch.setattr(
        "hackathon_advisor.llama_embedding.platform.system",
        report_darwin,
    )

    params = {"dimensions": 768}
    isolated = create_llama_cpp_embedder(params)

    assert isinstance(isolated, SubprocessLlamaCppEmbedder)
    isolated.close()
def test_create_llama_embedder_keeps_in_process_when_isolation_disabled(monkeypatch) -> None:
    """Check that ``ADVISOR_EMBEDDING_SUBPROCESS=0`` keeps the in-process embedder.

    The backend is set to ``minicpm-transformers`` and ``platform.system`` (as
    seen by ``hackathon_advisor.llama_embedding``) is stubbed to return
    ``"Darwin"``.  With the environment variable set to ``"0"`` the factory
    must still return a ``LlamaCppEmbedder``.
    """

    def report_darwin() -> str:
        return "Darwin"

    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "0")
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "minicpm-transformers")
    monkeypatch.setattr(
        "hackathon_advisor.llama_embedding.platform.system",
        report_darwin,
    )

    params = {"dimensions": 768}
    in_process = create_llama_cpp_embedder(params)

    assert isinstance(in_process, LlamaCppEmbedder)