Spaces:
Running on Zero
Running on Zero
File size: 3,362 Bytes
ca766b5 d0718ca ca766b5 bb40b5a ca766b5 d0718ca | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | from pathlib import Path
import sys
from types import ModuleType
from hackathon_advisor.data import DEFAULT_EMBEDDING_MODEL_FILE, DEFAULT_EMBEDDING_MODEL_REPO
from hackathon_advisor.llama_embedding import (
DEFAULT_N_CTX,
LlamaCppEmbedder,
SubprocessLlamaCppEmbedder,
create_llama_cpp_embedder,
)
def test_llama_embedder_uses_q8_defaults_and_configured_context(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """Check the default download arguments and context sizing of ``LlamaCppEmbedder``.

    Stand-in ``huggingface_hub`` and ``llama_cpp`` modules are installed in
    ``sys.modules`` so the arguments handed to them can be recorded and
    asserted on after a single ``embed`` call.
    """
    recorded: dict = {}

    # Placeholder file whose path the stand-in hub download returns.
    gguf_file = tmp_path / "embedding.gguf"
    gguf_file.write_bytes(b"gguf")

    def record_download(repo_id: str, filename: str, repo_type: str) -> str:
        """Remember the download arguments and return the placeholder path."""
        recorded["download"] = {
            "repo_id": repo_id,
            "filename": filename,
            "repo_type": repo_type,
        }
        return str(gguf_file)

    class RecordingLlama:
        """Stand-in for ``llama_cpp.Llama`` that records how it is used."""

        def __init__(self, **kwargs) -> None:
            recorded["llama_kwargs"] = kwargs

        def embed(self, text: str, normalize: bool) -> list[float]:
            recorded["embed"] = {"text": text, "normalize": normalize}
            return [1.0, 0.0]

    fake_hub = ModuleType("huggingface_hub")
    fake_hub.hf_hub_download = record_download

    fake_llama = ModuleType("llama_cpp")
    fake_llama.LLAMA_POOLING_TYPE_MEAN = 1
    fake_llama.Llama = RecordingLlama

    # Each stand-in is registered under the module name it was created with.
    for stand_in in (fake_hub, fake_llama):
        monkeypatch.setitem(sys.modules, stand_in.__name__, stand_in)

    embedder = LlamaCppEmbedder()
    result = embedder.embed("private archive")

    assert result == [1.0, 0.0]

    expected_download = {
        "repo_id": DEFAULT_EMBEDDING_MODEL_REPO,
        "filename": DEFAULT_EMBEDDING_MODEL_FILE,
        "repo_type": "model",
    }
    assert recorded["download"] == expected_download

    # Context, batch and micro-batch sizes are all expected to equal DEFAULT_N_CTX.
    llama_kwargs = recorded["llama_kwargs"]
    for size_key in ("n_ctx", "n_batch", "n_ubatch"):
        assert llama_kwargs[size_key] == DEFAULT_N_CTX

    assert recorded["embed"] == {"text": "private archive", "normalize": True}
def test_create_llama_embedder_accepts_explicit_batch(monkeypatch) -> None:
    """With ``ADVISOR_EMBEDDING_BATCH`` set to "256", the embedder reports ``n_batch == 256``."""
    monkeypatch.setenv("ADVISOR_EMBEDDING_BATCH", "256")

    settings = {"dimensions": 768}
    created = create_llama_cpp_embedder(settings)

    assert created.n_batch == 256
def test_create_llama_embedder_can_isolate_native_runtime(monkeypatch) -> None:
    """With ``ADVISOR_EMBEDDING_SUBPROCESS`` set to "1", the factory returns the subprocess embedder."""
    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "1")

    settings = {"dimensions": 768}
    created = create_llama_cpp_embedder(settings)

    assert isinstance(created, SubprocessLlamaCppEmbedder)
    # Release whatever the subprocess-backed embedder holds once the check passed.
    created.close()
def test_create_llama_embedder_isolates_macos_minicpm_runtime(monkeypatch) -> None:
    """On a simulated Darwin host with the minicpm-transformers backend, expect the subprocess embedder.

    ``ADVISOR_EMBEDDING_SUBPROCESS`` is removed from the environment, so no
    explicit override is in effect for this check.
    """

    def report_darwin() -> str:
        return "Darwin"

    monkeypatch.delenv("ADVISOR_EMBEDDING_SUBPROCESS", raising=False)
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "minicpm-transformers")
    monkeypatch.setattr("hackathon_advisor.llama_embedding.platform.system", report_darwin)

    settings = {"dimensions": 768}
    created = create_llama_cpp_embedder(settings)

    assert isinstance(created, SubprocessLlamaCppEmbedder)
    created.close()
def test_create_llama_embedder_keeps_in_process_when_isolation_disabled(monkeypatch) -> None:
    """With ``ADVISOR_EMBEDDING_SUBPROCESS`` set to "0", expect the in-process embedder.

    The platform is reported as Darwin and the backend is minicpm-transformers,
    yet the created embedder must still be a ``LlamaCppEmbedder``.
    """

    def report_darwin() -> str:
        return "Darwin"

    monkeypatch.setenv("ADVISOR_EMBEDDING_SUBPROCESS", "0")
    monkeypatch.setenv("ADVISOR_MODEL_BACKEND", "minicpm-transformers")
    monkeypatch.setattr("hackathon_advisor.llama_embedding.platform.system", report_darwin)

    settings = {"dimensions": 768}
    created = create_llama_cpp_embedder(settings)

    assert isinstance(created, LlamaCppEmbedder)
|