| """Inference layer tests: SSE parsing, fixtures, backend selection.""" |
|
|
| import json |
| from pathlib import Path |
|
|
| import httpx |
| import pytest |
|
|
| from scrypt.inference import build_backend |
| from scrypt.inference.api import OpenAIChatBackend |
| from scrypt.inference.backend import ( |
| RecordingBackend, |
| ReplayBackend, |
| ScriptedBackend, |
| complete, |
| ) |
| from scrypt.inference.local import LocalSetupError, LlamaServer, preflight |
|
|
|
|
| def sse_response(*texts: str) -> bytes: |
| lines = [] |
| for t in texts: |
| payload = {"choices": [{"delta": {"content": t}}]} |
| lines.append(f"data: {json.dumps(payload)}") |
| lines.append("data: [DONE]") |
| return ("\n".join(lines) + "\n").encode() |
|
|
|
|
| async def test_openai_backend_parses_sse_stream(): |
| def handler(request: httpx.Request) -> httpx.Response: |
| body = json.loads(request.content) |
| assert body["stream"] is True |
| assert body["chat_template_kwargs"] == {"enable_thinking": False} |
| assert request.headers["authorization"] == "Bearer k" |
| return httpx.Response( |
| 200, |
| content=sse_response("The ", "scale ", "tips."), |
| headers={"content-type": "text/event-stream"}, |
| ) |
|
|
| client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) |
| backend = OpenAIChatBackend("http://test/v1", api_key="k", client=client) |
| text = await complete(backend, [{"role": "user", "content": "hi"}]) |
| assert text == "The scale tips." |
|
|
|
|
| async def test_record_then_replay_roundtrip(tmp_path: Path): |
| fixture = tmp_path / "fixtures.jsonl" |
| live = ScriptedBackend(default="recorded line") |
| messages = [{"role": "user", "content": "moment"}] |
|
|
| recorder = RecordingBackend(live, fixture) |
| assert await complete(recorder, messages) == "recorded line" |
|
|
| replay = ReplayBackend(fixture) |
| assert await complete(replay, messages) == "recorded line" |
| with pytest.raises(KeyError): |
| await complete(replay, [{"role": "user", "content": "unseen"}]) |
|
|
|
|
| def test_preflight_reports_missing_pieces(monkeypatch, tmp_path): |
| monkeypatch.setattr("scrypt.inference.local.find_binary", lambda: None) |
| monkeypatch.setattr("scrypt.inference.local.llama_cpp_available", lambda: False) |
| monkeypatch.setattr("scrypt.inference.local.SCRYPT_HOME", tmp_path) |
| problems = preflight() |
| assert any("llama-server" in p for p in problems) |
| assert any("model" in p for p in problems) |
|
|
|
|
| def test_installed_model_finds_any_gguf_name(monkeypatch, tmp_path): |
| """A hand-downloaded file with a custom name still counts.""" |
| import scrypt.inference.local as local |
|
|
| monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path) |
| models = tmp_path / "models" |
| models.mkdir() |
| (models / "my-cool-quant-q4.gguf").write_bytes(b"x" * 10) |
| assert local.installed_model().name == "my-cool-quant-q4.gguf" |
|
|
|
|
| def test_server_command_falls_back_to_llama_cpp_python(monkeypatch, tmp_path): |
| import sys |
|
|
| import scrypt.inference.local as local |
|
|
| monkeypatch.setattr(local, "find_binary", lambda: None) |
| monkeypatch.setattr(local, "llama_cpp_available", lambda: True) |
| cmd = local.server_command(tmp_path / "m.gguf", 8731, 8192) |
| assert cmd[:3] == [sys.executable, "-m", "llama_cpp.server"] |
|
|
| monkeypatch.setattr(local, "llama_cpp_available", lambda: False) |
| assert local.server_command(tmp_path / "m.gguf", 8731, 8192) is None |
|
|
|
|
| def test_llama_server_start_refuses_without_setup(monkeypatch): |
| monkeypatch.setattr("scrypt.inference.local.find_binary", lambda: None) |
| with pytest.raises(LocalSetupError): |
| LlamaServer().start() |
|
|
|
|
| def test_build_backend_falls_back_to_scripted(monkeypatch): |
| monkeypatch.delenv("SCRYPT_API_KEY", raising=False) |
| monkeypatch.setenv("SCRYPT_BACKEND", "auto") |
| monkeypatch.setattr("scrypt.inference.preflight", lambda: ["no model"]) |
| backend, server, mode = build_backend() |
| assert mode == "scripted" and server is None |
| assert isinstance(backend, ScriptedBackend) |
|
|
|
|
| def test_build_backend_api_mode(monkeypatch): |
| monkeypatch.setenv("SCRYPT_BACKEND", "api") |
| monkeypatch.setenv("SCRYPT_API_KEY", "sk-test") |
| backend, server, mode = build_backend() |
| assert mode == "api" |
| assert isinstance(backend, OpenAIChatBackend) |
|
|
|
|
| def test_quant_ladder_tiers(monkeypatch): |
| from scrypt.inference.local import choose_quant |
|
|
| monkeypatch.delenv("SCRYPT_QUANT", raising=False) |
| assert choose_quant(128) == "Q8_0" |
| assert choose_quant(96) == "Q8_0" |
| assert choose_quant(64) == "Q5_K_M" |
| assert choose_quant(48) == "Q4_K_S" |
| assert choose_quant(32) == "Q3_K_S" |
| assert choose_quant(16) is None |
| assert choose_quant(0) == "Q4_K_S" |
|
|
|
|
| def test_quant_env_override(monkeypatch): |
| from scrypt.inference.local import choose_quant |
|
|
| monkeypatch.setenv("SCRYPT_QUANT", "Q6_K") |
| assert choose_quant(32) == "Q6_K" |
|
|
|
|
| def test_preflight_names_machine_tier(monkeypatch, tmp_path): |
| import scrypt.inference.local as local |
|
|
| monkeypatch.delenv("SCRYPT_QUANT", raising=False) |
| monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path) |
| monkeypatch.setattr(local, "find_binary", lambda: None) |
| monkeypatch.setattr(local, "system_ram_gb", lambda: 33.0) |
| problems = local.preflight() |
| assert any("Q3_K_S" in p for p in problems) |
|
|
| monkeypatch.setattr(local, "system_ram_gb", lambda: 16.0) |
| problems = local.preflight() |
| assert any("API mode" in p for p in problems) |
| assert not any("Q3_K_S" in p for p in problems) |
|
|
|
|
| def test_installed_model_prefers_heaviest(monkeypatch, tmp_path): |
| import scrypt.inference.local as local |
|
|
| monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path) |
| (tmp_path / "models").mkdir() |
| (tmp_path / "models" / local.model_file("Q3_K_S")).touch() |
| (tmp_path / "models" / local.model_file("Q5_K_M")).touch() |
| assert local.installed_model().name == local.model_file("Q5_K_M") |
|
|