Scrypt / tests /test_inference.py
IMJONEZZ's picture
SCRYPT: initial commit — game, sandbox, Warden, Space web layer
9fca766
Raw
History Blame Contribute Delete
5.93 kB
"""Inference layer tests: SSE parsing, fixtures, backend selection."""
import json
from pathlib import Path
import httpx
import pytest
from scrypt.inference import build_backend
from scrypt.inference.api import OpenAIChatBackend
from scrypt.inference.backend import (
RecordingBackend,
ReplayBackend,
ScriptedBackend,
complete,
)
from scrypt.inference.local import LocalSetupError, LlamaServer, preflight
def sse_response(*texts: str) -> bytes:
    """Build a fake server-sent-events body of chat-completion deltas.

    Each text becomes one ``data:`` line whose JSON payload carries the text
    under ``choices[0].delta.content``. A closing ``data: [DONE]`` line is
    always appended. Every line is newline-terminated and the result is
    encoded to bytes.
    """
    events = [
        "data: " + json.dumps({"choices": [{"delta": {"content": chunk}}]})
        for chunk in texts
    ]
    events.append("data: [DONE]")
    return "".join(f"{event}\n" for event in events).encode()
async def test_openai_backend_parses_sse_stream():
    """OpenAIChatBackend issues a streaming request and joins the SSE deltas.

    A mock transport inspects the outgoing request (streaming enabled,
    ``enable_thinking`` turned off via ``chat_template_kwargs``, bearer token
    present) and replies with a three-chunk SSE body. ``complete`` is expected
    to return the chunks concatenated in order.
    """

    def handler(request: httpx.Request) -> httpx.Response:
        body = json.loads(request.content)
        assert body["stream"] is True
        assert body["chat_template_kwargs"] == {"enable_thinking": False}
        assert request.headers["authorization"] == "Bearer k"
        return httpx.Response(
            200,
            content=sse_response("The ", "scale ", "tips."),
            headers={"content-type": "text/event-stream"},
        )

    client = httpx.AsyncClient(transport=httpx.MockTransport(handler))
    try:
        backend = OpenAIChatBackend("http://test/v1", api_key="k", client=client)
        text = await complete(backend, [{"role": "user", "content": "hi"}])
    finally:
        # Release the client even when the request or a handler assert fails;
        # closing an already-closed client is a no-op.
        await client.aclose()
    assert text == "The scale tips."
async def test_record_then_replay_roundtrip(tmp_path: Path):
    """A completion recorded to a fixture file replays; unseen prompts raise.

    The same messages are run through a RecordingBackend and then through a
    ReplayBackend reading the same fixture path. Both must yield the scripted
    text, and a prompt that was never recorded must raise ``KeyError``.
    """
    fixture_file = tmp_path / "fixtures.jsonl"
    prompt = [{"role": "user", "content": "moment"}]

    # Record through a RecordingBackend pointed at the fixture path.
    scripted = ScriptedBackend(default="recorded line")
    recording = RecordingBackend(scripted, fixture_file)
    recorded_text = await complete(recording, prompt)
    assert recorded_text == "recorded line"

    # Replay from the same path without the live backend.
    replaying = ReplayBackend(fixture_file)
    replayed_text = await complete(replaying, prompt)
    assert replayed_text == "recorded line"

    unseen_prompt = [{"role": "user", "content": "unseen"}]
    with pytest.raises(KeyError):
        await complete(replaying, unseen_prompt)
def test_preflight_reports_missing_pieces(monkeypatch, tmp_path):
    """preflight() mentions both the server and the model when neither exists.

    The binary lookup and the llama-cpp-python check are stubbed to report
    nothing available, and SCRYPT_HOME points at an empty temp directory.
    """
    stubs = {
        "scrypt.inference.local.find_binary": lambda: None,
        "scrypt.inference.local.llama_cpp_available": lambda: False,
        "scrypt.inference.local.SCRYPT_HOME": tmp_path,
    }
    for target, replacement in stubs.items():
        monkeypatch.setattr(target, replacement)

    findings = preflight()
    for needle in ("llama-server", "model"):
        assert any(needle in finding for finding in findings)
def test_installed_model_finds_any_gguf_name(monkeypatch, tmp_path):
    """installed_model() picks up a .gguf file even under a custom file name."""
    import scrypt.inference.local as local

    custom_name = "my-cool-quant-q4.gguf"
    models_dir = tmp_path / "models"
    models_dir.mkdir()
    models_dir.joinpath(custom_name).write_bytes(b"x" * 10)
    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)

    found = local.installed_model()
    assert found.name == custom_name
def test_server_command_falls_back_to_llama_cpp_python(monkeypatch, tmp_path):
    """Without a server binary, server_command() uses ``llama_cpp.server`` or None.

    With llama-cpp-python reported available, the command starts with the
    current interpreter running ``-m llama_cpp.server``. With it reported
    unavailable, no command is returned.
    """
    import sys

    import scrypt.inference.local as local

    model_path = tmp_path / "m.gguf"
    monkeypatch.setattr(local, "find_binary", lambda: None)

    monkeypatch.setattr(local, "llama_cpp_available", lambda: True)
    command = local.server_command(model_path, 8731, 8192)
    assert command[:3] == [sys.executable, "-m", "llama_cpp.server"]

    monkeypatch.setattr(local, "llama_cpp_available", lambda: False)
    command = local.server_command(model_path, 8731, 8192)
    assert command is None
def test_llama_server_start_refuses_without_setup(monkeypatch):
    """LlamaServer.start() raises LocalSetupError when no server is available.

    Both ways of obtaining a server are stubbed out so the outcome does not
    depend on what happens to be installed on the machine running the tests.
    """
    monkeypatch.setattr("scrypt.inference.local.find_binary", lambda: None)
    # Also disable the llama-cpp-python fallback: with only the binary lookup
    # stubbed, a host that has llama-cpp-python installed could still produce
    # a runnable server command and the test would stop being hermetic.
    monkeypatch.setattr("scrypt.inference.local.llama_cpp_available", lambda: False)
    with pytest.raises(LocalSetupError):
        LlamaServer().start()
def test_build_backend_falls_back_to_scripted(monkeypatch):
    """In "auto" mode with no API key and a failing preflight, scripted wins.

    build_backend() must report mode "scripted", return no server handle, and
    hand back a ScriptedBackend instance.
    """
    monkeypatch.delenv("SCRYPT_API_KEY", raising=False)
    monkeypatch.setenv("SCRYPT_BACKEND", "auto")
    # Make the local setup look broken so the local path is not taken.
    monkeypatch.setattr("scrypt.inference.preflight", lambda: ["no model"])

    chosen, llama_server, selected_mode = build_backend()

    assert selected_mode == "scripted"
    assert llama_server is None
    assert isinstance(chosen, ScriptedBackend)
def test_build_backend_api_mode(monkeypatch):
    """SCRYPT_BACKEND=api with an API key yields an OpenAIChatBackend."""
    for variable, value in (("SCRYPT_BACKEND", "api"), ("SCRYPT_API_KEY", "sk-test")):
        monkeypatch.setenv(variable, value)

    chosen, _server, selected_mode = build_backend()

    assert selected_mode == "api"
    assert isinstance(chosen, OpenAIChatBackend)
def test_quant_ladder_tiers(monkeypatch):
    """choose_quant() maps each size to the expected quant name.

    The argument is presumably system RAM in GB (other tests stub
    ``system_ram_gb``); SCRYPT_QUANT is cleared so no override applies.
    """
    from scrypt.inference.local import choose_quant

    monkeypatch.delenv("SCRYPT_QUANT", raising=False)

    ladder = (
        (128, "Q8_0"),
        (96, "Q8_0"),
        (64, "Q5_K_M"),
        (48, "Q4_K_S"),
        (32, "Q3_K_S"),
    )
    for size, expected_quant in ladder:
        assert choose_quant(size) == expected_quant

    assert choose_quant(16) is None  # booted to API mode
    assert choose_quant(0) == "Q4_K_S"  # unknown RAM -> safe default
def test_quant_env_override(monkeypatch):
    """A SCRYPT_QUANT environment value is returned by choose_quant() as-is."""
    from scrypt.inference.local import choose_quant

    forced_quant = "Q6_K"
    monkeypatch.setenv("SCRYPT_QUANT", forced_quant)

    picked = choose_quant(32)
    assert picked == forced_quant
def test_preflight_names_machine_tier(monkeypatch, tmp_path):
    """preflight() output reflects the quant tier for the reported RAM.

    At 33.0 GB some message names Q3_K_S. At 16.0 GB some message mentions
    "API mode" and none mentions Q3_K_S.
    """
    import scrypt.inference.local as local

    monkeypatch.delenv("SCRYPT_QUANT", raising=False)
    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)
    monkeypatch.setattr(local, "find_binary", lambda: None)

    monkeypatch.setattr(local, "system_ram_gb", lambda: 33.0)
    report = local.preflight()
    assert any("Q3_K_S" in message for message in report)

    monkeypatch.setattr(local, "system_ram_gb", lambda: 16.0)
    report = local.preflight()
    assert any("API mode" in message for message in report)
    assert all("Q3_K_S" not in message for message in report)
def test_installed_model_prefers_heaviest(monkeypatch, tmp_path):
    """With Q3_K_S and Q5_K_M files both present, Q5_K_M is selected.

    Both files are created empty, so the choice cannot rest on file size.
    """
    import scrypt.inference.local as local

    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)
    models_dir = tmp_path / "models"
    models_dir.mkdir()
    for quant in ("Q3_K_S", "Q5_K_M"):
        (models_dir / local.model_file(quant)).touch()

    chosen = local.installed_model()
    assert chosen.name == local.model_file("Q5_K_M")