Spaces:

build-small-hackathon
/

Scrypt

Running on Zero

App Files Files Community

Scrypt / tests / test_inference.py

IMJONEZZ

SCRYPT: initial commit — game, sandbox, Warden, Space web layer

9fca766 15 days ago

Raw

History Blame Contribute Delete

5.93 kB

	"""Inference layer tests: SSE parsing, fixtures, backend selection."""

	import json
	from pathlib import Path

	import httpx
	import pytest

	from scrypt.inference import build_backend
	from scrypt.inference.api import OpenAIChatBackend
	from scrypt.inference.backend import (
	RecordingBackend,
	ReplayBackend,
	ScriptedBackend,
	complete,
	)
	from scrypt.inference.local import LocalSetupError, LlamaServer, preflight


	def sse_response(*texts: str) -> bytes:
	    """Build the raw bytes of a fake streaming chat-completion response.

	    Each fragment in ``texts`` becomes one ``data: <json>`` line whose JSON
	    payload is ``{"choices": [{"delta": {"content": fragment}}]}``. A final
	    ``data: [DONE]`` line is always appended, and every line ends in a
	    single newline.

	    Args:
	        texts: Content fragments, in the order they should appear.

	    Returns:
	        The encoded response body.
	    """
	    events = [
	        "data: " + json.dumps({"choices": [{"delta": {"content": fragment}}]})
	        for fragment in texts
	    ]
	    events.append("data: [DONE]")
	    # One trailing newline per line, including the last one.
	    return "".join(f"{event}\n" for event in events).encode()


	async def test_openai_backend_parses_sse_stream():
	    """OpenAIChatBackend issues a streaming request and joins the SSE deltas.

	    The mock transport inspects the outgoing request (``stream`` is true,
	    ``chat_template_kwargs`` disables thinking, bearer auth carries the API
	    key) and answers with three content deltas. ``complete`` must return
	    them concatenated.
	    """

	    def handler(request: httpx.Request) -> httpx.Response:
	        body = json.loads(request.content)
	        assert body["stream"] is True
	        assert body["chat_template_kwargs"] == {"enable_thinking": False}
	        assert request.headers["authorization"] == "Bearer k"
	        return httpx.Response(
	            200,
	            content=sse_response("The ", "scale ", "tips."),
	            headers={"content-type": "text/event-stream"},
	        )

	    transport = httpx.MockTransport(handler)
	    # Context manager guarantees the client is closed, even when an
	    # assertion inside the handler or the backend raises.
	    async with httpx.AsyncClient(transport=transport) as client:
	        backend = OpenAIChatBackend("http://test/v1", api_key="k", client=client)
	        text = await complete(backend, [{"role": "user", "content": "hi"}])
	    assert text == "The scale tips."


	async def test_record_then_replay_roundtrip(tmp_path: Path):
	    """A recorded completion replays verbatim; an unrecorded prompt raises KeyError."""
	    fixture_path = tmp_path / "fixtures.jsonl"
	    known_prompt = [{"role": "user", "content": "moment"}]
	    unknown_prompt = [{"role": "user", "content": "unseen"}]

	    # Record phase: the recorder wraps a scripted backend and is given the
	    # fixture path that the replay backend is later pointed at.
	    scripted = ScriptedBackend(default="recorded line")
	    recording = RecordingBackend(scripted, fixture_path)
	    recorded_text = await complete(recording, known_prompt)
	    assert recorded_text == "recorded line"

	    # Replay phase: same prompt gives the same text, with no live backend.
	    replaying = ReplayBackend(fixture_path)
	    replayed_text = await complete(replaying, known_prompt)
	    assert replayed_text == "recorded line"

	    with pytest.raises(KeyError):
	        await complete(replaying, unknown_prompt)

	def test_preflight_reports_missing_pieces(monkeypatch, tmp_path):
	    """preflight() mentions both the missing llama-server and the missing model."""
	    stubs = {
	        "scrypt.inference.local.find_binary": lambda: None,
	        "scrypt.inference.local.llama_cpp_available": lambda: False,
	        "scrypt.inference.local.SCRYPT_HOME": tmp_path,  # empty directory
	    }
	    for target, replacement in stubs.items():
	        monkeypatch.setattr(target, replacement)

	    report = preflight()

	    for keyword in ("llama-server", "model"):
	        assert any(keyword in problem for problem in report)


	def test_installed_model_finds_any_gguf_name(monkeypatch, tmp_path):
	    """A .gguf file with an arbitrary, user-chosen name is still picked up."""
	    import scrypt.inference.local as local

	    custom_name = "my-cool-quant-q4.gguf"
	    model_dir = tmp_path / "models"
	    model_dir.mkdir()
	    model_dir.joinpath(custom_name).write_bytes(b"x" * 10)
	    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)

	    found = local.installed_model()
	    assert found.name == custom_name


	def test_server_command_falls_back_to_llama_cpp_python(monkeypatch, tmp_path):
	    """Without a binary, server_command uses llama_cpp.server, or None if that is absent too."""
	    import sys

	    import scrypt.inference.local as local

	    call_args = (tmp_path / "m.gguf", 8731, 8192)
	    monkeypatch.setattr(local, "find_binary", lambda: None)

	    # llama-cpp-python reported available: run its server module with the
	    # current interpreter.
	    monkeypatch.setattr(local, "llama_cpp_available", lambda: True)
	    command = local.server_command(*call_args)
	    assert command[:3] == [sys.executable, "-m", "llama_cpp.server"]

	    # Neither route available: no command at all.
	    monkeypatch.setattr(local, "llama_cpp_available", lambda: False)
	    command = local.server_command(*call_args)
	    assert command is None


	def test_llama_server_start_refuses_without_setup(monkeypatch):
	    """LlamaServer.start() raises LocalSetupError when no launch route exists.

	    Both the native binary lookup and the llama-cpp-python fallback are
	    stubbed out, so the result does not depend on what happens to be
	    installed on the machine running the tests.
	    """
	    monkeypatch.setattr("scrypt.inference.local.find_binary", lambda: None)
	    # server_command falls back to llama_cpp.server when this returns True,
	    # so it has to be stubbed too for a hermetic "nothing is set up" state.
	    monkeypatch.setattr("scrypt.inference.local.llama_cpp_available", lambda: False)
	    with pytest.raises(LocalSetupError):
	        LlamaServer().start()


	def test_build_backend_falls_back_to_scripted(monkeypatch):
	    """In "auto" mode with no API key and a failing preflight, build_backend
	    returns a ScriptedBackend, no server, and mode "scripted".
	    """
	    monkeypatch.delenv("SCRYPT_API_KEY", raising=False)
	    monkeypatch.setenv("SCRYPT_BACKEND", "auto")
	    # A non-empty problem list stands in for "local setup is incomplete".
	    monkeypatch.setattr("scrypt.inference.preflight", lambda: ["no model"])
	    backend, server, mode = build_backend()
	    # Separate assertions so a failure names the exact condition that broke.
	    assert mode == "scripted"
	    assert server is None
	    assert isinstance(backend, ScriptedBackend)


	def test_build_backend_api_mode(monkeypatch):
	    """SCRYPT_BACKEND=api plus an API key yields an OpenAIChatBackend in "api" mode."""
	    environment = (("SCRYPT_BACKEND", "api"), ("SCRYPT_API_KEY", "sk-test"))
	    for name, value in environment:
	        monkeypatch.setenv(name, value)

	    backend, _server, mode = build_backend()

	    assert mode == "api"
	    assert isinstance(backend, OpenAIChatBackend)


	def test_quant_ladder_tiers(monkeypatch):
	    """choose_quant maps each RAM figure to the expected quantization tier."""
	    from scrypt.inference.local import choose_quant

	    # Make sure no environment override is in effect.
	    monkeypatch.delenv("SCRYPT_QUANT", raising=False)

	    ladder = [
	        (128, "Q8_0"),
	        (96, "Q8_0"),
	        (64, "Q5_K_M"),
	        (48, "Q4_K_S"),
	        (32, "Q3_K_S"),
	    ]
	    for ram, expected_quant in ladder:
	        assert choose_quant(ram) == expected_quant

	    assert choose_quant(16) is None  # booted to API mode
	    assert choose_quant(0) == "Q4_K_S"  # unknown RAM -> safe default


	def test_quant_env_override(monkeypatch):
	    """A SCRYPT_QUANT environment value is returned instead of the RAM-based tier."""
	    from scrypt.inference.local import choose_quant

	    forced_quant = "Q6_K"
	    monkeypatch.setenv("SCRYPT_QUANT", forced_quant)

	    picked = choose_quant(32)
	    assert picked == forced_quant


	def test_preflight_names_machine_tier(monkeypatch, tmp_path):
	    """preflight() names Q3_K_S at 33 GB of RAM and points to API mode at 16 GB."""
	    import scrypt.inference.local as local

	    monkeypatch.delenv("SCRYPT_QUANT", raising=False)
	    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)
	    monkeypatch.setattr(local, "find_binary", lambda: None)

	    def report_for(ram):
	        # Re-stub the RAM probe, then collect a fresh problem list.
	        monkeypatch.setattr(local, "system_ram_gb", lambda: ram)
	        return local.preflight()

	    report = report_for(33.0)
	    assert any("Q3_K_S" in entry for entry in report)

	    report = report_for(16.0)
	    assert any("API mode" in entry for entry in report)
	    assert all("Q3_K_S" not in entry for entry in report)


	def test_installed_model_prefers_heaviest(monkeypatch, tmp_path):
	    """With Q3_K_S and Q5_K_M files both present, installed_model() picks Q5_K_M."""
	    import scrypt.inference.local as local

	    monkeypatch.setattr(local, "SCRYPT_HOME", tmp_path)
	    model_dir = tmp_path / "models"
	    model_dir.mkdir()
	    for quant in ("Q3_K_S", "Q5_K_M"):
	        (model_dir / local.model_file(quant)).touch()

	    chosen = local.installed_model()
	    assert chosen.name == local.model_file("Q5_K_M")