"""
Tests for the model-backend dispatcher (run_model).

These tests mock the inner _run_* functions so they don't load any real
model — only the routing logic is exercised.
"""

import os
import sys
import unittest
from unittest import mock

HERE = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(HERE)
sys.path.insert(0, ROOT)

# Force a backend value before importing app so module-level reads don't
# trigger a real model load.
os.environ.setdefault("KINTSUGI_BACKEND", "ollama")

import app  # noqa: E402

# Positional arguments shared by every test that calls run_model or a
# _run_* backend function. Defined once so the test classes cannot drift.
_SAMPLE_ARGS = (
    "I dreamt of a river.",  # text
    "Dream",                 # entry_type
    2,                       # depth (DEPTH_LABELS key)
    [],                      # symbol_matches
    False,                   # grounded_jungian
    True,                    # include_question
)


class DispatcherRoutingTests(unittest.TestCase):
    """run_model must route based on BACKEND."""

    SAMPLE_ARGS = _SAMPLE_ARGS

    def test_routes_to_ollama_when_backend_is_ollama(self):
        """BACKEND == "ollama" forwards the call to _run_ollama."""
        with mock.patch.object(app, "BACKEND", "ollama"), \
                mock.patch.object(
                    app, "_run_ollama", return_value=("ok", None)
                ) as m:
            text, err = app.run_model(*self.SAMPLE_ARGS)
        m.assert_called_once_with(*self.SAMPLE_ARGS)
        self.assertEqual(text, "ok")
        self.assertIsNone(err)

    def test_routes_to_transformers_when_backend_is_transformers(self):
        """BACKEND == "transformers" forwards the call to _run_transformers."""
        with mock.patch.object(app, "BACKEND", "transformers"), \
                mock.patch.object(
                    app, "_run_transformers", return_value=("tx", None)
                ) as m:
            text, err = app.run_model(*self.SAMPLE_ARGS)
        m.assert_called_once_with(*self.SAMPLE_ARGS)
        self.assertEqual(text, "tx")
        self.assertIsNone(err)

    def test_routes_to_llama_cpp_when_backend_is_llama_cpp(self):
        """BACKEND == "llama_cpp" forwards the call to _run_llama_cpp."""
        with mock.patch.object(app, "BACKEND", "llama_cpp"), \
                mock.patch.object(
                    app, "_run_llama_cpp", return_value=("lc", None)
                ) as m:
            text, err = app.run_model(*self.SAMPLE_ARGS)
        m.assert_called_once_with(*self.SAMPLE_ARGS)
        self.assertEqual(text, "lc")
        self.assertIsNone(err)

    def test_routes_to_llama_cpp_for_unknown_backend(self):
        """An unrecognised BACKEND value is routed to _run_llama_cpp."""
        # Unknown values fall through to the default (llama_cpp).
        with mock.patch.object(app, "BACKEND", "something-unrecognised"), \
                mock.patch.object(
                    app, "_run_llama_cpp", return_value=("lc", None)
                ) as m:
            app.run_model(*self.SAMPLE_ARGS)
        m.assert_called_once_with(*self.SAMPLE_ARGS)


class LlamaCppLoaderTests(unittest.TestCase):
    """_load_llama_cpp_model must lazy-load once and cache the instance.

    NOTE: these tests patch ``llama_cpp.Llama.from_pretrained`` by dotted
    name; mock.patch imports the target module to resolve it, so the
    ``llama_cpp`` package has to be importable even though no model is
    loaded.
    """

    def setUp(self):
        """Start each test with an empty module-level cache.

        The attributes are patched rather than assigned so the values that
        were present before the test are restored afterwards; otherwise the
        fake model cached by one test would stay in ``app`` for anything
        that runs later in the same process.
        """
        for attr in ("_LLAMA_CPP_MODEL", "_LLAMA_CPP_ERROR"):
            # create=True mirrors plain assignment, which also works when
            # the attribute does not exist yet.
            patcher = mock.patch.object(app, attr, None, create=True)
            patcher.start()
            self.addCleanup(patcher.stop)

    def test_returns_cached_instance_on_second_call(self):
        """Two loads return the same instance and construct it only once."""
        fake_llama = mock.MagicMock(name="FakeLlama")
        with mock.patch(
            "llama_cpp.Llama.from_pretrained", return_value=fake_llama
        ) as ctor:
            first, err1 = app._load_llama_cpp_model()
            second, err2 = app._load_llama_cpp_model()
        self.assertIs(first, fake_llama)
        self.assertIs(second, fake_llama)
        self.assertIsNone(err1)
        self.assertIsNone(err2)
        ctor.assert_called_once()
        # And the call used our configured repo / file.
        kwargs = ctor.call_args.kwargs
        self.assertEqual(kwargs.get("repo_id"), app.LLAMA_REPO)
        self.assertEqual(kwargs.get("filename"), app.LLAMA_FILE)
        self.assertEqual(kwargs.get("n_ctx"), app.LLAMA_CTX)

    def test_returns_error_when_loader_raises(self):
        """A loader exception is reported as (None, message), not raised."""
        with mock.patch(
            "llama_cpp.Llama.from_pretrained",
            side_effect=RuntimeError("download failed"),
        ):
            instance, err = app._load_llama_cpp_model()
        self.assertIsNone(instance)
        self.assertIsNotNone(err)
        self.assertIn("download failed", err)


class RunLlamaCppTests(unittest.TestCase):
    """_run_llama_cpp must format messages correctly and extract output."""

    SAMPLE_ARGS = _SAMPLE_ARGS

    def _fake_llama(self, content="A symbolic reading."):
        """Return a mock model whose chat completion yields *content*."""
        fake = mock.MagicMock(name="FakeLlama")
        fake.create_chat_completion.return_value = {
            "choices": [
                {"message": {"role": "assistant", "content": content}}
            ]
        }
        return fake

    def test_returns_extracted_content(self):
        """The assistant message content is returned with no error."""
        fake = self._fake_llama("River as threshold symbol.")
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(fake, None)
        ):
            text, err = app._run_llama_cpp(*self.SAMPLE_ARGS)
        self.assertEqual(text, "River as threshold symbol.")
        self.assertIsNone(err)

    def test_appends_no_think_to_user_message(self):
        """The user message sent to the model contains "/no_think"."""
        fake = self._fake_llama()
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(fake, None)
        ):
            app._run_llama_cpp(*self.SAMPLE_ARGS)
        call_kwargs = fake.create_chat_completion.call_args.kwargs
        messages = call_kwargs["messages"]
        user_msg = next(m for m in messages if m["role"] == "user")
        self.assertIn("/no_think", user_msg["content"])

    def test_passes_system_prompt(self):
        """The system message content equals app.SYSTEM_PROMPT."""
        fake = self._fake_llama()
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(fake, None)
        ):
            app._run_llama_cpp(*self.SAMPLE_ARGS)
        messages = fake.create_chat_completion.call_args.kwargs["messages"]
        system_msg = next(m for m in messages if m["role"] == "system")
        self.assertEqual(system_msg["content"], app.SYSTEM_PROMPT)

    def test_passes_generation_params(self):
        """GEN_CONFIG values are forwarded under llama.cpp keyword names."""
        fake = self._fake_llama()
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(fake, None)
        ):
            app._run_llama_cpp(*self.SAMPLE_ARGS)
        kwargs = fake.create_chat_completion.call_args.kwargs
        self.assertEqual(kwargs["temperature"], app.GEN_CONFIG["temperature"])
        self.assertEqual(kwargs["top_p"], app.GEN_CONFIG["top_p"])
        self.assertEqual(
            kwargs["max_tokens"], app.GEN_CONFIG["max_new_tokens"]
        )
        self.assertEqual(
            kwargs["repeat_penalty"], app.GEN_CONFIG["repetition_penalty"]
        )

    def test_returns_error_when_loader_fails(self):
        """A loader error is passed through with empty text."""
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(None, "boom")
        ):
            text, err = app._run_llama_cpp(*self.SAMPLE_ARGS)
        self.assertEqual(text, "")
        self.assertEqual(err, "boom")

    def test_returns_error_on_empty_output(self):
        """Whitespace-only model output yields empty text and an error."""
        fake = self._fake_llama(content="   ")
        with mock.patch.object(
            app, "_load_llama_cpp_model", return_value=(fake, None)
        ):
            text, err = app._run_llama_cpp(*self.SAMPLE_ARGS)
        self.assertEqual(text, "")
        self.assertIsNotNone(err)


if __name__ == "__main__":
    unittest.main()