# champ-chatbot — tests/unit/helpers/test_llm_helper.py
# Source snapshot: commit 6fff7cf ("deployment test", verified) by qyle.
import pytest
from unittest.mock import Mock, patch, AsyncMock
from classes.base_models import ChatMessage
from constants import MODEL_MAP
from helpers.llm_helper import (
_call_qwen,
call_llm,
_call_champ,
_call_gemini,
_call_openai,
)
class TestCallOpenAI:
    """Unit tests for the _call_openai streaming helper."""

    @pytest.fixture
    def mock_openai_client(self):
        """A bare Mock standing in for the module-level OpenAI client."""
        return Mock()

    @staticmethod
    def _exhausted_stream():
        """Return an async generator that terminates immediately."""

        async def gen():
            if False:
                yield

        return gen()

    @pytest.mark.asyncio
    async def test_call_openai_streams_response(self, mock_openai_client):
        """Only delta events of the expected type are yielded to the caller."""
        events = [
            Mock(type="response.output_text.delta", delta="Hello "),
            Mock(type="response.output_text.delta", delta="world"),
            Mock(type="other_type", delta="ignored"),
        ]

        async def event_stream():
            for event in events:
                yield event

        mock_openai_client.responses.create = AsyncMock(return_value=event_stream())
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            collected = [
                piece
                async for piece in _call_openai(
                    "gpt-5-mini", [{"role": "user", "content": "test"}]
                )
            ]
        # The wrong-typed third event must be dropped.
        assert collected == ["Hello ", "world"]

    @pytest.mark.asyncio
    async def test_call_openai_correct_parameters(self, mock_openai_client):
        """The OpenAI client receives model, input and stream=True."""
        mock_openai_client.responses.create = AsyncMock(
            return_value=self._exhausted_stream()
        )
        conversation = [{"role": "user", "content": "Hello"}]
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            async for _ in _call_openai("gpt-5-mini", conversation):
                pass
        mock_openai_client.responses.create.assert_called_once_with(
            model="gpt-5-mini", input=conversation, stream=True
        )

    @pytest.mark.asyncio
    async def test_call_openai_empty_stream(self, mock_openai_client):
        """An empty upstream stream yields no chunks at all."""
        mock_openai_client.responses.create = AsyncMock(
            return_value=self._exhausted_stream()
        )
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            collected = [piece async for piece in _call_openai("gpt-5-mini", [])]
        assert collected == []

    @pytest.mark.asyncio
    async def test_call_openai_tracks_impacts(self, mock_openai_client):
        """Environmental impacts are computed and logged once per call."""

        async def single_chunk():
            yield Mock(type="response.output_text.delta", delta="test")

        mock_openai_client.responses.create = AsyncMock(return_value=single_chunk())
        fake_impact = {"gwp": {"min": 0.1, "max": 0.2}}
        with (
            patch("helpers.llm_helper.openai_client", mock_openai_client),
            patch(
                "helpers.llm_helper.get_openai_impacts", return_value=fake_impact
            ) as impacts_mock,
            patch("helpers.llm_helper.log_environment_event") as log_mock,
        ):
            # Drain the generator so the post-stream bookkeeping runs.
            async for _ in _call_openai("gpt-5-mini", []):
                pass
            impacts_mock.assert_called_once()
            log_mock.assert_called_once_with("inference", fake_impact, "openai")
class TestCallGemini:
    """Unit tests for the _call_gemini helper."""

    @pytest.fixture
    def mock_gemini_client(self):
        """Gemini client mock whose generate_content returns a canned reply."""
        canned = Mock()
        canned.text = "Gemini response"
        client = Mock()
        client.models.generate_content.return_value = canned
        return client

    def test_call_gemini_formats_transcript(self, mock_gemini_client):
        """The message list is flattened into a ROLE: content transcript."""
        history = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there"},
        ]
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            _call_gemini("gemini-2.5-flash", history, "google-conservative")
        transcript = mock_gemini_client.models.generate_content.call_args.kwargs[
            "contents"
        ]
        for expected_line in (
            "SYSTEM: You are helpful",
            "USER: Hello",
            "ASSISTANT: Hi there",
        ):
            assert expected_line in transcript

    def test_call_gemini_returns_stripped_text(self, mock_gemini_client):
        """Leading and trailing whitespace is stripped from the reply."""
        padded = Mock()
        padded.text = " Response with whitespace "
        mock_gemini_client.models.generate_content.return_value = padded
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            assert (
                _call_gemini("gemini-2.5-flash", [], "google-creative")
                == "Response with whitespace"
            )

    def test_call_gemini_handles_none_text(self, mock_gemini_client):
        """A None reply text is normalised to the empty string."""
        empty = Mock()
        empty.text = None
        mock_gemini_client.models.generate_content.return_value = empty
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            assert _call_gemini("gemini-2.5-flash", [], "google-conservative") == ""

    def _check_temperature_and_logging(self, mock_gemini_client, model_key, expected):
        """Shared body: verify temperature config and impact logging for a key."""
        reply = Mock()
        reply.text = None
        mock_gemini_client.models.generate_content.return_value = reply
        with (
            patch("helpers.llm_helper.log_environment_event") as log_mock,
            patch("helpers.llm_helper.gemini_client", mock_gemini_client),
        ):
            _call_gemini("gemini-2.5-flash", [], model_key)
            config = mock_gemini_client.models.generate_content.call_args.kwargs[
                "config"
            ]
            assert config["temperature"] == expected
            log_mock.assert_called_once_with("inference", reply.impacts, model_key)

    def test_call_gemini_google_conservative(self, mock_gemini_client):
        """The conservative profile runs at temperature 0.2 and logs impacts."""
        self._check_temperature_and_logging(
            mock_gemini_client, "google-conservative", 0.2
        )

    def test_call_gemini_google_creative(self, mock_gemini_client):
        """The creative profile runs at temperature 1.0 and logs impacts."""
        self._check_temperature_and_logging(mock_gemini_client, "google-creative", 1.0)
class TestCallChamp:
    """Unit tests for the _call_champ helper."""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal two-turn conversation shared by the tests below."""
        return [
            ChatMessage(role="user", content="Hello"),
            ChatMessage(role="assistant", content="Hi"),
        ]

    def test_call_champ_with_no_documents(self, sample_conversation):
        """Without docs, the service triple is passed straight through."""
        service = Mock()
        service.invoke.return_value = (
            "Response",
            {"triage_triggered": False},
            ["doc1", "doc2"],
        )
        with (
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain",
                return_value=[Mock(), Mock()],
            ),
        ):
            reply, meta, context = _call_champ("en", sample_conversation, None)
        assert reply == "Response"
        assert meta == {"triage_triggered": False}
        assert context == ["doc1", "doc2"]

    def test_call_champ_with_documents(self, sample_conversation):
        """User documents trigger creation of a session vector store."""
        docs = ["User doc 1", "User doc 2"]
        service = Mock()
        service.invoke.return_value = (
            "Response with context",
            {"triage_triggered": False},
            [],
        )
        with (
            patch(
                "helpers.llm_helper.create_session_vector_store", return_value=Mock()
            ) as create_store,
            patch("helpers.llm_helper.base_vector_store") as base_store,
            patch("helpers.llm_helper.embedding_model") as embedding,
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain", return_value=[Mock()]
            ),
        ):
            _call_champ("en", sample_conversation, docs)
            # The session store must be built from the base store + documents.
            create_store.assert_called_once_with(base_store, embedding, docs)

    def test_call_champ_converts_messages(self, sample_conversation):
        """The conversation is converted to LangChain messages before invoke."""
        service = Mock()
        service.invoke.return_value = ("Reply", {}, [])
        converted = [Mock(), Mock()]
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain", return_value=converted
            ) as convert,
        ):
            _call_champ("en", sample_conversation, None)
            convert.assert_called_once_with(sample_conversation)
            service.invoke.assert_called_once_with(converted)

    def test_call_champ_french_language(self, sample_conversation):
        """ChampService must be constructed with lang='fr'."""
        service = Mock()
        service.invoke.return_value = ("Réponse", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService") as service_cls,
            patch("helpers.llm_helper.convert_messages_langchain"),
        ):
            service_cls.return_value = service
            _call_champ("fr", sample_conversation, None)
            service_cls.assert_called_once()
            assert service_cls.call_args.kwargs["lang"] == "fr"

    def test_call_champ_tracks_impacts(self):
        """Impacts are computed via get_champ_impacts and logged as 'champ'."""
        impact = Mock()
        service = Mock()
        service.invoke.return_value = ("test response", Mock(), Mock())
        with (
            patch("helpers.llm_helper._get_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch("helpers.llm_helper.convert_messages_langchain"),
            patch(
                "helpers.llm_helper.get_champ_impacts", return_value=impact
            ) as impacts_mock,
            patch("helpers.llm_helper.log_environment_event") as log_mock,
        ):
            _call_champ("en", [], None)
            impacts_mock.assert_called_once()
            log_mock.assert_called_once_with("inference", impact, "champ")
class TestCallQwen:
    """Test the _call_qwen function"""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal two-turn conversation shared by the tests below."""
        return [
            ChatMessage(role="user", content="Hello"),
            ChatMessage(role="assistant", content="Hi"),
        ]

    def test_call_qwen_with_no_documents(self, sample_conversation):
        """Test Qwen call without user documents"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = (
            "Response",
            {"triage_triggered": False},
            ["doc1", "doc2"],
        )
        with (
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_convert.return_value = [Mock(), Mock()]
            reply, meta, context = _call_qwen("en", sample_conversation, None)
        # The service's (reply, meta, context) triple passes straight through.
        assert reply == "Response"
        assert meta == {"triage_triggered": False}
        assert context == ["doc1", "doc2"]

    def test_call_qwen_with_documents(self, sample_conversation):
        """Test Qwen call with user documents"""
        docs = ["User doc 1", "User doc 2"]
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = (
            "Response with context",
            {"triage_triggered": False},
            [],
        )
        with (
            patch(
                "helpers.llm_helper.create_session_vector_store"
            ) as mock_create_store,
            patch("helpers.llm_helper.base_vector_store") as mock_base_store,
            patch("helpers.llm_helper.embedding_model") as mock_embedding,
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_create_store.return_value = Mock()
            mock_convert.return_value = [Mock()]
            _call_qwen("en", sample_conversation, docs)
            # Should create session vector store with documents
            mock_create_store.assert_called_once_with(
                mock_base_store, mock_embedding, docs
            )

    def test_call_qwen_converts_messages(self, sample_conversation):
        """Test that messages are converted to the Qwen adapted format"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = ("Reply", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_converted = [Mock(), Mock()]
            mock_convert.return_value = mock_converted
            _call_qwen("en", sample_conversation, None)
            mock_convert.assert_called_once_with(sample_conversation)
            mock_champ_service.invoke.assert_called_once_with(mock_converted)

    def test_call_qwen_french_language(self, sample_conversation):
        """Test Qwen with French language"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = ("Réponse", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService") as mock_service_class,
            patch("helpers.llm_helper.convert_messages_qwen"),
        ):
            mock_service_class.return_value = mock_champ_service
            # BUG FIX: this test previously called _call_champ, so the Qwen
            # path was never exercised and the convert_messages_qwen patch
            # above was dead code. It must call _call_qwen.
            _call_qwen("fr", sample_conversation, None)
            # Verify ChampService was initialized with "fr"
            mock_service_class.assert_called_once()
            assert mock_service_class.call_args[1]["lang"] == "fr"

    def test_call_qwen_tracks_impacts(self):
        """Impacts are computed via get_qwen_impacts and logged as 'qwen'."""
        mock_impact = Mock()
        mock_champ = Mock()
        mock_champ.invoke.return_value = ("test response", Mock(), Mock())
        with (
            patch("helpers.llm_helper._get_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=mock_champ),
            patch("helpers.llm_helper.convert_messages_qwen"),
            patch(
                "helpers.llm_helper.get_qwen_impacts", return_value=mock_impact
            ) as mock_get_impacts,
            patch("helpers.llm_helper.log_environment_event") as mock_log_event,
        ):
            _call_qwen("en", [], None)
            # Check get_qwen_impacts was called
            mock_get_impacts.assert_called_once()
            mock_log_event.assert_called_once_with("inference", mock_impact, "qwen")
class TestCallLLM:
    """Test the main call_llm function"""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal single-turn conversation."""
        return [
            ChatMessage(role="user", content="Hello"),
        ]

    @staticmethod
    def _silent_async_gen():
        """Return an async generator yielding nothing, for _call_openai mocks."""

        async def gen():
            if False:
                yield

        return gen()

    # ==================== Model Type Validation ====================
    def test_call_llm_invalid_model_type(self, sample_conversation):
        """Test that invalid model type raises ValueError"""
        with pytest.raises(ValueError, match="Unknown model_type: invalid-model"):
            call_llm("invalid-model", "en", sample_conversation, None)

    def test_call_llm_champ_model(self, sample_conversation):
        """Test calling CHAMP model"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", sample_conversation, None)
            mock_champ.assert_called_once_with("en", sample_conversation, None)

    def test_call_llm_openai_model(self, sample_conversation):
        """Test calling OpenAI model"""
        with (
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = [{"role": "user", "content": "test"}]
            mock_openai.return_value = self._silent_async_gen()
            call_llm("openai", "en", sample_conversation, None)
            mock_openai.assert_called_once()
            # Verify model ID from MODEL_MAP is used
            assert mock_openai.call_args[0][0] == MODEL_MAP["openai"]

    def test_call_llm_google_conservative(self, sample_conversation):
        """Test calling Google conservative model"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Response"
            result = call_llm("google-conservative", "en", sample_conversation, None)
            mock_gemini.assert_called_once()
            assert mock_gemini.call_args.args[2] == "google-conservative"
            # Result should be tuple with empty metadata and context
            assert result == ("Response", {}, [])

    def test_call_llm_google_creative(self, sample_conversation):
        """Test calling Google creative model"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Creative response"
            result = call_llm("google-creative", "en", sample_conversation, None)
            mock_gemini.assert_called_once()
            assert mock_gemini.call_args.args[2] == "google-creative"
            assert result == ("Creative response", {}, [])

    # ==================== Message Conversion Tests ====================
    def test_call_llm_converts_messages_for_non_champ(self, sample_conversation):
        """Test that messages are converted for non-CHAMP models"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = [{"role": "user", "content": "converted"}]
            mock_gemini.return_value = "Response"
            call_llm("google-conservative", "en", sample_conversation, None)
            mock_convert.assert_called_once_with(
                sample_conversation, lang="en", docs_content=None
            )

    def test_call_llm_passes_documents_to_converter(self, sample_conversation):
        """Test that documents are passed to message converter"""
        docs = ["doc1", "doc2"]
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Response"
            call_llm("google-conservative", "en", sample_conversation, docs)
            mock_convert.assert_called_once()
            assert mock_convert.call_args[1]["docs_content"] == docs

    def test_call_llm_french_language_passed(self, sample_conversation):
        """Test that French language is passed correctly"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Réponse", {}, [])
            call_llm("champ", "fr", sample_conversation, None)
            mock_champ.assert_called_once()
            assert mock_champ.call_args[0][0] == "fr"

    # ==================== Model Map Tests ====================
    def test_call_llm_uses_model_map(self, sample_conversation):
        """Test that correct model IDs from MODEL_MAP are used"""
        with (
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_openai.return_value = self._silent_async_gen()
            call_llm("openai", "en", sample_conversation, None)
            # Should use model ID from MODEL_MAP
            assert mock_openai.call_args[0][0] == "gpt-5-mini-2025-08-07"

    # ==================== Return Type Tests ====================
    def test_call_llm_champ_returns_tuple(self, sample_conversation):
        """Test that CHAMP returns tuple of (str, dict, list)"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {"key": "value"}, ["context"])
            result = call_llm("champ", "en", sample_conversation, None)
            assert isinstance(result, tuple)
            assert len(result) == 3
            assert isinstance(result[0], str)
            assert isinstance(result[1], dict)
            assert isinstance(result[2], list)

    def test_call_llm_google_returns_tuple(self, sample_conversation):
        """Test that Google models return tuple with empty metadata"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_gemini.return_value = "Response"
            result = call_llm("google-creative", "en", sample_conversation, None)
            assert result == ("Response", {}, [])

    # ==================== Edge Cases ====================
    def test_call_llm_empty_conversation(self):
        """Test with empty conversation"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", [], None)
            mock_champ.assert_called_once_with("en", [], None)

    def test_call_llm_with_documents(self, sample_conversation):
        """Test that documents are passed through correctly"""
        docs = ["Document 1", "Document 2"]
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", sample_conversation, docs)
            assert mock_champ.call_args[0][2] == docs

    # ==================== Integration Tests ====================
    def test_call_llm_all_model_types(self, sample_conversation):
        """Test that all model types in MODEL_MAP are handled"""
        with (
            patch("helpers.llm_helper._call_champ") as mock_champ,
            patch("helpers.llm_helper._call_qwen") as mock_qwen,
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_champ.return_value = ("", {}, [])
            # BUG FIX: _call_qwen was previously not patched, so iterating
            # MODEL_MAP (which contains "qwen") invoked the real Qwen
            # implementation inside this unit test.
            mock_qwen.return_value = ("", {}, [])
            mock_gemini.return_value = ""
            mock_openai.return_value = self._silent_async_gen()
            # Test each model type
            for model_type in MODEL_MAP.keys():
                call_llm(model_type, "en", sample_conversation, None)
            # Each backend should be hit the expected number of times
            assert mock_champ.call_count == 1
            assert mock_qwen.call_count == 1
            assert mock_openai.call_count == 1
            assert mock_gemini.call_count == 2  # conservative + creative
class TestModuleInitialization:
    """Sanity checks on module-level constants."""

    def test_model_map_contains_expected_models(self):
        """MODEL_MAP exposes exactly the known model types, in order."""
        assert list(MODEL_MAP.keys()) == [
            "champ",
            "qwen",
            "openai",
            "google-conservative",
            "google-creative",
        ]

    def test_model_map_values_are_strings(self):
        """Every mapped model identifier is a non-empty string."""
        for model_id in MODEL_MAP.values():
            assert isinstance(model_id, str)
            assert model_id