# champ-chatbot — tests/unit/helpers/test_llm_helper.py
# Source snapshot: commit 6fff7cf ("deployment test", verified) by qyle.
import pytest
from unittest.mock import Mock, patch, AsyncMock
from classes.base_models import ChatMessage
from constants import MODEL_MAP
from helpers.llm_helper import (
_call_qwen,
call_llm,
_call_champ,
_call_gemini,
_call_openai,
)
class TestCallOpenAI:
    """Unit tests for the _call_openai streaming helper."""

    @pytest.fixture
    def mock_openai_client(self):
        """A bare Mock standing in for the module-level OpenAI client."""
        return Mock()

    @staticmethod
    def _exhausted_stream():
        """Return an async generator that terminates immediately."""

        async def gen():
            if False:
                yield

        return gen()

    @pytest.mark.asyncio
    async def test_call_openai_streams_response(self, mock_openai_client):
        """Only delta events of the expected type are yielded to the caller."""
        events = [
            Mock(type="response.output_text.delta", delta="Hello "),
            Mock(type="response.output_text.delta", delta="world"),
            Mock(type="other_type", delta="ignored"),
        ]

        async def event_stream():
            for event in events:
                yield event

        mock_openai_client.responses.create = AsyncMock(return_value=event_stream())
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            collected = [
                piece
                async for piece in _call_openai(
                    "gpt-5-mini", [{"role": "user", "content": "test"}]
                )
            ]
        # The wrong-typed third event must be dropped.
        assert collected == ["Hello ", "world"]

    @pytest.mark.asyncio
    async def test_call_openai_correct_parameters(self, mock_openai_client):
        """The OpenAI client receives model, input and stream=True."""
        mock_openai_client.responses.create = AsyncMock(
            return_value=self._exhausted_stream()
        )
        conversation = [{"role": "user", "content": "Hello"}]
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            async for _ in _call_openai("gpt-5-mini", conversation):
                pass
        mock_openai_client.responses.create.assert_called_once_with(
            model="gpt-5-mini", input=conversation, stream=True
        )

    @pytest.mark.asyncio
    async def test_call_openai_empty_stream(self, mock_openai_client):
        """An empty upstream stream yields no chunks at all."""
        mock_openai_client.responses.create = AsyncMock(
            return_value=self._exhausted_stream()
        )
        with patch("helpers.llm_helper.openai_client", mock_openai_client):
            collected = [piece async for piece in _call_openai("gpt-5-mini", [])]
        assert collected == []

    @pytest.mark.asyncio
    async def test_call_openai_tracks_impacts(self, mock_openai_client):
        """Environmental impacts are computed and logged once per call."""

        async def single_chunk():
            yield Mock(type="response.output_text.delta", delta="test")

        mock_openai_client.responses.create = AsyncMock(return_value=single_chunk())
        fake_impact = {"gwp": {"min": 0.1, "max": 0.2}}
        with (
            patch("helpers.llm_helper.openai_client", mock_openai_client),
            patch(
                "helpers.llm_helper.get_openai_impacts", return_value=fake_impact
            ) as impacts_mock,
            patch("helpers.llm_helper.log_environment_event") as log_mock,
        ):
            # Drain the generator so the post-stream bookkeeping runs.
            async for _ in _call_openai("gpt-5-mini", []):
                pass
            impacts_mock.assert_called_once()
            log_mock.assert_called_once_with("inference", fake_impact, "openai")
class TestCallGemini:
    """Unit tests for the _call_gemini helper."""

    @pytest.fixture
    def mock_gemini_client(self):
        """Gemini client mock whose generate_content returns a canned reply."""
        canned = Mock()
        canned.text = "Gemini response"
        client = Mock()
        client.models.generate_content.return_value = canned
        return client

    def test_call_gemini_formats_transcript(self, mock_gemini_client):
        """The message list is flattened into a ROLE: content transcript."""
        history = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there"},
        ]
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            _call_gemini("gemini-2.5-flash", history, "google-conservative")
        transcript = mock_gemini_client.models.generate_content.call_args.kwargs[
            "contents"
        ]
        for expected_line in (
            "SYSTEM: You are helpful",
            "USER: Hello",
            "ASSISTANT: Hi there",
        ):
            assert expected_line in transcript

    def test_call_gemini_returns_stripped_text(self, mock_gemini_client):
        """Leading and trailing whitespace is stripped from the reply."""
        padded = Mock()
        padded.text = " Response with whitespace "
        mock_gemini_client.models.generate_content.return_value = padded
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            assert (
                _call_gemini("gemini-2.5-flash", [], "google-creative")
                == "Response with whitespace"
            )

    def test_call_gemini_handles_none_text(self, mock_gemini_client):
        """A None reply text is normalised to the empty string."""
        empty = Mock()
        empty.text = None
        mock_gemini_client.models.generate_content.return_value = empty
        with patch("helpers.llm_helper.gemini_client", mock_gemini_client):
            assert _call_gemini("gemini-2.5-flash", [], "google-conservative") == ""

    def _check_temperature_and_logging(self, mock_gemini_client, model_key, expected):
        """Shared body: verify temperature config and impact logging for a key."""
        reply = Mock()
        reply.text = None
        mock_gemini_client.models.generate_content.return_value = reply
        with (
            patch("helpers.llm_helper.log_environment_event") as log_mock,
            patch("helpers.llm_helper.gemini_client", mock_gemini_client),
        ):
            _call_gemini("gemini-2.5-flash", [], model_key)
            config = mock_gemini_client.models.generate_content.call_args.kwargs[
                "config"
            ]
            assert config["temperature"] == expected
            log_mock.assert_called_once_with("inference", reply.impacts, model_key)

    def test_call_gemini_google_conservative(self, mock_gemini_client):
        """The conservative profile runs at temperature 0.2 and logs impacts."""
        self._check_temperature_and_logging(
            mock_gemini_client, "google-conservative", 0.2
        )

    def test_call_gemini_google_creative(self, mock_gemini_client):
        """The creative profile runs at temperature 1.0 and logs impacts."""
        self._check_temperature_and_logging(mock_gemini_client, "google-creative", 1.0)
class TestCallChamp:
    """Unit tests for the _call_champ helper."""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal two-turn conversation shared by the tests below."""
        return [
            ChatMessage(role="user", content="Hello"),
            ChatMessage(role="assistant", content="Hi"),
        ]

    def test_call_champ_with_no_documents(self, sample_conversation):
        """Without docs, the service triple is passed straight through."""
        service = Mock()
        service.invoke.return_value = (
            "Response",
            {"triage_triggered": False},
            ["doc1", "doc2"],
        )
        with (
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain",
                return_value=[Mock(), Mock()],
            ),
        ):
            reply, meta, context = _call_champ("en", sample_conversation, None)
        assert reply == "Response"
        assert meta == {"triage_triggered": False}
        assert context == ["doc1", "doc2"]

    def test_call_champ_with_documents(self, sample_conversation):
        """User documents trigger creation of a session vector store."""
        docs = ["User doc 1", "User doc 2"]
        service = Mock()
        service.invoke.return_value = (
            "Response with context",
            {"triage_triggered": False},
            [],
        )
        with (
            patch(
                "helpers.llm_helper.create_session_vector_store", return_value=Mock()
            ) as create_store,
            patch("helpers.llm_helper.base_vector_store") as base_store,
            patch("helpers.llm_helper.embedding_model") as embedding,
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain", return_value=[Mock()]
            ),
        ):
            _call_champ("en", sample_conversation, docs)
            # The session store must be built from the base store + documents.
            create_store.assert_called_once_with(base_store, embedding, docs)

    def test_call_champ_converts_messages(self, sample_conversation):
        """The conversation is converted to LangChain messages before invoke."""
        service = Mock()
        service.invoke.return_value = ("Reply", {}, [])
        converted = [Mock(), Mock()]
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch(
                "helpers.llm_helper.convert_messages_langchain", return_value=converted
            ) as convert,
        ):
            _call_champ("en", sample_conversation, None)
            convert.assert_called_once_with(sample_conversation)
            service.invoke.assert_called_once_with(converted)

    def test_call_champ_french_language(self, sample_conversation):
        """ChampService must be constructed with lang='fr'."""
        service = Mock()
        service.invoke.return_value = ("Réponse", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService") as service_cls,
            patch("helpers.llm_helper.convert_messages_langchain"),
        ):
            service_cls.return_value = service
            _call_champ("fr", sample_conversation, None)
            service_cls.assert_called_once()
            assert service_cls.call_args.kwargs["lang"] == "fr"

    def test_call_champ_tracks_impacts(self):
        """Impacts are computed via get_champ_impacts and logged as 'champ'."""
        impact = Mock()
        service = Mock()
        service.invoke.return_value = ("test response", Mock(), Mock())
        with (
            patch("helpers.llm_helper._get_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=service),
            patch("helpers.llm_helper.convert_messages_langchain"),
            patch(
                "helpers.llm_helper.get_champ_impacts", return_value=impact
            ) as impacts_mock,
            patch("helpers.llm_helper.log_environment_event") as log_mock,
        ):
            _call_champ("en", [], None)
            impacts_mock.assert_called_once()
            log_mock.assert_called_once_with("inference", impact, "champ")
class TestCallQwen:
    """Test the _call_qwen function"""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal two-turn conversation shared by the tests below."""
        return [
            ChatMessage(role="user", content="Hello"),
            ChatMessage(role="assistant", content="Hi"),
        ]

    def test_call_qwen_with_no_documents(self, sample_conversation):
        """Test Qwen call without user documents"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = (
            "Response",
            {"triage_triggered": False},
            ["doc1", "doc2"],
        )
        with (
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_convert.return_value = [Mock(), Mock()]
            reply, meta, context = _call_qwen("en", sample_conversation, None)
        # The service's (reply, meta, context) triple passes straight through.
        assert reply == "Response"
        assert meta == {"triage_triggered": False}
        assert context == ["doc1", "doc2"]

    def test_call_qwen_with_documents(self, sample_conversation):
        """Test Qwen call with user documents"""
        docs = ["User doc 1", "User doc 2"]
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = (
            "Response with context",
            {"triage_triggered": False},
            [],
        )
        with (
            patch(
                "helpers.llm_helper.create_session_vector_store"
            ) as mock_create_store,
            patch("helpers.llm_helper.base_vector_store") as mock_base_store,
            patch("helpers.llm_helper.embedding_model") as mock_embedding,
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_create_store.return_value = Mock()
            mock_convert.return_value = [Mock()]
            _call_qwen("en", sample_conversation, docs)
            # Should create session vector store with documents
            mock_create_store.assert_called_once_with(
                mock_base_store, mock_embedding, docs
            )

    def test_call_qwen_converts_messages(self, sample_conversation):
        """Test that messages are converted to the Qwen adapted format"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = ("Reply", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=mock_champ_service),
            patch("helpers.llm_helper.convert_messages_qwen") as mock_convert,
        ):
            mock_converted = [Mock(), Mock()]
            mock_convert.return_value = mock_converted
            _call_qwen("en", sample_conversation, None)
            mock_convert.assert_called_once_with(sample_conversation)
            mock_champ_service.invoke.assert_called_once_with(mock_converted)

    def test_call_qwen_french_language(self, sample_conversation):
        """Test Qwen with French language"""
        mock_champ_service = Mock()
        mock_champ_service.invoke.return_value = ("Réponse", {}, [])
        with (
            patch("helpers.llm_helper.base_vector_store"),
            patch("helpers.llm_helper.ChampService") as mock_service_class,
            patch("helpers.llm_helper.convert_messages_qwen"),
        ):
            mock_service_class.return_value = mock_champ_service
            # BUG FIX: this test previously called _call_champ, so the Qwen
            # path was never exercised and the convert_messages_qwen patch
            # above was dead code. It must call _call_qwen.
            _call_qwen("fr", sample_conversation, None)
            # Verify ChampService was initialized with "fr"
            mock_service_class.assert_called_once()
            assert mock_service_class.call_args[1]["lang"] == "fr"

    def test_call_qwen_tracks_impacts(self):
        """Impacts are computed via get_qwen_impacts and logged as 'qwen'."""
        mock_impact = Mock()
        mock_champ = Mock()
        mock_champ.invoke.return_value = ("test response", Mock(), Mock())
        with (
            patch("helpers.llm_helper._get_vector_store"),
            patch("helpers.llm_helper.ChampService", return_value=mock_champ),
            patch("helpers.llm_helper.convert_messages_qwen"),
            patch(
                "helpers.llm_helper.get_qwen_impacts", return_value=mock_impact
            ) as mock_get_impacts,
            patch("helpers.llm_helper.log_environment_event") as mock_log_event,
        ):
            _call_qwen("en", [], None)
            # Check get_qwen_impacts was called
            mock_get_impacts.assert_called_once()
            mock_log_event.assert_called_once_with("inference", mock_impact, "qwen")
class TestCallLLM:
    """Test the main call_llm function"""

    @pytest.fixture
    def sample_conversation(self):
        """A minimal single-turn conversation."""
        return [
            ChatMessage(role="user", content="Hello"),
        ]

    @staticmethod
    def _silent_async_gen():
        """Return an async generator yielding nothing, for _call_openai mocks."""

        async def gen():
            if False:
                yield

        return gen()

    # ==================== Model Type Validation ====================
    def test_call_llm_invalid_model_type(self, sample_conversation):
        """Test that invalid model type raises ValueError"""
        with pytest.raises(ValueError, match="Unknown model_type: invalid-model"):
            call_llm("invalid-model", "en", sample_conversation, None)

    def test_call_llm_champ_model(self, sample_conversation):
        """Test calling CHAMP model"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", sample_conversation, None)
            mock_champ.assert_called_once_with("en", sample_conversation, None)

    def test_call_llm_openai_model(self, sample_conversation):
        """Test calling OpenAI model"""
        with (
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = [{"role": "user", "content": "test"}]
            mock_openai.return_value = self._silent_async_gen()
            call_llm("openai", "en", sample_conversation, None)
            mock_openai.assert_called_once()
            # Verify model ID from MODEL_MAP is used
            assert mock_openai.call_args[0][0] == MODEL_MAP["openai"]

    def test_call_llm_google_conservative(self, sample_conversation):
        """Test calling Google conservative model"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Response"
            result = call_llm("google-conservative", "en", sample_conversation, None)
            mock_gemini.assert_called_once()
            assert mock_gemini.call_args.args[2] == "google-conservative"
            # Result should be tuple with empty metadata and context
            assert result == ("Response", {}, [])

    def test_call_llm_google_creative(self, sample_conversation):
        """Test calling Google creative model"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Creative response"
            result = call_llm("google-creative", "en", sample_conversation, None)
            mock_gemini.assert_called_once()
            assert mock_gemini.call_args.args[2] == "google-creative"
            assert result == ("Creative response", {}, [])

    # ==================== Message Conversion Tests ====================
    def test_call_llm_converts_messages_for_non_champ(self, sample_conversation):
        """Test that messages are converted for non-CHAMP models"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = [{"role": "user", "content": "converted"}]
            mock_gemini.return_value = "Response"
            call_llm("google-conservative", "en", sample_conversation, None)
            mock_convert.assert_called_once_with(
                sample_conversation, lang="en", docs_content=None
            )

    def test_call_llm_passes_documents_to_converter(self, sample_conversation):
        """Test that documents are passed to message converter"""
        docs = ["doc1", "doc2"]
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages") as mock_convert,
        ):
            mock_convert.return_value = []
            mock_gemini.return_value = "Response"
            call_llm("google-conservative", "en", sample_conversation, docs)
            mock_convert.assert_called_once()
            assert mock_convert.call_args[1]["docs_content"] == docs

    def test_call_llm_french_language_passed(self, sample_conversation):
        """Test that French language is passed correctly"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Réponse", {}, [])
            call_llm("champ", "fr", sample_conversation, None)
            mock_champ.assert_called_once()
            assert mock_champ.call_args[0][0] == "fr"

    # ==================== Model Map Tests ====================
    def test_call_llm_uses_model_map(self, sample_conversation):
        """Test that correct model IDs from MODEL_MAP are used"""
        with (
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_openai.return_value = self._silent_async_gen()
            call_llm("openai", "en", sample_conversation, None)
            # Should use model ID from MODEL_MAP
            assert mock_openai.call_args[0][0] == "gpt-5-mini-2025-08-07"

    # ==================== Return Type Tests ====================
    def test_call_llm_champ_returns_tuple(self, sample_conversation):
        """Test that CHAMP returns tuple of (str, dict, list)"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {"key": "value"}, ["context"])
            result = call_llm("champ", "en", sample_conversation, None)
            assert isinstance(result, tuple)
            assert len(result) == 3
            assert isinstance(result[0], str)
            assert isinstance(result[1], dict)
            assert isinstance(result[2], list)

    def test_call_llm_google_returns_tuple(self, sample_conversation):
        """Test that Google models return tuple with empty metadata"""
        with (
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_gemini.return_value = "Response"
            result = call_llm("google-creative", "en", sample_conversation, None)
            assert result == ("Response", {}, [])

    # ==================== Edge Cases ====================
    def test_call_llm_empty_conversation(self):
        """Test with empty conversation"""
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", [], None)
            mock_champ.assert_called_once_with("en", [], None)

    def test_call_llm_with_documents(self, sample_conversation):
        """Test that documents are passed through correctly"""
        docs = ["Document 1", "Document 2"]
        with patch("helpers.llm_helper._call_champ") as mock_champ:
            mock_champ.return_value = ("Reply", {}, [])
            call_llm("champ", "en", sample_conversation, docs)
            assert mock_champ.call_args[0][2] == docs

    # ==================== Integration Tests ====================
    def test_call_llm_all_model_types(self, sample_conversation):
        """Test that all model types in MODEL_MAP are handled"""
        with (
            patch("helpers.llm_helper._call_champ") as mock_champ,
            patch("helpers.llm_helper._call_qwen") as mock_qwen,
            patch("helpers.llm_helper._call_openai") as mock_openai,
            patch("helpers.llm_helper._call_gemini") as mock_gemini,
            patch("helpers.llm_helper.convert_messages"),
        ):
            mock_champ.return_value = ("", {}, [])
            # BUG FIX: _call_qwen was previously not patched, so iterating
            # MODEL_MAP (which contains "qwen") invoked the real Qwen
            # implementation inside this unit test.
            mock_qwen.return_value = ("", {}, [])
            mock_gemini.return_value = ""
            mock_openai.return_value = self._silent_async_gen()
            # Test each model type
            for model_type in MODEL_MAP.keys():
                call_llm(model_type, "en", sample_conversation, None)
            # Each backend should be hit the expected number of times
            assert mock_champ.call_count == 1
            assert mock_qwen.call_count == 1
            assert mock_openai.call_count == 1
            assert mock_gemini.call_count == 2  # conservative + creative
class TestModuleInitialization:
    """Sanity checks on module-level constants."""

    def test_model_map_contains_expected_models(self):
        """MODEL_MAP exposes exactly the known model types, in order."""
        assert list(MODEL_MAP.keys()) == [
            "champ",
            "qwen",
            "openai",
            "google-conservative",
            "google-creative",
        ]

    def test_model_map_values_are_strings(self):
        """Every mapped model identifier is a non-empty string."""
        for model_id in MODEL_MAP.values():
            assert isinstance(model_id, str)
            assert model_id