|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from unittest.mock import MagicMock, patch |
|
|
|
|
|
import numpy as np |
|
|
import pytest |
|
|
|
|
|
from nemo.deploy.nlp.query_llm import NemoQueryLLM, NemoQueryLLMBase, NemoQueryLLMHF, NemoQueryLLMPyTorch |
|
|
|
|
|
|
|
|
class TestNemoQueryLLMBase:
    """Unit tests for the NemoQueryLLMBase class."""

    def test_base_initialization(self):
        """The constructor should store the endpoint URL and model name verbatim."""
        endpoint = "localhost:8000"
        name = "test-model"

        base_query = NemoQueryLLMBase(url=endpoint, model_name=name)

        assert base_query.url == endpoint
        assert base_query.model_name == name
|
|
|
|
|
|
|
|
class TestNemoQueryLLMPyTorch:
    """Unit tests for NemoQueryLLMPyTorch against a mocked Triton ModelClient."""

    @pytest.fixture
    def query(self):
        """Return a query client pointed at a dummy Triton endpoint."""
        return NemoQueryLLMPyTorch(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _mock_triton_client(mock_client, infer_result):
        """Wire the patched ModelClient so that entering its context manager
        yields a mock whose ``infer_batch`` returns *infer_result*.

        The single declared output has dtype ``np.bytes_`` so the client
        treats responses as byte strings to be decoded. Returns the inner
        mock for further inspection if a test needs it.
        """
        mock_instance = MagicMock()
        mock_client.return_value.__enter__.return_value = mock_instance
        mock_instance.infer_batch.return_value = infer_result
        mock_instance.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return mock_instance

    def test_initialization(self, query):
        """The client should subclass NemoQueryLLMBase and keep its ctor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """query_llm should return an OpenAI-style dict with the decoded text."""
        self._mock_triton_client(mock_client, {"sentences": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logprobs(self, mock_client, query):
        """With compute_logprob=True the response should carry token logprobs."""
        self._mock_triton_client(
            mock_client,
            {
                "sentences": np.array([b"test response"]),
                "log_probs": np.array([0.1, 0.2, 0.3]),
            },
        )

        response = query.query_llm(prompts=["test prompt"], max_length=100, compute_logprob=True)

        assert "logprobs" in response["choices"][0]
        assert "token_logprobs" in response["choices"][0]["logprobs"]
|
|
|
|
|
|
|
|
class TestNemoQueryLLMHF:
    """Unit tests for NemoQueryLLMHF against a mocked Triton ModelClient."""

    @pytest.fixture
    def query(self):
        """Return a query client pointed at a dummy Triton endpoint."""
        return NemoQueryLLMHF(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _mock_triton_client(mock_client, infer_result):
        """Wire the patched ModelClient so that entering its context manager
        yields a mock whose ``infer_batch`` returns *infer_result*.

        The single declared output has dtype ``np.bytes_`` so the client
        treats responses as byte strings to be decoded. Returns the inner
        mock for further inspection if a test needs it.
        """
        mock_instance = MagicMock()
        mock_client.return_value.__enter__.return_value = mock_instance
        mock_instance.infer_batch.return_value = infer_result
        mock_instance.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return mock_instance

    def test_initialization(self, query):
        """The client should subclass NemoQueryLLMBase and keep its ctor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """query_llm should return an OpenAI-style dict with the decoded text."""
        self._mock_triton_client(mock_client, {"sentences": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logits(self, mock_client, query):
        """With output_logits=True the response should include a logits entry."""
        self._mock_triton_client(
            mock_client,
            {
                "sentences": np.array([b"test response"]),
                "logits": np.array([[0.1, 0.2, 0.3]]),
            },
        )

        response = query.query_llm(prompts=["test prompt"], max_length=100, output_logits=True)

        assert "logits" in response
|
|
|
|
|
|
|
|
class TestNemoQueryLLM:
    """Unit tests for NemoQueryLLM (TensorRT-LLM-style 'outputs' responses)
    against mocked Triton clients."""

    @pytest.fixture
    def query(self):
        """Return a query client pointed at a dummy Triton endpoint."""
        return NemoQueryLLM(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _mock_triton_client(mock_client, infer_result):
        """Wire a patched client class (ModelClient or DecoupledModelClient)
        so that entering its context manager yields a mock whose
        ``infer_batch`` returns *infer_result*.

        The single declared output has dtype ``np.bytes_`` so the client
        treats responses as byte strings to be decoded. Returns the inner
        mock for further inspection if a test needs it.
        """
        mock_instance = MagicMock()
        mock_client.return_value.__enter__.return_value = mock_instance
        mock_instance.infer_batch.return_value = infer_result
        mock_instance.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return mock_instance

    def test_initialization(self, query):
        """The client should subclass NemoQueryLLMBase and keep its ctor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """By default query_llm should return a list of decoded strings."""
        self._mock_triton_client(mock_client, {"outputs": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(response[0], str)
        assert response[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_openai_format(self, mock_client, query):
        """With openai_format_response=True the result is an OpenAI-style dict."""
        self._mock_triton_client(mock_client, {"outputs": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, openai_format_response=True)

        assert isinstance(response, dict)
        assert "choices" in response
        assert response["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.DecoupledModelClient')
    def test_query_llm_streaming(self, mock_client, query):
        """Streaming should yield one decoded string per partial result.

        The decoupled client's infer_batch is mocked as an iterable of
        partial responses, which query_llm_streaming should decode in order.
        """
        self._mock_triton_client(
            mock_client,
            [
                {"outputs": np.array([b"test"])},
                {"outputs": np.array([b" response"])},
            ],
        )

        responses = list(query.query_llm_streaming(prompts=["test prompt"], max_output_len=100))

        assert len(responses) == 2
        assert responses[0] == "test"
        assert responses[1] == " response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_stop_words(self, mock_client, query):
        """Passing stop_words_list should still produce a decoded string list."""
        self._mock_triton_client(mock_client, {"outputs": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, stop_words_list=["stop"])

        assert isinstance(response[0], str)
        assert response[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_bad_words(self, mock_client, query):
        """Passing bad_words_list should still produce a decoded string list."""
        self._mock_triton_client(mock_client, {"outputs": np.array([b"test response"])})

        response = query.query_llm(prompts=["test prompt"], max_output_len=100, bad_words_list=["bad"])

        assert isinstance(response[0], str)
        assert response[0] == "test response"
|
|
|