import copy
from unittest.mock import MagicMock, patch

import pytest
from litellm.exceptions import (
    APIConnectionError,
    InternalServerError,
    RateLimitError,
    ServiceUnavailableError,
)

from openhands.core.config import LLMConfig
from openhands.core.exceptions import OperationCancelled
from openhands.core.message import Message, TextContent
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics


@pytest.fixture(autouse=True)
def mock_logger(monkeypatch):
    # Route the prompt/response loggers and the module logger to a mock so tests
    # don't write prompt or completion logs.
    mock_logger = MagicMock()
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_prompt_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.debug_mixin.llm_response_logger', mock_logger)
    monkeypatch.setattr('openhands.llm.llm.logger', mock_logger)
    return mock_logger


@pytest.fixture
def default_config():
    return LLMConfig(
        model='gpt-4o',
        api_key='test_key',
        num_retries=2,
        retry_min_wait=1,
        retry_max_wait=2,
    )


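# Note: num_retries and the retry wait bounds above are kept small so the retry
# tests further down complete quickly.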
def test_llm_init_with_default_config(default_config):
    llm = LLM(default_config)
    assert llm.config.model == 'gpt-4o'
    assert llm.config.api_key.get_secret_value() == 'test_key'
    assert isinstance(llm.metrics, Metrics)
    assert llm.metrics.model_name == 'gpt-4o'


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_model_info(mock_get_model_info, default_config):
    mock_get_model_info.return_value = {
        'max_input_tokens': 8000,
        'max_output_tokens': 2000,
    }
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 8000
    assert llm.config.max_output_tokens == 2000


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_without_model_info(mock_get_model_info, default_config):
    mock_get_model_info.side_effect = Exception('Model info not available')
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 4096
    assert llm.config.max_output_tokens == 4096


def test_llm_init_with_custom_config():
    custom_config = LLMConfig(
        model='custom-model',
        api_key='custom_key',
        max_input_tokens=5000,
        max_output_tokens=1500,
        temperature=0.8,
        top_p=0.9,
    )
    llm = LLM(custom_config)
    assert llm.config.model == 'custom-model'
    assert llm.config.api_key.get_secret_value() == 'custom_key'
    assert llm.config.max_input_tokens == 5000
    assert llm.config.max_output_tokens == 1500
    assert llm.config.temperature == 0.8
    assert llm.config.top_p == 0.9


def test_llm_init_with_metrics():
    config = LLMConfig(model='gpt-4o', api_key='test_key')
    metrics = Metrics()
    llm = LLM(config, metrics=metrics)
    assert llm.metrics is metrics
    assert (
        llm.metrics.model_name == 'default'
    )  # Metrics() was created without a model name, so it keeps the 'default' label


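# Latency tracking: time.time() is patched so the recorded latency is exactly the
# difference between the mocked start and end timestamps.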
@patch('openhands.llm.llm.litellm_completion')
@patch('time.time')
def test_response_latency_tracking(mock_time, mock_litellm_completion):
    # Mock time.time() to return controlled start and end timestamps
    mock_time.side_effect = [1000.0, 1002.5]

    # Mock the completion response
    mock_response = {
        'id': 'test-response-123',
        'choices': [{'message': {'content': 'Test response'}}],
    }
    mock_litellm_completion.return_value = mock_response

    # Create an LLM instance and make a completion call
    config = LLMConfig(model='gpt-4o', api_key='test_key')
    llm = LLM(config)
    response = llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])

    # Verify the latency was recorded against the right model and response
    assert len(llm.metrics.response_latencies) == 1
    latency_record = llm.metrics.response_latencies[0]
    assert latency_record.model == 'gpt-4o'
    assert (
        latency_record.latency == 2.5
    )  # 1002.5 - 1000.0 from the mocked timestamps
    assert latency_record.response_id == 'test-response-123'

    # The completion response itself is passed through unchanged
    assert response['id'] == 'test-response-123'
    assert response['choices'][0]['message']['content'] == 'Test response'

    # A negative time delta (end before start) must not produce a negative latency
    mock_time.side_effect = [1000.0, 999.0]
    llm.completion(messages=[{'role': 'user', 'content': 'Hello!'}])

    # The second latency record is clamped to 0.0
    assert len(llm.metrics.response_latencies) == 2
    latency_record = llm.metrics.response_latencies[-1]
    assert latency_record.latency == 0.0


def test_llm_reset():
    llm = LLM(LLMConfig(model='gpt-4o-mini', api_key='test_key'))
    initial_metrics = copy.deepcopy(llm.metrics)
    initial_metrics.add_cost(1.0)
    initial_metrics.add_response_latency(0.5, 'test-id')
    llm.reset()
    assert llm.metrics.accumulated_cost != initial_metrics.accumulated_cost
    assert llm.metrics.costs != initial_metrics.costs
    assert llm.metrics.response_latencies != initial_metrics.response_latencies
    assert isinstance(llm.metrics, Metrics)


@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    default_config.model = 'openrouter:gpt-4o-mini'
    mock_get_model_info.return_value = {
        'max_input_tokens': 7000,
        'max_output_tokens': 1500,
    }
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 7000
    assert llm.config.max_output_tokens == 1500
    mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')


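# Completion behavior: logging, retries, rate limiting, and interrupt handling.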
@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_mocked_logger(
    mock_litellm_completion, default_config, mock_logger
):
    mock_litellm_completion.return_value = {
        'choices': [{'message': {'content': 'Test response'}}]
    }

    llm = LLM(config=default_config)
    response = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert response['choices'][0]['message']['content'] == 'Test response'
    assert mock_litellm_completion.call_count == 1

    mock_logger.debug.assert_called()


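# Each transient provider error below is raised once and then the call succeeds,
# so litellm_completion should be invoked expected_retries (i.e. two) times.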
@pytest.mark.parametrize(
    'exception_class,extra_args,expected_retries',
    [
        (
            APIConnectionError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (
            InternalServerError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (
            ServiceUnavailableError,
            {'llm_provider': 'test_provider', 'model': 'test_model'},
            2,
        ),
        (RateLimitError, {'llm_provider': 'test_provider', 'model': 'test_model'}, 2),
    ],
)
@patch('openhands.llm.llm.litellm_completion')
def test_completion_retries(
    mock_litellm_completion,
    default_config,
    exception_class,
    extra_args,
    expected_retries,
):
    mock_litellm_completion.side_effect = [
        exception_class('Test error message', **extra_args),
        {'choices': [{'message': {'content': 'Retry successful'}}]},
    ]

    llm = LLM(config=default_config)
    response = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert response['choices'][0]['message']['content'] == 'Retry successful'
    assert mock_litellm_completion.call_count == expected_retries


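# time.sleep is patched so the backoff wait chosen after a RateLimitError can be
# inspected without actually sleeping.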
@patch('openhands.llm.llm.litellm_completion')
def test_completion_rate_limit_wait_time(mock_litellm_completion, default_config):
    with patch('time.sleep') as mock_sleep:
        mock_litellm_completion.side_effect = [
            RateLimitError(
                'Rate limit exceeded', llm_provider='test_provider', model='test_model'
            ),
            {'choices': [{'message': {'content': 'Retry successful'}}]},
        ]

        llm = LLM(config=default_config)
        response = llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

        assert response['choices'][0]['message']['content'] == 'Retry successful'
        assert mock_litellm_completion.call_count == 2

        mock_sleep.assert_called_once()
        wait_time = mock_sleep.call_args[0][0]
        assert (
            default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
        ), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'


@patch('openhands.llm.llm.litellm_completion')
def test_completion_exhausts_retries(mock_litellm_completion, default_config):
    mock_litellm_completion.side_effect = APIConnectionError(
        'Persistent error', llm_provider='test_provider', model='test_model'
    )

    llm = LLM(config=default_config)
    with pytest.raises(APIConnectionError):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    assert mock_litellm_completion.call_count == llm.config.num_retries


@patch('openhands.llm.llm.litellm_completion')
def test_completion_operation_cancelled(mock_litellm_completion, default_config):
    mock_litellm_completion.side_effect = OperationCancelled('Operation cancelled')

    llm = LLM(config=default_config)
    with pytest.raises(OperationCancelled):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    assert mock_litellm_completion.call_count == 1


@patch('openhands.llm.llm.litellm_completion')
def test_completion_keyboard_interrupt(mock_litellm_completion, default_config):
    def side_effect(*args, **kwargs):
        raise KeyboardInterrupt('Simulated KeyboardInterrupt')

    mock_litellm_completion.side_effect = side_effect

    llm = LLM(config=default_config)
    with pytest.raises(OperationCancelled):
        try:
            llm.completion(
                messages=[{'role': 'user', 'content': 'Hello!'}],
                stream=False,
            )
        except KeyboardInterrupt:
            raise OperationCancelled('Operation cancelled due to KeyboardInterrupt')

    assert mock_litellm_completion.call_count == 1


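# Simulates an interrupt handler flipping a module-level _should_exit flag while the
# completion is in flight; the completion itself still returns normally.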
@patch('openhands.llm.llm.litellm_completion')
def test_completion_keyboard_interrupt_handler(mock_litellm_completion, default_config):
    global _should_exit

    def side_effect(*args, **kwargs):
        global _should_exit
        _should_exit = True
        return {'choices': [{'message': {'content': 'Simulated interrupt response'}}]}

    mock_litellm_completion.side_effect = side_effect

    llm = LLM(config=default_config)
    result = llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
    )

    assert mock_litellm_completion.call_count == 1
    assert result['choices'][0]['message']['content'] == 'Simulated interrupt response'
    assert _should_exit

    _should_exit = False


@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_litellm_mock(mock_litellm_completion, default_config):
    mock_response = {
        'choices': [{'message': {'content': 'This is a mocked response.'}}]
    }
    mock_litellm_completion.return_value = mock_response

    test_llm = LLM(config=default_config)
    response = test_llm.completion(
        messages=[{'role': 'user', 'content': 'Hello!'}],
        stream=False,
        drop_params=True,
    )

    # Verify the response content
    assert response['choices'][0]['message']['content'] == 'This is a mocked response.'
    mock_litellm_completion.assert_called_once()

    # Verify the keyword arguments passed through to litellm_completion
    call_args = mock_litellm_completion.call_args[1]
    assert call_args['model'] == default_config.model
    assert call_args['messages'] == [{'role': 'user', 'content': 'Hello!'}]
    assert not call_args['stream']


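# Positional (model, messages) arguments are accepted, but the configured model wins
# and everything is forwarded to litellm_completion as keyword arguments.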
@patch('openhands.llm.llm.litellm_completion')
def test_completion_with_two_positional_args(mock_litellm_completion, default_config):
    mock_response = {
        'choices': [{'message': {'content': 'Response to positional args.'}}]
    }
    mock_litellm_completion.return_value = mock_response

    test_llm = LLM(config=default_config)
    response = test_llm.completion(
        'some-model-to-be-ignored',
        [{'role': 'user', 'content': 'Hello from positional args!'}],
        stream=False,
    )

    assert (
        response['choices'][0]['message']['content'] == 'Response to positional args.'
    )
    mock_litellm_completion.assert_called_once()

    # Verify what was forwarded to litellm_completion
    call_args, call_kwargs = mock_litellm_completion.call_args
    assert (
        call_kwargs['model'] == default_config.model
    )  # the configured model is used, not the positional one
    assert call_kwargs['messages'] == [
        {'role': 'user', 'content': 'Hello from positional args!'}
    ]
    assert not call_kwargs['stream']

    assert (
        len(call_args) == 0
    )  # no positional arguments are forwarded


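# An APIError carrying Cloudflare's challenge page should surface as
# CloudFlareBlockageError after a single attempt, without retries.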
@patch('openhands.llm.llm.litellm_completion')
def test_llm_cloudflare_blockage(mock_litellm_completion, default_config):
    from litellm.exceptions import APIError

    from openhands.core.exceptions import CloudFlareBlockageError

    llm = LLM(default_config)
    mock_litellm_completion.side_effect = APIError(
        message='Attention Required! | Cloudflare',
        llm_provider='test_provider',
        model='test_model',
        status_code=403,
    )

    with pytest.raises(CloudFlareBlockageError, match='Request blocked by CloudFlare'):
        llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])

    mock_litellm_completion.assert_called_once()


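# Token counting via LLM.get_token_count.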
@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_with_dict_messages(mock_token_counter, default_config):
    mock_token_counter.return_value = 42
    llm = LLM(default_config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 42
    mock_token_counter.assert_called_once_with(
        model=default_config.model, messages=messages, custom_tokenizer=None
    )


@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_with_message_objects(
    mock_token_counter, default_config, mock_logger
):
    llm = LLM(default_config)

    # Create a Message object and an equivalent plain dict message
    message_obj = Message(role='user', content=[TextContent(text='Hello!')])
    message_dict = {'role': 'user', 'content': 'Hello!'}

    # Mock the token counter to return the same count for both calls
    mock_token_counter.side_effect = [42, 42]

    token_count_obj = llm.get_token_count([message_obj])
    token_count_dict = llm.get_token_count([message_dict])

    # Both message formats should yield the same token count
    assert token_count_obj == token_count_dict
    assert mock_token_counter.call_count == 2


@patch('openhands.llm.llm.litellm.token_counter')
@patch('openhands.llm.llm.create_pretrained_tokenizer')
def test_get_token_count_with_custom_tokenizer(
    mock_create_tokenizer, mock_token_counter, default_config
):
    mock_tokenizer = MagicMock()
    mock_create_tokenizer.return_value = mock_tokenizer
    mock_token_counter.return_value = 42

    config = copy.deepcopy(default_config)
    config.custom_tokenizer = 'custom/tokenizer'
    llm = LLM(config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 42
    mock_create_tokenizer.assert_called_once_with('custom/tokenizer')
    mock_token_counter.assert_called_once_with(
        model=config.model, messages=messages, custom_tokenizer=mock_tokenizer
    )


@patch('openhands.llm.llm.litellm.token_counter')
def test_get_token_count_error_handling(
    mock_token_counter, default_config, mock_logger
):
    mock_token_counter.side_effect = Exception('Token counting failed')
    llm = LLM(default_config)
    messages = [{'role': 'user', 'content': 'Hello!'}]

    token_count = llm.get_token_count(messages)

    assert token_count == 0
    mock_token_counter.assert_called_once()
    mock_logger.error.assert_called_once_with(
        'Error getting token count for\n model gpt-4o\nToken counting failed'
    )
|