Spaces:

frontier-ai
/

gMAS

Running

gMAS / tests /test_tools_remaining.py

Артём Боярских

chore: initial commit

3193174 2 months ago

29.2 kB

	"""Tests for shell.py, function_calling.py, llm_integration.py"""

	import sys
	from unittest.mock import MagicMock

	import pytest

	from tools.base import ToolCall, ToolResult
	from tools.function_calling import (
	FunctionTool,
	FunctionWrapper,
	_extract_parameters_schema,
	_python_type_to_json_schema,
	)
	from tools.llm_integration import (
	LLMResponse,
	LLMToolCall,
	OpenAICaller,
	OpenAIToolsCaller,
	create_openai_tools_caller,
	parse_anthropic_response,
	parse_openai_response,
	)
	from tools.shell import ShellTool

	# ═══════════════════════════════════════════════════════════════
	# ShellTool
	# ═══════════════════════════════════════════════════════════════


	class TestShellTool:
	def test_name_and_description(self):
	tool = ShellTool()
	assert tool.name == "shell"
	assert "shell" in tool.description.lower() or "command" in tool.description.lower()

	def test_parameters_schema(self):
	tool = ShellTool()
	schema = tool.parameters_schema
	assert schema["type"] == "object"
	assert "command" in schema["properties"]

	def test_execute_empty_command(self):
	tool = ShellTool()
	result = tool.execute(command="")
	assert result.success is False
	assert result.error is not None
	assert "No command" in result.error

	def test_execute_simple_echo(self):
	tool = ShellTool(timeout=10)
	result = tool.execute(command="echo hello")
	assert result.success is True
	assert "hello" in result.output

	def test_execute_allowed_commands_whitelist(self):
	tool = ShellTool(allowed_commands=["echo"])
	result = tool.execute(command="echo test")
	assert result.success is True

	def test_execute_command_not_in_whitelist(self):
	tool = ShellTool(allowed_commands=["echo"])
	result = tool.execute(command="dir /b")
	assert result.success is False
	assert result.error is not None
	assert "not allowed" in result.error.lower()

	def test_execute_with_no_output(self):
	tool = ShellTool(timeout=10)
	if sys.platform == "win32":
	result = tool.execute(command="echo.")
	else:
	result = tool.execute(command="true")
	# Just check it doesn't raise
	assert isinstance(result, ToolResult)

	def test_max_output_size(self):
	tool = ShellTool(max_output_size=10)
	result = tool.execute(command="echo 12345678901234567890")
	if result.success:
	# Output may be truncated
	assert len(result.output) <= 100 # Some leeway for truncation text

	def test_execute_timeout(self):
	"""Test that timeout properly returns error."""
	tool = ShellTool(timeout=1)
	if sys.platform == "win32":
	result = tool.execute(command="ping -n 10 127.0.0.1 >nul")
	else:
	result = tool.execute(command="sleep 10")
	# Either times out or finishes fast (on fast machines)
	# Just verify it returns a ToolResult
	assert isinstance(result, ToolResult)

	def test_is_command_allowed_all(self):
	tool = ShellTool(allowed_commands=None)
	assert tool._is_command_allowed("any_command") is True

	def test_is_command_allowed_specific(self):
	tool = ShellTool(allowed_commands=["echo", "ls"])
	assert tool._is_command_allowed("echo hello") is True
	assert tool._is_command_allowed("rm -rf /") is False

	def test_is_command_allowed_empty_command(self):
	tool = ShellTool(allowed_commands=["echo"])
	# Empty string strip().split()[0] would fail, but the guard is in execute()
	# _is_command_allowed is called only after the empty check in execute()
	assert tool._is_command_allowed("") is False # empty string cmd_name = ""

	def test_execute_returns_exit_code_error(self):
	tool = ShellTool(timeout=10)
	if sys.platform == "win32":
	result = tool.execute(command="exit 1")
	else:
	result = tool.execute(command="false")
	# Non-zero exit code should return success=False
	assert isinstance(result, ToolResult)

	def test_execute_with_stderr_output(self):
	"""Cover line 144: stderr output appended to result."""
	tool = ShellTool(timeout=10)
	if sys.platform == "win32":
	# On Windows: write to stderr via powershell
	result = tool.execute(command="powershell -Command \"[Console]::Error.Write('err_output')\"")
	else:
	result = tool.execute(command="echo err_output 1>&2")
	# Just check it returns ToolResult (stderr may or may not be captured)
	assert isinstance(result, ToolResult)

	def test_execute_file_not_found(self):
	"""Cover lines 170-175: FileNotFoundError → return error ToolResult."""
	from unittest.mock import patch
	tool = ShellTool(timeout=10)

	with patch("subprocess.run", side_effect=FileNotFoundError("cmd not found")):
	result = tool.execute(command="nonexistent_command_xyz_abc")
	assert result.success is False
	assert result.error is not None
	assert "not found" in result.error.lower() or "command" in result.error.lower()

	def test_execute_oserror(self):
	"""Cover lines 176-181: OSError → return error ToolResult."""
	from unittest.mock import patch
	tool = ShellTool(timeout=10)

	with patch("subprocess.run", side_effect=OSError("os error occurred")):
	result = tool.execute(command="echo test")
	assert result.success is False
	assert result.error is not None
	assert "error" in result.error.lower()


	# ═══════════════════════════════════════════════════════════════
	# _python_type_to_json_schema
	# ═══════════════════════════════════════════════════════════════


	class TestPythonTypeToJsonSchema:
	def test_str(self):
	assert _python_type_to_json_schema(str) == {"type": "string"}

	def test_int(self):
	assert _python_type_to_json_schema(int) == {"type": "integer"}

	def test_float(self):
	assert _python_type_to_json_schema(float) == {"type": "number"}

	def test_bool(self):
	assert _python_type_to_json_schema(bool) == {"type": "boolean"}

	def test_list(self):
	assert _python_type_to_json_schema(list) == {"type": "array"}

	def test_dict(self):
	assert _python_type_to_json_schema(dict) == {"type": "object"}

	def test_none_type(self):
	assert _python_type_to_json_schema(type(None)) == {"type": "null"}

	def test_unknown_type(self):
	class Custom:
	pass
	result = _python_type_to_json_schema(Custom)
	assert result == {"type": "string"}

	def test_list_of_str(self):
	result = _python_type_to_json_schema(list[str])
	assert result["type"] == "array"
	assert result["items"] == {"type": "string"}

	def test_list_no_args(self):
	# List without type args — fallback to array
	result = _python_type_to_json_schema(list)
	assert result == {"type": "array"}

	def test_dict_typing(self):
	result = _python_type_to_json_schema(dict[str, int])
	assert result == {"type": "object"}

	def test_optional_type(self):
	result = _python_type_to_json_schema(str \| None)
	# Union type → fallback to string
	assert "type" in result


	# ═══════════════════════════════════════════════════════════════
	# _extract_parameters_schema
	# ═══════════════════════════════════════════════════════════════


	class TestExtractParametersSchema:
	def test_simple_function(self):
	def add(a: int, b: int) -> int:
	return a + b

	schema = _extract_parameters_schema(add)
	assert schema["type"] == "object"
	assert "a" in schema["properties"]
	assert "b" in schema["properties"]
	assert schema["required"] == ["a", "b"]

	def test_function_with_defaults(self):
	def greet(name: str, greeting: str = "Hello") -> str:
	return f"{greeting} {name}"

	schema = _extract_parameters_schema(greet)
	assert "name" in schema["required"]
	assert "greeting" not in schema.get("required", [])

	def test_no_params(self):
	def noop() -> None:
	pass

	schema = _extract_parameters_schema(noop)
	assert schema["properties"] == {}

	def test_skips_self(self):
	class MyClass:
	def method(self, x: int) -> int:
	return x

	schema = _extract_parameters_schema(MyClass.method)
	assert "self" not in schema["properties"]

	def test_untyped_params(self):
	def func(x, y):
	return x + y

	schema = _extract_parameters_schema(func)
	assert "x" in schema["properties"]
	assert "y" in schema["properties"]

	def test_get_type_hints_name_error_fallback(self):
	"""_extract_parameters_schema falls back to {} when get_type_hints raises NameError (lines 48-49)."""
	# Create a function with a forward reference to an undefined type dynamically
	# so that static type checkers don't flag the undefined name.
	# This causes NameError in get_type_hints at runtime.
	_ns: dict = {}
	exec('def bad_func(x: "UndefinedTypeName123abc") -> None: pass', _ns) # noqa: S102
	bad_func = _ns["bad_func"]

	# _extract_parameters_schema should not raise, using {} for hints
	schema = _extract_parameters_schema(bad_func)
	assert "x" in schema["properties"]
	# Type falls back to str (default in hints.get())
	assert schema["properties"]["x"] == {"type": "string"}

	def test_list_origin_no_args(self):
	"""_python_type_to_json_schema for list-origin type with no args (line 35)."""
	# Create a mock type with __origin__ = list but no __args__
	class MockListType:
	__origin__ = list
	# No __args__ attribute

	result = _python_type_to_json_schema(MockListType)
	assert result == {"type": "array"} # line 35


	# ═══════════════════════════════════════════════════════════════
	# FunctionWrapper
	# ═══════════════════════════════════════════════════════════════


	class TestFunctionWrapper:
	def test_basic_execution(self):
	def double(x: int) -> int:
	return x * 2

	wrapper = FunctionWrapper(double)
	result = wrapper.execute(x=5)
	assert result.success is True
	assert "10" in result.output

	def test_name_from_function(self):
	def my_function():
	pass

	wrapper = FunctionWrapper(my_function)
	assert wrapper.name == "my_function"

	def test_custom_name(self):
	def func():
	pass

	wrapper = FunctionWrapper(func, tool_name="custom")
	assert wrapper.name == "custom"

	def test_description_from_docstring(self):
	def func():
	"""My tool description."""

	wrapper = FunctionWrapper(func)
	assert wrapper.description == "My tool description."

	def test_custom_description(self):
	def func():
	pass

	wrapper = FunctionWrapper(func, tool_description="Custom desc")
	assert wrapper.description == "Custom desc"

	def test_execute_exception(self):
	def bad_func(x: int) -> int:
	msg = "bad input"
	raise ValueError(msg)

	wrapper = FunctionWrapper(bad_func)
	result = wrapper.execute(x=1)
	assert result.success is False
	assert result.error is not None
	assert "Execution error" in result.error

	def test_execute_no_output(self):
	def func() -> None:
	return None

	wrapper = FunctionWrapper(func)
	result = wrapper.execute()
	assert result.success is True
	assert "(no output)" in result.output

	def test_parameters_schema_generated(self):
	def func(a: str, b: int = 5) -> str:
	return f"{a}{b}"

	wrapper = FunctionWrapper(func)
	schema = wrapper.parameters_schema
	assert "a" in schema["properties"]
	assert "b" in schema["properties"]


	# ═══════════════════════════════════════════════════════════════
	# FunctionTool
	# ═══════════════════════════════════════════════════════════════


	class TestFunctionTool:
	def test_name_and_description(self):
	tool = FunctionTool()
	assert tool.name == "function_calling"
	assert isinstance(tool.description, str)

	def test_register_decorator(self):
	tool = FunctionTool()

	@tool.register
	def hello(name: str) -> str:
	"""Say hello."""
	return f"Hello, {name}!"

	assert "hello" in tool.list_functions()

	def test_register_with_name(self):
	tool = FunctionTool()

	@tool.register(name="greet", description="A greeting tool")
	def greet_func(name: str) -> str:
	return f"Greet {name}"

	assert "greet" in tool.list_functions()

	def test_add_function_chaining(self):
	tool = FunctionTool()
	result = tool.add_function(lambda x: x, name="identity")
	assert result is tool
	assert "identity" in tool.list_functions()

	def test_execute_registered_function(self):
	tool = FunctionTool()

	@tool.register
	def upper(text: str) -> str:
	return text.upper()

	result = tool.execute(function="upper", text="hello")
	assert result.success is True
	assert "HELLO" in result.output

	def test_execute_no_function_name(self):
	tool = FunctionTool()
	result = tool.execute(function="")
	assert result.success is False
	assert result.error is not None
	assert "No function name" in result.error

	def test_execute_unknown_function(self):
	tool = FunctionTool()
	result = tool.execute(function="nonexistent")
	assert result.success is False
	assert result.error is not None
	assert "not found" in result.error.lower()

	def test_get_function(self):
	tool = FunctionTool()

	@tool.register
	def my_fn():
	pass

	fn = tool.get_function("my_fn")
	assert fn is my_fn

	def test_get_function_nonexistent(self):
	tool = FunctionTool()
	assert tool.get_function("nope") is None

	def test_parameters_schema_with_functions(self):
	tool = FunctionTool()

	@tool.register
	def func1():
	pass

	schema = tool.parameters_schema
	assert "func1" in schema["properties"]["function"]["enum"]

	def test_parameters_schema_empty(self):
	tool = FunctionTool()
	schema = tool.parameters_schema
	assert schema["properties"]["function"]["enum"] == []


	# ═══════════════════════════════════════════════════════════════
	# LLMToolCall & LLMResponse
	# ═══════════════════════════════════════════════════════════════


	class TestLLMToolCall:
	def test_to_tool_call(self):
	llm_tc = LLMToolCall(id="call_1", name="search", arguments={"query": "test"})
	tc = llm_tc.to_tool_call()
	assert isinstance(tc, ToolCall)
	assert tc.name == "search"
	assert tc.arguments["query"] == "test"


	class TestLLMResponse:
	def test_has_tool_calls_false(self):
	resp = LLMResponse(content="Hello")
	assert resp.has_tool_calls is False

	def test_has_tool_calls_true(self):
	tc = LLMToolCall(id="1", name="fn", arguments={})
	resp = LLMResponse(content="", tool_calls=[tc])
	assert resp.has_tool_calls is True

	def test_get_tool_calls(self):
	tc = LLMToolCall(id="1", name="fn", arguments={"x": 1})
	resp = LLMResponse(content="", tool_calls=[tc])
	tool_calls = resp.get_tool_calls()
	assert len(tool_calls) == 1
	assert isinstance(tool_calls[0], ToolCall)

	def test_empty_content(self):
	resp = LLMResponse()
	assert resp.content == ""
	assert resp.has_tool_calls is False


	# ═══════════════════════════════════════════════════════════════
	# parse_openai_response
	# ═══════════════════════════════════════════════════════════════


	def _make_openai_response(content: str, tool_calls=None, function_call=None):
	"""Build a mock OpenAI API response."""
	message = MagicMock()
	message.content = content
	message.tool_calls = tool_calls
	message.function_call = function_call
	choice = MagicMock()
	choice.message = message
	response = MagicMock()
	response.choices = [choice]
	return response


	class TestParseOpenAIResponse:
	def test_plain_text_response(self):
	resp = _make_openai_response("Hello world", tool_calls=None)
	llm_resp = parse_openai_response(resp)
	assert llm_resp.content == "Hello world"
	assert not llm_resp.has_tool_calls

	def test_tool_calls_response(self):
	tc = MagicMock()
	tc.id = "call_1"
	tc.function.name = "search"
	tc.function.arguments = '{"query": "python"}'
	resp = _make_openai_response("", tool_calls=[tc])
	llm_resp = parse_openai_response(resp)
	assert llm_resp.has_tool_calls
	assert llm_resp.tool_calls[0].name == "search"
	assert llm_resp.tool_calls[0].arguments == {"query": "python"}

	def test_tool_call_invalid_json(self):
	tc = MagicMock()
	tc.id = "call_1"
	tc.function.name = "fn"
	tc.function.arguments = "not-json"
	resp = _make_openai_response("", tool_calls=[tc])
	llm_resp = parse_openai_response(resp)
	assert llm_resp.tool_calls[0].arguments == {}

	def test_tool_call_empty_arguments(self):
	tc = MagicMock()
	tc.id = "call_1"
	tc.function.name = "fn"
	tc.function.arguments = ""
	resp = _make_openai_response("", tool_calls=[tc])
	llm_resp = parse_openai_response(resp)
	assert llm_resp.tool_calls[0].arguments == {}

	def test_legacy_function_call(self):
	fc = MagicMock()
	fc.name = "legacy_fn"
	fc.arguments = '{"param": "value"}'
	resp = _make_openai_response("", tool_calls=None, function_call=fc)
	llm_resp = parse_openai_response(resp)
	assert llm_resp.has_tool_calls
	assert llm_resp.tool_calls[0].name == "legacy_fn"
	assert llm_resp.tool_calls[0].id == "legacy_call"

	def test_legacy_function_call_invalid_json(self):
	fc = MagicMock()
	fc.name = "fn"
	fc.arguments = "invalid"
	resp = _make_openai_response("", tool_calls=None, function_call=fc)
	llm_resp = parse_openai_response(resp)
	assert llm_resp.tool_calls[0].arguments == {}

	def test_no_tool_calls(self):
	resp = _make_openai_response("Just text", tool_calls=None, function_call=None)
	# function_call attribute should be falsy
	resp.choices[0].message.function_call = None
	llm_resp = parse_openai_response(resp)
	assert not llm_resp.has_tool_calls
	assert llm_resp.content == "Just text"


	# ═══════════════════════════════════════════════════════════════
	# parse_anthropic_response
	# ═══════════════════════════════════════════════════════════════


	def _make_anthropic_response(blocks):
	"""Build a mock Anthropic API response."""
	response = MagicMock()
	content_blocks = []
	for block_type, data in blocks:
	block = MagicMock()
	block.type = block_type
	if block_type == "text":
	block.text = data
	elif block_type == "tool_use":
	block.id = data.get("id", "tool_id")
	block.name = data.get("name", "fn")
	block.input = data.get("input", {})
	content_blocks.append(block)
	response.content = content_blocks
	return response


	class TestParseAnthropicResponse:
	def test_text_only(self):
	resp = _make_anthropic_response([("text", "Hello from Claude")])
	llm_resp = parse_anthropic_response(resp)
	assert llm_resp.content == "Hello from Claude"
	assert not llm_resp.has_tool_calls

	def test_tool_use(self):
	resp = _make_anthropic_response(
	[("tool_use", {"id": "t1", "name": "calculator", "input": {"expr": "2+2"}})]
	)
	llm_resp = parse_anthropic_response(resp)
	assert llm_resp.has_tool_calls
	assert llm_resp.tool_calls[0].name == "calculator"
	assert llm_resp.tool_calls[0].arguments == {"expr": "2+2"}

	def test_text_and_tool(self):
	resp = _make_anthropic_response(
	[
	("text", "I'll calculate that."),
	("tool_use", {"id": "t1", "name": "calc", "input": {"x": 5}}),
	]
	)
	llm_resp = parse_anthropic_response(resp)
	assert "I'll calculate that." in llm_resp.content
	assert llm_resp.has_tool_calls

	def test_tool_input_not_dict(self):
	resp = _make_anthropic_response(
	[("tool_use", {"id": "t1", "name": "fn", "input": "not_a_dict"})]
	)
	llm_resp = parse_anthropic_response(resp)
	assert llm_resp.tool_calls[0].arguments == {}

	def test_multiple_text_blocks(self):
	resp = _make_anthropic_response([("text", "Hello"), ("text", " World")])
	llm_resp = parse_anthropic_response(resp)
	assert "Hello" in llm_resp.content
	assert "World" in llm_resp.content


	# ═══════════════════════════════════════════════════════════════
	# OpenAICaller
	# ═══════════════════════════════════════════════════════════════


	class TestOpenAICaller:
	def _make_client(self, content="response text", tool_calls=None):
	client = MagicMock()
	message = MagicMock()
	message.content = content
	message.tool_calls = tool_calls
	message.function_call = None
	choice = MagicMock()
	choice.message = message
	completion = MagicMock()
	completion.choices = [choice]
	client.chat.completions.create.return_value = completion
	return client

	def test_call_without_tools(self):
	client = self._make_client("Hello!")
	caller = OpenAICaller(client, model="gpt-4")
	result = caller("Say hello")
	assert result == "Hello!"

	def test_call_with_tools(self):
	tc = MagicMock()
	tc.id = "call_1"
	tc.function.name = "search"
	tc.function.arguments = '{"q": "test"}'
	client = self._make_client("", tool_calls=[tc])
	caller = OpenAICaller(client, model="gpt-4")
	tools = [{"type": "function", "function": {"name": "search"}}]
	result = caller("Search for test", tools=tools)
	assert isinstance(result, LLMResponse)
	assert result.has_tool_calls

	def test_with_system_prompt(self):
	client = self._make_client("Hi")
	caller = OpenAICaller(client, system_prompt="You are a helpful assistant")
	caller("Hello")
	# System prompt added to messages
	call_args = client.chat.completions.create.call_args
	messages = call_args.kwargs["messages"]
	assert any(m["role"] == "system" for m in messages)

	def test_without_system_prompt(self):
	client = self._make_client("Hi")
	caller = OpenAICaller(client)
	caller("Hello")
	call_args = client.chat.completions.create.call_args
	messages = call_args.kwargs["messages"]
	assert not any(m["role"] == "system" for m in messages)

	def test_openai_tools_caller_alias(self):
	assert OpenAIToolsCaller is OpenAICaller

	def test_create_openai_caller_no_openai(self, monkeypatch):
	"""Test that create_openai_caller raises ImportError when openai is not available."""
	import builtins
	original_import = builtins.__import__

	def mock_import(name, args, *kwargs):
	if name == "openai":
	msg = "No module named 'openai'"
	raise ImportError(msg)
	return original_import(name, args, *kwargs)

	monkeypatch.setattr(builtins, "__import__", mock_import)

	# Since openai is already imported, we test the logic differently
	# Just verify the function exists and has the right signature
	import inspect
	sig = inspect.signature(create_openai_tools_caller)
	assert "api_key" in sig.parameters
	assert "model" in sig.parameters


	class TestCreateOpenAICaller:
	def test_create_openai_caller_success(self):
	"""Lines 312-326: create_openai_caller creates an OpenAICaller with mock OpenAI client."""
	from unittest.mock import MagicMock, patch

	from tools.llm_integration import create_openai_caller

	mock_openai_module = MagicMock()
	mock_client_instance = MagicMock()
	mock_openai_module.OpenAI.return_value = mock_client_instance

	with patch.dict("sys.modules", {"openai": mock_openai_module}):
	caller = create_openai_caller(
	api_key="test-key",
	model="gpt-4",
	temperature=0.5,
	)

	assert isinstance(caller, OpenAICaller)

	def test_create_openai_caller_with_base_url(self):
	"""Lines 321-322: base_url is passed to kwargs."""
	from unittest.mock import MagicMock, patch

	from tools.llm_integration import create_openai_caller

	mock_openai_module = MagicMock()
	mock_client_instance = MagicMock()
	mock_openai_module.OpenAI.return_value = mock_client_instance

	with patch.dict("sys.modules", {"openai": mock_openai_module}):
	caller = create_openai_caller(
	api_key="test-key",
	base_url="https://custom-api.example.com/v1",
	model="gpt-4",
	)

	assert isinstance(caller, OpenAICaller)

	def test_create_openai_caller_without_api_key(self):
	"""Lines 318-320: api_key is optional."""
	from unittest.mock import MagicMock, patch

	from tools.llm_integration import create_openai_caller

	mock_openai_module = MagicMock()
	mock_client_instance = MagicMock()
	mock_openai_module.OpenAI.return_value = mock_client_instance

	with patch.dict("sys.modules", {"openai": mock_openai_module}):
	caller = create_openai_caller(model="gpt-4")

	assert isinstance(caller, OpenAICaller)

	def test_create_openai_caller_import_error(self):
	"""Lines 314-316: ImportError when openai package is not available."""
	from unittest.mock import patch

	from tools.llm_integration import create_openai_caller

	# Temporarily remove openai from sys.modules
	with patch.dict("sys.modules", {"openai": None}), pytest.raises(ImportError, match="openai package required"):
	create_openai_caller(model="gpt-4")