# agentbee / test / test_vision.py
# mangubee: "fix: correct author name formatting in multiple files" (e7b4937)
"""
Tests for vision tool (multimodal image analysis)
Author: @mangubee
Date: 2026-01-02
Tests cover:
- Image loading and encoding
- Gemini vision analysis
- Claude vision analysis
- Fallback mechanism
- Retry logic
- Error handling
"""
import pytest
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from src.tools.vision import (
load_and_encode_image,
analyze_image_gemini,
analyze_image_claude,
analyze_image,
)
# ============================================================================
# Test Fixtures
# ============================================================================
# Directory named "fixtures" located next to this test module.
FIXTURES_DIR = Path(__file__).parent.joinpath("fixtures")
@pytest.fixture
def test_image_path():
    """Return the location of ``test_image.jpg`` in the fixtures directory, as a string."""
    image_file = FIXTURES_DIR / "test_image.jpg"
    return str(image_file)
@pytest.fixture
def mock_gemini_response():
    """Build a stand-in Gemini response whose ``text`` attribute holds a canned answer."""
    return Mock(text="This image shows a red square.")
@pytest.fixture
def mock_claude_response():
    """Build a stand-in Claude response: ``content`` is a one-item list whose item has ``text``."""
    answer_block = Mock(text="The image contains a red colored square.")
    return Mock(content=[answer_block])
@pytest.fixture
def mock_settings_gemini():
    """Patch ``Settings`` in the vision module so only the Google key is set.

    Yields the patched ``Settings`` mock; calling it returns an object with
    ``google_api_key`` populated and ``anthropic_api_key`` set to ``None``.
    """
    fake_settings = Mock(google_api_key="test_google_key", anthropic_api_key=None)
    with patch("src.tools.vision.Settings", return_value=fake_settings) as settings_cls:
        yield settings_cls
@pytest.fixture
def mock_settings_claude():
    """Patch ``Settings`` in the vision module so only the Anthropic key is set.

    Yields the patched ``Settings`` mock; calling it returns an object with
    ``anthropic_api_key`` populated and ``google_api_key`` set to ``None``.
    """
    fake_settings = Mock(google_api_key=None, anthropic_api_key="test_anthropic_key")
    with patch("src.tools.vision.Settings", return_value=fake_settings) as settings_cls:
        yield settings_cls
@pytest.fixture
def mock_settings_both():
    """Patch ``Settings`` in the vision module so both API keys are set.

    Yields the patched ``Settings`` mock; calling it returns an object with
    both ``google_api_key`` and ``anthropic_api_key`` populated.
    """
    fake_settings = Mock(
        google_api_key="test_google_key",
        anthropic_api_key="test_anthropic_key",
    )
    with patch("src.tools.vision.Settings", return_value=fake_settings) as settings_cls:
        yield settings_cls
# ============================================================================
# Image Loading Tests
# ============================================================================
def test_load_and_encode_image_success(test_image_path):
    """Loading the sample image yields data, a JPEG MIME type and a positive size."""
    encoded = load_and_encode_image(test_image_path)

    for required_key in ("data", "mime_type"):
        assert required_key in encoded
    assert encoded["mime_type"] == "image/jpeg"
    assert encoded["size_mb"] > 0
    # The encoded payload (Base64 data) must not be empty.
    assert len(encoded["data"]) > 0
def test_load_image_file_not_found(tmp_path):
    """Loading a path that does not exist raises ``FileNotFoundError``.

    The path is placed inside pytest's per-test ``tmp_path`` directory so it
    is guaranteed to be absent. A bare relative name would be resolved
    against the current working directory, which the test does not control.
    """
    missing_image = tmp_path / "nonexistent_image.jpg"
    with pytest.raises(FileNotFoundError):
        load_and_encode_image(str(missing_image))
def test_load_image_unsupported_format(tmp_path):
    """A file with an ``.mp4`` extension is rejected with an unsupported-format ``ValueError``."""
    # Plain text stored behind a video extension.
    bogus_path = tmp_path / "video.mp4"
    bogus_path.write_text("not a real video")

    with pytest.raises(ValueError, match="Unsupported image format"):
        load_and_encode_image(str(bogus_path))
# ============================================================================
# Gemini Vision Tests
# ============================================================================
def test_analyze_image_gemini_success(mock_settings_gemini, test_image_path, mock_gemini_response):
    """Gemini analysis returns the model name, answer, question and image path."""
    prompt = "What is in this image?"

    # Stand-in client whose generate_content call returns the canned response.
    gemini_client = Mock()
    gemini_client.models.generate_content.return_value = mock_gemini_response

    with patch("google.genai.Client", return_value=gemini_client):
        outcome = analyze_image_gemini(test_image_path, prompt)

    assert outcome["model"] == "gemini-2.0-flash"
    assert outcome["answer"] == "This image shows a red square."
    assert outcome["question"] == prompt
    assert outcome["image_path"] == test_image_path
def test_analyze_image_gemini_default_question(mock_settings_gemini, test_image_path, mock_gemini_response):
    """Omitting the question makes Gemini analysis report the default prompt."""
    gemini_client = Mock()
    gemini_client.models.generate_content.return_value = mock_gemini_response

    with patch("google.genai.Client", return_value=gemini_client):
        outcome = analyze_image_gemini(test_image_path)

    assert outcome["question"] == "Describe this image in detail."
def test_analyze_image_gemini_missing_api_key():
    """Gemini analysis raises ``ValueError`` when ``google_api_key`` is ``None``."""
    keyless_settings = Mock(google_api_key=None)

    with patch("src.tools.vision.Settings", return_value=keyless_settings):
        with pytest.raises(ValueError, match="GOOGLE_API_KEY not configured"):
            analyze_image_gemini("test.jpg")
def test_analyze_image_gemini_connection_error(mock_settings_gemini, test_image_path):
    """A persistent ``ConnectionError`` from Gemini propagates after three attempts."""
    failing_client = Mock()
    failing_client.models.generate_content.side_effect = ConnectionError("Network error")

    with patch("google.genai.Client", return_value=failing_client):
        with pytest.raises(ConnectionError):
            analyze_image_gemini(test_image_path)

    # Three recorded calls show the request was retried before giving up.
    assert failing_client.models.generate_content.call_count == 3
# ============================================================================
# Claude Vision Tests
# ============================================================================
def test_analyze_image_claude_success(mock_settings_claude, test_image_path, mock_claude_response):
    """Claude analysis returns the model name, answer, question and image path."""
    prompt = "What is in this image?"

    # Stand-in client whose messages.create call returns the canned response.
    claude_client = Mock()
    claude_client.messages.create.return_value = mock_claude_response

    with patch("anthropic.Anthropic", return_value=claude_client):
        outcome = analyze_image_claude(test_image_path, prompt)

    assert outcome["model"] == "claude-sonnet-4.5"
    assert outcome["answer"] == "The image contains a red colored square."
    assert outcome["question"] == prompt
    assert outcome["image_path"] == test_image_path
def test_analyze_image_claude_default_question(mock_settings_claude, test_image_path, mock_claude_response):
    """Omitting the question makes Claude analysis report the default prompt."""
    claude_client = Mock()
    claude_client.messages.create.return_value = mock_claude_response

    with patch("anthropic.Anthropic", return_value=claude_client):
        outcome = analyze_image_claude(test_image_path)

    assert outcome["question"] == "Describe this image in detail."
def test_analyze_image_claude_missing_api_key():
    """Claude analysis raises ``ValueError`` when ``anthropic_api_key`` is ``None``."""
    keyless_settings = Mock(anthropic_api_key=None)

    with patch("src.tools.vision.Settings", return_value=keyless_settings):
        with pytest.raises(ValueError, match="ANTHROPIC_API_KEY not configured"):
            analyze_image_claude("test.jpg")
def test_analyze_image_claude_connection_error(mock_settings_claude, test_image_path):
    """A persistent ``ConnectionError`` from Claude propagates after three attempts."""
    failing_client = Mock()
    failing_client.messages.create.side_effect = ConnectionError("Network error")

    with patch("anthropic.Anthropic", return_value=failing_client):
        with pytest.raises(ConnectionError):
            analyze_image_claude(test_image_path)

    # Three recorded calls show the request was retried before giving up.
    assert failing_client.messages.create.call_count == 3
# ============================================================================
# Unified Vision Analysis Tests
# ============================================================================
def test_analyze_image_uses_gemini(mock_settings_both, test_image_path, mock_gemini_response):
    """With both keys configured, the unified entry point answers via Gemini."""
    gemini_client = Mock()
    gemini_client.models.generate_content.return_value = mock_gemini_response

    with patch("google.genai.Client", return_value=gemini_client):
        outcome = analyze_image(test_image_path, "What is this?")

    assert outcome["model"] == "gemini-2.0-flash"
    assert "red square" in outcome["answer"].lower()
def test_analyze_image_fallback_to_claude(mock_settings_both, test_image_path, mock_claude_response):
    """When the Gemini call raises, the unified entry point answers via Claude."""
    # Gemini client that always errors out.
    broken_gemini = Mock()
    broken_gemini.models.generate_content.side_effect = Exception("Gemini error")

    # Claude client that returns the canned response.
    working_claude = Mock()
    working_claude.messages.create.return_value = mock_claude_response

    with patch("google.genai.Client", return_value=broken_gemini), \
            patch("anthropic.Anthropic", return_value=working_claude):
        outcome = analyze_image(test_image_path, "What is this?")

    assert outcome["model"] == "claude-sonnet-4.5"
    assert "red" in outcome["answer"].lower()
def test_analyze_image_no_api_keys():
    """The unified entry point raises ``ValueError`` when neither API key is set."""
    keyless_settings = Mock(google_api_key=None, anthropic_api_key=None)

    with patch("src.tools.vision.Settings", return_value=keyless_settings):
        with pytest.raises(ValueError, match="No vision API configured"):
            analyze_image("test.jpg")
def test_analyze_image_both_fail(mock_settings_both, test_image_path):
    """When Gemini and Claude both raise, the unified entry point raises a "both failed" error."""
    broken_gemini = Mock()
    broken_gemini.models.generate_content.side_effect = Exception("Gemini error")

    broken_claude = Mock()
    broken_claude.messages.create.side_effect = Exception("Claude error")

    with patch("google.genai.Client", return_value=broken_gemini), \
            patch("anthropic.Anthropic", return_value=broken_claude):
        with pytest.raises(Exception, match="both failed"):
            analyze_image(test_image_path)