wanderlust.ai / src /wanderlust_ai /api /anthropic_tester.py
BlakeL's picture
Upload 115 files
3f9f85b verified
"""
Anthropic Claude API Testing Module
This module provides specialized testing for the Anthropic Claude API,
including proper error handling, response validation, and debugging tools.
"""
import asyncio
from typing import Any, Dict, List, Optional
from datetime import datetime
from .api_tester import APITester, APIConfig, APITestResult
class AnthropicTester(APITester):
    """
    Specialized tester for Anthropic Claude API.

    This class provides:
    - Anthropic-specific API testing
    - Claude model validation
    - Message structure testing
    - Token usage monitoring

    Every test posts a payload to ``/v1/messages`` through the inherited
    ``_make_request`` and wraps the outcome in an ``APITestResult``.

    NOTE(review): the Messages API requires ``x-api-key`` and
    ``anthropic-version`` request headers. This class does not set them, so
    they are presumably added by ``APITester._make_request`` -- confirm.
    """

    # Endpoint every test in this class posts to.
    _MESSAGES_ENDPOINT = "/v1/messages"

    # Model used by all single-model tests.
    _DEFAULT_MODEL = "claude-3-haiku-20240307"

    # Models probed by test_claude_models() when the caller passes none.
    _DEFAULT_MODELS = (
        "claude-3-haiku-20240307",
        "claude-3-sonnet-20240229",
        "claude-3-opus-20240229",
    )

    def __init__(self, api_key: str, timeout: int = 30, max_retries: int = 3):
        """Initialize Anthropic API tester.

        Args:
            api_key: Anthropic API key, stored on the ``APIConfig``.
            timeout: Timeout value forwarded to ``APIConfig``.
            max_retries: Retry count forwarded to ``APIConfig``.
        """
        config = APIConfig(
            name="anthropic",
            base_url="https://api.anthropic.com",
            api_key=api_key,
            timeout=timeout,
            max_retries=max_retries,
            rate_limit_per_minute=50,  # Anthropic's rate limit
        )
        super().__init__(config)

    @staticmethod
    def _user_message(model: str, max_tokens: int, content: str) -> Dict[str, Any]:
        """Build a Messages API payload holding a single user turn."""
        return {
            "model": model,
            "max_tokens": max_tokens,
            "messages": [
                {
                    "role": "user",
                    "content": content,
                }
            ],
        }

    async def test_claude_connection(self) -> APITestResult:
        """Test basic connection to Anthropic Claude API.

        Returns:
            An ``APITestResult`` whose ``success`` mirrors the outcome of one
            minimal request; ``debug_info`` records the model and the
            requested ``max_tokens``.
        """
        test_name = "claude_connection_test"
        self.logger.info("Testing Anthropic Claude API connection...")

        # Test with a simple message
        test_message = self._user_message(
            self._DEFAULT_MODEL, 10, "Hello, respond with just 'Hi'"
        )
        success, data, error, status_code, response_time = await self._make_request(
            "POST", self._MESSAGES_ENDPOINT, data=test_message
        )

        debug_info = {
            "model_used": test_message["model"],
            "tokens_requested": test_message["max_tokens"],
        }

        if success:
            self.logger.info("✅ Anthropic Claude connection test passed")
            return APITestResult(
                success=True,
                api_name="anthropic",
                test_name=test_name,
                response_time_ms=response_time,
                status_code=status_code,
                response_data=data,
                debug_info=debug_info,
            )

        self.logger.error(f"❌ Anthropic Claude connection test failed: {error}")
        return APITestResult(
            success=False,
            api_name="anthropic",
            test_name=test_name,
            response_time_ms=response_time,
            status_code=status_code,
            error_message=error,
            debug_info=debug_info,
        )

    async def test_claude_models(self, models: Optional[List[str]] = None) -> APITestResult:
        """Test different Claude models availability.

        Args:
            models: Model identifiers to probe. When ``None`` the three
                built-in Claude 3 identifiers are used.

        Returns:
            An ``APITestResult`` that succeeds when at least one model
            answered. ``response_data`` maps each model to its individual
            outcome and ``response_time_ms`` is the sum over all requests.
        """
        test_name = "claude_models_test"
        self.logger.info("Testing Anthropic Claude models...")

        models_to_test = list(self._DEFAULT_MODELS if models is None else models)

        results = {}
        total_response_time = 0

        # Models are probed one after another, not concurrently.
        for model in models_to_test:
            test_message = self._user_message(model, 5, "Hi")
            success, data, error, status_code, response_time = await self._make_request(
                "POST", self._MESSAGES_ENDPOINT, data=test_message
            )
            total_response_time += response_time

            results[model] = {
                "success": success,
                "status_code": status_code,
                "error": error,
                "response_time_ms": response_time,
            }

            if success:
                self.logger.info(f"✅ Model {model} is available")
            else:
                self.logger.warning(f"⚠️ Model {model} failed: {error}")

        # Determine overall success: one reachable model is enough.
        successful_models = sum(1 for r in results.values() if r["success"])
        overall_success = successful_models > 0

        if overall_success:
            self.logger.info(
                f"✅ Claude models test passed ({successful_models}/{len(models_to_test)} models available)"
            )
        else:
            self.logger.error("❌ Claude models test failed (no models available)")

        return APITestResult(
            success=overall_success,
            api_name="anthropic",
            test_name=test_name,
            response_time_ms=total_response_time,
            response_data=results,
            debug_info={
                "models_tested": models_to_test,
                "successful_models": successful_models,
                "total_models": len(models_to_test),
            },
        )

    async def test_message_structure(self) -> APITestResult:
        """Test Anthropic message structure validation.

        Sends one valid request and checks that the response body contains
        every expected top-level field.

        Returns:
            An ``APITestResult`` that fails when the request fails or when
            any expected field is absent from the response body.
        """
        test_name = "message_structure_test"
        self.logger.info("Testing Anthropic message structure...")

        # Test valid message structure
        valid_message = self._user_message(self._DEFAULT_MODEL, 20, "What is 2+2?")
        success, data, error, status_code, response_time = await self._make_request(
            "POST", self._MESSAGES_ENDPOINT, data=valid_message
        )

        if not success:
            self.logger.error(f"❌ Anthropic message structure test failed: {error}")
            return APITestResult(
                success=False,
                api_name="anthropic",
                test_name=test_name,
                response_time_ms=response_time,
                status_code=status_code,
                error_message=error,
            )

        # Validate response structure
        expected_fields = [
            "id", "type", "role", "content", "model",
            "stop_reason", "stop_sequence", "usage",
        ]
        # A successful request with an empty or non-dict body must be reported
        # as "all fields missing" rather than crash on the membership test.
        body = data if isinstance(data, dict) else {}
        missing_fields = [field for field in expected_fields if field not in body]

        if missing_fields:
            self.logger.error(
                f"❌ Anthropic message structure test failed: missing fields {missing_fields}"
            )
            return APITestResult(
                success=False,
                api_name="anthropic",
                test_name=test_name,
                response_time_ms=response_time,
                status_code=status_code,
                error_message=f"Missing required fields: {missing_fields}",
                response_data=data,
                debug_info={
                    "expected_fields": expected_fields,
                    "missing_fields": missing_fields,
                },
            )

        self.logger.info("✅ Anthropic message structure test passed")
        return APITestResult(
            success=True,
            api_name="anthropic",
            test_name=test_name,
            response_time_ms=response_time,
            status_code=status_code,
            response_data=data,
            debug_info={
                "expected_fields": expected_fields,
                "missing_fields": [],
                "usage": body.get("usage", {}),
            },
        )

    async def test_token_usage(self) -> APITestResult:
        """Test token usage tracking and limits.

        Sends three prompts of increasing length and records the token
        counts found in each response's ``usage`` object.

        Returns:
            An ``APITestResult`` that succeeds when at least one case
            succeeded; ``response_data`` maps case name to its token counts
            (or its error).
        """
        test_name = "token_usage_test"
        self.logger.info("Testing Anthropic token usage...")

        # Test with different message lengths
        test_cases = [
            {
                "name": "short_message",
                "content": "Hi",
                "max_tokens": 10,
            },
            {
                "name": "medium_message",
                "content": "Tell me about artificial intelligence in 50 words.",
                "max_tokens": 50,
            },
            {
                "name": "long_message",
                "content": "Write a detailed explanation of machine learning algorithms, including supervised learning, unsupervised learning, and reinforcement learning. Include examples and use cases for each type.",
                "max_tokens": 200,
            },
        ]

        results = {}
        total_response_time = 0

        for test_case in test_cases:
            message = self._user_message(
                self._DEFAULT_MODEL, test_case["max_tokens"], test_case["content"]
            )
            success, data, error, status_code, response_time = await self._make_request(
                "POST", self._MESSAGES_ENDPOINT, data=message
            )
            total_response_time += response_time

            if success and data:
                usage = data.get("usage") or {}
                input_tokens = usage.get("input_tokens", 0)
                output_tokens = usage.get("output_tokens", 0)
                # The Messages API reports input and output counts only, so the
                # total is derived; an explicit total wins if one is present.
                total_tokens = usage.get("total_tokens", input_tokens + output_tokens)

                results[test_case["name"]] = {
                    "success": True,
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": total_tokens,
                    "response_time_ms": response_time,
                }
                self.logger.info(f"✅ {test_case['name']}: {total_tokens} tokens used")
            else:
                results[test_case["name"]] = {
                    "success": False,
                    "error": error,
                    "response_time_ms": response_time,
                }
                self.logger.warning(f"⚠️ {test_case['name']} failed: {error}")

        # Determine overall success: one successful case is enough.
        successful_tests = sum(1 for r in results.values() if r["success"])
        overall_success = successful_tests > 0

        if overall_success:
            self.logger.info(
                f"✅ Token usage test passed ({successful_tests}/{len(test_cases)} tests successful)"
            )
        else:
            self.logger.error("❌ Token usage test failed")

        return APITestResult(
            success=overall_success,
            api_name="anthropic",
            test_name=test_name,
            response_time_ms=total_response_time,
            response_data=results,
            debug_info={
                "test_cases": test_cases,
                "successful_tests": successful_tests,
                "total_tests": len(test_cases),
            },
        )

    async def test_error_handling(self) -> APITestResult:
        """Test Anthropic error handling scenarios.

        Sends three deliberately invalid payloads; a case passes when the
        request is reported as failed.

        Returns:
            An ``APITestResult`` that succeeds only when every invalid
            payload was rejected.
        """
        test_name = "error_handling_test"
        self.logger.info("Testing Anthropic error handling...")

        error_test_cases = [
            {
                "name": "invalid_model",
                "message": {
                    "model": "invalid-model",
                    "max_tokens": 10,
                    "messages": [{"role": "user", "content": "Hi"}],
                },
                "expected_error": "model_not_found",
            },
            {
                "name": "missing_messages",
                "message": {
                    "model": self._DEFAULT_MODEL,
                    "max_tokens": 10,
                },
                "expected_error": "validation_error",
            },
            {
                "name": "invalid_role",
                "message": {
                    "model": self._DEFAULT_MODEL,
                    "max_tokens": 10,
                    "messages": [{"role": "invalid", "content": "Hi"}],
                },
                "expected_error": "validation_error",
            },
        ]

        results = {}
        total_response_time = 0

        for test_case in error_test_cases:
            success, data, error, status_code, response_time = await self._make_request(
                "POST", self._MESSAGES_ENDPOINT, data=test_case["message"]
            )
            total_response_time += response_time

            # For error tests, we expect the request to fail.
            # NOTE(review): any failure counts as a pass; "expected_error" is
            # only recorded and never compared with the error received, so an
            # auth or network failure also passes -- consider tightening.
            if not success:
                results[test_case["name"]] = {
                    "success": True,  # Test passed because we got expected error
                    "status_code": status_code,
                    "error_received": error,
                    "expected_error": test_case["expected_error"],
                    "response_time_ms": response_time,
                }
                self.logger.info(f"✅ {test_case['name']}: Expected error received")
            else:
                results[test_case["name"]] = {
                    "success": False,  # Test failed because we didn't get expected error
                    "status_code": status_code,
                    "error_received": None,
                    "expected_error": test_case["expected_error"],
                    "response_time_ms": response_time,
                }
                self.logger.warning(f"⚠️ {test_case['name']}: Expected error but got success")

        # Determine overall success: every invalid payload must be rejected.
        total_tests = len(error_test_cases)
        successful_tests = sum(1 for r in results.values() if r["success"])
        overall_success = successful_tests == total_tests

        if overall_success:
            self.logger.info(
                f"✅ Error handling test passed ({successful_tests}/{total_tests} tests successful)"
            )
        else:
            self.logger.error(
                f"❌ Error handling test failed ({successful_tests}/{total_tests} tests successful)"
            )

        return APITestResult(
            success=overall_success,
            api_name="anthropic",
            test_name=test_name,
            response_time_ms=total_response_time,
            response_data=results,
            debug_info={
                "test_cases": error_test_cases,
                "successful_tests": successful_tests,
                "total_tests": total_tests,
            },
        )

    async def run_comprehensive_tests(self) -> List[APITestResult]:
        """Run all Anthropic-specific tests.

        The five tests are awaited together through ``asyncio.gather``. A test
        that raises does not abort the others; it is turned into a failed
        ``APITestResult`` carrying that test's own name.

        Returns:
            One ``APITestResult`` per test, in the order connection, models,
            message structure, token usage, error handling.
        """
        self.logger.info("Running comprehensive Anthropic Claude API tests...")

        # Names are paired with the coroutines so a crashed test can still be
        # reported under the name its normal result would have carried.
        named_tests = [
            ("claude_connection_test", self.test_claude_connection()),
            ("claude_models_test", self.test_claude_models()),
            ("message_structure_test", self.test_message_structure()),
            ("token_usage_test", self.test_token_usage()),
            ("error_handling_test", self.test_error_handling()),
        ]

        results = await asyncio.gather(
            *(coro for _, coro in named_tests), return_exceptions=True
        )

        # Handle any exceptions
        final_results = []
        for (test_name, _), result in zip(named_tests, results):
            # BaseException rather than Exception: a cancelled child is
            # returned by gather() as a CancelledError instance.
            if isinstance(result, BaseException):
                self.logger.error(f"Test {test_name} failed with exception: {result}")
                final_results.append(APITestResult(
                    success=False,
                    api_name="anthropic",
                    test_name=test_name,
                    response_time_ms=0,
                    error_message=f"Test failed with exception: {result}",
                ))
            else:
                final_results.append(result)

        return final_results