wanderlust.ai / src /wanderlust_ai /testing /mock_data_generator.py
BlakeL's picture
Upload 115 files
3f9f85b verified
"""
Mock Data Generator for AI Agent Testing
This module provides comprehensive mock data generation for testing AI agents
without hitting external APIs. Includes realistic flight, hotel, and POI data
with edge cases and various scenarios.
"""
import random
import uuid
from datetime import datetime, timedelta, time, timezone
from decimal import Decimal
from typing import Dict, List, Optional, Any, Union
from dataclasses import dataclass
from enum import Enum
from ..models.flight_models import FlightOption, Airline
from ..models.hotel_models import HotelOption, HotelChain, LocationType, RoomType, AmenityType
from ..models.poi_models import POI, ActivityCategory, ActivityType, ActivityLevel, WeatherDependency, PopularityLevel, PricingTier, TimeSlot
class MockScenario(str, Enum):
    """Named testing scenarios that steer mock data generation.

    Members are also plain strings (the enum mixes in ``str``), so they
    compare equal to their lowercase value.
    """

    # Everyday data in typical ranges.
    NORMAL = "normal"
    # Wider, more extreme value ranges.
    EDGE_CASE = "edge_case"
    # Elevated prices.
    HIGH_DEMAND = "high_demand"
    # Cheaper options.
    LOW_BUDGET = "low_budget"
    # Premium price ranges.
    LUXURY = "luxury"
    # The backend reports an error.
    ERROR_CONDITION = "error_condition"
    # The search yields nothing.
    EMPTY_RESULTS = "empty_results"
    # Only part of the data could be retrieved.
    PARTIAL_FAILURE = "partial_failure"
@dataclass
class MockDataConfig:
    """Configuration for mock data generation.

    Every field has a default, so ``MockDataConfig()`` is a valid
    "normal scenario in New York" configuration.
    """

    # Scenario that selects the price ranges and number of stops.
    scenario: MockScenario = MockScenario.NORMAL
    # Location the generated data is placed in.
    location: str = "New York"
    # Trip start; when None the generator picks a random date 1-30 days ahead.
    start_date: Optional[datetime] = None
    # Trip end; not read by the generator code visible in this file.
    end_date: Optional[datetime] = None
    # Budget; not read by the generator code visible in this file.
    budget: Decimal = Decimal("1000.00")
    # Party size; not read by the generator code visible in this file.
    group_size: int = 1
    # Edge-case switch; not read by the generator code visible in this file.
    include_edge_cases: bool = True
    # Probability (0.0 to 1.0) that each generated item is skipped.
    error_rate: float = 0.0
    # Base delay in milliseconds used to simulate API latency.
    response_delay_ms: int = 100
class MockDataGenerator:
    """Generates realistic mock data for testing AI agents.

    The generator produces flights, hotels and points of interest for the
    location named in its ``MockDataConfig``. All random choices go through
    the module-level ``random`` generator.
    """

    # Fallback city and coordinates, used when the configured location is
    # empty or is not one of the cities in ``city_coordinates``.
    _DEFAULT_CITY = "New York"
    _DEFAULT_COORDINATES = {"lat": 40.7128, "lon": -74.0060}

    # Destinations offered by generate_flights().
    _DESTINATION_CITIES = ("Los Angeles", "Chicago", "Houston", "Phoenix")

    # IATA two-character airline designators used as flight-number prefixes.
    _AIRLINE_CODES = {
        "American Airlines": "AA",
        "Delta Air Lines": "DL",
        "United Airlines": "UA",
        "Southwest Airlines": "WN",
        "JetBlue Airways": "B6",
        "Alaska Airlines": "AS",
        "Spirit Airlines": "NK",
        "Frontier Airlines": "F9",
    }
    _UNKNOWN_AIRLINE_CODE = "XX"

    def __init__(self, config: Optional[MockDataConfig] = None):
        """Create a generator.

        Args:
            config: Generation settings; a default ``MockDataConfig`` is
                used when omitted.
        """
        self.config = config if config is not None else MockDataConfig()
        self._setup_mock_data()

    def _setup_mock_data(self):
        """Set up the base name pools and city coordinates."""
        # Flight data
        self.airlines = [
            "American Airlines", "Delta Air Lines", "United Airlines", "Southwest Airlines",
            "JetBlue Airways", "Alaska Airlines", "Spirit Airlines", "Frontier Airlines",
        ]
        self.aircraft_types = [
            "Boeing 737", "Boeing 777", "Boeing 787", "Airbus A320", "Airbus A350", "Airbus A380",
        ]
        # Hotel data
        self.hotel_chains = list(HotelChain)
        self.hotel_names = [
            "Grand Hotel", "Plaza Hotel", "Central Hotel", "Garden Inn", "Royal Suites",
            "Metropolitan Hotel", "Park View Hotel", "City Center Hotel", "Boutique Hotel", "Executive Inn",
        ]
        # POI data
        self.poi_names = [
            "Museum of Art", "Historic District", "Central Park", "City Hall", "Shopping Mall",
            "Concert Hall", "Sports Stadium", "Aquarium", "Botanical Garden", "Art Gallery",
        ]
        # Location coordinates (major cities)
        self.city_coordinates = {
            "New York": {"lat": 40.7128, "lon": -74.0060},
            "Los Angeles": {"lat": 34.0522, "lon": -118.2437},
            "Chicago": {"lat": 41.8781, "lon": -87.6298},
            "Houston": {"lat": 29.7604, "lon": -95.3698},
            "Phoenix": {"lat": 33.4484, "lon": -112.0740},
            "Philadelphia": {"lat": 39.9526, "lon": -75.1652},
            "San Antonio": {"lat": 29.4241, "lon": -98.4936},
            "San Diego": {"lat": 32.7157, "lon": -117.1611},
            "Dallas": {"lat": 32.7767, "lon": -96.7970},
            "San Jose": {"lat": 37.3382, "lon": -121.8863},
        }

    def _city_name(self) -> str:
        """Return the city part of the configured location.

        Both ``"Chicago, IL"`` and ``"Chicago"`` yield ``"Chicago"``. Only an
        empty location falls back to the default city; previously any
        location without a comma was silently replaced by New York.
        """
        city = (self.config.location or "").split(",")[0].strip()
        return city or self._DEFAULT_CITY

    def _city_coordinates(self, city: str) -> Dict[str, float]:
        """Return the coordinates for ``city``, or the default ones if unknown."""
        return self.city_coordinates.get(city, self._DEFAULT_COORDINATES)

    def generate_flights(self, count: int = 10) -> List[FlightOption]:
        """Generate mock flight data.

        Args:
            count: Number of flights to attempt. Each attempt is skipped with
                probability ``config.error_rate``, so fewer may be returned.

        Returns:
            The generated ``FlightOption`` objects.
        """
        departure_city = self._city_name()
        # Never offer a flight from a city to itself.
        destinations = [c for c in self._DESTINATION_CITIES if c != departure_city]

        flights = []
        for _ in range(count):
            # Simulate error conditions
            if random.random() < self.config.error_rate:
                continue

            departure_time = self._generate_flight_time()
            # Work in whole minutes so arrival_time and duration_minutes agree.
            duration_minutes = int(random.uniform(1.5, 8.0) * 60)
            arrival_time = departure_time + timedelta(minutes=duration_minutes)

            base_price = self._generate_price(scenario_type="flight")
            stops = self._generate_stops()

            airline = random.choice(self.airlines)
            airline_code = self._get_airline_code(airline)
            flight_number = f"{airline_code}{random.randint(100, 999)}"

            flights.append(FlightOption(
                airline=airline,
                flight_number=flight_number,
                departure_city=departure_city,
                arrival_city=random.choice(destinations),
                departure_time=departure_time,
                arrival_time=arrival_time,
                duration_minutes=duration_minutes,
                price=base_price,
                aircraft_type=random.choice(self.aircraft_types),
                stops=stops,
                baggage_allowance="1 carry-on, 1 checked bag",
            ))
        return flights

    def generate_hotels(self, count: int = 10) -> List[HotelOption]:
        """Generate mock hotel data.

        Args:
            count: Number of hotels to attempt. Each attempt is skipped with
                probability ``config.error_rate``, so fewer may be returned.

        Returns:
            The generated ``HotelOption`` objects.
        """
        city = self._city_name()
        city_coords = self._city_coordinates(city)

        hotels = []
        for _ in range(count):
            # Simulate error conditions
            if random.random() < self.config.error_rate:
                continue

            base_price = self._generate_price(scenario_type="hotel")

            # Scatter hotels around the city centre.
            lat = city_coords["lat"] + random.uniform(-0.1, 0.1)
            lon = city_coords["lon"] + random.uniform(-0.1, 0.1)

            # Stay of 1-7 nights starting on the configured date, or on a
            # random date 1-30 days ahead when none is configured.
            if self.config.start_date:
                check_in = self.config.start_date
            else:
                check_in = datetime.now(timezone.utc) + timedelta(days=random.randint(1, 30))
            check_out = check_in + timedelta(days=random.randint(1, 7))

            # Check-in at 15:00 and check-out at 11:00, both labelled UTC.
            check_in = check_in.replace(hour=15, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)
            check_out = check_out.replace(hour=11, minute=0, second=0, microsecond=0, tzinfo=timezone.utc)

            hotels.append(HotelOption(
                name=f"{random.choice(self.hotel_names)} {city}",
                chain=random.choice(self.hotel_chains),
                address=f"{random.randint(100, 999)} {random.choice(['Main St', 'Broadway', 'Park Ave', 'First St'])}, {city}, USA",
                city=city,
                country="USA",
                latitude=lat,
                longitude=lon,
                price_per_night=base_price,
                rating=round(random.uniform(3.0, 5.0), 1),
                review_count=random.randint(50, 5000),
                amenities=self._generate_amenities(),
                room_types_available=self._generate_room_types(),
                location_type=random.choice(list(LocationType)),
                distance_city_center_km=round(random.uniform(0.1, 10.0), 1),
                distance_airport_km=round(random.uniform(5.0, 50.0), 1),
                check_in_date=check_in,
                check_out_date=check_out,
                max_occupancy=random.randint(1, 4),
                rooms_available=random.randint(1, 20),
            ))
        return hotels

    def generate_pois(self, count: int = 10) -> List[POI]:
        """Generate mock POI data.

        Args:
            count: Number of POIs to attempt. Each attempt is skipped with
                probability ``config.error_rate``, so fewer may be returned.

        Returns:
            The generated ``POI`` objects.
        """
        city = self._city_name()
        city_coords = self._city_coordinates(city)

        pois = []
        for _ in range(count):
            # Simulate error conditions
            if random.random() < self.config.error_rate:
                continue

            category = random.choice(list(ActivityCategory))
            activity_type = random.choice(list(ActivityType))

            # Scatter POIs around the city centre.
            lat = city_coords["lat"] + random.uniform(-0.05, 0.05)
            lon = city_coords["lon"] + random.uniform(-0.05, 0.05)

            # Free POIs carry no prices; all others get prices rounded to cents.
            pricing_tier = random.choice(list(PricingTier))
            adult_price = None
            child_price = None
            if pricing_tier != PricingTier.FREE:
                adult_price = Decimal(str(round(random.uniform(5.0, 50.0), 2)))
                child_price = (adult_price * Decimal("0.5")).quantize(Decimal("0.01"))

            pois.append(POI(
                id=str(uuid.uuid4()),
                name=f"{random.choice(self.poi_names)} {city}",
                description=f"A wonderful {category.value.lower()} experience in {self.config.location}",
                location=f"{self.config.location}, USA",
                latitude=lat,
                longitude=lon,
                category=category,
                activity_type=activity_type,
                activity_level=random.choice(list(ActivityLevel)),
                duration_hours=random.uniform(1.0, 6.0),
                weather_dependency=random.choice(list(WeatherDependency)),
                popularity_level=random.choice(list(PopularityLevel)),
                price_range=pricing_tier,
                adult_price=adult_price,
                child_price=child_price,
                opening_hours=self._generate_opening_hours(),
                rating=round(random.uniform(3.0, 5.0), 1),
                review_count=random.randint(10, 1000),
                website_url=f"https://example.com/poi/{uuid.uuid4()}",
                booking_required=random.choice([True, False]),
            ))
        return pois

    def _get_airline_code(self, airline: str) -> str:
        """Return the IATA code for ``airline``, or ``"XX"`` if it is unknown."""
        return self._AIRLINE_CODES.get(airline, self._UNKNOWN_AIRLINE_CODE)

    def _generate_flight_time(self) -> datetime:
        """Generate a UTC departure time between 06:00 and 22:45.

        The date is ``config.start_date`` when set, otherwise a random date
        1-30 days ahead. Minutes fall on a quarter hour.
        """
        if not self.config.start_date:
            base_date = datetime.now(timezone.utc) + timedelta(days=random.randint(1, 30))
        else:
            base_date = self.config.start_date
        hour = random.randint(6, 22)
        minute = random.choice([0, 15, 30, 45])
        return datetime.combine(base_date.date(), time(hour, minute), timezone.utc)

    def _generate_price(self, scenario_type: str) -> Decimal:
        """Generate a price for the configured scenario.

        Args:
            scenario_type: ``"flight"`` or ``"hotel"``; any other value
                selects the generic (activity-sized) price ranges.

        Returns:
            A price rounded to two decimals. Scenarios without a range of
            their own use 100-500.
        """
        if scenario_type == "flight":
            base_ranges = {
                MockScenario.NORMAL: (200, 800),
                MockScenario.LOW_BUDGET: (100, 400),
                MockScenario.LUXURY: (800, 2000),
                MockScenario.HIGH_DEMAND: (400, 1200),
                MockScenario.EDGE_CASE: (50, 3000),
            }
        elif scenario_type == "hotel":
            base_ranges = {
                MockScenario.NORMAL: (100, 300),
                MockScenario.LOW_BUDGET: (50, 150),
                MockScenario.LUXURY: (300, 800),
                MockScenario.HIGH_DEMAND: (200, 500),
                MockScenario.EDGE_CASE: (25, 1000),
            }
        else:
            base_ranges = {
                MockScenario.NORMAL: (10, 50),
                MockScenario.LOW_BUDGET: (5, 25),
                MockScenario.LUXURY: (50, 200),
                MockScenario.HIGH_DEMAND: (25, 100),
                MockScenario.EDGE_CASE: (0, 500),
            }
        min_price, max_price = base_ranges.get(self.config.scenario, (100, 500))
        price = random.uniform(min_price, max_price)
        return Decimal(str(round(price, 2)))

    def _generate_stops(self) -> int:
        """Generate the number of stops for the configured scenario."""
        if self.config.scenario == MockScenario.EDGE_CASE:
            return random.choice([0, 1, 2, 3])  # Include more stops
        if self.config.scenario == MockScenario.LOW_BUDGET:
            return random.choice([0, 1, 2])  # More stops for budget flights
        return random.choice([0, 1])  # Mostly direct or 1 stop

    def _generate_amenities(self) -> List[AmenityType]:
        """Pick a random subset of amenities (at least three when available)."""
        all_amenities = list(AmenityType)
        # Clamp the lower bound so a small enum cannot make randint() raise.
        num_amenities = random.randint(min(3, len(all_amenities)), len(all_amenities))
        return random.sample(all_amenities, num_amenities)

    def _generate_room_types(self) -> List[RoomType]:
        """Pick a random subset of room types (at least one when available)."""
        all_types = list(RoomType)
        # Clamp the lower bound so an empty enum cannot make randint() raise.
        num_types = random.randint(min(1, len(all_types)), len(all_types))
        return random.sample(all_types, num_types)

    def _generate_opening_hours(self) -> List[TimeSlot]:
        """Generate opening hours, identical for every day of the week.

        Hours are 09:00-18:00 by default; with 30% probability they open
        between 08:00 and 10:30 and close between 17:00 and 21:30 instead.
        """
        start_time = time(9, 0)
        end_time = time(18, 0)
        if random.random() < 0.3:
            start_time = time(random.randint(8, 10), random.choice([0, 30]))
            end_time = time(random.randint(17, 21), random.choice([0, 30]))
        # One slot per day of the week (0=Monday, 6=Sunday)
        return [
            TimeSlot(start_time=start_time, end_time=end_time, day_of_week=day)
            for day in range(7)
        ]

    def generate_error_response(self, error_type: str = "api_error") -> Dict[str, Any]:
        """Generate a mock error response for testing error handling.

        Args:
            error_type: One of ``"api_error"``, ``"rate_limit"``,
                ``"invalid_request"``, ``"no_results"`` or
                ``"partial_failure"``. Unknown values yield the
                ``"api_error"`` payload.

        Returns:
            A freshly built dict describing the error.
        """
        error_responses = {
            "api_error": {
                "error": "API_ERROR",
                "message": "External API is temporarily unavailable",
                "code": 500,
                "retry_after": 30,
            },
            "rate_limit": {
                "error": "RATE_LIMIT_EXCEEDED",
                "message": "Rate limit exceeded. Please try again later.",
                "code": 429,
                "retry_after": 60,
            },
            "invalid_request": {
                "error": "INVALID_REQUEST",
                "message": "Invalid request parameters",
                "code": 400,
                "details": "Missing required field: location",
            },
            "no_results": {
                "error": "NO_RESULTS",
                "message": "No results found for the given criteria",
                "code": 404,
                "suggestions": ["Try different dates", "Expand search radius"],
            },
            "partial_failure": {
                "error": "PARTIAL_FAILURE",
                "message": "Some results could not be retrieved",
                "code": 206,
                "partial_results": True,
                "failed_services": ["hotel_search", "poi_search"],
            },
        }
        return error_responses.get(error_type, error_responses["api_error"])

    def simulate_api_delay(self, base_delay_ms: int = None) -> None:
        """Sleep to simulate API response latency.

        Args:
            base_delay_ms: Base delay in milliseconds; when None,
                ``config.response_delay_ms`` is used. An explicit 0 is
                honoured rather than replaced by the configured default.

        The actual delay is the base plus or minus up to 50 ms, and never
        less than 10 ms.
        """
        # Aliased so it cannot be confused with ``datetime.time``, which this
        # module imports under the name ``time``.
        import time as _time

        delay = self.config.response_delay_ms if base_delay_ms is None else base_delay_ms
        actual_delay = max(10, delay + random.randint(-50, 50))  # Minimum 10ms
        _time.sleep(actual_delay / 1000.0)
# Factory functions for common test scenarios
def create_normal_scenario_data(location: str = "New York", count: int = 10) -> Dict[str, Any]:
    """Build a full mock data set for the normal scenario.

    Args:
        location: Location the data is generated for.
        count: Number of flights, hotels and POIs to generate.

    Returns:
        A dict with the keys ``"flights"``, ``"hotels"``, ``"pois"`` and
        ``"scenario"``.
    """
    generator = MockDataGenerator(
        MockDataConfig(
            scenario=MockScenario.NORMAL,
            location=location,
            include_edge_cases=False,
            error_rate=0.0,
        )
    )
    dataset: Dict[str, Any] = {}
    dataset["flights"] = generator.generate_flights(count)
    dataset["hotels"] = generator.generate_hotels(count)
    dataset["pois"] = generator.generate_pois(count)
    dataset["scenario"] = MockScenario.NORMAL
    return dataset
def create_edge_case_data(location: str = "New York", count: int = 10) -> Dict[str, Any]:
    """Build a mock data set for edge-case testing.

    The generator runs with a 10% error rate, so each list may hold fewer
    than ``count`` items.

    Args:
        location: Location the data is generated for.
        count: Number of flights, hotels and POIs to attempt.

    Returns:
        A dict with the keys ``"flights"``, ``"hotels"``, ``"pois"`` and
        ``"scenario"``.
    """
    generator = MockDataGenerator(
        MockDataConfig(
            scenario=MockScenario.EDGE_CASE,
            location=location,
            include_edge_cases=True,
            error_rate=0.1,
        )
    )
    dataset: Dict[str, Any] = {}
    dataset["flights"] = generator.generate_flights(count)
    dataset["hotels"] = generator.generate_hotels(count)
    dataset["pois"] = generator.generate_pois(count)
    dataset["scenario"] = MockScenario.EDGE_CASE
    return dataset
def create_error_scenario_data(error_type: str = "api_error") -> Dict[str, Any]:
    """Build a mock data set for error-scenario testing.

    Args:
        error_type: Kind of error payload, passed to
            ``MockDataGenerator.generate_error_response``.

    Returns:
        A dict holding the error payload under ``"error"``, empty
        ``"flights"``, ``"hotels"`` and ``"pois"`` lists, and the scenario.
    """
    error_config = MockDataConfig(scenario=MockScenario.ERROR_CONDITION, error_rate=1.0)
    error_payload = MockDataGenerator(error_config).generate_error_response(error_type)

    dataset: Dict[str, Any] = {"error": error_payload}
    # No search results accompany an error.
    for result_key in ("flights", "hotels", "pois"):
        dataset[result_key] = []
    dataset["scenario"] = MockScenario.ERROR_CONDITION
    return dataset
def create_empty_results_data() -> Dict[str, Any]:
    """Create mock data for empty results testing.

    Returns:
        A dict with empty ``"flights"``, ``"hotels"`` and ``"pois"`` lists
        and ``"scenario"`` set to ``MockScenario.EMPTY_RESULTS``.
    """
    # The result is fixed, so no config or generator needs to be built.
    return {
        "flights": [],
        "hotels": [],
        "pois": [],
        "scenario": MockScenario.EMPTY_RESULTS,
    }
# Example usage and testing
if __name__ == "__main__":
    # Smoke test: build each scenario once and report how many items came back.
    print("🧪 Mock Data Generator Testing")
    print("=" * 40)

    # (heading, factory, location) for the scenarios that return result lists.
    demo_runs = (
        ("\n1. Normal Scenario:", create_normal_scenario_data, "Los Angeles"),
        ("\n2. Edge Case Scenario:", create_edge_case_data, "Chicago"),
    )
    for heading, factory, city in demo_runs:
        print(heading)
        sample = factory(city, 5)
        print(f" Flights: {len(sample['flights'])}")
        print(f" Hotels: {len(sample['hotels'])}")
        print(f" POIs: {len(sample['pois'])}")

    # The error scenario carries only an error payload.
    print("\n3. Error Scenario:")
    error_info = create_error_scenario_data("rate_limit")["error"]
    print(f" Error: {error_info['error']}")
    print(f" Message: {error_info['message']}")

    print("\n✅ Mock data generation working correctly!")