""" Mock Data Generator for AI Agent Testing This module provides comprehensive mock data generation for testing AI agents without hitting external APIs. Includes realistic flight, hotel, and POI data with edge cases and various scenarios. """ import random import uuid from datetime import datetime, timedelta, time, timezone from decimal import Decimal from typing import Dict, List, Optional, Any, Union from dataclasses import dataclass from enum import Enum from ..models.flight_models import FlightOption, Airline from ..models.hotel_models import HotelOption, HotelChain, LocationType, RoomType, AmenityType from ..models.poi_models import POI, ActivityCategory, ActivityType, ActivityLevel, WeatherDependency, PopularityLevel, PricingTier, TimeSlot class MockScenario(str, Enum): """Different testing scenarios for mock data generation.""" NORMAL = "normal" EDGE_CASE = "edge_case" HIGH_DEMAND = "high_demand" LOW_BUDGET = "low_budget" LUXURY = "luxury" ERROR_CONDITION = "error_condition" EMPTY_RESULTS = "empty_results" PARTIAL_FAILURE = "partial_failure" @dataclass class MockDataConfig: """Configuration for mock data generation.""" scenario: MockScenario = MockScenario.NORMAL location: str = "New York" start_date: datetime = None end_date: datetime = None budget: Decimal = Decimal("1000.00") group_size: int = 1 include_edge_cases: bool = True error_rate: float = 0.0 # 0.0 to 1.0 response_delay_ms: int = 100 # Simulate API latency class MockDataGenerator: """Generates realistic mock data for testing AI agents.""" def __init__(self, config: MockDataConfig = None): self.config = config or MockDataConfig() self._setup_mock_data() def _setup_mock_data(self): """Set up base mock data.""" # Flight data self.airlines = [ "American Airlines", "Delta Air Lines", "United Airlines", "Southwest Airlines", "JetBlue Airways", "Alaska Airlines", "Spirit Airlines", "Frontier Airlines" ] self.aircraft_types = [ "Boeing 737", "Boeing 777", "Boeing 787", "Airbus A320", "Airbus A350", "Airbus A380" ] # Hotel data self.hotel_chains = list(HotelChain) self.hotel_names = [ "Grand Hotel", "Plaza Hotel", "Central Hotel", "Garden Inn", "Royal Suites", "Metropolitan Hotel", "Park View Hotel", "City Center Hotel", "Boutique Hotel", "Executive Inn" ] # POI data self.poi_names = [ "Museum of Art", "Historic District", "Central Park", "City Hall", "Shopping Mall", "Concert Hall", "Sports Stadium", "Aquarium", "Botanical Garden", "Art Gallery" ] # Location coordinates (major cities) self.city_coordinates = { "New York": {"lat": 40.7128, "lon": -74.0060}, "Los Angeles": {"lat": 34.0522, "lon": -118.2437}, "Chicago": {"lat": 41.8781, "lon": -87.6298}, "Houston": {"lat": 29.7604, "lon": -95.3698}, "Phoenix": {"lat": 33.4484, "lon": -112.0740}, "Philadelphia": {"lat": 39.9526, "lon": -75.1652}, "San Antonio": {"lat": 29.4241, "lon": -98.4936}, "San Diego": {"lat": 32.7157, "lon": -117.1611}, "Dallas": {"lat": 32.7767, "lon": -96.7970}, "San Jose": {"lat": 37.3382, "lon": -121.8863} } def generate_flights(self, count: int = 10) -> List[FlightOption]: """Generate mock flight data.""" flights = [] for i in range(count): # Simulate error conditions if random.random() < self.config.error_rate: continue # Generate realistic flight data departure_time = self._generate_flight_time() duration_hours = random.uniform(1.5, 8.0) arrival_time = departure_time + timedelta(hours=duration_hours) # Generate price based on scenario base_price = self._generate_price(scenario_type="flight") # Generate stops stops = self._generate_stops() # Select airline and generate proper flight number airline = random.choice(self.airlines) airline_code = self._get_airline_code(airline) flight_number = f"{airline_code}{random.randint(100, 999)}" flight = FlightOption( airline=airline, flight_number=flight_number, departure_city=self.config.location.split(',')[0] if ',' in self.config.location else "New York", arrival_city=random.choice(["Los Angeles", "Chicago", "Houston", "Phoenix"]), departure_time=departure_time, arrival_time=arrival_time, duration_minutes=int(duration_hours * 60), price=base_price, aircraft_type=random.choice(self.aircraft_types), stops=stops, baggage_allowance="1 carry-on, 1 checked bag" ) flights.append(flight) return flights def generate_hotels(self, count: int = 10) -> List[HotelOption]: """Generate mock hotel data.""" hotels = [] for i in range(count): # Simulate error conditions if random.random() < self.config.error_rate: continue # Generate realistic hotel data base_price = self._generate_price(scenario_type="hotel") # Get city coordinates city_coords = self.city_coordinates.get( self.config.location.split(',')[0] if ',' in self.config.location else "New York", {"lat": 40.7128, "lon": -74.0060} ) # Add some random offset for hotel location lat = city_coords["lat"] + random.uniform(-0.1, 0.1) lon = city_coords["lon"] + random.uniform(-0.1, 0.1) # Generate check-in/out dates if self.config.start_date: check_in = self.config.start_date else: check_in = datetime.now() + timedelta(days=random.randint(1, 30)) check_out = check_in + timedelta(days=random.randint(1, 7)) # Make timezone-aware from datetime import timezone check_in = check_in.replace(hour=15, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) check_out = check_out.replace(hour=11, minute=0, second=0, microsecond=0, tzinfo=timezone.utc) hotel = HotelOption( name=f"{random.choice(self.hotel_names)} {self.config.location.split(',')[0] if ',' in self.config.location else 'NYC'}", chain=random.choice(self.hotel_chains), address=f"{random.randint(100, 999)} {random.choice(['Main St', 'Broadway', 'Park Ave', 'First St'])}, {self.config.location.split(',')[0] if ',' in self.config.location else 'New York'}, USA", city=self.config.location.split(',')[0] if ',' in self.config.location else "New York", country="USA", latitude=lat, longitude=lon, price_per_night=base_price, rating=round(random.uniform(3.0, 5.0), 1), review_count=random.randint(50, 5000), amenities=self._generate_amenities(), room_types_available=self._generate_room_types(), location_type=random.choice(list(LocationType)), distance_city_center_km=round(random.uniform(0.1, 10.0), 1), distance_airport_km=round(random.uniform(5.0, 50.0), 1), check_in_date=check_in, check_out_date=check_out, max_occupancy=random.randint(1, 4), rooms_available=random.randint(1, 20) ) hotels.append(hotel) return hotels def generate_pois(self, count: int = 10) -> List[POI]: """Generate mock POI data.""" pois = [] for i in range(count): # Simulate error conditions if random.random() < self.config.error_rate: continue # Generate realistic POI data category = random.choice(list(ActivityCategory)) activity_type = random.choice(list(ActivityType)) # Get city coordinates city_coords = self.city_coordinates.get( self.config.location.split(',')[0] if ',' in self.config.location else "New York", {"lat": 40.7128, "lon": -74.0060} ) # Add some random offset for POI location lat = city_coords["lat"] + random.uniform(-0.05, 0.05) lon = city_coords["lon"] + random.uniform(-0.05, 0.05) # Generate pricing pricing_tier = random.choice(list(PricingTier)) adult_price = None if pricing_tier != PricingTier.FREE: adult_price = Decimal(str(random.uniform(5.0, 50.0))) poi = POI( id=str(uuid.uuid4()), name=f"{random.choice(self.poi_names)} {self.config.location.split(',')[0] if ',' in self.config.location else 'NYC'}", description=f"A wonderful {category.value.lower()} experience in {self.config.location}", location=f"{self.config.location}, USA", latitude=lat, longitude=lon, category=category, activity_type=activity_type, activity_level=random.choice(list(ActivityLevel)), duration_hours=random.uniform(1.0, 6.0), weather_dependency=random.choice(list(WeatherDependency)), popularity_level=random.choice(list(PopularityLevel)), price_range=pricing_tier, adult_price=adult_price, child_price=adult_price * Decimal('0.5') if adult_price else None, opening_hours=self._generate_opening_hours(), rating=round(random.uniform(3.0, 5.0), 1), review_count=random.randint(10, 1000), website_url=f"https://example.com/poi/{uuid.uuid4()}", booking_required=random.choice([True, False]) ) pois.append(poi) return pois def _get_airline_code(self, airline: str) -> str: """Get airline code for flight number generation.""" airline_codes = { "American Airlines": "AA", "Delta Air Lines": "DL", "United Airlines": "UA", "Southwest Airlines": "WN", "JetBlue Airways": "JB", "Alaska Airlines": "AS", "Spirit Airlines": "NK", "Frontier Airlines": "FR" } return airline_codes.get(airline, "XX") def _generate_flight_time(self) -> datetime: """Generate realistic flight departure time.""" if not self.config.start_date: base_date = datetime.now() + timedelta(days=random.randint(1, 30)) else: base_date = self.config.start_date # Generate time between 6 AM and 10 PM hour = random.randint(6, 22) minute = random.choice([0, 15, 30, 45]) # Create timezone-aware datetime from datetime import timezone return datetime.combine(base_date.date(), time(hour, minute), timezone.utc) def _generate_price(self, scenario_type: str) -> Decimal: """Generate price based on scenario and type.""" if scenario_type == "flight": base_ranges = { MockScenario.NORMAL: (200, 800), MockScenario.LOW_BUDGET: (100, 400), MockScenario.LUXURY: (800, 2000), MockScenario.HIGH_DEMAND: (400, 1200), MockScenario.EDGE_CASE: (50, 3000) } elif scenario_type == "hotel": base_ranges = { MockScenario.NORMAL: (100, 300), MockScenario.LOW_BUDGET: (50, 150), MockScenario.LUXURY: (300, 800), MockScenario.HIGH_DEMAND: (200, 500), MockScenario.EDGE_CASE: (25, 1000) } else: base_ranges = { MockScenario.NORMAL: (10, 50), MockScenario.LOW_BUDGET: (5, 25), MockScenario.LUXURY: (50, 200), MockScenario.HIGH_DEMAND: (25, 100), MockScenario.EDGE_CASE: (0, 500) } min_price, max_price = base_ranges.get(self.config.scenario, (100, 500)) price = random.uniform(min_price, max_price) return Decimal(str(round(price, 2))) def _generate_stops(self) -> int: """Generate number of stops based on scenario.""" if self.config.scenario == MockScenario.EDGE_CASE: return random.choice([0, 1, 2, 3]) # Include more stops elif self.config.scenario == MockScenario.LOW_BUDGET: return random.choice([0, 1, 2]) # More stops for budget flights else: return random.choice([0, 1]) # Mostly direct or 1 stop def _generate_amenities(self) -> List[AmenityType]: """Generate realistic hotel amenities.""" all_amenities = list(AmenityType) num_amenities = random.randint(3, len(all_amenities)) return random.sample(all_amenities, num_amenities) def _generate_room_types(self) -> List[RoomType]: """Generate available room types.""" all_types = list(RoomType) num_types = random.randint(1, len(all_types)) return random.sample(all_types, num_types) def _generate_opening_hours(self) -> List[TimeSlot]: """Generate realistic opening hours.""" # Most POIs are open daily with some variation hours = [] # Standard hours: 9 AM to 6 PM start_time = time(9, 0) end_time = time(18, 0) # Add some variation if random.random() < 0.3: # 30% chance of different hours start_time = time(random.randint(8, 10), random.choice([0, 30])) end_time = time(random.randint(17, 21), random.choice([0, 30])) # Generate for each day of the week (0=Monday, 6=Sunday) for day in range(7): hours.append(TimeSlot( start_time=start_time, end_time=end_time, day_of_week=day )) return hours def generate_error_response(self, error_type: str = "api_error") -> Dict[str, Any]: """Generate mock error responses for testing error handling.""" error_responses = { "api_error": { "error": "API_ERROR", "message": "External API is temporarily unavailable", "code": 500, "retry_after": 30 }, "rate_limit": { "error": "RATE_LIMIT_EXCEEDED", "message": "Rate limit exceeded. Please try again later.", "code": 429, "retry_after": 60 }, "invalid_request": { "error": "INVALID_REQUEST", "message": "Invalid request parameters", "code": 400, "details": "Missing required field: location" }, "no_results": { "error": "NO_RESULTS", "message": "No results found for the given criteria", "code": 404, "suggestions": ["Try different dates", "Expand search radius"] }, "partial_failure": { "error": "PARTIAL_FAILURE", "message": "Some results could not be retrieved", "code": 206, "partial_results": True, "failed_services": ["hotel_search", "poi_search"] } } return error_responses.get(error_type, error_responses["api_error"]) def simulate_api_delay(self, base_delay_ms: int = None) -> None: """Simulate API response delay.""" import time delay = base_delay_ms or self.config.response_delay_ms # Add some random variation actual_delay = delay + random.randint(-50, 50) actual_delay = max(10, actual_delay) # Minimum 10ms time.sleep(actual_delay / 1000.0) # Factory functions for common test scenarios def create_normal_scenario_data(location: str = "New York", count: int = 10) -> Dict[str, Any]: """Create mock data for normal scenario testing.""" config = MockDataConfig( scenario=MockScenario.NORMAL, location=location, include_edge_cases=False, error_rate=0.0 ) generator = MockDataGenerator(config) return { "flights": generator.generate_flights(count), "hotels": generator.generate_hotels(count), "pois": generator.generate_pois(count), "scenario": MockScenario.NORMAL } def create_edge_case_data(location: str = "New York", count: int = 10) -> Dict[str, Any]: """Create mock data for edge case testing.""" config = MockDataConfig( scenario=MockScenario.EDGE_CASE, location=location, include_edge_cases=True, error_rate=0.1 ) generator = MockDataGenerator(config) return { "flights": generator.generate_flights(count), "hotels": generator.generate_hotels(count), "pois": generator.generate_pois(count), "scenario": MockScenario.EDGE_CASE } def create_error_scenario_data(error_type: str = "api_error") -> Dict[str, Any]: """Create mock data for error scenario testing.""" config = MockDataConfig( scenario=MockScenario.ERROR_CONDITION, error_rate=1.0 ) generator = MockDataGenerator(config) return { "error": generator.generate_error_response(error_type), "flights": [], "hotels": [], "pois": [], "scenario": MockScenario.ERROR_CONDITION } def create_empty_results_data() -> Dict[str, Any]: """Create mock data for empty results testing.""" config = MockDataConfig( scenario=MockScenario.EMPTY_RESULTS, error_rate=0.0 ) generator = MockDataGenerator(config) return { "flights": [], "hotels": [], "pois": [], "scenario": MockScenario.EMPTY_RESULTS } # Example usage and testing if __name__ == "__main__": print("🧪 Mock Data Generator Testing") print("=" * 40) # Test normal scenario print("\n1. Normal Scenario:") normal_data = create_normal_scenario_data("Los Angeles", 5) print(f" Flights: {len(normal_data['flights'])}") print(f" Hotels: {len(normal_data['hotels'])}") print(f" POIs: {len(normal_data['pois'])}") # Test edge case scenario print("\n2. Edge Case Scenario:") edge_data = create_edge_case_data("Chicago", 5) print(f" Flights: {len(edge_data['flights'])}") print(f" Hotels: {len(edge_data['hotels'])}") print(f" POIs: {len(edge_data['pois'])}") # Test error scenario print("\n3. Error Scenario:") error_data = create_error_scenario_data("rate_limit") print(f" Error: {error_data['error']['error']}") print(f" Message: {error_data['error']['message']}") print("\n✅ Mock data generation working correctly!")