Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
Real API Client for Travel Data

This module provides real API integrations for flight, hotel, and activity searches
using SerpAPI, Tavily, and other travel APIs.
"""
import asyncio
import logging
import os
import re
import ssl
from dataclasses import dataclass
from datetime import datetime, date
from decimal import Decimal
from typing import Dict, List, Any, Optional

import aiohttp
from dotenv import load_dotenv
# Load environment variables from a local .env file at import time (no-op if absent)
load_dotenv()

# Module-level logger; handler/level configuration is left to the application
logger = logging.getLogger(__name__)
@dataclass
class Flight:
    """A single flight option returned by a flight search.

    BUG FIX: the ``@dataclass`` decorator was missing, so this class had only
    class-level annotations and no generated ``__init__`` — every
    ``Flight(origin=..., ...)`` call in the result parsers raised ``TypeError``.
    """
    origin: str             # departure city or airport code as given by the caller
    destination: str        # arrival city or airport code
    departure_time: datetime
    arrival_time: datetime
    airline: str            # carrier name; "Unknown" when the API response had none
    price: Decimal          # total price in USD
    flight_number: str
    duration: str           # human-readable total duration string from the API
    stops: int = 0          # number of intermediate stops; 0 means nonstop
@dataclass
class Hotel:
    """A hotel option returned by a hotel search.

    BUG FIX: the ``@dataclass`` decorator was missing, so
    ``Hotel(name=..., ...)`` in the result parser raised ``TypeError``
    (the bare class generated no ``__init__``).
    """
    name: str
    location: str
    price_per_night: Decimal        # nightly rate in USD (0 when not parseable)
    rating: float                   # overall rating as reported by the API
    amenities: List[str]
    availability: bool = True
    check_in: Optional[date] = None
    check_out: Optional[date] = None
@dataclass
class Activity:
    """An activity / point-of-interest returned by an activity search.

    BUG FIX: the ``@dataclass`` decorator was missing, so
    ``Activity(name=..., ...)`` in the Tavily parsers raised ``TypeError``
    (the bare class generated no ``__init__``).
    """
    name: str
    location: str
    price: Decimal      # entrance/ticket price in USD; 0 means free or unknown
    rating: float       # 0.0 when no rating could be extracted
    category: str       # e.g. "attraction"
    description: str    # short text snippet, truncated to ~200 chars by parsers
    duration: str       # suggested visit duration, e.g. "2-3 hours"
class SerpAPIClient:
    """Client for SerpAPI travel searches (google_flights / google_hotels engines)."""

    # City-name -> airport/city code lookup used to normalize free-text input.
    # Hoisted to a class constant so the table is built once, not on every
    # search_flights call. SerpAPI requires uppercase 3-letter codes.
    _AIRPORT_CODES = {
        'chicago': 'CHI',
        'chicago, il': 'CHI',
        'chicago, illinois': 'CHI',
        'paris': 'PAR',
        'london': 'LON',
        'new york': 'NYC',
        'new york, ny': 'NYC',
        'los angeles': 'LAX',
        'san francisco': 'SFO',
        'miami': 'MIA',
        'boston': 'BOS',
        'seattle': 'SEA',
        'denver': 'DEN',
        'atlanta': 'ATL',
        'dallas': 'DFW',
        'houston': 'IAH',
        'phoenix': 'PHX',
        'las vegas': 'LAS',
        'orlando': 'MCO',
        'tokyo': 'NRT',
        'tokyo, japan': 'NRT',
        'sydney': 'SYD',
        'melbourne': 'MEL',
        'toronto': 'YYZ',
        'vancouver': 'YVR',
        'mexico city': 'MEX',
        'sao paulo': 'GRU',
        'rio de janeiro': 'GIG',
        'madrid': 'MAD',
        'barcelona': 'BCN',
        'rome': 'FCO',
        'milan': 'MXP',
        'amsterdam': 'AMS',
        'berlin': 'BER',
        'munich': 'MUC',
        'zurich': 'ZUR',
        'vienna': 'VIE',
        'prague': 'PRG',
        'budapest': 'BUD',
        'warsaw': 'WAW',
        'moscow': 'SVO',
        'istanbul': 'IST',
        'dubai': 'DXB',
        'singapore': 'SIN',
        'hong kong': 'HKG',
        'shanghai': 'PVG',
        'beijing': 'PEK',
        'seoul': 'ICN',
        'bangkok': 'BKK',
        'kuala lumpur': 'KUL',
        'jakarta': 'CGK',
        'manila': 'MNL',
        'ho chi minh city': 'SGN',
        'hanoi': 'HAN',
        'mumbai': 'BOM',
        'delhi': 'DEL',
        'bangalore': 'BLR',
        'chennai': 'MAA',
        'kolkata': 'CCU',
        'hyderabad': 'HYD',
        'pune': 'PNQ',
        'ahmedabad': 'AMD',
        'jaipur': 'JAI',
        'lucknow': 'LKO',
        'kochi': 'COK',
        'goa': 'GOI',
        'coimbatore': 'CJB',
        'indore': 'IDR',
        'bhopal': 'BHO',
        'kanpur': 'KNU',
        'nagpur': 'NAG',
        'visakhapatnam': 'VTZ',
        'rajkot': 'RAJ',
        'amritsar': 'ATQ',
        'chandigarh': 'IXC',
        'dehradun': 'DED',
        'srinagar': 'SXR',
        'leh': 'IXL',
        'jammu': 'IXJ',
        'shimla': 'SLV',
        'manali': 'KUU',
        # Himachal towns all map to Dharamshala's Gaggal airport (DHM).
        'mcleod ganj': 'DHM',
        'dharamshala': 'DHM',
        'palampur': 'DHM',
        'kangra': 'DHM',
        'hamirpur': 'DHM',
        'una': 'DHM',
        'bilaspur': 'DHM',
        'solan': 'DHM',
        'sirmour': 'DHM',
        'kinnaur': 'DHM',
        'lahaul': 'DHM',
        'spiti': 'DHM',
        'kullu': 'DHM',
        'mandi': 'DHM',
        'chamba': 'DHM'
    }

    def __init__(self):
        # Key is read from the environment; a missing key downgrades every
        # search to an empty result (logged) instead of raising.
        self.api_key = os.getenv('SERPAPI_API_KEY')
        self.base_url = "https://serpapi.com/search"
        if not self.api_key:
            logger.warning("SERPAPI_API_KEY not found in environment variables")

    @staticmethod
    def _make_connector():
        """Return an aiohttp TCP connector with TLS verification disabled.

        Extracted from the search methods (the same block was duplicated).
        SECURITY NOTE: disabling certificate verification is acceptable only
        for local development; do not ship this configuration to production.
        """
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        return aiohttp.TCPConnector(ssl=ssl_context)

    async def search_flights(
        self,
        origin: str,
        destination: str,
        departure_date: date,
        return_date: Optional[date] = None,
        passengers: int = 1
    ) -> List[Flight]:
        """Search for flights using SerpAPI's google_flights engine.

        Args:
            origin: departure city name or airport code.
            destination: arrival city name or airport code.
            departure_date: outbound travel date.
            return_date: optional return date; omitted means one-way.
            passengers: number of adult passengers.

        Returns:
            Up to 5 parsed Flight objects; [] on missing key or any failure.
        """
        if not self.api_key:
            logger.error("SerpAPI key not available")
            return []
        try:
            # Convert city names to airport codes for better API results
            origin_code = self._get_airport_code(origin)
            destination_code = self._get_airport_code(destination)
            params = {
                "engine": "google_flights",
                "api_key": self.api_key,
                "departure_id": origin_code,
                "arrival_id": destination_code,
                "outbound_date": departure_date.strftime("%Y-%m-%d"),
                "adults": passengers,
                "currency": "USD"
            }
            if return_date:
                # SerpAPI automatically detects a round trip when return_date is given.
                params["return_date"] = return_date.strftime("%Y-%m-%d")
            else:
                # Workaround: the engine insists on a return_date even for
                # one-way trips, so reuse the departure date.
                params["return_date"] = departure_date.strftime("%Y-%m-%d")
            async with aiohttp.ClientSession(connector=self._make_connector()) as session:
                async with session.get(self.base_url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()
                        logger.debug(f"SerpAPI response keys: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}")
                        if "error" in data:
                            logger.error(f"SerpAPI error: {data['error']}")
                            return []
                        if "flights" in data:
                            logger.debug(f"Flights data: {data['flights'][:2] if data['flights'] else 'Empty flights list'}")
                        return self._parse_flight_results(data, origin, destination)
                    else:
                        response_text = await response.text()
                        logger.error(f"SerpAPI request failed with status {response.status}: {response_text[:500]}")
                        return []
        except Exception as e:
            # Network/JSON failures are deliberately downgraded to an empty result.
            logger.error(f"Error searching flights: {e}")
            return []

    async def search_hotels(
        self,
        location: str,
        check_in: date,
        check_out: date,
        guests: int = 1
    ) -> List[Hotel]:
        """Search for hotels using SerpAPI's google_hotels engine.

        Args:
            location: free-text destination (city, neighborhood, ...).
            check_in: check-in date.
            check_out: check-out date.
            guests: number of adult guests.

        Returns:
            Up to 5 parsed Hotel objects; [] on missing key or any failure.
        """
        if not self.api_key:
            logger.error("SerpAPI key not available")
            return []
        try:
            params = {
                "engine": "google_hotels",
                "api_key": self.api_key,
                "q": f"hotels in {location}",
                "check_in_date": check_in.strftime("%Y-%m-%d"),
                "check_out_date": check_out.strftime("%Y-%m-%d"),
                "adults": guests,
                "currency": "USD"
            }
            async with aiohttp.ClientSession(connector=self._make_connector()) as session:
                async with session.get(self.base_url, params=params) as response:
                    if response.status == 200:
                        data = await response.json()
                        return self._parse_hotel_results(data, check_in, check_out)
                    else:
                        response_text = await response.text()
                        logger.error(f"SerpAPI hotel request failed with status {response.status}: {response_text[:500]}")
                        return []
        except Exception as e:
            # Network/JSON failures are deliberately downgraded to an empty result.
            logger.error(f"Error searching hotels: {e}")
            return []

    def _get_airport_code(self, city_name: str) -> str:
        """Map a city name to its airport code for SerpAPI.

        Unknown names pass through unchanged so the API can try them as-is.
        """
        return self._AIRPORT_CODES.get(city_name.lower().strip(), city_name)

    def _parse_flight_results(self, data: Dict[str, Any], origin: str, destination: str) -> List[Flight]:
        """Parse flight results from a SerpAPI response.

        The engine may return "best_flights", "other_flights", and/or
        "flights"; all are scanned and at most 5 itineraries are converted.
        Malformed entries are skipped with a warning.
        """
        flights = []
        try:
            flight_sources = []
            if "best_flights" in data and data["best_flights"]:
                flight_sources.extend(data["best_flights"])
            if "other_flights" in data and data["other_flights"]:
                flight_sources.extend(data["other_flights"])
            if "flights" in data and data["flights"]:
                flight_sources.extend(data["flights"])
            for flight_data in flight_sources[:5]:  # Limit to 5 results
                try:
                    price = 0
                    if "price" in flight_data:
                        price = flight_data["price"]
                    elif "total_price" in flight_data:
                        price = flight_data["total_price"]
                    airline = "Unknown"
                    # Carrier may live on the first leg (nested "flights") or
                    # on the itinerary itself, under "airline" or "airlines".
                    if "flights" in flight_data and flight_data["flights"] and len(flight_data["flights"]) > 0:
                        first_flight = flight_data["flights"][0]
                        if "airline" in first_flight:
                            airline = first_flight["airline"]
                        elif "airlines" in first_flight and first_flight["airlines"]:
                            airline = first_flight["airlines"][0]
                    elif "airlines" in flight_data and flight_data["airlines"]:
                        airline = flight_data["airlines"][0]
                    elif "airline" in flight_data:
                        airline = flight_data["airline"]
                    # Fall back to "now" when the response carries no usable times.
                    departure_time = datetime.now()
                    arrival_time = datetime.now()
                    if "flights" in flight_data and flight_data["flights"] and len(flight_data["flights"]) > 0:
                        first_flight = flight_data["flights"][0]
                        if "departure_time" in first_flight:
                            try:
                                departure_time = datetime.fromisoformat(first_flight["departure_time"].replace("Z", "+00:00"))
                            except (ValueError, AttributeError):
                                pass  # bad/missing ISO string: keep the fallback
                        if "arrival_time" in first_flight:
                            try:
                                arrival_time = datetime.fromisoformat(first_flight["arrival_time"].replace("Z", "+00:00"))
                            except (ValueError, AttributeError):
                                pass  # bad/missing ISO string: keep the fallback
                    flight = Flight(
                        origin=origin,
                        destination=destination,
                        departure_time=departure_time,
                        arrival_time=arrival_time,
                        airline=airline,
                        price=Decimal(str(price)),
                        flight_number=flight_data.get("flight_number", ""),
                        duration=flight_data.get("total_duration", ""),
                        stops=flight_data.get("stops", 0)
                    )
                    flights.append(flight)
                except Exception as e:
                    logger.warning(f"Error parsing flight data: {e}")
                    continue
            if not flights:
                logger.info(f"No flights found for {origin} to {destination}")
                return []
        except Exception as e:
            logger.error(f"Error parsing flight results: {e}")
            return []
        return flights

    def _parse_hotel_results(self, data: Dict[str, Any], check_in: date, check_out: date) -> List[Hotel]:
        """Parse hotel results from a SerpAPI response.

        Reads the "properties" array (at most 5 entries); price strings are
        cleaned of currency symbols/commas before conversion. Malformed
        entries are skipped with a warning.
        """
        hotels = []
        try:
            if "properties" in data and data["properties"]:
                for hotel_data in data["properties"][:5]:  # Limit to 5 results
                    try:
                        # Prices arrive in several shapes (dict, string with
                        # currency symbol, plain number) — normalize via str.
                        price = 0
                        price_str = ""
                        if "rate_per_night" in hotel_data:
                            price_str = str(hotel_data["rate_per_night"])
                        elif "total_rate" in hotel_data:
                            price_str = str(hotel_data["total_rate"])
                        elif "price" in hotel_data:
                            price_str = str(hotel_data["price"])
                        if price_str:
                            # Strip thousands separators, then grab the first number.
                            price_match = re.search(r'[\d,]+\.?\d*', price_str.replace(',', ''))
                            if price_match:
                                try:
                                    price = float(price_match.group())
                                except ValueError:
                                    price = 0
                        hotel = Hotel(
                            name=hotel_data.get("name", "Unknown Hotel"),
                            location=hotel_data.get("description", "Unknown"),
                            price_per_night=Decimal(str(price)),
                            rating=float(hotel_data.get("overall_rating", 0)),
                            amenities=hotel_data.get("amenities", []),
                            check_in=check_in,
                            check_out=check_out
                        )
                        hotels.append(hotel)
                    except Exception as e:
                        logger.warning(f"Error parsing hotel data: {e}")
                        continue
            if not hotels:
                logger.info(f"No hotels found for the specified location")
                return []
        except Exception as e:
            logger.error(f"Error parsing hotel results: {e}")
            return []
        return hotels
class TavilySearchClient:
    """Client for the Tavily web search API, used to discover attractions."""

    # Ordered (title substrings, canonical name) pairs tried before the
    # generic title-cleanup heuristics; first match wins (order preserved
    # from the original if/elif chain).
    _KNOWN_ATTRACTIONS = [
        (('sagrada familia',), 'Sagrada Familia'),
        (('park güell', 'park guell'), 'Park Güell'),
        (('casa batlló', 'casa batllo'), 'Casa Batlló'),
        (('la pedrera',), 'La Pedrera'),
        (('gothic quarter',), 'Gothic Quarter'),
        (('camp nou',), 'Camp Nou'),
        (('las ramblas', 'la rambla'), 'Las Ramblas'),
        (('montjuïc', 'montjuic'), 'Montjuïc'),
        (('eiffel tower',), 'Eiffel Tower'),
        (('louvre',), 'Louvre Museum'),
        (('notre-dame',), 'Notre-Dame Cathedral'),
        (('arc de triomphe',), 'Arc de Triomphe'),
        (('tower of london',), 'Tower of London'),
        (('british museum',), 'British Museum'),
        (('buckingham palace',), 'Buckingham Palace'),
        (('tower bridge',), 'Tower Bridge'),
        (('hyde park',), 'Hyde Park'),
        (('tokyo skytree',), 'Tokyo Skytree'),
        (('senso-ji',), 'Senso-ji Temple'),
        (('meiji shrine',), 'Meiji Shrine'),
        (('harajuku',), 'Harajuku District'),
    ]

    # Phrases that indicate a free attraction (checked before price amounts).
    _FREE_PATTERNS = [
        r'free\s+admission',
        r'no\s+charge',
        r'complimentary',
        r'gratis',
    ]

    # Numeric price patterns, tried in order; first realistic amount wins.
    _PRICE_PATTERNS = [
        # Currency symbols with numbers
        r'\$(\d+(?:\.\d{2})?)',
        r'€(\d+(?:\.\d{2})?)',
        r'£(\d+(?:\.\d{2})?)',
        r'¥(\d+(?:\.\d{2})?)',
        # Admission/entrance fees
        r'admission[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'entrance[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'ticket[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'entry[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'cost[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'price[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'fee[:\s]*\$?(\d+(?:\.\d{2})?)',
        # Specific attraction pricing
        r'adult[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'per\s+person[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'from[:\s]*\$?(\d+(?:\.\d{2})?)',
        r'starting[:\s]*\$?(\d+(?:\.\d{2})?)',
    ]

    # Rating patterns on various scales; values > 5 are halved (assumed /10).
    _RATING_PATTERNS = [
        r'(\d+\.?\d*)\s*stars?',
        r'(\d+\.?\d*)\s*⭐',
        r'rating[:\s]*(\d+\.?\d*)/10',
        r'(\d+\.?\d*)/5',
        r'(\d+\.?\d*)\s*out\s*of\s*5',
    ]

    def __init__(self):
        # Key is read from the environment; a missing key downgrades every
        # search to an empty result (logged) instead of raising.
        self.api_key = os.getenv('TAVILY_API_KEY')
        self.base_url = "https://api.tavily.com/search"
        if not self.api_key:
            logger.warning("TAVILY_API_KEY not found in environment variables")

    @staticmethod
    def _make_connector():
        """Return an aiohttp TCP connector with TLS verification disabled.

        SECURITY NOTE: disabling certificate verification is acceptable only
        for local development; do not ship this configuration to production.
        """
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        return aiohttp.TCPConnector(ssl=ssl_context)

    async def search_activities(
        self,
        location: str,
        category: str = "attractions"
    ) -> List[Activity]:
        """Search for activities and attractions via several targeted queries.

        Args:
            location: destination name used inside the query templates.
            category: accepted for interface compatibility; currently unused
                by the query templates.

        Returns:
            Up to 5 deduplicated Activity objects; [] on missing key/failure.
        """
        if not self.api_key:
            logger.error("Tavily API key not available")
            return []
        try:
            # Use multiple targeted searches to get better results
            search_queries = [
                f"top rated tourist attractions {location} 2024 entrance fees prices",
                f"famous landmarks monuments {location} admission costs tickets",
                f"museums galleries {location} ticket prices visitor information",
                f"parks gardens {location} public spaces free attractions",
                f"cultural sites heritage {location} historical places entrance",
                f"best things to do {location} 2024 with prices",
                f"must visit places {location} admission fees",
                f"popular attractions {location} ticket costs",
                f"free attractions {location} no cost activities",
                f"paid attractions {location} entrance fees"
            ]
            all_results = []
            async with aiohttp.ClientSession(connector=self._make_connector()) as session:
                for query in search_queries:
                    # NOTE(review): the domain allow-list hardcodes Barcelona/
                    # Spain sites, which skews results for other destinations.
                    payload = {
                        "api_key": self.api_key,
                        "query": query,
                        "search_depth": "advanced",
                        "max_results": 5,
                        "include_domains": ["tripadvisor.com", "viator.com", "getyourguide.com", "lonelyplanet.com", "timeout.com", "barcelonaturisme.com", "spain.info"]
                    }
                    async with session.post(self.base_url, json=payload) as response:
                        if response.status == 200:
                            data = await response.json()
                            if "results" in data:
                                all_results.extend(data["results"])
                        else:
                            logger.warning(f"Tavily search failed for query: {query}")
            # Process and deduplicate results
            return self._parse_and_filter_activity_results(all_results, location)
        except Exception as e:
            logger.error(f"Error searching activities: {e}")
            return []

    def _parse_activity_results(self, data: Dict[str, Any], location: str) -> List[Activity]:
        """Parse activity results from a single Tavily response.

        NOTE(review): superseded by _parse_and_filter_activity_results in the
        current search flow; kept for compatibility with external callers.
        """
        activities = []
        try:
            if "results" in data:
                for result in data["results"][:5]:  # Limit to 5 results
                    try:
                        title = result.get("title", "Unknown Activity")
                        content = result.get("content", "")
                        # Best-effort rating extraction from the snippet text.
                        rating = 0.0
                        rating_match = re.search(r'(\d+\.?\d*)\s*(?:stars?|⭐)', content.lower())
                        if rating_match:
                            rating = float(rating_match.group(1))
                        # Best-effort price extraction ($ amounts only here).
                        price = Decimal("0")
                        price_match = re.search(r'\$(\d+)', content)
                        if price_match:
                            price = Decimal(price_match.group(1))
                        activity = Activity(
                            name=title,
                            location=location,
                            price=price,
                            rating=rating,
                            category="attraction",
                            description=content[:200] + "..." if len(content) > 200 else content,
                            duration="2-3 hours"  # Default duration
                        )
                        activities.append(activity)
                    except Exception as e:
                        logger.warning(f"Error parsing activity data: {e}")
                        continue
            if not activities:
                logger.info(f"No activities found for {location}")
                return []
        except Exception as e:
            logger.error(f"Error parsing activity results: {e}")
            return []
        # BUG FIX: the function previously fell off the end after a successful
        # parse and returned None instead of the populated list.
        return activities

    def _parse_and_filter_activity_results(self, results: List[Dict[str, Any]], location: str) -> List[Activity]:
        """Parse, deduplicate, and filter results from multiple Tavily searches.

        Generic "top N things to do" listicles are dropped; attractions are
        deduplicated by extracted name; at most 5 Activity objects returned.
        """
        activities = []
        seen_attractions = set()
        try:
            for result in results:
                try:
                    title = result.get("title", "Unknown Activity")
                    content = result.get("content", "")
                    url = result.get("url", "")
                    # Skip only the most generic travel guides
                    if any(phrase in title.lower() for phrase in [
                        "things to do", "best things", "top attractions", "travel guide",
                        "complete guide", "everything you need", "ultimate guide",
                        "one day in", "perfect itinerary", "hidden gems", "all tours"
                    ]):
                        continue
                    # Extract attraction name from title (remove location suffixes)
                    attraction_name = self._extract_attraction_name(title, location)
                    # Skip if we've already seen this attraction
                    if attraction_name.lower() in seen_attractions:
                        continue
                    seen_attractions.add(attraction_name.lower())
                    rating = self._extract_rating(content)
                    price = self._extract_price(content)
                    description = self._extract_description(content, attraction_name)
                    activity = Activity(
                        name=attraction_name,
                        location=location,
                        price=price,
                        rating=rating,
                        category="attraction",
                        description=description,
                        duration="2-3 hours"  # Default duration
                    )
                    activities.append(activity)
                    if len(activities) >= 5:  # Limit to 5 results
                        break
                except Exception as e:
                    logger.warning(f"Error parsing activity data: {e}")
                    continue
            return activities
        except Exception as e:
            logger.error(f"Error parsing activity results: {e}")
            return []

    def _extract_attraction_name(self, title: str, location: str) -> str:
        """Extract a clean attraction name from a search-result title.

        Known attractions are matched against _KNOWN_ATTRACTIONS first;
        otherwise ticket/tour boilerplate and location suffixes are stripped
        heuristically and the remainder is title-cased.
        """
        lowered = title.lower()
        for substrings, canonical in self._KNOWN_ATTRACTIONS:
            if any(s in lowered for s in substrings):
                return canonical
        # Remove common suffixes and prefixes
        title = re.sub(r'\s*-\s*.*$', '', title)        # everything after dash
        title = re.sub(r'\s*\|\s*.*$', '', title)       # everything after pipe
        title = re.sub(r'\s*\(.*\)\s*$', '', title)     # trailing parenthetical
        # Remove common ticket/tour prefixes and suffixes
        title = re.sub(r'^(?:ticket|entry|skip.*line|tour|visit)\s*[:\s]*', '', title, flags=re.IGNORECASE)
        title = re.sub(r'\s*(?:ticket|entry|tour|visit|excursion).*$', '', title, flags=re.IGNORECASE)
        # Remove the location name if it trails the title
        location_pattern = r'\s*' + re.escape(location.lower()) + r'\s*$'
        title = re.sub(location_pattern, '', title.lower())
        # Capitalize first letter of each word
        return ' '.join(word.capitalize() for word in title.split())

    def _extract_rating(self, content: str) -> float:
        """Extract a 0-5 rating from free text; 0.0 when none is found."""
        for pattern in self._RATING_PATTERNS:
            match = re.search(pattern, content.lower())
            if match:
                rating = float(match.group(1))
                # Values above 5 are assumed to be on a 10-point scale.
                if rating > 5:
                    rating = rating / 2
                return rating
        return 0.0

    def _extract_price(self, content: str) -> Decimal:
        """Extract a price from free text.

        "Free admission"-style phrases win over numeric amounts; numeric
        matches outside the plausible 0.5–1000 range (years, counts) are
        skipped. Returns Decimal("0") when nothing usable is found.
        """
        text = content.lower()
        # Check for free indicators first
        for pattern in self._FREE_PATTERNS:
            if re.search(pattern, text):
                return Decimal("0")
        # Look for price amounts
        for pattern in self._PRICE_PATTERNS:
            match = re.search(pattern, text)
            if match:
                price = float(match.group(1))
                # Filter out unrealistic prices (likely years or other numbers)
                if 0.5 <= price <= 1000:
                    return Decimal(str(price))
        return Decimal("0")

    def _extract_description(self, content: str, attraction_name: str) -> str:
        """Build a short description, preferring sentences naming the attraction."""
        sentences = content.split('.')
        relevant_sentences = []
        for sentence in sentences:
            if attraction_name.lower() in sentence.lower() and len(sentence.strip()) > 20:
                relevant_sentences.append(sentence.strip())
        if relevant_sentences:
            description = '. '.join(relevant_sentences[:2])  # first 2 relevant sentences
            return description[:200] + "..." if len(description) > 200 else description
        # Fallback to first part of content
        return content[:200] + "..." if len(content) > 200 else content
# Global API clients
# Module-level singletons: importing this module constructs both clients,
# which read their API keys from the environment and may log warnings when
# the keys are absent.
serp_client = SerpAPIClient()
tavily_client = TavilySearchClient()