# services/utils/http_utils.py

import aiohttp
import asyncio
import logging
from typing import Optional

# WebShare proxies, each entry formatted as "host:port:username:password"
WEBSHARE_PROXIES = [
    "198.23.239.134:6540:zvubytfw:ak6yit5k2tvj",
    "207.244.217.165:6712:zvubytfw:ak6yit5k2tvj",
    "107.172.163.27:6543:zvubytfw:ak6yit5k2tvj",
    "161.123.152.115:6360:zvubytfw:ak6yit5k2tvj",
    "23.94.138.75:6349:zvubytfw:ak6yit5k2tvj",
    "216.10.27.159:6837:zvubytfw:ak6yit5k2tvj",
    "136.0.207.84:6661:zvubytfw:ak6yit5k2tvj",
    "64.64.118.149:6732:zvubytfw:ak6yit5k2tvj",
    "142.147.128.93:6593:zvubytfw:ak6yit5k2tvj",
    "154.36.110.199:6853:zvubytfw:ak6yit5k2tvj",
]
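
# get_proxy_url() below rewrites each entry into an aiohttp-compatible proxy
# URL, e.g. "198.23.239.134:6540:zvubytfw:ak6yit5k2tvj" becomes
# "http://zvubytfw:ak6yit5k2tvj@198.23.239.134:6540".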

# Per-proxy failure counts and the round-robin rotation cursor
proxy_failures: dict[str, int] = {}
current_proxy_idx = -1

logger = logging.getLogger(__name__)


def get_proxy_url() -> str:
    """Get a proxy URL using round-robin with failure detection."""
    global current_proxy_idx

    # Try each proxy at most once per call
    for _ in range(len(WEBSHARE_PROXIES)):
        current_proxy_idx = (current_proxy_idx + 1) % len(WEBSHARE_PROXIES)
        proxy_str = WEBSHARE_PROXIES[current_proxy_idx]

        # Skip proxies that have failed repeatedly
        if proxy_failures.get(proxy_str, 0) >= 3:
            continue

        # Format the entry for aiohttp: "ip:port:user:pass" -> "http://user:pass@ip:port"
        parts = proxy_str.split(':')
        if len(parts) == 4:
            ip, port, username, password = parts
            return f"http://{username}:{password}@{ip}:{port}"

    # Every proxy is marked as failing: reset the counts and retry once
    proxy_failures.clear()
    return get_proxy_url()
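
# Illustrative rotation, assuming fresh module state (no recorded failures):
#   get_proxy_url()  # -> "http://zvubytfw:ak6yit5k2tvj@198.23.239.134:6540"
#   get_proxy_url()  # -> "http://zvubytfw:ak6yit5k2tvj@207.244.217.165:6712"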

def mark_proxy_failure(proxy_url: str) -> None:
    """Mark a proxy as failing; schedule a cooldown once it hits the threshold."""
    for proxy_str in WEBSHARE_PROXIES:
        # Match the pool entry whose IP appears in the formatted proxy URL
        if proxy_str.split(':')[0] in proxy_url:
            proxy_failures[proxy_str] = proxy_failures.get(proxy_str, 0) + 1
            if proxy_failures[proxy_str] >= 3:
                # create_task() is safe here because callers run inside the event loop
                asyncio.create_task(reset_proxy_after_delay(proxy_str, 300))
            break

async def reset_proxy_after_delay(proxy_str: str, delay: int) -> None:
    """Reset a proxy's failure count after a delay (in seconds)."""
    await asyncio.sleep(delay)
    if proxy_str in proxy_failures:
        proxy_failures[proxy_str] = 0
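
# Illustrative failure flow: three mark_proxy_failure() calls for the same IP
# reach the threshold, so get_proxy_url() skips that entry until
# reset_proxy_after_delay() clears its count roughly 300 seconds later.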

async def fetch_page(session: aiohttp.ClientSession, url: str, headers: dict) -> Optional[str]:
    """Fetch a page through the proxy pool, falling back to a direct request."""
    logger.info(f"Requesting URL: {url}")

    # Augment the caller's headers with browser-like values
    enhanced_headers = headers.copy()
    enhanced_headers.update({
        "Accept-Encoding": "gzip, deflate, br",  # explicitly advertise brotli support
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Referer": "https://www.google.com/",
    })

    # Try up to 3 proxies
    for attempt in range(3):
        proxy_url = get_proxy_url()
        logger.info(f"Using proxy {proxy_url} (attempt {attempt + 1})")
        try:
            # Use a longer timeout for proxied requests
            async with session.get(
                url,
                headers=enhanced_headers,
                proxy=proxy_url,
                timeout=aiohttp.ClientTimeout(total=25),
                ssl=False,  # skip certificate verification through the proxy
                allow_redirects=True,
            ) as response:
                if response.status in (200, 202):
                    content = await response.text()
                    # Require a non-trivial HTML document, not a bot-detection page
                    if len(content) > 5000 and ("<html" in content or "<!DOCTYPE" in content):
                        # Search pages must also contain property listings
                        if "searchresults" in url or "search" in url:
                            if ("property-card" in content
                                    or "sr_property_block" in content
                                    or "sr_item" in content):
                                logger.info(f"Successfully retrieved search results ({len(content)} bytes)")
                                return content
                            else:
                                logger.warning("No property cards found in search results")
                                mark_proxy_failure(proxy_url)
                        else:
                            logger.info(f"Successfully retrieved content ({len(content)} bytes)")
                            return content
                    else:
                        logger.warning(f"Response too short or not HTML: {len(content)} bytes")
                        mark_proxy_failure(proxy_url)
                else:
                    logger.warning(f"Response status {response.status} from proxy {proxy_url}")
                    mark_proxy_failure(proxy_url)
        except Exception as e:
            logger.error(f"Proxy request failed: {e}")
            mark_proxy_failure(proxy_url)

        # Back off briefly before the next attempt
        await asyncio.sleep(2)

    # Last resort: a direct, unproxied request
    logger.warning("All proxies failed, trying direct connection")
    try:
        async with session.get(
            url,
            headers=enhanced_headers,
            timeout=aiohttp.ClientTimeout(total=15),
        ) as response:
            if response.status == 200:
                content = await response.text()
                if len(content) > 5000:
                    return content
    except Exception as e:
        logger.error(f"Direct request also failed: {e}")
    return None
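

# ---------------------------------------------------------------------------
# Minimal usage sketch. The target URL and User-Agent below are hypothetical
# placeholders for illustration; they are not used elsewhere in this module.
if __name__ == "__main__":
    async def _demo() -> None:
        headers = {"User-Agent": "Mozilla/5.0"}
        async with aiohttp.ClientSession() as session:
            html = await fetch_page(session, "https://example.com/searchresults", headers)
            print(f"fetched {len(html)} bytes" if html else "fetch failed")

    asyncio.run(_demo())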