import asyncio
import random
import re
import time
from typing import Optional

import aiohttp

from logger import get_logger
from state import state
from parser import parse_html

log = get_logger()

# Browser User-Agent strings; one is chosen at random for every request attempt.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 Version/17.4.1 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/123.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
]

# Wall-clock timestamp (time.time()) before which no request should be sent.
# Shared by every check_fragment() call in this process.
_rate_limited_until = 0.0

# Number of request attempts made per username before giving up.
_MAX_ATTEMPTS = 4


def _extend_cooldown(seconds: float) -> None:
    """Push the shared cool-down deadline at least `seconds` into the future.

    The deadline is only ever moved forward, so a short backoff requested by
    one coroutine cannot cut short a longer one already set by another.
    """
    global _rate_limited_until
    _rate_limited_until = max(_rate_limited_until, time.time() + seconds)


async def check_fragment(word: str, proxy_url: Optional[str] = None) -> str:
    """Fetch the fragment.com page for a username and return the parser's verdict.

    The username is normalised ("@" removed, surrounding whitespace stripped,
    lower-cased) and https://fragment.com/username/<word> is requested up to
    four times. Every attempt first waits out the module-wide cool-down window,
    which is extended whenever the server answers 429/403 or the page looks
    like a Cloudflare challenge.

    Args:
        word: Username to look up; a leading "@" and surrounding whitespace
            are tolerated.
        proxy_url: Currently unused; the request is always sent directly.

    Returns:
        Whatever ``parse_html(html, final_url, word)`` returns for the first
        attempt that yields an HTTP 200 non-challenge page, or the string
        ``"ERROR"`` when the normalised username is empty or every attempt
        fails.
    """
    # Remove "@" before stripping so input such as "@ name" is fully trimmed.
    word = word.replace("@", "").strip().lower()
    if not word:
        # Without a username the URL would point at an unrelated page.
        log.error("Empty username passed to check_fragment; nothing to look up.")
        return "ERROR"

    url = f"https://fragment.com/username/{word}"

    # Loop-invariant request settings, built once.
    timeout = aiohttp.ClientTimeout(total=12, connect=4)
    static_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://fragment.com/",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
    }

    for attempt in range(1, _MAX_ATTEMPTS + 1):
        is_last_attempt = attempt == _MAX_ATTEMPTS

        # Wait out the shared cool-down. Re-check after waking because another
        # coroutine may have extended the window while this one slept.
        while True:
            remaining = _rate_limited_until - time.time()
            if remaining <= 0:
                break
            await asyncio.sleep(remaining + random.uniform(0.5, 1.5))

        headers = {"User-Agent": random.choice(USER_AGENTS), **static_headers}

        try:
            # A fresh session per attempt, as in the original design: no
            # cookies or connections are carried over between attempts.
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Redirects are followed; the final URL is handed to the parser.
                async with session.get(url, headers=headers, allow_redirects=True) as resp:
                    status = resp.status

                    if status in (429, 403):
                        # Exponential backoff (3 ** attempt) plus jitter.
                        backoff = 5 + (3 ** attempt) + random.uniform(1.0, 3.0)
                        _extend_cooldown(backoff)
                        log.warning(f"⚠️ Single IP throttled (HTTP {status}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                        continue

                    if status != 200:
                        log.warning(f"Unexpected HTTP {status} on '{word}' (attempt {attempt}/{_MAX_ATTEMPTS}).")
                        # No point pausing when there is no further attempt.
                        if not is_last_attempt:
                            await asyncio.sleep(1.0)
                        continue

                    html = await resp.text()

                    # Treat Cloudflare challenge pages as throttling.
                    # NOTE(review): the bare "cloudflare" substring match also
                    # fires on any page that merely mentions Cloudflare (for
                    # example a CDN script URL) - confirm this is intended.
                    if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                        backoff = 15 + random.uniform(2.0, 5.0)
                        _extend_cooldown(backoff)
                        log.error("❌ Cloudflare Browser Challenge engaged. Slowing worker loops down...")
                        continue

                    # URL after redirects have been followed.
                    final_url = str(resp.url)

                    # parse_html runs inside the try block, so an exception it
                    # raises is logged and retried like a network failure.
                    return parse_html(html, final_url, word)

        except Exception as e:
            # Include the exception type: str() of a timeout error is empty.
            log.error(f"Network pipe transaction failure for '{word}': {type(e).__name__}: {e}")
            # Linear pause between attempts; skipped when about to give up.
            if not is_last_attempt:
                await asyncio.sleep(1.5 * attempt)

    return "ERROR"