import asyncio
import random
import re
import time
from typing import Optional

from curl_cffi.requests import AsyncSession

from logger import get_logger
from parser import parse_html

log = get_logger()

# Number of request attempts made per username before giving up.
_MAX_ATTEMPTS = 4

# Epoch timestamp (time.time()) before which no worker should send a request.
# Module-level so every concurrent check_fragment() call shares the same
# cooldown and a block seen by one worker slows all of them down.
_rate_limited_until = 0.0


def _extend_cooldown(backoff: float) -> None:
    """Push the shared cooldown deadline at least `backoff` seconds ahead.

    The deadline is only ever moved forward: a short backoff reported by one
    worker must not cut short a longer cooldown already set by another.
    """
    global _rate_limited_until
    _rate_limited_until = max(_rate_limited_until, time.time() + backoff)


async def _wait_for_cooldown(word: str) -> None:
    """Sleep until the shared cooldown deadline has passed.

    The deadline is re-checked after every sleep because another worker may
    have extended it in the meantime.
    """
    while True:
        current_time = time.time()
        if current_time >= _rate_limited_until:
            return
        # Extra random slack so waiting workers do not all resume at once.
        wait_duration = _rate_limited_until - current_time + random.uniform(1.0, 3.0)
        log.info(f"⏳ Thread waiting layout triggered for '{word}'. Dynamic sleep: {wait_duration:.2f}s")
        await asyncio.sleep(wait_duration)


async def check_fragment(word: str, proxy_url: Optional[str] = None) -> str:
    """Fetch the fragment.com page for a username and classify it.

    The username is normalised (whitespace stripped, "@" removed, lowercased),
    its page is requested with a browser-impersonating curl_cffi session, and
    the HTML is handed to `parse_html`. Up to `_MAX_ATTEMPTS` attempts are
    made; HTTP 429/403 responses and Cloudflare challenge pages extend a
    cooldown shared by all concurrent callers.

    Args:
        word: Username to look up; a leading "@" and surrounding whitespace
            are tolerated.
        proxy_url: Optional proxy URL. When given it is used for both HTTP
            and HTTPS traffic of the session; when omitted the request goes
            out directly.

    Returns:
        Whatever `parse_html(html, final_url, word)` returns on the first
        successful fetch, or the string "ERROR" if every attempt failed.
    """
    word = word.strip().replace("@", "").lower()
    url = f"https://fragment.com/username/{word}"

    session_kwargs = {"impersonate": "chrome120", "timeout": 10}
    if proxy_url:
        # Only pass a proxy when one was supplied so the default path is
        # exactly the direct connection used before.
        session_kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}

    for attempt in range(1, _MAX_ATTEMPTS + 1):
        # Respect any cooldown set by this or another worker.
        await _wait_for_cooldown(word)

        # Random pause before each request so traffic is not evenly spaced.
        await asyncio.sleep(random.uniform(0.5, 2.5))

        try:
            log.info(f"🛰️ Requesting public DOM data grid for: @{word} [Attempt {attempt}/{_MAX_ATTEMPTS}]")

            # Short timeout so blocked connections are dropped quickly.
            async with AsyncSession(**session_kwargs) as session:
                resp = await session.get(url, allow_redirects=True)
                final_url = str(resp.url)

                # Explicit rate limiting / firewall block.
                if resp.status_code in (429, 403):
                    backoff = 15 + (5 * attempt) + random.uniform(2.0, 5.0)
                    _extend_cooldown(backoff)
                    log.warning(f"⚠️ Single IP Throttled (HTTP {resp.status_code}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                    continue

                if resp.status_code != 200:
                    log.warning(f"⚠️ Non-200 Response status encountered for '{word}': HTTP {resp.status_code}")
                    continue

                html = resp.text

                # Detect Cloudflare challenge pages before parsing.
                # NOTE(review): the bare "cloudflare" substring test is very
                # broad - any page that merely references a Cloudflare-hosted
                # asset would be treated as a challenge. Confirm against real
                # responses before tightening.
                if (
                    "Just a moment..." in html
                    or "cf-browser-verification" in html
                    or "cloudflare" in html.lower()
                ):
                    backoff = 30 + (10 * attempt) + random.uniform(5.0, 10.0)
                    _extend_cooldown(backoff)
                    log.error(f"❌ Cloudflare Challenge Intercepted on '{word}'. Slowing workers down for {backoff:.1f}s...")
                    continue

                decision = parse_html(html, final_url, word)
                log.info(f"🎯 Resilient Parse Map Complete for '{word}' -> Outcome Classification: {decision}")
                return decision

        except Exception as e:
            # Boundary handler: any failure in the request or the parser is
            # logged and retried rather than propagated to the caller.
            log.error(f"Network transaction fault for '{word}' during connection lookup: {str(e)}")
            # Linear backoff between retries; pointless after the last one.
            if attempt < _MAX_ATTEMPTS:
                await asyncio.sleep(2.0 * attempt)

    return "ERROR"