Spaces:
Paused
Paused
| import asyncio | |
| import random | |
| import re | |
| import time | |
| import aiohttp | |
| from logger import get_logger | |
| from state import state | |
| from parser import parse_html | |
# Module-wide logger instance, obtained from the project's `logger` helper module.
log = get_logger()
# Pool of desktop browser User-Agent strings; check_fragment picks one at
# random for every request attempt.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 Version/17.4.1 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/123.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
]

# Shared pacing deadline on the time.time() clock: requests are held back
# until this moment has passed. 0.0 means no throttling is in effect.
_rate_limited_until = 0.0
async def check_fragment(word: str, proxy_url: str = None) -> str:
    """
    Fetch the public fragment.com page for a username and classify it.

    The name is normalised ("@" removed, surrounding whitespace stripped,
    lower-cased) and requested with a randomly chosen browser User-Agent.
    The page HTML and the final post-redirect URL are passed to
    ``parse_html``, whose result is returned. Up to four attempts are made.
    Throttling responses (HTTP 429/403 or a Cloudflare challenge page) push
    back the module-wide ``_rate_limited_until`` deadline that every caller
    waits on before sending a request.

    Args:
        word: Username to look up; a leading "@" and surrounding whitespace
            are tolerated.
        proxy_url: Accepted for backward compatibility but not used; requests
            are always sent directly.

    Returns:
        The decision produced by ``parse_html`` for the fetched page, or the
        string ``"ERROR"`` when the name is empty or every attempt failed.
    """
    global _rate_limited_until

    max_attempts = 4

    # Remove "@" before stripping so input such as "@ name" does not keep an
    # embedded leading space.
    word = word.replace("@", "").strip().lower()
    if not word:
        # Nothing to look up; do not spend requests on the bare /username/ path.
        return "ERROR"

    url = f"https://fragment.com/username/{word}"

    # Short connect timeout drops dead sockets quickly; built once because it
    # is identical for every attempt.
    timeout = aiohttp.ClientTimeout(total=12, connect=4)

    for attempt in range(1, max_attempts + 1):
        is_last_attempt = attempt == max_attempts

        # Wait out the shared pacing window. Re-check after waking because
        # another worker may have extended the deadline while this one slept.
        while True:
            remaining = _rate_limited_until - time.time()
            if remaining <= 0:
                break
            await asyncio.sleep(remaining + random.uniform(0.5, 1.5))

        headers = {
            "User-Agent": random.choice(USER_AGENTS),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://fragment.com/",
            "Cache-Control": "no-cache",
            "Pragma": "no-cache",
        }

        try:
            # A fresh session per attempt, so no cookies carry over between tries.
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Redirects are followed; the final URL is passed to the parser below.
                async with session.get(url, headers=headers, allow_redirects=True) as resp:
                    status = resp.status

                    if status in (429, 403):
                        # Exponential backoff. max() ensures a shorter backoff
                        # never shortens a longer window set by another worker.
                        backoff = 5 + (3 ** attempt) + random.uniform(1.0, 3.0)
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.warning(f"⚠️ Single IP throttled (HTTP {status}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                        continue

                    if status != 200:
                        log.warning(f"Unexpected HTTP {status} for '{word}' (attempt {attempt}/{max_attempts}).")
                        # Pause only when another attempt will follow.
                        if not is_last_attempt:
                            await asyncio.sleep(1.0)
                        continue

                    html = await resp.text()

                    # NOTE(review): the bare "cloudflare" substring test is broad
                    # and would also match ordinary pages that merely mention
                    # Cloudflare (e.g. a CDN script URL) - confirm this is intended.
                    if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                        backoff = 15 + random.uniform(2.0, 5.0)
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.error("❌ Cloudflare Browser Challenge engaged. Slowing worker loops down...")
                        continue

                    # Final URL after redirects, passed to the parser with the HTML.
                    final_url = str(resp.url)
                    return parse_html(html, final_url, word)
        except Exception as e:
            # Deliberate best-effort boundary: any failure (network, timeout,
            # parsing) is logged and retried. repr() is used because timeout
            # exceptions have an empty str().
            log.error(f"Network pipe transaction failure for '{word}': {e!r}")
            # Pause only when another attempt will follow.
            if not is_last_attempt:
                await asyncio.sleep(1.5 * attempt)

    return "ERROR"