import asyncio
import random
import re
import time
import aiohttp
from logger import get_logger
from state import state
from parser import parse_html
# Module-level logger, obtained from the project's `logger.get_logger` helper.
log = get_logger()
# Pool of desktop browser User-Agent strings; one is picked at random for
# each request attempt so consecutive requests do not share a fingerprint.
USER_AGENTS = [
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    ),
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) "
        "AppleWebKit/605.1.15 Version/17.4.1 Safari/605.1.15"
    ),
    (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 Chrome/123.0.0.0 Safari/537.36"
    ),
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) "
        "Gecko/20100101 Firefox/125.0"
    ),
]
# Shared cooldown deadline, expressed on the time.time() clock. check_fragment
# waits until this moment before sending a request and updates it whenever the
# server throttles or challenges a request. 0.0 means "no cooldown active".
_rate_limited_until: float = 0.0
async def check_fragment(word: str, proxy_url: str = None) -> str:
    """
    Fetch the public Fragment page for a username and classify it.

    The username is normalised (surrounding whitespace stripped, "@" removed,
    lower-cased) and requested from ``https://fragment.com/username/<word>``
    with up to four attempts. On a 200 response that is not a Cloudflare
    challenge, the HTML, the final URL after redirects, and the normalised
    word are handed to ``parse_html`` and its result is returned.

    Throttling (HTTP 429/403) and Cloudflare challenges push the module-level
    ``_rate_limited_until`` deadline forward; every attempt waits for that
    deadline before sending a request.

    Args:
        word: Username to look up; a leading "@" and surrounding whitespace
            are tolerated.
        proxy_url: Accepted for backward compatibility but not used; every
            request is sent directly.

    Returns:
        The value produced by ``parse_html`` for a successfully fetched page,
        or the string ``"ERROR"`` when the username is empty after
        normalisation or all attempts fail.
    """
    global _rate_limited_until

    word = word.strip().replace("@", "").lower()
    if not word:
        # Nothing to look up; do not request the bare /username/ path.
        return "ERROR"

    url = f"https://fragment.com/username/{word}"
    max_attempts = 4
    # Loop-invariant: a short connect timeout drops dead sockets quickly.
    timeout = aiohttp.ClientTimeout(total=12, connect=4)

    for attempt in range(1, max_attempts + 1):
        is_last_attempt = attempt == max_attempts

        # Wait out the shared cooldown. Re-check after every sleep because
        # another task may have extended the deadline in the meantime.
        while True:
            remaining = _rate_limited_until - time.time()
            if remaining <= 0:
                break
            await asyncio.sleep(remaining + random.uniform(0.5, 1.5))

        headers = {
            "User-Agent": random.choice(USER_AGENTS),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://fragment.com/",
            "Cache-Control": "no-cache",
            "Pragma": "no-cache",
        }

        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # Redirects are followed so the final URL can be passed to
                # the parser together with the page body.
                async with session.get(url, headers=headers, allow_redirects=True) as resp:
                    status = resp.status

                    if status in (429, 403):
                        # Exponential backoff. max() keeps a longer cooldown
                        # set by a concurrent task from being shortened.
                        backoff = 5 + (3 ** attempt) + random.uniform(1.0, 3.0)
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.warning(f"⚠️ Single IP throttled (HTTP {status}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                        continue

                    if status != 200:
                        # Brief pause before retrying; pointless after the
                        # final attempt, so skip it there.
                        if not is_last_attempt:
                            await asyncio.sleep(1.0)
                        continue

                    html = await resp.text()

                    # Detect a Cloudflare anti-bot interstitial.
                    # NOTE(review): the bare "cloudflare" substring test may
                    # also match ordinary pages that merely reference
                    # Cloudflare-hosted assets - confirm against real pages.
                    if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                        backoff = 15 + random.uniform(2.0, 5.0)
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.error("❌ Cloudflare Browser Challenge engaged. Slowing worker loops down...")
                        continue

                    # The post-redirect URL is part of the parser's input.
                    final_url = str(resp.url)
                    return parse_html(html, final_url, word)
        except Exception as e:
            # Deliberately broad: network, timeout and parser failures are
            # all treated as a failed attempt and retried.
            log.error(f"Network pipe transaction failure for '{word}': {str(e)}")
            if not is_last_attempt:
                await asyncio.sleep(1.5 * attempt)

    return "ERROR"