File size: 3,892 Bytes
2418b45
 
873798b
8e12b43
b33ebac
4b0add0
b33ebac
 
4b0add0
 
69f02a2
ebcc930
b33ebac
 
 
 
 
 
 
ebcc930
5afe800
 
8309889
5afe800
ebcc930
 
5afe800
 
 
ebcc930
8c15670
ebcc930
8c15670
8e12b43
ebcc930
 
5afe800
2e6f7cf
5afe800
ebcc930
 
 
 
 
 
 
 
b33ebac
935b8a6
ebcc930
 
 
 
 
 
 
8e12b43
ebcc930
 
 
b33ebac
ebcc930
b33ebac
 
ebcc930
 
b33ebac
 
ebcc930
b33ebac
ebcc930
 
 
b33ebac
ebcc930
b33ebac
 
ebcc930
 
 
 
 
b33ebac
5afe800
935b8a6
ebcc930
8e12b43
5afe800
ebcc930
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import asyncio
import random
import re
import time
import aiohttp
from logger import get_logger
from state import state
from parser import parse_html

# Module-level logger obtained from the project's `logger.get_logger` helper.
log = get_logger()

# Pool of browser User-Agent strings. check_fragment picks one at random
# (random.choice) for the "User-Agent" header of every request attempt.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 Version/17.4.1 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/123.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"
]

# Shared cool-down deadline, expressed as a time.time() timestamp and used by
# every check_fragment call in this process. Before each request attempt,
# check_fragment sleeps while the current time is earlier than this value; it
# sets the value to a future moment after an HTTP 429/403 response or a
# detected Cloudflare challenge page. The initial 0.0 means "no wait".
_rate_limited_until = 0.0

async def check_fragment(word: str, proxy_url: str = None) -> str:
    """Fetch the public Fragment page for a username and classify it.

    The username is normalised (surrounding whitespace stripped, every "@"
    removed, lower-cased) and ``https://fragment.com/username/<word>`` is
    requested directly, following redirects. The page HTML, the final URL
    after redirects and the normalised word are handed to ``parse_html``,
    whose result is returned unchanged.

    Up to four attempts are made. All calls share the module-level
    ``_rate_limited_until`` deadline: an HTTP 429/403 response or a detected
    Cloudflare challenge page pushes that deadline into the future, and every
    attempt (in any coroutine) waits for it to pass before sending a request.

    Args:
        word: Username to look up; a leading "@" is tolerated.
        proxy_url: Accepted for backward compatibility only. This
            implementation never reads it; requests are sent without a proxy.

    Returns:
        The value returned by ``parse_html`` for the first successfully
        fetched page, or the string ``"ERROR"`` when every attempt failed.
    """
    global _rate_limited_until

    max_attempts = 4
    word = word.strip().replace("@", "").lower()
    url = f"https://fragment.com/username/{word}"

    # Loop-invariant: a short connect timeout drops dead sockets quickly.
    timeout = aiohttp.ClientTimeout(total=12, connect=4)

    for attempt in range(1, max_attempts + 1):
        is_last_attempt = attempt == max_attempts

        # Wait out the shared cool-down window. This is a loop rather than a
        # single check because another coroutine may extend the deadline
        # while this one is asleep; a one-shot check would then send the
        # request in the middle of the new cool-down.
        while True:
            remaining = _rate_limited_until - time.time()
            if remaining <= 0:
                break
            await asyncio.sleep(remaining + random.uniform(0.5, 1.5))

        headers = {
            "User-Agent": random.choice(USER_AGENTS),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Referer": "https://fragment.com/",
            "Cache-Control": "no-cache",
            "Pragma": "no-cache"
        }

        try:
            # A new session is opened for every attempt, as in the original
            # implementation.
            async with aiohttp.ClientSession(timeout=timeout) as session:

                # Redirects are followed so that the final URL can be passed
                # to parse_html together with the page body.
                async with session.get(url, headers=headers, allow_redirects=True) as resp:
                    status = resp.status

                    if status in (429, 403):
                        # Exponential backoff: 5 + 3**attempt seconds plus jitter.
                        backoff = 5 + (3 ** attempt) + random.uniform(1.0, 3.0)
                        # Only ever extend the shared deadline; never shorten
                        # a longer cool-down set by a concurrent call.
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.warning(f"⚠️ Single IP throttled (HTTP {status}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                        continue

                    if status != 200:
                        # Brief pause before retrying; pointless once no
                        # attempts remain.
                        if not is_last_attempt:
                            await asyncio.sleep(1.0)
                        continue

                    html = await resp.text()

                    # Treat a Cloudflare challenge page as a throttle signal.
                    # NOTE(review): the case-insensitive "cloudflare" substring
                    # match is broad and could also match ordinary pages that
                    # merely reference Cloudflare-hosted assets - confirm
                    # against real responses.
                    if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                        backoff = 15 + random.uniform(2.0, 5.0)
                        _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                        log.error("❌ Cloudflare Browser Challenge engaged. Slowing worker loops down...")
                        continue

                    # URL after redirects, passed to the parser with the HTML.
                    final_url = str(resp.url)

                    return parse_html(html, final_url, word)

        except Exception as e:
            # Deliberately broad: any failure in this attempt (network,
            # decoding, parsing) is logged and retried instead of propagating.
            log.error(f"Network pipe transaction failure for '{word}': {e}")
            # Linear pause before the next attempt; skipped when none remain.
            if not is_last_attempt:
                await asyncio.sleep(1.5 * attempt)

    return "ERROR"