File size: 3,402 Bytes
2418b45
 
873798b
8e12b43
4bad7d8
4b0add0
b33ebac
4b0add0
 
69f02a2
4bad7d8
5afe800
 
8309889
5afe800
4bad7d8
 
5afe800
 
 
ebcc930
8c15670
4bad7d8
8c15670
8e12b43
ebcc930
4bad7d8
5afe800
4bad7d8
 
 
5afe800
4bad7d8
 
 
b33ebac
935b8a6
4bad7d8
 
 
 
 
 
ebcc930
4bad7d8
 
 
 
 
 
b33ebac
4bad7d8
 
 
b33ebac
4bad7d8
b33ebac
4bad7d8
 
 
 
 
 
ebcc930
4bad7d8
 
 
 
5afe800
935b8a6
4bad7d8
 
5afe800
ebcc930
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import asyncio
import random
import re
import time
from curl_cffi.requests import AsyncSession
from logger import get_logger
from parser import parse_html

# Module-wide logger obtained from the project's logger helper.
log = get_logger()

# Shared cool-down deadline, expressed as a time.time() epoch timestamp.
# check_fragment() moves it into the future when a request is throttled
# (HTTP 429/403) or answered with a Cloudflare challenge page, and waits for
# it to pass before each attempt. It lives at module level so that every
# concurrent check_fragment() call observes the same deadline; the initial
# 0.0 means no cool-down is in effect.
_rate_limited_until = 0.0

def _extend_cooldown(backoff: float) -> None:
    """Move the shared cool-down deadline at least *backoff* seconds ahead.

    The deadline is only ever pushed later, so a short back-off reported by
    one call cannot cut short a longer one already set by a concurrent call.
    """
    global _rate_limited_until
    _rate_limited_until = max(_rate_limited_until, time.time() + backoff)


async def check_fragment(word: str, proxy_url: str = None) -> str:
    """Fetch the fragment.com page for a username and classify it.

    The page is requested through curl_cffi with Chrome impersonation, using
    randomized pacing and a module-wide cool-down shared by all concurrent
    calls. Up to four attempts are made.

    Args:
        word: Username to look up. "@" characters and surrounding whitespace
            are removed and the result is lower-cased.
        proxy_url: Optional proxy URL applied to both HTTP and HTTPS traffic
            of the request. ``None`` (the default) connects directly.

    Returns:
        Whatever ``parse_html`` returns for the fetched page, or ``"ERROR"``
        when the normalized username is empty or every attempt failed.
    """
    # Drop "@" before trimming so inputs such as "@ name" do not keep a
    # leading space that would end up inside the URL path.
    word = word.replace("@", "").strip().lower()
    if not word:
        # Nothing to look up; avoid spending four requests on a bare
        # /username/ path.
        return "ERROR"

    url = f"https://fragment.com/username/{word}"
    max_attempts = 4

    # Short timeout so blocked connections are dropped quickly.
    session_kwargs = {"impersonate": "chrome120", "timeout": 10}
    if proxy_url:
        session_kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}

    for attempt in range(1, max_attempts + 1):
        # Honour the shared cool-down. Re-check after every sleep because a
        # concurrent call may have pushed the deadline further out meanwhile.
        while True:
            remaining = _rate_limited_until - time.time()
            if remaining <= 0:
                break
            wait_duration = remaining + random.uniform(1.0, 3.0)
            log.info(f"⏳ Thread waiting layout triggered for '{word}'. Dynamic sleep: {wait_duration:.2f}s")
            await asyncio.sleep(wait_duration)

        # Random jitter so consecutive requests are not evenly spaced.
        await asyncio.sleep(random.uniform(0.5, 2.5))

        try:
            log.info(f"🛰️ Requesting public DOM data grid for: @{word} [Attempt {attempt}/{max_attempts}]")

            async with AsyncSession(**session_kwargs) as session:
                resp = await session.get(url, allow_redirects=True)
                final_url = str(resp.url)

                # Explicit throttling / firewall block: cool everyone down.
                if resp.status_code in (429, 403):
                    backoff = 15 + (5 * attempt) + random.uniform(2.0, 5.0)
                    _extend_cooldown(backoff)
                    log.warning(f"⚠️ Single IP Throttled (HTTP {resp.status_code}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                    continue

                if resp.status_code != 200:
                    log.warning(f"⚠️ Non-200 Response status encountered for '{word}': HTTP {resp.status_code}")
                    continue

                html = resp.text

                # Catch Cloudflare challenge pages before handing off to the parser.
                # NOTE(review): the bare "cloudflare" substring test also matches any
                # ordinary page that merely mentions Cloudflare — confirm that is intended.
                if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                    backoff = 30 + (10 * attempt) + random.uniform(5.0, 10.0)
                    _extend_cooldown(backoff)
                    log.error(f"❌ Cloudflare Challenge Intercepted on '{word}'. Slowing workers down for {backoff:.1f}s...")
                    continue

                # parse_html stays inside the try so that a parser failure is
                # retried and ultimately reported as "ERROR" rather than raised.
                decision = parse_html(html, final_url, word)
                log.info(f"🎯 Resilient Parse Map Complete for '{word}' -> Outcome Classification: {decision}")
                return decision

        except Exception as e:
            log.error(f"Network transaction fault for '{word}' during connection lookup: {e}")
            # Back off only when another attempt follows; sleeping after the
            # final attempt would just delay the "ERROR" result.
            if attempt < max_attempts:
                await asyncio.sleep(2.0 * attempt)

    return "ERROR"