Spaces:

Muttered3
/

Dvs

Paused

File size: 3,402 Bytes

2418b45
 
873798b
8e12b43
fe67d53
4b0add0
b33ebac
4b0add0
 
69f02a2
fe67d53
5afe800
 
8309889
5afe800
fe67d53
 
5afe800
 
 
ebcc930
8c15670
fe67d53
8c15670
8e12b43
ebcc930
fe67d53
5afe800
fe67d53
 
 
5afe800
fe67d53
 
 
b33ebac
935b8a6
fe67d53
 
 
 
 
 
ebcc930
fe67d53
 
 
 
 
 
b33ebac
fe67d53
 
 
b33ebac
fe67d53
b33ebac
fe67d53
 
 
 
 
 
ebcc930
fe67d53
 
 
 
5afe800
935b8a6
fe67d53
 
5afe800
ebcc930

import asyncio
import random
import re
import time
from curl_cffi.requests import AsyncSession
from logger import get_logger
from parser import parse_html

# Module-wide logger, obtained from the project's `logger` helper module.
log = get_logger()

# Shared cool-down deadline, stored as a `time.time()` timestamp. It is module
# state, so every `check_fragment` call in this process reads and writes the
# same value: a call that gets throttled pushes it into the future, and every
# call waits until it has passed before sending its next request.
# NOTE(review): originally described as shared across "worker threads"; only
# coroutine use is visible here — confirm how callers schedule the workers.
_rate_limited_until = 0.0

async def check_fragment(word: str, proxy_url: str = None) -> str:
    """
    Fetch the fragment.com page for a username and classify it with ``parse_html``.

    The page is requested through curl_cffi's Chrome impersonation, with up to
    four attempts. Every attempt honours the module-level cool-down deadline
    ``_rate_limited_until`` and adds a short random delay before connecting.

    Args:
        word: Username to look up. "@" characters and surrounding whitespace
            are removed and the result is lower-cased before building the URL.
        proxy_url: Optional proxy URL. When given, it is used for both HTTP
            and HTTPS traffic of the session; when omitted, the session is
            created exactly as before (direct connection).

    Returns:
        Whatever ``parse_html(html, final_url, word)`` returns for the first
        usable HTTP 200 response, or the string ``"ERROR"`` when the
        normalised username is empty or all attempts fail.
    """
    global _rate_limited_until
    max_attempts = 4

    # Drop "@" first so that inputs like "@ name" do not keep a stray space.
    word = word.replace("@", "").strip().lower()
    if not word:
        # Nothing to look up: do not spend requests (and rate-limit budget)
        # on the bare /username/ URL.
        return "ERROR"
    url = f"https://fragment.com/username/{word}"

    # Session options; the proxy is only added when the caller supplied one.
    session_kwargs = {"impersonate": "chrome120", "timeout": 10}
    if proxy_url:
        session_kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}

    for attempt in range(1, max_attempts + 1):
        current_time = time.time()

        # Respect a cool-down that this or another concurrent call has set.
        if current_time < _rate_limited_until:
            wait_duration = _rate_limited_until - current_time + random.uniform(1.0, 3.0)
            log.info(f"⏳ Thread waiting layout triggered for '{word}'. Dynamic sleep: {wait_duration:.2f}s")
            await asyncio.sleep(wait_duration)

        # Random delay so requests are not evenly spaced.
        jitter = random.uniform(0.5, 2.5)
        await asyncio.sleep(jitter)

        try:
            log.info(f"🛰️ Requesting public DOM data grid for: @{word} [Attempt {attempt}/{max_attempts}]")

            # The 10 s timeout makes stalled/blocked connections fail quickly.
            async with AsyncSession(**session_kwargs) as session:
                resp = await session.get(url, allow_redirects=True)
                final_url = str(resp.url)

                # Explicit throttling / firewall block: extend the shared cool-down.
                if resp.status_code in (429, 403):
                    backoff = 15 + (5 * attempt) + random.uniform(2.0, 5.0)
                    # max(): never shorten a longer cool-down set by another call.
                    _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                    log.warning(f"⚠️ Single IP Throttled (HTTP {resp.status_code}) on '{word}'. Cooling pool for {backoff:.1f}s...")
                    continue

                if resp.status_code != 200:
                    log.warning(f"⚠️ Non-200 Response status encountered for '{word}': HTTP {resp.status_code}")
                    continue

                html = resp.text

                # Detect Cloudflare challenge pages before handing HTML to the parser.
                # NOTE(review): the bare "cloudflare" substring test also matches any
                # normal page that merely references Cloudflare assets — confirm this
                # does not reject valid responses.
                if "Just a moment..." in html or "cf-browser-verification" in html or "cloudflare" in html.lower():
                    backoff = 30 + (10 * attempt) + random.uniform(5.0, 10.0)
                    _rate_limited_until = max(_rate_limited_until, time.time() + backoff)
                    log.error(f"❌ Cloudflare Challenge Intercepted on '{word}'. Slowing workers down for {backoff:.1f}s...")
                    continue

                # Classification is delegated entirely to the project parser.
                decision = parse_html(html, final_url, word)
                log.info(f"🎯 Resilient Parse Map Complete for '{word}' -> Outcome Classification: {decision}")
                return decision

        except Exception as e:
            # Best-effort boundary: any request/parse failure counts as a failed attempt.
            log.error(f"Network transaction fault for '{word}' during connection lookup: {str(e)}")
            # Linear backoff between attempts; pointless after the last one.
            if attempt < max_attempts:
                await asyncio.sleep(2.0 * attempt)

    return "ERROR"