File size: 2,970 Bytes
e2fa3ac
8a3c68a
 
e2fa3ac
02a8ebc
1b2a7fe
e2fa3ac
 
02a8ebc
8a3c68a
1b2a7fe
02a8ebc
e2fa3ac
1b2a7fe
8a3c68a
e2fa3ac
1b2a7fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2fa3ac
02a8ebc
 
e2fa3ac
02a8ebc
1b2a7fe
 
 
 
 
 
e2fa3ac
1b2a7fe
02a8ebc
 
 
 
 
 
8a3c68a
1b2a7fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import re

def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify the status of a username from a fetched HTML page.

    Three strategies are tried in order; the first that yields a
    recognised status wins:

    1. If ``final_url`` contains ``/username/`` and no ``?query=``, read
       the text of the ``tm-section-header-status`` element, then fall
       back to two page-level "taken" phrases.
    2. Look for an ``@<word>`` cell followed by an element whose class
       contains ``status`` and read that element's text.
    3. Scan the whole page text for well-known phrases.

    Args:
        html: Raw page markup. Empty or whitespace-only input is treated
            as "nothing listed" and reported as ``"AVAILABLE"``.
        final_url: URL the request ended on (after any redirects); it is
            only inspected for the ``/username/`` and ``?query=`` markers.
        word: Username being checked; surrounding whitespace, ``@`` signs
            and letter case are ignored.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``,
        ``"SOLD"``, ``"ON_AUCTION"`` or ``"FOR_SALE"``. ``"AVAILABLE"`` is
        the default when no strategy recognises anything.
    """
    if not html or not html.strip():
        return "AVAILABLE"

    # Collapse every run of whitespace (including newlines) into a single
    # space so the regexes below never need DOTALL or multi-line handling.
    clean_html = " ".join(html.split())
    # Lower-cased once and reused by every plain substring scan below.
    lowered_html = clean_html.lower()
    word_clean = word.strip().replace("@", "").lower()
    final_url_lower = final_url.lower()

    # ------------------------------------------------------------------
    # Strategy 1: direct username landing page
    # ------------------------------------------------------------------
    if '/username/' in final_url_lower and '?query=' not in final_url_lower:
        # A regex is used here to avoid pulling in an HTML parser dependency.
        status_match = re.search(
            r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
            clean_html,
            re.IGNORECASE,
        )
        if status_match:
            s = status_match.group(1).strip().lower()
            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # Must be tested before "available": the substring "available"
            # is contained in "unavailable" and would otherwise win.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"

        if 'is already taken' in lowered_html or 'make an offer' in lowered_html:
            return "TAKEN"

    # ------------------------------------------------------------------
    # Strategy 2: search-results row lookup
    # ------------------------------------------------------------------
    # re.escape keeps regex metacharacters in the username literal, so an
    # odd input can neither raise re.error nor match a different row.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)

    if match:
        s = match.group(1).strip().lower()
        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"

    # ------------------------------------------------------------------
    # Strategy 3: whole-page phrase scan (last resort)
    # ------------------------------------------------------------------
    if "on auction" in lowered_html:
        return "ON_AUCTION"
    if "sold for" in lowered_html or "recently sold" in lowered_html:
        return "SOLD"
    if "taken" in lowered_html or "make an offer" in lowered_html:
        return "TAKEN"
    if "for sale" in lowered_html or "purchase" in lowered_html:
        return "FOR_SALE"
    if "unavailable" in lowered_html:
        return "UNAVAILABLE"

    return "AVAILABLE"