File size: 3,941 Bytes
e2fa3ac
027851c
 
 
8a3c68a
 
e2fa3ac
02a8ebc
027851c
e2fa3ac
 
027851c
02a8ebc
8a3c68a
027851c
02a8ebc
e2fa3ac
1b2a7fe
8a3c68a
e2fa3ac
027851c
1b2a7fe
 
027851c
 
1b2a7fe
 
 
027851c
1b2a7fe
 
 
 
 
 
 
027851c
1b2a7fe
 
 
027851c
e2fa3ac
027851c
02a8ebc
 
e2fa3ac
02a8ebc
1b2a7fe
027851c
1b2a7fe
 
 
 
 
e2fa3ac
027851c
 
 
 
02a8ebc
027851c
02a8ebc
 
 
 
 
8a3c68a
1b2a7fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import re
from logger import get_logger

# Module-level logger; parse_html below reports its progress through it.
# NOTE(review): get_logger comes from the project-local `logger` module, so
# its exact type/configuration is not visible here.
log = get_logger()

# Status badge on a direct profile page; group 1 captures the badge text.
_PROFILE_STATUS_RE = re.compile(
    r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
    re.IGNORECASE,
)

# Ordered (keywords, label) rules: the first rule with a keyword contained in
# the lower-cased status text wins, so order encodes precedence.
# "unavailable" must precede "available" because the former contains the latter.
_PROFILE_STATUS_RULES = (
    (("sold",), "SOLD"),
    (("taken",), "TAKEN"),
    (("auction",), "ON_AUCTION"),
    (("unavailable",), "UNAVAILABLE"),
    (("available",), "AVAILABLE"),
    (("sale", "purchase"), "FOR_SALE"),
)

# Rules for the status cell of a search-results row. "available" is placed
# last so it only applies to texts no other rule recognises.
_GRID_STATUS_RULES = (
    (("auction", "bidding"), "ON_AUCTION"),
    (("sold",), "SOLD"),
    (("unavailable",), "UNAVAILABLE"),
    (("taken", "offer"), "TAKEN"),
    (("sale", "purchase"), "FOR_SALE"),
    (("available",), "AVAILABLE"),
)

# Last-resort rules applied to the whole lower-cased page text.
_FALLBACK_RULES = (
    (("on auction",), "ON_AUCTION"),
    (("sold for", "recently sold"), "SOLD"),
    (("taken", "make an offer"), "TAKEN"),
    (("for sale", "purchase"), "FOR_SALE"),
    (("unavailable",), "UNAVAILABLE"),
)


def _classify_status(status_text: str, rules):
    """Return the label of the first rule matching *status_text*, else None."""
    for keywords, label in rules:
        if any(keyword in status_text for keyword in keywords):
            return label
    return None


def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify the availability status of a username from a fetched page.

    Three strategies are tried in order:

    1. Profile mode - when ``final_url`` contains ``/username/`` and no
       ``?query=``, read the ``tm-section-header-status`` badge, then fall
       back to the "is already taken" / "make an offer" text markers.
    2. Grid mode - locate the ``>@<word><`` cell and read the first
       following element whose class contains ``status``.
    3. Global scan - look for well-known phrases anywhere in the page.

    Args:
        html: Raw page markup. Empty or whitespace-only input is treated as
            "nothing found".
        final_url: URL the request ended on (after redirects); only used to
            choose between profile and grid mode. ``None`` is treated as "".
        word: Username being checked; surrounding whitespace and ``@`` are
            removed and it is lower-cased before matching.

    Returns:
        One of ``"SOLD"``, ``"TAKEN"``, ``"ON_AUCTION"``, ``"FOR_SALE"``,
        ``"UNAVAILABLE"`` or ``"AVAILABLE"`` (the default when no marker is
        found).
    """
    if not html or not html.strip():
        # NOTE(review): an empty payload is reported as AVAILABLE; kept for
        # backward compatibility, but callers may want to treat it as an error.
        log.warning(f"🔍 Empty text payload passed to evaluation parser engine for target: {word}")
        return "AVAILABLE"

    # Collapse every run of whitespace (newlines, tabs, ...) to one space so
    # the regexes below never have to match across line breaks.
    clean_html = " ".join(html.split())
    page_text = clean_html.lower()  # computed once, shared by all text scans
    word_clean = (word or "").strip().replace("@", "").lower()
    final_url_lower = (final_url or "").lower()

    # ------------------------------------------------------------------
    # ENGINE 1: DIRECT DOM PROFILE SCANNING
    # ------------------------------------------------------------------
    if '/username/' in final_url_lower and '?query=' not in final_url_lower:
        log.info(f"🔎 Converted Parser executing Profile Mode parsing path for word: {word_clean}")

        status_match = _PROFILE_STATUS_RE.search(clean_html)
        if status_match:
            s = status_match.group(1).strip().lower()
            log.info(f"🔮 Direct Class Marker found for '{word_clean}': '{s}'")
            label = _classify_status(s, _PROFILE_STATUS_RULES)
            if label is not None:
                return label

        if 'is already taken' in page_text or 'make an offer' in page_text:
            log.info(f"🔮 Found fallback text marker 'taken/offer' for: {word_clean}")
            return "TAKEN"

    # ------------------------------------------------------------------
    # ENGINE 2: REDIRECTED SEARCH RESULTS ROW LOOKUP
    # ------------------------------------------------------------------
    log.info(f"🔎 Converted Parser executing Grid Mode search loop path for word: {word_clean}")
    # re.escape: the word is user input and must be matched literally; an
    # unescaped "+", "(", "." etc. would raise re.error or match other text.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)

    if match:
        s = match.group(1).strip().lower()
        log.info(f"🔮 Search Grid Table status mapped for '{word_clean}': '{s}'")
        label = _classify_status(s, _GRID_STATUS_RULES)
        if label is not None:
            return label

    # ------------------------------------------------------------------
    # ENGINE 3: GLOBAL LAYOUT REGEX FALLBACK SCANS
    # ------------------------------------------------------------------
    log.info(f"⚠️ Row boundaries missed for '{word_clean}'. Executing global text scans...")
    label = _classify_status(page_text, _FALLBACK_RULES)
    if label is not None:
        return label

    return "AVAILABLE"