import re
def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify a username's listing status from a fetched HTML page.

    Three strategies are tried in order; the first conclusive one wins:

    1. Direct username page (``/username/`` in ``final_url`` and no
       ``?query=``): read the text of the ``tm-section-header-status``
       span, then fall back to the phrases "is already taken" /
       "make an offer" anywhere in the page.
    2. Search results: locate ``>@<word><`` and read the text of the next
       element whose ``class`` contains "status".
    3. Whole-page keyword scan.

    Args:
        html: Raw page markup. Empty or whitespace-only input yields
            ``"AVAILABLE"``.
        final_url: URL the page was served from; only inspected to decide
            whether strategy 1 applies.
        word: Username to look up, with or without a leading "@";
            matched case-insensitively.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``, ``"SOLD"``,
        ``"ON_AUCTION"`` or ``"FOR_SALE"``.
    """
    if not html or not html.strip():
        return "AVAILABLE"

    # Collapse every whitespace run (including newlines) to a single space so
    # the regexes below can match across what were originally line breaks.
    clean_html = " ".join(html.split())
    page_text = clean_html.lower()  # lowered once, reused by all phrase scans
    word_clean = (word or "").strip().replace("@", "").lower()
    final_url_lower = (final_url or "").lower()

    # ------------------------------------------------------------------
    # ENGINE 1: direct username landing page
    # ------------------------------------------------------------------
    if "/username/" in final_url_lower and "?query=" not in final_url_lower:
        # Plain regex instead of an HTML parser to avoid a BeautifulSoup
        # dependency.
        status_match = re.search(
            r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
            clean_html,
            re.IGNORECASE,
        )
        if status_match:
            s = status_match.group(1).strip().lower()
            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # "unavailable" contains "available", so it must be tested first;
            # otherwise an "Unavailable" status is reported as AVAILABLE.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"
        if "is already taken" in page_text or "make an offer" in page_text:
            return "TAKEN"

    # ------------------------------------------------------------------
    # ENGINE 2: search-results row lookup
    # ------------------------------------------------------------------
    # re.escape keeps regex metacharacters in the username literal, so a word
    # such as "a(b" cannot raise re.error and "a.b" cannot match "@axb".
    search_regex = re.compile(
        r">@" + re.escape(word_clean)
        + r'<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)
    if match:
        s = match.group(1).strip().lower()
        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        # An explicit "Available" on the matched row is authoritative; without
        # this the whole-page scan below could report another row's status.
        if "available" in s:
            return "AVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"

    # ------------------------------------------------------------------
    # Fallback: whole-page keyword scan
    # ------------------------------------------------------------------
    if "on auction" in page_text:
        return "ON_AUCTION"
    if "sold for" in page_text or "recently sold" in page_text:
        return "SOLD"
    if "taken" in page_text or "make an offer" in page_text:
        return "TAKEN"
    if "for sale" in page_text or "purchase" in page_text:
        return "FOR_SALE"
    if "unavailable" in page_text:
        return "UNAVAILABLE"
    return "AVAILABLE"