Spaces:
Paused
Paused
File size: 3,941 Bytes
e2fa3ac 027851c 8a3c68a e2fa3ac 02a8ebc 027851c e2fa3ac 027851c 02a8ebc 8a3c68a 027851c 02a8ebc e2fa3ac 1b2a7fe 8a3c68a e2fa3ac 027851c 1b2a7fe 027851c 1b2a7fe 027851c 1b2a7fe 027851c 1b2a7fe 027851c e2fa3ac 027851c 02a8ebc e2fa3ac 02a8ebc 1b2a7fe 027851c 1b2a7fe e2fa3ac 027851c 02a8ebc 027851c 02a8ebc 8a3c68a 1b2a7fe | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | import re
from logger import get_logger
log = get_logger()
def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify a username's marketplace status from a fetched HTML page.

    Three strategies are tried in order, each one less precise than the last:

    1. Profile mode -- when ``final_url`` contains ``/username/`` and no
       ``?query=``, read the text of the ``tm-section-header-status`` element,
       then fall back to the phrases "is already taken" / "make an offer".
    2. Grid mode -- locate the ``>@<word><`` cell and read the text of the
       first following element whose class contains ``status``.
    3. Global scan -- look for well-known status phrases anywhere in the page.

    Args:
        html: Raw HTML of the page that was fetched.
        final_url: URL of the page after redirects; selects profile mode.
        word: Username being checked. Surrounding whitespace, ``@`` characters
            and letter case are ignored.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``, ``"SOLD"``,
        ``"ON_AUCTION"`` or ``"FOR_SALE"``. An empty or whitespace-only
        ``html`` yields ``"AVAILABLE"``, as does a page with no recognised
        marker.
    """
    if not html or not html.strip():
        log.warning(f"๐ Empty text payload passed to evaluation parser engine for target: {word}")
        return "AVAILABLE"

    # Collapse every whitespace run (newlines, tabs, ...) to a single space so
    # the regexes below can treat the document as one line.
    clean_html = " ".join(html.split())
    # Lower-cased copy computed once; shared by all plain-text phrase checks.
    page_text = clean_html.lower()
    word_clean = word.strip().replace("@", "").lower()
    final_url_lower = final_url.lower()

    # ------------------------------------------------------------------
    # Engine 1: direct profile page scanning
    # ------------------------------------------------------------------
    if '/username/' in final_url_lower and '?query=' not in final_url_lower:
        log.info(f"๐ Converted Parser executing Profile Mode parsing path for word: {word_clean}")
        status_match = re.search(
            r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
            clean_html,
            re.IGNORECASE,
        )
        if status_match:
            s = status_match.group(1).strip().lower()
            log.info(f"๐ฎ Direct Class Marker found for '{word_clean}': '{s}'")
            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # "unavailable" contains "available", so it must be tested first;
            # otherwise an unavailable name is reported as AVAILABLE.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"

        if 'is already taken' in page_text or 'make an offer' in page_text:
            log.info(f"๐ฎ Found fallback text marker 'taken/offer' for: {word_clean}")
            return "TAKEN"

    # ------------------------------------------------------------------
    # Engine 2: search results row lookup
    # ------------------------------------------------------------------
    log.info(f"๐ Converted Parser executing Grid Mode search loop path for word: {word_clean}")
    # re.escape keeps regex metacharacters in the word literal: without it a
    # "." would match any character and input such as "c++" raises re.error.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)
    if match:
        s = match.group(1).strip().lower()
        log.info(f"๐ฎ Search Grid Table status mapped for '{word_clean}': '{s}'")
        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        # The exact row says "available": answer here instead of falling
        # through to the global scan, where text belonging to other rows
        # (e.g. "On auction") would override this row's own status.
        if "available" in s:
            return "AVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"

    # ------------------------------------------------------------------
    # Engine 3: global text fallback scans
    # ------------------------------------------------------------------
    log.info(f"โ ๏ธ Row boundaries missed for '{word_clean}'. Executing global text scans...")
    if "on auction" in page_text:
        return "ON_AUCTION"
    if "sold for" in page_text or "recently sold" in page_text:
        return "SOLD"
    if "taken" in page_text or "make an offer" in page_text:
        return "TAKEN"
    if "for sale" in page_text or "purchase" in page_text:
        return "FOR_SALE"
    if "unavailable" in page_text:
        return "UNAVAILABLE"
    return "AVAILABLE"