import re

from logger import get_logger

log = get_logger()

# Captures the text node of the element carrying the profile-page status class.
# The trailing "<" is required: without it the lazy group stops after a single
# character and no status keyword can ever be recognised.
_PROFILE_STATUS_RE = re.compile(
    r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*<',
    re.IGNORECASE,
)


def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify the status of a username from a fetched HTML page.

    Three strategies are tried in order, and the first one that recognises a
    status keyword wins:

    1. Profile mode -- only when ``final_url`` contains ``/username/`` and no
       ``?query=``: read the text of the ``tm-section-header-status`` element,
       then fall back to the phrases "is already taken" / "make an offer".
    2. Grid mode -- locate ``>@<word><`` in the markup and read the text of
       the next element whose class contains ``status``.
    3. Global scan -- look for status phrases anywhere in the page.

    Args:
        html: Raw HTML of the page. Empty or whitespace-only input is logged
            as a warning and reported as ``"AVAILABLE"``.
        final_url: URL the request ended on (after redirects); used only to
            decide whether profile mode applies.
        word: Username being checked. Surrounding whitespace and any ``@``
            characters are removed and the result is lower-cased.

    Returns:
        One of ``"AVAILABLE"``, ``"SOLD"``, ``"TAKEN"``, ``"ON_AUCTION"``,
        ``"FOR_SALE"`` or ``"UNAVAILABLE"``. ``"AVAILABLE"`` is also the
        default when no strategy recognises anything.
    """
    if not html or not html.strip():
        log.warning(f"🔍 Empty text payload passed to evaluation parser engine for target: {word}")
        return "AVAILABLE"

    # Flatten line breaks, tabs and runs of spaces so the regexes below can
    # treat the document as a single line.
    clean_html = " ".join(html.split())
    lowered_html = clean_html.lower()  # computed once, reused by the text scans
    word_clean = word.strip().replace("@", "").lower()
    final_url_lower = final_url.lower()

    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    # ENGINE 1: DIRECT DOM PROFILE SCANNING
    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    if '/username/' in final_url_lower and '?query=' not in final_url_lower:
        log.info(f"🔎 Converted Parser executing Profile Mode parsing path for word: {word_clean}")

        status_match = _PROFILE_STATUS_RE.search(clean_html)
        if status_match:
            s = status_match.group(1).strip().lower()
            log.info(f"🔮 Direct Class Marker found for '{word_clean}': '{s}'")

            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # Must precede the "available" test: "unavailable" contains it.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"

        if 'is already taken' in lowered_html or 'make an offer' in lowered_html:
            log.info(f"🔮 Found fallback text marker 'taken/offer' for: {word_clean}")
            return "TAKEN"

    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    # ENGINE 2: REDIRECTED SEARCH RESULTS ROW LOOKUP
    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    log.info(f"🔎 Converted Parser executing Grid Mode search loop path for word: {word_clean}")

    # The word is caller-supplied text, so escape it before embedding it in
    # the pattern; otherwise regex metacharacters could raise re.error or
    # match a different row.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)

    if match:
        s = match.group(1).strip().lower()
        log.info(f"🔮 Search Grid Table status mapped for '{word_clean}': '{s}'")

        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"
        # Trust the row's own status over the page-wide scan below, which
        # would otherwise pick up "taken"/"sold" text belonging to other rows.
        # Checked last so the precedence of the keywords above is unchanged.
        if "available" in s:
            return "AVAILABLE"

    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    # ENGINE 3: GLOBAL LAYOUT REGEX FALLBACK SCANS
    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    log.info(f"⚠️ Row boundaries missed for '{word_clean}'. Executing global text scans...")

    fallback_text = lowered_html
    if "on auction" in fallback_text:
        return "ON_AUCTION"
    if "sold for" in fallback_text or "recently sold" in fallback_text:
        return "SOLD"
    if "taken" in fallback_text or "make an offer" in fallback_text:
        return "TAKEN"
    if "for sale" in fallback_text or "purchase" in fallback_text:
        return "FOR_SALE"
    if "unavailable" in fallback_text:
        return "UNAVAILABLE"

    return "AVAILABLE"