Spaces:
Paused
Paused
| import re | |
| from logger import get_logger | |
| log = get_logger() | |
# Ordered (keywords, verdict) rules for the status label of a profile page.
# "unavailable" is listed before "available" because the latter is a
# substring of the former and would otherwise shadow it.
_PROFILE_STATUS_RULES = (
    (("sold",), "SOLD"),
    (("taken",), "TAKEN"),
    (("auction",), "ON_AUCTION"),
    (("unavailable",), "UNAVAILABLE"),
    (("available",), "AVAILABLE"),
    (("sale", "purchase"), "FOR_SALE"),
)

# Ordered rules for the status cell of the matching search-results row.
# "available" is deliberately last so every other mapping keeps precedence.
_GRID_STATUS_RULES = (
    (("auction", "bidding"), "ON_AUCTION"),
    (("sold",), "SOLD"),
    (("unavailable",), "UNAVAILABLE"),
    (("taken", "offer"), "TAKEN"),
    (("sale", "purchase"), "FOR_SALE"),
    (("available",), "AVAILABLE"),
)

# Ordered rules for the last-resort scan over the whole page text.
_GLOBAL_TEXT_RULES = (
    (("on auction",), "ON_AUCTION"),
    (("sold for", "recently sold"), "SOLD"),
    (("taken", "make an offer"), "TAKEN"),
    (("for sale", "purchase"), "FOR_SALE"),
    (("unavailable",), "UNAVAILABLE"),
)

# Captures the text of the status <span> in a profile page header.
_PROFILE_STATUS_RE = re.compile(
    r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
    re.IGNORECASE,
)


def _match_status(text: str, rules) -> "str | None":
    """Return the verdict of the first rule with a keyword in *text*, else None."""
    for keywords, verdict in rules:
        if any(keyword in text for keyword in keywords):
            return verdict
    return None


def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify the status of a username from a fetched HTML page.

    Three strategies are tried in order, and the first one that yields a
    verdict wins:

    1. Profile mode - only when ``final_url`` contains ``/username/`` and no
       ``?query=``: read the ``tm-section-header-status`` span, then look for
       the phrases "is already taken" / "make an offer".
    2. Grid mode - find the ``>@<word><`` cell and read the first following
       element whose class contains ``status``.
    3. Global scan - look for status phrases anywhere in the page text.

    Args:
        html: Raw HTML of the fetched page. Empty, blank or ``None`` input
            is treated as "nothing found".
        final_url: URL the request ended on; used only to pick profile mode.
        word: Username to look up. A leading ``@``, surrounding whitespace
            and letter case are ignored.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``, ``"SOLD"``,
        ``"ON_AUCTION"`` or ``"FOR_SALE"``. ``"AVAILABLE"`` is the default
        when nothing matches, including for an empty page.
    """
    if not html or not html.strip():
        log.warning(f"๐ Empty text payload passed to evaluation parser engine for target: {word}")
        return "AVAILABLE"

    # Collapse newlines, tabs and runs of spaces so the regexes can treat the
    # page as one line ("." never has to cross a line break).
    clean_html = " ".join(html.split())
    lowered_html = clean_html.lower()  # computed once, reused by every text scan
    word_clean = (word or "").strip().replace("@", "").lower()
    final_url_lower = (final_url or "").lower()

    # ------------------------------------------------------------------
    # ENGINE 1: DIRECT PROFILE PAGE SCANNING
    # ------------------------------------------------------------------
    if "/username/" in final_url_lower and "?query=" not in final_url_lower:
        log.info(f"๐ Converted Parser executing Profile Mode parsing path for word: {word_clean}")
        status_match = _PROFILE_STATUS_RE.search(clean_html)
        if status_match:
            status = status_match.group(1).strip().lower()
            log.info(f"๐ฎ Direct Class Marker found for '{word_clean}': '{status}'")
            verdict = _match_status(status, _PROFILE_STATUS_RULES)
            if verdict is not None:
                return verdict
        if "is already taken" in lowered_html or "make an offer" in lowered_html:
            log.info(f"๐ฎ Found fallback text marker 'taken/offer' for: {word_clean}")
            return "TAKEN"

    # ------------------------------------------------------------------
    # ENGINE 2: SEARCH RESULTS ROW LOOKUP
    # ------------------------------------------------------------------
    log.info(f"๐ Converted Parser executing Grid Mode search loop path for word: {word_clean}")
    # re.escape keeps regex metacharacters in the handle literal, so e.g.
    # "a.b" cannot match the row of "@axb" and "a(b" cannot raise re.error.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    row_match = search_regex.search(clean_html)
    if row_match:
        status = row_match.group(1).strip().lower()
        log.info(f"๐ฎ Search Grid Table status mapped for '{word_clean}': '{status}'")
        verdict = _match_status(status, _GRID_STATUS_RULES)
        if verdict is not None:
            return verdict

    # ------------------------------------------------------------------
    # ENGINE 3: WHOLE-PAGE TEXT FALLBACK
    # ------------------------------------------------------------------
    log.info(f"โ ๏ธ Row boundaries missed for '{word_clean}'. Executing global text scans...")
    verdict = _match_status(lowered_html, _GLOBAL_TEXT_RULES)
    return verdict if verdict is not None else "AVAILABLE"