Spaces:
Paused
Paused
| import re | |
def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify a username's status from the HTML of a fetched page.

    Three strategies are tried in order:

    1. If ``final_url`` is a direct ``/username/`` page (and not a
       ``?query=`` search URL), read the text of the element whose class
       starts with ``tm-section-header-status``.
    2. Otherwise (or if step 1 was inconclusive), look for a ``>@<word><``
       cell and read the text of the next element whose class contains
       ``status``.
    3. Finally, fall back to keyword scanning over the whole page text.

    Args:
        html: Raw page markup. Empty or whitespace-only input is treated as
            "nothing found".
        final_url: URL the request ended on (after redirects); compared
            case-insensitively.
        word: Username being looked up; surrounding whitespace and any ``@``
            are removed and it is matched case-insensitively.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``, ``"SOLD"``,
        ``"ON_AUCTION"`` or ``"FOR_SALE"``. ``"AVAILABLE"`` is the default
        when nothing else matches.
    """
    if not html or not html.strip():
        return "AVAILABLE"

    # Collapse every run of whitespace (including newlines) into one space so
    # the regexes below never have to cope with line breaks.
    clean_html = " ".join(html.split())
    lowered_html = clean_html.lower()
    word_clean = word.strip().replace("@", "").lower()
    final_url_lower = final_url.lower()

    # ------------------------------------------------------------------
    # ENGINE 1: direct username page
    # ------------------------------------------------------------------
    if "/username/" in final_url_lower and "?query=" not in final_url_lower:
        status_match = re.search(
            r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
            clean_html,
            re.IGNORECASE,
        )
        if status_match:
            s = status_match.group(1).strip().lower()
            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # "unavailable" contains "available" as a substring, so it must
            # be tested first or it would be reported as AVAILABLE.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"
        if "is already taken" in lowered_html or "make an offer" in lowered_html:
            return "TAKEN"

    # ------------------------------------------------------------------
    # ENGINE 2: search-results row lookup
    # ------------------------------------------------------------------
    # re.escape keeps regex metacharacters in the username literal: without
    # it, input like "a(b" raises re.error and "." matches any character.
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)
    if match:
        s = match.group(1).strip().lower()
        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"

    # ------------------------------------------------------------------
    # FALLBACK: keyword scan over the whole page
    # ------------------------------------------------------------------
    if "on auction" in lowered_html:
        return "ON_AUCTION"
    if "sold for" in lowered_html or "recently sold" in lowered_html:
        return "SOLD"
    if "taken" in lowered_html or "make an offer" in lowered_html:
        return "TAKEN"
    if "for sale" in lowered_html or "purchase" in lowered_html:
        return "FOR_SALE"
    if "unavailable" in lowered_html:
        return "UNAVAILABLE"
    return "AVAILABLE"