Dvs / parser.py
Muttered3's picture
Update parser.py
1b2a7fe verified
raw
history blame
2.97 kB
import re
def parse_html(html: str, final_url: str, word: str) -> str:
    """Classify a username's status from the HTML of a fetched page.

    Three strategies are tried in order:

    1. Direct username page (``final_url`` contains ``/username/`` and no
       ``?query=``): read the text of the ``tm-section-header-status``
       element, then fall back to a couple of "taken" phrases.
    2. Search results: find the row whose handle is ``@<word>`` and read the
       text of the first following element whose class contains ``status``.
    3. Whole-page phrase scan.

    Args:
        html: Raw page markup. Empty or whitespace-only input is treated
            as "nothing listed" and yields ``"AVAILABLE"``.
        final_url: URL the request ended on (after redirects); compared
            case-insensitively.
        word: Username being looked up; surrounding whitespace and any
            ``@`` characters are removed and it is matched
            case-insensitively.

    Returns:
        One of ``"AVAILABLE"``, ``"UNAVAILABLE"``, ``"TAKEN"``, ``"SOLD"``,
        ``"ON_AUCTION"`` or ``"FOR_SALE"``. ``"AVAILABLE"`` is the default
        when nothing matches.
    """
    if not html or not html.strip():
        return "AVAILABLE"

    # Collapse every whitespace run (including newlines) to a single space
    # so the regexes below can use `.` without needing re.DOTALL.
    clean_html = " ".join(html.split())
    lowered_html = clean_html.lower()
    word_clean = word.strip().replace("@", "").lower()
    final_url_lower = final_url.lower()

    # ------------------------------------------------------------------
    # Engine 1: direct username page
    # ------------------------------------------------------------------
    if "/username/" in final_url_lower and "?query=" not in final_url_lower:
        # Regex instead of an HTML parser to avoid an extra dependency.
        status_match = re.search(
            r'class="tm-section-header-status[^"]*"[^>]*>\s*([^<]+?)\s*</span>',
            clean_html,
            re.IGNORECASE,
        )
        if status_match:
            s = status_match.group(1).strip().lower()
            if "sold" in s:
                return "SOLD"
            if "taken" in s:
                return "TAKEN"
            if "auction" in s:
                return "ON_AUCTION"
            # "unavailable" contains "available", so it must be tested
            # first or an unavailable name is reported as AVAILABLE.
            if "unavailable" in s:
                return "UNAVAILABLE"
            if "available" in s:
                return "AVAILABLE"
            if "sale" in s or "purchase" in s:
                return "FOR_SALE"
        # NOTE(review): assumed to be scoped to the direct-page branch
        # (as a fallback when the status header is missing or unrecognised)
        # -- confirm against the intended control flow.
        if "is already taken" in lowered_html or "make an offer" in lowered_html:
            return "TAKEN"

    # ------------------------------------------------------------------
    # Engine 2: search-results row lookup
    # ------------------------------------------------------------------
    # re.escape keeps a word containing regex metacharacters from raising
    # re.error or matching a different handle (e.g. "a.b" matching "axb").
    search_regex = re.compile(
        rf'>@{re.escape(word_clean)}<.*?class="[^"]*status[^"]*"[^>]*>\s*([^<]+?)\s*<',
        re.IGNORECASE,
    )
    match = search_regex.search(clean_html)
    if match:
        s = match.group(1).strip().lower()
        if "auction" in s or "bidding" in s:
            return "ON_AUCTION"
        if "sold" in s:
            return "SOLD"
        if "unavailable" in s:
            return "UNAVAILABLE"
        if "taken" in s or "offer" in s:
            return "TAKEN"
        if "sale" in s or "purchase" in s:
            return "FOR_SALE"

    # ------------------------------------------------------------------
    # Engine 3: whole-page phrase scan (last resort)
    # ------------------------------------------------------------------
    if "on auction" in lowered_html:
        return "ON_AUCTION"
    if "sold for" in lowered_html or "recently sold" in lowered_html:
        return "SOLD"
    if "taken" in lowered_html or "make an offer" in lowered_html:
        return "TAKEN"
    if "for sale" in lowered_html or "purchase" in lowered_html:
        return "FOR_SALE"
    if "unavailable" in lowered_html:
        return "UNAVAILABLE"
    return "AVAILABLE"