import requests def hygiene_score(entity: str) -> int: """ If 'entity' is a URL: • robots.txt allows GPTBot → +30 • Response time < 0.3s → +20 • present → +20 • ... present → +20 • No paywall (“subscribe”/“login” not found) → +10 Cap at 100. Else (non-URL), return default 50. """ if not entity.startswith("http"): return 50 score = 0 try: # 1) Check robots.txt robots_url = entity.rstrip("/") + "/robots.txt" r = requests.get(robots_url, timeout=5) if r.status_code == 200 and "GPTBot" in r.text: score += 30 # 2) Page response time page_resp = requests.get(entity, timeout=5) if page_resp.elapsed.total_seconds() < 0.3: score += 20 # 3) Canonical tag if 'rel="canonical"' in page_resp.text: score += 20 # 4) Alt-text on images if " str: if score < 50: return ( "Ensure robots.txt allows GPTBot, add a canonical tag, " "include alt-text for all images, and remove any paywalls." ) return "Technical hygiene is solid; continue monitoring robots.txt and server speed."