Spaces:
Configuration error
Configuration error
import re
from urllib.parse import urlsplit, urlunsplit
from urllib.robotparser import RobotFileParser

import requests
# Seconds allowed for each HTTP request before giving up.
_REQUEST_TIMEOUT = 5
# Responses faster than this many seconds earn the speed bonus.
_FAST_RESPONSE_SECONDS = 0.3
# Score returned when 'entity' is not an http(s) URL.
_NON_URL_SCORE = 50
# Score returned when any step of the URL checks raises.
_ERROR_SCORE = 40

# A <link> tag whose rel attribute starts with "canonical" (any quoting/case).
_CANONICAL_LINK_RE = re.compile(
    r"""<link\b[^>]*\brel\s*=\s*["']?canonical\b""", re.IGNORECASE
)
# An <img> tag that carries an alt attribute.
_IMG_WITH_ALT_RE = re.compile(r"<img\b[^>]*\balt\s*=", re.IGNORECASE)


def _robots_allows_gptbot(robots_text: str, url: str) -> bool:
    """Return True when the robots.txt body permits GPTBot to fetch *url*."""
    parser = RobotFileParser()
    parser.parse(robots_text.splitlines())
    return parser.can_fetch("GPTBot", url)


def hygiene_score(entity: str) -> int:
    """Score the technical hygiene of *entity* on a 0-100 scale.

    If 'entity' is an http:// or https:// URL, points are awarded as follows:
      • robots.txt (fetched from the site root, HTTP 200) allows GPTBot → +30
      • Response time < 0.3s → +20
      • <link rel="canonical"> present → +20
      • <img alt="..."> present → +20
      • No paywall (“subscribe”/“login” not found) → +10
    The total is capped at 100.

    Else (non-URL), return the default 50.

    If any step raises (network failure, malformed URL, ...), the function
    returns 40 instead of propagating the error; this is a deliberate
    best-effort fallback so callers never crash on an unreachable site.
    """
    # Require a real scheme: a bare "http" prefix would misclassify plain
    # names such as "httpie" as URLs.
    if not entity.lower().startswith(("http://", "https://")):
        return _NON_URL_SCORE

    score = 0
    try:
        # 1) robots.txt lives at the site root, not under the page path.
        parts = urlsplit(entity)
        robots_url = urlunsplit((parts.scheme, parts.netloc, "/robots.txt", "", ""))
        robots_resp = requests.get(robots_url, timeout=_REQUEST_TIMEOUT)
        # Parse the rules instead of searching for the word "GPTBot": a file
        # that names GPTBot may be disallowing it. A missing/non-200
        # robots.txt earns no bonus.
        if robots_resp.status_code == 200 and _robots_allows_gptbot(
            robots_resp.text, entity
        ):
            score += 30

        # 2) Page response time
        page_resp = requests.get(entity, timeout=_REQUEST_TIMEOUT)
        if page_resp.elapsed.total_seconds() < _FAST_RESPONSE_SECONDS:
            score += 20

        html = page_resp.text

        # 3) Canonical tag
        if _CANONICAL_LINK_RE.search(html):
            score += 20

        # 4) Alt-text on images (the alt attribute must sit on an <img> tag)
        if _IMG_WITH_ALT_RE.search(html):
            score += 20

        # 5) Paywall check (case-insensitive keyword search)
        lowered = html.lower()
        if "subscribe" not in lowered and "login" not in lowered:
            score += 10

        return min(score, 100)
    except Exception:
        # Best-effort: any failure while probing the site yields the fallback.
        return _ERROR_SCORE
def hygiene_recommendation(entity: str, score: int) -> str:
    """Return the advice text that corresponds to a hygiene score.

    Scores below 50 get the remediation checklist; every other score gets
    the all-clear message. 'entity' is accepted but not consulted.
    """
    remediation = (
        "Ensure robots.txt allows GPTBot, add a canonical tag, "
        "include alt-text for all images, and remove any paywalls."
    )
    all_clear = "Technical hygiene is solid; continue monitoring robots.txt and server speed."
    return remediation if score < 50 else all_clear