Spaces:
Running
Running
File size: 588 Bytes
82372e5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | from __future__ import annotations
import re
import requests
def _html_to_text(markup: str) -> str:
    """Reduce an HTML document to whitespace-normalised plain text."""
    # Function-scope import keeps this block self-contained without touching
    # the file's top-level import section.
    from html import unescape

    # Comments and script/style elements are removed *with their contents*:
    # stripping only the tags would leave raw JS/CSS in the result and burn
    # the caller's character budget on non-readable text.
    markup = re.sub(r"(?s)<!--.*?-->", " ", markup)
    markup = re.sub(r"(?is)<(script|style)\b[^>]*>.*?</\1\s*>", " ", markup)
    markup = re.sub(r"<[^>]+>", " ", markup)
    # Decode entities only after tag removal so an escaped "&lt;b&gt;" stays
    # literal text instead of being mistaken for a tag. Decoding before the
    # whitespace pass lets "&nbsp;" (U+00A0) collapse like any other space.
    markup = unescape(markup)
    return re.sub(r"\s+", " ", markup).strip()


def fetch_text(url: str, timeout: float = 6.0) -> tuple[str | None, str]:
    """Fetch *url* and return its readable text plus a status tag.

    Args:
        url: Address to download. Empty values and any URL containing
            ``example.invalid`` are treated as fixtures and never requested.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        A ``(text, status)`` pair:

        * ``(None, "fixture_or_empty")`` when the URL is skipped.
        * ``(text, "fetched")`` on success, where ``text`` is the response
          body with markup removed, entities decoded, whitespace collapsed
          and the result cut to at most 4000 characters.
        * ``(None, "error:<ExceptionClassName>")`` when anything fails.

    This function never raises; failures are reported through the status.
    """
    if not url or "example.invalid" in url:
        return None, "fixture_or_empty"
    try:
        response = requests.get(url, timeout=timeout, headers={"User-Agent": "MM1 prototype"})
        response.raise_for_status()
        return _html_to_text(response.text)[:4000], "fetched"
    except Exception as exc:
        # Deliberately broad: callers rely on a status string rather than an
        # exception, so every failure is folded into the return value.
        return None, f"error:{exc.__class__.__name__}"
|