File size: 588 Bytes
82372e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from __future__ import annotations

import html
import re

import requests


# Compiled once at import time so repeated fetches reuse the same patterns.
# Matches whole <script>/<style> elements (including their bodies) and HTML
# comments, none of which are visible text.
_NON_TEXT_RE = re.compile(
    r"<(script|style)\b[^>]*>.*?</\1\s*>|<!--.*?-->",
    re.IGNORECASE | re.DOTALL,
)
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def _html_to_text(markup: str) -> str:
    """Reduce HTML markup to whitespace-normalised visible text."""
    # Remove script/style bodies and comments first: stripping only the tags
    # would leave JavaScript and CSS source mixed into the text.
    text = _NON_TEXT_RE.sub(" ", markup)
    text = _TAG_RE.sub(" ", text)
    # Decode entities only after tags are gone, so an escaped "&lt;b&gt;"
    # survives as the literal text "<b>" instead of being stripped as a tag.
    text = html.unescape(text)
    # Entities such as &nbsp; decode to whitespace, so collapse runs last.
    return _WHITESPACE_RE.sub(" ", text).strip()


def fetch_text(
    url: str,
    timeout: float = 6.0,
    *,
    max_chars: int | None = 4000,
) -> tuple[str | None, str]:
    """Download *url* and return its visible text together with a status string.

    The function never raises for request or parsing failures; the outcome is
    reported through the second element of the returned tuple instead.

    Args:
        url: Address to fetch. An empty value, or any URL containing
            ``example.invalid``, is not requested at all.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_chars: Maximum number of characters of text to return. ``None``
            disables truncation. Expected to be non-negative.

    Returns:
        A ``(text, status)`` tuple:

        * ``(None, "fixture_or_empty")`` when the URL was skipped.
        * ``(text, "fetched")`` on success, where ``text`` has tags, script
          and style bodies and comments removed, entities decoded and
          whitespace collapsed.
        * ``(None, "error:<ExceptionClassName>")`` when anything fails.
    """
    if not url or "example.invalid" in url:
        return None, "fixture_or_empty"
    try:
        response = requests.get(url, timeout=timeout, headers={"User-Agent": "MM1 prototype"})
        response.raise_for_status()
        return _html_to_text(response.text)[:max_chars], "fetched"
    except Exception as exc:
        # Deliberately broad: this is a best-effort fetch whose contract is to
        # report failures via the status string rather than propagate them.
        return None, f"error:{exc.__class__.__name__}"