# preprocessing.py
import requests
from PIL import Image
from io import BytesIO

def clean_text(text: str, max_length: int = 2000) -> str:
    """Strip surrounding whitespace from *text* and cap its length.

    Args:
        text: The string to clean.
        max_length: Maximum number of characters kept after stripping.
            Defaults to 2000, the limit that used to be hard-coded.

    Returns:
        The stripped text, truncated to at most ``max_length`` characters.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        # A negative bound would make the slice drop characters from the
        # end instead of capping the length, which is never what is wanted.
        raise ValueError("max_length must be non-negative")
    # Slicing is a no-op for strings already within the limit, so no
    # explicit length check is needed.
    return text.strip()[:max_length]


def load_image_from_url(url: str):
    """Download the image at *url* and return it converted to RGB.

    The request is sent with a browser-like ``User-Agent`` header and a
    10-second timeout. The response body is read fully into memory and
    opened with PIL.

    Args:
        url: Address of the image to fetch.

    Returns:
        The image produced by ``Image.open(...).convert("RGB")``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    browser_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    reply = requests.get(url, headers=browser_headers, timeout=10)

    # Turn 4xx/5xx responses into an exception instead of trying to
    # decode an error page as an image.
    reply.raise_for_status()

    raw_bytes = BytesIO(reply.content)
    return Image.open(raw_bytes).convert("RGB")