File size: 550 Bytes
c3964c3
 
 
1847245
c3964c3
 
 
 
 
 
 
1847245
c3964c3
00886d1
 
 
 
 
 
1847245
 
c3964c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# preprocessing.py
import requests
from PIL import Image
from io import BytesIO

def clean_text(text: str, *, max_length: int = 2000) -> str:
    """Strip surrounding whitespace from *text* and cap its length.

    Args:
        text: Raw input string.
        max_length: Maximum number of characters kept after stripping.
            Defaults to 2000, the limit that used to be hard-coded here.

    Returns:
        The stripped text, truncated to at most ``max_length`` characters.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        # A negative slice bound would silently drop trailing characters
        # instead of capping the length, so reject it explicitly.
        raise ValueError("max_length must be non-negative")
    # Slicing is a no-op when the stripped text is already short enough.
    return text.strip()[:max_length]


def load_image_from_url(url: str):
    """Download the image at *url* and return it converted to RGB.

    The request is sent with a browser-like ``User-Agent`` header and a
    10-second timeout. Errors are not caught here; they propagate to the
    caller.

    Args:
        url: Address of the image to fetch.

    Returns:
        The image opened with PIL and converted to ``"RGB"`` mode.
    """
    resp = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
        timeout=10,
    )
    # Raise on 4xx/5xx responses instead of trying to decode an error page.
    resp.raise_for_status()

    payload = BytesIO(resp.content)
    return Image.open(payload).convert("RGB")