File size: 2,465 Bytes
ab2012f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import base64
from PIL import Image
import requests
from io import BytesIO
import numpy as np

def is_placeholder_image(image: Image.Image) -> bool:
    """Heuristically classify ``image`` as a placeholder / blank picture.

    The image is turned into a NumPy array and its last axis is averaged
    into a single gray plane. The checks below run in order and the first
    one that matches decides the result:

    1. The array is not 3-dimensional            -> placeholder.
    2. Fewer than 10 distinct gray values        -> placeholder.
    3. More than 80% of the pixels are very dark (< 20) or very bright
       (> 235)                                   -> placeholder.
    4. Overall gray standard deviation below 15  -> placeholder.
    5. ``min(100, height // 10, width // 10)`` is below 2 (image too small
       to tile)                                  -> not a placeholder.
    6. No scanned tile has a standard deviation above 30 and more than 50%
       of the pixels are very dark or very bright -> placeholder.

    Args:
        image: The picture to inspect; anything ``np.array`` can convert.

    Returns:
        ``True`` when the image looks like a placeholder, ``False`` otherwise.
    """
    pixels = np.array(image)

    # Only (rows, cols, channels)-shaped arrays are analysed further.
    if pixels.ndim != 3:
        return True

    height, width = pixels.shape[0], pixels.shape[1]
    luminance = np.mean(pixels, axis=2)

    if np.unique(luminance).size < 10:
        return True

    extreme_mask = (luminance < 20) | (luminance > 235)
    extreme_fraction = extreme_mask.sum() / (height * width)
    if extreme_fraction > 0.8:
        return True

    if np.std(luminance) < 15:
        return True

    tiles_per_axis = min(100, height // 10, width // 10)
    if tiles_per_axis < 2:
        return False

    tile_h = height // tiles_per_axis
    tile_w = width // tiles_per_axis

    # Both ranges stop before ``size - tile``, so the trailing strip along the
    # bottom and right edges is never inspected. ``any`` stops at the first
    # tile whose standard deviation exceeds 30.
    has_textured_tile = any(
        np.std(luminance[top:top + tile_h, left:left + tile_w]) > 30
        for top in range(0, height - tile_h, tile_h)
        for left in range(0, width - tile_w, tile_w)
    )
    if has_textured_tile:
        return False

    # Every scanned tile is flat: call it a placeholder only when the image
    # is also dominated by very dark / very bright pixels.
    return bool(extreme_fraction > 0.5)

def load_image_from_url(url_or_base64: str) -> Image.Image:
    """Load an image from a ``data:image`` URI or by downloading a URL.

    Args:
        url_or_base64: Either a string starting with ``data:image`` whose
            payload after the first comma is base64-encoded image bytes, or
            a URL that is fetched with ``requests.get`` (10 second timeout).

    Returns:
        The decoded image, converted to RGB mode.

    Raises:
        ValueError: If decoding, downloading or parsing fails for any
            reason. The underlying exception is attached as ``__cause__``.
    """
    try:
        if url_or_base64.startswith("data:image"):
            # Only the payload after the first comma is needed; the part
            # before it is discarded.
            _, encoded = url_or_base64.split(",", 1)
            raw_bytes = base64.b64decode(encoded)
        else:
            response = requests.get(url_or_base64, timeout=10)
            response.raise_for_status()
            raw_bytes = response.content
        return Image.open(BytesIO(raw_bytes)).convert("RGB")
    except Exception as e:
        # Callers only need to handle ValueError; chain explicitly so the
        # root cause stays visible in tracebacks.
        raise ValueError(f"Failed to load image: {e}") from e

def filter_valid_images(images: list) -> list:
    """Load every usable entry of ``images`` and keep the non-placeholder ones.

    Entries that are falsy, not strings, or (after stripping whitespace)
    equal to ``""``, ``"string"``, ``"null"`` or ``"undefined"`` are skipped
    silently. Each remaining entry is loaded with ``load_image_from_url``;
    failures are reported with ``print`` and skipped, and images flagged by
    ``is_placeholder_image`` are reported and dropped.

    Args:
        images: Candidate image references to load.

    Returns:
        The successfully loaded, non-placeholder images in input order.
    """
    ignored_tokens = ("", "string", "null", "undefined")
    kept = []
    for candidate in images:
        usable = (
            candidate
            and isinstance(candidate, str)
            and candidate.strip() not in ignored_tokens
        )
        if not usable:
            continue
        try:
            loaded = load_image_from_url(candidate)
            if is_placeholder_image(loaded):
                print(f"[IMAGE FILTER] Ignoring placeholder/empty image")
            else:
                kept.append(loaded)
        except Exception as e:
            # Best effort: one bad entry must not abort the whole batch.
            print(f"[IMAGE FILTER] Warning: Failed to load image: {e}, skipping")
    return kept