File size: 2,465 Bytes
ab2012f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import base64
from PIL import Image
import requests
from io import BytesIO
import numpy as np
def _all_blocks_flat(gray, step_h, step_w, max_block_std=30):
    """Return True when no sampled block of *gray* has a std above *max_block_std*.

    Blocks are ``step_h`` x ``step_w`` tiles whose top-left corners lie at
    ``range(0, height - step_h, step_h)`` x ``range(0, width - step_w, step_w)``,
    so a trailing strip along the bottom/right edge is not sampled.
    """
    height, width = gray.shape
    n_rows = len(range(0, height - step_h, step_h))
    n_cols = len(range(0, width - step_w, step_w))
    if n_rows == 0 or n_cols == 0:
        # Nothing is sampled, so nothing contradicts "flat".
        return True
    # View the sampled area as (block_row, y, block_col, x) so one std call
    # over axes (1, 3) yields the per-block standard deviations.
    tiles = gray[: n_rows * step_h, : n_cols * step_w].reshape(
        n_rows, step_h, n_cols, step_w
    )
    return not bool((tiles.std(axis=(1, 3)) > max_block_std).any())


def is_placeholder_image(image: Image.Image) -> bool:
    """Heuristically decide whether *image* is a placeholder/empty picture.

    The image is converted to an array and reduced to a per-pixel mean over
    the last axis ("gray"). The checks run in this order:

    1. The array is not 3-dimensional            -> True.
    2. Fewer than 10 distinct gray values         -> True.
    3. More than 80% of pixels have gray < 20 or gray > 235 -> True.
    4. The standard deviation of gray is below 15 -> True.
    5. ``min(100, height // 10, width // 10) < 2`` (image too small to
       sample blocks)                             -> False.
    6. Otherwise True only when more than 50% of pixels are near-black or
       near-white AND no sampled block has a standard deviation above 30.

    Args:
        image: Anything ``np.array`` can convert; the signature advertises a
            PIL image.

    Returns:
        True when the image looks like a placeholder, False otherwise.
    """
    img_array = np.array(image)
    if img_array.ndim != 3:
        return True

    height, width = img_array.shape[:2]
    gray = img_array.mean(axis=2)

    if np.unique(gray).size < 10:
        return True

    extreme_pixels = np.count_nonzero((gray < 20) | (gray > 235))
    black_white_ratio = extreme_pixels / (height * width)
    if black_white_ratio > 0.8:
        return True

    if gray.std() < 15:
        return True

    sample_size = min(100, height // 10, width // 10)
    if sample_size < 2:
        return False

    # The block scan can only turn the answer into True when the image is
    # mostly black/white, so skip the scan entirely otherwise.
    if black_white_ratio <= 0.5:
        return False

    return _all_blocks_flat(gray, height // sample_size, width // sample_size)
def load_image_from_url(url_or_base64: str) -> Image.Image:
    """Load an image from a URL or a base64 ``data:image`` URI, as RGB.

    Surrounding whitespace is ignored. A value starting with ``data:image``
    is treated as a data URI whose payload (everything after the first
    comma) is base64-decoded; any other value is fetched with
    ``requests.get`` using a 10 second timeout.

    Args:
        url_or_base64: The URL or data URI to load.

    Returns:
        The decoded image converted to RGB mode.

    Raises:
        ValueError: For any failure (malformed data URI, bad base64, HTTP
            error, undecodable image data, non-string input). The original
            exception is attached as ``__cause__``.
    """
    try:
        source = url_or_base64.strip()
        if source.startswith("data:image"):
            _, separator, encoded = source.partition(",")
            if not separator:
                raise ValueError("data URI has no ',' between header and payload")
            image_data = base64.b64decode(encoded)
        else:
            # NOTE(review): this fetches whatever URL the caller supplies; if
            # the value comes from untrusted input, validate the host/scheme
            # upstream to avoid server-side request forgery.
            response = requests.get(source, timeout=10)
            response.raise_for_status()
            image_data = response.content
        return Image.open(BytesIO(image_data)).convert("RGB")
    except Exception as e:
        # Callers rely on a single exception type; keep the cause chained so
        # the underlying failure stays visible in tracebacks.
        raise ValueError(f"Failed to load image: {e}") from e
def filter_valid_images(images: list) -> list:
    """Load every usable entry of *images* and drop placeholder pictures.

    Entries that are not strings, or that equal one of the tokens
    ``""``, ``"string"``, ``"null"``, ``"undefined"`` after stripping
    whitespace, are skipped silently. Each remaining entry is loaded with
    ``load_image_from_url`` and kept unless ``is_placeholder_image`` flags it.
    Failures while loading or checking an entry are printed and the entry is
    skipped (best-effort: one bad entry never aborts the whole batch).

    Args:
        images: URLs / data URIs to load. ``None`` or an empty list yields
            an empty result.

    Returns:
        The loaded images that passed the placeholder check, in input order.
    """
    if not images:
        return []

    placeholder_tokens = ("", "string", "null", "undefined")
    valid_images = []
    for img in images:
        if not isinstance(img, str):
            continue
        candidate = img.strip()
        if candidate in placeholder_tokens:
            continue
        try:
            # Pass the stripped value so the loader sees the same text that
            # was checked against the placeholder tokens.
            pil_image = load_image_from_url(candidate)
            if is_placeholder_image(pil_image):
                print("[IMAGE FILTER] Ignoring placeholder/empty image")
            else:
                valid_images.append(pil_image)
        except Exception as e:
            print(f"[IMAGE FILTER] Warning: Failed to load image: {e}, skipping")
    return valid_images
|