# Source: ai-rag/cv_module/src/processors/image_preprocessor.py
# Uploaded by robrtt
# Commit message: "Clean rebuild: all features fixed"
# Revision: 4d62ba6
from __future__ import annotations
import io
import base64
from pathlib import Path
from typing import Union
from dataclasses import dataclass, field
import numpy as np
from PIL import Image, ExifTags
from loguru import logger
@dataclass
class ImageInput:
    """Normalized image container that every input source is converted into.

    Wraps a PIL image together with bookkeeping about where it came from.
    """

    # The decoded image itself.
    pil_image: Image.Image
    # Size as (width, height), supplied by whoever builds the instance.
    original_size: tuple[int, int]
    # Free-form label for the origin of the image.
    source: str = "unknown"
    # Display name of the image; empty when unknown.
    filename: str = ""
    # Colour-mode label; defaults to "RGB".
    format: str = "RGB"
    # Extra source-specific details (a fresh dict per instance).
    metadata: dict = field(default_factory=dict)

    @property
    def width(self) -> int:
        """Current width of ``pil_image`` in pixels."""
        return self.pil_image.width

    @property
    def height(self) -> int:
        """Current height of ``pil_image`` in pixels."""
        return self.pil_image.height

    @property
    def numpy(self) -> np.ndarray:
        """Return the image as a new numpy array (intended for OpenCV/YOLO).

        For an RGB PIL image this is an HWC uint8 array.
        """
        return np.array(self.pil_image)

    def to_base64(self) -> str:
        """Encode the image as a JPEG (quality 85) and return it as base64 text.

        Modes the JPEG encoder cannot write (for example ``RGBA``, ``LA`` or
        ``P``) are converted to ``RGB`` first instead of raising, so instances
        built directly around such images can still be serialized. Any alpha
        channel is dropped by that conversion.

        Returns:
            The base64-encoded JPEG bytes as an ASCII ``str`` (no data-URI
            prefix).
        """
        # Modes Pillow's JPEG writer accepts as-is; these are passed through
        # untouched so their output is unchanged.
        jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
        image = self.pil_image
        if image.mode not in jpeg_modes:
            image = image.convert("RGB")
        buf = io.BytesIO()
        image.save(buf, format="JPEG", quality=85)
        return base64.b64encode(buf.getvalue()).decode()
class ImagePreprocessor:
    """Load an image from any supported source and normalize it.

    ``load`` accepts PIL images, raw bytes, filesystem paths (``str`` or
    ``Path``), ``data:image`` base64 URIs and http(s) URLs, and always returns
    an ``ImageInput`` holding an RGB image no larger than ``MAX_SIZE``.

    URL downloads use aggressive per-phase timeouts:
        connect: ``CONNECT_TIMEOUT`` seconds to establish the connection
        read:    ``READ_TIMEOUT`` seconds waiting for response data
                 (not a limit on the total download time)
    This keeps CDNs that block or throttle server-side requests (e.g. Getty
    Images blocking HF container IPs) from hanging the whole pipeline.
    """

    MAX_SIZE = (1920, 1920)
    MAX_DOWNLOAD_BYTES = 10 * 1024 * 1024  # 10MB cap
    CONNECT_TIMEOUT = 8.0  # seconds; also quoted in the connect-timeout error
    READ_TIMEOUT = 15.0  # seconds; also quoted in the read-timeout error

    @classmethod
    def load(cls, source: Union[str, bytes, Path, Image.Image]) -> ImageInput:
        """Dispatch ``source`` to the matching loader.

        Args:
            source: A ``Path``; a ``str`` holding an http(s) URL, a
                ``data:image`` URI or a filesystem path; raw image ``bytes``;
                or a PIL image.

        Returns:
            The normalized ``ImageInput``.

        Raises:
            ValueError: Unsupported type, or a ``data:`` URI that is not an
                image.
            FileNotFoundError: A path that does not exist.
            RuntimeError: A URL download that failed.
        """
        if isinstance(source, Path):
            return cls._from_file(str(source))
        if isinstance(source, str):
            # Only the scheme is inspected; schemes are case-insensitive and
            # slicing avoids lower-casing a multi-megabyte data URI.
            prefix = source[:10].lower()
            if prefix.startswith(("http://", "https://")):
                return cls._from_url(source)
            if prefix.startswith("data:image"):
                return cls._from_base64(source)
            if prefix.startswith("data:"):
                raise ValueError(
                    "Data URI tidak didukung: hanya data:image/... yang diterima."
                )
            # Everything else is a path, including names that merely start
            # with "http" (e.g. "http_cache/img.png").
            return cls._from_file(source)
        if isinstance(source, bytes):
            return cls._from_bytes(source)
        if isinstance(source, Image.Image):
            return cls._from_pil(source, source_name="pil_direct")
        raise ValueError(f"Tipe input tidak dikenali: {type(source)}")

    @classmethod
    def _from_file(cls, path: str) -> ImageInput:
        """Load and normalize the image stored at ``path``.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
        """
        p = Path(path)
        if not p.exists():
            raise FileNotFoundError(f"Gambar tidak ditemukan: {path}")
        # The context manager releases the file handle; the pixel data is
        # loaded inside it so the image stays usable afterwards.
        with Image.open(p) as opened:
            # Captured before normalization, like the other loaders do.
            original_size = (opened.width, opened.height)
            opened.load()
            img = cls._normalize(opened)
        return ImageInput(
            pil_image=img,
            original_size=original_size,
            source="file",
            filename=p.name,
            metadata={"path": str(p), "format": p.suffix},
        )

    @classmethod
    def _from_bytes(cls, data: bytes, filename: str = "upload") -> ImageInput:
        """Decode and normalize an image held in memory as encoded bytes."""
        img = Image.open(io.BytesIO(data))
        original_size = (img.width, img.height)
        img = cls._normalize(img)
        return ImageInput(
            pil_image=img,
            original_size=original_size,
            source="bytes",
            filename=filename,
            metadata={"size_bytes": len(data)},
        )

    @classmethod
    def _from_base64(cls, b64_str: str) -> ImageInput:
        """Decode a base64 string, with or without a ``data:...,`` header."""
        if "," in b64_str:
            # Drop the "data:image/...;base64," header of a data URI.
            b64_str = b64_str.split(",", 1)[1]
        data = base64.b64decode(b64_str)
        return cls._from_bytes(data, filename="base64_input")

    @classmethod
    def _from_url(cls, url: str) -> ImageInput:
        """Download the image at ``url`` (capped size, strict timeouts).

        Raises:
            RuntimeError: Connect/read timeout, HTTP error status, or any
                other httpx transport error.
            ValueError: The response is HTML/text, exceeds
                ``MAX_DOWNLOAD_BYTES``, or is not a decodable image.
        """
        import httpx

        logger.debug(f"Fetching image from URL: {url}")
        # Aggressive per-phase timeouts: CDNs such as Getty Images or
        # Shutterstock often throttle server-side requests. The read timeout
        # bounds the wait for response data, not the total download.
        timeout = httpx.Timeout(
            connect=cls.CONNECT_TIMEOUT, read=cls.READ_TIMEOUT, write=5.0, pool=2.0
        )
        # Browser-like headers; some CDNs reject obvious non-browser clients.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/124.0.0.0 Safari/537.36"
            ),
            "Accept": "image/webp,image/jpeg,image/png,image/*,*/*;q=0.8",
            "Accept-Language": "id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7",
            "Referer": "https://www.google.com/",
        }
        try:
            with httpx.Client(timeout=timeout, follow_redirects=True, max_redirects=3) as client:
                with client.stream("GET", url, headers=headers) as resp:
                    resp.raise_for_status()
                    content_type = resp.headers.get("content-type", "")
                    lowered_type = content_type.lower()
                    if "html" in lowered_type or "text" in lowered_type:
                        raise ValueError(
                            f"URL mengembalikan {content_type} bukan gambar. "
                            "Pastikan URL langsung ke file gambar (jpg/png/webp)."
                        )
                    # Stream the body so an oversized response is aborted
                    # early instead of being buffered in full.
                    chunks = []
                    total = 0
                    for chunk in resp.iter_bytes(chunk_size=65536):
                        total += len(chunk)
                        if total > cls.MAX_DOWNLOAD_BYTES:
                            raise ValueError(
                                f"Gambar terlalu besar (>{cls.MAX_DOWNLOAD_BYTES//1024//1024}MB)"
                            )
                        chunks.append(chunk)
                    data = b"".join(chunks)
        except httpx.ConnectTimeout as e:
            raise RuntimeError(
                f"Tidak bisa connect ke server gambar dalam {cls.CONNECT_TIMEOUT:g}s. "
                "CDN ini kemungkinan memblok request dari server HF (US). "
                "Coba upload gambar langsung (tab Upload) atau pakai URL dari "
                "imgur.com, ibb.co, atau raw GitHub."
            ) from e
        except httpx.ReadTimeout as e:
            raise RuntimeError(
                f"Server gambar merespons terlalu lambat (>{cls.READ_TIMEOUT:g}s). "
                "CDN lokal Indonesia sering throttle request dari server HF di US. "
                "Coba upload gambar langsung atau pakai URL dari imgur/ibb.co."
            ) from e
        except httpx.HTTPStatusError as e:
            raise RuntimeError(
                f"Server gambar mengembalikan error {e.response.status_code}. "
                "Pastikan URL gambar valid dan publik."
            ) from e
        except httpx.HTTPError as e:
            raise RuntimeError(f"Gagal mengunduh gambar: {e}") from e

        # Last path segment without the query string, or a generic fallback.
        filename = url.split("/")[-1].split("?")[0] or "url_image"
        try:
            img_input = cls._from_bytes(data, filename=filename)
        except Exception as e:
            # Any decode failure is reported as "not a valid image".
            raise ValueError(
                f"File yang diunduh bukan gambar yang valid: {e}. "
                "Pastikan URL mengarah langsung ke file gambar."
            ) from e
        img_input.source = "url"
        img_input.metadata["url"] = url
        logger.info(f"Downloaded image: {total} bytes → {img_input.width}x{img_input.height}")
        return img_input

    @classmethod
    def _from_pil(cls, img: Image.Image, source_name: str = "pil") -> ImageInput:
        """Wrap an already-decoded PIL image; the caller's image is not modified."""
        original_size = (img.width, img.height)
        img = cls._normalize(img)
        return ImageInput(pil_image=img, original_size=original_size, source=source_name)

    @classmethod
    def _normalize(cls, img: Image.Image) -> Image.Image:
        """Apply EXIF orientation, convert to RGB and downscale to ``MAX_SIZE``.

        The input image is never modified in place; the returned image may be
        a new object.
        """
        from PIL import ImageOps

        normalized = img
        try:
            # Handles all eight EXIF orientations, including mirrored ones.
            transposed = ImageOps.exif_transpose(img)
            if transposed is not None:
                normalized = transposed
        except Exception as exc:
            # Orientation fixing is best-effort: broken EXIF must not block
            # loading, but the reason is logged rather than silently dropped.
            logger.debug(f"EXIF orientation ignored: {exc}")
            normalized = img
        if normalized.mode != "RGB":
            normalized = normalized.convert("RGB")
        if normalized.width > cls.MAX_SIZE[0] or normalized.height > cls.MAX_SIZE[1]:
            if normalized is img:
                # thumbnail() resizes in place; work on a copy so the caller's
                # image keeps its size.
                normalized = normalized.copy()
            normalized.thumbnail(cls.MAX_SIZE, Image.LANCZOS)
            logger.debug(f"Resized image to {normalized.width}x{normalized.height}")
        return normalized