Fix: convert PIL to NumPy before ESRGAN inference

2ef631e 4 months ago

5.79 kB

	import os
	import io
	import torch
	import base64
	import requests
	import numpy as np
	from PIL import Image
	from realesrgan import RealESRGANer
	from basicsr.archs.rrdbnet_arch import RRDBNet


	class EndpointHandler:
	    """Inference-endpoint handler that upscales an image with Real-ESRGAN.

	    On construction the ``RealESRGAN_x4plus`` weights are downloaded (if not
	    already present under ``path``) and a ``RealESRGANer`` upsampler is built.
	    Calling the instance runs preprocess -> inference -> postprocess and
	    returns ``{"image": <base64 PNG>}`` or ``{"error": <message>}``.
	    """

	    # Upscaling factor passed to the network, the upsampler and ``enhance``.
	    # The downloaded weights file is the "x4plus" release, so keep this at 4
	    # unless the weights are changed as well.
	    SCALE = 4

	    # (connect, read) timeouts in seconds for the weights download. The read
	    # timeout applies per socket read, not to the whole transfer.
	    DOWNLOAD_TIMEOUT = (10, 120)

	    # Number of bytes requested per chunk while streaming the weights to disk.
	    DOWNLOAD_CHUNK_SIZE = 1 << 20

	    def __init__(self, path="."):
	        """Fetch the model weights if needed and build the upsampler.

	        Args:
	            path: Directory in which ``RealESRGAN_x4plus.pth`` is looked up
	                and, when missing, stored after download.

	        Raises:
	            requests.RequestException: If the weights download fails.
	        """
	        print("🚀 [INIT] Starting EndpointHandler initialization...")
	        print(f"📂 Working directory: {os.getcwd()}")
	        print(f"📁 Model path root: {path}")

	        self.model_url = (
	            "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/"
	            "RealESRGAN_x4plus.pth"
	        )
	        self.model_path = os.path.join(path, "RealESRGAN_x4plus.pth")

	        if not os.path.exists(self.model_path):
	            print(f"📥 [DOWNLOAD] Fetching model weights from {self.model_url}")
	            self._download_weights()
	            print(f"✅ [DOWNLOAD] Saved model to {self.model_path}")
	        else:
	            print(f"✅ [CACHE] Model already exists at {self.model_path}")

	        print("🧠 [MODEL] Building RRDBNet...")
	        model = RRDBNet(
	            num_in_ch=3,
	            num_out_ch=3,
	            num_feat=64,
	            num_block=23,
	            num_grow_ch=32,
	            scale=self.SCALE,
	        )

	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        print(f"💻 [DEVICE] Using device: {device}")

	        self.upsampler = RealESRGANer(
	            scale=self.SCALE,
	            model_path=self.model_path,
	            model=model,
	            half=False,
	            device=device,
	        )

	        print("✅ [INIT DONE] Real-ESRGAN model initialized and ready.\n\n")

	    def _download_weights(self):
	        """Stream the weights from ``self.model_url`` into ``self.model_path``.

	        The data is written to a sibling ``.part`` file and moved into place
	        only after the transfer finished, so an interrupted download never
	        leaves a truncated file that a later start would accept as cached.
	        """
	        partial_path = self.model_path + ".part"
	        try:
	            with requests.get(
	                self.model_url, stream=True, timeout=self.DOWNLOAD_TIMEOUT
	            ) as response:
	                response.raise_for_status()
	                with open(partial_path, "wb") as f:
	                    for chunk in response.iter_content(
	                        chunk_size=self.DOWNLOAD_CHUNK_SIZE
	                    ):
	                        f.write(chunk)
	            os.replace(partial_path, self.model_path)
	        finally:
	            # After a successful os.replace the partial file is gone; it only
	            # still exists here when something above failed.
	            if os.path.exists(partial_path):
	                os.remove(partial_path)

	    # ==========================================================
	    # MAIN CALLABLE
	    # ==========================================================
	    def __call__(self, data):
	        """Upscale the image contained in ``data``.

	        Args:
	            data: Request payload; see ``preprocess`` for accepted forms.

	        Returns:
	            ``{"image": <base64-encoded PNG>}`` on success, or
	            ``{"error": <message>}`` if any step raised an exception.
	        """
	        print("🛰️ [CALL] Endpoint invoked!")
	        print(f"📦 [CALL] Raw data type: {type(data)}")
	        # NOTE: str(data) renders the whole payload before it is truncated.
	        print(f"🔍 [CALL] Data preview: {str(data)[:300]}...")

	        try:
	            print("➡️ [STEP] Preprocessing input...")
	            image = self.preprocess(data)
	            print(f"✅ [STEP] Preprocessing complete! Image size: {image.size}")

	            print("➡️ [STEP] Running inference...")
	            output = self.inference(image)
	            print("✅ [STEP] Inference complete!")

	            print("➡️ [STEP] Encoding output image...")
	            result = self.postprocess(output)
	            print("✅ [STEP] Postprocessing complete!")

	            return result
	        except Exception as e:
	            # Top-level boundary: report the failure to the caller as data
	            # instead of letting the exception escape the handler.
	            print("💥 [ERROR] Exception during inference:", str(e))
	            return {"error": str(e)}

	    # ==========================================================
	    # PREPROCESS
	    # ==========================================================
	    @staticmethod
	    def _decode_base64(payload: str) -> bytes:
	        """Decode a base64 string, tolerating a ``data:`` URI header.

	        For input such as ``data:image/png;base64,AAAA`` everything up to and
	        including the first comma is dropped before decoding. Without this the
	        header characters would be decoded as if they were image data.
	        """
	        stripped = payload.lstrip()
	        if stripped.startswith("data:"):
	            _header, comma, body = stripped.partition(",")
	            if comma:
	                payload = body
	        return base64.b64decode(payload)

	    @staticmethod
	    def _image_from_bytes(raw) -> "Image.Image":
	        """Open encoded image bytes with PIL and convert the result to RGB."""
	        return Image.open(io.BytesIO(raw)).convert("RGB")

	    def preprocess(self, data) -> "Image.Image":
	        """Turn a request payload into an RGB PIL image.

	        Accepted forms, after unwrapping an optional ``{"inputs": ...}`` dict:
	        a PIL image, raw encoded image bytes, a base64 string (optionally
	        with a ``data:`` URI header), or a non-empty list whose first element
	        is one of those. Only the first list element is used.

	        Raises:
	            ValueError: If the payload is none of the supported forms.
	        """
	        print(f"🔧 [PREPROCESS] Type received: {type(data)}")

	        if isinstance(data, dict):
	            print("🧩 [PREPROCESS] Detected dict input.")
	            if "inputs" in data:
	                data = data["inputs"]
	                print(f"📨 [PREPROCESS] Found 'inputs' key: {type(data)}")

	        if isinstance(data, Image.Image):
	            print("🖼️ [PREPROCESS] Got PIL.Image.Image directly.")
	            return data.convert("RGB")

	        if isinstance(data, (bytes, bytearray)):
	            print("🧾 [PREPROCESS] Treating input as raw bytes.")
	            return self._image_from_bytes(data)

	        if isinstance(data, str):
	            print(f"🧾 [PREPROCESS] Treating input as base64 string, len={len(data)}")
	            return self._image_from_bytes(self._decode_base64(data))

	        if isinstance(data, list) and data:
	            item = data[0]
	            if isinstance(item, Image.Image):
	                return item.convert("RGB")
	            if isinstance(item, (bytes, bytearray)):
	                return self._image_from_bytes(item)
	            if isinstance(item, str):
	                return self._image_from_bytes(self._decode_base64(item))

	        raise ValueError("Unsupported input type. Expected image, bytes, or base64 data.")

	    # ==========================================================
	    # INFERENCE
	    # ==========================================================
	    def inference(self, image) -> "Image.Image":
	        """Run the upsampler on an RGB PIL image and return a PIL image.

	        The image is handed to ``self.upsampler.enhance`` as a NumPy array
	        with the channel axis reversed (RGB -> BGR), and the returned array is
	        reversed back before being wrapped in a PIL image.
	        """
	        print("🎯 [INFERENCE] Running ESRGAN upscaling...")
	        print(f"📐 [INFERENCE] Input image size: {image.size}")

	        # Convert PIL -> NumPy BGR for RealESRGAN
	        img_np = np.array(image)[:, :, ::-1]  # RGB -> BGR
	        print(f"🔍 [INFERENCE] Converted to NumPy: shape={img_np.shape}, dtype={img_np.dtype}")

	        # enhance returns a pair; only the image array is needed here.
	        output, _ = self.upsampler.enhance(img_np, outscale=self.SCALE)
	        print(f"✅ [INFERENCE] Output NumPy shape: {output.shape}")

	        # Convert back to PIL RGB
	        output_rgb = Image.fromarray(output[:, :, ::-1])
	        print(f"✅ [INFERENCE] Converted back to PIL: size={output_rgb.size}")
	        return output_rgb

	    # ==========================================================
	    # POSTPROCESS
	    # ==========================================================
	    def postprocess(self, output_image) -> dict:
	        """Encode ``output_image`` as PNG and return it base64-encoded.

	        Returns:
	            ``{"image": <base64 text of the PNG bytes>}``.
	        """
	        print("📤 [POSTPROCESS] Encoding image to base64...")
	        with io.BytesIO() as buf:
	            output_image.save(buf, format="PNG")
	            raw_bytes = buf.getvalue()
	        print(f"📏 [POSTPROCESS] Output byte size: {len(raw_bytes)}")
	        encoded = base64.b64encode(raw_bytes).decode("utf-8")
	        print(f"✅ [POSTPROCESS] Encoded base64 length: {len(encoded)}")
	        return {"image": encoded}