"""
Handler for QwenImageLayeredPipeline.
Decomposes an input RGBA image into semantic layers (foreground, background, objects, etc.)
"""
from typing import Dict, List, Any
import torch
import base64
import io
from PIL import Image
# Try to import the specific pipeline class
try:
    from diffusers import QwenImageLayeredPipeline
except ImportError:
    from diffusers import DiffusionPipeline
    QwenImageLayeredPipeline = None

class EndpointHandler:
def __init__(self, path=""):
# The correct model for layered decomposition
model_id = "Qwen/Qwen-Image-Layered"
print(f"Loading model {model_id}...")
if QwenImageLayeredPipeline:
print("Using explicit QwenImageLayeredPipeline class.")
self.pipeline = QwenImageLayeredPipeline.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
)
else:
print("Falling back to DiffusionPipeline auto-load.")
self.pipeline = DiffusionPipeline.from_pretrained(
model_id,
trust_remote_code=True,
torch_dtype=torch.bfloat16,
)
print(f"Loaded pipeline class: {type(self.pipeline).__name__}")
if torch.cuda.is_available():
self.pipeline.to("cuda")
print("Model ready!")
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Expects:
            inputs.image: base64-encoded RGBA image
            parameters.layers: number of layers to decompose into (default: 4)
            parameters.num_inference_steps: inference steps (default: 50)
            parameters.resolution: output resolution (default: 640)
        Returns:
            List of dicts, each with a "layer_index" and a base64-encoded PNG under "image"
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", {})

        # Parse the input image
        image_data = inputs.get("image")
        if not image_data:
            raise ValueError("Missing 'image' in inputs. Please provide a base64-encoded RGBA image.")
        try:
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes)).convert("RGBA")
        except Exception as e:
            raise ValueError(f"Failed to decode image: {e}")

        # Get parameters with defaults
        layers = parameters.get("layers", 4)
        num_inference_steps = parameters.get("num_inference_steps", 50)
        resolution = parameters.get("resolution", 640)
        prompt = parameters.get("prompt", "")  # Usually empty for decomposition

        print(f"Decomposing image into {layers} layers at resolution {resolution}...")

        # Run the pipeline on the device selected in __init__ (CPU fallback when no GPU is present)
        with torch.autocast(self.device):
            output = self.pipeline(
                image,
                prompt,
                num_inference_steps=num_inference_steps,
                layers=layers,
                resolution=resolution,
                true_cfg_scale=4.0,
                cfg_normalize=False,
                use_en_prompt=True,
            )

        # Serialize output layers as base64-encoded PNGs
        images_response = []
        if hasattr(output, "images") and output.images:
            # output.images is a list of lists (one list per batch item); take the first batch
            layer_images = output.images[0] if isinstance(output.images[0], list) else output.images
            for i, layer_img in enumerate(layer_images):
                if isinstance(layer_img, Image.Image):
                    buffered = io.BytesIO()
                    layer_img.save(buffered, format="PNG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                    images_response.append({
                        "layer_index": i,
                        "image": img_str,
                    })

        print(f"Returned {len(images_response)} layers.")
        return images_response
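

# Minimal local smoke test (a sketch, not part of the Inference Endpoints contract): it
# assumes a file named "input.png" exists next to this script, builds a request payload in
# the shape documented above, and prints how many layers the handler returns.
if __name__ == "__main__":
    with open("input.png", "rb") as f:
        payload_image = base64.b64encode(f.read()).decode("utf-8")

    handler = EndpointHandler()
    response = handler({
        "inputs": {"image": payload_image},
        "parameters": {"layers": 4, "num_inference_steps": 50, "resolution": 640},
    })
    # Each entry is {"layer_index": int, "image": "<base64 PNG>"}
    print(f"Got {len(response)} layers back")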