KarthikAI
/

InstantID-i2i

@@ -1,43 +1,73 @@
-# handler.py
 import base64
 import io
 from PIL import Image
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline
 pipe = None
 class EndpointHandler:
     """Img2img handler that takes a flat request dict (``image_base64`` + options).

     The diffusers pipeline is stored in the module-level ``pipe`` global so
     it is shared by every handler instance in the process.

     NOTE: a later ``EndpointHandler`` definition in this file reads the image
     from ``"inputs"`` and the options from ``"parameters"`` instead.
     """

     def __init__(self, model_dir):
         """Choose the compute device.

         Args:
             model_dir: Where HF clones the repo; accepted but not used here,
                 because ``init()`` loads the weights by Hub repo id.
         """
         self.device = "cuda" if torch.cuda.is_available() else "cpu"

     def init(self):
         """Load the SD1.5 + InstantID img2img pipeline into ``pipe`` once.

         Repeated calls are no-ops, so the model is never reloaded.
         """
         global pipe
         if pipe is not None:
             return
         # float16 weights are meant for GPU; fall back to float32 on CPU,
         # where half-precision inference is poorly supported.
         dtype = torch.float16 if self.device == "cuda" else torch.float32
         loaded = StableDiffusionImg2ImgPipeline.from_pretrained(
             "karthikAI/InstantID-i2i",
             revision="main",
             torch_dtype=dtype,
             safety_checker=None,
         ).to(self.device)
         loaded.enable_attention_slicing()
         pipe = loaded

     def inference(self, model_inputs: dict) -> dict:
         """Run one img2img generation.

         Args:
             model_inputs: Flat dict with a required ``"image_base64"`` entry
                 and optional ``"prompt"`` (default ``""``), ``"strength"``
                 (default 0.75), ``"guidance_scale"`` (default 7.5) and
                 ``"num_inference_steps"`` (default 50).

         Returns:
             ``{"generated_image_base64": <base64-encoded PNG>}``.

         Raises:
             KeyError: If ``"image_base64"`` is missing.
             ValueError: If the base64 payload or a numeric option is malformed.
         """
         # Decode the input image (base64 -> RGB PIL image).
         raw_bytes = base64.b64decode(model_inputs["image_base64"])
         init_img = Image.open(io.BytesIO(raw_bytes)).convert("RGB")

         # Load lazily so a handler whose init() was never called still works.
         if pipe is None:
             self.init()

         generated = pipe(
             prompt=model_inputs.get("prompt", ""),
             image=init_img,
             strength=float(model_inputs.get("strength", 0.75)),
             guidance_scale=float(model_inputs.get("guidance_scale", 7.5)),
             num_inference_steps=int(model_inputs.get("num_inference_steps", 50)),
         ).images[0]

         # Encode the result back to base64 PNG.
         buf = io.BytesIO()
         generated.save(buf, format="PNG")
         encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
         return {"generated_image_base64": encoded}

 import base64
 import io
 from PIL import Image
 import torch
 from diffusers import StableDiffusionImg2ImgPipeline
+# Global pipeline instance
 pipe = None
 class EndpointHandler:
     """Img2img endpoint handler backed by one process-wide diffusers pipeline.

     The pipeline is kept in the module-level ``pipe`` global so it is loaded
     at most once per process, regardless of how many handlers are created.
     """

     def __init__(self, model_dir: str):
         """Choose the compute device; the model is loaded later by ``init()``.

         Args:
             model_dir: Accepted for interface compatibility; not used here,
                 because ``init()`` loads the weights by Hub repo id.
         """
         # Determine device based on CUDA availability.
         self.device = "cuda" if torch.cuda.is_available() else "cpu"

     def init(self):
         """Load the InstantID-enhanced Stable Diffusion img2img pipeline once.

         Repeated calls are no-ops once ``pipe`` has been populated.
         """
         global pipe
         if pipe is not None:
             return
         # float16 weights are meant for GPU; fall back to float32 on CPU,
         # where half-precision inference is poorly supported.
         dtype = torch.float16 if self.device == "cuda" else torch.float32
         loaded = StableDiffusionImg2ImgPipeline.from_pretrained(
             "karthikAI/InstantID-i2i",  # HF repo with the InstantID adapter
             revision="main",
             torch_dtype=dtype,
             safety_checker=None,
         ).to(self.device)
         loaded.enable_attention_slicing()
         pipe = loaded

     def inference(self, model_inputs: dict) -> dict:
         """Run a single img2img inference.

         Args:
             model_inputs: Request payload with
                 - ``"inputs"``: base64-encoded input image (required);
                 - ``"parameters"``: optional dict (may be absent or null) with
                   ``"prompt"`` (str, default ``""``), ``"strength"`` (float,
                   default 0.75), ``"guidance_scale"`` (float, default 7.5)
                   and ``"num_inference_steps"`` (int, default 50).

         Returns:
             ``{"generated_image_base64": <base64-encoded PNG>}``.

         Raises:
             ValueError: If ``"inputs"`` is missing or empty, or if the base64
                 data or a numeric parameter is malformed.
         """
         # 1. Validate the request before doing any expensive work.
         b64_img = model_inputs.get("inputs")
         if not b64_img:
             raise ValueError("No image data provided under 'inputs'.")

         # 2. Extract parameters; `or {}` also covers an explicit null value.
         params = model_inputs.get("parameters") or {}
         prompt = params.get("prompt", "")
         strength = float(params.get("strength", 0.75))
         guidance_scale = float(params.get("guidance_scale", 7.5))
         num_steps = int(params.get("num_inference_steps", 50))

         # 3. Decode the incoming image.
         image_bytes = base64.b64decode(b64_img)
         init_img = Image.open(io.BytesIO(image_bytes)).convert("RGB")

         # 4. Load lazily so a handler whose init() was never called still works.
         if pipe is None:
             self.init()

         # 5. Run the img2img pipeline. The input image goes in as `image`;
         #    the former `init_image` keyword is no longer accepted by diffusers.
         result = pipe(
             prompt=prompt,
             image=init_img,
             strength=strength,
             guidance_scale=guidance_scale,
             num_inference_steps=num_steps,
         )
         out_img = result.images[0]

         # 6. Encode the output image back to base64.
         buffer = io.BytesIO()
         out_img.save(buffer, format="PNG")
         generated_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
         return {"generated_image_base64": generated_b64}

     def __call__(self, data: dict) -> dict:
         """Make the handler instance callable; identical to ``inference(data)``.

         NOTE(review): added on the assumption that the serving runtime invokes
         the handler object directly (as Hugging Face Inference Endpoints
         custom handlers are) - confirm against the deployment target.
         """
         return self.inference(data)