Spaces:
Sleeping
Sleeping
Upload inference.py (#2)
Browse files- Upload inference.py (0352db982029a1d866eb5119cee262074b61fee9)
Co-authored-by: Caroline Marc <CarolineM5@users.noreply.huggingface.co>
- inference.py +107 -71
inference.py
CHANGED
|
@@ -13,79 +13,115 @@ import torch.nn as nn
|
|
| 13 |
from transformers import CLIPTokenizer, CLIPTextModel, CLIPImageProcessor
|
| 14 |
from PIL import Image
|
| 15 |
import random
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
return self.unet(sample, timestep, return_dict=return_dict, **kwargs)
|
| 34 |
-
|
| 35 |
-
def __getattr__(self, name):
    # Called by Python only when normal attribute lookup fails.
    # The guard set prevents infinite recursion: "unet" itself and the
    # pickling hooks must resolve through the default object machinery,
    # otherwise looking up self.unet below would re-enter this method.
    if name in ("unet", "forward", "__getstate__", "__setstate__"):
        return super().__getattr__(name)
    # Every other attribute is transparently proxied to the wrapped UNet.
    return getattr(self.unet, name)
|
| 39 |
-
|
| 40 |
-
def save_pretrained(self, save_directory, **kwargs):
    """Delegate serialization to the wrapped UNet2DModel instance."""
    target = self.unet
    return target.save_pretrained(save_directory, **kwargs)
|
| 43 |
-
|
| 44 |
-
def inference(pipe, img1, img2, num_steps):
|
| 45 |
-
|
| 46 |
seed = random.randrange(0, 2**32)
|
| 47 |
torch.manual_seed(seed)
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
if len(
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
|
|
|
|
| 13 |
from transformers import CLIPTokenizer, CLIPTextModel, CLIPImageProcessor
|
| 14 |
from PIL import Image
|
| 15 |
import random
|
| 16 |
+
from contextlib import nullcontext
|
| 17 |
|
| 18 |
+
def pil_from(x):
    """Return a PIL.Image given either a PIL.Image or a path string.

    Non-string inputs are returned unchanged (assumed to already be
    PIL images).

    BUGFIX: the module imports ``from PIL import Image``, so the correct
    name here is ``Image.open`` — the original ``PIL.Image.open`` raised
    NameError because the ``PIL`` package itself is never imported.
    """
    if isinstance(x, str):
        return Image.open(x)
    return x
|
| 23 |
+
|
| 24 |
+
def inference(pipe, fiber_imgs, ring_imgs, num_steps):
    """Run the image-editing pipeline on a 2x2 mosaic of input tiles.

    Args:
        pipe: diffusers pipeline callable accepting prompt/image/generator
            keyword arguments (InstructPix2Pix-style interface).
        fiber_imgs: list/tuple of 4 PIL.Image or paths (order: TL, TR, BL, BR).
        ring_imgs: list/tuple of 4 PIL.Image or paths (same order).
        num_steps: int, number of diffusion inference steps.

    Returns:
        list of 4 PIL.Image tiles, order [TL, TR, BL, BR].

    Raises:
        ValueError: if either input is not a 4-element list/tuple.

    BUGFIX: the module imports ``from PIL import Image``; the original
    body referenced ``PIL.Image.*`` throughout, which raised NameError
    because ``PIL`` itself is never imported. All such uses now go
    through ``Image``.
    """
    # Fresh random seed per call; a CPU generator keeps the diffusion
    # sampling reproducible for this seed regardless of compute device.
    seed = random.randrange(0, 2**32)
    torch.manual_seed(seed)
    generator = torch.Generator("cpu").manual_seed(seed)

    # Geometry: four 512x512 tiles pasted into one 1024x1024 canvas.
    tile = 512
    canvas_size = tile * 2  # 1024

    # Validate inputs: exactly four faces per modality.
    if not (isinstance(fiber_imgs, (list, tuple)) and len(fiber_imgs) == 4):
        raise ValueError("fiber_imgs must be a list/tuple of 4 PIL images or file paths.")
    if not (isinstance(ring_imgs, (list, tuple)) and len(ring_imgs) == 4):
        raise ValueError("ring_imgs must be a list/tuple of 4 PIL images or file paths.")

    # Load & preprocess each face (grayscale, resized to the tile size).
    faces_f = []
    faces_r = []
    for fpath in fiber_imgs:
        im = pil_from(fpath).convert("L").resize((tile, tile), Image.BILINEAR)
        faces_f.append(im)
    for rpath in ring_imgs:
        im = pil_from(rpath).convert("L").resize((tile, tile), Image.BILINEAR)
        # Binarize the ring channel with a fixed threshold of 200:
        # order matters — pixels pushed to 255 are > 200 and so are not
        # re-zeroed by the second assignment.
        arr = np.array(im)
        arr[arr > 200] = 255
        arr[arr <= 200] = 0
        im_bin = Image.fromarray(arr.astype(np.uint8))
        faces_r.append(im_bin)

    # Build grayscale canvases and paste tiles corner by corner
    # (order: TL, TR, BL, BR).
    canvas_f = Image.new("L", (canvas_size, canvas_size))
    canvas_r = Image.new("L", (canvas_size, canvas_size))

    canvas_f.paste(faces_f[0], (0, 0))        # TL
    canvas_f.paste(faces_f[1], (tile, 0))     # TR
    canvas_f.paste(faces_f[2], (0, tile))     # BL
    canvas_f.paste(faces_f[3], (tile, tile))  # BR

    canvas_r.paste(faces_r[0], (0, 0))
    canvas_r.paste(faces_r[1], (tile, 0))
    canvas_r.paste(faces_r[2], (0, tile))
    canvas_r.paste(faces_r[3], (tile, tile))

    # Stack channels [fiber, ring, ring] -> H,W,3 RGB conditioning image.
    # NOTE(review): assumes numpy is imported as `np` above this hunk
    # (not visible in this diff) — confirm against the full file.
    arr_f = np.array(canvas_f).astype(np.uint8)
    arr_r = np.array(canvas_r).astype(np.uint8)
    arr_in = np.stack([arr_f, arr_r, arr_r], axis=2)  # H,W,3
    input_image = Image.fromarray(arr_in)  # PIL RGB

    # Autocast: disabled on MPS (unsupported there), otherwise match the
    # available device. Simplified from torch.device("cuda").type, which
    # is just the string "cuda".
    if torch.backends.mps.is_available():
        autocast_ctx = nullcontext()
    else:
        autocast_ctx = torch.autocast("cuda" if torch.cuda.is_available() else "cpu")

    with autocast_ctx:
        out = pipe(
            prompt="",  # empty prompt (this model ignores the prompt)
            image=input_image,
            num_inference_steps=num_steps,
            image_guidance_scale=1.9,
            guidance_scale=10.0,
            generator=generator,
            safety_checker=None,
            num_images_per_prompt=1,
        )
        # out.images may be a list; take the first sample.
        pred = out.images[0]

    # Ensure the prediction matches the canvas size before slicing.
    if pred.size != (canvas_size, canvas_size):
        pred = pred.resize((canvas_size, canvas_size), Image.BILINEAR)

    # Split back into 4 tiles, same TL/TR/BL/BR order as the input.
    tl = pred.crop((0, 0, tile, tile))
    tr = pred.crop((tile, 0, canvas_size, tile))
    bl = pred.crop((0, tile, tile, canvas_size))
    br = pred.crop((tile, tile, canvas_size, canvas_size))

    # Close intermediates to free file handles / memory (best effort).
    for im in faces_f + faces_r:
        try:
            im.close()
        except Exception:
            pass
    try:
        canvas_f.close()
        canvas_r.close()
    except Exception:
        pass

    return [tl, tr, bl, br]
|
| 126 |
|
| 127 |
|