Upload 2 files

- app.py: +44 -62
- inference.py: +40 -56
app.py
CHANGED
@@ -45,83 +45,67 @@ class UNetNoCondWrapper(nn.Module):
         return getattr(self.unet, name)
 
     def save_pretrained(self, save_directory, **kwargs):
         return self.unet.save_pretrained(save_directory, **kwargs)
 
 # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+model_id = "CarolineM5/InstructPix2Pix_WithoutPrompt_4_faces"
+vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae").to(device)
+scheduler = DDPMScheduler.from_pretrained(model_id, subfolder="scheduler")
+tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
+text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder").to(device)
+feature_extractor = CLIPImageProcessor.from_pretrained(model_id, subfolder="feature_extractor")
+
+# 2) Load your unconditioned UNet and wrap it
+base_unet = UNet2DModel.from_pretrained(model_id, subfolder="unet").to(device)
+wrapped_unet = UNetNoCondWrapper(base_unet).to(device)
+
+# 3) Build the pipeline manually
+pipe = StableDiffusionInstructPix2PixPipeline(
+    vae=vae,
+    text_encoder=text_encoder,
+    tokenizer=tokenizer,
+    unet=wrapped_unet,
+    scheduler=scheduler,
+    safety_checker=None,
+    feature_extractor=feature_extractor,
+)
+
+pipe = pipe.to(torch.float32).to(device)
 # @spaces.GPU
 
-# def pil_to_data_uri(img: Image.Image) -> str:
-#     buf = io.BytesIO()
-#     img.save(buf, format="PNG")
-#     b = base64.b64encode(buf.getvalue()).decode("utf-8")
-#     return f"data:image/png;base64,{b}"
 
 
 def build_textured_cube(pil_imgs, face_rotations=None):
     """
-    - pil_imgs: list/tuple of 4 PIL.Image in the order [front, right, back, left]
-    - Returns: (absolute_obj_path, tmpdir)
-    Defaults:
-      default_rots = {"front": 0, "right": 270, "back": 180, "left": 90, "top": 0, "bottom": 0}
-      face_order = ["top","right","bottom","left","front","back"]
-    Notes:
-      - Writes the files to /tmp/gradio when possible (HF Spaces).
-      - front/back use the size of pil_imgs[0]; left/right use their own width (rectangles).
+    Creates a textured parallelepiped (OBJ + MTL + textures).
     """
     import os
     import tempfile
     from PIL import Image
 
     if not (isinstance(pil_imgs, (list, tuple)) and len(pil_imgs) >= 4):
-        raise ValueError("build_textured_cube
+        raise ValueError("build_textured_cube expects a list/tuple of 4 PIL images (front, right, back, left).")
 
-    # default rotations & order
     default_rots = {"front": 0, "right": 270, "back": 180, "left": 270, "top": 0, "bottom": 0}
-    # default_rots = {"front": 0, "right": 0, "back": 0, "left": 0, "top": 0, "bottom": 0}
     if face_rotations is None:
         face_rotations = default_rots
     else:
         for k, v in default_rots.items():
             face_rotations.setdefault(k, v)
 
     base_dir = "/tmp/gradio"
     if os.path.isdir(base_dir) and os.access(base_dir, os.W_OK):
         tmpdir = tempfile.mkdtemp(prefix="parallelep_", dir=base_dir)
     else:
         tmpdir = tempfile.mkdtemp(prefix="parallelep_")
 
+    # relative names for textures (mtl will use these names)
     tex_names = {
         "front": "tex_front.png",
         "right": "tex_right.png",
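The hunk above assembles the InstructPix2Pix pipeline from its individual components rather than via a single `from_pretrained` call, which is what allows the custom `UNetNoCondWrapper` to be injected in place of a conditioned UNet. Only fragments of the wrapper appear in this diff (its attribute delegation and `save_pretrained`); the following is a minimal sketch of what such a wrapper plausibly looks like, with the `forward` signature being an assumption, not code from the commit:

import torch.nn as nn

class UNetNoCondWrapper(nn.Module):
    """Sketch: adapts an unconditional UNet2DModel to the conditioned
    call signature used by StableDiffusionInstructPix2PixPipeline."""

    def __init__(self, unet):
        super().__init__()
        self.unet = unet

    def forward(self, sample, timestep, encoder_hidden_states=None, **kwargs):
        # Assumption: UNet2DModel.forward only takes (sample, timestep),
        # so the text conditioning is accepted here and dropped.
        return self.unet(sample, timestep)

    def __getattr__(self, name):
        try:
            # nn.Module resolves registered submodules/parameters here
            return super().__getattr__(name)
        except AttributeError:
            # delegate everything else (config, dtype, ...) to the wrapped UNet
            return getattr(self.unet, name)

    def save_pretrained(self, save_directory, **kwargs):
        return self.unet.save_pretrained(save_directory, **kwargs)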
@@ -131,34 +115,32 @@ def build_textured_cube(pil_imgs, face_rotations=None):
         "bottom": "tex_bottom.png",
     }
 
-    # get sizes (we assume inference has already resized left/right if needed)
     front_w, front_h = pil_imgs[0].size
     right_w, right_h = pil_imgs[1].size
 
     ratio = 45/145
     right_w = int(front_w * ratio)
 
+    # define the physical dimensions of the parallelepiped (in "px", then normalize)
     width_px = float(front_w)
     height_px = float(right_w)
-    # depth Y: use the width of the side faces (average of left/right)
     depth_px = float(front_h)
 
+    # normalization to keep coordinates within ±0.5
     max_dim = max(width_px, depth_px, height_px, 1.0)
     scale = 1.0 / max_dim
     half_x = (width_px * 0.5) * scale
     half_y = (depth_px * 0.5) * scale
     half_z = (height_px * 0.5) * scale
 
     mapping_order = ["front", "right", "back", "left"]
+    # save textures in tmpdir
     for img, face_name in zip(pil_imgs[:4], mapping_order):
         im = img.convert("RGB")
         angle = face_rotations.get(face_name, 0)
         if angle % 360 != 0:
-            # PIL rotate: angle
+            # PIL rotate: angle in degrees, positive = CCW
             im = im.rotate(angle, resample=Image.BICUBIC, expand=False)
         path = os.path.join(tmpdir, tex_names[face_name])
         im.save(path, format="PNG")
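A worked instance of the half-extent computation may help. Note that `height_px` comes from `right_w`, which is forced to the 45/145 aspect ratio regardless of the actual side image. With an illustrative 512x512 front face (sizes are not taken from the commit):

front_w, front_h = 512, 512
ratio = 45 / 145
right_w = int(front_w * ratio)          # 158

width_px, height_px, depth_px = float(front_w), float(right_w), float(front_h)
scale = 1.0 / max(width_px, depth_px, height_px, 1.0)   # 1/512

half_x = (width_px * 0.5) * scale       # 0.5
half_y = (depth_px * 0.5) * scale       # 0.5
half_z = (height_px * 0.5) * scale      # ~0.154 -> a thin slab, not a cube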
@@ -167,7 +149,7 @@ def build_textured_cube(pil_imgs, face_rotations=None):
     except Exception:
         pass
 
-    # top/bottom
+    # black top/bottom
     black = Image.new("RGB", (front_w, front_h), (0, 0, 0))
     for face_name in ("top", "bottom"):
         im = black
@@ -181,7 +163,7 @@ def build_textured_cube(pil_imgs, face_rotations=None):
     except Exception:
         pass
 
-    # ---
+    # --- write .mtl ---
    mtl_path = os.path.join(tmpdir, "parallelep.mtl")
     with open(mtl_path, "w", encoding="utf-8") as f:
         f.write("# Material file for parallelepiped\n")
@@ -294,7 +276,7 @@ def build_textured_cube(pil_imgs, face_rotations=None):
 # -------------------------
 def run(fibers: Image.Image, rings: Image.Image, num_steps: int):
     try:
-        outputs = inference(fibers, rings, num_steps)
+        outputs = inference(pipe, fibers, rings, num_steps)
         if not (isinstance(outputs, (list, tuple)) and len(outputs) >= 4):
             raise ValueError("user_inference must return a list/tuple of 4 images.")
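Since `build_textured_cube` is the bridge between the model outputs and the 3D viewer, a quick local check is useful. The removed docstring documented the return value as `(absolute_obj_path, tmpdir)`; assuming that contract is unchanged, a hypothetical smoke test (not part of the commit):

from PIL import Image

# four solid-colour faces are enough to verify that the textures,
# .mtl and .obj get written under /tmp
faces = [Image.new("RGB", (512, 512), c)
         for c in ((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0))]
obj_path, tmpdir = build_textured_cube(faces)  # order: front, right, back, left
print(obj_path)   # absolute path to the generated .obj inside tmpdir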
inference.py
CHANGED
@@ -21,8 +21,8 @@ def pil_from(x):
     if isinstance(x, str):
         return PIL.Image.open(x)
     return x
 
-def inference(fiber_imgs, ring_imgs, num_steps):
+def inference(pipe, fiber_imgs, ring_imgs, num_steps):
     """
     fiber_imgs: PIL.Image or paths
     ring_imgs: PIL.Image or paths
@@ -31,68 +31,52 @@ def inference(fiber_imgs, ring_imgs, num_steps):
     returns: list of 4 PIL.Image (L mode), order [1, 4, 3, 2]
     """
     # seed + generator
+    seed = random.randrange(0, 2**32)
+    torch.manual_seed(seed)
+    generator = torch.Generator("cpu").manual_seed(seed)
 
     # sizes
     tile = 512
     canvas_size = tile * 2
 
+    # stack channels: [fiber, ring, ring] -> H,W,3
+    arr_f = np.array(fiber_imgs).astype(np.uint8)
+    arr_r = np.array(ring_imgs).astype(np.uint8)
+    arr_in = np.stack([arr_f, arr_r, arr_r], axis=2)  # H,W,3
+    input_image = PIL.Image.fromarray(arr_in)  # PIL RGB
+
+    # run pipeline (use autocast consistent with device)
+    if torch.backends.mps.is_available():
+        autocast_ctx = nullcontext()
+    else:
+        autocast_ctx = torch.autocast(torch.device("cuda").type if torch.cuda.is_available() else "cpu")
+
+    with autocast_ctx:
+        out = pipe(
+            prompt="",  # empty prompt (your model ignores prompt)
+            image=input_image,
+            num_inference_steps=num_steps,
+            image_guidance_scale=1.9,
+            guidance_scale=10.0,
+            generator=generator,
+            safety_checker=None,
+            num_images_per_prompt=1,
+        )
+    # out.images may be a list; take first
+    pred = out.images[0]
+
-    # # ensure pred is canvas_size x canvas_size
-    # if pred.size != (canvas_size, canvas_size):
-    #     pred = pred.resize((canvas_size, canvas_size), PIL.Image.BILINEAR)
+    # ensure pred is canvas_size x canvas_size
+    if pred.size != (canvas_size, canvas_size):
+        pred = pred.resize((canvas_size, canvas_size), PIL.Image.BILINEAR)
 
     # split into 4 tiles in same order TL, TR, BL, BR
-    ring_imgs = PIL.Image.fromarray(ring_imgs)
-    fiber_imgs = PIL.Image.fromarray(fiber_imgs)
-
-    tl = ring_imgs.crop((0, 0, tile, tile))
-    tr = ring_imgs.crop((tile, 0, canvas_size, tile))
-    bl = ring_imgs.crop((0, tile, tile, canvas_size))
-    br = ring_imgs.crop((tile, tile, canvas_size, canvas_size))
-
-    # tr = cv2.resize(np.asarray(tr), (new_width, original_height), interpolation=cv2.INTER_LANCZOS4)
-    # br = cv2.resize(np.asarray(br), (new_width, original_height), interpolation=cv2.INTER_LANCZOS4)
-
-    # tr = PIL.Image.fromarray(tr)
-    # br = PIL.Image.fromarray(br)
+    tl = pred.crop((0, 0, tile, tile))
+    tr = pred.crop((tile, 0, canvas_size, tile))
+    bl = pred.crop((0, tile, tile, canvas_size))
+    br = pred.crop((tile, tile, canvas_size, canvas_size))
 
     # close opened images to free handles
     fiber_imgs.close()
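The channel stacking is what lets a two-input problem fit an RGB-image pipeline: the fiber map rides in the red channel and the ring map is duplicated into green and blue. A small shape check, assuming the two grayscale inputs already arrive at canvas size (1024x1024); the pixel values below are stand-ins:

import numpy as np
import PIL.Image

arr_f = np.zeros((1024, 1024), dtype=np.uint8)        # fiber map -> R channel
arr_r = np.full((1024, 1024), 128, dtype=np.uint8)    # ring map  -> G and B
arr_in = np.stack([arr_f, arr_r, arr_r], axis=2)
print(arr_in.shape)                        # (1024, 1024, 3)
print(PIL.Image.fromarray(arr_in).mode)    # RGB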
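Taken together, the two files change the calling convention: `pipe` is now built once at module import in app.py and passed explicitly into `inference`. A sketch of the resulting chain, with hypothetical input paths and an illustrative step count:

import PIL.Image

fibers = PIL.Image.open("fibers.png")   # hypothetical grayscale fiber map
rings = PIL.Image.open("rings.png")     # hypothetical grayscale ring map

faces = inference(pipe, fibers, rings, num_steps=20)  # 4 PIL images, order [1, 4, 3, 2]
obj_path, tmpdir = build_textured_cube(faces)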