Spaces:

zimhe
/

SpatialDiffusion

Sleeping

File size: 22,513 Bytes

a521a3f


import torch
from torch import Tensor
import torchvision.transforms.functional as TF
import torch.nn.functional as F
import cv2
import py360convert
import argparse
import os
import numpy as np
from numpy.typing import NDArray
from PIL import Image

# The six cubemap views, in canonical order: front, back, left, right, top, bottom.
FACES = ["front", "back", "left", "right", "top", "bottom"]

# Face name -> caption key string of the form "caption_<face>".
FACE_CAPTION_MAP = {face_name: f"caption_{face_name}" for face_name in FACES}

# Face name -> single-letter key used for dict-format cubemaps passed to / returned
# by py360convert elsewhere in this module.
FACE_KEYS_MAP = dict(
    front="F",
    back="B",
    left="L",
    right="R",
    top="U",
    bottom="D",
)


def load_cubemap_dict(cubemap_path_dict:dict):
    """Load cubemap face images from a ``{face name: file path}`` mapping.

    Each path is read with ``cv2.imread``. Faces that fail to load are
    reported on stdout and left out of the result.

    Args:
        cubemap_path_dict: Mapping from a face name (a key of
            ``FACE_KEYS_MAP``) to the image file path for that face.

    Returns:
        dict mapping the single-letter face key (``FACE_KEYS_MAP[face]``)
        to the loaded image array.
    """
    loaded = {}
    for face_name, file_path in cubemap_path_dict.items():
        pixels = cv2.imread(file_path)
        if pixels is not None:
            loaded[FACE_KEYS_MAP[face_name]] = pixels
        else:
            # cv2.imread signals failure by returning None instead of raising.
            print(f"❌ 读取失败: {file_path}")
    return loaded


def convert_to_cubemap(image, size=512):
    """Convert an equirectangular panorama into a six-face cubemap.

    Thin wrapper around ``py360convert.e2c`` using bilinear sampling and
    the ``"dict"`` cube format.

    Args:
        image: Equirectangular image array handed to ``py360convert.e2c``.
        size: Face width passed as ``face_w`` (default 512).

    Returns:
        The dict-format cubemap produced by ``py360convert.e2c``.
    """
    return py360convert.e2c(
        image,
        face_w=size,
        mode="bilinear",
        cube_format="dict",
    )

def to_cubemap_dict(images:list[NDArray]):
    """Build a dict-format cubemap from face images given in ``FACES`` order.

    Args:
        images: Face images indexed in the same order as ``FACES``
            (front, back, left, right, top, bottom).

    Returns:
        dict mapping each single-letter face key from ``FACE_KEYS_MAP``
        to the image at the matching position of ``images``.
    """
    return {
        FACE_KEYS_MAP[face_name]: images[position]
        for position, face_name in enumerate(FACES)
    }

def convert_to_equirectangular(cubemap_dict, width=1024,height=512):
    """Convert a dict-format cubemap into an equirectangular panorama.

    Thin wrapper around ``py360convert.c2e`` using bilinear sampling.

    Args:
        cubemap_dict: Cubemap in the ``"dict"`` cube format of py360convert.
        width: Output panorama width passed as ``w`` (default 1024).
        height: Output panorama height passed as ``h`` (default 512).

    Returns:
        PIL image built from the panorama array. A float32 result is first
        multiplied by 255, clipped to [0, 255] and cast to uint8; any other
        dtype is handed to ``Image.fromarray`` unchanged.
    """
    panorama = py360convert.c2e(
        cubemap_dict,
        w=width,
        h=height,
        mode="bilinear",
        cube_format="dict",
    )

    if panorama.dtype != np.float32:
        return Image.fromarray(panorama)

    scaled = np.clip(panorama * 255, 0, 255)
    return Image.fromarray(scaled.astype(np.uint8))


def process_image_e2c(input_path, output_dir, size=512):
    """Read an equirectangular panorama and save its six cubemap faces.

    The faces are written to ``output_dir`` as ``<face>.png`` for every
    name in ``FACES``. If the input cannot be read, a message is printed
    and nothing is written.

    Args:
        input_path: Path of the equirectangular image to read.
        output_dir: Directory receiving the face images (created if missing).
        size: Face width forwarded to ``convert_to_cubemap``.
    """
    panorama = cv2.imread(input_path)
    if panorama is None:
        # cv2.imread returns None on failure instead of raising.
        print(f"❌ 读取失败: {input_path}")
        return

    os.makedirs(output_dir, exist_ok=True)

    # Build the dict-format cubemap and show which face keys it contains.
    face_images = convert_to_cubemap(panorama, size)
    print(face_images.keys())

    # Write one PNG per face.
    for face_name in FACES:
        target_path = os.path.join(output_dir, f"{face_name}.png")
        cv2.imwrite(target_path, face_images[FACE_KEYS_MAP[face_name]])
        print(f"✅ {face_name} 视角已保存: {target_path}")

def process_image_c2e(cubemap_path_dict, output_path, width,height):
    """Read six cubemap faces, convert them to a panorama and save it.

    Args:
        cubemap_path_dict: Mapping from face name to image file path, as
            accepted by ``load_cubemap_dict``.
        output_path: Destination path of the equirectangular image.
        width: Panorama width forwarded to ``convert_to_equirectangular``.
        height: Panorama height forwarded to ``convert_to_equirectangular``.
    """
    cubemap_dict = load_cubemap_dict(cubemap_path_dict)
    # Build the equirectangular panorama.
    equirectangular_image = convert_to_equirectangular(cubemap_dict, width, height)
    # convert_to_equirectangular returns a PIL image, but cv2.imwrite only
    # accepts numpy arrays, so convert before writing. The channel order is
    # untouched: the faces were read by cv2 and are written back by cv2.
    cv2.imwrite(output_path, np.asarray(equirectangular_image))
    print(f"✅ 全景图已保存: {output_path}")



def perspective_transform_patch(patch: torch.Tensor, delta):
    """Warp a patch with ``torchvision.transforms.functional.perspective``.

    The start points are the four patch corners; the end points are those
    corners shifted by the matching entries of ``delta``.

    Args:
        patch: Image tensor whose shape unpacks as (C, H, W).
        delta: Four ``[dx, dy]`` offsets, one per corner, in the order
            top-left, top-right, bottom-right, bottom-left.

    Returns:
        The result of ``TF.perspective`` with bilinear interpolation.
    """
    _, height, width = patch.shape

    # Corner order: top-left, top-right, bottom-right, bottom-left.
    corners = [[0, 0], [width, 0], [width, height], [0, height]]

    # Shift every corner coordinate by its per-corner offset.
    shifted_corners = []
    for corner, offset in zip(corners, delta):
        shifted_corners.append([coord + step for coord, step in zip(corner, offset)])

    return TF.perspective(
        patch,
        corners,
        shifted_corners,
        interpolation=TF.InterpolationMode.BILINEAR,
    )
  

def stretch_edge_patch(patch,pad_width,edge_key):
    """Resize an edge strip to a border-sized band and warp it into place.

    The strip is resized with ``TF.resize`` and then passed through
    ``perspective_transform_patch`` with corner offsets that depend on the
    side it is destined for.

    Args:
        patch: Edge strip tensor whose shape unpacks as (C, H, W).
        pad_width: Border thickness in pixels.
        edge_key: One of "top", "bottom", "left" or "right".

    Returns:
        The warped strip, sized (pad_width, W + 2*pad_width) for
        "top"/"bottom" and (H + 2*pad_width, pad_width) for "left"/"right"
        before warping. ``None`` for any other ``edge_key``.
    """
    _, H, W = patch.shape
    p = pad_width

    if edge_key in ("top", "bottom"):
        band_size = (p, W + 2 * p)
    elif edge_key in ("left", "right"):
        band_size = (H + 2 * p, p)
    else:
        # Unknown keys fall through without producing a patch.
        return None

    # Per-corner [dx, dy] offsets in the order
    # top-left, top-right, bottom-right, bottom-left.
    corner_offsets = {
        "top": [[0, 0], [0, 0], [-p, 0], [p, 0]],
        "bottom": [[p, 0], [-p, 0], [0, 0], [0, 0]],
        "left": [[0, 0], [0, p], [0, -p], [0, 0]],
        "right": [[0, p], [0, 0], [0, 0], [0, -p]],
    }

    band = TF.resize(patch, band_size)
    return perspective_transform_patch(band, corner_offsets[edge_key])
        
# -------------------------------
# Per-face edge-stitching (padding) functions
# -------------------------------

def pad_front(face:Tensor, faces, pad_width):
    """Pad the front face with warped strips taken from its neighbours.

    Border sources:
      top:    last ``pad_width`` rows of the top face     (top[:, -w:, :])
      bottom: first ``pad_width`` rows of the bottom face (bottom[:, :w, :])
      left:   last ``pad_width`` columns of the left face (left[:, :, -w:])
      right:  first ``pad_width`` columns of the right face (right[:, :, :w])

    Args:
        face: Front-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``, so contributions add up where the
        horizontal and vertical bands overlap.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    # The face itself sits in the centre of the canvas.
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    # Top border.
    strip = faces['top'][:, -p:, :]
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")

    # Bottom border.
    strip = faces['bottom'][:, :p, :]
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")

    # Left border.
    strip = faces['left'][:, :, -p:]
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    # Right border.
    strip = faces['right'][:, :, :p]
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")
    return canvas

def pad_right(face, faces, pad_width):
    """Pad the right face with warped strips taken from its neighbours.

    Border sources:
      left:   last ``pad_width`` columns of the front face (front[:, :, -w:])
      right:  first ``pad_width`` columns of the back face (back[:, :, :w])
      top:    last ``pad_width`` columns of the top face, rotated with
              ``torch.rot90(k=3)``
      bottom: last ``pad_width`` columns of the bottom face, rotated with
              ``torch.rot90(k=1)``

    Args:
        face: Right-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    strip = faces['front'][:, :, -p:]
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    strip = faces['back'][:, :, :p]
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")

    # The column strip (C, H, w) becomes a row strip (C, w, H) after rotation.
    strip = torch.rot90(faces['top'][:, :, -p:], k=3, dims=(1, 2))
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")

    # Same for the bottom face, rotated the opposite way.
    strip = torch.rot90(faces['bottom'][:, :, -p:], k=1, dims=(1, 2))
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")
    return canvas

def pad_back(face, faces, pad_width):
    """Pad the back face with warped strips taken from its neighbours.

    Border sources:
      left:   last ``pad_width`` columns of the right face (right[:, :, -w:])
      right:  first ``pad_width`` columns of the left face (left[:, :, :w])
      top:    first ``pad_width`` rows of the top face, rotated with
              ``torch.rot90(k=2)``
      bottom: last ``pad_width`` rows of the bottom face, rotated with
              ``torch.rot90(k=2)``

    Args:
        face: Back-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    strip = faces['right'][:, :, -p:]
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    strip = faces['left'][:, :, :p]
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")

    # Top border: top rows of the top face, rotated by two quarter turns.
    strip = torch.rot90(faces['top'][:, :p, :], k=2, dims=(1, 2))
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")

    # Bottom border: bottom rows of the bottom face, rotated by two quarter turns.
    strip = torch.rot90(faces['bottom'][:, -p:, :], k=2, dims=(1, 2))
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")
    return canvas

def pad_left(face, faces, pad_width):
    """Pad the left face with warped strips taken from its neighbours.

    Border sources:
      left:   last ``pad_width`` columns of the back face (back[:, :, -w:])
      right:  first ``pad_width`` columns of the front face (front[:, :, :w])
      top:    first ``pad_width`` columns of the top face, rotated with
              ``torch.rot90(k=1)``
      bottom: first ``pad_width`` columns of the bottom face, rotated with
              ``torch.rot90(k=3)``

    Args:
        face: Left-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    strip = faces['back'][:, :, -p:]
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    strip = faces['front'][:, :, :p]
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")

    strip = torch.rot90(faces['top'][:, :, :p], k=1, dims=(1, 2))
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")

    strip = torch.rot90(faces['bottom'][:, :, :p], k=3, dims=(1, 2))
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")
    return canvas

def pad_top(face, faces, pad_width):
    """Pad the top face with warped strips taken from its neighbours.

    Every source strip is the first ``pad_width`` rows of a side face:
      bottom: front[:, :w, :] (not rotated)
      left:   left[:, :w, :],  rotated with ``torch.rot90(k=3)``
      right:  right[:, :w, :], rotated with ``torch.rot90(k=1)``
      top:    back[:, :w, :],  rotated with ``torch.rot90(k=2)``

    Args:
        face: Top-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    strip = faces['front'][:, :p, :]
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")

    strip = torch.rot90(faces['left'][:, :p, :], k=3, dims=(1, 2))
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    strip = torch.rot90(faces['right'][:, :p, :], k=1, dims=(1, 2))
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")

    strip = torch.rot90(faces['back'][:, :p, :], k=2, dims=(1, 2))
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")
    return canvas

def pad_bottom(face, faces, pad_width):
    """Pad the bottom face with warped strips taken from its neighbours.

    Every source strip is the last ``pad_width`` rows of a side face:
      top:    front[:, -w:, :] (not rotated)
      left:   left[:, -w:, :],  rotated with ``torch.rot90(k=1)``
      right:  right[:, -w:, :], rotated with ``torch.rot90(k=3)``
      bottom: back[:, -w:, :],  rotated with ``torch.rot90(k=2)``

    Args:
        face: Bottom-face tensor whose shape unpacks as (C, H, W).
        faces: Mapping from face name to face tensor.
        pad_width: Border thickness in pixels.

    Returns:
        Tensor of shape (C, H + 2*pad_width, W + 2*pad_width). Border bands
        are accumulated with ``+=``.
    """
    channels, height, width = face.shape
    p = pad_width
    canvas = torch.zeros(
        (channels, height + 2 * p, width + 2 * p),
        dtype=face.dtype,
        device=face.device,
    )
    canvas[:, p:p + height, p:p + width] = face

    top_rows = slice(0, p)
    bottom_rows = slice(height + p, height + 2 * p)
    left_cols = slice(0, p)
    right_cols = slice(width + p, width + 2 * p)

    strip = faces['front'][:, -p:, :]
    canvas[:, top_rows, :] += stretch_edge_patch(strip, p, "top")

    strip = torch.rot90(faces['left'][:, -p:, :], k=1, dims=(1, 2))
    canvas[:, :, left_cols] += stretch_edge_patch(strip, p, "left")

    strip = torch.rot90(faces['right'][:, -p:, :], k=3, dims=(1, 2))
    canvas[:, :, right_cols] += stretch_edge_patch(strip, p, "right")

    strip = torch.rot90(faces['back'][:, -p:, :], k=2, dims=(1, 2))
    canvas[:, bottom_rows, :] += stretch_edge_patch(strip, p, "bottom")
    return canvas

# Dispatch table: face name -> function that pads that face.
pad_funcs = dict(
    front=pad_front,
    right=pad_right,
    back=pad_back,
    left=pad_left,
    top=pad_top,
    bottom=pad_bottom,
)

def pad_face(faces: dict, width: int, face_name: str)->Tensor:
    """Pad one cubemap face by dispatching to its padding function.

    Args:
        faces: Mapping from face name to face tensor; must contain
            ``face_name`` plus whatever neighbours the padding function reads.
        width: Border thickness in pixels, forwarded as ``pad_width``.
        face_name: Name of the face to pad; must be a key of ``pad_funcs``.

    Returns:
        The padded face tensor produced by the selected padding function.

    Raises:
        ValueError: If ``face_name`` is not a key of ``pad_funcs``.
    """
    pad_fn = pad_funcs.get(face_name)
    if pad_fn is None:
        raise ValueError(f"Invalid face name: {face_name}. Must be one of {list(pad_funcs)}.")
    return pad_fn(faces[face_name], faces, width)


def prepare_mask(image,facename):
    """Create a single-channel mask matching the spatial size of ``image``.

    The mask is all zeros when ``facename`` is "front" and all ones for
    every other face name.

    Args:
        image (torch.Tensor): Image tensor of shape (C, H, W) or
            (N, C, H, W); only its last two dimensions, dtype and device
            are used.
        facename (str): Name of the face the image belongs to, e.g.
            "front" or "back".

    Returns:
        torch.Tensor: Mask of shape (1, H, W) with the dtype and device of
        ``image``.

    Raises:
        ValueError: If ``image`` is neither 3- nor 4-dimensional.
    """
    if image.ndim not in (3, 4):
        raise ValueError("Unsupported image shape")

    # For both supported layouts H and W are the two trailing dimensions.
    height, width = image.shape[-2:]

    make_mask = torch.zeros if facename == "front" else torch.ones
    return make_mask((1, height, width), dtype=image.dtype, device=image.device)
    

def generate_cubemap_uv(H, W):
    """Compute a UV map for every cubemap face.

    Each face gets a regular grid over [-1, 1] x [-1, 1] that is embedded
    into 3-D as (x, y, z), then converted with
    ``u = atan2(x, z) / (2*pi) + 0.5`` and
    ``v = atan2(y, sqrt(x**2 + z**2)) / (2*pi) + 0.5``.

    Args:
        H: Grid height (converted with ``int``).
        W: Grid width (converted with ``int``).

    Returns:
        dict mapping "front", "back", "left", "right", "top" and "bottom"
        to a tensor of shape (2, H, W) holding u in channel 0 and v in
        channel 1.
    """
    rows = int(H)
    cols = int(W)

    # Face-local coordinates in [-1, 1]: u varies along columns, v along rows.
    u_range = torch.linspace(-1, 1, cols).view(1, -1).expand(rows, -1)
    v_range = torch.linspace(-1, 1, rows).view(-1, 1).expand(-1, cols)
    ones = torch.ones_like(u_range)

    # (x, y, z) coordinates of every grid point, per face.
    directions = {
        "front": (u_range, v_range, ones),
        "back": (-u_range, v_range, -ones),
        "left": (-ones, v_range, u_range),
        "right": (ones, v_range, -u_range),
        "top": (u_range, -ones, v_range),
        "bottom": (u_range, ones, -v_range),
    }

    two_pi = 2 * torch.pi

    def to_uv(x, y, z):
        u = torch.atan2(x, z) / two_pi + 0.5
        v = torch.atan2(y, torch.sqrt(x ** 2 + z ** 2)) / two_pi + 0.5
        return torch.stack([u, v], dim=0)

    return {name: to_uv(*xyz) for name, xyz in directions.items()}

import torch

def generate_cubemap_uv_padding(H, W, padding_pixels=0):
    """Compute per-face UV maps over a grid extended by a padding margin.

    The face-local grid covers [-1 - r, 1 + r] on both axes, where
    ``r = padding_pixels / W``, sampled at
    (H + 2*padding_pixels) x (W + 2*padding_pixels) points. The UV values
    use the same formulas as ``generate_cubemap_uv`` and are then resized
    back to (H, W) with bilinear interpolation (``align_corners=True``).

    Args:
        H: Output grid height (converted with ``int``).
        W: Output grid width (converted with ``int``); also the divisor of
            the padding ratio for both axes.
        padding_pixels: Padding margin in pixels on each side (default 0).

    Returns:
        dict mapping "front", "back", "left", "right", "top" and "bottom"
        to a tensor of shape (2, H, W) holding u in channel 0 and v in
        channel 1.
    """
    rows = int(H)
    cols = int(W)

    # Margin expressed in face-local units, e.g. 50 / 512 ~= 0.0977.
    margin = padding_pixels / cols

    padded_rows = rows + 2 * padding_pixels
    padded_cols = cols + 2 * padding_pixels

    # Extended face-local coordinates covering [-1 - margin, 1 + margin].
    low, high = -1 - margin, 1 + margin
    u_range = torch.linspace(low, high, padded_cols).view(1, -1).expand(padded_rows, -1)
    v_range = torch.linspace(low, high, padded_rows).view(-1, 1).expand(-1, padded_cols)
    ones = torch.ones_like(u_range)

    # (x, y, z) coordinates of every grid point, per face.
    directions = {
        "front": (u_range, v_range, ones),
        "back": (-u_range, v_range, -ones),
        "left": (-ones, v_range, u_range),
        "right": (ones, v_range, -u_range),
        "top": (u_range, -ones, v_range),
        "bottom": (u_range, ones, -v_range),
    }

    two_pi = 2 * torch.pi
    uv_faces = {}
    for name, (x, y, z) in directions.items():
        u = torch.atan2(x, z) / two_pi + 0.5
        v = torch.atan2(y, torch.sqrt(x ** 2 + z ** 2)) / two_pi + 0.5
        padded_uv = torch.stack([u, v], dim=0).unsqueeze(0)  # (1, 2, H_pad, W_pad)
        # Bring the padded UV grid back to the requested (H, W) resolution.
        uv_faces[name] = F.interpolate(
            padded_uv, size=(rows, cols), mode='bilinear', align_corners=True
        ).squeeze(0)

    return uv_faces


def merge_uv_with_latent(latent, uv_maps,dim=1):
    """Resize UV maps to the latent's spatial size and concatenate them.

    Args:
        latent: Tensor whose last two dimensions give the target size.
        uv_maps: Tensor accepted by ``F.interpolate`` in bilinear mode;
            resized with ``align_corners=False``.
        dim: Dimension along which to concatenate (default 1).

    Returns:
        ``latent`` followed by the resized UV maps, concatenated along ``dim``.
    """
    target_size = latent.shape[-2:]
    uv_matched = F.interpolate(
        uv_maps,
        size=target_size,
        mode="bilinear",
        align_corners=False,
    )
    return torch.cat((latent, uv_matched), dim=dim)
    
    

def resize_and_crop(image: np.ndarray, padding: int) -> np.ndarray:
    """Enlarge an image by ``padding`` on every side, then crop the centre.

    The image is resized to (H + 2*padding, W + 2*padding) with linear
    interpolation, after which a border of ``padding`` pixels is cut from
    each side so the result has the original height and width again.

    Args:
        image: Input image; its first two dimensions are taken as (H, W).
        padding: Border width in pixels added by the resize and removed by
            the crop.

    Returns:
        The centre crop of the resized image, with the original H and W.

    Raises:
        ValueError: If ``image`` is not a numpy array.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError("输入图片必须是 numpy 数组格式")

    height, width = image.shape[:2]

    # cv2.resize takes the target size as (width, height).
    enlarged = cv2.resize(
        image,
        (width + 2 * padding, height + 2 * padding),
        interpolation=cv2.INTER_LINEAR,
    )

    # Drop the outer `padding` pixels on every side.
    rows = slice(padding, height + padding)
    cols = slice(padding, width + padding)
    return enlarged[rows, cols]

def cubemap_unfold(cubemaps,H:int=512,W:int=512,channels:int=3,transparent:bool=False)->Image.Image:
    """Lay six cubemap faces out as a 3x4 cross and return a PIL image.

    Layout (row, column), every cell being H x W pixels:
        row 0:  top at column 1
        row 1:  left, front, right, back at columns 0..3
        row 2:  bottom at column 1
    Cells without a face stay zero; with ``transparent`` their alpha stays 0.

    Args:
        cubemaps: Six face arrays indexed in ``FACES`` order. The dtype of
            the first face is used for the canvas. Faces may be
            (H, W, channels) or 2-D (H, W); 2-D faces are treated as a
            single channel.
        H: Face height in pixels.
        W: Face width in pixels.
        channels: Number of colour channels per face.
        transparent: If true, an extra alpha channel is appended and set to
            opaque wherever a face is placed.

    Returns:
        The unfolded cubemap. Floating-point canvases are multiplied by 255,
        clipped to [0, 255] and cast to uint8 first. A one-channel canvas is
        returned as a 2-D image (mode "L" for uint8 data).
    """
    num_channels = channels + 1 if transparent else channels

    # Always keep a 3-D canvas. Squeezing it up front (as was done before)
    # broke every later three-index assignment for channels == 1, and the
    # squeeze itself failed when an alpha channel was requested.
    canvas = np.zeros(shape=(3 * H, 4 * W, num_channels), dtype=cubemaps[0].dtype)

    face_imgs = {face: cubemaps[i] for i, face in enumerate(FACES)}

    # Face name -> (row, column) cell of the 3x4 cross.
    layout = {
        'top': (0, 1),
        'left': (1, 0),
        'front': (1, 1),
        'right': (1, 2),
        'back': (1, 3),
        'bottom': (2, 1),
    }

    for face, (row, col) in layout.items():
        tile = np.asarray(face_imgs[face])
        if tile.ndim == 2:
            # Give single-channel faces an explicit channel axis.
            tile = tile[:, :, np.newaxis]
        cell = canvas[row * H:(row + 1) * H, col * W:(col + 1) * W]
        cell[:, :, :channels] = tile
        if transparent:
            # The alpha channel is the last one. For float canvases the
            # 255 saturates back to 255 in the clip below.
            cell[:, :, channels] = 255

    if np.issubdtype(canvas.dtype, np.floating):
        canvas = np.clip(canvas * 255, 0, 255).astype(np.uint8)

    if num_channels == 1:
        # PIL expects a 2-D array for single-channel images.
        return Image.fromarray(np.squeeze(canvas, axis=-1))

    return Image.fromarray(canvas)