# Captured from Hugging Face Spaces: wliu283/RealWonder (status shown: "Runtime error"; file size: 4,414 Bytes; ref fc36e06)


import torch
import numpy as np
from scipy.spatial.transform import Rotation
from PIL import Image
from typing import List, Optional, Tuple
from torchvision.io import write_video
from torchvision.transforms.functional import pil_to_tensor
import os
from glob import glob
import imageio.v2 as imageio

# Offset used by the coordinate converters below: pt3d_to_gs() adds it to
# component 2 of its output and gs_to_pt3d() subtracts it from component 1 of
# its output, unless they are called with no_z_offset=True.
# With the value 0.0 the offset has no effect.
PRESET_Z_VALUE = 0.0


def pt3d_to_gs(xyz, no_z_offset=False):
    """Remap the last-axis components of ``xyz``: negate x, swap y and z, shift z.

    For an input whose last axis holds ``(x, y, z)`` the result holds
    ``(-x, z, y + z_offset)``. Judging by the name this converts from a
    PyTorch3D-style frame to a Gaussian-splat frame (not verifiable from this
    file alone).

    Args:
        xyz: ``torch.Tensor`` or ``np.ndarray`` indexed at 0, 1 and 2 along its
            last axis. The input is left untouched; a copy is modified.
        no_z_offset: When true the offset is ``0.0``; otherwise
            ``PRESET_Z_VALUE`` is used.

    Returns:
        A cloned tensor / copied array with the remapped components.

    Raises:
        ValueError: If ``xyz`` is neither a tensor nor a NumPy array.
    """
    if no_z_offset:
        shift = 0.0
    else:
        shift = PRESET_Z_VALUE

    # Pick the backend-specific way of duplicating the input, reject the rest.
    if isinstance(xyz, torch.Tensor):
        converted = xyz.clone()
    elif isinstance(xyz, np.ndarray):
        converted = xyz.copy()
    else:
        raise ValueError(f"Input type {type(xyz)} is not supported")

    # Ellipsis indexing works the same for both backends, so the remap is shared.
    converted[..., 0] = -1 * xyz[..., 0]
    converted[..., 1] = xyz[..., 2]
    converted[..., 2] = xyz[..., 1]

    converted[..., 2] += shift
    return converted


def gs_to_pt3d(xyz, no_z_offset=False):
    """Remap the last-axis components of ``xyz``: negate the first, swap the other two, undo the offset.

    For an input whose last axis holds ``(a, b, c)`` the result holds
    ``(-a, c - z_offset, b)``. Judging by the name this converts from a
    Gaussian-splat frame back to a PyTorch3D-style frame (not verifiable from
    this file alone).

    Args:
        xyz: ``torch.Tensor`` or ``np.ndarray`` indexed at 0, 1 and 2 along its
            last axis. The input is left untouched; a copy is modified.
        no_z_offset: When true the offset is ``0.0``; otherwise
            ``PRESET_Z_VALUE`` is used.

    Returns:
        A cloned tensor / copied array with the remapped components.

    Raises:
        ValueError: If ``xyz`` is neither a tensor nor a NumPy array.
    """
    if no_z_offset:
        shift = 0.0
    else:
        shift = PRESET_Z_VALUE

    # Pick the backend-specific way of duplicating the input, reject the rest.
    if isinstance(xyz, torch.Tensor):
        converted = xyz.clone()
    elif isinstance(xyz, np.ndarray):
        converted = xyz.copy()
    else:
        raise ValueError(f"Input type {type(xyz)} is not supported")

    # Ellipsis indexing works the same for both backends, so the remap is shared.
    converted[..., 0] = -1 * xyz[..., 0]
    converted[..., 1] = xyz[..., 2]
    converted[..., 2] = xyz[..., 1]

    # After the swap the offset-carrying component sits at index 1.
    converted[..., 1] -= shift
    return converted


def _pose_component_to_numpy(value):
    """Return ``value`` as a float64 NumPy array, unwrapping tensor-like inputs."""
    if hasattr(value, 'cpu'):
        # A torch tensor that tracks gradients refuses ``.numpy()``; detaching
        # first makes the conversion work for such inputs as well.
        if hasattr(value, 'detach'):
            value = value.detach()
        value = value.cpu().numpy()
    return np.array(value, dtype=np.float64)


def pose_to_transform_matrix(pos, quat):
    """Convert position and quaternion to 4x4 transformation matrix for Genesis.

    Args:
        pos: Translation with three components. May be a sequence, a NumPy
            array, or a tensor-like object exposing ``.cpu()`` (tensors that
            require grad are detached before conversion).
        quat: Quaternion in ``[w, x, y, z]`` order, same accepted types as
            ``pos``. It is normalised before use, so it need not be unit length.

    Returns:
        A ``(4, 4)`` float64 ``np.ndarray`` whose upper-left 3x3 block is the
        rotation matrix and whose last column holds ``pos``; the bottom row is
        ``[0, 0, 0, 1]``.
    """
    pos = _pose_component_to_numpy(pos)
    quat = _pose_component_to_numpy(quat)

    # SciPy expects scalar-last quaternions: [w, x, y, z] -> [x, y, z, w].
    quat_scipy = quat[[1, 2, 3, 0]]
    quat_scipy = quat_scipy / np.linalg.norm(quat_scipy)
    rot_matrix = Rotation.from_quat(quat_scipy).as_matrix()

    transform_matrix = np.eye(4, dtype=np.float64)
    transform_matrix[:3, :3] = rot_matrix
    transform_matrix[:3, 3] = pos
    return transform_matrix


def resize_and_crop_pil(image: Image.Image, start_y=None) -> Image.Image:
    """Upscale a 512x512 image to 832x832 and cut out a full-width 832x480 band.

    Args:
        image: PIL image; its size must be exactly 512x512.
        start_y: Top row of the band inside the upscaled image. When ``None``
            the band is vertically centred (row 176).

    Returns:
        The cropped PIL image.

    Raises:
        AssertionError: If ``image`` is not 512x512.
    """
    upscaled_side = 832
    band_height = 480

    width, height = image.size
    assert width == 512 and height == 512, f"Expected 512x512 image, got {width}x{height}"

    upscaled = image.resize((upscaled_side, upscaled_side), resample=Image.BILINEAR)

    if start_y is None:
        start_y = (upscaled_side - band_height) // 2

    # Crop box is (left, upper, right, lower); the band always spans the full width.
    band_box = (0, start_y, upscaled_side, start_y + band_height)
    return upscaled.crop(band_box)


def save_video_from_pil(
    frames: List[Image.Image],
    out_path: str,
    fps: int = 16,
    size: Optional[Tuple[int, int]] = None,
    codec: str = "libx264",
    crf: int = 18,
    preset: str = "medium",
    yuv420p: bool = True,
) -> None:
    """Encode a list of PIL images into a video file via ``write_video``.

    Args:
        frames: Images to encode, in playback order. Must not be empty.
        out_path: Destination file path passed to ``write_video``.
        fps: Frame rate of the output.
        size: Target ``(width, height)``. Defaults to the size of the first
            frame; frames of any other size are resized with bicubic filtering.
        codec: Video codec name passed as ``video_codec``.
        crf: Encoder quality setting, forwarded as the ``"crf"`` option.
        preset: Encoder preset, forwarded as the ``"preset"`` option.
        yuv420p: When true, also request ``pix_fmt=yuv420p``.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if not frames:
        raise ValueError("frames is empty")

    if size is None:
        first = frames[0]
        size = (first.width, first.height)
    W, H = size

    def _frame_to_tensor(frame):
        # Force RGB, bring the frame to the target size, and move channels last.
        rgb = frame.convert("RGB")
        if (rgb.width, rgb.height) != (W, H):
            rgb = rgb.resize((W, H), Image.BICUBIC)
        return pil_to_tensor(rgb).permute(1, 2, 0).contiguous()

    video = torch.stack([_frame_to_tensor(frame) for frame in frames], dim=0)

    options = {"crf": str(crf), "preset": preset}
    if yuv420p:
        options["pix_fmt"] = "yuv420p"

    write_video(
        filename=out_path,
        video_array=video,
        fps=fps,
        video_codec=codec,
        options=options,
    )

def save_gif_from_image_folder(input_folder, gif_path, duration=0.1):
    """Assemble the PNG/JPEG images of a folder into a looping GIF.

    Files matching ``*.png``, ``*.jpg`` and ``*.jpeg`` directly inside
    ``input_folder`` are collected and ordered by sorted path. Images that fail
    to load are reported and skipped rather than aborting the whole run.

    Args:
        input_folder: Directory to scan for images.
        gif_path: Output path of the GIF.
        duration: Per-frame duration forwarded to ``imageio.mimsave``.

    Returns:
        None. Progress and problems are reported with ``print``; nothing is
        written when no image could be loaded.
    """
    patterns = ('*.png', '*.jpg', '*.jpeg')
    input_images = sorted(
        path
        for pattern in patterns
        for path in glob(os.path.join(input_folder, pattern))
    )
    if not input_images:
        print("No images found in input folder.")
        return

    frames = []
    for img_path in input_images:
        try:
            frames.append(imageio.imread(img_path))
        except Exception as e:
            # Best effort: one unreadable file must not prevent the GIF.
            print(f"[ERROR] Skipping {img_path}: {e}")

    if not frames:
        print("No valid images to save as GIF.")
        return

    imageio.mimsave(gif_path, frames, duration=duration, loop=0)
    print(f"GIF saved to {gif_path}")