Image-to-Video
zzwustc's picture
Upload folder using huggingface_hub
ef296aa verified
raw
history blame
4.06 kB
import os
import argparse
from PIL import Image
from glob import glob
import numpy as np
import json
import torch
import torchvision
from torch.nn import functional as F
def create_folder(path, verbose=False, exist_ok=True, safe=True):
    """Create the directory `path` (including parents).

    Parameters
    ----------
    path : str
        Directory to create.
    verbose : bool
        Print a confirmation message on success.
    exist_ok : bool
        When False, an already-existing `path` counts as a failure.
    safe : bool
        When True, failures return False instead of raising.

    Returns
    -------
    bool
        True if the directory was created, False on any failure in safe mode.

    Raises
    ------
    OSError
        On failure when `safe` is False.
    """
    if os.path.exists(path) and not exist_ok:
        if not safe:
            raise OSError(f"Path already exists: {path}")
        return False
    try:
        os.makedirs(path)
    except OSError:
        # makedirs raises FileExistsError for an existing path and other
        # OSError subclasses for permission/IO problems. Catch only OSError
        # (the original bare `except:` also swallowed KeyboardInterrupt etc.)
        # and re-raise the real error in unsafe mode instead of a bare OSError.
        if not safe:
            raise
        return False
    if verbose:
        print(f"Created folder: {path}")
    return True
def read_video(path, start_step=0, time_steps=None, channels="first", exts=("jpg", "png"), resolution=None):
    """Load a video: an .mp4 path is decoded with torchvision, any other
    path is treated as a folder of image frames."""
    if path.endswith(".mp4"):
        return read_video_from_file(path, start_step, time_steps, channels, resolution)
    return read_video_from_folder(path, start_step, time_steps, channels, resolution, exts)
def read_video_from_file(path, start_step, time_steps, channels, resolution):
    """Decode an .mp4 and return frames as a float tensor scaled to [0, 1].

    Parameters
    ----------
    path : str
        Path to the .mp4 file.
    start_step : int
        Index of the first frame to keep.
    time_steps : int or None
        Number of frames to keep; None means "to the end".
    channels : str
        "first" -> (T, C, H, W); "last" -> (T, H, W, C).
    resolution : tuple or None
        Optional (H, W) target size for bilinear resizing.
    """
    video, _, _ = torchvision.io.read_video(path, output_format="TCHW", pts_unit="sec")
    # Cast to float up front: torchvision.io.read_video returns uint8 frames
    # and F.interpolate(mode="bilinear") is not implemented for uint8, so the
    # original code raised whenever `resolution` was given.
    video = video.float()
    if time_steps is None:
        time_steps = len(video) - start_step
    video = video[start_step: start_step + time_steps]
    if resolution is not None:
        video = F.interpolate(video, size=resolution, mode="bilinear")
    if channels == "last":
        video = video.permute(0, 2, 3, 1)
    video = video / 255.
    return video
def read_video_from_folder(path, start_step, time_steps, channels, resolution, exts):
    """Load frames matching `*.{ext}` from `path` (sorted by filename) and
    stack them into a single video tensor via `read_frame`."""
    frame_paths = sorted(
        p for ext in exts for p in glob(os.path.join(path, f"*.{ext}"))
    )
    if time_steps is None:
        time_steps = len(frame_paths) - start_step
    frames = [
        read_frame(frame_paths[t], resolution, channels)
        for t in range(start_step, start_step + time_steps)
    ]
    return torch.stack(frames)
def read_frame(path, resolution=None, channels="first"):
    """Load one RGB image as a float tensor in [0, 1].

    Returns (C, H, W) by default, (H, W, C) when channels == "last";
    optionally bilinearly resized to `resolution` (H, W).
    """
    image = Image.open(path).convert('RGB')
    # float32 in [0, 1], HWC -> CHW
    tensor = torch.from_numpy(np.asarray(image).astype(np.float32) / 255)
    tensor = tensor.permute(2, 0, 1)
    if resolution is not None:
        # interpolate wants a batch dim; add and drop it around the call
        tensor = F.interpolate(tensor.unsqueeze(0), size=resolution, mode="bilinear").squeeze(0)
    if channels == "last":
        tensor = tensor.permute(1, 2, 0)
    return tensor
def write_video(video, path, channels="first", zero_padded=True, ext="png", dtype="torch"):
    """Write a video either as a single .mp4 file or as a folder of frames.

    `dtype="numpy"` converts the input array to a torch tensor first;
    `zero_padded` and `ext` only apply to the folder output path.
    """
    if dtype == "numpy":
        video = torch.from_numpy(video)
    if path.endswith(".mp4"):
        write_video_to_file(video, path, channels)
        return
    write_video_to_folder(video, path, channels, zero_padded, ext)
def write_video_to_file(video, path, channels):
    """Encode a float [0, 1] video tensor as an h264 .mp4 at 24 fps.

    Returns the uint8 (T, H, W, C) tensor that was written.
    """
    create_folder(os.path.dirname(path))
    # write_video expects THWC; move channels last if they come first
    frames = video.permute(0, 2, 3, 1) if channels == "first" else video
    frames = (frames.cpu() * 255.).to(torch.uint8)
    torchvision.io.write_video(path, frames, 24, "h264", options={"pix_fmt": "yuv420p", "crf": "23"})
    return frames
def write_video_to_folder(video, path, channels, zero_padded, ext):
    """Dump every frame of `video` into `path` as `{index}.{ext}` files,
    optionally zero-padding indices to a fixed width for stable sorting."""
    create_folder(path)
    total = video.shape[0]
    width = len(str(total))
    for idx in range(total):
        name = str(idx).zfill(width) if zero_padded else str(idx)
        write_frame(video[idx], os.path.join(path, f"{name}.{ext}"), channels)
def write_frame(frame, path, channels="first"):
    """Save a single float [0, 1] frame tensor as an image file at `path`."""
    create_folder(os.path.dirname(path))
    array = frame.cpu().numpy()
    if channels == "first":
        # CHW -> HWC for PIL
        array = array.transpose(1, 2, 0)
    array = np.clip(np.round(array * 255), 0, 255).astype(np.uint8)
    Image.fromarray(array).save(path)
def read_tracks(path):
    """Load a tracks array previously saved with `write_tracks` (.npy)."""
    tracks = np.load(path)
    return tracks
def write_tracks(tracks, path):
    """Persist a tracks array to `path` in NumPy .npy format."""
    np.save(path, tracks)
def read_config(path):
    """Read a JSON config file and expose its top-level keys as attributes
    of an argparse.Namespace."""
    with open(path, 'r') as f:
        loaded = json.load(f)
    return argparse.Namespace(**loaded)