Spaces:

UDCAI
/

VACE-Preprocessor

Runtime error

App Files Files Community

VACE-Preprocessor / app.py

UDCAI

Create app.py

b67d96d verified 9 months ago

raw

history blame contribute delete

56.1 kB

	# -- coding: utf-8 --
	# Copyright (c) Alibaba, Inc. and its affiliates.
	import shutil
	import sys
	import json
	import os
	import argparse
	import datetime
	import copy
	import random

	import cv2
	import imageio
	import numpy as np
	import gradio as gr
	import tempfile
	from pycocotools import mask as mask_utils

	sys.path.insert(0, os.path.sep.join(os.path.realpath(__file__).split(os.path.sep)[:-3]))
	from vace.annotators.utils import single_rle_to_mask, read_video_frames, save_one_video, read_video_one_frame
	from vace.configs import VACE_IMAGE_PREPROCCESS_CONFIGS, VACE_IMAGE_MASK_PREPROCCESS_CONFIGS, VACE_IMAGE_MASKAUG_PREPROCCESS_CONFIGS, VACE_VIDEO_PREPROCCESS_CONFIGS, VACE_VIDEO_MASK_PREPROCCESS_CONFIGS, VACE_VIDEO_MASKAUG_PREPROCCESS_CONFIGS, VACE_COMPOSITION_PREPROCCESS_CONFIGS
	import vace.annotators as annotators


	def tid_maker():
	return '{0:%Y%m%d%H%M%S%f}'.format(datetime.datetime.now())

	def dict_to_markdown_table(d):
	markdown = "\| Key \| Value \|\n"
	markdown += "\| --- \| ----- \|\n"
	for key, value in d.items():
	markdown += f"\| {key} \| {value} \|\n"
	return markdown


	class VACEImageTag():
	def __init__(self, cfg):
	self.save_dir = os.path.join(cfg.save_dir, 'image')
	if not os.path.exists(self.save_dir):
	os.makedirs(self.save_dir)

	self.image_anno_processor = {}
	self.load_image_anno_list = ["image_plain", "image_depth", "image_gray", "image_pose", "image_scribble", "image_outpainting"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_image_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.image_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}

	self.mask_anno_processor = {}
	self.load_mask_anno_list = ["image_mask_plain", "image_mask_seg", "image_mask_draw", "image_mask_face"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_MASK_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_mask_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.mask_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}

	self.maskaug_anno_processor = {}
	self.load_maskaug_anno_list = ["image_maskaug_plain", "image_maskaug_invert", "image_maskaug", "image_maskaug_region_random", "image_maskaug_region_crop"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_MASKAUG_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_maskaug_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.maskaug_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}

	self.seg_type = ['maskpointtrack', 'maskbboxtrack', 'masktrack', 'salientmasktrack', 'salientbboxtrack', 'label', 'caption']
	self.seg_draw_type = ['maskpoint', 'maskbbox', 'mask']

	def create_ui_image(self, args, *kwargs):
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_image = gr.ImageMask(
	label="input_process_image",
	layers=False,
	type='pil',
	format='png',
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_image = gr.Image(
	label="output_process_image",
	value=None,
	type='pil',
	image_mode='RGB',
	format='png',
	interactive=False)
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_masked_image = gr.Image(
	label="output_process_masked_image",
	value=None,
	type='pil',
	image_mode='RGB',
	format='png',
	interactive=False)
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_mask = gr.Image(
	label="output_process_mask",
	value=None,
	type='pil',
	image_mode='L',
	format='png',
	interactive=False)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.image_process_type = gr.Dropdown(
	label='Image Annotator',
	choices=list(self.image_anno_processor.keys()),
	value=list(self.image_anno_processor.keys())[0],
	interactive=True)
	with gr.Row(visible=False) as self.outpainting_setting:
	self.outpainting_direction = gr.Dropdown(
	multiselect=True,
	label='Outpainting Direction',
	choices=['left', 'right', 'up', 'down'],
	value=['left', 'right', 'up', 'down'],
	interactive=True)
	self.outpainting_ratio = gr.Slider(
	label='Outpainting Ratio',
	minimum=0.0,
	maximum=2.0,
	step=0.1,
	value=0.3,
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.mask_process_type = gr.Dropdown(
	label='Mask Annotator',
	choices=list(self.mask_anno_processor.keys()),
	value=list(self.mask_anno_processor.keys())[0],
	interactive=True)
	with gr.Row():
	self.mask_opacity = gr.Slider(
	label='Mask Opacity',
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=1.0,
	interactive=True)
	self.mask_gray = gr.Checkbox(
	label='Mask Gray',
	value=True,
	interactive=True)
	with gr.Row(visible=False) as self.segment_setting:
	self.mask_type = gr.Dropdown(
	label='Segment Type',
	choices=self.seg_type,
	value='maskpointtrack',
	interactive=True)
	self.mask_segtag = gr.Textbox(
	label='Mask Seg Tag',
	value='',
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.mask_aug_process_type = gr.Dropdown(
	label='Mask Aug Annotator',
	choices=list(self.maskaug_anno_processor.keys()),
	value=list(self.maskaug_anno_processor.keys())[0],
	interactive=True)
	with gr.Row(visible=False) as self.maskaug_setting:
	self.mask_aug_type = gr.Dropdown(
	label='Mask Aug Type',
	choices=['random', 'original', 'original_expand', 'hull', 'hull_expand', 'bbox', 'bbox_expand'],
	value='original',
	interactive=True)
	self.mask_expand_ratio = gr.Slider(
	label='Mask Expand Ratio',
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=0.3,
	interactive=True)
	self.mask_expand_iters = gr.Slider(
	label='Mask Expand Iters',
	minimum=1,
	maximum=10,
	step=1,
	value=5,
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.process_button = gr.Button(
	value='[1]Sample Process',
	elem_classes='type_row',
	elem_id='process_button',
	visible=True)
	with gr.Row():
	self.save_button = gr.Button(
	value='[2]Sample Save',
	elem_classes='type_row',
	elem_id='save_button',
	visible=True)
	with gr.Row():
	self.save_log = gr.Markdown()


	def change_process_type(self, image_process_type, mask_process_type, mask_aug_process_type):
	outpainting_setting_visible = False
	segment_setting = False
	maskaug_setting = False
	segment_choices = self.seg_type
	if image_process_type == "image_outpainting":
	outpainting_setting_visible = True
	if mask_process_type in ["image_mask_seg", "image_mask_draw"]:
	segment_setting = True
	if mask_process_type in ["image_mask_draw"]:
	segment_choices = self.seg_draw_type
	if mask_aug_process_type in ["image_maskaug", "image_maskaug_region_random", "image_maskaug_region_crop"]:
	maskaug_setting = True
	return gr.update(visible=outpainting_setting_visible), gr.update(visible=segment_setting), gr.update(choices=segment_choices, value=segment_choices[0]), gr.update(visible=maskaug_setting)

	def process_image_data(self, input_process_image, image_process_type, outpainting_direction, outpainting_ratio, mask_process_type, mask_type, mask_segtag, mask_opacity, mask_gray, mask_aug_process_type, mask_aug_type, mask_expand_ratio, mask_expand_iters):
	image = np.array(input_process_image['background'].convert('RGB'))
	mask = np.array(input_process_image['layers'][0].split()[-1].convert('L'))
	image_shape = image.shape

	if image_process_type in ['image_outpainting']:
	ret_data = self.image_anno_processor[image_process_type]['anno_ins'].forward(image, direction=outpainting_direction, expand_ratio=outpainting_ratio)
	image, mask = ret_data['image'], ret_data['mask']
	else:
	image = self.image_anno_processor[image_process_type]['anno_ins'].forward(image)
	if image.shape != image_shape:
	image = cv2.resize(image, image_shape[:2][::-1], interpolation=cv2.INTER_LINEAR)

	if mask_process_type in ["image_mask_seg"]:
	mask = mask[..., None]
	mask = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(image, mask=mask, label=mask_segtag, caption=mask_segtag, mode=mask_type)['mask']
	elif mask_process_type in ['image_mask_draw']:
	ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=mask, mode=mask_type)
	mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
	elif mask_process_type in ['image_mask_face']:
	ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(image=image)
	mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
	else:
	ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=mask)
	mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data

	mask_cfg = {
	'mode': mask_aug_type,
	'kwargs': {
	'expand_ratio': mask_expand_ratio,
	'expand_iters': mask_expand_iters
	}
	}
	if mask_aug_process_type == 'image_maskaug':
	mask = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(np.array(mask), mask_cfg)
	elif mask_aug_process_type in ["image_maskaug_region_random", "image_maskaug_region_crop"]:
	image = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(np.array(image), np.array(mask), mask_cfg=mask_cfg)
	else:
	ret_data = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask=mask)
	mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data

	if mask_opacity > 0:
	if mask.shape[:2] != image.shape[:2]:
	raise gr.Error(f"Mask shape {mask.shape[:2]} should be the same as image shape {image.shape[:2]} or set mask_opacity to 0.")
	if mask_gray:
	masked_image = image.copy()
	masked_image[mask == 255] = 127.5
	else:
	mask_weight = mask / 255 * mask_opacity
	masked_image = np.clip(image * (1 - mask_weight[:, :, None]), 0, 255).astype(np.uint8)
	else:
	masked_image = image
	return image, masked_image, mask

	def save_image_data(self, input_image, image, masked_image, mask):
	save_data = {
	"input_image": input_image['background'].convert('RGB') if isinstance(input_image, dict) else input_image,
	"input_image_mask": input_image['layers'][0].split()[-1].convert('L') if isinstance(input_image, dict) else None,
	"output_image": image,
	"output_masked_image": masked_image,
	"output_image_mask": mask
	}
	save_info = {}
	tid = tid_maker()
	for name, image in save_data.items():
	if image is None: continue
	save_image_dir = os.path.join(self.save_dir, tid[:8])
	if not os.path.exists(save_image_dir): os.makedirs(save_image_dir)
	save_image_path = os.path.join(save_image_dir, tid + '-' + name + '.png')
	save_info[name] = save_image_path
	image.save(save_image_path)
	gr.Info(f'Save {name} to {save_image_path}', duration=15)
	save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
	save_info['save_info'] = save_txt_path
	with open(save_txt_path, 'w') as f:
	f.write(json.dumps(save_info, ensure_ascii=False))
	return dict_to_markdown_table(save_info)


	def set_callbacks_image(self, **kwargs):
	inputs = [self.input_process_image, self.image_process_type, self.outpainting_direction, self.outpainting_ratio, self.mask_process_type, self.mask_type, self.mask_segtag, self.mask_opacity, self.mask_gray, self.mask_aug_process_type, self.mask_aug_type, self.mask_expand_ratio, self.mask_expand_iters]
	outputs = [self.output_process_image, self.output_process_masked_image, self.output_process_mask]
	self.process_button.click(self.process_image_data,
	inputs=inputs,
	outputs=outputs)
	self.save_button.click(self.save_image_data,
	inputs=[self.input_process_image, self.output_process_image, self.output_process_masked_image, self.output_process_mask],
	outputs=[self.save_log])
	process_inputs = [self.image_process_type, self.mask_process_type, self.mask_aug_process_type]
	process_outputs = [self.outpainting_setting, self.segment_setting, self.mask_type, self.maskaug_setting]
	self.image_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
	self.mask_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
	self.mask_aug_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)


	class VACEVideoTag():
	def __init__(self, cfg):
	self.save_dir = os.path.join(cfg.save_dir, 'video')
	if not os.path.exists(self.save_dir):
	os.makedirs(self.save_dir)

	self.video_anno_processor = {}
	self.load_video_anno_list = ["plain", "depth", "flow", "gray", "pose", "scribble", "outpainting", "outpainting_inner", "framerefext"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_video_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.video_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}

	self.mask_anno_processor = {}
	self.load_mask_anno_list = ["mask_expand", "mask_seg"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_MASK_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_mask_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.mask_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}

	self.maskaug_anno_processor = {}
	self.load_maskaug_anno_list = ["maskaug_plain", "maskaug_invert", "maskaug", "maskaug_layout"]
	for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_MASKAUG_PREPROCCESS_CONFIGS).items():
	if anno_name not in self.load_maskaug_anno_list: continue
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.maskaug_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}


	def create_ui_video(self, args, *kwargs):
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	self.input_process_video = gr.Video(
	label="input_process_video",
	sources=['upload'],
	interactive=True)
	self.input_process_image_show = gr.Image(
	label="input_process_image_show",
	format='png',
	interactive=False)
	with gr.Column(scale=2):
	self.input_process_image = gr.ImageMask(
	label="input_process_image",
	layers=False,
	type='pil',
	format='png',
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_video = gr.Video(
	label="output_process_video",
	value=None,
	interactive=False)
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_masked_video = gr.Video(
	label="output_process_masked_video",
	value=None,
	interactive=False)
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_video_mask = gr.Video(
	label="output_process_video_mask",
	value=None,
	interactive=False)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.video_process_type = gr.Dropdown(
	label='Video Annotator',
	choices=list(self.video_anno_processor.keys()),
	value=list(self.video_anno_processor.keys())[0],
	interactive=True)
	with gr.Row(visible=False) as self.outpainting_setting:
	self.outpainting_direction = gr.Dropdown(
	multiselect=True,
	label='Outpainting Direction',
	choices=['left', 'right', 'up', 'down'],
	value=['left', 'right', 'up', 'down'],
	interactive=True)
	self.outpainting_ratio = gr.Slider(
	label='Outpainting Ratio',
	minimum=0.0,
	maximum=2.0,
	step=0.1,
	value=0.3,
	interactive=True)
	with gr.Row(visible=False) as self.frame_reference_setting:
	self.frame_reference_mode = gr.Dropdown(
	label='Frame Reference Mode',
	choices=['first', 'last', 'firstlast', 'random'],
	value='first',
	interactive=True)
	self.frame_reference_num = gr.Textbox(
	label='Frame Reference Num',
	value='1',
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.mask_process_type = gr.Dropdown(
	label='Mask Annotator',
	choices=list(self.mask_anno_processor.keys()),
	value=list(self.mask_anno_processor.keys())[0],
	interactive=True)
	with gr.Row():
	self.mask_opacity = gr.Slider(
	label='Mask Opacity',
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=1.0,
	interactive=True)
	self.mask_gray = gr.Checkbox(
	label='Mask Gray',
	value=True,
	interactive=True)
	with gr.Row(visible=False) as self.segment_setting:
	self.mask_type = gr.Dropdown(
	label='Segment Type',
	choices=['maskpointtrack', 'maskbboxtrack', 'masktrack', 'salientmasktrack', 'salientbboxtrack',
	'label', 'caption'],
	value='maskpointtrack',
	interactive=True)
	self.mask_segtag = gr.Textbox(
	label='Mask Seg Tag',
	value='',
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.mask_aug_process_type = gr.Dropdown(
	label='Mask Aug Annotator',
	choices=list(self.maskaug_anno_processor.keys()),
	value=list(self.maskaug_anno_processor.keys())[0],
	interactive=True)
	with gr.Row(visible=False) as self.maskaug_setting:
	self.mask_aug_type = gr.Dropdown(
	label='Mask Aug Type',
	choices=['random', 'original', 'original_expand', 'hull', 'hull_expand', 'bbox', 'bbox_expand'],
	value='original',
	interactive=True)
	self.mask_expand_ratio = gr.Slider(
	label='Mask Expand Ratio',
	minimum=0.0,
	maximum=1.0,
	step=0.1,
	value=0.3,
	interactive=True)
	self.mask_expand_iters = gr.Slider(
	label='Mask Expand Iters',
	minimum=1,
	maximum=10,
	step=1,
	value=5,
	interactive=True)
	self.mask_layout_label = gr.Textbox(
	label='Mask Layout Label',
	value='',
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.process_button = gr.Button(
	value='[1]Sample Process',
	elem_classes='type_row',
	elem_id='process_button',
	visible=True)
	with gr.Row():
	self.save_button = gr.Button(
	value='[2]Sample Save',
	elem_classes='type_row',
	elem_id='save_button',
	visible=True)
	with gr.Row():
	self.save_log = gr.Markdown()

	def process_video_data(self, input_process_video, input_process_image, video_process_type, outpainting_direction, outpainting_ratio, frame_reference_mode, frame_reference_num, mask_process_type, mask_type, mask_segtag, mask_opacity, mask_gray, mask_aug_process_type, mask_aug_type, mask_expand_ratio, mask_expand_iters, mask_layout_label):
	video_frames, fps, width, height, total_frames = read_video_frames(input_process_video, use_type='cv2', info=True)

	# image = np.array(input_process_image['background'].convert('RGB'))
	mask = input_process_image['layers'][0].split()[-1].convert('L')
	if mask.height != height and mask.width != width:
	mask = mask.resize((width, height))

	if mask_process_type in ['mask_seg']:
	mask_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(video=input_process_video, mask=mask, label=mask_segtag, caption=mask_segtag, mode=mask_type, return_frame=False)
	mask_frames = mask_data['masks']
	elif mask_process_type in ['mask_expand']:
	mask_frames = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=np.array(mask), expand_num=total_frames)
	else:
	raise NotImplementedError

	output_video = []
	if video_process_type in ['framerefext']:
	output_data = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames, ref_cfg={'mode': frame_reference_mode}, ref_num=frame_reference_num)
	output_video, mask_frames = output_data['frames'], output_data['masks']
	elif video_process_type in ['outpainting', 'outpainting_inner']:
	# ratio = ((16 / 9 * height) / width - 1) / 2
	output_data = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames, direction=outpainting_direction, expand_ratio=outpainting_ratio)
	output_video, mask_frames = output_data['frames'], output_data['masks']
	else:
	output_video = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames)


	mask_cfg = {
	'mode': mask_aug_type,
	'kwargs': {
	'expand_ratio': mask_expand_ratio,
	'expand_iters': mask_expand_iters
	}
	}
	# print(mask_cfg)
	if mask_aug_process_type == 'maskaug_layout':
	output_video = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask_frames, mask_cfg=mask_cfg, label=mask_layout_label)
	mask_aug_frames = [ np.ones_like(submask) * 255 for submask in mask_frames ]
	else:
	mask_aug_frames = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask_frames)

	with (tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_video_path, \
	tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as masked_video_path, \
	tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as mask_video_path):
	output_video_writer = imageio.get_writer(output_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
	masked_video_writer = imageio.get_writer(masked_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
	mask_video_writer = imageio.get_writer(mask_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
	for i in range(total_frames):
	output_frame = output_video[i] if len(output_video) > 0 else video_frames[i]
	frame = output_video[i] if len(output_video) > 0 else video_frames[i]
	mask = mask_aug_frames[i]
	if mask_gray:
	masked_image = frame.copy()
	masked_image[mask == 255] = 127.5
	else:
	mask_weight = mask / 255 * mask_opacity
	masked_image = np.clip(frame * (1 - mask_weight[:, :, None]), 0, 255).astype(np.uint8)
	output_video_writer.append_data(output_frame)
	masked_video_writer.append_data(masked_image)
	mask_video_writer.append_data(mask)
	output_video_writer.close()
	masked_video_writer.close()
	mask_video_writer.close()

	return output_video_path.name, masked_video_path.name, mask_video_path.name

	def save_video_data(self, input_video_path, input_image, video_path, masked_video_path, mask_path):

	save_image_data = {
	"input_image": input_image['background'].convert('RGB') if isinstance(input_image, dict) else input_image,
	"input_image_mask": input_image['layers'][0].split()[-1].convert('L') if isinstance(input_image, dict) else None
	}
	save_video_data = {
	"input_video": input_video_path,
	"output_video": video_path,
	"output_masked_video": masked_video_path,
	"output_video_mask": mask_path
	}
	save_info = {}
	tid = tid_maker()
	for name, image in save_image_data.items():
	if image is None: continue
	save_image_dir = os.path.join(self.save_dir, tid[:8])
	if not os.path.exists(save_image_dir): os.makedirs(save_image_dir)
	save_image_path = os.path.join(save_image_dir, tid + '-' + name + '.png')
	save_info[name] = save_image_path
	image.save(save_image_path)
	gr.Info(f'Save {name} to {save_image_path}', duration=15)
	for name, ori_video_path in save_video_data.items():
	if ori_video_path is None: continue
	save_video_dir = os.path.join(self.save_dir, tid[:8])
	if not os.path.exists(save_video_dir): os.makedirs(save_video_dir)
	save_video_path = os.path.join(save_video_dir, tid + '-' + name + os.path.splitext(ori_video_path)[-1])
	save_info[name] = save_video_path
	shutil.copy(ori_video_path, save_video_path)
	gr.Info(f'Save {name} to {save_video_path}', duration=15)

	save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
	save_info['save_info'] = save_txt_path
	with open(save_txt_path, 'w') as f:
	f.write(json.dumps(save_info, ensure_ascii=False))
	return dict_to_markdown_table(save_info)


	def change_process_type(self, video_process_type, mask_process_type, mask_aug_process_type):
	frame_reference_setting_visible = False
	outpainting_setting_visible = False
	segment_setting = False
	maskaug_setting = False
	if video_process_type in ["framerefext"]:
	frame_reference_setting_visible = True
	elif video_process_type in ["outpainting", "outpainting_inner"]:
	outpainting_setting_visible = True
	if mask_process_type in ["mask_seg"]:
	segment_setting = True
	if mask_aug_process_type in ["maskaug", "maskaug_layout"]:
	maskaug_setting = True
	return gr.update(visible=frame_reference_setting_visible), gr.update(visible=outpainting_setting_visible), gr.update(visible=segment_setting), gr.update(visible=maskaug_setting)


	def set_callbacks_video(self, **kwargs):
	inputs = [self.input_process_video, self.input_process_image, self.video_process_type, self.outpainting_direction, self.outpainting_ratio, self.frame_reference_mode, self.frame_reference_num, self.mask_process_type, self.mask_type, self.mask_segtag, self.mask_opacity, self.mask_gray, self.mask_aug_process_type, self.mask_aug_type, self.mask_expand_ratio, self.mask_expand_iters, self.mask_layout_label]
	outputs = [self.output_process_video, self.output_process_masked_video, self.output_process_video_mask]
	self.process_button.click(self.process_video_data, inputs=inputs, outputs=outputs)
	self.input_process_video.change(read_video_one_frame, inputs=[self.input_process_video], outputs=[self.input_process_image_show])
	self.save_button.click(self.save_video_data,
	inputs=[self.input_process_video, self.input_process_image, self.output_process_video, self.output_process_masked_video, self.output_process_video_mask],
	outputs=[self.save_log])
	process_inputs = [self.video_process_type, self.mask_process_type, self.mask_aug_process_type]
	process_outputs = [self.frame_reference_setting, self.outpainting_setting, self.segment_setting, self.maskaug_setting]
	self.video_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
	self.mask_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
	self.mask_aug_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)



	class VACETagComposition():
	def __init__(self, cfg):
	self.save_dir = os.path.join(cfg.save_dir, 'composition')
	if not os.path.exists(self.save_dir):
	os.makedirs(self.save_dir)

	anno_name = 'composition'
	anno_cfg = copy.deepcopy(VACE_COMPOSITION_PREPROCCESS_CONFIGS[anno_name])
	class_name = anno_cfg.pop("NAME")
	input_params = anno_cfg.pop("INPUTS")
	output_params = anno_cfg.pop("OUTPUTS")
	anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
	self.comp_anno_processor = {"inputs": input_params, "outputs": output_params,
	"anno_ins": anno_ins}
	self.process_types = ["repaint", "extension", "control"]

	def create_ui_composition(self, args, *kwargs):
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	self.input_process_video_1 = gr.Video(
	label="input_process_video_1",
	sources=['upload'],
	interactive=True)
	with gr.Column(scale=1):
	self.input_process_video_2 = gr.Video(
	label="input_process_video_1",
	sources=['upload'],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_video_mask_1 = gr.Video(
	label="input_process_video_mask_1",
	sources=['upload'],
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_video_mask_2 = gr.Video(
	label="input_process_video_mask_2",
	sources=['upload'],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_type_1 = gr.Dropdown(
	label='input_process_type_1',
	choices=list(self.process_types),
	value=list(self.process_types)[0],
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_type_2 = gr.Dropdown(
	label='input_process_type_2',
	choices=list(self.process_types),
	value=list(self.process_types)[0],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.process_button = gr.Button(
	value='[1]Sample Process',
	elem_classes='type_row',
	elem_id='process_button',
	visible=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	self.output_process_video = gr.Video(
	label="output_process_video",
	sources=['upload'],
	interactive=False)
	with gr.Column(scale=1):
	self.output_process_mask = gr.Video(
	label="output_process_mask",
	sources=['upload'],
	interactive=False)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.save_button = gr.Button(
	value='[2]Sample Save',
	elem_classes='type_row',
	elem_id='save_button',
	visible=True)
	with gr.Row():
	self.save_log = gr.Markdown()

	def process_composition_data(self, input_process_video_1, input_process_video_2, input_process_video_mask_1, input_process_video_mask_2, input_process_type_1, input_process_type_2):
	# "repaint", "extension", "control"
	# ('repaint', 'repaint') / ('repaint', 'extension') / ('repaint', 'control')
	# ('extension', 'extension') / ('extension', 'repaint') / ('extension', 'control')
	# ('control', 'control') / ('control', 'repaint') / ('control', 'extension')

	video_frames_1, video_fps_1, video_width_1, video_height_1, video_total_frames_1 = read_video_frames(input_process_video_1, use_type='cv2', info=True)
	video_frames_2, video_fps_2, video_width_2, video_height_2, video_total_frames_2 = read_video_frames(input_process_video_2, use_type='cv2', info=True)
	mask_frames_1, mask_fps_1, mask_width_1, mask_height_1, mask_total_frames_1 = read_video_frames(input_process_video_mask_1, use_type='cv2', info=True)
	mask_frames_2, mask_fps_2, mask_width_2, mask_height_2, mask_total_frames_2 = read_video_frames(input_process_video_mask_2, use_type='cv2', info=True)
	mask_frames_1 = [np.where(mask > 127, 1, 0).astype(np.uint8) for mask in mask_frames_1]
	mask_frames_2 = [np.where(mask > 127, 1, 0).astype(np.uint8) for mask in mask_frames_2]

	assert video_width_1 == video_width_2 == mask_width_1 == mask_width_2
	assert video_height_1 == video_height_2 == mask_height_1 == mask_height_2
	assert video_fps_1 == video_fps_2

	output_video, output_mask = self.comp_anno_processor['anno_ins'].forward(input_process_type_1, input_process_type_2, video_frames_1, video_frames_2, mask_frames_1, mask_frames_2)

	fps = video_fps_1
	total_frames = len(output_video)
	if output_video is not None and output_mask is not None:
	with (tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_video_path, \
	tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as mask_video_path):
	output_video_writer = imageio.get_writer(output_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
	mask_video_writer = imageio.get_writer(mask_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
	for i in range(total_frames):
	output_video_writer.append_data(output_video[i])
	mask_video_writer.append_data(output_mask[i])
	output_video_writer.close()
	mask_video_writer.close()

	return output_video_path.name, mask_video_path.name
	else:
	return None, None

	def save_composition_data(self, video_path, mask_path):
	save_video_data = {
	"output_video": video_path,
	"output_video_mask": mask_path
	}
	save_info = {}
	tid = tid_maker()
	for name, ori_video_path in save_video_data.items():
	if ori_video_path is None: continue
	save_video_dir = os.path.join(self.save_dir, tid[:8])
	if not os.path.exists(save_video_dir): os.makedirs(save_video_dir)
	save_video_path = os.path.join(save_video_dir, tid + '-' + name + os.path.splitext(ori_video_path)[-1])
	save_info[name] = save_video_path
	shutil.copy(ori_video_path, save_video_path)
	gr.Info(f'Save {name} to {save_video_path}', duration=15)
	save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
	save_info['save_info'] = save_txt_path
	with open(save_txt_path, 'w') as f:
	f.write(json.dumps(save_info, ensure_ascii=False))
	return dict_to_markdown_table(save_info)

	def set_callbacks_composition(self, **kwargs):
	inputs = [self.input_process_video_1, self.input_process_video_2, self.input_process_video_mask_1, self.input_process_video_mask_2, self.input_process_type_1, self.input_process_type_2]
	outputs = [self.output_process_video, self.output_process_mask]
	self.process_button.click(self.process_composition_data,
	inputs=inputs,
	outputs=outputs)
	self.save_button.click(self.save_composition_data,
	inputs=[self.output_process_video, self.output_process_mask],
	outputs=[self.save_log])


	class VACEVideoTool():
	def __init__(self, cfg):
	self.save_dir = os.path.join(cfg.save_dir, 'video_tool')
	if not os.path.exists(self.save_dir):
	os.makedirs(self.save_dir)
	self.process_types = ["expand_frame", "expand_clipframe", "concat_clip", "blank_mask"]

	def create_ui_video_tool(self, args, *kwargs):
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_image_1 = gr.Image(
	label="input_process_image_1",
	type='pil',
	format='png',
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_image_2 = gr.Image(
	label="input_process_image_2",
	type='pil',
	format='png',
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	self.input_process_video_1 = gr.Video(
	label="input_process_video_1",
	sources=['upload'],
	interactive=True)
	with gr.Column(scale=1):
	self.input_process_video_2 = gr.Video(
	label="input_process_video_1",
	sources=['upload'],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_video_mask_1 = gr.Video(
	label="input_process_video_mask_1",
	sources=['upload'],
	interactive=True)
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_video_mask_2 = gr.Video(
	label="input_process_video_mask_2",
	sources=['upload'],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.input_process_type = gr.Dropdown(
	label='input_process_type',
	choices=list(self.process_types),
	value=list(self.process_types)[0],
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.output_height = gr.Textbox(
	label='resolutions_height',
	value=720,
	interactive=True)
	self.output_width = gr.Textbox(
	label='resolutions_width',
	value=1280,
	interactive=True)
	self.frame_rate = gr.Textbox(
	label='frame_rate',
	value=16,
	interactive=True)
	self.num_frames = gr.Textbox(
	label='num_frames',
	value=81,
	interactive=True)
	self.mask_gray = gr.Checkbox(
	label='Mask Gray',
	value=False,
	interactive=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.process_button = gr.Button(
	value='[1]Sample Process',
	elem_classes='type_row',
	elem_id='process_button',
	visible=True)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.output_process_image = gr.Image(
	label="output_process_image",
	value=None,
	type='pil',
	image_mode='RGB',
	format='png',
	interactive=False)
	with gr.Column(scale=1):
	self.output_process_video = gr.Video(
	label="output_process_video",
	sources=['upload'],
	interactive=False)
	with gr.Column(scale=1):
	self.output_process_mask = gr.Video(
	label="output_process_mask",
	sources=['upload'],
	interactive=False)
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	with gr.Row():
	self.save_button = gr.Button(
	value='[2]Sample Save',
	elem_classes='type_row',
	elem_id='save_button',
	visible=True)
	with gr.Row():
	self.save_log = gr.Markdown()

	def process_tool_data(self, input_process_image_1, input_process_image_2, input_process_video_1, input_process_video_2, input_process_video_mask_1, input_process_video_mask_2, input_process_type, output_height, output_width, frame_rate, num_frames):
	output_height, output_width, frame_rate, num_frames = int(output_height), int(output_width), int(frame_rate), int(num_frames)
	output_video, output_mask = None, None
	if input_process_type == 'expand_frame':
	assert input_process_image_1 or input_process_image_2
	output_video = [np.ones((output_height, output_width, 3), dtype=np.uint8) * 127.5] * num_frames
	output_mask = [np.ones((output_height, output_width), dtype=np.uint8) * 255] * num_frames
	if input_process_image_1 is not None:
	output_video[0] = np.array(input_process_image_1.resize((output_width, output_height)))
	output_mask[0] = np.zeros((output_height, output_width))
	if input_process_image_2 is not None:
	output_video[-1] = np.array(input_process_image_2.resize((output_width, output_height)))
	output_mask[-1] = np.zeros((output_height, output_width))
	elif input_process_type == 'expand_clipframe':
	video_frames, fps, width, height, total_frames = read_video_frames(input_process_video_1, use_type='cv2', info=True)
	frame_rate = fps
	output_video = video_frames + [np.ones((height, width, 3), dtype=np.uint8) * 127.5] * num_frames
	output_mask = [np.zeros((height, width), dtype=np.uint8)] * total_frames + [np.ones((height, width), dtype=np.uint8) * 255] * num_frames
	output_video[-1] = np.array(input_process_image_2.resize((width, height)))
	output_mask[-1] = np.zeros((height, width))
	elif input_process_type == 'concat_clip':
	video_frames_1, fps_1, width_1, height_1, total_frames_1 = read_video_frames(input_process_video_1, use_type='cv2', info=True)
	video_frames_2, fps_2, width_2, height_2, total_frames_2 = read_video_frames(input_process_video_2, use_type='cv2', info=True)
	if width_1 != width_2 or height_1 != height_2:
	video_frames_2 = [np.array(frame.resize((width_1, height_1))) for frame in video_frames_2]
	frame_rate = fps_1
	output_video = video_frames_1 + video_frames_2
	output_mask = [np.ones((height_1, width_1), dtype=np.uint8) * 255] * len(output_video)
	elif input_process_type == 'blank_mask':
	output_mask = [np.ones((output_height, output_width), dtype=np.uint8) * 255] * num_frames
	else:
	raise NotImplementedError
	output_image_path = None

	if output_video is not None:
	with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_path:
	flag = save_one_video(videos=output_video, file_path=output_path.name, fps=frame_rate)
	output_video_path = output_path.name if flag else None
	else:
	output_video_path = None

	if output_mask is not None:
	with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_path:
	flag = save_one_video(videos=output_mask, file_path=output_path.name, fps=frame_rate)
	output_mask_path = output_path.name if flag else None
	else:
	output_mask_path = None
	return output_image_path, output_video_path, output_mask_path


	def save_tool_data(self, image_path, video_path, mask_path):
	save_video_data = {
	"output_video": video_path,
	"output_video_mask": mask_path
	}
	save_info = {}
	tid = tid_maker()
	for name, ori_video_path in save_video_data.items():
	if ori_video_path is None: continue
	save_video_path = os.path.join(self.save_dir, tid[:8], tid + '-' + name + os.path.splitext(ori_video_path)[-1])
	save_info[name] = save_video_path
	shutil.copy(ori_video_path, save_video_path)
	gr.Info(f'Save {name} to {save_video_path}', duration=15)
	save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
	save_info['save_info'] = save_txt_path
	with open(save_txt_path, 'w') as f:
	f.write(json.dumps(save_info, ensure_ascii=False))
	return dict_to_markdown_table(save_info)

	def set_callbacks_video_tool(self, **kwargs):
	inputs = [self.input_process_image_1, self.input_process_image_2, self.input_process_video_1, self.input_process_video_2, self.input_process_video_mask_1, self.input_process_video_mask_2, self.input_process_type, self.output_height, self.output_width, self.frame_rate, self.num_frames]
	outputs = [self.output_process_image, self.output_process_video, self.output_process_mask]
	self.process_button.click(self.process_tool_data,
	inputs=inputs,
	outputs=outputs)
	self.save_button.click(self.save_tool_data,
	inputs=[self.output_process_image, self.output_process_video, self.output_process_mask],
	outputs=[self.save_log])


	class VACETag():

	def __init__(self, cfg):
	self.cfg = cfg
	self.save_dir = cfg.save_dir
	self.current_index = 0
	self.loaded_data = {}

	self.vace_video_tag = VACEVideoTag(cfg)
	self.vace_image_tag = VACEImageTag(cfg)
	self.vace_tag_composition = VACETagComposition(cfg)
	# self.vace_video_tool = VACEVideoTool(cfg)


	def create_ui(self, args, *kwargs):
	gr.Markdown("""
	<div style="text-align: center; font-size: 24px; font-weight: bold; margin-bottom: 15px;">
	<a href="https://ali-vilab.github.io/VACE-Page/" style="text-decoration: none; color: inherit;">VACE Preprocessor</a>
	</div>
	""")
	with gr.Tabs(elem_id='VACE Tag') as vace_tab:
	with gr.TabItem('VACE Video Tag', id=1, elem_id='video_tab'):
	self.vace_video_tag.create_ui_video(args, *kwargs)
	with gr.TabItem('VACE Image Tag', id=2, elem_id='image_tab'):
	self.vace_image_tag.create_ui_image(args, *kwargs)
	with gr.TabItem('VACE Composition Tag', id=3, elem_id='composition_tab'):
	self.vace_tag_composition.create_ui_composition(args, *kwargs)
	# with gr.TabItem('VACE Video Tool', id=4, elem_id='video_tool_tab'):
	# self.vace_video_tool.create_ui_video_tool(args, *kwargs)


	def set_callbacks(self, **kwargs):
	self.vace_video_tag.set_callbacks_video(**kwargs)
	self.vace_image_tag.set_callbacks_image(**kwargs)
	self.vace_tag_composition.set_callbacks_composition(**kwargs)
	# self.vace_video_tool.set_callbacks_video_tool(**kwargs)


	if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Argparser for VACE-Preprocessor:\n')
	parser.add_argument('--server_port', dest='server_port', help='', default=7860)
	parser.add_argument('--server_name', dest='server_name', help='', default='0.0.0.0')
	parser.add_argument('--root_path', dest='root_path', help='', default=None)
	parser.add_argument('--save_dir', dest='save_dir', help='', default='cache')
	args = parser.parse_args()

	if not os.path.exists(args.save_dir):
	os.makedirs(args.save_dir, exist_ok=True)

	vace_tag = VACETag(args)
	with gr.Blocks() as demo:
	vace_tag.create_ui()
	vace_tag.set_callbacks()
	demo.queue(status_update_rate=1).launch(server_name=args.server_name,
	server_port=int(args.server_port),
	show_api=False, show_error=True,
	debug=True)