import gradio as gr
import os
import shutil
import yaml
import tempfile
import cv2
import huggingface_hub
import subprocess
import threading
import torch
from subprocess import getoutput

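# Space / environment detection: on the shared public Space the advanced
# controls are disabled and visitors are asked to duplicate the Space.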
space_id = os.environ.get('SPACE_ID', '')
is_shared_ui = "fffiloni/MimicMotion" in space_id
available_property = not is_shared_ui

os.makedirs('./models/DWPose', exist_ok=True)

is_gpu_associated = torch.cuda.is_available()
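
# Identify the attached GPU from nvidia-smi output; the name is only used
# in the status banner rendered further down.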
if is_gpu_associated:
    gpu_info = getoutput('nvidia-smi')
    if "A10G" in gpu_info:
        which_gpu = "A10G"
    elif "T4" in gpu_info:
        which_gpu = "T4"
    else:
        which_gpu = "CPU"
else:
    # Keep which_gpu defined even when no GPU is attached.
    which_gpu = "CPU"


def stream_output(pipe):
    # Mirror a subprocess pipe to this process's stdout, line by line.
    for line in iter(pipe.readline, ''):
        print(line, end='')
    pipe.close()


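# Log in with the gated-repo token so the SVD base model can be pulled.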
HF_TKN = os.environ.get("GATED_HF_TOKEN")
huggingface_hub.login(token=HF_TKN)

# DWPose pose-estimation weights: person detector and pose model.
huggingface_hub.hf_hub_download(
    repo_id='yzd-v/DWPose',
    filename='yolox_l.onnx',
    local_dir='./models/DWPose'
)
huggingface_hub.hf_hub_download(
    repo_id='yzd-v/DWPose',
    filename='dw-ll_ucoco_384.onnx',
    local_dir='./models/DWPose'
)

# Main MimicMotion v1 checkpoint.
huggingface_hub.hf_hub_download(
    repo_id='tencent/MimicMotion',
    filename='MimicMotion_1.pth',
    local_dir='./models'
)
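
# The v1.1 checkpoint lives in a different repo, nested under models/, so it
# is fetched into the HF cache and then copied next to the v1 checkpoint.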
downloaded_path = huggingface_hub.hf_hub_download(
    repo_id='LogicGoInfotechSpaces/Mimic',
    filename='models/MimicMotion_1-1.pth',
)

target_path = './models/MimicMotion_1-1.pth'
if not os.path.exists(target_path):
    shutil.copy(downloaded_path, target_path)
    print(f"Copied MimicMotion_1-1.pth to {target_path}")


def print_directory_contents(path):
    for root, dirs, files in os.walk(path):
        level = root.replace(path, '').count(os.sep)
        indent = ' ' * 4 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print(f"{subindent}{f}")


def check_outputs_folder(folder_path):
    # Empty the folder: remove files and symlinks, recurse into subfolders.
    if os.path.exists(folder_path) and os.path.isdir(folder_path):
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print(f'Failed to delete {file_path}. Reason: {e}')
    else:
        print(f'The folder {folder_path} does not exist.')


def check_for_mp4_in_outputs():
    # Return the path of the first .mp4 in ./outputs, or None if none exists.
    outputs_folder = './outputs'
    if not os.path.exists(outputs_folder):
        return None

    mp4_files = [f for f in os.listdir(outputs_folder) if f.endswith('.mp4')]
    if mp4_files:
        return os.path.join(outputs_folder, mp4_files[0])
    return None


def get_video_fps(video_path):
    # Read the frame rate from the video's metadata via OpenCV.
    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        raise ValueError("Error opening video file")

    fps = video_capture.get(cv2.CAP_PROP_FPS)
    video_capture.release()
    return fps


def load_examples(ref_image_in, ref_video_in):
    # Examples return a pre-rendered result instead of running inference.
    return "./examples/mimicmotion_result1_example.mp4"
def infer(ref_image_in, ref_video_in, num_inference_steps, guidance_scale, output_frames_per_second, seed, checkpoint_version, progress=gr.Progress(track_tqdm=True)):
    # Start from an empty ./outputs folder so the result lookup at the end
    # cannot pick up a video left over from a previous run.
    check_outputs_folder('./outputs')

    with tempfile.TemporaryDirectory() as temp_dir:
        print("Temporary directory created:", temp_dir)

        # Fixed generation settings; the rest come from the UI controls.
        ref_video_path = ref_video_in
        ref_image_path = ref_image_in
        num_frames = 16
        resolution = 576
        frames_overlap = 6
        noise_aug_strength = 0
        sample_stride = 2
        fps = output_frames_per_second
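
        # Config consumed by inference.py, mirroring the YAML layout of the
        # MimicMotion repo's example configs.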
        data = {
            'base_model_path': 'stabilityai/stable-video-diffusion-img2vid-xt-1-1',
            'ckpt_path': f'models/{checkpoint_version}',
            'test_case': [
                {
                    'ref_video_path': ref_video_path,
                    'ref_image_path': ref_image_path,
                    'num_frames': num_frames,
                    'resolution': resolution,
                    'frames_overlap': frames_overlap,
                    'num_inference_steps': num_inference_steps,
                    'noise_aug_strength': noise_aug_strength,
                    'guidance_scale': guidance_scale,
                    'sample_stride': sample_stride,
                    'fps': fps,
                    'seed': seed
                }
            ]
        }

        file_path = os.path.join(temp_dir, 'config.yaml')
        with open(file_path, 'w') as file:
            yaml.dump(data, file, default_flow_style=False)
        print("YAML file 'config.yaml' created successfully at", file_path)
        command = ['python', 'inference.py', '--inference_config', file_path]
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)

        stdout_thread = threading.Thread(target=stream_output, args=(process.stdout,))
        stderr_thread = threading.Thread(target=stream_output, args=(process.stderr,))
        stdout_thread.start()
        stderr_thread.start()

        process.wait()
        stdout_thread.join()
        stderr_thread.join()

        print("Inference script finished with return code:", process.returncode)
        print_directory_contents('./outputs')

        mp4_file_path = check_for_mp4_in_outputs()
        print(mp4_file_path)

        return mp4_file_path


# Created before the layout so it can be rendered at the right spot below.
output_video = gr.Video(label="Output Video")
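
# Styling for the badge images and the warning banners used in the layout.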
css = """
img#model-badge {
    margin-top: -1px;
    height: 24px;
}
img#paper-badge {
    height: 18.5px;
    margin-top: 0.5px;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
div#warning-setgpu {
    background-color: #fff4eb;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-setgpu > .gr-prose > h2, div#warning-setgpu > .gr-prose > p {
    color: #92220f!important;
}
div#warning-setgpu a, div#warning-setgpu b {
    color: #91230f;
}
div#warning-setgpu p.actions > a {
    display: inline-block;
    background: #1f1f23;
    border-radius: 40px;
    padding: 6px 24px;
    color: antiquewhite;
    text-decoration: none;
    font-weight: 600;
    font-size: 1.2em;
}
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
.custom-color {
    color: #030303 !important;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column():
        gr.Markdown("# MimicMotion")
        gr.Markdown("High-quality human motion video generation with pose-guided control")
        gr.HTML("""
        <div style="display:flex;column-gap:4px;">
            <a href='http://tencent.github.io/MimicMotion'>
                <img src='https://img.shields.io/badge/Project-Page-Green'>
            </a>
            <a href='https://arxiv.org/abs/2406.19680'>
                <img src='https://img.shields.io/badge/Paper-Arxiv-red'>
            </a>
            <a href="https://huggingface.co/tencent/MimicMotion">
                <img id="model-badge" src="https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-sm.svg" alt="Model on HF">
            </a>
            <a href='https://huggingface.co/papers/2406.19680'>
                <img id="paper-badge" src="https://huggingface.co/datasets/huggingface/badges/resolve/main/paper-page-sm.svg" alt="Paper page">
            </a>
        </div>
        """)
        with gr.Row():
            with gr.Column():
                if is_shared_ui:
                    top_description = gr.HTML(f'''
                    <div class="gr-prose">
                        <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;" fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                        Attention: this Space needs to be duplicated to work</h2>
                        <p class="main-message custom-color">
                            To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU (A10G-large recommended).<br />
                            An A10G-large costs <strong>US$1.50/h</strong>. You'll also need to set your own hf_token secret to access the gated stabilityai/stable-video-diffusion-img2vid-xt-1-1 repo.
                        </p>
                        <p class="actions custom-color">
                            <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
                                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
                            </a>
                            to start experimenting with this demo
                        </p>
                    </div>
                    ''', elem_id="warning-duplicate")
                elif is_gpu_associated:
                    top_description = gr.HTML(f'''
                    <div class="gr-prose">
                        <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;" fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                        You have successfully associated a {which_gpu} GPU to this Space 🎉</h2>
                        <p class="custom-color">
                            You will be billed by the minute from when you activated the GPU until it is turned off.
                        </p>
                    </div>
                    ''', elem_id="warning-ready")
                else:
                    top_description = gr.HTML(f'''
                    <div class="gr-prose">
                        <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;" fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                        You have successfully duplicated the MimicMotion Space 🎉</h2>
                        <p class="custom-color">There's only one step left before you can properly play with this demo: <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings" style="text-decoration: underline" target="_blank">assign a GPU to it (via the Settings tab)</a> and run the app below.
                        You will be billed by the minute from when you activate the GPU until it is turned off.</p>
                        <p class="actions custom-color">
                            <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}/settings">🔥 Set recommended GPU</a>
                        </p>
                    </div>
                    ''', elem_id="warning-setgpu")

                with gr.Row():
                    ref_image_in = gr.Image(label="Person Image Reference", type="filepath")
                    ref_video_in = gr.Video(label="Person Video Reference")
                with gr.Accordion("Advanced Settings", open=False):
                    num_inference_steps = gr.Slider(label="num inference steps", minimum=12, maximum=50, value=25, step=1, interactive=available_property)
                    guidance_scale = gr.Slider(label="guidance scale", minimum=0.1, maximum=10, value=2, step=0.1, interactive=available_property)
                    with gr.Row():
                        output_frames_per_second = gr.Slider(label="fps", minimum=1, maximum=60, value=16, step=1, interactive=available_property)
                        seed = gr.Number(label="Seed", value=42, interactive=available_property)
                        checkpoint_version = gr.Dropdown(label="Checkpoint Version", choices=["MimicMotion_1.pth", "MimicMotion_1-1.pth"], value="MimicMotion_1.pth", interactive=available_property, filterable=False)
                submit_btn = gr.Button("Submit", interactive=available_property)
            output_video.render()

    submit_btn.click(
        fn=infer,
        inputs=[ref_image_in, ref_video_in, num_inference_steps, guidance_scale, output_frames_per_second, seed, checkpoint_version],
        outputs=[output_video]
    )

demo.launch(show_api=False, show_error=False)