Upload 285 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +19 -0
- ckpts/LivePortrait/.gitignore +34 -0
- ckpts/LivePortrait/.vscode/settings.json +19 -0
- ckpts/LivePortrait/LICENSE +30 -0
- ckpts/LivePortrait/app.py +493 -0
- ckpts/LivePortrait/app_animals.py +249 -0
- ckpts/LivePortrait/assets/.gitignore +2 -0
- ckpts/LivePortrait/assets/docs/LivePortrait-Gradio-2024-07-19.jpg +3 -0
- ckpts/LivePortrait/assets/docs/animals-mode-gradio-2024-08-02.jpg +3 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-07-10.md +22 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-07-19.md +24 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-07-24.md +12 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-08-02.md +75 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-08-05.md +18 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-08-06.md +9 -0
- ckpts/LivePortrait/assets/docs/changelog/2024-08-19.md +65 -0
- ckpts/LivePortrait/assets/docs/changelog/2025-01-01.md +29 -0
- ckpts/LivePortrait/assets/docs/directory-structure.md +28 -0
- ckpts/LivePortrait/assets/docs/driving-option-multiplier-2024-08-02.jpg +3 -0
- ckpts/LivePortrait/assets/docs/editing-portrait-2024-08-06.jpg +3 -0
- ckpts/LivePortrait/assets/docs/how-to-install-ffmpeg.md +29 -0
- ckpts/LivePortrait/assets/docs/image-driven-image-2024-08-19.jpg +3 -0
- ckpts/LivePortrait/assets/docs/image-driven-portrait-animation-2024-08-19.jpg +3 -0
- ckpts/LivePortrait/assets/docs/inference-animals.gif +3 -0
- ckpts/LivePortrait/assets/docs/inference.gif +3 -0
- ckpts/LivePortrait/assets/docs/pose-edit-2024-07-24.jpg +3 -0
- ckpts/LivePortrait/assets/docs/retargeting-video-2024-08-02.jpg +3 -0
- ckpts/LivePortrait/assets/docs/showcase.gif +3 -0
- ckpts/LivePortrait/assets/docs/showcase2.gif +3 -0
- ckpts/LivePortrait/assets/docs/speed.md +13 -0
- ckpts/LivePortrait/assets/examples/driving/aggrieved.pkl +0 -0
- ckpts/LivePortrait/assets/examples/driving/d0.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d1.pkl +0 -0
- ckpts/LivePortrait/assets/examples/driving/d10.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d11.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d12.jpg +3 -0
- ckpts/LivePortrait/assets/examples/driving/d12.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d13.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d14.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d18.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d19.jpg +3 -0
- ckpts/LivePortrait/assets/examples/driving/d19.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d2.pkl +0 -0
- ckpts/LivePortrait/assets/examples/driving/d20.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d3.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d30.jpg +3 -0
- ckpts/LivePortrait/assets/examples/driving/d38.jpg +3 -0
- ckpts/LivePortrait/assets/examples/driving/d5.pkl +0 -0
- ckpts/LivePortrait/assets/examples/driving/d6.mp4 +3 -0
- ckpts/LivePortrait/assets/examples/driving/d7.pkl +0 -0
.gitattributes
CHANGED
|
@@ -17,3 +17,22 @@ ckpts/naturalspeech3_facodec/ns3_facodec_decoder.bin filter=lfs diff=lfs merge=l
|
|
| 17 |
ckpts/naturalspeech3_facodec/ns3_facodec_encoder_v2.bin filter=lfs diff=lfs merge=lfs -text
|
| 18 |
ckpts/naturalspeech3_facodec/ns3_facodec_encoder.bin filter=lfs diff=lfs merge=lfs -text
|
| 19 |
ckpts/naturalspeech3_facodec/ns3_facodec_redecoder.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
ckpts/naturalspeech3_facodec/ns3_facodec_encoder_v2.bin filter=lfs diff=lfs merge=lfs -text
|
| 18 |
ckpts/naturalspeech3_facodec/ns3_facodec_encoder.bin filter=lfs diff=lfs merge=lfs -text
|
| 19 |
ckpts/naturalspeech3_facodec/ns3_facodec_redecoder.bin filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
ckpts/LivePortrait/assets/examples/driving/d8.pkl filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
ckpts/LivePortrait/pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
ckpts/LivePortrait/pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/warping_module.pth filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/warping_module.pth filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/retargeting_models/stitching_retargeting_module.pth filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait_animals/xpose.pth filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/warping_module.pth filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
ckpts/LivePortrait/pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth filter=lfs diff=lfs merge=lfs -text
|
ckpts/LivePortrait/.gitignore
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
**/__pycache__/
|
| 4 |
+
*.py[cod]
|
| 5 |
+
**/*.py[cod]
|
| 6 |
+
*$py.class
|
| 7 |
+
|
| 8 |
+
# Model weights
|
| 9 |
+
**/*.pth
|
| 10 |
+
**/*.onnx
|
| 11 |
+
|
| 12 |
+
pretrained_weights/*.md
|
| 13 |
+
pretrained_weights/docs
|
| 14 |
+
pretrained_weights/liveportrait
|
| 15 |
+
pretrained_weights/liveportrait_animals
|
| 16 |
+
|
| 17 |
+
# Ipython notebook
|
| 18 |
+
*.ipynb
|
| 19 |
+
|
| 20 |
+
# Temporary files or benchmark resources
|
| 21 |
+
animations/*
|
| 22 |
+
tmp/*
|
| 23 |
+
.vscode/launch.json
|
| 24 |
+
**/*.DS_Store
|
| 25 |
+
gradio_temp/**
|
| 26 |
+
|
| 27 |
+
# Windows dependencies
|
| 28 |
+
ffmpeg/
|
| 29 |
+
LivePortrait_env/
|
| 30 |
+
|
| 31 |
+
# XPose build files
|
| 32 |
+
src/utils/dependencies/XPose/models/UniPose/ops/build
|
| 33 |
+
src/utils/dependencies/XPose/models/UniPose/ops/dist
|
| 34 |
+
src/utils/dependencies/XPose/models/UniPose/ops/MultiScaleDeformableAttention.egg-info
|
ckpts/LivePortrait/.vscode/settings.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[python]": {
|
| 3 |
+
"editor.tabSize": 4
|
| 4 |
+
},
|
| 5 |
+
"files.eol": "\n",
|
| 6 |
+
"files.insertFinalNewline": true,
|
| 7 |
+
"files.trimFinalNewlines": true,
|
| 8 |
+
"files.trimTrailingWhitespace": true,
|
| 9 |
+
"files.exclude": {
|
| 10 |
+
"**/.git": true,
|
| 11 |
+
"**/.svn": true,
|
| 12 |
+
"**/.hg": true,
|
| 13 |
+
"**/CVS": true,
|
| 14 |
+
"**/.DS_Store": true,
|
| 15 |
+
"**/Thumbs.db": true,
|
| 16 |
+
"**/*.crswap": true,
|
| 17 |
+
"**/__pycache__": true
|
| 18 |
+
}
|
| 19 |
+
}
|
ckpts/LivePortrait/LICENSE
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 Kuaishou Visual Generation and Interaction Center
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
The code of InsightFace is released under the MIT License.
|
| 26 |
+
The models of InsightFace are for non-commercial research purposes only.
|
| 27 |
+
|
| 28 |
+
If you want to use the LivePortrait project for commercial purposes, you
|
| 29 |
+
should remove and replace InsightFace’s detection models to fully comply with
|
| 30 |
+
the MIT license.
|
ckpts/LivePortrait/app.py
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
The entrance of the gradio for human
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import tyro
|
| 9 |
+
import subprocess
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import os.path as osp
|
| 12 |
+
from src.utils.helper import load_description
|
| 13 |
+
from src.gradio_pipeline import GradioPipeline
|
| 14 |
+
from src.config.crop_config import CropConfig
|
| 15 |
+
from src.config.argument_config import ArgumentConfig
|
| 16 |
+
from src.config.inference_config import InferenceConfig
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def partial_fields(target_class, kwargs):
|
| 20 |
+
return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def fast_check_ffmpeg():
|
| 24 |
+
try:
|
| 25 |
+
subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
|
| 26 |
+
return True
|
| 27 |
+
except:
|
| 28 |
+
return False
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# set tyro theme
|
| 32 |
+
tyro.extras.set_accent_color("bright_cyan")
|
| 33 |
+
args = tyro.cli(ArgumentConfig)
|
| 34 |
+
|
| 35 |
+
ffmpeg_dir = os.path.join(os.getcwd(), "ffmpeg")
|
| 36 |
+
if osp.exists(ffmpeg_dir):
|
| 37 |
+
os.environ["PATH"] += (os.pathsep + ffmpeg_dir)
|
| 38 |
+
|
| 39 |
+
if not fast_check_ffmpeg():
|
| 40 |
+
raise ImportError(
|
| 41 |
+
"FFmpeg is not installed. Please install FFmpeg (including ffmpeg and ffprobe) before running this script. https://ffmpeg.org/download.html"
|
| 42 |
+
)
|
| 43 |
+
# specify configs for inference
|
| 44 |
+
inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attribute of args to initial InferenceConfig
|
| 45 |
+
crop_cfg = partial_fields(CropConfig, args.__dict__) # use attribute of args to initial CropConfig
|
| 46 |
+
# global_tab_selection = None
|
| 47 |
+
|
| 48 |
+
gradio_pipeline = GradioPipeline(
|
| 49 |
+
inference_cfg=inference_cfg,
|
| 50 |
+
crop_cfg=crop_cfg,
|
| 51 |
+
args=args
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
if args.gradio_temp_dir not in (None, ''):
|
| 55 |
+
os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
|
| 56 |
+
os.makedirs(args.gradio_temp_dir, exist_ok=True)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def gpu_wrapped_execute_video(*args, **kwargs):
|
| 60 |
+
return gradio_pipeline.execute_video(*args, **kwargs)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def gpu_wrapped_execute_image_retargeting(*args, **kwargs):
|
| 64 |
+
return gradio_pipeline.execute_image_retargeting(*args, **kwargs)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def gpu_wrapped_execute_video_retargeting(*args, **kwargs):
|
| 68 |
+
return gradio_pipeline.execute_video_retargeting(*args, **kwargs)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def reset_sliders(*args, **kwargs):
|
| 72 |
+
return 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, True, True
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# assets
|
| 76 |
+
title_md = "assets/gradio/gradio_title.md"
|
| 77 |
+
example_portrait_dir = "assets/examples/source"
|
| 78 |
+
example_video_dir = "assets/examples/driving"
|
| 79 |
+
data_examples_i2v = [
|
| 80 |
+
[osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
|
| 81 |
+
[osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
|
| 82 |
+
[osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
|
| 83 |
+
[osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False],
|
| 84 |
+
[osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False],
|
| 85 |
+
[osp.join(example_portrait_dir, "s2.jpg"), osp.join(example_video_dir, "d13.mp4"), True, True, True, True],
|
| 86 |
+
]
|
| 87 |
+
data_examples_v2v = [
|
| 88 |
+
[osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, 3e-7],
|
| 89 |
+
# [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 3e-7],
|
| 90 |
+
# [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 3e-7],
|
| 91 |
+
[osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, 3e-7],
|
| 92 |
+
# [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-7],
|
| 93 |
+
[osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, 3e-7],
|
| 94 |
+
]
|
| 95 |
+
#################### interface logic ####################
|
| 96 |
+
|
| 97 |
+
# Define components first
|
| 98 |
+
retargeting_source_scale = gr.Number(minimum=1.8, maximum=3.2, value=2.5, step=0.05, label="crop scale")
|
| 99 |
+
video_retargeting_source_scale = gr.Number(minimum=1.8, maximum=3.2, value=2.3, step=0.05, label="crop scale")
|
| 100 |
+
driving_smooth_observation_variance_retargeting = gr.Number(value=3e-6, label="motion smooth strength", minimum=1e-11, maximum=1e-2, step=1e-8)
|
| 101 |
+
video_retargeting_silence = gr.Checkbox(value=False, label="keeping the lip silent")
|
| 102 |
+
eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
|
| 103 |
+
lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
|
| 104 |
+
video_lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
|
| 105 |
+
head_pitch_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative pitch")
|
| 106 |
+
head_yaw_slider = gr.Slider(minimum=-25, maximum=25, value=0, step=1, label="relative yaw")
|
| 107 |
+
head_roll_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative roll")
|
| 108 |
+
mov_x = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="x-axis movement")
|
| 109 |
+
mov_y = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="y-axis movement")
|
| 110 |
+
mov_z = gr.Slider(minimum=0.9, maximum=1.2, value=1.0, step=0.01, label="z-axis movement")
|
| 111 |
+
lip_variation_zero = gr.Slider(minimum=-0.09, maximum=0.09, value=0, step=0.01, label="pouting")
|
| 112 |
+
lip_variation_one = gr.Slider(minimum=-20.0, maximum=15.0, value=0, step=0.01, label="pursing 😐")
|
| 113 |
+
lip_variation_two = gr.Slider(minimum=0.0, maximum=15.0, value=0, step=0.01, label="grin 😁")
|
| 114 |
+
lip_variation_three = gr.Slider(minimum=-90.0, maximum=120.0, value=0, step=1.0, label="lip close <-> open")
|
| 115 |
+
smile = gr.Slider(minimum=-0.3, maximum=1.3, value=0, step=0.01, label="smile 😄")
|
| 116 |
+
wink = gr.Slider(minimum=0, maximum=39, value=0, step=0.01, label="wink 😉")
|
| 117 |
+
eyebrow = gr.Slider(minimum=-30, maximum=30, value=0, step=0.01, label="eyebrow 🤨")
|
| 118 |
+
eyeball_direction_x = gr.Slider(minimum=-30.0, maximum=30.0, value=0, step=0.01, label="eye gaze (horizontal) 👀")
|
| 119 |
+
eyeball_direction_y = gr.Slider(minimum=-63.0, maximum=63.0, value=0, step=0.01, label="eye gaze (vertical) 🙄")
|
| 120 |
+
retargeting_input_image = gr.Image(type="filepath")
|
| 121 |
+
retargeting_input_video = gr.Video()
|
| 122 |
+
output_image = gr.Image(type="numpy")
|
| 123 |
+
output_image_paste_back = gr.Image(type="numpy")
|
| 124 |
+
retargeting_output_image = gr.Image(type="numpy")
|
| 125 |
+
retargeting_output_image_paste_back = gr.Image(type="numpy")
|
| 126 |
+
output_video = gr.Video(autoplay=False)
|
| 127 |
+
output_video_paste_back = gr.Video(autoplay=False)
|
| 128 |
+
|
| 129 |
+
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
|
| 130 |
+
gr.HTML(load_description(title_md))
|
| 131 |
+
|
| 132 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_upload.md"))
|
| 133 |
+
with gr.Row():
|
| 134 |
+
with gr.Column():
|
| 135 |
+
with gr.Tabs():
|
| 136 |
+
with gr.TabItem("🖼️ Source Image") as tab_image:
|
| 137 |
+
with gr.Accordion(open=True, label="Source Image"):
|
| 138 |
+
source_image_input = gr.Image(type="filepath")
|
| 139 |
+
gr.Examples(
|
| 140 |
+
examples=[
|
| 141 |
+
[osp.join(example_portrait_dir, "s9.jpg")],
|
| 142 |
+
[osp.join(example_portrait_dir, "s6.jpg")],
|
| 143 |
+
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 144 |
+
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 145 |
+
[osp.join(example_portrait_dir, "s7.jpg")],
|
| 146 |
+
[osp.join(example_portrait_dir, "s12.jpg")],
|
| 147 |
+
[osp.join(example_portrait_dir, "s22.jpg")],
|
| 148 |
+
[osp.join(example_portrait_dir, "s23.jpg")],
|
| 149 |
+
],
|
| 150 |
+
inputs=[source_image_input],
|
| 151 |
+
cache_examples=False,
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
with gr.TabItem("🎞️ Source Video") as tab_video:
|
| 155 |
+
with gr.Accordion(open=True, label="Source Video"):
|
| 156 |
+
source_video_input = gr.Video()
|
| 157 |
+
gr.Examples(
|
| 158 |
+
examples=[
|
| 159 |
+
[osp.join(example_portrait_dir, "s13.mp4")],
|
| 160 |
+
# [osp.join(example_portrait_dir, "s14.mp4")],
|
| 161 |
+
# [osp.join(example_portrait_dir, "s15.mp4")],
|
| 162 |
+
[osp.join(example_portrait_dir, "s18.mp4")],
|
| 163 |
+
# [osp.join(example_portrait_dir, "s19.mp4")],
|
| 164 |
+
[osp.join(example_portrait_dir, "s20.mp4")],
|
| 165 |
+
],
|
| 166 |
+
inputs=[source_video_input],
|
| 167 |
+
cache_examples=False,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
tab_selection = gr.Textbox(visible=False)
|
| 171 |
+
tab_image.select(lambda: "Image", None, tab_selection)
|
| 172 |
+
tab_video.select(lambda: "Video", None, tab_selection)
|
| 173 |
+
with gr.Accordion(open=True, label="Cropping Options for Source Image or Video"):
|
| 174 |
+
with gr.Row():
|
| 175 |
+
flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
|
| 176 |
+
scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
|
| 177 |
+
vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
|
| 178 |
+
vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)
|
| 179 |
+
|
| 180 |
+
with gr.Column():
|
| 181 |
+
with gr.Tabs():
|
| 182 |
+
with gr.TabItem("🎞️ Driving Video") as v_tab_video:
|
| 183 |
+
with gr.Accordion(open=True, label="Driving Video"):
|
| 184 |
+
driving_video_input = gr.Video()
|
| 185 |
+
gr.Examples(
|
| 186 |
+
examples=[
|
| 187 |
+
[osp.join(example_video_dir, "d0.mp4")],
|
| 188 |
+
[osp.join(example_video_dir, "d18.mp4")],
|
| 189 |
+
[osp.join(example_video_dir, "d19.mp4")],
|
| 190 |
+
[osp.join(example_video_dir, "d14.mp4")],
|
| 191 |
+
[osp.join(example_video_dir, "d6.mp4")],
|
| 192 |
+
[osp.join(example_video_dir, "d20.mp4")],
|
| 193 |
+
],
|
| 194 |
+
inputs=[driving_video_input],
|
| 195 |
+
cache_examples=False,
|
| 196 |
+
)
|
| 197 |
+
with gr.TabItem("🖼️ Driving Image") as v_tab_image:
|
| 198 |
+
with gr.Accordion(open=True, label="Driving Image"):
|
| 199 |
+
driving_image_input = gr.Image(type="filepath")
|
| 200 |
+
gr.Examples(
|
| 201 |
+
examples=[
|
| 202 |
+
[osp.join(example_video_dir, "d30.jpg")],
|
| 203 |
+
[osp.join(example_video_dir, "d9.jpg")],
|
| 204 |
+
[osp.join(example_video_dir, "d19.jpg")],
|
| 205 |
+
[osp.join(example_video_dir, "d8.jpg")],
|
| 206 |
+
[osp.join(example_video_dir, "d12.jpg")],
|
| 207 |
+
[osp.join(example_video_dir, "d38.jpg")],
|
| 208 |
+
],
|
| 209 |
+
inputs=[driving_image_input],
|
| 210 |
+
cache_examples=False,
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
with gr.TabItem("📁 Driving Pickle") as v_tab_pickle:
|
| 214 |
+
with gr.Accordion(open=True, label="Driving Pickle"):
|
| 215 |
+
driving_video_pickle_input = gr.File(type="filepath", file_types=[".pkl"])
|
| 216 |
+
gr.Examples(
|
| 217 |
+
examples=[
|
| 218 |
+
[osp.join(example_video_dir, "d1.pkl")],
|
| 219 |
+
[osp.join(example_video_dir, "d2.pkl")],
|
| 220 |
+
[osp.join(example_video_dir, "d5.pkl")],
|
| 221 |
+
[osp.join(example_video_dir, "d7.pkl")],
|
| 222 |
+
[osp.join(example_video_dir, "d8.pkl")],
|
| 223 |
+
],
|
| 224 |
+
inputs=[driving_video_pickle_input],
|
| 225 |
+
cache_examples=False,
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
v_tab_selection = gr.Textbox(visible=False)
|
| 229 |
+
v_tab_video.select(lambda: "Video", None, v_tab_selection)
|
| 230 |
+
v_tab_image.select(lambda: "Image", None, v_tab_selection)
|
| 231 |
+
v_tab_pickle.select(lambda: "Pickle", None, v_tab_selection)
|
| 232 |
+
# with gr.Accordion(open=False, label="Animation Instructions"):
|
| 233 |
+
# gr.Markdown(load_description("assets/gradio/gradio_description_animation.md"))
|
| 234 |
+
with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
|
| 235 |
+
with gr.Row():
|
| 236 |
+
flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
|
| 237 |
+
scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
|
| 238 |
+
vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
|
| 239 |
+
vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)
|
| 240 |
+
|
| 241 |
+
with gr.Row():
|
| 242 |
+
with gr.Accordion(open=True, label="Animation Options"):
|
| 243 |
+
with gr.Row():
|
| 244 |
+
flag_normalize_lip = gr.Checkbox(value=False, label="normalize lip")
|
| 245 |
+
flag_relative_input = gr.Checkbox(value=True, label="relative motion")
|
| 246 |
+
flag_remap_input = gr.Checkbox(value=True, label="paste-back")
|
| 247 |
+
flag_stitching_input = gr.Checkbox(value=True, label="stitching")
|
| 248 |
+
animation_region = gr.Radio(["exp", "pose", "lip", "eyes", "all"], value="all", label="animation region")
|
| 249 |
+
driving_option_input = gr.Radio(['expression-friendly', 'pose-friendly'], value="expression-friendly", label="driving option (i2v)")
|
| 250 |
+
driving_multiplier = gr.Number(value=1.0, label="driving multiplier (i2v)", minimum=0.0, maximum=2.0, step=0.02)
|
| 251 |
+
driving_smooth_observation_variance = gr.Number(value=3e-7, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-8)
|
| 252 |
+
|
| 253 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
|
| 254 |
+
with gr.Row():
|
| 255 |
+
process_button_animation = gr.Button("🚀 Animate", variant="primary")
|
| 256 |
+
with gr.Row():
|
| 257 |
+
with gr.Column():
|
| 258 |
+
output_video_i2v = gr.Video(autoplay=False, label="The animated video in the original image space")
|
| 259 |
+
with gr.Column():
|
| 260 |
+
output_video_concat_i2v = gr.Video(autoplay=False, label="The animated video")
|
| 261 |
+
with gr.Row():
|
| 262 |
+
with gr.Column():
|
| 263 |
+
output_image_i2i = gr.Image(type="numpy", label="The animated image in the original image space", visible=False)
|
| 264 |
+
with gr.Column():
|
| 265 |
+
output_image_concat_i2i = gr.Image(type="numpy", label="The animated image", visible=False)
|
| 266 |
+
with gr.Row():
|
| 267 |
+
process_button_reset = gr.ClearButton([source_image_input, source_video_input, driving_video_pickle_input, driving_video_input, driving_image_input, output_video_i2v, output_video_concat_i2v, output_image_i2i, output_image_concat_i2i], value="🧹 Clear")
|
| 268 |
+
|
| 269 |
+
with gr.Row():
|
| 270 |
+
# Examples
|
| 271 |
+
gr.Markdown("## You could also choose the examples below by one click ⬇️")
|
| 272 |
+
with gr.Row():
|
| 273 |
+
with gr.Tabs():
|
| 274 |
+
with gr.TabItem("🖼️ Portrait Animation"):
|
| 275 |
+
gr.Examples(
|
| 276 |
+
examples=data_examples_i2v,
|
| 277 |
+
fn=gpu_wrapped_execute_video,
|
| 278 |
+
inputs=[
|
| 279 |
+
source_image_input,
|
| 280 |
+
driving_video_input,
|
| 281 |
+
flag_relative_input,
|
| 282 |
+
flag_do_crop_input,
|
| 283 |
+
flag_remap_input,
|
| 284 |
+
flag_crop_driving_video_input,
|
| 285 |
+
],
|
| 286 |
+
outputs=[output_image, output_image_paste_back],
|
| 287 |
+
examples_per_page=len(data_examples_i2v),
|
| 288 |
+
cache_examples=False,
|
| 289 |
+
)
|
| 290 |
+
with gr.TabItem("🎞️ Portrait Video Editing"):
|
| 291 |
+
gr.Examples(
|
| 292 |
+
examples=data_examples_v2v,
|
| 293 |
+
fn=gpu_wrapped_execute_video,
|
| 294 |
+
inputs=[
|
| 295 |
+
source_video_input,
|
| 296 |
+
driving_video_input,
|
| 297 |
+
flag_relative_input,
|
| 298 |
+
flag_do_crop_input,
|
| 299 |
+
flag_remap_input,
|
| 300 |
+
flag_crop_driving_video_input,
|
| 301 |
+
driving_smooth_observation_variance,
|
| 302 |
+
],
|
| 303 |
+
outputs=[output_image, output_image_paste_back],
|
| 304 |
+
examples_per_page=len(data_examples_v2v),
|
| 305 |
+
cache_examples=False,
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
+
# Retargeting Image
|
| 309 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_retargeting.md"), visible=True)
|
| 310 |
+
with gr.Row(visible=True):
|
| 311 |
+
flag_do_crop_input_retargeting_image = gr.Checkbox(value=True, label="do crop (source)")
|
| 312 |
+
flag_stitching_retargeting_input = gr.Checkbox(value=True, label="stitching")
|
| 313 |
+
retargeting_source_scale.render()
|
| 314 |
+
eye_retargeting_slider.render()
|
| 315 |
+
lip_retargeting_slider.render()
|
| 316 |
+
with gr.Row(visible=True):
|
| 317 |
+
with gr.Column():
|
| 318 |
+
with gr.Accordion(open=True, label="Facial movement sliders"):
|
| 319 |
+
with gr.Row(visible=True):
|
| 320 |
+
head_pitch_slider.render()
|
| 321 |
+
head_yaw_slider.render()
|
| 322 |
+
head_roll_slider.render()
|
| 323 |
+
with gr.Row(visible=True):
|
| 324 |
+
mov_x.render()
|
| 325 |
+
mov_y.render()
|
| 326 |
+
mov_z.render()
|
| 327 |
+
with gr.Column():
|
| 328 |
+
with gr.Accordion(open=True, label="Facial expression sliders"):
|
| 329 |
+
with gr.Row(visible=True):
|
| 330 |
+
lip_variation_zero.render()
|
| 331 |
+
lip_variation_one.render()
|
| 332 |
+
lip_variation_two.render()
|
| 333 |
+
with gr.Row(visible=True):
|
| 334 |
+
lip_variation_three.render()
|
| 335 |
+
smile.render()
|
| 336 |
+
wink.render()
|
| 337 |
+
with gr.Row(visible=True):
|
| 338 |
+
eyebrow.render()
|
| 339 |
+
eyeball_direction_x.render()
|
| 340 |
+
eyeball_direction_y.render()
|
| 341 |
+
with gr.Row(visible=True):
|
| 342 |
+
reset_button = gr.Button("🔄 Reset")
|
| 343 |
+
reset_button.click(
|
| 344 |
+
fn=reset_sliders,
|
| 345 |
+
inputs=None,
|
| 346 |
+
outputs=[
|
| 347 |
+
head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
|
| 348 |
+
lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
|
| 349 |
+
retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
|
| 350 |
+
]
|
| 351 |
+
)
|
| 352 |
+
with gr.Row(visible=True):
|
| 353 |
+
with gr.Column():
|
| 354 |
+
with gr.Accordion(open=True, label="Retargeting Image Input"):
|
| 355 |
+
retargeting_input_image.render()
|
| 356 |
+
gr.Examples(
|
| 357 |
+
examples=[
|
| 358 |
+
[osp.join(example_portrait_dir, "s9.jpg")],
|
| 359 |
+
[osp.join(example_portrait_dir, "s6.jpg")],
|
| 360 |
+
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 361 |
+
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 362 |
+
[osp.join(example_portrait_dir, "s7.jpg")],
|
| 363 |
+
[osp.join(example_portrait_dir, "s12.jpg")],
|
| 364 |
+
[osp.join(example_portrait_dir, "s22.jpg")],
|
| 365 |
+
# [osp.join(example_portrait_dir, "s23.jpg")],
|
| 366 |
+
[osp.join(example_portrait_dir, "s42.jpg")],
|
| 367 |
+
],
|
| 368 |
+
inputs=[retargeting_input_image],
|
| 369 |
+
cache_examples=False,
|
| 370 |
+
)
|
| 371 |
+
with gr.Column():
|
| 372 |
+
with gr.Accordion(open=True, label="Retargeting Result"):
|
| 373 |
+
retargeting_output_image.render()
|
| 374 |
+
with gr.Column():
|
| 375 |
+
with gr.Accordion(open=True, label="Paste-back Result"):
|
| 376 |
+
retargeting_output_image_paste_back.render()
|
| 377 |
+
with gr.Row(visible=True):
|
| 378 |
+
process_button_reset_retargeting = gr.ClearButton(
|
| 379 |
+
[
|
| 380 |
+
retargeting_input_image,
|
| 381 |
+
retargeting_output_image,
|
| 382 |
+
retargeting_output_image_paste_back,
|
| 383 |
+
],
|
| 384 |
+
value="🧹 Clear"
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
+
# Retargeting Video
|
| 388 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_retargeting_video.md"), visible=True)
|
| 389 |
+
with gr.Row(visible=True):
|
| 390 |
+
flag_do_crop_input_retargeting_video = gr.Checkbox(value=True, label="do crop (source)")
|
| 391 |
+
video_retargeting_source_scale.render()
|
| 392 |
+
video_lip_retargeting_slider.render()
|
| 393 |
+
driving_smooth_observation_variance_retargeting.render()
|
| 394 |
+
video_retargeting_silence.render()
|
| 395 |
+
with gr.Row(visible=True):
|
| 396 |
+
process_button_retargeting_video = gr.Button("🚗 Retargeting Video", variant="primary")
|
| 397 |
+
with gr.Row(visible=True):
|
| 398 |
+
with gr.Column():
|
| 399 |
+
with gr.Accordion(open=True, label="Retargeting Video Input"):
|
| 400 |
+
retargeting_input_video.render()
|
| 401 |
+
gr.Examples(
|
| 402 |
+
examples=[
|
| 403 |
+
[osp.join(example_portrait_dir, "s13.mp4")],
|
| 404 |
+
# [osp.join(example_portrait_dir, "s18.mp4")],
|
| 405 |
+
# [osp.join(example_portrait_dir, "s20.mp4")],
|
| 406 |
+
[osp.join(example_portrait_dir, "s29.mp4")],
|
| 407 |
+
[osp.join(example_portrait_dir, "s32.mp4")],
|
| 408 |
+
[osp.join(example_video_dir, "d3.mp4")],
|
| 409 |
+
],
|
| 410 |
+
inputs=[retargeting_input_video],
|
| 411 |
+
cache_examples=False,
|
| 412 |
+
)
|
| 413 |
+
with gr.Column():
|
| 414 |
+
with gr.Accordion(open=True, label="Retargeting Result"):
|
| 415 |
+
output_video.render()
|
| 416 |
+
with gr.Column():
|
| 417 |
+
with gr.Accordion(open=True, label="Paste-back Result"):
|
| 418 |
+
output_video_paste_back.render()
|
| 419 |
+
with gr.Row(visible=True):
|
| 420 |
+
process_button_reset_retargeting = gr.ClearButton(
|
| 421 |
+
[
|
| 422 |
+
video_lip_retargeting_slider,
|
| 423 |
+
retargeting_input_video,
|
| 424 |
+
output_video,
|
| 425 |
+
output_video_paste_back
|
| 426 |
+
],
|
| 427 |
+
value="🧹 Clear"
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
+
# binding functions for buttons
|
| 431 |
+
process_button_animation.click(
|
| 432 |
+
fn=gpu_wrapped_execute_video,
|
| 433 |
+
inputs=[
|
| 434 |
+
source_image_input,
|
| 435 |
+
source_video_input,
|
| 436 |
+
driving_video_input,
|
| 437 |
+
driving_image_input,
|
| 438 |
+
driving_video_pickle_input,
|
| 439 |
+
flag_normalize_lip,
|
| 440 |
+
flag_relative_input,
|
| 441 |
+
flag_do_crop_input,
|
| 442 |
+
flag_remap_input,
|
| 443 |
+
flag_stitching_input,
|
| 444 |
+
animation_region,
|
| 445 |
+
driving_option_input,
|
| 446 |
+
driving_multiplier,
|
| 447 |
+
flag_crop_driving_video_input,
|
| 448 |
+
scale,
|
| 449 |
+
vx_ratio,
|
| 450 |
+
vy_ratio,
|
| 451 |
+
scale_crop_driving_video,
|
| 452 |
+
vx_ratio_crop_driving_video,
|
| 453 |
+
vy_ratio_crop_driving_video,
|
| 454 |
+
driving_smooth_observation_variance,
|
| 455 |
+
tab_selection,
|
| 456 |
+
v_tab_selection,
|
| 457 |
+
],
|
| 458 |
+
outputs=[output_video_i2v, output_video_i2v, output_video_concat_i2v, output_video_concat_i2v, output_image_i2i, output_image_i2i, output_image_concat_i2i, output_image_concat_i2i],
|
| 459 |
+
show_progress=True
|
| 460 |
+
)
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
retargeting_input_image.change(
|
| 464 |
+
fn=gradio_pipeline.init_retargeting_image,
|
| 465 |
+
inputs=[retargeting_source_scale, eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image],
|
| 466 |
+
outputs=[eye_retargeting_slider, lip_retargeting_slider]
|
| 467 |
+
)
|
| 468 |
+
|
| 469 |
+
sliders = [eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z, lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y]
|
| 470 |
+
for slider in sliders:
|
| 471 |
+
# NOTE: gradio >= 4.0.0 may cause slow response
|
| 472 |
+
slider.change(
|
| 473 |
+
fn=gpu_wrapped_execute_image_retargeting,
|
| 474 |
+
inputs=[
|
| 475 |
+
eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
|
| 476 |
+
lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
|
| 477 |
+
retargeting_input_image, retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
|
| 478 |
+
],
|
| 479 |
+
outputs=[retargeting_output_image, retargeting_output_image_paste_back],
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
+
process_button_retargeting_video.click(
|
| 483 |
+
fn=gpu_wrapped_execute_video_retargeting,
|
| 484 |
+
inputs=[video_lip_retargeting_slider, retargeting_input_video, video_retargeting_source_scale, driving_smooth_observation_variance_retargeting, video_retargeting_silence, flag_do_crop_input_retargeting_video],
|
| 485 |
+
outputs=[output_video, output_video_paste_back],
|
| 486 |
+
show_progress=True
|
| 487 |
+
)
|
| 488 |
+
|
| 489 |
+
demo.launch(
|
| 490 |
+
server_port=args.server_port,
|
| 491 |
+
share=args.share,
|
| 492 |
+
server_name=args.server_name
|
| 493 |
+
)
|
ckpts/LivePortrait/app_animals.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
The entrance of the gradio for animal
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import tyro
|
| 9 |
+
import subprocess
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import os.path as osp
|
| 12 |
+
from src.utils.helper import load_description
|
| 13 |
+
from src.gradio_pipeline import GradioPipelineAnimal
|
| 14 |
+
from src.config.crop_config import CropConfig
|
| 15 |
+
from src.config.argument_config import ArgumentConfig
|
| 16 |
+
from src.config.inference_config import InferenceConfig
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def partial_fields(target_class, kwargs):
|
| 20 |
+
return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def fast_check_ffmpeg():
|
| 24 |
+
try:
|
| 25 |
+
subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
|
| 26 |
+
return True
|
| 27 |
+
except:
|
| 28 |
+
return False
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# set tyro theme
|
| 32 |
+
tyro.extras.set_accent_color("bright_cyan")
|
| 33 |
+
args = tyro.cli(ArgumentConfig)
|
| 34 |
+
|
| 35 |
+
ffmpeg_dir = os.path.join(os.getcwd(), "ffmpeg")
|
| 36 |
+
if osp.exists(ffmpeg_dir):
|
| 37 |
+
os.environ["PATH"] += (os.pathsep + ffmpeg_dir)
|
| 38 |
+
|
| 39 |
+
if not fast_check_ffmpeg():
|
| 40 |
+
raise ImportError(
|
| 41 |
+
"FFmpeg is not installed. Please install FFmpeg (including ffmpeg and ffprobe) before running this script. https://ffmpeg.org/download.html"
|
| 42 |
+
)
|
| 43 |
+
# specify configs for inference
|
| 44 |
+
inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attribute of args to initial InferenceConfig
|
| 45 |
+
crop_cfg = partial_fields(CropConfig, args.__dict__) # use attribute of args to initial CropConfig
|
| 46 |
+
|
| 47 |
+
gradio_pipeline_animal: GradioPipelineAnimal = GradioPipelineAnimal(
|
| 48 |
+
inference_cfg=inference_cfg,
|
| 49 |
+
crop_cfg=crop_cfg,
|
| 50 |
+
args=args
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
if args.gradio_temp_dir not in (None, ''):
|
| 54 |
+
os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
|
| 55 |
+
os.makedirs(args.gradio_temp_dir, exist_ok=True)
|
| 56 |
+
|
| 57 |
+
def gpu_wrapped_execute_video(*args, **kwargs):
|
| 58 |
+
return gradio_pipeline_animal.execute_video(*args, **kwargs)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# assets
|
| 62 |
+
title_md = "assets/gradio/gradio_title.md"
|
| 63 |
+
example_portrait_dir = "assets/examples/source"
|
| 64 |
+
example_video_dir = "assets/examples/driving"
|
| 65 |
+
data_examples_i2v = [
|
| 66 |
+
[osp.join(example_portrait_dir, "s41.jpg"), osp.join(example_video_dir, "d3.mp4"), True, False, False, False],
|
| 67 |
+
[osp.join(example_portrait_dir, "s40.jpg"), osp.join(example_video_dir, "d6.mp4"), True, False, False, False],
|
| 68 |
+
[osp.join(example_portrait_dir, "s25.jpg"), osp.join(example_video_dir, "d19.mp4"), True, False, False, False],
|
| 69 |
+
]
|
| 70 |
+
data_examples_i2v_pickle = [
|
| 71 |
+
[osp.join(example_portrait_dir, "s25.jpg"), osp.join(example_video_dir, "wink.pkl"), True, False, False, False],
|
| 72 |
+
[osp.join(example_portrait_dir, "s40.jpg"), osp.join(example_video_dir, "talking.pkl"), True, False, False, False],
|
| 73 |
+
[osp.join(example_portrait_dir, "s41.jpg"), osp.join(example_video_dir, "aggrieved.pkl"), True, False, False, False],
|
| 74 |
+
]
|
| 75 |
+
#################### interface logic ####################
|
| 76 |
+
|
| 77 |
+
# Define components first
|
| 78 |
+
output_image = gr.Image(type="numpy")
|
| 79 |
+
output_image_paste_back = gr.Image(type="numpy")
|
| 80 |
+
output_video_i2v = gr.Video(autoplay=False)
|
| 81 |
+
output_video_concat_i2v = gr.Video(autoplay=False)
|
| 82 |
+
output_video_i2v_gif = gr.Image(type="numpy")
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
|
| 86 |
+
gr.HTML(load_description(title_md))
|
| 87 |
+
|
| 88 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_upload_animal.md"))
|
| 89 |
+
with gr.Row():
|
| 90 |
+
with gr.Column():
|
| 91 |
+
with gr.Accordion(open=True, label="🐱 Source Animal Image"):
|
| 92 |
+
source_image_input = gr.Image(type="filepath")
|
| 93 |
+
gr.Examples(
|
| 94 |
+
examples=[
|
| 95 |
+
[osp.join(example_portrait_dir, "s25.jpg")],
|
| 96 |
+
[osp.join(example_portrait_dir, "s30.jpg")],
|
| 97 |
+
[osp.join(example_portrait_dir, "s31.jpg")],
|
| 98 |
+
[osp.join(example_portrait_dir, "s32.jpg")],
|
| 99 |
+
[osp.join(example_portrait_dir, "s33.jpg")],
|
| 100 |
+
[osp.join(example_portrait_dir, "s39.jpg")],
|
| 101 |
+
[osp.join(example_portrait_dir, "s40.jpg")],
|
| 102 |
+
[osp.join(example_portrait_dir, "s41.jpg")],
|
| 103 |
+
[osp.join(example_portrait_dir, "s38.jpg")],
|
| 104 |
+
[osp.join(example_portrait_dir, "s36.jpg")],
|
| 105 |
+
],
|
| 106 |
+
inputs=[source_image_input],
|
| 107 |
+
cache_examples=False,
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
with gr.Accordion(open=True, label="Cropping Options for Source Image"):
|
| 111 |
+
with gr.Row():
|
| 112 |
+
flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
|
| 113 |
+
scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
|
| 114 |
+
vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
|
| 115 |
+
vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)
|
| 116 |
+
|
| 117 |
+
with gr.Column():
|
| 118 |
+
with gr.Tabs():
|
| 119 |
+
with gr.TabItem("📁 Driving Pickle") as tab_pickle:
|
| 120 |
+
with gr.Accordion(open=True, label="Driving Pickle"):
|
| 121 |
+
driving_video_pickle_input = gr.File()
|
| 122 |
+
gr.Examples(
|
| 123 |
+
examples=[
|
| 124 |
+
[osp.join(example_video_dir, "wink.pkl")],
|
| 125 |
+
[osp.join(example_video_dir, "shy.pkl")],
|
| 126 |
+
[osp.join(example_video_dir, "aggrieved.pkl")],
|
| 127 |
+
[osp.join(example_video_dir, "open_lip.pkl")],
|
| 128 |
+
[osp.join(example_video_dir, "laugh.pkl")],
|
| 129 |
+
[osp.join(example_video_dir, "talking.pkl")],
|
| 130 |
+
[osp.join(example_video_dir, "shake_face.pkl")],
|
| 131 |
+
],
|
| 132 |
+
inputs=[driving_video_pickle_input],
|
| 133 |
+
cache_examples=False,
|
| 134 |
+
)
|
| 135 |
+
with gr.TabItem("🎞️ Driving Video") as tab_video:
|
| 136 |
+
with gr.Accordion(open=True, label="Driving Video"):
|
| 137 |
+
driving_video_input = gr.Video()
|
| 138 |
+
gr.Examples(
|
| 139 |
+
examples=[
|
| 140 |
+
# [osp.join(example_video_dir, "d0.mp4")],
|
| 141 |
+
# [osp.join(example_video_dir, "d18.mp4")],
|
| 142 |
+
[osp.join(example_video_dir, "d19.mp4")],
|
| 143 |
+
[osp.join(example_video_dir, "d14.mp4")],
|
| 144 |
+
[osp.join(example_video_dir, "d6.mp4")],
|
| 145 |
+
[osp.join(example_video_dir, "d3.mp4")],
|
| 146 |
+
],
|
| 147 |
+
inputs=[driving_video_input],
|
| 148 |
+
cache_examples=False,
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
tab_selection = gr.Textbox(visible=False)
|
| 152 |
+
tab_pickle.select(lambda: "Pickle", None, tab_selection)
|
| 153 |
+
tab_video.select(lambda: "Video", None, tab_selection)
|
| 154 |
+
with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
|
| 155 |
+
with gr.Row():
|
| 156 |
+
flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
|
| 157 |
+
scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
|
| 158 |
+
vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
|
| 159 |
+
vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)
|
| 160 |
+
|
| 161 |
+
with gr.Row():
|
| 162 |
+
with gr.Accordion(open=False, label="Animation Options"):
|
| 163 |
+
with gr.Row():
|
| 164 |
+
flag_stitching = gr.Checkbox(value=False, label="stitching (not recommended)")
|
| 165 |
+
flag_remap_input = gr.Checkbox(value=False, label="paste-back (not recommended)")
|
| 166 |
+
driving_multiplier = gr.Number(value=1.0, label="driving multiplier", minimum=0.0, maximum=2.0, step=0.02)
|
| 167 |
+
|
| 168 |
+
gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
|
| 169 |
+
with gr.Row():
|
| 170 |
+
process_button_animation = gr.Button("🚀 Animate", variant="primary")
|
| 171 |
+
with gr.Row():
|
| 172 |
+
with gr.Column():
|
| 173 |
+
with gr.Accordion(open=True, label="The animated video in the cropped image space"):
|
| 174 |
+
output_video_i2v.render()
|
| 175 |
+
with gr.Column():
|
| 176 |
+
with gr.Accordion(open=True, label="The animated gif in the cropped image space"):
|
| 177 |
+
output_video_i2v_gif.render()
|
| 178 |
+
with gr.Column():
|
| 179 |
+
with gr.Accordion(open=True, label="The animated video"):
|
| 180 |
+
output_video_concat_i2v.render()
|
| 181 |
+
with gr.Row():
|
| 182 |
+
process_button_reset = gr.ClearButton([source_image_input, driving_video_input, output_video_i2v, output_video_concat_i2v, output_video_i2v_gif], value="🧹 Clear")
|
| 183 |
+
|
| 184 |
+
with gr.Row():
|
| 185 |
+
# Examples
|
| 186 |
+
gr.Markdown("## You could also choose the examples below by one click ⬇️")
|
| 187 |
+
with gr.Row():
|
| 188 |
+
with gr.Tabs():
|
| 189 |
+
with gr.TabItem("📁 Driving Pickle") as tab_video:
|
| 190 |
+
gr.Examples(
|
| 191 |
+
examples=data_examples_i2v_pickle,
|
| 192 |
+
fn=gpu_wrapped_execute_video,
|
| 193 |
+
inputs=[
|
| 194 |
+
source_image_input,
|
| 195 |
+
driving_video_pickle_input,
|
| 196 |
+
flag_do_crop_input,
|
| 197 |
+
flag_stitching,
|
| 198 |
+
flag_remap_input,
|
| 199 |
+
flag_crop_driving_video_input,
|
| 200 |
+
],
|
| 201 |
+
outputs=[output_image, output_image_paste_back, output_video_i2v_gif],
|
| 202 |
+
examples_per_page=len(data_examples_i2v_pickle),
|
| 203 |
+
cache_examples=False,
|
| 204 |
+
)
|
| 205 |
+
with gr.TabItem("🎞️ Driving Video") as tab_video:
|
| 206 |
+
gr.Examples(
|
| 207 |
+
examples=data_examples_i2v,
|
| 208 |
+
fn=gpu_wrapped_execute_video,
|
| 209 |
+
inputs=[
|
| 210 |
+
source_image_input,
|
| 211 |
+
driving_video_input,
|
| 212 |
+
flag_do_crop_input,
|
| 213 |
+
flag_stitching,
|
| 214 |
+
flag_remap_input,
|
| 215 |
+
flag_crop_driving_video_input,
|
| 216 |
+
],
|
| 217 |
+
outputs=[output_image, output_image_paste_back, output_video_i2v_gif],
|
| 218 |
+
examples_per_page=len(data_examples_i2v),
|
| 219 |
+
cache_examples=False,
|
| 220 |
+
)
|
| 221 |
+
|
| 222 |
+
process_button_animation.click(
|
| 223 |
+
fn=gpu_wrapped_execute_video,
|
| 224 |
+
inputs=[
|
| 225 |
+
source_image_input,
|
| 226 |
+
driving_video_input,
|
| 227 |
+
driving_video_pickle_input,
|
| 228 |
+
flag_do_crop_input,
|
| 229 |
+
flag_remap_input,
|
| 230 |
+
driving_multiplier,
|
| 231 |
+
flag_stitching,
|
| 232 |
+
flag_crop_driving_video_input,
|
| 233 |
+
scale,
|
| 234 |
+
vx_ratio,
|
| 235 |
+
vy_ratio,
|
| 236 |
+
scale_crop_driving_video,
|
| 237 |
+
vx_ratio_crop_driving_video,
|
| 238 |
+
vy_ratio_crop_driving_video,
|
| 239 |
+
tab_selection,
|
| 240 |
+
],
|
| 241 |
+
outputs=[output_video_i2v, output_video_concat_i2v, output_video_i2v_gif],
|
| 242 |
+
show_progress=True
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
demo.launch(
|
| 246 |
+
server_port=args.server_port,
|
| 247 |
+
share=args.share,
|
| 248 |
+
server_name=args.server_name
|
| 249 |
+
)
|
ckpts/LivePortrait/assets/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
examples/driving/*.pkl
|
| 2 |
+
examples/driving/*_crop.mp4
|
ckpts/LivePortrait/assets/docs/LivePortrait-Gradio-2024-07-19.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/animals-mode-gradio-2024-08-02.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/changelog/2024-07-10.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 2024/07/10
|
| 2 |
+
|
| 3 |
+
**First, thank you all for your attention, support, sharing, and contributions to LivePortrait!** ❤️
|
| 4 |
+
The popularity of LivePortrait has exceeded our expectations. If you encounter any issues or other problems and we do not respond promptly, please accept our apologies. We are still actively updating and improving this repository.
|
| 5 |
+
|
| 6 |
+
### Updates
|
| 7 |
+
|
| 8 |
+
- <strong>Audio and video concatenating: </strong> If the driving video contains audio, it will automatically be included in the generated video. Additionally, the generated video will maintain the same FPS as the driving video. If you run LivePortrait on Windows, you need to install `ffprobe` and `ffmpeg` exe, see issue [#94](https://github.com/KwaiVGI/LivePortrait/issues/94).
|
| 9 |
+
|
| 10 |
+
- <strong>Driving video auto-cropping: </strong> Implemented automatic cropping for driving videos by tracking facial landmarks and calculating a global cropping box with a 1:1 aspect ratio. Alternatively, you can crop using video editing software or other tools to achieve a 1:1 ratio. Auto-cropping is not enbaled by default, you can specify it by `--flag_crop_driving_video`.
|
| 11 |
+
|
| 12 |
+
- <strong>Motion template making: </strong> Added the ability to create motion templates to protect privacy. The motion template is a `.pkl` file that only contains the motions of the driving video. Theoretically, it is impossible to reconstruct the original face from the template. These motion templates can be used to generate videos without needing the original driving video. By default, the motion template will be generated and saved as a `.pkl` file with the same name as the driving video, e.g., `d0.mp4` -> `d0.pkl`. Once generated, you can specify it using the `-d` or `--driving` option.
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
### About driving video
|
| 16 |
+
|
| 17 |
+
- For a guide on using your own driving video, see the [driving video auto-cropping](https://github.com/KwaiVGI/LivePortrait/tree/main?tab=readme-ov-file#driving-video-auto-cropping) section.
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
### Others
|
| 21 |
+
|
| 22 |
+
- If you encounter a black box problem, disable half-precision inference by using `--no_flag_use_half_precision`, reported by issue [#40](https://github.com/KwaiVGI/LivePortrait/issues/40), [#48](https://github.com/KwaiVGI/LivePortrait/issues/48), [#62](https://github.com/KwaiVGI/LivePortrait/issues/62).
|
ckpts/LivePortrait/assets/docs/changelog/2024-07-19.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 2024/07/19
|
| 2 |
+
|
| 3 |
+
**Once again, we would like to express our heartfelt gratitude for your love, attention, and support for LivePortrait! 🎉**
|
| 4 |
+
We are excited to announce the release of an implementation of Portrait Video Editing (aka v2v) today! Special thanks to the hard work of the LivePortrait team: [Dingyun Zhang](https://github.com/Mystery099), [Zhizhou Zhong](https://github.com/zzzweakman), and [Jianzhu Guo](https://github.com/cleardusk).
|
| 5 |
+
|
| 6 |
+
### Updates
|
| 7 |
+
|
| 8 |
+
- <strong>Portrait video editing (v2v):</strong> Implemented a version of Portrait Video Editing (aka v2v). Ensure you have `pykalman` package installed, which has been added in [`requirements_base.txt`](../../../requirements_base.txt). You can specify the source video using the `-s` or `--source` option, adjust the temporal smoothness of motion with `--driving_smooth_observation_variance`, enable head pose motion transfer with `--flag_video_editing_head_rotation`, and ensure the eye-open scalar of each source frame matches the first source frame before animation with `--flag_source_video_eye_retargeting`.
|
| 9 |
+
|
| 10 |
+
- <strong>More options in Gradio:</strong> We have upgraded the Gradio interface and added more options. These include `Cropping Options for Source Image or Video` and `Cropping Options for Driving Video`, providing greater flexibility and control.
|
| 11 |
+
|
| 12 |
+
<p align="center">
|
| 13 |
+
<img src="../LivePortrait-Gradio-2024-07-19.jpg" alt="LivePortrait" width="800px">
|
| 14 |
+
<br>
|
| 15 |
+
The Gradio Interface for LivePortrait
|
| 16 |
+
</p>
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
### Community Contributions
|
| 20 |
+
|
| 21 |
+
- **ONNX/TensorRT Versions of LivePortrait:** Explore optimized versions of LivePortrait for faster performance:
|
| 22 |
+
- [FasterLivePortrait](https://github.com/warmshao/FasterLivePortrait) by [warmshao](https://github.com/warmshao) ([#150](https://github.com/KwaiVGI/LivePortrait/issues/150))
|
| 23 |
+
- [Efficient-Live-Portrait](https://github.com/aihacker111/Efficient-Live-Portrait) by [aihacker111](https://github.com/aihacker111/Efficient-Live-Portrait) ([#126](https://github.com/KwaiVGI/LivePortrait/issues/126), [#142](https://github.com/KwaiVGI/LivePortrait/issues/142))
|
| 24 |
+
- **LivePortrait with [X-Pose](https://github.com/IDEA-Research/X-Pose) Detection:** Check out [LivePortrait](https://github.com/ShiJiaying/LivePortrait) by [ShiJiaying](https://github.com/ShiJiaying) for enhanced detection capabilities using X-pose, see [#119](https://github.com/KwaiVGI/LivePortrait/issues/119).
|
ckpts/LivePortrait/assets/docs/changelog/2024-07-24.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 2024/07/24
|
| 2 |
+
|
| 3 |
+
### Updates
|
| 4 |
+
|
| 5 |
+
- **Portrait pose editing:** You can change the `relative pitch`, `relative yaw`, and `relative roll` in the Gradio interface to adjust the pose of the source portrait.
|
| 6 |
+
- **Detection threshold:** We have added a `--det_thresh` argument with a default value of 0.15 to increase recall, meaning more types of faces (e.g., monkeys, human-like) will be detected. You can set it to other values, e.g., 0.5, by using `python app.py --det_thresh 0.5`.
|
| 7 |
+
|
| 8 |
+
<p align="center">
|
| 9 |
+
<img src="../pose-edit-2024-07-24.jpg" alt="LivePortrait" width="960px">
|
| 10 |
+
<br>
|
| 11 |
+
Pose Editing in the Gradio Interface
|
| 12 |
+
</p>
|
ckpts/LivePortrait/assets/docs/changelog/2024-08-02.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 2024/08/02
|
| 2 |
+
|
| 3 |
+
<table class="center" style="width: 80%; margin-left: auto; margin-right: auto;">
|
| 4 |
+
<tr>
|
| 5 |
+
<td style="text-align: center"><b>Animals Singing Dance Monkey 🎤</b></td>
|
| 6 |
+
</tr>
|
| 7 |
+
|
| 8 |
+
<tr>
|
| 9 |
+
<td style="border: none; text-align: center;">
|
| 10 |
+
<video controls loop src="https://github.com/user-attachments/assets/38d5b6e5-d29b-458d-9f2c-4dd52546cb41" muted="false" style="width: 60%;"></video>
|
| 11 |
+
</td>
|
| 12 |
+
</tr>
|
| 13 |
+
</table>
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
🎉 We are excited to announce the release of a new version featuring animals mode, along with several other updates. Special thanks to the dedicated efforts of the LivePortrait team. 💪 We also provided an one-click installer for Windows users, checkout the details [here](./2024-08-05.md).
|
| 17 |
+
|
| 18 |
+
### Updates on Animals mode
|
| 19 |
+
We are pleased to announce the release of the animals mode, which is fine-tuned on approximately 230K frames of various animals (mostly cats and dogs). The trained weights have been updated in the `liveportrait_animals` subdirectory, available on [HuggingFace](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/) or [Google Drive](https://drive.google.com/drive/u/0/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib). You should [download the weights](https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#2-download-pretrained-weights) before running. There are two ways to run this mode.
|
| 20 |
+
|
| 21 |
+
> Please note that we have not trained the stitching and retargeting modules for the animals model due to several technical issues. _This may be addressed in future updates._ Therefore, we recommend **disabling stitching by setting the `--no_flag_stitching`** option when running the model. Additionally, `paste-back` is also not recommended.
|
| 22 |
+
|
| 23 |
+
#### Install X-Pose
|
| 24 |
+
We have chosen [X-Pose](https://github.com/IDEA-Research/X-Pose) as the keypoints detector for animals. This relies on `transformers==4.22.0` and `pillow>=10.2.0` (which are already updated in `requirements.txt`) and requires building an OP named `MultiScaleDeformableAttention`.
|
| 25 |
+
|
| 26 |
+
Refer to the [PyTorch installation](https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#for-linux-or-windows-users) for Linux and Windows users.
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
Next, build the OP `MultiScaleDeformableAttention` by running:
|
| 30 |
+
```bash
|
| 31 |
+
cd src/utils/dependencies/XPose/models/UniPose/ops
|
| 32 |
+
python setup.py build install
|
| 33 |
+
cd - # this returns to the previous directory
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
To run the model, use the `inference_animals.py` script:
|
| 37 |
+
```bash
|
| 38 |
+
python inference_animals.py -s assets/examples/source/s39.jpg -d assets/examples/driving/wink.pkl --no_flag_stitching --driving_multiplier 1.75
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
Alternatively, you can use Gradio for a more user-friendly interface. Launch it with:
|
| 42 |
+
```bash
|
| 43 |
+
python app_animals.py # --server_port 8889 --server_name "0.0.0.0" --share
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
> [!WARNING]
|
| 47 |
+
> [X-Pose](https://github.com/IDEA-Research/X-Pose) is only for Non-commercial Scientific Research Purposes, you should remove and replace it with other detectors if you use it for commercial purposes.
|
| 48 |
+
|
| 49 |
+
### Updates on Humans mode
|
| 50 |
+
|
| 51 |
+
- **Driving Options**: We have introduced an `expression-friendly` driving option to **reduce head wobbling**, now set as the default. While it may be less effective with large head poses, you can also select the `pose-friendly` option, which is the same as the previous version. This can be set using `--driving_option` or selected in the Gradio interface. Additionally, we added a `--driving_multiplier` option to adjust driving intensity, with a default value of 1, which can also be set in the Gradio interface.
|
| 52 |
+
|
| 53 |
+
- **Retargeting Video in Gradio**: We have implemented a video retargeting feature. You can specify a `target lip-open ratio` to adjust the mouth movement in the source video. For instance, setting it to 0 will close the mouth in the source video 🤐.
|
| 54 |
+
|
| 55 |
+
### Others
|
| 56 |
+
|
| 57 |
+
- [**Poe supports LivePortrait**](https://poe.com/LivePortrait). Check out the news on [X](https://x.com/poe_platform/status/1816136105781256260).
|
| 58 |
+
- [ComfyUI-LivePortraitKJ](https://github.com/kijai/ComfyUI-LivePortraitKJ) (1.1K 🌟) now includes MediaPipe as an alternative to InsightFace, ensuring the license remains under MIT and Apache 2.0.
|
| 59 |
+
- [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait) features real-time portrait pose/expression editing and animation, and is registered with ComfyUI-Manager.
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
**Below are some screenshots of the new features and improvements:**
|
| 64 |
+
|
| 65 |
+
|  |
|
| 66 |
+
|:---:|
|
| 67 |
+
| **The Gradio Interface of Animals Mode** |
|
| 68 |
+
|
| 69 |
+
|  |
|
| 70 |
+
|:---:|
|
| 71 |
+
| **Driving Options and Multiplier** |
|
| 72 |
+
|
| 73 |
+
|  |
|
| 74 |
+
|:---:|
|
| 75 |
+
| **The Feature of Retargeting Video** |
|
ckpts/LivePortrait/assets/docs/changelog/2024-08-05.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## One-click Windows Installer
|
| 2 |
+
|
| 3 |
+
### Download the installer from HuggingFace
|
| 4 |
+
```bash
|
| 5 |
+
# !pip install -U "huggingface_hub[cli]"
|
| 6 |
+
huggingface-cli download cleardusk/LivePortrait-Windows LivePortrait-Windows-v20240806.zip --local-dir ./
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
If you cannot access Hugging Face, you can use [hf-mirror](https://hf-mirror.com/) to download:
|
| 10 |
+
```bash
|
| 11 |
+
# !pip install -U "huggingface_hub[cli]"
|
| 12 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
| 13 |
+
huggingface-cli download cleardusk/LivePortrait-Windows LivePortrait-Windows-v20240806.zip --local-dir ./
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
Alternatively, you can manually download it from the [HuggingFace](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240806.zip) page.
|
| 17 |
+
|
| 18 |
+
Then, simply unzip the package `LivePortrait-Windows-v20240806.zip` and double-click `run_windows_human.bat` for the Humans mode, or `run_windows_animal.bat` for the **Animals mode**.
|
ckpts/LivePortrait/assets/docs/changelog/2024-08-06.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Precise Portrait Editing
|
| 2 |
+
|
| 3 |
+
Inspired by [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait) ([@PowerHouseMan](https://github.com/PowerHouseMan)), we have implemented a version of Precise Portrait Editing in the Gradio interface. With each adjustment of the slider, the edited image updates in real-time. You can click the `🔄 Reset` button to reset all slider parameters. However, the performance may not be as fast as the ComfyUI plugin.
|
| 4 |
+
|
| 5 |
+
<p align="center">
|
| 6 |
+
<img src="../editing-portrait-2024-08-06.jpg" alt="LivePortrait" width="960px">
|
| 7 |
+
<br>
|
| 8 |
+
Preciese Portrait Editing in the Gradio Interface
|
| 9 |
+
</p>
|
ckpts/LivePortrait/assets/docs/changelog/2024-08-19.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Image Driven and Regional Control
|
| 2 |
+
|
| 3 |
+
<p align="center">
|
| 4 |
+
<img src="../image-driven-image-2024-08-19.jpg" alt="LivePortrait" width="512px">
|
| 5 |
+
<br>
|
| 6 |
+
<strong>Image Drives an Image</strong>
|
| 7 |
+
</p>
|
| 8 |
+
|
| 9 |
+
You can now **use an image as a driving signal** to drive the source image or video! Additionally, we **have refined the driving options to support expressions, pose, lips, eyes, or all** (all is consistent with the previous default method), which we name regional control. The control is becoming more and more precise! 🎯
|
| 10 |
+
|
| 11 |
+
> Please note that image-based driving or regional control may not perform well in certain cases. Feel free to try different options, and be patient. 😊
|
| 12 |
+
|
| 13 |
+
> [!Note]
|
| 14 |
+
> We recognize that the project now offers more options, which have become increasingly complex, but due to our limited team capacity and resources, we haven’t fully documented them yet. We ask for your understanding and will work to improve the documentation over time. Contributions via PRs are welcome! If anyone is considering donating or sponsoring, feel free to leave a message in the GitHub Issues or Discussions. We will set up a payment account to reward the team members or support additional efforts in maintaining the project. 💖
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
### CLI Usage
|
| 18 |
+
It's very simple to use an image as a driving reference. Just set the `-d` argument to the driving image:
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
python inference.py -s assets/examples/source/s5.jpg -d assets/examples/driving/d30.jpg
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
To change the `animation_region` option, you can set the `--animation_region` argument to `exp`, `pose`, `lip`, `eyes`, or `all`. For example, to only drive the lip region, you can run:
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
# only driving the lip region
|
| 28 |
+
python inference.py -s assets/examples/source/s5.jpg -d assets/examples/driving/d0.mp4 --animation_region lip
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### Gradio Interface
|
| 32 |
+
|
| 33 |
+
<p align="center">
|
| 34 |
+
<img src="../image-driven-portrait-animation-2024-08-19.jpg" alt="LivePortrait" width="960px">
|
| 35 |
+
<br>
|
| 36 |
+
<strong>Image-driven Portrait Animation and Regional Control</strong>
|
| 37 |
+
</p>
|
| 38 |
+
|
| 39 |
+
### More Detailed Explanation
|
| 40 |
+
|
| 41 |
+
**flag_relative_motion**:
|
| 42 |
+
When using an image as the driving input, setting `--flag_relative_motion` to true will apply the motion deformation between the driving image and its canonical form. If set to false, the absolute motion of the driving image is used, which may amplify expression driving strength but could also cause identity leakage. This option corresponds to the `relative motion` toggle in the Gradio interface. Additionally, if both source and driving inputs are images, the output will be an image. If the source is a video and the driving input is an image, the output will be a video, with each frame driven by the image's motion. The Gradio interface automatically saves and displays the output in the appropriate format.
|
| 43 |
+
|
| 44 |
+
**animation_region**:
|
| 45 |
+
This argument offers five options:
|
| 46 |
+
|
| 47 |
+
- `exp`: Only the expression of the driving input influences the source.
|
| 48 |
+
- `pose`: Only the head pose drives the source.
|
| 49 |
+
- `lip`: Only lip movement drives the source.
|
| 50 |
+
- `eyes`: Only eye movement drives the source.
|
| 51 |
+
- `all`: All motions from the driving input are applied.
|
| 52 |
+
|
| 53 |
+
You can also select these options directly in the Gradio interface.
|
| 54 |
+
|
| 55 |
+
**Editing the Lip Region of the Source Video to a Neutral Expression**:
|
| 56 |
+
In response to requests for a more neutral lip region in the `Retargeting Video` of the Gradio interface, we've added a `keeping the lip silent` option. When selected, the animated video's lip region will adopt a neutral expression. However, this may cause inter-frame jitter or identity leakage, as it uses a mode similar to absolute driving. Note that the neutral expression may sometimes feature a slightly open mouth.
|
| 57 |
+
|
| 58 |
+
**Others**:
|
| 59 |
+
When both source and driving inputs are videos, the output motion may be a blend of both, due to the default setting of `--flag_relative_motion`. This option uses relative driving, where the motion offset of the current driving frame relative to the first driving frame is added to the source frame's motion. In contrast, `--no_flag_relative_motion` applies the driving frame's motion directly as the final driving motion.
|
| 60 |
+
|
| 61 |
+
For CLI usage, to retain only the driving video's motion in the output, use:
|
| 62 |
+
```bash
|
| 63 |
+
python inference.py --no_flag_relative_motion
|
| 64 |
+
```
|
| 65 |
+
In the Gradio interface, simply uncheck the relative motion option. Note that absolute driving may cause jitter or identity leakage in the animated video.
|
ckpts/LivePortrait/assets/docs/changelog/2025-01-01.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 2025/01/01
|
| 2 |
+
|
| 3 |
+
**We’re thrilled that cats 🐱 are now speaking and singing across the internet!** 🎶
|
| 4 |
+
|
| 5 |
+
In this update, we’ve improved the [Animals model](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/liveportrait_animals/base_models_v1.1) with more data. While you might notice only a slight improvement for cats (if at all 😼), dogs have gotten a slightly better upgrade. For example, the model is now better at recognizing their mouths instead of mistaking them for noses. 🐶
|
| 6 |
+
|
| 7 |
+
<table class="center" style="width: 80%; margin-left: auto; margin-right: auto;">
|
| 8 |
+
<tr>
|
| 9 |
+
<td style="text-align: center"><b>Before vs. After (v1.1)</b></td>
|
| 10 |
+
</tr>
|
| 11 |
+
|
| 12 |
+
<tr>
|
| 13 |
+
<td style="border: none; text-align: center;">
|
| 14 |
+
<video controls loop src="https://github.com/user-attachments/assets/59fc09b9-6cb7-4265-833f-eebb27ed9511" muted="false" style="width: 60%;"></video>
|
| 15 |
+
</td>
|
| 16 |
+
</tr>
|
| 17 |
+
</table>
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
The new version (v1.1) Animals Model has been updated on [HuggingFace](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/liveportrait_animals/base_models_v1.1). The new version is enabled by default.
|
| 21 |
+
|
| 22 |
+
> [!IMPORTANT]
|
| 23 |
+
> Note: Make sure to update your weights to use the new version.
|
| 24 |
+
|
| 25 |
+
If you prefer to use the original version, simply modify the configuration in [inference_config.py](../../../src/config/inference_config.py#L29)
|
| 26 |
+
```python
|
| 27 |
+
version_animals = "" # old version
|
| 28 |
+
# version_animals = "_v1.1" # new (v1.1) version
|
| 29 |
+
```
|
ckpts/LivePortrait/assets/docs/directory-structure.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## The directory structure of `pretrained_weights`
|
| 2 |
+
|
| 3 |
+
```text
|
| 4 |
+
pretrained_weights
|
| 5 |
+
├── insightface
|
| 6 |
+
│ └── models
|
| 7 |
+
│ └── buffalo_l
|
| 8 |
+
│ ├── 2d106det.onnx
|
| 9 |
+
│ └── det_10g.onnx
|
| 10 |
+
├── liveportrait
|
| 11 |
+
│ ├── base_models
|
| 12 |
+
│ │ ├── appearance_feature_extractor.pth
|
| 13 |
+
│ │ ├── motion_extractor.pth
|
| 14 |
+
│ │ ├── spade_generator.pth
|
| 15 |
+
│ │ └── warping_module.pth
|
| 16 |
+
│ ├── landmark.onnx
|
| 17 |
+
│ └── retargeting_models
|
| 18 |
+
│ └── stitching_retargeting_module.pth
|
| 19 |
+
└── liveportrait_animals
|
| 20 |
+
├── base_models
|
| 21 |
+
│ ├── appearance_feature_extractor.pth
|
| 22 |
+
│ ├── motion_extractor.pth
|
| 23 |
+
│ ├── spade_generator.pth
|
| 24 |
+
│ └── warping_module.pth
|
| 25 |
+
├── retargeting_models
|
| 26 |
+
│ └── stitching_retargeting_module.pth
|
| 27 |
+
└── xpose.pth
|
| 28 |
+
```
|
ckpts/LivePortrait/assets/docs/driving-option-multiplier-2024-08-02.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/editing-portrait-2024-08-06.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/how-to-install-ffmpeg.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Install FFmpeg
|
| 2 |
+
|
| 3 |
+
Make sure you have `ffmpeg` and `ffprobe` installed on your system. If you don't have them installed, follow the instructions below.
|
| 4 |
+
|
| 5 |
+
> [!Note]
|
| 6 |
+
> The installation is copied from [SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) 🤗
|
| 7 |
+
|
| 8 |
+
### Conda Users
|
| 9 |
+
|
| 10 |
+
```bash
|
| 11 |
+
conda install ffmpeg
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
### Ubuntu/Debian Users
|
| 15 |
+
|
| 16 |
+
```bash
|
| 17 |
+
sudo apt install ffmpeg
|
| 18 |
+
sudo apt install libsox-dev
|
| 19 |
+
conda install -c conda-forge 'ffmpeg<7'
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Windows Users
|
| 23 |
+
|
| 24 |
+
Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
|
| 25 |
+
|
| 26 |
+
### MacOS Users
|
| 27 |
+
```bash
|
| 28 |
+
brew install ffmpeg
|
| 29 |
+
```
|
ckpts/LivePortrait/assets/docs/image-driven-image-2024-08-19.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/image-driven-portrait-animation-2024-08-19.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/inference-animals.gif
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/inference.gif
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/pose-edit-2024-07-24.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/retargeting-video-2024-08-02.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/showcase.gif
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/showcase2.gif
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/docs/speed.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Speed
|
| 2 |
+
|
| 3 |
+
Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
|
| 4 |
+
|
| 5 |
+
| Model | Parameters(M) | Model Size(MB) | Inference(ms) |
|
| 6 |
+
|-----------------------------------|:-------------:|:--------------:|:-------------:|
|
| 7 |
+
| Appearance Feature Extractor | 0.84 | 3.3 | 0.82 |
|
| 8 |
+
| Motion Extractor | 28.12 | 108 | 0.84 |
|
| 9 |
+
| Spade Generator | 55.37 | 212 | 7.59 |
|
| 10 |
+
| Warping Module | 45.53 | 174 | 5.21 |
|
| 11 |
+
| Stitching and Retargeting Modules | 0.23 | 2.3 | 0.31 |
|
| 12 |
+
|
| 13 |
+
*Note: The values for the Stitching and Retargeting Modules represent the combined parameter counts and total inference time of three sequential MLP networks.*
|
ckpts/LivePortrait/assets/examples/driving/aggrieved.pkl
ADDED
|
Binary file (25.7 kB). View file
|
|
|
ckpts/LivePortrait/assets/examples/driving/d0.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63f6f9962e1fdf6e6722172e7a18155204858d5d5ce3b1e0646c150360c33bed
|
| 3 |
+
size 2958395
|
ckpts/LivePortrait/assets/examples/driving/d1.pkl
ADDED
|
Binary file (8.6 kB). View file
|
|
|
ckpts/LivePortrait/assets/examples/driving/d10.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac7ee3c2419046f11dc230b6db33c2391a98334eba2b1d773e7eb9627992622f
|
| 3 |
+
size 1064930
|
ckpts/LivePortrait/assets/examples/driving/d11.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b449a25686eddd42e244fb571c908a123aa0154776682601df1f3830f8f65c
|
| 3 |
+
size 468504
|
ckpts/LivePortrait/assets/examples/driving/d12.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/examples/driving/d12.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2895169e5aa2a882d39dfc80a17a3eab1cf6ec23b9b6f6be76bae48deda15219
|
| 3 |
+
size 596446
|
ckpts/LivePortrait/assets/examples/driving/d13.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d03e39c28323cde1c5fc6c5629aa83fe6c834fa7c9ed2dac969e1247eaafdb60
|
| 3 |
+
size 2475854
|
ckpts/LivePortrait/assets/examples/driving/d14.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:465e72fbf26bf4ed46d1adf7aab8a7344aac54a2f92c4d82a1d53127f0170472
|
| 3 |
+
size 891025
|
ckpts/LivePortrait/assets/examples/driving/d18.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dc94c1fec7ef7dc831c8a49f0e1788ae568812cb68e62f6875d9070f573d02a
|
| 3 |
+
size 187263
|
ckpts/LivePortrait/assets/examples/driving/d19.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/examples/driving/d19.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3047ba66296d96b8a4584e412e61493d7bc0fa5149c77b130e7feea375e698bd
|
| 3 |
+
size 232859
|
ckpts/LivePortrait/assets/examples/driving/d2.pkl
ADDED
|
Binary file (8.6 kB). View file
|
|
|
ckpts/LivePortrait/assets/examples/driving/d20.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e249873c21abf27b3c9f405a1b5283ef1b70e5eff21c1df44ca4b5b4d9b7309
|
| 3 |
+
size 462335
|
ckpts/LivePortrait/assets/examples/driving/d3.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef5c86e49b1b43dcb1449b499eb5a7f0cbae2f78aec08b5598193be1e4257099
|
| 3 |
+
size 1430968
|
ckpts/LivePortrait/assets/examples/driving/d30.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/examples/driving/d38.jpg
ADDED
|
Git LFS Details
|
ckpts/LivePortrait/assets/examples/driving/d5.pkl
ADDED
|
Binary file (77.8 kB). View file
|
|
|
ckpts/LivePortrait/assets/examples/driving/d6.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00e3ea79bbf28cbdc4fbb67ec655d9a0fe876e880ec45af55ae481348d0c0fff
|
| 3 |
+
size 1967790
|
ckpts/LivePortrait/assets/examples/driving/d7.pkl
ADDED
|
Binary file (93.5 kB). View file
|
|
|