Omni1307 commited on
Commit
8ccbf6c
·
verified ·
1 Parent(s): 66417fc

Upload 285 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +19 -0
  2. ckpts/LivePortrait/.gitignore +34 -0
  3. ckpts/LivePortrait/.vscode/settings.json +19 -0
  4. ckpts/LivePortrait/LICENSE +30 -0
  5. ckpts/LivePortrait/app.py +493 -0
  6. ckpts/LivePortrait/app_animals.py +249 -0
  7. ckpts/LivePortrait/assets/.gitignore +2 -0
  8. ckpts/LivePortrait/assets/docs/LivePortrait-Gradio-2024-07-19.jpg +3 -0
  9. ckpts/LivePortrait/assets/docs/animals-mode-gradio-2024-08-02.jpg +3 -0
  10. ckpts/LivePortrait/assets/docs/changelog/2024-07-10.md +22 -0
  11. ckpts/LivePortrait/assets/docs/changelog/2024-07-19.md +24 -0
  12. ckpts/LivePortrait/assets/docs/changelog/2024-07-24.md +12 -0
  13. ckpts/LivePortrait/assets/docs/changelog/2024-08-02.md +75 -0
  14. ckpts/LivePortrait/assets/docs/changelog/2024-08-05.md +18 -0
  15. ckpts/LivePortrait/assets/docs/changelog/2024-08-06.md +9 -0
  16. ckpts/LivePortrait/assets/docs/changelog/2024-08-19.md +65 -0
  17. ckpts/LivePortrait/assets/docs/changelog/2025-01-01.md +29 -0
  18. ckpts/LivePortrait/assets/docs/directory-structure.md +28 -0
  19. ckpts/LivePortrait/assets/docs/driving-option-multiplier-2024-08-02.jpg +3 -0
  20. ckpts/LivePortrait/assets/docs/editing-portrait-2024-08-06.jpg +3 -0
  21. ckpts/LivePortrait/assets/docs/how-to-install-ffmpeg.md +29 -0
  22. ckpts/LivePortrait/assets/docs/image-driven-image-2024-08-19.jpg +3 -0
  23. ckpts/LivePortrait/assets/docs/image-driven-portrait-animation-2024-08-19.jpg +3 -0
  24. ckpts/LivePortrait/assets/docs/inference-animals.gif +3 -0
  25. ckpts/LivePortrait/assets/docs/inference.gif +3 -0
  26. ckpts/LivePortrait/assets/docs/pose-edit-2024-07-24.jpg +3 -0
  27. ckpts/LivePortrait/assets/docs/retargeting-video-2024-08-02.jpg +3 -0
  28. ckpts/LivePortrait/assets/docs/showcase.gif +3 -0
  29. ckpts/LivePortrait/assets/docs/showcase2.gif +3 -0
  30. ckpts/LivePortrait/assets/docs/speed.md +13 -0
  31. ckpts/LivePortrait/assets/examples/driving/aggrieved.pkl +0 -0
  32. ckpts/LivePortrait/assets/examples/driving/d0.mp4 +3 -0
  33. ckpts/LivePortrait/assets/examples/driving/d1.pkl +0 -0
  34. ckpts/LivePortrait/assets/examples/driving/d10.mp4 +3 -0
  35. ckpts/LivePortrait/assets/examples/driving/d11.mp4 +3 -0
  36. ckpts/LivePortrait/assets/examples/driving/d12.jpg +3 -0
  37. ckpts/LivePortrait/assets/examples/driving/d12.mp4 +3 -0
  38. ckpts/LivePortrait/assets/examples/driving/d13.mp4 +3 -0
  39. ckpts/LivePortrait/assets/examples/driving/d14.mp4 +3 -0
  40. ckpts/LivePortrait/assets/examples/driving/d18.mp4 +3 -0
  41. ckpts/LivePortrait/assets/examples/driving/d19.jpg +3 -0
  42. ckpts/LivePortrait/assets/examples/driving/d19.mp4 +3 -0
  43. ckpts/LivePortrait/assets/examples/driving/d2.pkl +0 -0
  44. ckpts/LivePortrait/assets/examples/driving/d20.mp4 +3 -0
  45. ckpts/LivePortrait/assets/examples/driving/d3.mp4 +3 -0
  46. ckpts/LivePortrait/assets/examples/driving/d30.jpg +3 -0
  47. ckpts/LivePortrait/assets/examples/driving/d38.jpg +3 -0
  48. ckpts/LivePortrait/assets/examples/driving/d5.pkl +0 -0
  49. ckpts/LivePortrait/assets/examples/driving/d6.mp4 +3 -0
  50. ckpts/LivePortrait/assets/examples/driving/d7.pkl +0 -0
.gitattributes CHANGED
@@ -17,3 +17,22 @@ ckpts/naturalspeech3_facodec/ns3_facodec_decoder.bin filter=lfs diff=lfs merge=l
17
  ckpts/naturalspeech3_facodec/ns3_facodec_encoder_v2.bin filter=lfs diff=lfs merge=lfs -text
18
  ckpts/naturalspeech3_facodec/ns3_facodec_encoder.bin filter=lfs diff=lfs merge=lfs -text
19
  ckpts/naturalspeech3_facodec/ns3_facodec_redecoder.bin filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  ckpts/naturalspeech3_facodec/ns3_facodec_encoder_v2.bin filter=lfs diff=lfs merge=lfs -text
18
  ckpts/naturalspeech3_facodec/ns3_facodec_encoder.bin filter=lfs diff=lfs merge=lfs -text
19
  ckpts/naturalspeech3_facodec/ns3_facodec_redecoder.bin filter=lfs diff=lfs merge=lfs -text
20
+ ckpts/LivePortrait/assets/examples/driving/d8.pkl filter=lfs diff=lfs merge=lfs -text
21
+ ckpts/LivePortrait/pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
22
+ ckpts/LivePortrait/pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
23
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
24
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
25
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
26
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models_v1.1/warping_module.pth filter=lfs diff=lfs merge=lfs -text
27
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
28
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
29
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
30
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/base_models/warping_module.pth filter=lfs diff=lfs merge=lfs -text
31
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/retargeting_models/stitching_retargeting_module.pth filter=lfs diff=lfs merge=lfs -text
32
+ ckpts/LivePortrait/pretrained_weights/liveportrait_animals/xpose.pth filter=lfs diff=lfs merge=lfs -text
33
+ ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/appearance_feature_extractor.pth filter=lfs diff=lfs merge=lfs -text
34
+ ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/motion_extractor.pth filter=lfs diff=lfs merge=lfs -text
35
+ ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/spade_generator.pth filter=lfs diff=lfs merge=lfs -text
36
+ ckpts/LivePortrait/pretrained_weights/liveportrait/base_models/warping_module.pth filter=lfs diff=lfs merge=lfs -text
37
+ ckpts/LivePortrait/pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
38
+ ckpts/LivePortrait/pretrained_weights/liveportrait/retargeting_models/stitching_retargeting_module.pth filter=lfs diff=lfs merge=lfs -text
ckpts/LivePortrait/.gitignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ **/__pycache__/
4
+ *.py[cod]
5
+ **/*.py[cod]
6
+ *$py.class
7
+
8
+ # Model weights
9
+ **/*.pth
10
+ **/*.onnx
11
+
12
+ pretrained_weights/*.md
13
+ pretrained_weights/docs
14
+ pretrained_weights/liveportrait
15
+ pretrained_weights/liveportrait_animals
16
+
17
+ # Ipython notebook
18
+ *.ipynb
19
+
20
+ # Temporary files or benchmark resources
21
+ animations/*
22
+ tmp/*
23
+ .vscode/launch.json
24
+ **/*.DS_Store
25
+ gradio_temp/**
26
+
27
+ # Windows dependencies
28
+ ffmpeg/
29
+ LivePortrait_env/
30
+
31
+ # XPose build files
32
+ src/utils/dependencies/XPose/models/UniPose/ops/build
33
+ src/utils/dependencies/XPose/models/UniPose/ops/dist
34
+ src/utils/dependencies/XPose/models/UniPose/ops/MultiScaleDeformableAttention.egg-info
ckpts/LivePortrait/.vscode/settings.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.tabSize": 4
4
+ },
5
+ "files.eol": "\n",
6
+ "files.insertFinalNewline": true,
7
+ "files.trimFinalNewlines": true,
8
+ "files.trimTrailingWhitespace": true,
9
+ "files.exclude": {
10
+ "**/.git": true,
11
+ "**/.svn": true,
12
+ "**/.hg": true,
13
+ "**/CVS": true,
14
+ "**/.DS_Store": true,
15
+ "**/Thumbs.db": true,
16
+ "**/*.crswap": true,
17
+ "**/__pycache__": true
18
+ }
19
+ }
ckpts/LivePortrait/LICENSE ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Kuaishou Visual Generation and Interaction Center
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ The code of InsightFace is released under the MIT License.
26
+ The models of InsightFace are for non-commercial research purposes only.
27
+
28
+ If you want to use the LivePortrait project for commercial purposes, you
29
+ should remove and replace InsightFace’s detection models to fully comply with
30
+ the MIT license.
ckpts/LivePortrait/app.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ """
4
+ The entrance of the gradio for human
5
+ """
6
+
7
+ import os
8
+ import tyro
9
+ import subprocess
10
+ import gradio as gr
11
+ import os.path as osp
12
+ from src.utils.helper import load_description
13
+ from src.gradio_pipeline import GradioPipeline
14
+ from src.config.crop_config import CropConfig
15
+ from src.config.argument_config import ArgumentConfig
16
+ from src.config.inference_config import InferenceConfig
17
+
18
+
19
+ def partial_fields(target_class, kwargs):
20
+ return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
21
+
22
+
23
+ def fast_check_ffmpeg():
24
+ try:
25
+ subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
26
+ return True
27
+ except:
28
+ return False
29
+
30
+
31
+ # set tyro theme
32
+ tyro.extras.set_accent_color("bright_cyan")
33
+ args = tyro.cli(ArgumentConfig)
34
+
35
+ ffmpeg_dir = os.path.join(os.getcwd(), "ffmpeg")
36
+ if osp.exists(ffmpeg_dir):
37
+ os.environ["PATH"] += (os.pathsep + ffmpeg_dir)
38
+
39
+ if not fast_check_ffmpeg():
40
+ raise ImportError(
41
+ "FFmpeg is not installed. Please install FFmpeg (including ffmpeg and ffprobe) before running this script. https://ffmpeg.org/download.html"
42
+ )
43
+ # specify configs for inference
44
+ inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attribute of args to initial InferenceConfig
45
+ crop_cfg = partial_fields(CropConfig, args.__dict__) # use attribute of args to initial CropConfig
46
+ # global_tab_selection = None
47
+
48
+ gradio_pipeline = GradioPipeline(
49
+ inference_cfg=inference_cfg,
50
+ crop_cfg=crop_cfg,
51
+ args=args
52
+ )
53
+
54
+ if args.gradio_temp_dir not in (None, ''):
55
+ os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
56
+ os.makedirs(args.gradio_temp_dir, exist_ok=True)
57
+
58
+
59
+ def gpu_wrapped_execute_video(*args, **kwargs):
60
+ return gradio_pipeline.execute_video(*args, **kwargs)
61
+
62
+
63
+ def gpu_wrapped_execute_image_retargeting(*args, **kwargs):
64
+ return gradio_pipeline.execute_image_retargeting(*args, **kwargs)
65
+
66
+
67
+ def gpu_wrapped_execute_video_retargeting(*args, **kwargs):
68
+ return gradio_pipeline.execute_video_retargeting(*args, **kwargs)
69
+
70
+
71
+ def reset_sliders(*args, **kwargs):
72
+ return 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.5, True, True
73
+
74
+
75
+ # assets
76
+ title_md = "assets/gradio/gradio_title.md"
77
+ example_portrait_dir = "assets/examples/source"
78
+ example_video_dir = "assets/examples/driving"
79
+ data_examples_i2v = [
80
+ [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
81
+ [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
82
+ [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False],
83
+ [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False],
84
+ [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False],
85
+ [osp.join(example_portrait_dir, "s2.jpg"), osp.join(example_video_dir, "d13.mp4"), True, True, True, True],
86
+ ]
87
+ data_examples_v2v = [
88
+ [osp.join(example_portrait_dir, "s13.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, 3e-7],
89
+ # [osp.join(example_portrait_dir, "s14.mp4"), osp.join(example_video_dir, "d18.mp4"), True, True, True, False, False, 3e-7],
90
+ # [osp.join(example_portrait_dir, "s15.mp4"), osp.join(example_video_dir, "d19.mp4"), True, True, True, False, False, 3e-7],
91
+ [osp.join(example_portrait_dir, "s18.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, 3e-7],
92
+ # [osp.join(example_portrait_dir, "s19.mp4"), osp.join(example_video_dir, "d6.mp4"), True, True, True, False, False, 3e-7],
93
+ [osp.join(example_portrait_dir, "s20.mp4"), osp.join(example_video_dir, "d0.mp4"), True, True, True, False, 3e-7],
94
+ ]
95
+ #################### interface logic ####################
96
+
97
+ # Define components first
98
+ retargeting_source_scale = gr.Number(minimum=1.8, maximum=3.2, value=2.5, step=0.05, label="crop scale")
99
+ video_retargeting_source_scale = gr.Number(minimum=1.8, maximum=3.2, value=2.3, step=0.05, label="crop scale")
100
+ driving_smooth_observation_variance_retargeting = gr.Number(value=3e-6, label="motion smooth strength", minimum=1e-11, maximum=1e-2, step=1e-8)
101
+ video_retargeting_silence = gr.Checkbox(value=False, label="keeping the lip silent")
102
+ eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
103
+ lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
104
+ video_lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
105
+ head_pitch_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative pitch")
106
+ head_yaw_slider = gr.Slider(minimum=-25, maximum=25, value=0, step=1, label="relative yaw")
107
+ head_roll_slider = gr.Slider(minimum=-15.0, maximum=15.0, value=0, step=1, label="relative roll")
108
+ mov_x = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="x-axis movement")
109
+ mov_y = gr.Slider(minimum=-0.19, maximum=0.19, value=0.0, step=0.01, label="y-axis movement")
110
+ mov_z = gr.Slider(minimum=0.9, maximum=1.2, value=1.0, step=0.01, label="z-axis movement")
111
+ lip_variation_zero = gr.Slider(minimum=-0.09, maximum=0.09, value=0, step=0.01, label="pouting")
112
+ lip_variation_one = gr.Slider(minimum=-20.0, maximum=15.0, value=0, step=0.01, label="pursing 😐")
113
+ lip_variation_two = gr.Slider(minimum=0.0, maximum=15.0, value=0, step=0.01, label="grin 😁")
114
+ lip_variation_three = gr.Slider(minimum=-90.0, maximum=120.0, value=0, step=1.0, label="lip close <-> open")
115
+ smile = gr.Slider(minimum=-0.3, maximum=1.3, value=0, step=0.01, label="smile 😄")
116
+ wink = gr.Slider(minimum=0, maximum=39, value=0, step=0.01, label="wink 😉")
117
+ eyebrow = gr.Slider(minimum=-30, maximum=30, value=0, step=0.01, label="eyebrow 🤨")
118
+ eyeball_direction_x = gr.Slider(minimum=-30.0, maximum=30.0, value=0, step=0.01, label="eye gaze (horizontal) 👀")
119
+ eyeball_direction_y = gr.Slider(minimum=-63.0, maximum=63.0, value=0, step=0.01, label="eye gaze (vertical) 🙄")
120
+ retargeting_input_image = gr.Image(type="filepath")
121
+ retargeting_input_video = gr.Video()
122
+ output_image = gr.Image(type="numpy")
123
+ output_image_paste_back = gr.Image(type="numpy")
124
+ retargeting_output_image = gr.Image(type="numpy")
125
+ retargeting_output_image_paste_back = gr.Image(type="numpy")
126
+ output_video = gr.Video(autoplay=False)
127
+ output_video_paste_back = gr.Video(autoplay=False)
128
+
129
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
130
+ gr.HTML(load_description(title_md))
131
+
132
+ gr.Markdown(load_description("assets/gradio/gradio_description_upload.md"))
133
+ with gr.Row():
134
+ with gr.Column():
135
+ with gr.Tabs():
136
+ with gr.TabItem("🖼️ Source Image") as tab_image:
137
+ with gr.Accordion(open=True, label="Source Image"):
138
+ source_image_input = gr.Image(type="filepath")
139
+ gr.Examples(
140
+ examples=[
141
+ [osp.join(example_portrait_dir, "s9.jpg")],
142
+ [osp.join(example_portrait_dir, "s6.jpg")],
143
+ [osp.join(example_portrait_dir, "s10.jpg")],
144
+ [osp.join(example_portrait_dir, "s5.jpg")],
145
+ [osp.join(example_portrait_dir, "s7.jpg")],
146
+ [osp.join(example_portrait_dir, "s12.jpg")],
147
+ [osp.join(example_portrait_dir, "s22.jpg")],
148
+ [osp.join(example_portrait_dir, "s23.jpg")],
149
+ ],
150
+ inputs=[source_image_input],
151
+ cache_examples=False,
152
+ )
153
+
154
+ with gr.TabItem("🎞️ Source Video") as tab_video:
155
+ with gr.Accordion(open=True, label="Source Video"):
156
+ source_video_input = gr.Video()
157
+ gr.Examples(
158
+ examples=[
159
+ [osp.join(example_portrait_dir, "s13.mp4")],
160
+ # [osp.join(example_portrait_dir, "s14.mp4")],
161
+ # [osp.join(example_portrait_dir, "s15.mp4")],
162
+ [osp.join(example_portrait_dir, "s18.mp4")],
163
+ # [osp.join(example_portrait_dir, "s19.mp4")],
164
+ [osp.join(example_portrait_dir, "s20.mp4")],
165
+ ],
166
+ inputs=[source_video_input],
167
+ cache_examples=False,
168
+ )
169
+
170
+ tab_selection = gr.Textbox(visible=False)
171
+ tab_image.select(lambda: "Image", None, tab_selection)
172
+ tab_video.select(lambda: "Video", None, tab_selection)
173
+ with gr.Accordion(open=True, label="Cropping Options for Source Image or Video"):
174
+ with gr.Row():
175
+ flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
176
+ scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
177
+ vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
178
+ vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)
179
+
180
+ with gr.Column():
181
+ with gr.Tabs():
182
+ with gr.TabItem("🎞️ Driving Video") as v_tab_video:
183
+ with gr.Accordion(open=True, label="Driving Video"):
184
+ driving_video_input = gr.Video()
185
+ gr.Examples(
186
+ examples=[
187
+ [osp.join(example_video_dir, "d0.mp4")],
188
+ [osp.join(example_video_dir, "d18.mp4")],
189
+ [osp.join(example_video_dir, "d19.mp4")],
190
+ [osp.join(example_video_dir, "d14.mp4")],
191
+ [osp.join(example_video_dir, "d6.mp4")],
192
+ [osp.join(example_video_dir, "d20.mp4")],
193
+ ],
194
+ inputs=[driving_video_input],
195
+ cache_examples=False,
196
+ )
197
+ with gr.TabItem("🖼️ Driving Image") as v_tab_image:
198
+ with gr.Accordion(open=True, label="Driving Image"):
199
+ driving_image_input = gr.Image(type="filepath")
200
+ gr.Examples(
201
+ examples=[
202
+ [osp.join(example_video_dir, "d30.jpg")],
203
+ [osp.join(example_video_dir, "d9.jpg")],
204
+ [osp.join(example_video_dir, "d19.jpg")],
205
+ [osp.join(example_video_dir, "d8.jpg")],
206
+ [osp.join(example_video_dir, "d12.jpg")],
207
+ [osp.join(example_video_dir, "d38.jpg")],
208
+ ],
209
+ inputs=[driving_image_input],
210
+ cache_examples=False,
211
+ )
212
+
213
+ with gr.TabItem("📁 Driving Pickle") as v_tab_pickle:
214
+ with gr.Accordion(open=True, label="Driving Pickle"):
215
+ driving_video_pickle_input = gr.File(type="filepath", file_types=[".pkl"])
216
+ gr.Examples(
217
+ examples=[
218
+ [osp.join(example_video_dir, "d1.pkl")],
219
+ [osp.join(example_video_dir, "d2.pkl")],
220
+ [osp.join(example_video_dir, "d5.pkl")],
221
+ [osp.join(example_video_dir, "d7.pkl")],
222
+ [osp.join(example_video_dir, "d8.pkl")],
223
+ ],
224
+ inputs=[driving_video_pickle_input],
225
+ cache_examples=False,
226
+ )
227
+
228
+ v_tab_selection = gr.Textbox(visible=False)
229
+ v_tab_video.select(lambda: "Video", None, v_tab_selection)
230
+ v_tab_image.select(lambda: "Image", None, v_tab_selection)
231
+ v_tab_pickle.select(lambda: "Pickle", None, v_tab_selection)
232
+ # with gr.Accordion(open=False, label="Animation Instructions"):
233
+ # gr.Markdown(load_description("assets/gradio/gradio_description_animation.md"))
234
+ with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
235
+ with gr.Row():
236
+ flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
237
+ scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
238
+ vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
239
+ vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)
240
+
241
+ with gr.Row():
242
+ with gr.Accordion(open=True, label="Animation Options"):
243
+ with gr.Row():
244
+ flag_normalize_lip = gr.Checkbox(value=False, label="normalize lip")
245
+ flag_relative_input = gr.Checkbox(value=True, label="relative motion")
246
+ flag_remap_input = gr.Checkbox(value=True, label="paste-back")
247
+ flag_stitching_input = gr.Checkbox(value=True, label="stitching")
248
+ animation_region = gr.Radio(["exp", "pose", "lip", "eyes", "all"], value="all", label="animation region")
249
+ driving_option_input = gr.Radio(['expression-friendly', 'pose-friendly'], value="expression-friendly", label="driving option (i2v)")
250
+ driving_multiplier = gr.Number(value=1.0, label="driving multiplier (i2v)", minimum=0.0, maximum=2.0, step=0.02)
251
+ driving_smooth_observation_variance = gr.Number(value=3e-7, label="motion smooth strength (v2v)", minimum=1e-11, maximum=1e-2, step=1e-8)
252
+
253
+ gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
254
+ with gr.Row():
255
+ process_button_animation = gr.Button("🚀 Animate", variant="primary")
256
+ with gr.Row():
257
+ with gr.Column():
258
+ output_video_i2v = gr.Video(autoplay=False, label="The animated video in the original image space")
259
+ with gr.Column():
260
+ output_video_concat_i2v = gr.Video(autoplay=False, label="The animated video")
261
+ with gr.Row():
262
+ with gr.Column():
263
+ output_image_i2i = gr.Image(type="numpy", label="The animated image in the original image space", visible=False)
264
+ with gr.Column():
265
+ output_image_concat_i2i = gr.Image(type="numpy", label="The animated image", visible=False)
266
+ with gr.Row():
267
+ process_button_reset = gr.ClearButton([source_image_input, source_video_input, driving_video_pickle_input, driving_video_input, driving_image_input, output_video_i2v, output_video_concat_i2v, output_image_i2i, output_image_concat_i2i], value="🧹 Clear")
268
+
269
+ with gr.Row():
270
+ # Examples
271
+ gr.Markdown("## You could also choose the examples below by one click ⬇️")
272
+ with gr.Row():
273
+ with gr.Tabs():
274
+ with gr.TabItem("🖼️ Portrait Animation"):
275
+ gr.Examples(
276
+ examples=data_examples_i2v,
277
+ fn=gpu_wrapped_execute_video,
278
+ inputs=[
279
+ source_image_input,
280
+ driving_video_input,
281
+ flag_relative_input,
282
+ flag_do_crop_input,
283
+ flag_remap_input,
284
+ flag_crop_driving_video_input,
285
+ ],
286
+ outputs=[output_image, output_image_paste_back],
287
+ examples_per_page=len(data_examples_i2v),
288
+ cache_examples=False,
289
+ )
290
+ with gr.TabItem("🎞️ Portrait Video Editing"):
291
+ gr.Examples(
292
+ examples=data_examples_v2v,
293
+ fn=gpu_wrapped_execute_video,
294
+ inputs=[
295
+ source_video_input,
296
+ driving_video_input,
297
+ flag_relative_input,
298
+ flag_do_crop_input,
299
+ flag_remap_input,
300
+ flag_crop_driving_video_input,
301
+ driving_smooth_observation_variance,
302
+ ],
303
+ outputs=[output_image, output_image_paste_back],
304
+ examples_per_page=len(data_examples_v2v),
305
+ cache_examples=False,
306
+ )
307
+
308
+ # Retargeting Image
309
+ gr.Markdown(load_description("assets/gradio/gradio_description_retargeting.md"), visible=True)
310
+ with gr.Row(visible=True):
311
+ flag_do_crop_input_retargeting_image = gr.Checkbox(value=True, label="do crop (source)")
312
+ flag_stitching_retargeting_input = gr.Checkbox(value=True, label="stitching")
313
+ retargeting_source_scale.render()
314
+ eye_retargeting_slider.render()
315
+ lip_retargeting_slider.render()
316
+ with gr.Row(visible=True):
317
+ with gr.Column():
318
+ with gr.Accordion(open=True, label="Facial movement sliders"):
319
+ with gr.Row(visible=True):
320
+ head_pitch_slider.render()
321
+ head_yaw_slider.render()
322
+ head_roll_slider.render()
323
+ with gr.Row(visible=True):
324
+ mov_x.render()
325
+ mov_y.render()
326
+ mov_z.render()
327
+ with gr.Column():
328
+ with gr.Accordion(open=True, label="Facial expression sliders"):
329
+ with gr.Row(visible=True):
330
+ lip_variation_zero.render()
331
+ lip_variation_one.render()
332
+ lip_variation_two.render()
333
+ with gr.Row(visible=True):
334
+ lip_variation_three.render()
335
+ smile.render()
336
+ wink.render()
337
+ with gr.Row(visible=True):
338
+ eyebrow.render()
339
+ eyeball_direction_x.render()
340
+ eyeball_direction_y.render()
341
+ with gr.Row(visible=True):
342
+ reset_button = gr.Button("🔄 Reset")
343
+ reset_button.click(
344
+ fn=reset_sliders,
345
+ inputs=None,
346
+ outputs=[
347
+ head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
348
+ lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
349
+ retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
350
+ ]
351
+ )
352
+ with gr.Row(visible=True):
353
+ with gr.Column():
354
+ with gr.Accordion(open=True, label="Retargeting Image Input"):
355
+ retargeting_input_image.render()
356
+ gr.Examples(
357
+ examples=[
358
+ [osp.join(example_portrait_dir, "s9.jpg")],
359
+ [osp.join(example_portrait_dir, "s6.jpg")],
360
+ [osp.join(example_portrait_dir, "s10.jpg")],
361
+ [osp.join(example_portrait_dir, "s5.jpg")],
362
+ [osp.join(example_portrait_dir, "s7.jpg")],
363
+ [osp.join(example_portrait_dir, "s12.jpg")],
364
+ [osp.join(example_portrait_dir, "s22.jpg")],
365
+ # [osp.join(example_portrait_dir, "s23.jpg")],
366
+ [osp.join(example_portrait_dir, "s42.jpg")],
367
+ ],
368
+ inputs=[retargeting_input_image],
369
+ cache_examples=False,
370
+ )
371
+ with gr.Column():
372
+ with gr.Accordion(open=True, label="Retargeting Result"):
373
+ retargeting_output_image.render()
374
+ with gr.Column():
375
+ with gr.Accordion(open=True, label="Paste-back Result"):
376
+ retargeting_output_image_paste_back.render()
377
+ with gr.Row(visible=True):
378
+ process_button_reset_retargeting = gr.ClearButton(
379
+ [
380
+ retargeting_input_image,
381
+ retargeting_output_image,
382
+ retargeting_output_image_paste_back,
383
+ ],
384
+ value="🧹 Clear"
385
+ )
386
+
387
+ # Retargeting Video
388
+ gr.Markdown(load_description("assets/gradio/gradio_description_retargeting_video.md"), visible=True)
389
+ with gr.Row(visible=True):
390
+ flag_do_crop_input_retargeting_video = gr.Checkbox(value=True, label="do crop (source)")
391
+ video_retargeting_source_scale.render()
392
+ video_lip_retargeting_slider.render()
393
+ driving_smooth_observation_variance_retargeting.render()
394
+ video_retargeting_silence.render()
395
+ with gr.Row(visible=True):
396
+ process_button_retargeting_video = gr.Button("🚗 Retargeting Video", variant="primary")
397
+ with gr.Row(visible=True):
398
+ with gr.Column():
399
+ with gr.Accordion(open=True, label="Retargeting Video Input"):
400
+ retargeting_input_video.render()
401
+ gr.Examples(
402
+ examples=[
403
+ [osp.join(example_portrait_dir, "s13.mp4")],
404
+ # [osp.join(example_portrait_dir, "s18.mp4")],
405
+ # [osp.join(example_portrait_dir, "s20.mp4")],
406
+ [osp.join(example_portrait_dir, "s29.mp4")],
407
+ [osp.join(example_portrait_dir, "s32.mp4")],
408
+ [osp.join(example_video_dir, "d3.mp4")],
409
+ ],
410
+ inputs=[retargeting_input_video],
411
+ cache_examples=False,
412
+ )
413
+ with gr.Column():
414
+ with gr.Accordion(open=True, label="Retargeting Result"):
415
+ output_video.render()
416
+ with gr.Column():
417
+ with gr.Accordion(open=True, label="Paste-back Result"):
418
+ output_video_paste_back.render()
419
+ with gr.Row(visible=True):
420
+ process_button_reset_retargeting = gr.ClearButton(
421
+ [
422
+ video_lip_retargeting_slider,
423
+ retargeting_input_video,
424
+ output_video,
425
+ output_video_paste_back
426
+ ],
427
+ value="🧹 Clear"
428
+ )
429
+
430
+ # binding functions for buttons
431
+ process_button_animation.click(
432
+ fn=gpu_wrapped_execute_video,
433
+ inputs=[
434
+ source_image_input,
435
+ source_video_input,
436
+ driving_video_input,
437
+ driving_image_input,
438
+ driving_video_pickle_input,
439
+ flag_normalize_lip,
440
+ flag_relative_input,
441
+ flag_do_crop_input,
442
+ flag_remap_input,
443
+ flag_stitching_input,
444
+ animation_region,
445
+ driving_option_input,
446
+ driving_multiplier,
447
+ flag_crop_driving_video_input,
448
+ scale,
449
+ vx_ratio,
450
+ vy_ratio,
451
+ scale_crop_driving_video,
452
+ vx_ratio_crop_driving_video,
453
+ vy_ratio_crop_driving_video,
454
+ driving_smooth_observation_variance,
455
+ tab_selection,
456
+ v_tab_selection,
457
+ ],
458
+ outputs=[output_video_i2v, output_video_i2v, output_video_concat_i2v, output_video_concat_i2v, output_image_i2i, output_image_i2i, output_image_concat_i2i, output_image_concat_i2i],
459
+ show_progress=True
460
+ )
461
+
462
+
463
+ retargeting_input_image.change(
464
+ fn=gradio_pipeline.init_retargeting_image,
465
+ inputs=[retargeting_source_scale, eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image],
466
+ outputs=[eye_retargeting_slider, lip_retargeting_slider]
467
+ )
468
+
469
+ sliders = [eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z, lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y]
470
+ for slider in sliders:
471
+ # NOTE: gradio >= 4.0.0 may cause slow response
472
+ slider.change(
473
+ fn=gpu_wrapped_execute_image_retargeting,
474
+ inputs=[
475
+ eye_retargeting_slider, lip_retargeting_slider, head_pitch_slider, head_yaw_slider, head_roll_slider, mov_x, mov_y, mov_z,
476
+ lip_variation_zero, lip_variation_one, lip_variation_two, lip_variation_three, smile, wink, eyebrow, eyeball_direction_x, eyeball_direction_y,
477
+ retargeting_input_image, retargeting_source_scale, flag_stitching_retargeting_input, flag_do_crop_input_retargeting_image
478
+ ],
479
+ outputs=[retargeting_output_image, retargeting_output_image_paste_back],
480
+ )
481
+
482
+ process_button_retargeting_video.click(
483
+ fn=gpu_wrapped_execute_video_retargeting,
484
+ inputs=[video_lip_retargeting_slider, retargeting_input_video, video_retargeting_source_scale, driving_smooth_observation_variance_retargeting, video_retargeting_silence, flag_do_crop_input_retargeting_video],
485
+ outputs=[output_video, output_video_paste_back],
486
+ show_progress=True
487
+ )
488
+
489
+ demo.launch(
490
+ server_port=args.server_port,
491
+ share=args.share,
492
+ server_name=args.server_name
493
+ )
ckpts/LivePortrait/app_animals.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding: utf-8
2
+
3
+ """
4
+ The entrance of the gradio for animal
5
+ """
6
+
7
+ import os
8
+ import tyro
9
+ import subprocess
10
+ import gradio as gr
11
+ import os.path as osp
12
+ from src.utils.helper import load_description
13
+ from src.gradio_pipeline import GradioPipelineAnimal
14
+ from src.config.crop_config import CropConfig
15
+ from src.config.argument_config import ArgumentConfig
16
+ from src.config.inference_config import InferenceConfig
17
+
18
+
19
+ def partial_fields(target_class, kwargs):
20
+ return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})
21
+
22
+
23
+ def fast_check_ffmpeg():
24
+ try:
25
+ subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
26
+ return True
27
+ except:
28
+ return False
29
+
30
+
31
+ # set tyro theme
32
+ tyro.extras.set_accent_color("bright_cyan")
33
+ args = tyro.cli(ArgumentConfig)
34
+
35
+ ffmpeg_dir = os.path.join(os.getcwd(), "ffmpeg")
36
+ if osp.exists(ffmpeg_dir):
37
+ os.environ["PATH"] += (os.pathsep + ffmpeg_dir)
38
+
39
+ if not fast_check_ffmpeg():
40
+ raise ImportError(
41
+ "FFmpeg is not installed. Please install FFmpeg (including ffmpeg and ffprobe) before running this script. https://ffmpeg.org/download.html"
42
+ )
43
+ # specify configs for inference
44
+ inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attributes of args to initialize InferenceConfig
45
+ crop_cfg = partial_fields(CropConfig, args.__dict__) # use attributes of args to initialize CropConfig
46
+
47
+ gradio_pipeline_animal: GradioPipelineAnimal = GradioPipelineAnimal(
48
+ inference_cfg=inference_cfg,
49
+ crop_cfg=crop_cfg,
50
+ args=args
51
+ )
52
+
53
+ if args.gradio_temp_dir not in (None, ''):
54
+ os.environ["GRADIO_TEMP_DIR"] = args.gradio_temp_dir
55
+ os.makedirs(args.gradio_temp_dir, exist_ok=True)
56
+
57
+ def gpu_wrapped_execute_video(*args, **kwargs):
58
+ return gradio_pipeline_animal.execute_video(*args, **kwargs)
59
+
60
+
61
+ # assets
62
+ title_md = "assets/gradio/gradio_title.md"
63
+ example_portrait_dir = "assets/examples/source"
64
+ example_video_dir = "assets/examples/driving"
65
+ data_examples_i2v = [
66
+ [osp.join(example_portrait_dir, "s41.jpg"), osp.join(example_video_dir, "d3.mp4"), True, False, False, False],
67
+ [osp.join(example_portrait_dir, "s40.jpg"), osp.join(example_video_dir, "d6.mp4"), True, False, False, False],
68
+ [osp.join(example_portrait_dir, "s25.jpg"), osp.join(example_video_dir, "d19.mp4"), True, False, False, False],
69
+ ]
70
+ data_examples_i2v_pickle = [
71
+ [osp.join(example_portrait_dir, "s25.jpg"), osp.join(example_video_dir, "wink.pkl"), True, False, False, False],
72
+ [osp.join(example_portrait_dir, "s40.jpg"), osp.join(example_video_dir, "talking.pkl"), True, False, False, False],
73
+ [osp.join(example_portrait_dir, "s41.jpg"), osp.join(example_video_dir, "aggrieved.pkl"), True, False, False, False],
74
+ ]
75
+ #################### interface logic ####################
76
+
77
+ # Define components first
78
+ output_image = gr.Image(type="numpy")
79
+ output_image_paste_back = gr.Image(type="numpy")
80
+ output_video_i2v = gr.Video(autoplay=False)
81
+ output_video_concat_i2v = gr.Video(autoplay=False)
82
+ output_video_i2v_gif = gr.Image(type="numpy")
83
+
84
+
85
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
86
+ gr.HTML(load_description(title_md))
87
+
88
+ gr.Markdown(load_description("assets/gradio/gradio_description_upload_animal.md"))
89
+ with gr.Row():
90
+ with gr.Column():
91
+ with gr.Accordion(open=True, label="🐱 Source Animal Image"):
92
+ source_image_input = gr.Image(type="filepath")
93
+ gr.Examples(
94
+ examples=[
95
+ [osp.join(example_portrait_dir, "s25.jpg")],
96
+ [osp.join(example_portrait_dir, "s30.jpg")],
97
+ [osp.join(example_portrait_dir, "s31.jpg")],
98
+ [osp.join(example_portrait_dir, "s32.jpg")],
99
+ [osp.join(example_portrait_dir, "s33.jpg")],
100
+ [osp.join(example_portrait_dir, "s39.jpg")],
101
+ [osp.join(example_portrait_dir, "s40.jpg")],
102
+ [osp.join(example_portrait_dir, "s41.jpg")],
103
+ [osp.join(example_portrait_dir, "s38.jpg")],
104
+ [osp.join(example_portrait_dir, "s36.jpg")],
105
+ ],
106
+ inputs=[source_image_input],
107
+ cache_examples=False,
108
+ )
109
+
110
+ with gr.Accordion(open=True, label="Cropping Options for Source Image"):
111
+ with gr.Row():
112
+ flag_do_crop_input = gr.Checkbox(value=True, label="do crop (source)")
113
+ scale = gr.Number(value=2.3, label="source crop scale", minimum=1.8, maximum=3.2, step=0.05)
114
+ vx_ratio = gr.Number(value=0.0, label="source crop x", minimum=-0.5, maximum=0.5, step=0.01)
115
+ vy_ratio = gr.Number(value=-0.125, label="source crop y", minimum=-0.5, maximum=0.5, step=0.01)
116
+
117
+ with gr.Column():
118
+ with gr.Tabs():
119
+ with gr.TabItem("📁 Driving Pickle") as tab_pickle:
120
+ with gr.Accordion(open=True, label="Driving Pickle"):
121
+ driving_video_pickle_input = gr.File()
122
+ gr.Examples(
123
+ examples=[
124
+ [osp.join(example_video_dir, "wink.pkl")],
125
+ [osp.join(example_video_dir, "shy.pkl")],
126
+ [osp.join(example_video_dir, "aggrieved.pkl")],
127
+ [osp.join(example_video_dir, "open_lip.pkl")],
128
+ [osp.join(example_video_dir, "laugh.pkl")],
129
+ [osp.join(example_video_dir, "talking.pkl")],
130
+ [osp.join(example_video_dir, "shake_face.pkl")],
131
+ ],
132
+ inputs=[driving_video_pickle_input],
133
+ cache_examples=False,
134
+ )
135
+ with gr.TabItem("🎞️ Driving Video") as tab_video:
136
+ with gr.Accordion(open=True, label="Driving Video"):
137
+ driving_video_input = gr.Video()
138
+ gr.Examples(
139
+ examples=[
140
+ # [osp.join(example_video_dir, "d0.mp4")],
141
+ # [osp.join(example_video_dir, "d18.mp4")],
142
+ [osp.join(example_video_dir, "d19.mp4")],
143
+ [osp.join(example_video_dir, "d14.mp4")],
144
+ [osp.join(example_video_dir, "d6.mp4")],
145
+ [osp.join(example_video_dir, "d3.mp4")],
146
+ ],
147
+ inputs=[driving_video_input],
148
+ cache_examples=False,
149
+ )
150
+
151
+ tab_selection = gr.Textbox(visible=False)
152
+ tab_pickle.select(lambda: "Pickle", None, tab_selection)
153
+ tab_video.select(lambda: "Video", None, tab_selection)
154
+ with gr.Accordion(open=True, label="Cropping Options for Driving Video"):
155
+ with gr.Row():
156
+ flag_crop_driving_video_input = gr.Checkbox(value=False, label="do crop (driving)")
157
+ scale_crop_driving_video = gr.Number(value=2.2, label="driving crop scale", minimum=1.8, maximum=3.2, step=0.05)
158
+ vx_ratio_crop_driving_video = gr.Number(value=0.0, label="driving crop x", minimum=-0.5, maximum=0.5, step=0.01)
159
+ vy_ratio_crop_driving_video = gr.Number(value=-0.1, label="driving crop y", minimum=-0.5, maximum=0.5, step=0.01)
160
+
161
+ with gr.Row():
162
+ with gr.Accordion(open=False, label="Animation Options"):
163
+ with gr.Row():
164
+ flag_stitching = gr.Checkbox(value=False, label="stitching (not recommended)")
165
+ flag_remap_input = gr.Checkbox(value=False, label="paste-back (not recommended)")
166
+ driving_multiplier = gr.Number(value=1.0, label="driving multiplier", minimum=0.0, maximum=2.0, step=0.02)
167
+
168
+ gr.Markdown(load_description("assets/gradio/gradio_description_animate_clear.md"))
169
+ with gr.Row():
170
+ process_button_animation = gr.Button("🚀 Animate", variant="primary")
171
+ with gr.Row():
172
+ with gr.Column():
173
+ with gr.Accordion(open=True, label="The animated video in the cropped image space"):
174
+ output_video_i2v.render()
175
+ with gr.Column():
176
+ with gr.Accordion(open=True, label="The animated gif in the cropped image space"):
177
+ output_video_i2v_gif.render()
178
+ with gr.Column():
179
+ with gr.Accordion(open=True, label="The animated video"):
180
+ output_video_concat_i2v.render()
181
+ with gr.Row():
182
+ process_button_reset = gr.ClearButton([source_image_input, driving_video_input, output_video_i2v, output_video_concat_i2v, output_video_i2v_gif], value="🧹 Clear")
183
+
184
+ with gr.Row():
185
+ # Examples
186
+ gr.Markdown("## You could also choose the examples below by one click ⬇️")
187
+ with gr.Row():
188
+ with gr.Tabs():
189
+ with gr.TabItem("📁 Driving Pickle") as tab_video:
190
+ gr.Examples(
191
+ examples=data_examples_i2v_pickle,
192
+ fn=gpu_wrapped_execute_video,
193
+ inputs=[
194
+ source_image_input,
195
+ driving_video_pickle_input,
196
+ flag_do_crop_input,
197
+ flag_stitching,
198
+ flag_remap_input,
199
+ flag_crop_driving_video_input,
200
+ ],
201
+ outputs=[output_image, output_image_paste_back, output_video_i2v_gif],
202
+ examples_per_page=len(data_examples_i2v_pickle),
203
+ cache_examples=False,
204
+ )
205
+ with gr.TabItem("🎞️ Driving Video") as tab_video:
206
+ gr.Examples(
207
+ examples=data_examples_i2v,
208
+ fn=gpu_wrapped_execute_video,
209
+ inputs=[
210
+ source_image_input,
211
+ driving_video_input,
212
+ flag_do_crop_input,
213
+ flag_stitching,
214
+ flag_remap_input,
215
+ flag_crop_driving_video_input,
216
+ ],
217
+ outputs=[output_image, output_image_paste_back, output_video_i2v_gif],
218
+ examples_per_page=len(data_examples_i2v),
219
+ cache_examples=False,
220
+ )
221
+
222
+ process_button_animation.click(
223
+ fn=gpu_wrapped_execute_video,
224
+ inputs=[
225
+ source_image_input,
226
+ driving_video_input,
227
+ driving_video_pickle_input,
228
+ flag_do_crop_input,
229
+ flag_remap_input,
230
+ driving_multiplier,
231
+ flag_stitching,
232
+ flag_crop_driving_video_input,
233
+ scale,
234
+ vx_ratio,
235
+ vy_ratio,
236
+ scale_crop_driving_video,
237
+ vx_ratio_crop_driving_video,
238
+ vy_ratio_crop_driving_video,
239
+ tab_selection,
240
+ ],
241
+ outputs=[output_video_i2v, output_video_concat_i2v, output_video_i2v_gif],
242
+ show_progress=True
243
+ )
244
+
245
+ demo.launch(
246
+ server_port=args.server_port,
247
+ share=args.share,
248
+ server_name=args.server_name
249
+ )
ckpts/LivePortrait/assets/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ examples/driving/*.pkl
2
+ examples/driving/*_crop.mp4
ckpts/LivePortrait/assets/docs/LivePortrait-Gradio-2024-07-19.jpg ADDED

Git LFS Details

  • SHA256: 0a2e82639758440e952de96d6fee589ad00d00ffba33dfa16b2d32507ed71206
  • Pointer size: 131 Bytes
  • Size of remote file: 373 kB
ckpts/LivePortrait/assets/docs/animals-mode-gradio-2024-08-02.jpg ADDED

Git LFS Details

  • SHA256: 9a1e07d2719385601e1477ea49546d69ff21bb8c9f99625f8c48583f075b3606
  • Pointer size: 131 Bytes
  • Size of remote file: 352 kB
ckpts/LivePortrait/assets/docs/changelog/2024-07-10.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 2024/07/10
2
+
3
+ **First, thank you all for your attention, support, sharing, and contributions to LivePortrait!** ❤️
4
+ The popularity of LivePortrait has exceeded our expectations. If you encounter any issues or other problems and we do not respond promptly, please accept our apologies. We are still actively updating and improving this repository.
5
+
6
+ ### Updates
7
+
8
+ - <strong>Audio and video concatenating: </strong> If the driving video contains audio, it will automatically be included in the generated video. Additionally, the generated video will maintain the same FPS as the driving video. If you run LivePortrait on Windows, you need to install `ffprobe` and `ffmpeg` exe, see issue [#94](https://github.com/KwaiVGI/LivePortrait/issues/94).
9
+
10
+ - <strong>Driving video auto-cropping: </strong> Implemented automatic cropping for driving videos by tracking facial landmarks and calculating a global cropping box with a 1:1 aspect ratio. Alternatively, you can crop using video editing software or other tools to achieve a 1:1 ratio. Auto-cropping is not enabled by default, you can specify it by `--flag_crop_driving_video`.
11
+
12
+ - <strong>Motion template making: </strong> Added the ability to create motion templates to protect privacy. The motion template is a `.pkl` file that only contains the motions of the driving video. Theoretically, it is impossible to reconstruct the original face from the template. These motion templates can be used to generate videos without needing the original driving video. By default, the motion template will be generated and saved as a `.pkl` file with the same name as the driving video, e.g., `d0.mp4` -> `d0.pkl`. Once generated, you can specify it using the `-d` or `--driving` option.
13
+
14
+
15
+ ### About driving video
16
+
17
+ - For a guide on using your own driving video, see the [driving video auto-cropping](https://github.com/KwaiVGI/LivePortrait/tree/main?tab=readme-ov-file#driving-video-auto-cropping) section.
18
+
19
+
20
+ ### Others
21
+
22
+ - If you encounter a black box problem, disable half-precision inference by using `--no_flag_use_half_precision`, reported by issue [#40](https://github.com/KwaiVGI/LivePortrait/issues/40), [#48](https://github.com/KwaiVGI/LivePortrait/issues/48), [#62](https://github.com/KwaiVGI/LivePortrait/issues/62).
ckpts/LivePortrait/assets/docs/changelog/2024-07-19.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 2024/07/19
2
+
3
+ **Once again, we would like to express our heartfelt gratitude for your love, attention, and support for LivePortrait! 🎉**
4
+ We are excited to announce the release of an implementation of Portrait Video Editing (aka v2v) today! Special thanks to the hard work of the LivePortrait team: [Dingyun Zhang](https://github.com/Mystery099), [Zhizhou Zhong](https://github.com/zzzweakman), and [Jianzhu Guo](https://github.com/cleardusk).
5
+
6
+ ### Updates
7
+
8
+ - <strong>Portrait video editing (v2v):</strong> Implemented a version of Portrait Video Editing (aka v2v). Ensure you have `pykalman` package installed, which has been added in [`requirements_base.txt`](../../../requirements_base.txt). You can specify the source video using the `-s` or `--source` option, adjust the temporal smoothness of motion with `--driving_smooth_observation_variance`, enable head pose motion transfer with `--flag_video_editing_head_rotation`, and ensure the eye-open scalar of each source frame matches the first source frame before animation with `--flag_source_video_eye_retargeting`.
9
+
10
+ - <strong>More options in Gradio:</strong> We have upgraded the Gradio interface and added more options. These include `Cropping Options for Source Image or Video` and `Cropping Options for Driving Video`, providing greater flexibility and control.
11
+
12
+ <p align="center">
13
+ <img src="../LivePortrait-Gradio-2024-07-19.jpg" alt="LivePortrait" width="800px">
14
+ <br>
15
+ The Gradio Interface for LivePortrait
16
+ </p>
17
+
18
+
19
+ ### Community Contributions
20
+
21
+ - **ONNX/TensorRT Versions of LivePortrait:** Explore optimized versions of LivePortrait for faster performance:
22
+ - [FasterLivePortrait](https://github.com/warmshao/FasterLivePortrait) by [warmshao](https://github.com/warmshao) ([#150](https://github.com/KwaiVGI/LivePortrait/issues/150))
23
+ - [Efficient-Live-Portrait](https://github.com/aihacker111/Efficient-Live-Portrait) by [aihacker111](https://github.com/aihacker111/Efficient-Live-Portrait) ([#126](https://github.com/KwaiVGI/LivePortrait/issues/126), [#142](https://github.com/KwaiVGI/LivePortrait/issues/142))
24
+ - **LivePortrait with [X-Pose](https://github.com/IDEA-Research/X-Pose) Detection:** Check out [LivePortrait](https://github.com/ShiJiaying/LivePortrait) by [ShiJiaying](https://github.com/ShiJiaying) for enhanced detection capabilities using X-pose, see [#119](https://github.com/KwaiVGI/LivePortrait/issues/119).
ckpts/LivePortrait/assets/docs/changelog/2024-07-24.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 2024/07/24
2
+
3
+ ### Updates
4
+
5
+ - **Portrait pose editing:** You can change the `relative pitch`, `relative yaw`, and `relative roll` in the Gradio interface to adjust the pose of the source portrait.
6
+ - **Detection threshold:** We have added a `--det_thresh` argument with a default value of 0.15 to increase recall, meaning more types of faces (e.g., monkeys, human-like) will be detected. You can set it to other values, e.g., 0.5, by using `python app.py --det_thresh 0.5`.
7
+
8
+ <p align="center">
9
+ <img src="../pose-edit-2024-07-24.jpg" alt="LivePortrait" width="960px">
10
+ <br>
11
+ Pose Editing in the Gradio Interface
12
+ </p>
ckpts/LivePortrait/assets/docs/changelog/2024-08-02.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 2024/08/02
2
+
3
+ <table class="center" style="width: 80%; margin-left: auto; margin-right: auto;">
4
+ <tr>
5
+ <td style="text-align: center"><b>Animals Singing Dance Monkey 🎤</b></td>
6
+ </tr>
7
+
8
+ <tr>
9
+ <td style="border: none; text-align: center;">
10
+ <video controls loop src="https://github.com/user-attachments/assets/38d5b6e5-d29b-458d-9f2c-4dd52546cb41" muted="false" style="width: 60%;"></video>
11
+ </td>
12
+ </tr>
13
+ </table>
14
+
15
+
16
+ 🎉 We are excited to announce the release of a new version featuring animals mode, along with several other updates. Special thanks to the dedicated efforts of the LivePortrait team. 💪 We also provided a one-click installer for Windows users, check out the details [here](./2024-08-05.md).
17
+
18
+ ### Updates on Animals mode
19
+ We are pleased to announce the release of the animals mode, which is fine-tuned on approximately 230K frames of various animals (mostly cats and dogs). The trained weights have been updated in the `liveportrait_animals` subdirectory, available on [HuggingFace](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/) or [Google Drive](https://drive.google.com/drive/u/0/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib). You should [download the weights](https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#2-download-pretrained-weights) before running. There are two ways to run this mode.
20
+
21
+ > Please note that we have not trained the stitching and retargeting modules for the animals model due to several technical issues. _This may be addressed in future updates._ Therefore, we recommend **disabling stitching by setting the `--no_flag_stitching`** option when running the model. Additionally, `paste-back` is also not recommended.
22
+
23
+ #### Install X-Pose
24
+ We have chosen [X-Pose](https://github.com/IDEA-Research/X-Pose) as the keypoints detector for animals. This relies on `transformers==4.22.0` and `pillow>=10.2.0` (which are already updated in `requirements.txt`) and requires building an OP named `MultiScaleDeformableAttention`.
25
+
26
+ Refer to the [PyTorch installation](https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#for-linux-or-windows-users) for Linux and Windows users.
27
+
28
+
29
+ Next, build the OP `MultiScaleDeformableAttention` by running:
30
+ ```bash
31
+ cd src/utils/dependencies/XPose/models/UniPose/ops
32
+ python setup.py build install
33
+ cd - # this returns to the previous directory
34
+ ```
35
+
36
+ To run the model, use the `inference_animals.py` script:
37
+ ```bash
38
+ python inference_animals.py -s assets/examples/source/s39.jpg -d assets/examples/driving/wink.pkl --no_flag_stitching --driving_multiplier 1.75
39
+ ```
40
+
41
+ Alternatively, you can use Gradio for a more user-friendly interface. Launch it with:
42
+ ```bash
43
+ python app_animals.py # --server_port 8889 --server_name "0.0.0.0" --share
44
+ ```
45
+
46
+ > [!WARNING]
47
+ > [X-Pose](https://github.com/IDEA-Research/X-Pose) is only for Non-commercial Scientific Research Purposes, you should remove and replace it with other detectors if you use it for commercial purposes.
48
+
49
+ ### Updates on Humans mode
50
+
51
+ - **Driving Options**: We have introduced an `expression-friendly` driving option to **reduce head wobbling**, now set as the default. While it may be less effective with large head poses, you can also select the `pose-friendly` option, which is the same as the previous version. This can be set using `--driving_option` or selected in the Gradio interface. Additionally, we added a `--driving_multiplier` option to adjust driving intensity, with a default value of 1, which can also be set in the Gradio interface.
52
+
53
+ - **Retargeting Video in Gradio**: We have implemented a video retargeting feature. You can specify a `target lip-open ratio` to adjust the mouth movement in the source video. For instance, setting it to 0 will close the mouth in the source video 🤐.
54
+
55
+ ### Others
56
+
57
+ - [**Poe supports LivePortrait**](https://poe.com/LivePortrait). Check out the news on [X](https://x.com/poe_platform/status/1816136105781256260).
58
+ - [ComfyUI-LivePortraitKJ](https://github.com/kijai/ComfyUI-LivePortraitKJ) (1.1K 🌟) now includes MediaPipe as an alternative to InsightFace, ensuring the license remains under MIT and Apache 2.0.
59
+ - [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait) features real-time portrait pose/expression editing and animation, and is registered with ComfyUI-Manager.
60
+
61
+
62
+
63
+ **Below are some screenshots of the new features and improvements:**
64
+
65
+ | ![The Gradio Interface of Animals Mode](../animals-mode-gradio-2024-08-02.jpg) |
66
+ |:---:|
67
+ | **The Gradio Interface of Animals Mode** |
68
+
69
+ | ![Driving Options and Multiplier](../driving-option-multiplier-2024-08-02.jpg) |
70
+ |:---:|
71
+ | **Driving Options and Multiplier** |
72
+
73
+ | ![The Feature of Retargeting Video](../retargeting-video-2024-08-02.jpg) |
74
+ |:---:|
75
+ | **The Feature of Retargeting Video** |
ckpts/LivePortrait/assets/docs/changelog/2024-08-05.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## One-click Windows Installer
2
+
3
+ ### Download the installer from HuggingFace
4
+ ```bash
5
+ # !pip install -U "huggingface_hub[cli]"
6
+ huggingface-cli download cleardusk/LivePortrait-Windows LivePortrait-Windows-v20240806.zip --local-dir ./
7
+ ```
8
+
9
+ If you cannot access Huggingface, you can use [hf-mirror](https://hf-mirror.com/) to download:
10
+ ```bash
11
+ # !pip install -U "huggingface_hub[cli]"
12
+ export HF_ENDPOINT=https://hf-mirror.com
13
+ huggingface-cli download cleardusk/LivePortrait-Windows LivePortrait-Windows-v20240806.zip --local-dir ./
14
+ ```
15
+
16
+ Alternatively, you can manually download it from the [HuggingFace](https://huggingface.co/cleardusk/LivePortrait-Windows/blob/main/LivePortrait-Windows-v20240806.zip) page.
17
+
18
+ Then, simply unzip the package `LivePortrait-Windows-v20240806.zip` and double-click `run_windows_human.bat` for the Humans mode, or `run_windows_animal.bat` for the **Animals mode**.
ckpts/LivePortrait/assets/docs/changelog/2024-08-06.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ## Precise Portrait Editing
2
+
3
+ Inspired by [ComfyUI-AdvancedLivePortrait](https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait) ([@PowerHouseMan](https://github.com/PowerHouseMan)), we have implemented a version of Precise Portrait Editing in the Gradio interface. With each adjustment of the slider, the edited image updates in real-time. You can click the `🔄 Reset` button to reset all slider parameters. However, the performance may not be as fast as the ComfyUI plugin.
4
+
5
+ <p align="center">
6
+ <img src="../editing-portrait-2024-08-06.jpg" alt="LivePortrait" width="960px">
7
+ <br>
8
+ Precise Portrait Editing in the Gradio Interface
9
+ </p>
ckpts/LivePortrait/assets/docs/changelog/2024-08-19.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Image Driven and Regional Control
2
+
3
+ <p align="center">
4
+ <img src="../image-driven-image-2024-08-19.jpg" alt="LivePortrait" width="512px">
5
+ <br>
6
+ <strong>Image Drives an Image</strong>
7
+ </p>
8
+
9
+ You can now **use an image as a driving signal** to drive the source image or video! Additionally, we **have refined the driving options to support expressions, pose, lips, eyes, or all** (all is consistent with the previous default method), which we name it regional control. The control is becoming more and more precise! 🎯
10
+
11
+ > Please note that image-based driving or regional control may not perform well in certain cases. Feel free to try different options, and be patient. 😊
12
+
13
+ > [!Note]
14
+ > We recognize that the project now offers more options, which have become increasingly complex, but due to our limited team capacity and resources, we haven’t fully documented them yet. We ask for your understanding and will work to improve the documentation over time. Contributions via PRs are welcome! If anyone is considering donating or sponsoring, feel free to leave a message in the GitHub Issues or Discussions. We will set up a payment account to reward the team members or support additional efforts in maintaining the project. 💖
15
+
16
+
17
+ ### CLI Usage
18
+ It's very simple to use an image as a driving reference. Just set the `-d` argument to the driving image:
19
+
20
+ ```bash
21
+ python inference.py -s assets/examples/source/s5.jpg -d assets/examples/driving/d30.jpg
22
+ ```
23
+
24
+ To change the `animation_region` option, you can use the `--animation_region` argument to `exp`, `pose`, `lip`, `eyes`, or `all`. For example, to only drive the lip region, you can run by:
25
+
26
+ ```bash
27
+ # only driving the lip region
28
+ python inference.py -s assets/examples/source/s5.jpg -d assets/examples/driving/d0.mp4 --animation_region lip
29
+ ```
30
+
31
+ ### Gradio Interface
32
+
33
+ <p align="center">
34
+ <img src="../image-driven-portrait-animation-2024-08-19.jpg" alt="LivePortrait" width="960px">
35
+ <br>
36
+ <strong>Image-driven Portrait Animation and Regional Control</strong>
37
+ </p>
38
+
39
+ ### More Detailed Explanation
40
+
41
+ **flag_relative_motion**:
42
+ When using an image as the driving input, setting `--flag_relative_motion` to true will apply the motion deformation between the driving image and its canonical form. If set to false, the absolute motion of the driving image is used, which may amplify expression driving strength but could also cause identity leakage. This option corresponds to the `relative motion` toggle in the Gradio interface. Additionally, if both source and driving inputs are images, the output will be an image. If the source is a video and the driving input is an image, the output will be a video, with each frame driven by the image's motion. The Gradio interface automatically saves and displays the output in the appropriate format.
43
+
44
+ **animation_region**:
45
+ This argument offers five options:
46
+
47
+ - `exp`: Only the expression of the driving input influences the source.
48
+ - `pose`: Only the head pose drives the source.
49
+ - `lip`: Only lip movement drives the source.
50
+ - `eyes`: Only eye movement drives the source.
51
+ - `all`: All motions from the driving input are applied.
52
+
53
+ You can also select these options directly in the Gradio interface.
54
+
55
+ **Editing the Lip Region of the Source Video to a Neutral Expression**:
56
+ In response to requests for a more neutral lip region in the `Retargeting Video` of the Gradio interface, we've added a `keeping the lip silent` option. When selected, the animated video's lip region will adopt a neutral expression. However, this may cause inter-frame jitter or identity leakage, as it uses a mode similar to absolute driving. Note that the neutral expression may sometimes feature a slightly open mouth.
57
+
58
+ **Others**:
59
+ When both source and driving inputs are videos, the output motion may be a blend of both, due to the default setting of `--flag_relative_motion`. This option uses relative driving, where the motion offset of the current driving frame relative to the first driving frame is added to the source frame's motion. In contrast, `--no_flag_relative_motion` applies the driving frame's motion directly as the final driving motion.
60
+
61
+ For CLI usage, to retain only the driving video's motion in the output, use:
62
+ ```bash
63
+ python inference.py --no_flag_relative_motion
64
+ ```
65
+ In the Gradio interface, simply uncheck the relative motion option. Note that absolute driving may cause jitter or identity leakage in the animated video.
ckpts/LivePortrait/assets/docs/changelog/2025-01-01.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 2025/01/01
2
+
3
+ **We’re thrilled that cats 🐱 are now speaking and singing across the internet!** 🎶
4
+
5
+ In this update, we’ve improved the [Animals model](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/liveportrait_animals/base_models_v1.1) with more data. While you might notice only a slight improvement for cats (if at all 😼), dogs have gotten a slightly better upgrade. For example, the model is now better at recognizing their mouths instead of mistaking them for noses. 🐶
6
+
7
+ <table class="center" style="width: 80%; margin-left: auto; margin-right: auto;">
8
+ <tr>
9
+ <td style="text-align: center"><b>Before vs. After (v1.1)</b></td>
10
+ </tr>
11
+
12
+ <tr>
13
+ <td style="border: none; text-align: center;">
14
+ <video controls loop src="https://github.com/user-attachments/assets/59fc09b9-6cb7-4265-833f-eebb27ed9511" muted="false" style="width: 60%;"></video>
15
+ </td>
16
+ </tr>
17
+ </table>
18
+
19
+
20
+ The new version (v1.1) Animals Model has been updated on [HuggingFace](https://huggingface.co/KwaiVGI/LivePortrait/tree/main/liveportrait_animals/base_models_v1.1). The new version is enabled by default.
21
+
22
+ > [!IMPORTANT]
23
+ > Note: Make sure to update your weights to use the new version.
24
+
25
+ If you prefer to use the original version, simply modify the configuration in [inference_config.py](../../../src/config/inference_config.py#L29)
26
+ ```python
27
+ version_animals = "" # old version
28
+ # version_animals = "_v1.1" # new (v1.1) version
29
+ ```
ckpts/LivePortrait/assets/docs/directory-structure.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## The directory structure of `pretrained_weights`
2
+
3
+ ```text
4
+ pretrained_weights
5
+ ├── insightface
6
+ │ └── models
7
+ │ └── buffalo_l
8
+ │ ├── 2d106det.onnx
9
+ │ └── det_10g.onnx
10
+ ├── liveportrait
11
+ │ ├── base_models
12
+ │ │ ├── appearance_feature_extractor.pth
13
+ │ │ ├── motion_extractor.pth
14
+ │ │ ├── spade_generator.pth
15
+ │ │ └── warping_module.pth
16
+ │ ├── landmark.onnx
17
+ │ └── retargeting_models
18
+ │ └── stitching_retargeting_module.pth
19
+ └── liveportrait_animals
20
+ ├── base_models
21
+ │ ├── appearance_feature_extractor.pth
22
+ │ ├── motion_extractor.pth
23
+ │ ├── spade_generator.pth
24
+ │ └── warping_module.pth
25
+ ├── retargeting_models
26
+ │ └── stitching_retargeting_module.pth
27
+ └── xpose.pth
28
+ ```
ckpts/LivePortrait/assets/docs/driving-option-multiplier-2024-08-02.jpg ADDED

Git LFS Details

  • SHA256: 0af97b8a104bdcb4f843a52672df9378353572d91522fa254b36fd4e08cbaaf7
  • Pointer size: 130 Bytes
  • Size of remote file: 84.5 kB
ckpts/LivePortrait/assets/docs/editing-portrait-2024-08-06.jpg ADDED

Git LFS Details

  • SHA256: a08342e3c8b9868bae2963dd5d890c53eb67b4bcb8baef7f594da0c805d66c48
  • Pointer size: 131 Bytes
  • Size of remote file: 308 kB
ckpts/LivePortrait/assets/docs/how-to-install-ffmpeg.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Install FFmpeg
2
+
3
+ Make sure you have `ffmpeg` and `ffprobe` installed on your system. If you don't have them installed, follow the instructions below.
4
+
5
+ > [!NOTE]
6
+ > The installation is copied from [SoVITS](https://github.com/RVC-Boss/GPT-SoVITS) 🤗
7
+
8
+ ### Conda Users
9
+
10
+ ```bash
11
+ conda install ffmpeg
12
+ ```
13
+
14
+ ### Ubuntu/Debian Users
15
+
16
+ ```bash
17
+ sudo apt install ffmpeg
18
+ sudo apt install libsox-dev
19
+ conda install -c conda-forge 'ffmpeg<7'
20
+ ```
21
+
22
+ ### Windows Users
23
+
24
+ Download [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) and place them in the LivePortrait root directory (or anywhere on your `PATH`).
25
+
26
+ ### MacOS Users
27
+ ```bash
28
+ brew install ffmpeg
29
+ ```
ckpts/LivePortrait/assets/docs/image-driven-image-2024-08-19.jpg ADDED

Git LFS Details

  • SHA256: 1841abcb093dddd62c09224958d6613ce4677d5b25c53be4665dce1270029a44
  • Pointer size: 131 Bytes
  • Size of remote file: 333 kB
ckpts/LivePortrait/assets/docs/image-driven-portrait-animation-2024-08-19.jpg ADDED

Git LFS Details

  • SHA256: c8f1a04343e412ff80c9d67e0b10af65c42488cff10524aef7fe2ba033a8ca12
  • Pointer size: 131 Bytes
  • Size of remote file: 557 kB
ckpts/LivePortrait/assets/docs/inference-animals.gif ADDED

Git LFS Details

  • SHA256: 68f41750305737321661241edf8f08959595bf2520c2a13f60bbb0c2a7df9775
  • Pointer size: 131 Bytes
  • Size of remote file: 502 kB
ckpts/LivePortrait/assets/docs/inference.gif ADDED

Git LFS Details

  • SHA256: e1316eca5556ba5a8da7c53bcadbc1df26aa822bbde68fbad94813139803d0c6
  • Pointer size: 131 Bytes
  • Size of remote file: 820 kB
ckpts/LivePortrait/assets/docs/pose-edit-2024-07-24.jpg ADDED

Git LFS Details

  • SHA256: aee74d44b4d70591b1275b4db5f8cfc60dd067d4c15007b0295011f5e8fb33f7
  • Pointer size: 131 Bytes
  • Size of remote file: 223 kB
ckpts/LivePortrait/assets/docs/retargeting-video-2024-08-02.jpg ADDED

Git LFS Details

  • SHA256: 25fbba254f5ae11c020dff62865c9b97db61f15336a2a8723da5a5b0df85a621
  • Pointer size: 131 Bytes
  • Size of remote file: 118 kB
ckpts/LivePortrait/assets/docs/showcase.gif ADDED

Git LFS Details

  • SHA256: 7bca5f38bfd555bf7c013312d87883afdf39d97fba719ac171c60f897af49e21
  • Pointer size: 132 Bytes
  • Size of remote file: 6.62 MB
ckpts/LivePortrait/assets/docs/showcase2.gif ADDED

Git LFS Details

  • SHA256: eb1fffb139681775780b2956e7d0289f55d199c1a3e14ab263887864d4b0d586
  • Pointer size: 132 Bytes
  • Size of remote file: 2.88 MB
ckpts/LivePortrait/assets/docs/speed.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Speed
2
+
3
+ Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
4
+
5
+ | Model | Parameters(M) | Model Size(MB) | Inference(ms) |
6
+ |-----------------------------------|:-------------:|:--------------:|:-------------:|
7
+ | Appearance Feature Extractor | 0.84 | 3.3 | 0.82 |
8
+ | Motion Extractor | 28.12 | 108 | 0.84 |
9
+ | Spade Generator | 55.37 | 212 | 7.59 |
10
+ | Warping Module | 45.53 | 174 | 5.21 |
11
+ | Stitching and Retargeting Modules | 0.23 | 2.3 | 0.31 |
12
+
13
+ *Note: The values for the Stitching and Retargeting Modules represent the combined parameter counts and total inference time of three sequential MLP networks.*
ckpts/LivePortrait/assets/examples/driving/aggrieved.pkl ADDED
Binary file (25.7 kB). View file
 
ckpts/LivePortrait/assets/examples/driving/d0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63f6f9962e1fdf6e6722172e7a18155204858d5d5ce3b1e0646c150360c33bed
3
+ size 2958395
ckpts/LivePortrait/assets/examples/driving/d1.pkl ADDED
Binary file (8.6 kB). View file
 
ckpts/LivePortrait/assets/examples/driving/d10.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac7ee3c2419046f11dc230b6db33c2391a98334eba2b1d773e7eb9627992622f
3
+ size 1064930
ckpts/LivePortrait/assets/examples/driving/d11.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94b449a25686eddd42e244fb571c908a123aa0154776682601df1f3830f8f65c
3
+ size 468504
ckpts/LivePortrait/assets/examples/driving/d12.jpg ADDED

Git LFS Details

  • SHA256: 5d7f7bebd22b47a587e2e86474585c3a0a3afbe77028c49eef79b1aa86ae854f
  • Pointer size: 130 Bytes
  • Size of remote file: 98.9 kB
ckpts/LivePortrait/assets/examples/driving/d12.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2895169e5aa2a882d39dfc80a17a3eab1cf6ec23b9b6f6be76bae48deda15219
3
+ size 596446
ckpts/LivePortrait/assets/examples/driving/d13.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d03e39c28323cde1c5fc6c5629aa83fe6c834fa7c9ed2dac969e1247eaafdb60
3
+ size 2475854
ckpts/LivePortrait/assets/examples/driving/d14.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:465e72fbf26bf4ed46d1adf7aab8a7344aac54a2f92c4d82a1d53127f0170472
3
+ size 891025
ckpts/LivePortrait/assets/examples/driving/d18.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc94c1fec7ef7dc831c8a49f0e1788ae568812cb68e62f6875d9070f573d02a
3
+ size 187263
ckpts/LivePortrait/assets/examples/driving/d19.jpg ADDED

Git LFS Details

  • SHA256: 81a82eb49cc66ac098dff8cbca9b621dbc1889934e503735f4f6ccb689fa8ce3
  • Pointer size: 130 Bytes
  • Size of remote file: 68.7 kB
ckpts/LivePortrait/assets/examples/driving/d19.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3047ba66296d96b8a4584e412e61493d7bc0fa5149c77b130e7feea375e698bd
3
+ size 232859
ckpts/LivePortrait/assets/examples/driving/d2.pkl ADDED
Binary file (8.6 kB). View file
 
ckpts/LivePortrait/assets/examples/driving/d20.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e249873c21abf27b3c9f405a1b5283ef1b70e5eff21c1df44ca4b5b4d9b7309
3
+ size 462335
ckpts/LivePortrait/assets/examples/driving/d3.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5c86e49b1b43dcb1449b499eb5a7f0cbae2f78aec08b5598193be1e4257099
3
+ size 1430968
ckpts/LivePortrait/assets/examples/driving/d30.jpg ADDED

Git LFS Details

  • SHA256: d68425dab27dc5898c8664c8f93f1a345ea701d70fad8a0a2904f1bc194bb527
  • Pointer size: 130 Bytes
  • Size of remote file: 77.2 kB
ckpts/LivePortrait/assets/examples/driving/d38.jpg ADDED

Git LFS Details

  • SHA256: 9e8f695879b5843b708d3936eeb39e0df410cc1100c9d2d4fdacb1485bc354c0
  • Pointer size: 130 Bytes
  • Size of remote file: 75.3 kB
ckpts/LivePortrait/assets/examples/driving/d5.pkl ADDED
Binary file (77.8 kB). View file
 
ckpts/LivePortrait/assets/examples/driving/d6.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e3ea79bbf28cbdc4fbb67ec655d9a0fe876e880ec45af55ae481348d0c0fff
3
+ size 1967790
ckpts/LivePortrait/assets/examples/driving/d7.pkl ADDED
Binary file (93.5 kB). View file