Spaces:
Running
on
Zero
Running
on
Zero
fix: retargeting feature leakage
#7
by
zzz66
- opened
- .gitattributes +0 -9
- README.md +1 -2
- app.py +11 -18
- assets/examples/driving/d1.mp4 +0 -0
- assets/examples/driving/d14.mp4 +0 -3
- assets/examples/driving/d14_trim.mp4 +0 -3
- assets/examples/driving/d15.mp4 +0 -3
- assets/examples/driving/d16.mp4 +0 -3
- assets/examples/driving/d18.mp4 +0 -3
- assets/examples/driving/d19.mp4 +0 -3
- assets/examples/driving/d2.mp4 +0 -0
- assets/examples/driving/d5.mp4 +0 -0
- assets/examples/driving/d6_trim.mp4 +0 -3
- assets/examples/driving/d7.mp4 +0 -0
- assets/examples/driving/d8.mp4 +0 -0
- assets/examples/source/s12.jpg +0 -3
- assets/examples/source/s22.jpg +0 -3
- assets/gradio_description_animate_clear.md +0 -3
- assets/gradio_description_retargeting.md +1 -13
- assets/gradio_description_upload.md +2 -30
- assets/gradio_title.md +7 -14
- readme.md +138 -7
- src/config/argument_config.py +1 -2
- src/gradio_pipeline.py +9 -7
- src/live_portrait_pipeline.py +7 -26
- src/utils/video.py +6 -70
- video2template.py +37 -0
.gitattributes
CHANGED
|
@@ -47,12 +47,3 @@ pretrained_weights/liveportrait/base_models/warping_module.pth filter=lfs diff=l
|
|
| 47 |
pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
|
| 48 |
pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
|
| 49 |
pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
|
| 50 |
-
assets/examples/driving/d14.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
-
assets/examples/source/s12.jpg filter=lfs diff=lfs merge=lfs -text
|
| 52 |
-
assets/examples/driving/d14_trim.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 53 |
-
assets/examples/driving/d6_trim.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
-
assets/examples/driving/d15.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 55 |
-
assets/examples/driving/d16.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 56 |
-
assets/examples/driving/d18.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
-
assets/examples/driving/d19.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
-
assets/examples/source/s22.jpg filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 47 |
pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
|
| 48 |
pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
|
| 49 |
pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -7,7 +7,6 @@ sdk: gradio
|
|
| 7 |
sdk_version: 4.37.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
disable_embedding: true
|
| 11 |
tags:
|
| 12 |
- Multimodal
|
| 13 |
- Motion control
|
|
@@ -18,4 +17,4 @@ tags:
|
|
| 18 |
short_description: Apply the motion of a video on a portrait
|
| 19 |
---
|
| 20 |
|
| 21 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 7 |
sdk_version: 4.37.2
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
tags:
|
| 11 |
- Multimodal
|
| 12 |
- Motion control
|
|
|
|
| 17 |
short_description: Apply the motion of a video on a portrait
|
| 18 |
---
|
| 19 |
|
| 20 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
|
@@ -46,14 +46,14 @@ def gpu_wrapped_execute_image(*args, **kwargs):
|
|
| 46 |
|
| 47 |
def is_square_video(video_path):
|
| 48 |
video = cv2.VideoCapture(video_path)
|
| 49 |
-
|
| 50 |
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 51 |
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 52 |
-
|
| 53 |
video.release()
|
| 54 |
if width != height:
|
| 55 |
raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
|
| 56 |
-
|
| 57 |
return gr.update(visible=True)
|
| 58 |
|
| 59 |
# assets
|
|
@@ -63,10 +63,9 @@ example_video_dir = "assets/examples/driving"
|
|
| 63 |
data_examples = [
|
| 64 |
[osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
|
| 65 |
[osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
|
| 66 |
-
[osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "
|
| 67 |
-
[osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "
|
| 68 |
-
[osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "
|
| 69 |
-
[osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
|
| 70 |
]
|
| 71 |
#################### interface logic ####################
|
| 72 |
|
|
@@ -92,8 +91,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 92 |
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 93 |
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 94 |
[osp.join(example_portrait_dir, "s7.jpg")],
|
| 95 |
-
[osp.join(example_portrait_dir, "s12.jpg")],
|
| 96 |
-
[osp.join(example_portrait_dir, "s22.jpg")],
|
| 97 |
],
|
| 98 |
inputs=[image_input],
|
| 99 |
cache_examples=False,
|
|
@@ -103,10 +100,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 103 |
gr.Examples(
|
| 104 |
examples=[
|
| 105 |
[osp.join(example_video_dir, "d0.mp4")],
|
| 106 |
-
[osp.join(example_video_dir, "
|
| 107 |
-
[osp.join(example_video_dir, "
|
| 108 |
-
[osp.join(example_video_dir, "
|
| 109 |
-
[osp.join(example_video_dir, "d6_trim.mp4")],
|
| 110 |
],
|
| 111 |
inputs=[video_input],
|
| 112 |
cache_examples=False,
|
|
@@ -118,7 +114,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 118 |
flag_relative_input = gr.Checkbox(value=True, label="relative motion")
|
| 119 |
flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
|
| 120 |
flag_remap_input = gr.Checkbox(value=True, label="paste-back")
|
| 121 |
-
gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
|
| 122 |
with gr.Row():
|
| 123 |
with gr.Column():
|
| 124 |
process_button_animation = gr.Button("🚀 Animate", variant="primary")
|
|
@@ -133,7 +128,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 133 |
output_video_concat.render()
|
| 134 |
with gr.Row():
|
| 135 |
# Examples
|
| 136 |
-
gr.Markdown("## You could
|
| 137 |
with gr.Row():
|
| 138 |
gr.Examples(
|
| 139 |
examples=data_examples,
|
|
@@ -146,7 +141,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 146 |
flag_remap_input
|
| 147 |
],
|
| 148 |
outputs=[output_image, output_image_paste_back],
|
| 149 |
-
examples_per_page=
|
| 150 |
cache_examples=False,
|
| 151 |
)
|
| 152 |
gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
|
|
@@ -176,8 +171,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 176 |
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 177 |
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 178 |
[osp.join(example_portrait_dir, "s7.jpg")],
|
| 179 |
-
[osp.join(example_portrait_dir, "s12.jpg")],
|
| 180 |
-
[osp.join(example_portrait_dir, "s22.jpg")],
|
| 181 |
],
|
| 182 |
inputs=[retargeting_input_image],
|
| 183 |
cache_examples=False,
|
|
|
|
| 46 |
|
| 47 |
def is_square_video(video_path):
|
| 48 |
video = cv2.VideoCapture(video_path)
|
| 49 |
+
|
| 50 |
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 51 |
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 52 |
+
|
| 53 |
video.release()
|
| 54 |
if width != height:
|
| 55 |
raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
|
| 56 |
+
|
| 57 |
return gr.update(visible=True)
|
| 58 |
|
| 59 |
# assets
|
|
|
|
| 63 |
data_examples = [
|
| 64 |
[osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
|
| 65 |
[osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
|
| 66 |
+
[osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d5.mp4"), True, True, True, True],
|
| 67 |
+
[osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d6.mp4"), True, True, True, True],
|
| 68 |
+
[osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d7.mp4"), True, True, True, True],
|
|
|
|
| 69 |
]
|
| 70 |
#################### interface logic ####################
|
| 71 |
|
|
|
|
| 91 |
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 92 |
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 93 |
[osp.join(example_portrait_dir, "s7.jpg")],
|
|
|
|
|
|
|
| 94 |
],
|
| 95 |
inputs=[image_input],
|
| 96 |
cache_examples=False,
|
|
|
|
| 100 |
gr.Examples(
|
| 101 |
examples=[
|
| 102 |
[osp.join(example_video_dir, "d0.mp4")],
|
| 103 |
+
[osp.join(example_video_dir, "d5.mp4")],
|
| 104 |
+
[osp.join(example_video_dir, "d6.mp4")],
|
| 105 |
+
[osp.join(example_video_dir, "d7.mp4")],
|
|
|
|
| 106 |
],
|
| 107 |
inputs=[video_input],
|
| 108 |
cache_examples=False,
|
|
|
|
| 114 |
flag_relative_input = gr.Checkbox(value=True, label="relative motion")
|
| 115 |
flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
|
| 116 |
flag_remap_input = gr.Checkbox(value=True, label="paste-back")
|
|
|
|
| 117 |
with gr.Row():
|
| 118 |
with gr.Column():
|
| 119 |
process_button_animation = gr.Button("🚀 Animate", variant="primary")
|
|
|
|
| 128 |
output_video_concat.render()
|
| 129 |
with gr.Row():
|
| 130 |
# Examples
|
| 131 |
+
gr.Markdown("## You could choose the examples below ⬇️")
|
| 132 |
with gr.Row():
|
| 133 |
gr.Examples(
|
| 134 |
examples=data_examples,
|
|
|
|
| 141 |
flag_remap_input
|
| 142 |
],
|
| 143 |
outputs=[output_image, output_image_paste_back],
|
| 144 |
+
examples_per_page=5,
|
| 145 |
cache_examples=False,
|
| 146 |
)
|
| 147 |
gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
|
|
|
|
| 171 |
[osp.join(example_portrait_dir, "s10.jpg")],
|
| 172 |
[osp.join(example_portrait_dir, "s5.jpg")],
|
| 173 |
[osp.join(example_portrait_dir, "s7.jpg")],
|
|
|
|
|
|
|
| 174 |
],
|
| 175 |
inputs=[retargeting_input_image],
|
| 176 |
cache_examples=False,
|
assets/examples/driving/d1.mp4
ADDED
|
Binary file (48.8 kB). View file
|
|
|
assets/examples/driving/d14.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:465e72fbf26bf4ed46d1adf7aab8a7344aac54a2f92c4d82a1d53127f0170472
|
| 3 |
-
size 891025
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d14_trim.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1ddeda3ad54627d893afcbef9ca09d4e6b7b510d6c10407ce89d10f1b0e1cd16
|
| 3 |
-
size 433589
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d15.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7ff1f84228c8db9eee09b28372ddfc4d5752d779860fdb882287d8c2edcf99d4
|
| 3 |
-
size 105285
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d16.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:853639a403c0285d8073ffd3aa3b80fb52b351f3a720785ce799694d6ab63a16
|
| 3 |
-
size 68369
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d18.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1dc94c1fec7ef7dc831c8a49f0e1788ae568812cb68e62f6875d9070f573d02a
|
| 3 |
-
size 187263
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d19.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:3047ba66296d96b8a4584e412e61493d7bc0fa5149c77b130e7feea375e698bd
|
| 3 |
-
size 232859
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d2.mp4
ADDED
|
Binary file (47.8 kB). View file
|
|
|
assets/examples/driving/d5.mp4
ADDED
|
Binary file (135 kB). View file
|
|
|
assets/examples/driving/d6_trim.mp4
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:de9e15eef87674433f2a60972da65e42b55fa154df7beaf4e0ee1cea1939774b
|
| 3 |
-
size 530752
|
|
|
|
|
|
|
|
|
|
|
|
assets/examples/driving/d7.mp4
ADDED
|
Binary file (185 kB). View file
|
|
|
assets/examples/driving/d8.mp4
ADDED
|
Binary file (312 kB). View file
|
|
|
assets/examples/source/s12.jpg
DELETED
Git LFS Details
|
assets/examples/source/s22.jpg
DELETED
Git LFS Details
|
assets/gradio_description_animate_clear.md
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
<div style="font-size: 1.2em; text-align: center;">
|
| 2 |
-
Step 3: Click the <strong>🚀 Animate</strong> button below to generate, or click 🧹 Clear to erase the results
|
| 3 |
-
</div>
|
|
|
|
|
|
|
|
|
|
|
|
assets/gradio_description_retargeting.md
CHANGED
|
@@ -1,13 +1 @@
|
|
| 1 |
-
<
|
| 2 |
-
|
| 3 |
-
<!-- ## Retargeting
|
| 4 |
-
<span style="font-size: 1.2em;">🔥 To edit the eyes and lip open ratio of the source portrait, drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span> -->
|
| 5 |
-
|
| 6 |
-
<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 1.2em;">
|
| 7 |
-
<div>
|
| 8 |
-
<h2>Retargeting</h2>
|
| 9 |
-
<p>Upload a Source Portrait as Retargeting Input, then drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times.
|
| 10 |
-
<br>
|
| 11 |
-
<strong>😊 Set both ratios to 0.8 to see what's going on!</strong></p>
|
| 12 |
-
</div>
|
| 13 |
-
</div>
|
|
|
|
| 1 |
+
<span style="font-size: 1.2em;">🔥 To change the eye and lip open ratios of the source portrait, please drag the sliders and then click the <strong>🚗 Retargeting</strong> button. The result will be shown in the blocks. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/gradio_description_upload.md
CHANGED
|
@@ -1,30 +1,2 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
If you find LivePortrait fun 🤪 or useful, please consider starring 🌟 our <a href="https://github.com/KwaiVGI/LivePortrait">GitHub Repo</a> to discover more features!
|
| 4 |
-
</div>
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
<!-- <div style="font-size: 1.2em;">
|
| 8 |
-
Step1: upload or use a webcam to get a <strong>Source Portrait</strong> (any aspect ratio) to left side.<br>
|
| 9 |
-
Step2: upload a <strong>Driving Video</strong> (1:1 aspect ratio) to right side.
|
| 10 |
-
</div> -->
|
| 11 |
-
|
| 12 |
-
<br>
|
| 13 |
-
<div style="font-size: 1.2em; display: flex; justify-content: space-between;">
|
| 14 |
-
<div style="flex: 1; text-align: center; margin-right: 20px;">
|
| 15 |
-
<div style="display: inline-block;">
|
| 16 |
-
Step 1: Upload a <strong>Source Portrait</strong> (any aspect ratio) ⬇️
|
| 17 |
-
</div>
|
| 18 |
-
<div style="display: inline-block; font-size: 0.75em;">
|
| 19 |
-
<strong>Note:</strong> To upload a source video, <a href="https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#4-gradio-interface-"><strong>build Gradio locally</strong></a>. Windows users can use the <a href="https://huggingface.co/cleardusk/LivePortrait-Windows"><strong>one-click package</strong></a>. Animals model <a href="https://github.com/KwaiVGI/LivePortrait/blob/main/assets/docs/changelog/2024-08-02.md"><strong>here</strong></a>.
|
| 20 |
-
</div>
|
| 21 |
-
</div>
|
| 22 |
-
<div style="flex: 1; text-align: center; margin-left: 20px;">
|
| 23 |
-
<div style="display: inline-block;">
|
| 24 |
-
Step 2: Upload a <strong>Driving Video</strong> (1:1 aspect ratio) ⬇️
|
| 25 |
-
</div>
|
| 26 |
-
<div style="display: inline-block; font-size: 0.75em;">
|
| 27 |
-
<strong>Tips:</strong> Focus on the head, minimize shoulder movement, <strong>neutral expression</strong> in first frame.
|
| 28 |
-
</div>
|
| 29 |
-
</div>
|
| 30 |
-
</div>
|
|
|
|
| 1 |
+
## 🤗 This is the official gradio demo for **LivePortrait**.
|
| 2 |
+
<div style="font-size: 1.2em;">Please upload a source portrait (or capture one with the webcam) in the <strong>Source Portrait</strong> field, and upload a driving video in the <strong>Driving Video</strong> field.</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/gradio_title.md
CHANGED
|
@@ -1,17 +1,10 @@
|
|
| 1 |
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
<a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
|
| 10 |
-
|
| 11 |
-
<a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
|
| 12 |
-
|
| 13 |
-
<a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/github/stars/KwaiVGI/LivePortrait
|
| 14 |
-
"></a>
|
| 15 |
</div>
|
| 16 |
-
</div>
|
| 17 |
</div>
|
|
|
|
| 1 |
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 2 |
+
<div>
|
| 3 |
+
<h1>LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
|
| 4 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
| 5 |
+
<a href="https://arxiv.org/pdf/2407.03168"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
|
| 6 |
+
<a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
|
| 7 |
+
<a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
|
| 8 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
</div>
|
|
|
|
| 10 |
</div>
|
readme.md
CHANGED
|
@@ -1,12 +1,143 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
```bibtex
|
| 5 |
-
@article{
|
| 6 |
title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
|
| 7 |
-
author = {Guo
|
| 8 |
-
|
| 9 |
-
|
| 10 |
}
|
| 11 |
```
|
| 12 |
-
|
|
|
|
| 1 |
+
<h1 align="center">LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
|
| 2 |
|
| 3 |
+
<div align='center'>
|
| 4 |
+
<a href='https://github.com/cleardusk' target='_blank'><strong>Jianzhu Guo</strong></a><sup> 1†</sup> 
|
| 5 |
+
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Dingyun Zhang</strong></a><sup> 1,2</sup> 
|
| 6 |
+
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Xiaoqiang Liu</strong></a><sup> 1</sup> 
|
| 7 |
+
<a href='https://github.com/KwaiVGI' target='_blank'><strong>Zhizhou Zhong</strong></a><sup> 1,3</sup> 
|
| 8 |
+
<a href='https://scholar.google.com.hk/citations?user=_8k1ubAAAAAJ' target='_blank'><strong>Yuan Zhang</strong></a><sup> 1</sup> 
|
| 9 |
+
</div>
|
| 10 |
+
|
| 11 |
+
<div align='center'>
|
| 12 |
+
<a href='https://scholar.google.com/citations?user=P6MraaYAAAAJ' target='_blank'><strong>Pengfei Wan</strong></a><sup> 1</sup> 
|
| 13 |
+
<a href='https://openreview.net/profile?id=~Di_ZHANG3' target='_blank'><strong>Di Zhang</strong></a><sup> 1</sup> 
|
| 14 |
+
</div>
|
| 15 |
+
|
| 16 |
+
<div align='center'>
|
| 17 |
+
<sup>1 </sup>Kuaishou Technology  <sup>2 </sup>University of Science and Technology of China  <sup>3 </sup>Fudan University 
|
| 18 |
+
</div>
|
| 19 |
+
|
| 20 |
+
<br>
|
| 21 |
+
<div align="center">
|
| 22 |
+
<!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
|
| 23 |
+
<a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-Homepage-green'></a>
|
| 24 |
+
<a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/Paper-arXiv-red'></a>
|
| 25 |
+
</div>
|
| 26 |
+
<br>
|
| 27 |
+
|
| 28 |
+
<p align="center">
|
| 29 |
+
<img src="./assets/docs/showcase2.gif" alt="showcase">
|
| 30 |
+
<br>
|
| 31 |
+
🔥 For more results, visit our <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
|
| 32 |
+
</p>
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
## 🔥 Updates
|
| 37 |
+
- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
|
| 38 |
+
- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
|
| 39 |
+
|
| 40 |
+
## Introduction
|
| 41 |
+
This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
|
| 42 |
+
We are actively updating and improving this repository. If you find any bugs or have suggestions, you are welcome to raise issues or submit pull requests (PRs) 💖.
|
| 43 |
+
|
| 44 |
+
## 🔥 Getting Started
|
| 45 |
+
### 1. Clone the code and prepare the environment
|
| 46 |
+
```bash
|
| 47 |
+
git clone https://github.com/KwaiVGI/LivePortrait
|
| 48 |
+
cd LivePortrait
|
| 49 |
+
|
| 50 |
+
# create env using conda
|
| 51 |
+
conda create -n LivePortrait python==3.9.18
|
| 52 |
+
conda activate LivePortrait
|
| 53 |
+
# install dependencies with pip
|
| 54 |
+
pip install -r requirements.txt
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### 2. Download pretrained weights
|
| 58 |
+
Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
|
| 59 |
+
```text
|
| 60 |
+
pretrained_weights
|
| 61 |
+
├── insightface
|
| 62 |
+
│ └── models
|
| 63 |
+
│ └── buffalo_l
|
| 64 |
+
│ ├── 2d106det.onnx
|
| 65 |
+
│ └── det_10g.onnx
|
| 66 |
+
└── liveportrait
|
| 67 |
+
├── base_models
|
| 68 |
+
│ ├── appearance_feature_extractor.pth
|
| 69 |
+
│ ├── motion_extractor.pth
|
| 70 |
+
│ ├── spade_generator.pth
|
| 71 |
+
│ └── warping_module.pth
|
| 72 |
+
├── landmark.onnx
|
| 73 |
+
└── retargeting_models
|
| 74 |
+
└── stitching_retargeting_module.pth
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 3. Inference 🚀
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
python inference.py
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
|
| 84 |
+
|
| 85 |
+
<p align="center">
|
| 86 |
+
<img src="./assets/docs/inference.gif" alt="image">
|
| 87 |
+
</p>
|
| 88 |
+
|
| 89 |
+
Or, you can change the input by specifying the `-s` and `-d` arguments:
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
|
| 93 |
+
|
| 94 |
+
# or disable pasting back
|
| 95 |
+
python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
|
| 96 |
+
|
| 97 |
+
# more options to see
|
| 98 |
+
python inference.py -h
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊
|
| 102 |
+
|
| 103 |
+
### 4. Gradio interface
|
| 104 |
+
|
| 105 |
+
We also provide a Gradio interface for a better experience; just run:
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
python app.py
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### 5. Inference speed evaluation 🚀🚀🚀
|
| 112 |
+
We have also provided a script to evaluate the inference speed of each module:
|
| 113 |
+
|
| 114 |
+
```bash
|
| 115 |
+
python speed.py
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
|
| 119 |
+
|
| 120 |
+
| Model | Parameters(M) | Model Size(MB) | Inference(ms) |
|
| 121 |
+
|-----------------------------------|:-------------:|:--------------:|:-------------:|
|
| 122 |
+
| Appearance Feature Extractor | 0.84 | 3.3 | 0.82 |
|
| 123 |
+
| Motion Extractor | 28.12 | 108 | 0.84 |
|
| 124 |
+
| Spade Generator | 55.37 | 212 | 7.59 |
|
| 125 |
+
| Warping Module | 45.53 | 174 | 5.21 |
|
| 126 |
+
| Stitching and Retargeting Modules| 0.23 | 2.3 | 0.31 |
|
| 127 |
+
|
| 128 |
+
*Note: the listed values of Stitching and Retargeting Modules represent the combined parameter counts and the total sequential inference time of three MLP networks.*
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
## Acknowledgements
|
| 132 |
+
We would like to thank the contributors of the [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), and [InsightFace](https://github.com/deepinsight/insightface) repositories for their open research and contributions.
|
| 133 |
+
|
| 134 |
+
## Citation 💖
|
| 135 |
+
If you find LivePortrait useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
|
| 136 |
```bibtex
|
| 137 |
+
@article{guo2024live,
|
| 138 |
title = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
|
| 139 |
+
author = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
|
| 140 |
+
year = {2024},
|
| 141 |
+
journal = {arXiv preprint arXiv:2407.03168},
|
| 142 |
}
|
| 143 |
```
|
|
|
src/config/argument_config.py
CHANGED
|
@@ -8,7 +8,6 @@ import os.path as osp
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
import tyro
|
| 10 |
from typing_extensions import Annotated
|
| 11 |
-
from typing import Optional
|
| 12 |
from .base_config import PrintableConfig, make_abs_path
|
| 13 |
|
| 14 |
|
|
@@ -42,4 +41,4 @@ class ArgumentConfig(PrintableConfig):
|
|
| 42 |
########## gradio arguments ##########
|
| 43 |
server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])] = 7860
|
| 44 |
share: bool = False
|
| 45 |
-
server_name:
|
|
|
|
| 8 |
from dataclasses import dataclass
|
| 9 |
import tyro
|
| 10 |
from typing_extensions import Annotated
|
|
|
|
| 11 |
from .base_config import PrintableConfig, make_abs_path
|
| 12 |
|
| 13 |
|
|
|
|
| 41 |
########## gradio arguments ##########
|
| 42 |
server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])] = 7860
|
| 43 |
share: bool = False
|
| 44 |
+
server_name: str = None # one can set "0.0.0.0" on local
|
src/gradio_pipeline.py
CHANGED
|
@@ -9,7 +9,8 @@ from .live_portrait_pipeline import LivePortraitPipeline
|
|
| 9 |
from .utils.io import load_img_online
|
| 10 |
from .utils.rprint import rlog as log
|
| 11 |
from .utils.crop import prepare_paste_back, paste_back
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
def update_args(args, user_args):
|
| 15 |
"""update the args according to user inputs
|
|
@@ -33,7 +34,7 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 33 |
flag_relative_input,
|
| 34 |
flag_do_crop_input,
|
| 35 |
flag_remap_input,
|
| 36 |
-
|
| 37 |
""" for video driven potrait animation
|
| 38 |
"""
|
| 39 |
if input_image_path is not None and input_video_path is not None:
|
|
@@ -53,7 +54,7 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 53 |
# gr.Info("Run successfully!", duration=2)
|
| 54 |
return video_path, video_path_concat,
|
| 55 |
else:
|
| 56 |
-
raise gr.Error("
|
| 57 |
|
| 58 |
def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop = True):
|
| 59 |
""" for single image retargeting
|
|
@@ -62,7 +63,7 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 62 |
f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
|
| 63 |
self.prepare_retargeting(input_image, flag_do_crop)
|
| 64 |
|
| 65 |
-
if input_eye_ratio is None or
|
| 66 |
raise gr.Error("Invalid ratio input 💥!", duration=5)
|
| 67 |
else:
|
| 68 |
x_s_user = x_s_user.to("cuda")
|
|
@@ -91,7 +92,7 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 91 |
# gr.Info("Upload successfully!", duration=2)
|
| 92 |
inference_cfg = self.live_portrait_wrapper.cfg
|
| 93 |
######## process source portrait ########
|
| 94 |
-
img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=
|
| 95 |
log(f"Load source image from {input_image}.")
|
| 96 |
crop_info = self.cropper.crop_single_image(img_rgb)
|
| 97 |
if flag_do_crop:
|
|
@@ -99,7 +100,7 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 99 |
else:
|
| 100 |
I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
|
| 101 |
x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
|
| 102 |
-
|
| 103 |
############################################
|
| 104 |
f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
|
| 105 |
x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
|
|
@@ -109,4 +110,5 @@ class GradioPipeline(LivePortraitPipeline):
|
|
| 109 |
return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
|
| 110 |
else:
|
| 111 |
# when press the clear button, go here
|
| 112 |
-
raise gr.Error("
|
|
|
|
|
|
| 9 |
from .utils.io import load_img_online
|
| 10 |
from .utils.rprint import rlog as log
|
| 11 |
from .utils.crop import prepare_paste_back, paste_back
|
| 12 |
+
from .utils.camera import get_rotation_matrix
|
| 13 |
+
from .utils.retargeting_utils import calc_eye_close_ratio, calc_lip_close_ratio
|
| 14 |
|
| 15 |
def update_args(args, user_args):
|
| 16 |
"""update the args according to user inputs
|
|
|
|
| 34 |
flag_relative_input,
|
| 35 |
flag_do_crop_input,
|
| 36 |
flag_remap_input,
|
| 37 |
+
):
|
| 38 |
""" for video driven potrait animation
|
| 39 |
"""
|
| 40 |
if input_image_path is not None and input_video_path is not None:
|
|
|
|
| 54 |
# gr.Info("Run successfully!", duration=2)
|
| 55 |
return video_path, video_path_concat,
|
| 56 |
else:
|
| 57 |
+
raise gr.Error("The input source portrait or driving video hasn't been prepared yet 💥!", duration=5)
|
| 58 |
|
| 59 |
def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop = True):
|
| 60 |
""" for single image retargeting
|
|
|
|
| 63 |
f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
|
| 64 |
self.prepare_retargeting(input_image, flag_do_crop)
|
| 65 |
|
| 66 |
+
if input_eye_ratio is None or input_eye_ratio is None:
|
| 67 |
raise gr.Error("Invalid ratio input 💥!", duration=5)
|
| 68 |
else:
|
| 69 |
x_s_user = x_s_user.to("cuda")
|
|
|
|
| 92 |
# gr.Info("Upload successfully!", duration=2)
|
| 93 |
inference_cfg = self.live_portrait_wrapper.cfg
|
| 94 |
######## process source portrait ########
|
| 95 |
+
img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
|
| 96 |
log(f"Load source image from {input_image}.")
|
| 97 |
crop_info = self.cropper.crop_single_image(img_rgb)
|
| 98 |
if flag_do_crop:
|
|
|
|
| 100 |
else:
|
| 101 |
I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
|
| 102 |
x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
|
| 103 |
+
R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
|
| 104 |
############################################
|
| 105 |
f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
|
| 106 |
x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
|
|
|
|
| 110 |
return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
|
| 111 |
else:
|
| 112 |
# when press the clear button, go here
|
| 113 |
+
raise gr.Error("The retargeting input hasn't been prepared yet 💥!", duration=5)
|
| 114 |
+
|
src/live_portrait_pipeline.py
CHANGED
|
@@ -4,13 +4,13 @@
|
|
| 4 |
Pipeline of LivePortrait
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
import cv2
|
| 11 |
import numpy as np
|
| 12 |
import pickle
|
| 13 |
-
import os
|
| 14 |
import os.path as osp
|
| 15 |
from rich.progress import track
|
| 16 |
|
|
@@ -19,7 +19,7 @@ from .config.inference_config import InferenceConfig
|
|
| 19 |
from .config.crop_config import CropConfig
|
| 20 |
from .utils.cropper import Cropper
|
| 21 |
from .utils.camera import get_rotation_matrix
|
| 22 |
-
from .utils.video import images2video, concat_frames
|
| 23 |
from .utils.crop import _transform_img, prepare_paste_back, paste_back
|
| 24 |
from .utils.retargeting_utils import calc_lip_close_ratio
|
| 25 |
from .utils.io import load_image_rgb, load_driving_info, resize_to_limit
|
|
@@ -68,12 +68,8 @@ class LivePortraitPipeline(object):
|
|
| 68 |
############################################
|
| 69 |
|
| 70 |
######## process driving info ########
|
| 71 |
-
output_fps = 30 # default fps
|
| 72 |
if is_video(args.driving_info):
|
| 73 |
log(f"Load from video file (mp4 mov avi etc...): {args.driving_info}")
|
| 74 |
-
output_fps = int(get_fps(args.driving_info))
|
| 75 |
-
log(f'The FPS of {args.driving_info} is: {output_fps}')
|
| 76 |
-
|
| 77 |
# TODO: track the driving video here -> build the template
|
| 78 |
driving_rgb_lst = load_driving_info(args.driving_info)
|
| 79 |
driving_rgb_lst_256 = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst]
|
|
@@ -178,32 +174,17 @@ class LivePortraitPipeline(object):
|
|
| 178 |
|
| 179 |
mkdir(args.output_dir)
|
| 180 |
wfp_concat = None
|
| 181 |
-
flag_has_audio = has_audio_stream(args.driving_info)
|
| 182 |
-
|
| 183 |
if is_video(args.driving_info):
|
| 184 |
frames_concatenated = concat_frames(I_p_lst, driving_rgb_lst, img_crop_256x256)
|
| 185 |
# save (driving frames, source image, drived frames) result
|
| 186 |
wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
|
| 187 |
-
images2video(frames_concatenated, wfp=wfp_concat
|
| 188 |
-
if flag_has_audio:
|
| 189 |
-
# final result with concat
|
| 190 |
-
wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat_with_audio.mp4')
|
| 191 |
-
add_audio_to_video(wfp_concat, args.driving_info, wfp_concat_with_audio)
|
| 192 |
-
os.replace(wfp_concat_with_audio, wfp_concat)
|
| 193 |
-
log(f"Replace {wfp_concat} with {wfp_concat_with_audio}")
|
| 194 |
|
| 195 |
# save drived result
|
| 196 |
wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
|
| 197 |
if inference_cfg.flag_pasteback:
|
| 198 |
-
images2video(I_p_paste_lst, wfp=wfp
|
| 199 |
else:
|
| 200 |
-
images2video(I_p_lst, wfp=wfp
|
| 201 |
-
|
| 202 |
-
######### build final result #########
|
| 203 |
-
if flag_has_audio:
|
| 204 |
-
wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_with_audio.mp4')
|
| 205 |
-
add_audio_to_video(wfp, args.driving_info, wfp_with_audio)
|
| 206 |
-
os.replace(wfp_with_audio, wfp)
|
| 207 |
-
log(f"Replace {wfp} with {wfp_with_audio}")
|
| 208 |
|
| 209 |
return wfp, wfp_concat
|
|
|
|
| 4 |
Pipeline of LivePortrait
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
# TODO:
|
| 8 |
+
# 1. Currently assumes every template is already cropped; this needs to change
|
| 9 |
+
# 2. Pick example images: source + driving
|
| 10 |
|
| 11 |
import cv2
|
| 12 |
import numpy as np
|
| 13 |
import pickle
|
|
|
|
| 14 |
import os.path as osp
|
| 15 |
from rich.progress import track
|
| 16 |
|
|
|
|
| 19 |
from .config.crop_config import CropConfig
|
| 20 |
from .utils.cropper import Cropper
|
| 21 |
from .utils.camera import get_rotation_matrix
|
| 22 |
+
from .utils.video import images2video, concat_frames
|
| 23 |
from .utils.crop import _transform_img, prepare_paste_back, paste_back
|
| 24 |
from .utils.retargeting_utils import calc_lip_close_ratio
|
| 25 |
from .utils.io import load_image_rgb, load_driving_info, resize_to_limit
|
|
|
|
| 68 |
############################################
|
| 69 |
|
| 70 |
######## process driving info ########
|
|
|
|
| 71 |
if is_video(args.driving_info):
|
| 72 |
log(f"Load from video file (mp4 mov avi etc...): {args.driving_info}")
|
|
|
|
|
|
|
|
|
|
| 73 |
# TODO: track the driving video here -> build the template
|
| 74 |
driving_rgb_lst = load_driving_info(args.driving_info)
|
| 75 |
driving_rgb_lst_256 = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst]
|
|
|
|
| 174 |
|
| 175 |
mkdir(args.output_dir)
|
| 176 |
wfp_concat = None
|
|
|
|
|
|
|
| 177 |
if is_video(args.driving_info):
|
| 178 |
frames_concatenated = concat_frames(I_p_lst, driving_rgb_lst, img_crop_256x256)
|
| 179 |
# save (driving frames, source image, drived frames) result
|
| 180 |
wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
|
| 181 |
+
images2video(frames_concatenated, wfp=wfp_concat)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
# save drived result
|
| 184 |
wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
|
| 185 |
if inference_cfg.flag_pasteback:
|
| 186 |
+
images2video(I_p_paste_lst, wfp=wfp)
|
| 187 |
else:
|
| 188 |
+
images2video(I_p_lst, wfp=wfp)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
return wfp, wfp_concat
|
src/utils/video.py
CHANGED
|
@@ -12,12 +12,11 @@ import cv2
|
|
| 12 |
|
| 13 |
from rich.progress import track
|
| 14 |
from .helper import prefix
|
| 15 |
-
from .rprint import rlog as log
|
| 16 |
from .rprint import rprint as print
|
| 17 |
|
| 18 |
|
| 19 |
def exec_cmd(cmd):
|
| 20 |
-
|
| 21 |
|
| 22 |
|
| 23 |
def images2video(images, wfp, **kwargs):
|
|
@@ -132,72 +131,9 @@ def change_video_fps(input_file, output_file, fps=20, codec='libx264', crf=5):
|
|
| 132 |
exec_cmd(cmd)
|
| 133 |
|
| 134 |
|
| 135 |
-
def get_fps(filepath
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
fps = default_fps
|
| 141 |
-
except Exception as e:
|
| 142 |
-
log(e)
|
| 143 |
-
fps = default_fps
|
| 144 |
-
|
| 145 |
return fps
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
def has_audio_stream(video_path: str) -> bool:
|
| 149 |
-
"""
|
| 150 |
-
Check if the video file contains an audio stream.
|
| 151 |
-
|
| 152 |
-
:param video_path: Path to the video file
|
| 153 |
-
:return: True if the video contains an audio stream, False otherwise
|
| 154 |
-
"""
|
| 155 |
-
if osp.isdir(video_path):
|
| 156 |
-
return False
|
| 157 |
-
|
| 158 |
-
cmd = [
|
| 159 |
-
'ffprobe',
|
| 160 |
-
'-v', 'error',
|
| 161 |
-
'-select_streams', 'a',
|
| 162 |
-
'-show_entries', 'stream=codec_type',
|
| 163 |
-
'-of', 'default=noprint_wrappers=1:nokey=1',
|
| 164 |
-
f'"{video_path}"'
|
| 165 |
-
]
|
| 166 |
-
|
| 167 |
-
try:
|
| 168 |
-
# result = subprocess.run(cmd, capture_output=True, text=True)
|
| 169 |
-
result = exec_cmd(' '.join(cmd))
|
| 170 |
-
if result.returncode != 0:
|
| 171 |
-
log(f"Error occurred while probing video: {result.stderr}")
|
| 172 |
-
return False
|
| 173 |
-
|
| 174 |
-
# Check if there is any output from ffprobe command
|
| 175 |
-
return bool(result.stdout.strip())
|
| 176 |
-
except Exception as e:
|
| 177 |
-
log(
|
| 178 |
-
f"Error occurred while probing video: {video_path}, "
|
| 179 |
-
"you may need to install ffprobe! (https://ffmpeg.org/download.html) "
|
| 180 |
-
"Now set audio to false!",
|
| 181 |
-
style="bold red"
|
| 182 |
-
)
|
| 183 |
-
return False
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
def add_audio_to_video(silent_video_path: str, audio_video_path: str, output_video_path: str):
|
| 187 |
-
cmd = [
|
| 188 |
-
'ffmpeg',
|
| 189 |
-
'-y',
|
| 190 |
-
'-i', f'"{silent_video_path}"',
|
| 191 |
-
'-i', f'"{audio_video_path}"',
|
| 192 |
-
'-map', '0:v',
|
| 193 |
-
'-map', '1:a',
|
| 194 |
-
'-c:v', 'copy',
|
| 195 |
-
'-shortest',
|
| 196 |
-
f'"{output_video_path}"'
|
| 197 |
-
]
|
| 198 |
-
|
| 199 |
-
try:
|
| 200 |
-
exec_cmd(' '.join(cmd))
|
| 201 |
-
log(f"Video with audio generated successfully: {output_video_path}")
|
| 202 |
-
except subprocess.CalledProcessError as e:
|
| 203 |
-
log(f"Error occurred: {e}")
|
|
|
|
| 12 |
|
| 13 |
from rich.progress import track
|
| 14 |
from .helper import prefix
|
|
|
|
| 15 |
from .rprint import rprint as print
|
| 16 |
|
| 17 |
|
| 18 |
def exec_cmd(cmd):
    """Run ``cmd`` through the shell and return the finished process.

    Args:
        cmd: Command line as a single string. It is handed to the shell
            unchanged (``shell=True``), so the caller is responsible for
            quoting any paths or arguments it interpolates.

    Returns:
        The ``subprocess.CompletedProcess`` for the command. ``stdout`` holds
        the captured output as bytes; stderr is merged into the same stream.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    # The output is captured rather than inherited, so hand the result back
    # instead of discarding it; callers that ignore the return are unaffected.
    return subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
| 20 |
|
| 21 |
|
| 22 |
def images2video(images, wfp, **kwargs):
|
|
|
|
| 131 |
exec_cmd(cmd)
|
| 132 |
|
| 133 |
|
| 134 |
+
def get_fps(filepath):
    """Return the average frame rate of the first video stream in ``filepath``.

    The file is probed with ``ffmpeg.probe`` and the ``avg_frame_rate`` entry
    of the first stream whose ``codec_type`` is ``'video'`` is converted to a
    float.

    Args:
        filepath: Path of the media file to probe.

    Returns:
        The average frame rate as a float.

    Raises:
        ValueError: If the probe result contains no video stream, or the
            reported rate is not a valid rational/number string.
        ZeroDivisionError: If the reported rate has a zero denominator.
    """
    from fractions import Fraction

    import ffmpeg

    probe = ffmpeg.probe(filepath)
    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
    if video_stream is None:
        # Fail with a clear message instead of subscripting None below.
        raise ValueError(f"No video stream found in {filepath!r}")

    # The rate is presumably a rational string such as "30000/1001" (TODO
    # confirm against ffprobe output). Parse it as a fraction rather than
    # eval()-ing text that originates from an external tool.
    fps = float(Fraction(video_stream['avg_frame_rate']))
    return fps
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
video2template.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding: utf-8
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
[WIP] Pipeline for video template preparation
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import tyro
|
| 8 |
+
from src.config.crop_config import CropConfig
|
| 9 |
+
from src.config.inference_config import InferenceConfig
|
| 10 |
+
from src.config.argument_config import ArgumentConfig
|
| 11 |
+
from src.template_maker import TemplateMaker
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def partial_fields(target_class, kwargs):
    """Build a ``target_class`` instance from the entries of ``kwargs`` it knows.

    Every key of ``kwargs`` that is also an attribute name on ``target_class``
    is forwarded as a keyword argument to the constructor; all other keys are
    dropped.
    """
    accepted = {}
    for name, value in kwargs.items():
        if hasattr(target_class, name):
            accepted[name] = value
    return target_class(**accepted)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def main():
    """Command-line entry point: build a motion template from a driving video.

    Parses an ``ArgumentConfig`` from the command line with tyro, derives the
    inference and crop configurations from its fields, and asks a
    ``TemplateMaker`` to write the motion template.
    """
    # Accent colour used by tyro for its CLI help output.
    tyro.extras.set_accent_color("bright_cyan")
    cli_args = tyro.cli(ArgumentConfig)

    # Each config picks, out of the parsed CLI fields, only those it declares.
    cli_fields = cli_args.__dict__
    inference_cfg = partial_fields(InferenceConfig, cli_fields)
    crop_cfg = partial_fields(CropConfig, cli_fields)

    maker = TemplateMaker(inference_cfg=inference_cfg, crop_cfg=crop_cfg)

    # Run the template generation.
    maker.make_motion_template(cli_args.driving_video_path, cli_args.template_output_dir)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
if __name__ == '__main__':
|
| 37 |
+
main()
|