Spaces:

KlingTeam
/

LivePortrait

Paused

App Files Files Community

fix: retargeting feature leakage

by zzz66 - opened Jul 9, 2024

base: refs/heads/main

←

from: refs/pr/7

Discussion Files changed

+220

-228

Files changed (27) hide show

.gitattributes +0 -9
README.md +1 -2
app.py +11 -18
assets/examples/driving/d1.mp4 +0 -0
assets/examples/driving/d14.mp4 +0 -3
assets/examples/driving/d14_trim.mp4 +0 -3
assets/examples/driving/d15.mp4 +0 -3
assets/examples/driving/d16.mp4 +0 -3
assets/examples/driving/d18.mp4 +0 -3
assets/examples/driving/d19.mp4 +0 -3
assets/examples/driving/d2.mp4 +0 -0
assets/examples/driving/d5.mp4 +0 -0
assets/examples/driving/d6_trim.mp4 +0 -3
assets/examples/driving/d7.mp4 +0 -0
assets/examples/driving/d8.mp4 +0 -0
assets/examples/source/s12.jpg +0 -3
assets/examples/source/s22.jpg +0 -3
assets/gradio_description_animate_clear.md +0 -3
assets/gradio_description_retargeting.md +1 -13
assets/gradio_description_upload.md +2 -30
assets/gradio_title.md +7 -14
readme.md +138 -7
src/config/argument_config.py +1 -2
src/gradio_pipeline.py +9 -7
src/live_portrait_pipeline.py +7 -26
src/utils/video.py +6 -70
video2template.py +37 -0

.gitattributes CHANGED Viewed

@@ -47,12 +47,3 @@ pretrained_weights/liveportrait/base_models/warping_module.pth filter=lfs diff=l
 pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d14.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/source/s12.jpg filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d14_trim.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d6_trim.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d15.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d16.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d18.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/driving/d19.mp4 filter=lfs diff=lfs merge=lfs -text
-assets/examples/source/s22.jpg filter=lfs diff=lfs merge=lfs -text

 pretrained_weights/insightface/models/buffalo_l/2d106det.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/insightface/models/buffalo_l/det_10g.onnx filter=lfs diff=lfs merge=lfs -text
 pretrained_weights/liveportrait/landmark.onnx filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -7,7 +7,6 @@ sdk: gradio
 sdk_version: 4.37.2
 app_file: app.py
 pinned: false
-disable_embedding: true
 tags:
   - Multimodal
   - Motion control
@@ -18,4 +17,4 @@ tags:
 short_description: Apply the motion of a video on a portrait
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 4.37.2
 app_file: app.py
 pinned: false
 tags:
   - Multimodal
   - Motion control
 short_description: Apply the motion of a video on a portrait
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -46,14 +46,14 @@ def gpu_wrapped_execute_image(*args, **kwargs):
 def is_square_video(video_path):
     """Validate that the video at ``video_path`` has a 1:1 frame size.

     The frame width and height are read through ``cv2.VideoCapture`` and the
     capture is released before the comparison is made.

     Returns:
         ``gr.update(visible=True)`` when width and height are equal.

     Raises:
         gr.Error: when the frame width differs from the frame height.
     """
     capture = cv2.VideoCapture(video_path)
     # Query width first, then height, while the capture is still held.
     frame_w, frame_h = (
         int(capture.get(prop))
         for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
     )
     capture.release()
     # NOTE(review): a capture that failed to open is not detected here;
     # presumably both properties then read as 0 and compare equal - confirm.
     if frame_w == frame_h:
         return gr.update(visible=True)
     raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
 # assets
@@ -63,10 +63,9 @@ example_video_dir = "assets/examples/driving"
 data_examples = [
     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
-    [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
 ]
 #################### interface logic ####################
@@ -92,8 +91,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     [osp.join(example_portrait_dir, "s10.jpg")],
                     [osp.join(example_portrait_dir, "s5.jpg")],
                     [osp.join(example_portrait_dir, "s7.jpg")],
-                    [osp.join(example_portrait_dir, "s12.jpg")],
-                    [osp.join(example_portrait_dir, "s22.jpg")],
                 ],
                 inputs=[image_input],
                 cache_examples=False,
@@ -103,10 +100,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             gr.Examples(
                 examples=[
                     [osp.join(example_video_dir, "d0.mp4")],
-                    [osp.join(example_video_dir, "d18.mp4")],
-                    [osp.join(example_video_dir, "d19.mp4")],
-                    [osp.join(example_video_dir, "d14_trim.mp4")],
-                    [osp.join(example_video_dir, "d6_trim.mp4")],
                 ],
                 inputs=[video_input],
                 cache_examples=False,
@@ -118,7 +114,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 flag_relative_input = gr.Checkbox(value=True, label="relative motion")
                 flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
                 flag_remap_input = gr.Checkbox(value=True, label="paste-back")
-    gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
     with gr.Row():
         with gr.Column():
             process_button_animation = gr.Button("🚀 Animate", variant="primary")
@@ -133,7 +128,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 output_video_concat.render()
     with gr.Row():
         # Examples
-        gr.Markdown("## You could also choose the examples below by one click ⬇️")
     with gr.Row():
         gr.Examples(
             examples=data_examples,
@@ -146,7 +141,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 flag_remap_input
             ],
             outputs=[output_image, output_image_paste_back],
-            examples_per_page=6,
             cache_examples=False,
         )
     gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
@@ -176,8 +171,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                         [osp.join(example_portrait_dir, "s10.jpg")],
                         [osp.join(example_portrait_dir, "s5.jpg")],
                         [osp.join(example_portrait_dir, "s7.jpg")],
-                        [osp.join(example_portrait_dir, "s12.jpg")],
-                        [osp.join(example_portrait_dir, "s22.jpg")],
                     ],
                     inputs=[retargeting_input_image],
                     cache_examples=False,

 def is_square_video(video_path):
     """Validate that the video at ``video_path`` has a 1:1 frame size.

     The frame width and height are read through ``cv2.VideoCapture`` and the
     capture is released before the comparison is made.

     Returns:
         ``gr.update(visible=True)`` when width and height are equal.

     Raises:
         gr.Error: when the frame width differs from the frame height.
     """
     capture = cv2.VideoCapture(video_path)
     # Query width first, then height, while the capture is still held.
     frame_w, frame_h = (
         int(capture.get(prop))
         for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
     )
     capture.release()
     # NOTE(review): a capture that failed to open is not detected here;
     # presumably both properties then read as 0 and compare equal - confirm.
     if frame_w == frame_h:
         return gr.update(visible=True)
     raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")
 # assets
 data_examples = [
     [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
     [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d5.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d6.mp4"), True, True, True, True],
+    [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d7.mp4"), True, True, True, True],
 ]
 #################### interface logic ####################
                     [osp.join(example_portrait_dir, "s10.jpg")],
                     [osp.join(example_portrait_dir, "s5.jpg")],
                     [osp.join(example_portrait_dir, "s7.jpg")],
                 ],
                 inputs=[image_input],
                 cache_examples=False,
             gr.Examples(
                 examples=[
                     [osp.join(example_video_dir, "d0.mp4")],
+                    [osp.join(example_video_dir, "d5.mp4")],
+                    [osp.join(example_video_dir, "d6.mp4")],
+                    [osp.join(example_video_dir, "d7.mp4")],
                 ],
                 inputs=[video_input],
                 cache_examples=False,
                 flag_relative_input = gr.Checkbox(value=True, label="relative motion")
                 flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
                 flag_remap_input = gr.Checkbox(value=True, label="paste-back")
     with gr.Row():
         with gr.Column():
             process_button_animation = gr.Button("🚀 Animate", variant="primary")
                 output_video_concat.render()
     with gr.Row():
         # Examples
+        gr.Markdown("## You could choose the examples below ⬇️")
     with gr.Row():
         gr.Examples(
             examples=data_examples,
                 flag_remap_input
             ],
             outputs=[output_image, output_image_paste_back],
+            examples_per_page=5,
             cache_examples=False,
         )
     gr.Markdown(load_description("assets/gradio_description_retargeting.md"), visible=True)
                         [osp.join(example_portrait_dir, "s10.jpg")],
                         [osp.join(example_portrait_dir, "s5.jpg")],
                         [osp.join(example_portrait_dir, "s7.jpg")],
                     ],
                     inputs=[retargeting_input_image],
                     cache_examples=False,

assets/examples/driving/d1.mp4 ADDED Viewed

Binary file (48.8 kB). View file

assets/examples/driving/d14.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:465e72fbf26bf4ed46d1adf7aab8a7344aac54a2f92c4d82a1d53127f0170472
-size 891025

assets/examples/driving/d14_trim.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1ddeda3ad54627d893afcbef9ca09d4e6b7b510d6c10407ce89d10f1b0e1cd16
-size 433589

assets/examples/driving/d15.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7ff1f84228c8db9eee09b28372ddfc4d5752d779860fdb882287d8c2edcf99d4
-size 105285

assets/examples/driving/d16.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:853639a403c0285d8073ffd3aa3b80fb52b351f3a720785ce799694d6ab63a16
-size 68369

assets/examples/driving/d18.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1dc94c1fec7ef7dc831c8a49f0e1788ae568812cb68e62f6875d9070f573d02a
-size 187263

assets/examples/driving/d19.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3047ba66296d96b8a4584e412e61493d7bc0fa5149c77b130e7feea375e698bd
-size 232859

assets/examples/driving/d2.mp4 ADDED Viewed

Binary file (47.8 kB). View file

assets/examples/driving/d5.mp4 ADDED Viewed

Binary file (135 kB). View file

assets/examples/driving/d6_trim.mp4 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:de9e15eef87674433f2a60972da65e42b55fa154df7beaf4e0ee1cea1939774b
-size 530752

assets/examples/driving/d7.mp4 ADDED Viewed

Binary file (185 kB). View file

assets/examples/driving/d8.mp4 ADDED Viewed

Binary file (312 kB). View file

assets/examples/source/s12.jpg DELETED Viewed

Git LFS Details

SHA256: c3122ed04b2a73e02fd21cb80a7119ef9dedc7ae988f80c2ea20947c6292f3ea
Pointer size: 130 Bytes
Size of remote file: 50 kB

assets/examples/source/s22.jpg DELETED Viewed

Git LFS Details

SHA256: 113d76fe4c3693916cde74e0f1250b516a9ea6b679c28cbd910b46a6c090cc62
Pointer size: 131 Bytes
Size of remote file: 159 kB

assets/gradio_description_animate_clear.md DELETED Viewed

@@ -1,3 +0,0 @@
-<div style="font-size: 1.2em; text-align: center;">
-    Step 3: Click the <strong>🚀 Animate</strong> button below to generate, or click 🧹 Clear to erase the results
-</div>

assets/gradio_description_retargeting.md CHANGED Viewed

@@ -1,13 +1 @@
-<br>
-<!-- ## Retargeting
-<span style="font-size: 1.2em;">🔥 To edit the eyes and lip open ratio of the source portrait, drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span> -->
-<div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 1.2em;">
-  <div>
-    <h2>Retargeting</h2>
-    <p>Upload a Source Portrait as Retargeting Input, then drag the sliders and click the <strong>🚗 Retargeting</strong> button. You can try running it multiple times.
-    <br>
-    <strong>😊 Set both ratios to 0.8 to see what's going on!</strong></p>
-  </div>
-</div>


1	+ <span style="font-size: 1.2em;">🔥 To change the eye and lip open ratios of the source portrait, drag the sliders and then click the <strong>🚗 Retargeting</strong> button. The result will be shown in the blocks. You can try running it multiple times. <strong>😊 Set both ratios to 0.8 to see what's going on!</strong> </span>

assets/gradio_description_upload.md CHANGED Viewed

@@ -1,30 +1,2 @@
-<!-- ## 🤗 This is the official gradio demo for LivePortrait. -->
-<!-- <div style="font-size: 1.0em;">
-    If you find LivePortrait fun 🤪 or useful, please consider starring 🌟 our <a href="https://github.com/KwaiVGI/LivePortrait">GitHub Repo</a> to discover more features!
-</div>
-<!-- <div style="font-size: 1.2em;">
-    Step1: upload or use a webcam to get a <strong>Source Portrait</strong> (any aspect ratio) to left side.<br>
-    Step2: upload a <strong>Driving Video</strong> (1:1 aspect ratio) to right side.
-</div> -->
-<br>
-<div style="font-size: 1.2em; display: flex; justify-content: space-between;">
-    <div style="flex: 1; text-align: center; margin-right: 20px;">
-        <div style="display: inline-block;">
-            Step 1: Upload a <strong>Source Portrait</strong> (any aspect ratio) ⬇️
-        </div>
-        <div style="display: inline-block; font-size: 0.75em;">
-            <strong>Note:</strong> To upload a source video, <a href="https://github.com/KwaiVGI/LivePortrait?tab=readme-ov-file#4-gradio-interface-"><strong>build Gradio locally</strong></a>. Windows users can use the <a href="https://huggingface.co/cleardusk/LivePortrait-Windows"><strong>one-click package</strong></a>. Animals model <a href="https://github.com/KwaiVGI/LivePortrait/blob/main/assets/docs/changelog/2024-08-02.md"><strong>here</strong></a>.
-        </div>
-    </div>
-    <div style="flex: 1; text-align: center; margin-left: 20px;">
-        <div style="display: inline-block;">
-            Step 2: Upload a <strong>Driving Video</strong> (1:1 aspect ratio) ⬇️
-        </div>
-        <div style="display: inline-block; font-size: 0.75em;">
-            <strong>Tips:</strong> Focus on the head, minimize shoulder movement, <strong>neutral expression</strong> in first frame.
-        </div>
-    </div>
-</div>


1	+ ## 🤗 This is the official gradio demo for LivePortrait.
2	+ <div style="font-size: 1.2em;">Please upload a source portrait (or capture one with the webcam) in the <strong>Source Portrait</strong> field, and upload a driving video in the <strong>Driving Video</strong> field.</div>

assets/gradio_title.md CHANGED Viewed

@@ -1,17 +1,10 @@
 <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-  <div>
-    <h1>LivePortrait</h1>
-    <span>Add mimics and lip sync to your static portrait driven by a video</span>
-    <br>
-    <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-      <a href="https://arxiv.org/pdf/2407.03168"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
-      &nbsp;
-      <a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
-      &nbsp;
-      <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
-      &nbsp;
-      <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/github/stars/KwaiVGI/LivePortrait
-      "></a>
     </div>
-  </div>
 </div>

 <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+    <div>
+        <h1>LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
+        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+            <a href="https://arxiv.org/pdf/2407.03168"><img src="https://img.shields.io/badge/arXiv-2407.03168-red"></a>
+            <a href="https://liveportrait.github.io"><img src="https://img.shields.io/badge/Project_Page-LivePortrait-green" alt="Project Page"></a>
+            <a href="https://github.com/KwaiVGI/LivePortrait"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
+        </div>
     </div>
 </div>

readme.md CHANGED Viewed

@@ -1,12 +1,143 @@
-This is the official Space of the paper: [**LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control**](https://arxiv.org/abs/2407.03168)
-If you find LivePortrait useful for your research, welcome to cite our work using the following BibTeX:
 ```bibtex
-@article{guo2024liveportrait,
   title   = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
-  author  = {Guo, Jianzhu and Zhang, Dingyun and Liu, Xiaoqiang and Zhong, Zhizhou and Zhang, Yuan and Wan, Pengfei and Zhang, Di},
-  journal = {arXiv preprint arXiv:2407.03168},
-  year    = {2024}
 }
 ```

+<h1 align="center">LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control</h1>
+<div align='center'>
+    <a href='https://github.com/cleardusk' target='_blank'><strong>Jianzhu Guo</strong></a><sup> 1†</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Dingyun Zhang</strong></a><sup> 1,2</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Xiaoqiang Liu</strong></a><sup> 1</sup>&emsp;
+    <a href='https://github.com/KwaiVGI' target='_blank'><strong>Zhizhou Zhong</strong></a><sup> 1,3</sup>&emsp;
+    <a href='https://scholar.google.com.hk/citations?user=_8k1ubAAAAAJ' target='_blank'><strong>Yuan Zhang</strong></a><sup> 1</sup>&emsp;
+</div>
+<div align='center'>
+    <a href='https://scholar.google.com/citations?user=P6MraaYAAAAJ' target='_blank'><strong>Pengfei Wan</strong></a><sup> 1</sup>&emsp;
+    <a href='https://openreview.net/profile?id=~Di_ZHANG3' target='_blank'><strong>Di Zhang</strong></a><sup> 1</sup>&emsp;
+</div>
+<div align='center'>
+    <sup>1 </sup>Kuaishou Technology&emsp; <sup>2 </sup>University of Science and Technology of China&emsp; <sup>3 </sup>Fudan University&emsp;
+</div>
+<br>
+<div align="center">
+  <!-- <a href='LICENSE'><img src='https://img.shields.io/badge/license-MIT-yellow'></a> -->
+  <a href='https://liveportrait.github.io'><img src='https://img.shields.io/badge/Project-Homepage-green'></a>
+  <a href='https://arxiv.org/pdf/2407.03168'><img src='https://img.shields.io/badge/Paper-arXiv-red'></a>
+</div>
+<br>
+<p align="center">
+  <img src="./assets/docs/showcase2.gif" alt="showcase">
+  <br>
+  🔥 For more results, visit our <a href="https://liveportrait.github.io/"><strong>homepage</strong></a> 🔥
+</p>
+## 🔥 Updates
+- **`2024/07/04`**: 🔥 We released the initial version of the inference code and models. Continuous updates, stay tuned!
+- **`2024/07/04`**: 😊 We released the [homepage](https://liveportrait.github.io) and technical report on [arXiv](https://arxiv.org/pdf/2407.03168).
+## Introduction
+This repo, named **LivePortrait**, contains the official PyTorch implementation of our paper [LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control](https://arxiv.org/pdf/2407.03168).
+We are actively updating and improving this repository. If you find any bugs or have suggestions, feel free to raise issues or submit pull requests (PR) 💖.
+## 🔥 Getting Started
+### 1. Clone the code and prepare the environment
+```bash
+git clone https://github.com/KwaiVGI/LivePortrait
+cd LivePortrait
+# create env using conda
+conda create -n LivePortrait python==3.9.18
+conda activate LivePortrait
+# install dependencies with pip
+pip install -r requirements.txt
+```
+### 2. Download pretrained weights
+Download our pretrained LivePortrait weights and face detection models of InsightFace from [Google Drive](https://drive.google.com/drive/folders/1UtKgzKjFAOmZkhNK-OYT0caJ_w2XAnib) or [Baidu Yun](https://pan.baidu.com/s/1MGctWmNla_vZxDbEp2Dtzw?pwd=z5cn). We have packed all weights in one directory 😊. Unzip and place them in `./pretrained_weights` ensuring the directory structure is as follows:
+```text
+pretrained_weights
+├── insightface
+│   └── models
+│       └── buffalo_l
+│           ├── 2d106det.onnx
+│           └── det_10g.onnx
+└── liveportrait
+    ├── base_models
+    │   ├── appearance_feature_extractor.pth
+    │   ├── motion_extractor.pth
+    │   ├── spade_generator.pth
+    │   └── warping_module.pth
+    ├── landmark.onnx
+    └── retargeting_models
+        └── stitching_retargeting_module.pth
+```
+### 3. Inference 🚀
+```bash
+python inference.py
+```
+If the script runs successfully, you will get an output mp4 file named `animations/s6--d0_concat.mp4`. This file includes the following results: driving video, input image, and generated result.
+<p align="center">
+  <img src="./assets/docs/inference.gif" alt="image">
+</p>
+Or, you can change the input by specifying the `-s` and `-d` arguments:
+```bash
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4
+# or disable pasting back
+python inference.py -s assets/examples/source/s9.jpg -d assets/examples/driving/d0.mp4 --no_flag_pasteback
+# more options to see
+python inference.py -h
+```
+**More interesting results can be found in our [Homepage](https://liveportrait.github.io)** 😊
+### 4. Gradio interface
+We also provide a Gradio interface for a better experience; just run:
+```bash
+python app.py
+```
+### 5. Inference speed evaluation 🚀🚀🚀
+We have also provided a script to evaluate the inference speed of each module:
+```bash
+python speed.py
+```
+Below are the results of inferring one frame on an RTX 4090 GPU using the native PyTorch framework with `torch.compile`:
+| Model                             | Parameters(M) | Model Size(MB) | Inference(ms) |
+|-----------------------------------|:-------------:|:--------------:|:-------------:|
+| Appearance Feature Extractor      |     0.84      |       3.3      |     0.82      |
+| Motion Extractor                  |     28.12     |       108      |     0.84      |
+| Spade Generator                   |     55.37     |       212      |     7.59      |
+| Warping Module                    |     45.53     |       174      |     5.21      |
+| Stitching and Retargeting Modules|     0.23      |       2.3      |     0.31      |
+*Note: the listed values of Stitching and Retargeting Modules represent the combined parameter counts and the total sequential inference time of three MLP networks.*
+## Acknowledgements
+We would like to thank the contributors of [FOMM](https://github.com/AliaksandrSiarohin/first-order-model), [Open Facevid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis), [SPADE](https://github.com/NVlabs/SPADE), [InsightFace](https://github.com/deepinsight/insightface) repositories, for their open research and contributions.
+## Citation 💖
+If you find LivePortrait useful for your research, please 🌟 this repo and cite our work using the following BibTeX:
 ```bibtex
+@article{guo2024live,
   title   = {LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control},
+  author  = {Jianzhu Guo and Dingyun Zhang and Xiaoqiang Liu and Zhizhou Zhong and Yuan Zhang and Pengfei Wan and Di Zhang},
+  year    = {2024},
+  journal = {arXiv preprint arXiv:2407.03168},
 }
 ```

src/config/argument_config.py CHANGED Viewed

@@ -8,7 +8,6 @@ import os.path as osp
 from dataclasses import dataclass
 import tyro
 from typing_extensions import Annotated
-from typing import Optional
 from .base_config import PrintableConfig, make_abs_path
@@ -42,4 +41,4 @@ class ArgumentConfig(PrintableConfig):
     ########## gradio arguments ##########
     server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])]  = 7860
     share: bool = False
-    server_name: Optional[str] = None  # one can set "0.0.0.0" on local

 from dataclasses import dataclass
 import tyro
 from typing_extensions import Annotated
 from .base_config import PrintableConfig, make_abs_path
     ########## gradio arguments ##########
     server_port: Annotated[int, tyro.conf.arg(aliases=["-p"])]  = 7860
     share: bool = False
+    server_name: str = None # one can set "0.0.0.0" on local

src/gradio_pipeline.py CHANGED Viewed

@@ -9,7 +9,8 @@ from .live_portrait_pipeline import LivePortraitPipeline
 from .utils.io import load_img_online
 from .utils.rprint import rlog as log
 from .utils.crop import prepare_paste_back, paste_back
-# from .utils.camera import get_rotation_matrix
 def update_args(args, user_args):
     """update the args according to user inputs
@@ -33,7 +34,7 @@ class GradioPipeline(LivePortraitPipeline):
         flag_relative_input,
         flag_do_crop_input,
         flag_remap_input,
-    ):
         """ for video driven potrait animation
         """
         if input_image_path is not None and input_video_path is not None:
@@ -53,7 +54,7 @@ class GradioPipeline(LivePortraitPipeline):
             # gr.Info("Run successfully!", duration=2)
             return video_path, video_path_concat,
         else:
-            raise gr.Error("Please upload the source portrait and driving video 🤗🤗🤗", duration=5)
     def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop = True):
         """ for single image retargeting
@@ -62,7 +63,7 @@ class GradioPipeline(LivePortraitPipeline):
         f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
         self.prepare_retargeting(input_image, flag_do_crop)
-        if input_eye_ratio is None or input_lip_ratio is None:
             raise gr.Error("Invalid ratio input 💥!", duration=5)
         else:
             x_s_user = x_s_user.to("cuda")
@@ -91,7 +92,7 @@ class GradioPipeline(LivePortraitPipeline):
             # gr.Info("Upload successfully!", duration=2)
             inference_cfg = self.live_portrait_wrapper.cfg
             ######## process source portrait ########
-            img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=1) # n=1 means do not trim the pixels
             log(f"Load source image from {input_image}.")
             crop_info = self.cropper.crop_single_image(img_rgb)
             if flag_do_crop:
@@ -99,7 +100,7 @@ class GradioPipeline(LivePortraitPipeline):
             else:
                 I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
             x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
-            # R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
             ############################################
             f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
             x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
@@ -109,4 +110,5 @@ class GradioPipeline(LivePortraitPipeline):
             return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
         else:
             # when press the clear button, go here
-            raise gr.Error("Please upload a source portrait as the retargeting input 🤗🤗🤗", duration=5)

 from .utils.io import load_img_online
 from .utils.rprint import rlog as log
 from .utils.crop import prepare_paste_back, paste_back
+from .utils.camera import get_rotation_matrix
+from .utils.retargeting_utils import calc_eye_close_ratio, calc_lip_close_ratio
 def update_args(args, user_args):
     """update the args according to user inputs
         flag_relative_input,
         flag_do_crop_input,
         flag_remap_input,
+        ):
         """ for video driven potrait animation
         """
         if input_image_path is not None and input_video_path is not None:
             # gr.Info("Run successfully!", duration=2)
             return video_path, video_path_concat,
         else:
+            raise gr.Error("The input source portrait or driving video hasn't been prepared yet 💥!", duration=5)
     def execute_image(self, input_eye_ratio: float, input_lip_ratio: float, input_image, flag_do_crop = True):
         """ for single image retargeting
         f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb = \
         self.prepare_retargeting(input_image, flag_do_crop)
+        if input_eye_ratio is None or input_eye_ratio is None:
             raise gr.Error("Invalid ratio input 💥!", duration=5)
         else:
             x_s_user = x_s_user.to("cuda")
             # gr.Info("Upload successfully!", duration=2)
             inference_cfg = self.live_portrait_wrapper.cfg
             ######## process source portrait ########
+            img_rgb = load_img_online(input_image, mode='rgb', max_dim=1280, n=16)
             log(f"Load source image from {input_image}.")
             crop_info = self.cropper.crop_single_image(img_rgb)
             if flag_do_crop:
             else:
                 I_s = self.live_portrait_wrapper.prepare_source(img_rgb)
             x_s_info = self.live_portrait_wrapper.get_kp_info(I_s)
+            R_s = get_rotation_matrix(x_s_info['pitch'], x_s_info['yaw'], x_s_info['roll'])
             ############################################
             f_s_user = self.live_portrait_wrapper.extract_feature_3d(I_s)
             x_s_user = self.live_portrait_wrapper.transform_keypoint(x_s_info)
             return f_s_user, x_s_user, source_lmk_user, crop_M_c2o, mask_ori, img_rgb
         else:
             # when press the clear button, go here
+            raise gr.Error("The retargeting input hasn't been prepared yet 💥!", duration=5)

src/live_portrait_pipeline.py CHANGED Viewed

@@ -4,13 +4,13 @@
 Pipeline of LivePortrait
 """
-import torch
-torch.backends.cudnn.benchmark = True # disable CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR warning
 import cv2
 import numpy as np
 import pickle
-import os
 import os.path as osp
 from rich.progress import track
@@ -19,7 +19,7 @@ from .config.inference_config import InferenceConfig
 from .config.crop_config import CropConfig
 from .utils.cropper import Cropper
 from .utils.camera import get_rotation_matrix
-from .utils.video import images2video, concat_frames, get_fps, add_audio_to_video, has_audio_stream
 from .utils.crop import _transform_img, prepare_paste_back, paste_back
 from .utils.retargeting_utils import calc_lip_close_ratio
 from .utils.io import load_image_rgb, load_driving_info, resize_to_limit
@@ -68,12 +68,8 @@ class LivePortraitPipeline(object):
         ############################################
         ######## process driving info ########
-        output_fps = 30 # default fps
         if is_video(args.driving_info):
             log(f"Load from video file (mp4 mov avi etc...): {args.driving_info}")
-            output_fps = int(get_fps(args.driving_info))
-            log(f'The FPS of {args.driving_info} is: {output_fps}')
             # TODO: 这里track一下驱动视频 -> 构建模板
             driving_rgb_lst = load_driving_info(args.driving_info)
             driving_rgb_lst_256 = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst]
@@ -178,32 +174,17 @@ class LivePortraitPipeline(object):
         mkdir(args.output_dir)
         wfp_concat = None
-        flag_has_audio = has_audio_stream(args.driving_info)
         if is_video(args.driving_info):
             frames_concatenated = concat_frames(I_p_lst, driving_rgb_lst, img_crop_256x256)
             # save (driving frames, source image, drived frames) result
             wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
-            images2video(frames_concatenated, wfp=wfp_concat, fps=output_fps)
-            if flag_has_audio:
-                # final result with concat
-                wfp_concat_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat_with_audio.mp4')
-                add_audio_to_video(wfp_concat, args.driving_info, wfp_concat_with_audio)
-                os.replace(wfp_concat_with_audio, wfp_concat)
-                log(f"Replace {wfp_concat} with {wfp_concat_with_audio}")
         # save drived result
         wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
         if inference_cfg.flag_pasteback:
-            images2video(I_p_paste_lst, wfp=wfp, fps=output_fps)
         else:
-            images2video(I_p_lst, wfp=wfp, fps=output_fps)
-        ######### build final result #########
-        if flag_has_audio:
-            wfp_with_audio = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_with_audio.mp4')
-            add_audio_to_video(wfp, args.driving_info, wfp_with_audio)
-            os.replace(wfp_with_audio, wfp)
-            log(f"Replace {wfp} with {wfp_with_audio}")
         return wfp, wfp_concat

 Pipeline of LivePortrait
 """
+# TODO:
+# 1. Currently assumes every template is already cropped; this needs to be changed
+# 2. Pick example images: source + driving
 import cv2
 import numpy as np
 import pickle
 import os.path as osp
 from rich.progress import track
 from .config.crop_config import CropConfig
 from .utils.cropper import Cropper
 from .utils.camera import get_rotation_matrix
+from .utils.video import images2video, concat_frames
 from .utils.crop import _transform_img, prepare_paste_back, paste_back
 from .utils.retargeting_utils import calc_lip_close_ratio
 from .utils.io import load_image_rgb, load_driving_info, resize_to_limit
         ############################################
         ######## process driving info ########
         if is_video(args.driving_info):
             log(f"Load from video file (mp4 mov avi etc...): {args.driving_info}")
             # TODO: track the driving video here -> build the template
             driving_rgb_lst = load_driving_info(args.driving_info)
             driving_rgb_lst_256 = [cv2.resize(_, (256, 256)) for _ in driving_rgb_lst]
         mkdir(args.output_dir)
         wfp_concat = None
         if is_video(args.driving_info):
             frames_concatenated = concat_frames(I_p_lst, driving_rgb_lst, img_crop_256x256)
             # save (driving frames, source image, drived frames) result
             wfp_concat = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}_concat.mp4')
+            images2video(frames_concatenated, wfp=wfp_concat)
         # save drived result
         wfp = osp.join(args.output_dir, f'{basename(args.source_image)}--{basename(args.driving_info)}.mp4')
         if inference_cfg.flag_pasteback:
+            images2video(I_p_paste_lst, wfp=wfp)
         else:
+            images2video(I_p_lst, wfp=wfp)
         return wfp, wfp_concat

src/utils/video.py CHANGED Viewed

@@ -12,12 +12,11 @@ import cv2
 from rich.progress import track
 from .helper import prefix
-from .rprint import rlog as log
 from .rprint import rprint as print
 def exec_cmd(cmd):
-    return subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 def images2video(images, wfp, **kwargs):
@@ -132,72 +131,9 @@ def change_video_fps(input_file, output_file, fps=20, codec='libx264', crf=5):
     exec_cmd(cmd)
-def get_fps(filepath, default_fps=25):
-    try:
-        fps = cv2.VideoCapture(filepath).get(cv2.CAP_PROP_FPS)
-        if fps in (0, None):
-            fps = default_fps
-    except Exception as e:
-        log(e)
-        fps = default_fps
     return fps
-def has_audio_stream(video_path: str) -> bool:
-    """
-    Check if the video file contains an audio stream.
-    :param video_path: Path to the video file
-    :return: True if the video contains an audio stream, False otherwise
-    """
-    if osp.isdir(video_path):
-        return False
-    cmd = [
-        'ffprobe',
-        '-v', 'error',
-        '-select_streams', 'a',
-        '-show_entries', 'stream=codec_type',
-        '-of', 'default=noprint_wrappers=1:nokey=1',
-        f'"{video_path}"'
-    ]
-    try:
-        # result = subprocess.run(cmd, capture_output=True, text=True)
-        result = exec_cmd(' '.join(cmd))
-        if result.returncode != 0:
-            log(f"Error occurred while probing video: {result.stderr}")
-            return False
-        # Check if there is any output from ffprobe command
-        return bool(result.stdout.strip())
-    except Exception as e:
-        log(
-            f"Error occurred while probing video: {video_path}, "
-            "you may need to install ffprobe! (https://ffmpeg.org/download.html) "
-            "Now set audio to false!",
-            style="bold red"
-        )
-    return False
-def add_audio_to_video(silent_video_path: str, audio_video_path: str, output_video_path: str):
-    cmd = [
-        'ffmpeg',
-        '-y',
-        '-i', f'"{silent_video_path}"',
-        '-i', f'"{audio_video_path}"',
-        '-map', '0:v',
-        '-map', '1:a',
-        '-c:v', 'copy',
-        '-shortest',
-        f'"{output_video_path}"'
-    ]
-    try:
-        exec_cmd(' '.join(cmd))
-        log(f"Video with audio generated successfully: {output_video_path}")
-    except subprocess.CalledProcessError as e:
-        log(f"Error occurred: {e}")

 from rich.progress import track
 from .helper import prefix
 from .rprint import rprint as print
 def exec_cmd(cmd):
+    """Run *cmd* through the shell and return the completed process.
+
+    :param cmd: full command line as a single string; it is interpreted by the
+        shell, so callers must quote any path or argument they interpolate
+    :return: the ``subprocess.CompletedProcess``; its ``stdout`` holds the
+        combined stdout/stderr bytes of the command
+    :raises subprocess.CalledProcessError: if the command exits non-zero
+    """
+    # return the result instead of dropping it, so the captured output is usable
+    return subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 def images2video(images, wfp, **kwargs):
     exec_cmd(cmd)
+def get_fps(filepath):
+    """Return the average frame rate of the first video stream in *filepath*.
+
+    The file is probed with ``ffmpeg.probe`` and the ``avg_frame_rate`` entry
+    of the first stream whose ``codec_type`` is ``'video'`` is converted to a
+    number (ffprobe reports it as a rational string such as ``"30000/1001"``).
+
+    :param filepath: path of the media file to probe
+    :return: frames per second as a float
+    :raises ValueError: if the probe result contains no video stream
+    """
+    from fractions import Fraction
+    import ffmpeg
+    probe = ffmpeg.probe(filepath)
+    video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
+    if video_stream is None:
+        # fail with a clear message instead of a TypeError on None['avg_frame_rate']
+        raise ValueError(f"No video stream found in: {filepath}")
+    # parse the rational with Fraction rather than eval(): the string comes from
+    # the probed file's metadata and must never be executed as Python code
+    fps = float(Fraction(video_stream['avg_frame_rate']))
     return fps

video2template.py ADDED Viewed

	@@ -0,0 +1,37 @@

+# coding: utf-8
+"""
+[WIP] Pipeline for video template preparation
+"""
+import tyro
+from src.config.crop_config import CropConfig
+from src.config.inference_config import InferenceConfig
+from src.config.argument_config import ArgumentConfig
+from src.template_maker import TemplateMaker
+def partial_fields(target_class, kwargs):
+    """Instantiate ``target_class`` from the entries of ``kwargs`` it declares.
+
+    Keys of ``kwargs`` that are not attributes of ``target_class`` are dropped;
+    the remaining entries are passed to the constructor as keyword arguments.
+    """
+    accepted = {}
+    for name, value in kwargs.items():
+        if hasattr(target_class, name):
+            accepted[name] = value
+    return target_class(**accepted)
+def main():
+    """Parse the CLI arguments and run the template maker.
+
+    The parsed ``ArgumentConfig`` is split into an ``InferenceConfig`` and a
+    ``CropConfig`` by attribute name, and ``TemplateMaker.make_motion_template``
+    is called with the driving video path and the template output directory.
+    """
+    # set tyro theme
+    tyro.extras.set_accent_color("bright_cyan")
+    args = tyro.cli(ArgumentConfig)
+    # each config picks out only the CLI attributes it declares
+    cli_values = args.__dict__
+    video_template_maker = TemplateMaker(
+        inference_cfg=partial_fields(InferenceConfig, cli_values),
+        crop_cfg=partial_fields(CropConfig, cli_values),
+    )
+    # run
+    video_template_maker.make_motion_template(args.driving_video_path, args.template_output_dir)
+# run the CLI only when this file is executed as a script, not when it is imported
+if __name__ == '__main__':
+    main()