urikxx commited on
Commit
82c4169
·
verified ·
1 Parent(s): 2fcfa6d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +8 -0
  2. .gitignore +4 -0
  3. LICENSE +201 -0
  4. README.md +285 -0
  5. assets/driven_videos/a.mp4 +3 -0
  6. assets/driven_videos/b.mp4 +3 -0
  7. assets/driven_videos/c.mp4 +3 -0
  8. assets/echomimic.png +0 -0
  9. assets/test_audios/chunnuanhuakai.wav +0 -0
  10. assets/test_audios/chunwang.wav +0 -0
  11. assets/test_audios/echomimic.wav +0 -0
  12. assets/test_audios/echomimic_en.wav +0 -0
  13. assets/test_audios/echomimic_en_girl.wav +0 -0
  14. assets/test_audios/echomimic_girl.wav +0 -0
  15. assets/test_audios/jane.wav +0 -0
  16. assets/test_audios/mei.wav +0 -0
  17. assets/test_audios/walden.wav +0 -0
  18. assets/test_audios/yun.wav +0 -0
  19. assets/test_imgs/a.png +0 -0
  20. assets/test_imgs/b.png +0 -0
  21. assets/test_imgs/c.png +3 -0
  22. assets/test_imgs/d.png +3 -0
  23. assets/test_imgs/e.png +3 -0
  24. assets/test_pose_demo/d.jpg +0 -0
  25. assets/test_pose_demo_audios/movie_0_clip_0.wav +3 -0
  26. assets/test_pose_demo_pose/0.pkl +3 -0
  27. assets/test_pose_demo_pose/1.pkl +3 -0
  28. assets/test_pose_demo_pose/10.pkl +3 -0
  29. assets/test_pose_demo_pose/100.pkl +3 -0
  30. assets/test_pose_demo_pose/101.pkl +3 -0
  31. assets/test_pose_demo_pose/102.pkl +3 -0
  32. assets/test_pose_demo_pose/103.pkl +3 -0
  33. assets/test_pose_demo_pose/104.pkl +3 -0
  34. assets/test_pose_demo_pose/105.pkl +3 -0
  35. assets/test_pose_demo_pose/106.pkl +3 -0
  36. assets/test_pose_demo_pose/107.pkl +3 -0
  37. assets/test_pose_demo_pose/108.pkl +3 -0
  38. assets/test_pose_demo_pose/109.pkl +3 -0
  39. assets/test_pose_demo_pose/11.pkl +3 -0
  40. assets/test_pose_demo_pose/110.pkl +3 -0
  41. assets/test_pose_demo_pose/111.pkl +3 -0
  42. assets/test_pose_demo_pose/112.pkl +3 -0
  43. assets/test_pose_demo_pose/113.pkl +3 -0
  44. assets/test_pose_demo_pose/114.pkl +3 -0
  45. assets/test_pose_demo_pose/115.pkl +3 -0
  46. assets/test_pose_demo_pose/116.pkl +3 -0
  47. assets/test_pose_demo_pose/117.pkl +3 -0
  48. assets/test_pose_demo_pose/118.pkl +3 -0
  49. assets/test_pose_demo_pose/119.pkl +3 -0
  50. assets/test_pose_demo_pose/12.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/driven_videos/a.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ assets/driven_videos/b.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ assets/driven_videos/c.mp4 filter=lfs diff=lfs merge=lfs -text
39
+ assets/test_imgs/c.png filter=lfs diff=lfs merge=lfs -text
40
+ assets/test_imgs/d.png filter=lfs diff=lfs merge=lfs -text
41
+ assets/test_imgs/e.png filter=lfs diff=lfs merge=lfs -text
42
+ assets/test_pose_demo_audios/movie_0_clip_0.wav filter=lfs diff=lfs merge=lfs -text
43
+ src/utils/mp_models/face_landmarker_v2_with_blendshapes.task filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ffmpeg-4.4-amd64-static
2
+ pretrained_weights
3
+ output
4
+ __pycache__
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align='center'>EchoMimic: Lifelike Audio-Driven Portrait Animations through Editable Landmark Conditioning</h1>
2
+
3
+ <div align='center'>
4
+ <a href='https://github.com/yuange250' target='_blank'>Zhiyuan Chen</a><sup>*</sup>&emsp;
5
+ <a href='https://github.com/JoeFannie' target='_blank'>Jiajiong Cao</a><sup>*</sup>&emsp;
6
+ <a href='https://github.com/octavianChen' target='_blank'>Zhiquan Chen</a><sup></sup>&emsp;
7
+ <a href='https://github.com/lymhust' target='_blank'>Yuming Li</a><sup></sup>&emsp;
8
+ <a href='https://github.com/' target='_blank'>Chenguang Ma</a><sup></sup>
9
+ </div>
10
+ <div align='center'>
11
+ *Equal Contribution.
12
+ </div>
13
+
14
+ <div align='center'>
15
+ Terminal Technology Department, Alipay, Ant Group.
16
+ </div>
17
+ <br>
18
+ <div align='center'>
19
+ <a href='https://badtobest.github.io/echomimic.html'><img src='https://img.shields.io/badge/Project-Page-blue'></a>
20
+ <a href='https://huggingface.co/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Model-yellow'></a>
21
+ <a href='https://huggingface.co/spaces/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Demo-yellow'></a>
22
+ <a href='https://www.modelscope.cn/models/BadToBest/EchoMimic'><img src='https://img.shields.io/badge/ModelScope-Model-purple'></a>
23
+ <a href='https://www.modelscope.cn/studios/BadToBest/BadToBest'><img src='https://img.shields.io/badge/ModelScope-Demo-purple'></a>
24
+ <a href='https://arxiv.org/abs/2407.08136'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
25
+ <a href='assets/echomimic.png'><img src='https://badges.aleen42.com/src/wechat.svg'></a>
26
+ </div>
27
+
28
+ ## &#x1F4E3; &#x1F4E3; Updates
29
+ * [2024.08.02] 🔥 EchoMimic is now available on [huggingface](https://huggingface.co/spaces/BadToBest/EchoMimic) with A100 GPU. Thanks Wenmeng Zhou@ModelScope.
30
+ * [2024.07.25] 🔥🔥🔥 Accelerated models and pipe on **Audio Driven** are released. The inference speed can be improved by **10x** (from ~7mins/240frames to ~50s/240frames on V100 GPU)
31
+ * [2024.07.23] 🔥 EchoMimic gradio demo on [modelscope](https://www.modelscope.cn/studios/BadToBest/BadToBest) is ready.
32
+ * [2024.07.23] 🔥 EchoMimic gradio demo on [huggingface](https://huggingface.co/spaces/fffiloni/EchoMimic) is ready. Thanks Sylvain Filoni@fffiloni.
33
+ * [2024.07.17] 🔥🔥🔥 Accelerated models and pipe on **Audio + Selected Landmarks** are released. The inference speed can be improved by **10x** (from ~7mins/240frames to ~50s/240frames on V100 GPU)
34
+ * [2024.07.14] 🔥 [ComfyUI](https://github.com/smthemex/ComfyUI_EchoMimic) is now available. Thanks @smthemex for the contribution.
35
+ * [2024.07.13] 🔥 Thanks [NewGenAI](https://www.youtube.com/@StableAIHub) for the [video installation tutorial](https://www.youtube.com/watch?v=8R0lTIY7tfI).
36
+ * [2024.07.13] 🔥 We release our pose&audio driven codes and models.
37
+ * [2024.07.12] 🔥 WebUI and GradioUI versions are released. We thank @greengerong @Robin021 and @O-O1024 for their contributions.
38
+ * [2024.07.12] 🔥 Our [paper](https://arxiv.org/abs/2407.08136) is now publicly available on arXiv.
39
+ * [2024.07.09] 🔥 We release our audio driven codes and models.
40
+
41
+ ## Gallery
42
+ ### Audio Driven (Sing)
43
+
44
+ <table class="center">
45
+
46
+ <tr>
47
+ <td width=30% style="border: none">
48
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/d014d921-9f94-4640-97ad-035b00effbfe" muted="false"></video>
49
+ </td>
50
+ <td width=30% style="border: none">
51
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/877603a5-a4f9-4486-a19f-8888422daf78" muted="false"></video>
52
+ </td>
53
+ <td width=30% style="border: none">
54
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/e0cb5afb-40a6-4365-84f8-cb2834c4cfe7" muted="false"></video>
55
+ </td>
56
+ </tr>
57
+
58
+ </table>
59
+
60
+ ### Audio Driven (English)
61
+
62
+ <table class="center">
63
+
64
+ <tr>
65
+ <td width=30% style="border: none">
66
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/386982cd-3ff8-470d-a6d9-b621e112f8a5" muted="false"></video>
67
+ </td>
68
+ <td width=30% style="border: none">
69
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/5c60bb91-1776-434e-a720-8857a00b1501" muted="false"></video>
70
+ </td>
71
+ <td width=30% style="border: none">
72
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/1f15adc5-0f33-4afa-b96a-2011886a4a06" muted="false"></video>
73
+ </td>
74
+ </tr>
75
+
76
+ </table>
77
+
78
+ ### Audio Driven (Chinese)
79
+
80
+ <table class="center">
81
+
82
+ <tr>
83
+ <td width=30% style="border: none">
84
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/a8092f9a-a5dc-4cd6-95be-1831afaccf00" muted="false"></video>
85
+ </td>
86
+ <td width=30% style="border: none">
87
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/c8b5c59f-0483-42ef-b3ee-4cffae6c7a52" muted="false"></video>
88
+ </td>
89
+ <td width=30% style="border: none">
90
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/532a3e60-2bac-4039-a06c-ff6bf06cb4a4" muted="false"></video>
91
+ </td>
92
+ </tr>
93
+
94
+ </table>
95
+
96
+ ### Landmark Driven
97
+
98
+ <table class="center">
99
+
100
+ <tr>
101
+ <td width=30% style="border: none">
102
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/1da6c46f-4532-4375-a0dc-0a4d6fd30a39" muted="false"></video>
103
+ </td>
104
+ <td width=30% style="border: none">
105
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/d4f4d5c1-e228-463a-b383-27fb90ed6172" muted="false"></video>
106
+ </td>
107
+ <td width=30% style="border: none">
108
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/18bd2c93-319e-4d1c-8255-3f02ba717475" muted="false"></video>
109
+ </td>
110
+ </tr>
111
+
112
+ </table>
113
+
114
+ ### Audio + Selected Landmark Driven
115
+
116
+ <table class="center">
117
+
118
+ <tr>
119
+ <td width=30% style="border: none">
120
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/4a29d735-ec1b-474d-b843-3ff0bdf85f55" muted="false"></video>
121
+ </td>
122
+ <td width=30% style="border: none">
123
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/b994c8f5-8dae-4dd8-870f-962b50dc091f" muted="false"></video>
124
+ </td>
125
+ <td width=30% style="border: none">
126
+ <video controls loop src="https://github.com/BadToBest/EchoMimic/assets/11451501/955c1d51-07b2-494d-ab93-895b9c43b896" muted="false"></video>
127
+ </td>
128
+ </tr>
129
+
130
+ </table>
131
+
132
+ **(Some demo images above are sourced from image websites. If there is any infringement, we will immediately remove them and apologize.)**
133
+
134
+ ## Installation
135
+
136
+ ### Download the Codes
137
+
138
+ ```bash
139
+ git clone https://github.com/BadToBest/EchoMimic
140
+ cd EchoMimic
141
+ ```
142
+
143
+ ### Python Environment Setup
144
+
145
+ - Tested System Environment: Centos 7.2/Ubuntu 22.04, Cuda >= 11.7
146
+ - Tested GPUs: A100(80G) / RTX4090D (24G) / V100(16G)
147
+ - Tested Python Version: 3.8 / 3.10 / 3.11
148
+
149
+ Create conda environment (Recommended):
150
+
151
+ ```bash
152
+ conda create -n echomimic python=3.8
153
+ conda activate echomimic
154
+ ```
155
+
156
+ Install packages with `pip`
157
+ ```bash
158
+ pip install -r requirements.txt
159
+ ```
160
+
161
+ ### Download ffmpeg-static
162
+ Download and decompress [ffmpeg-static](https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-4.4-amd64-static.tar.xz), then
163
+ ```
164
+ export FFMPEG_PATH=/path/to/ffmpeg-4.4-amd64-static
165
+ ```
166
+
167
+ ### Download pretrained weights
168
+
169
+ ```shell
170
+ git lfs install
171
+ git clone https://huggingface.co/BadToBest/EchoMimic pretrained_weights
172
+ ```
173
+
174
+ The **pretrained_weights** is organized as follows.
175
+
176
+ ```
177
+ ./pretrained_weights/
178
+ ├── denoising_unet.pth
179
+ ├── reference_unet.pth
180
+ ├── motion_module.pth
181
+ ├── face_locator.pth
182
+ ├── sd-vae-ft-mse
183
+ │ └── ...
184
+ ├── sd-image-variations-diffusers
185
+ │ └── ...
186
+ └── audio_processor
187
+ └── whisper_tiny.pt
188
+ ```
189
+
190
+ Here, **denoising_unet.pth** / **reference_unet.pth** / **motion_module.pth** / **face_locator.pth** are the main checkpoints of **EchoMimic**. The other models in this hub can also be downloaded from their original hubs, thanks to their authors' brilliant work:
191
+ - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse)
192
+ - [sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers)
193
+ - [audio_processor(whisper)](https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt)
194
+
195
+ ### Audio-Driven Algo Inference
196
+ Run the python inference script:
197
+
198
+ ```bash
199
+ python -u infer_audio2vid.py
200
+ python -u infer_audio2vid_pose.py
201
+ ```
202
+
203
+ ### Audio-Driven Algo Inference On Your Own Cases
204
+
205
+ Edit the inference config file **./configs/prompts/animation.yaml**, and add your own case:
206
+
207
+ ```yaml
208
+ test_cases:
209
+ "path/to/your/image":
210
+ - "path/to/your/audio"
211
+ ```
212
+
213
+ Then run the Python inference script:
214
+ ```bash
215
+ python -u infer_audio2vid.py
216
+ ```
217
+
218
+ ### Motion Alignment between Ref. Img. and Driven Vid.
219
+
220
+ (First, download the checkpoints with the '_pose.pth' suffix from Hugging Face.)
221
+
222
+ Edit driver_video and ref_image to your path in demo_motion_sync.py, then run
223
+ ```bash
224
+ python -u demo_motion_sync.py
225
+ ```
226
+
227
+ ### Audio&Pose-Driven Algo Inference
228
+ Edit ./configs/prompts/animation_pose.yaml, then run
229
+ ```bash
230
+ python -u infer_audio2vid_pose.py
231
+ ```
232
+
233
+ ### Pose-Driven Algo Inference
234
+ Set draw_mouse=True in line 135 of infer_audio2vid_pose.py. Edit ./configs/prompts/animation_pose.yaml, then run
235
+ ```bash
236
+ python -u infer_audio2vid_pose.py
237
+ ```
238
+
239
+ ### Run the Gradio UI
240
+
241
+ Thanks to the contribution from @Robin021:
242
+
243
+ ```bash
244
+
245
+ python -u webgui.py --server_port=3000
246
+
247
+ ```
248
+
249
+ ## Release Plans
250
+
251
+ | Status | Milestone | ETA |
252
+ |:--------:|:-------------------------------------------------------------------------|:--:|
253
+ | ✅ | The inference source code of the Audio-Driven algo to be released on GitHub | 9th July, 2024 |
254
+ | ✅ | Pretrained models trained on English and Mandarin Chinese to be released | 9th July, 2024 |
255
+ | ✅ | The inference source code of the Pose-Driven algo to be released on GitHub | 13th July, 2024 |
256
+ | ✅ | Pretrained models with better pose control to be released | 13th July, 2024 |
257
+ | ✅ | Accelerated models to be released | 17th July, 2024 |
258
+ | 🚀 | Pretrained models with better sing performance to be released | TBD |
259
+ | 🚀 | Large-Scale and High-resolution Chinese-Based Talking Head Dataset | TBD |
260
+
261
+ ## Acknowledgements
262
+
263
+ We would like to thank the contributors to the [AnimateDiff](https://github.com/guoyww/AnimateDiff), [Moore-AnimateAnyone](https://github.com/MooreThreads/Moore-AnimateAnyone) and [MuseTalk](https://github.com/TMElyralab/MuseTalk) repositories, for their open research and exploration.
264
+
265
+ We are also grateful to [V-Express](https://github.com/tencent-ailab/V-Express) and [hallo](https://github.com/fudan-generative-vision/hallo) for their outstanding work in the area of diffusion-based talking heads.
266
+
267
+ If we have missed any open-source projects or related articles, we will gladly add an acknowledgement of that specific work immediately.
268
+
269
+ ## Citation
270
+
271
+ If you find our work useful for your research, please consider citing the paper:
272
+
273
+ ```
274
+ @misc{chen2024echomimic,
275
+ title={EchoMimic: Lifelike Audio-Driven Portrait Animations through Editable Landmark Conditioning},
276
+ author={Zhiyuan Chen and Jiajiong Cao and Zhiquan Chen and Yuming Li and Chenguang Ma},
277
+ year={2024},
278
+ archivePrefix={arXiv},
279
+ primaryClass={cs.CV}
280
+ }
281
+ ```
282
+
283
+ ## Star History
284
+
285
+ [![Star History Chart](https://api.star-history.com/svg?repos=BadToBest/EchoMimic&type=Date)](https://star-history.com/?spm=5176.28103460.0.0.342a3da23STWrU#BadToBest/EchoMimic&Date)
assets/driven_videos/a.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4889c1ecc27aea2be958f67ad417bd0b11b6626d5ca1edd692c52a3fe1e73ee
3
+ size 1798555
assets/driven_videos/b.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97c5cc480eecf21130b5d17807f23242b67861b58a36f308df7159aa5d0b8dc0
3
+ size 2072709
assets/driven_videos/c.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228736987a683026e3220ed1bca8775ec44692e75cbddbb4c132903d90b022a5
3
+ size 2340154
assets/echomimic.png ADDED
assets/test_audios/chunnuanhuakai.wav ADDED
Binary file (511 kB). View file
 
assets/test_audios/chunwang.wav ADDED
Binary file (236 kB). View file
 
assets/test_audios/echomimic.wav ADDED
Binary file (217 kB). View file
 
assets/test_audios/echomimic_en.wav ADDED
Binary file (251 kB). View file
 
assets/test_audios/echomimic_en_girl.wav ADDED
Binary file (234 kB). View file
 
assets/test_audios/echomimic_girl.wav ADDED
Binary file (253 kB). View file
 
assets/test_audios/jane.wav ADDED
Binary file (756 kB). View file
 
assets/test_audios/mei.wav ADDED
Binary file (270 kB). View file
 
assets/test_audios/walden.wav ADDED
Binary file (391 kB). View file
 
assets/test_audios/yun.wav ADDED
Binary file (661 kB). View file
 
assets/test_imgs/a.png ADDED
assets/test_imgs/b.png ADDED
assets/test_imgs/c.png ADDED

Git LFS Details

  • SHA256: c8d03b2d81b0338676e3d31534788c4b13d21988cb026ad26d9c74ec83267367
  • Pointer size: 132 Bytes
  • Size of remote file: 1.96 MB
assets/test_imgs/d.png ADDED

Git LFS Details

  • SHA256: a89c0663a5cc4e5c872bbf0a9883b327080b855af4d54701a0c0abc0b2057fe5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB
assets/test_imgs/e.png ADDED

Git LFS Details

  • SHA256: b24837b7f1c3341fe7e3089314ee32428ae0aa0435df463c1087aa1ea3c644da
  • Pointer size: 132 Bytes
  • Size of remote file: 1.15 MB
assets/test_pose_demo/d.jpg ADDED
assets/test_pose_demo_audios/movie_0_clip_0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6994f9ce1788e7672d2489fb05e21459e0869e44202634a503e2a556ac939c
3
+ size 1771130
assets/test_pose_demo_pose/0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f459214a9284b9a80b38c3fb64c9d24890fe5472a0220f6c0327742c0cc856
3
+ size 11625
assets/test_pose_demo_pose/1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acb82a39521d5d774f67546e94aaf2cf743e2f16e4894d7819241dd290c004ee
3
+ size 11625
assets/test_pose_demo_pose/10.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30a8350432635fc65861c30c4433b03f0ddff615628356cd8e6eeb36bd0c985
3
+ size 11625
assets/test_pose_demo_pose/100.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425a4c9428f09e1b73da0b31d4aa5df50421085b18dbc3eccfdf85ab40e97770
3
+ size 11625
assets/test_pose_demo_pose/101.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06bc52421328959f16fc95e7495767c982870a88e1ec78d681cc06dc7bd4f09
3
+ size 11625
assets/test_pose_demo_pose/102.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135ee941d0c7bf27c1808d19a2e56a89e4f96a5b89c3f9c8793682cf2a707bf0
3
+ size 11625
assets/test_pose_demo_pose/103.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0a3b12714d02f933cca212689d5727fd5a87e3fdd29c7ed6d403a0367a2312
3
+ size 11625
assets/test_pose_demo_pose/104.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43bec0c1442aaba6a20ec49aa472fbd35ce3b42f24567f698a92ade73de6fc50
3
+ size 11625
assets/test_pose_demo_pose/105.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4143b52e0430a7ae75d2a9785bdea5505ae2936e47531273086961a2f63c2cac
3
+ size 11625
assets/test_pose_demo_pose/106.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10396bf15c0fa9bd7e3d925aced0009ae74c3353daa5076dd74bacad37572603
3
+ size 11625
assets/test_pose_demo_pose/107.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebcf965e910b130f48f1c1dd35b68176f13e1d7f546681eaf37a959b0325ebb
3
+ size 11625
assets/test_pose_demo_pose/108.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25546b95ca775dc94bd9439ab748cacab1d13b8ddae2a35c931a92fc8669c22f
3
+ size 11625
assets/test_pose_demo_pose/109.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16068bbde36d595f268b2ec37baa6b7dcea935b50345d989066f4d8d7d2aac1f
3
+ size 11625
assets/test_pose_demo_pose/11.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e534fe28d74fb88578fee00b40aa3b368065bb67e6e240664024f7bd5fc37b64
3
+ size 11625
assets/test_pose_demo_pose/110.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49bd7320a4d2eb64bf04eff284eacc124a3bdfd3313f825d092788d42c3f6ad6
3
+ size 11625
assets/test_pose_demo_pose/111.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6eea05553a2f4ef6ec9c51c5973ab576d86cd58c26a3777084b1baccfe61b44
3
+ size 11625
assets/test_pose_demo_pose/112.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333655dcb1cabb37f217569690e09fa884e8c6634f62610b1234476a2cddd177
3
+ size 11625
assets/test_pose_demo_pose/113.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338363e0f2b69e23d29be2aa9952e8de4a8d79ae27f0b015caded1a1231b74a4
3
+ size 11625
assets/test_pose_demo_pose/114.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4099b77a8fb45d1e8f09d43af6e46d7bb59a60734f7f37f12e1cff31c07b91a
3
+ size 11625
assets/test_pose_demo_pose/115.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5611bce0729c35305f3b67b0b7492786a109f071c06e26600009b6464238d81
3
+ size 11625
assets/test_pose_demo_pose/116.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a79cac90622ee10de3a4d1b2fe1373155ead1fe325bba1c5fa12763fa3800718
3
+ size 11625
assets/test_pose_demo_pose/117.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3ada492fe7572daff2a8ac567c86f771bf90b5bd3147e8ad799c73ea83dbfb4
3
+ size 11625
assets/test_pose_demo_pose/118.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd671f8f3c1b3d0a95d62ebbf3db5c9372aa1267e3b242bf86a19cd9e9a97f9a
3
+ size 11625
assets/test_pose_demo_pose/119.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c50938554e8bd2c8b918b8f5dc258f4fde0e984011e906df9b6a63338997c2b8
3
+ size 11625
assets/test_pose_demo_pose/12.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfdbc12184239274ebc8ffa3eb76ee8b4fa4f437be847db91324f0c7943fd10
3
+ size 11625