SetuG committed on
Commit
017c8fa
·
verified ·
1 Parent(s): 26f88c0

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitattributes +6 -35
  2. README.md +89 -12
  3. app.py +126 -0
  4. requirements.txt +30 -0
.gitattributes CHANGED
@@ -1,35 +1,6 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
2
+ *.pth.tar filter=lfs diff=lfs merge=lfs -text
3
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
4
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
5
+ *.bin filter=lfs diff=lfs merge=lfs -text
6
+ *.pt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,12 +1,89 @@
1
- ---
2
- title: Sadtalker
3
- emoji:
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 6.5.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SadTalker
3
+ emoji: 😭
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ <!-- Alternative deployment options:
14
+
15
+ For Streamlit:
16
+ sdk: streamlit
17
+ app_file: app_streamlit.py
18
+
19
+ For FastAPI:
20
+ sdk: docker
21
+ app_port: 7860
22
+
23
+ For Docker:
24
+ sdk: docker
25
+ app_port: 7860
26
+ -->
27
+
28
+ # SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023)
29
+
30
+ This is a Gradio app for SadTalker, which can generate talking face videos from a single image and audio.
31
+
32
+ ## Features
33
+ - Generate talking face videos from single image + audio
34
+ - Multiple preprocessing options
35
+ - Face enhancement with GFPGAN
36
+ - Multiple pose styles
37
+ - Still mode for fewer head movements
38
+
39
+ ## Setup
40
+
41
+ After cloning this repository to your Hugging Face Space, you'll need to:
42
+
43
+ 1. **Upload model files**: Download the following model files and upload them to your repository:
44
+
45
+ ### Required Model Files:
46
+
47
+ #### SadTalker Models (upload to `checkpoints/` folder):
48
+ - `SadTalker_V0.0.2_256.safetensors`
49
+ - `SadTalker_V0.0.2_512.safetensors`
50
+ - `mapping_00109-model.pth.tar`
51
+ - `mapping_00229-model.pth.tar`
52
+
53
+ #### GFPGAN Models (upload to `gfpgan/weights/` folder):
54
+ - `alignment_WFLW_4HG.pth`
55
+ - `detection_Resnet50_Final.pth`
56
+ - `GFPGANv1.4.pth`
57
+ - `parsing_parsenet.pth`
58
+
59
+ ### Where to get the models:
60
+ 1. Download from the original SadTalker repository: https://github.com/OpenTalker/SadTalker
61
+ 2. Or from the model links provided in their documentation
62
+
63
+ ### Upload Instructions:
64
+ 1. Go to your Hugging Face Space repository
65
+ 2. Click "Upload files"
66
+ 3. Create the folder structure and upload the model files
67
+ 4. Make sure the files are in the correct paths as listed above
68
+
69
+ ## Usage
70
+ 1. Upload a source image (preferably a portrait with clear face)
71
+ 2. Upload an audio file
72
+ 3. Adjust settings as needed
73
+ 4. Click Generate to create your talking face video
74
+
75
+ ## Citation
76
+ ```
77
+ @InProceedings{zhang2023sadtalker,
78
+ author={Zhang, Wenxuan and Cun, Xiaodong and Wang, Xuan and Zhang, Yong and Shen, Xi and Guo, Yu and Shan, Ying and Wang, Fei},
79
+ title={SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation},
80
+ booktitle={The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
81
+ month={June},
82
+ year={2023}
83
+ }
84
+ ```
85
+
86
+ ## Links
87
+ - [Paper](https://arxiv.org/abs/2211.12194)
88
+ - [Project Page](https://sadtalker.github.io)
89
+ - [Original Repository](https://github.com/OpenTalker/SadTalker)
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ import gradio as gr
3
+ import torch
4
+ import shutil
5
+ from src.gradio_demo import SadTalker
6
+
7
+ def sadtalker_demo():
8
+ # For Hugging Face, we'll use the current directory structure
9
+ checkpoint_path = 'checkpoints'
10
+ config_path = 'src/config'
11
+
12
+ try:
13
+ sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
14
+ except Exception as e:
15
+ print(f"Warning: Could not initialize SadTalker: {e}")
16
+ sad_talker = None
17
+
18
+ def generate_video(source_image, driven_audio, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style):
19
+ if sad_talker is None:
20
+ return "Error: SadTalker not initialized. Please ensure all model files are uploaded."
21
+
22
+ try:
23
+ return sad_talker.test(
24
+ source_image=source_image,
25
+ driven_audio=driven_audio,
26
+ preprocess=preprocess_type,
27
+ still_mode=is_still_mode,
28
+ use_enhancer=enhancer,
29
+ batch_size=batch_size,
30
+ size=size_of_image,
31
+ pose_style=pose_style
32
+ )
33
+ except Exception as e:
34
+ return f"Error generating video: {str(e)}"
35
+ with gr.Row().style(equal_height=False):
36
+ with gr.Column(variant='panel'):
37
+ with gr.Tabs(elem_id="sadtalker_source_image"):
38
+ with gr.TabItem('Upload image'):
39
+ with gr.Row():
40
+ source_image = gr.Image(
41
+ label="Source image",
42
+ source="upload",
43
+ type="filepath",
44
+ elem_id="img2img_image"
45
+ ).style(width=512)
46
+
47
+ with gr.Tabs(elem_id="sadtalker_driven_audio"):
48
+ with gr.TabItem('Upload Audio'):
49
+ with gr.Column(variant='panel'):
50
+ driven_audio = gr.Audio(
51
+ label="Input audio",
52
+ source="upload",
53
+ type="filepath"
54
+ )
55
+
56
+ with gr.Column(variant='panel'):
57
+ with gr.Tabs(elem_id="sadtalker_checkbox"):
58
+ with gr.TabItem('Settings'):
59
+ gr.Markdown("""
60
+ Need help? Please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more details
61
+ """)
62
+ with gr.Column(variant='panel'):
63
+ pose_style = gr.Slider(
64
+ minimum=0,
65
+ maximum=46,
66
+ step=1,
67
+ label="Pose style",
68
+ value=0
69
+ )
70
+ size_of_image = gr.Radio(
71
+ [256, 512],
72
+ value=256,
73
+ label='Face model resolution',
74
+ info="Use 256/512 model?"
75
+ )
76
+ preprocess_type = gr.Radio(
77
+ ['crop', 'resize','full', 'extcrop', 'extfull'],
78
+ value='crop',
79
+ label='preprocess',
80
+ info="How to handle input image?"
81
+ )
82
+ is_still_mode = gr.Checkbox(
83
+ label="Still Mode (fewer head motion, works with preprocess `full`)"
84
+ )
85
+ batch_size = gr.Slider(
86
+ label="Batch size in generation",
87
+ step=1,
88
+ maximum=10,
89
+ value=2
90
+ )
91
+ enhancer = gr.Checkbox(
92
+ label="GFPGAN as Face enhancer"
93
+ )
94
+ submit = gr.Button(
95
+ 'Generate',
96
+ elem_id="sadtalker_generate",
97
+ variant='primary'
98
+ )
99
+
100
+ with gr.Tabs(elem_id="sadtalker_generated"):
101
+ gen_video = gr.Video(
102
+ label="Generated video",
103
+ format="mp4"
104
+ ).style(width=512)
105
+
106
+ submit.click(
107
+ fn=generate_video,
108
+ inputs=[
109
+ source_image,
110
+ driven_audio,
111
+ preprocess_type,
112
+ is_still_mode,
113
+ enhancer,
114
+ batch_size,
115
+ size_of_image,
116
+ pose_style
117
+ ],
118
+ outputs=[gen_video]
119
+ )
120
+
121
+ return sadtalker_interface
122
+
123
+ if __name__ == "__main__":
124
+ demo = sadtalker_demo()
125
+ demo.queue()
126
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.23.4
2
+ torch>=1.12.1
3
+ torchvision>=0.13.1
4
+ torchaudio>=0.12.1
5
+ face_alignment==1.3.5
6
+ imageio==2.19.3
7
+ imageio-ffmpeg==0.4.7
8
+ librosa==0.9.2
9
+ numba
10
+ resampy==0.3.1
11
+ pydub==0.25.1
12
+ scipy==1.10.1
13
+ kornia==0.6.8
14
+ tqdm
15
+ yacs==0.1.8
16
+ pyyaml
17
+ joblib==1.1.0
18
+ scikit-image==0.19.3
19
+ basicsr==1.4.2
20
+ facexlib==0.3.0
21
+ gradio
22
+ gfpgan
23
+ av
24
+ safetensors
25
+ opencv-python
26
+ Pillow
27
+ matplotlib
28
+ einops
29
+ timm
30
+ xformers