File size: 5,216 Bytes
d24fe1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
017c8fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os, sys
import gradio as gr
import torch
import shutil
from src.gradio_demo import SadTalker  

def sadtalker_demo():
    """Build the SadTalker Gradio interface.

    Constructs a ``SadTalker`` instance from the local ``checkpoints`` and
    ``src/config`` directories, lays out the upload / settings / result
    widgets inside a ``gr.Blocks`` container, and wires the "Generate" button
    to the model.

    If ``SadTalker`` cannot be constructed, a warning is printed and the UI is
    still built; pressing "Generate" then reports the problem to the user
    instead of crashing the app at start-up.

    Returns:
        The ``gr.Blocks`` app. The caller is responsible for calling
        ``queue()`` / ``launch()`` on it.
    """
    # For Hugging Face, we'll use the current directory structure
    checkpoint_path = 'checkpoints'
    config_path = 'src/config'

    try:
        sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
    except Exception as e:
        # Deliberate best-effort: keep the UI available even when the model
        # files are missing, and surface the problem on "Generate" instead.
        print(f"Warning: Could not initialize SadTalker: {e}")
        sad_talker = None

    def generate_video(source_image, driven_audio, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style):
        """Run SadTalker on the uploaded inputs and return its result.

        The arguments arrive in the order of the ``inputs`` list passed to
        ``submit.click`` below and are forwarded to ``sad_talker.test`` by
        keyword.

        Returns:
            Whatever ``sad_talker.test`` returns (presumably the path of the
            generated video, since it feeds a ``gr.Video`` -- TODO confirm).

        Raises:
            gr.Error: if the model is not initialized, an input is missing,
                or generation fails. Errors are raised rather than returned
                because the only output is a ``gr.Video``, which would treat
                a returned message string as a file path.
        """
        if sad_talker is None:
            raise gr.Error("Error: SadTalker not initialized. Please ensure all model files are uploaded.")

        # Both uploads use type="filepath"; an empty component yields None.
        if not source_image or not driven_audio:
            raise gr.Error("Please provide both a source image and an input audio file.")

        try:
            return sad_talker.test(
                source_image=source_image,
                driven_audio=driven_audio,
                preprocess=preprocess_type,
                still_mode=is_still_mode,
                use_enhancer=enhancer,
                batch_size=batch_size,
                size=size_of_image,
                pose_style=pose_style
            )
        except Exception as e:
            raise gr.Error(f"Error generating video: {str(e)}") from e

    # NOTE: `.style(...)` and `source="upload"` below are Gradio 3.x APIs;
    # they are kept as-is to match the Gradio version this file targets.
    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
        with gr.Row().style(equal_height=False):
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(
                                label="Source image", 
                                source="upload", 
                                type="filepath", 
                                elem_id="img2img_image"
                            ).style(width=512)

                with gr.Tabs(elem_id="sadtalker_driven_audio"):
                    with gr.TabItem('Upload Audio'):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(
                                label="Input audio", 
                                source="upload", 
                                type="filepath"
                            )
                            
            with gr.Column(variant='panel'): 
                with gr.Tabs(elem_id="sadtalker_checkbox"):
                    with gr.TabItem('Settings'):
                        gr.Markdown("""
                        Need help? Please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more details
                        """)
                        with gr.Column(variant='panel'):
                            pose_style = gr.Slider(
                                minimum=0, 
                                maximum=46, 
                                step=1, 
                                label="Pose style", 
                                value=0
                            )
                            size_of_image = gr.Radio(
                                [256, 512], 
                                value=256, 
                                label='Face model resolution', 
                                info="Use 256/512 model?"
                            )
                            preprocess_type = gr.Radio(
                                ['crop', 'resize','full', 'extcrop', 'extfull'], 
                                value='crop', 
                                label='preprocess', 
                                info="How to handle input image?"
                            )
                            is_still_mode = gr.Checkbox(
                                label="Still Mode (fewer head motion, works with preprocess `full`)"
                            )
                            # minimum=1: without it the slider starts at 0,
                            # which is not a usable batch size.
                            batch_size = gr.Slider(
                                label="Batch size in generation", 
                                minimum=1, 
                                step=1, 
                                maximum=10, 
                                value=2
                            )
                            enhancer = gr.Checkbox(
                                label="GFPGAN as Face enhancer"
                            )
                            submit = gr.Button(
                                'Generate', 
                                elem_id="sadtalker_generate", 
                                variant='primary'
                            )
                            
                with gr.Tabs(elem_id="sadtalker_generated"):
                    gen_video = gr.Video(
                        label="Generated video", 
                        format="mp4"
                    ).style(width=512)

        # Input order must match generate_video's positional parameters.
        submit.click(
            fn=generate_video, 
            inputs=[
                source_image,
                driven_audio,
                preprocess_type,
                is_still_mode,
                enhancer,
                batch_size,                            
                size_of_image,
                pose_style
            ], 
            outputs=[gen_video]
        )

    return sadtalker_interface

# Script entry point: build the interface, enable Gradio's request queue,
# then start the web server. Nothing here runs when the module is imported.
if __name__ == "__main__":
    demo = sadtalker_demo()
    # queue() is called before launch() so the queue is active when serving.
    demo.queue()
    demo.launch()