# File size: 3,582 Bytes
# 93871a1
import torch
import shutil
import torch
from scripts.utils.preprocess import CropAndExtract
from scripts.test_audio2coeff import Audio2Coeff
from scripts.facerender.animate import AnimateFromCoeff
from scripts.generate_batch import get_data
from scripts.generate_facerender_batch import get_facerender_data
import uuid
import os
class sad_talker:
    """Thin wrapper around the SadTalker talking-head pipeline.

    Loads the three pipeline stages (face preprocessing, audio-to-coefficient,
    and face rendering) once at construction time, then renders videos on
    demand via :meth:`generate_video`.
    """

    def __init__(self):
        # Render size matching the 256px SadTalker checkpoint below.
        self.size = 256
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # NOTE: the 'auido2*' filenames are intentional — they mirror the
        # (misspelled) config file names shipped by the upstream SadTalker repo.
        self.sadtalker_paths = {'checkpoint': './models/SadTalker_V0.0.2_256.safetensors', 'dir_of_BFM_fitting': './scripts/config', 'audio2pose_yaml_path': './scripts/config/auido2pose.yaml', 'audio2exp_yaml_path': './scripts/config/auido2exp.yaml', 'use_safetensor': True, 'mappingnet_checkpoint': './models/mapping_00109-model.pth.tar', 'facerender_yaml': './scripts/config/facerender_still.yaml'}
        self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
        self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
        self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)

    def genrate_video(self, image_path, audio_path, output_folder, still=True):
        """Render a talking-head video from a portrait image and an audio clip.

        Args:
            image_path: Path to the source portrait image.
            audio_path: Path to the driving audio file.
            output_folder: Directory that receives the final ``output.mp4``.
            still: Passed through to the pipeline; reduces head motion when True.

        Returns:
            True on success, None if face preprocessing finds no usable face,
            False on any other failure.
        """
        # Unique scratch directory; always removed in the ``finally`` below
        # (the original leaked it on the early ``return None`` path).
        save_dir = f'./outputs/{uuid.uuid4()}'
        try:
            preprocess = 'full'
            first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
            os.makedirs(first_frame_dir, exist_ok=True)
            first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
                image_path, first_frame_dir, preprocess,
                source_image_flag=True, pic_size=self.size)
            if first_coeff_path is None:
                # Face detection / 3DMM fitting failed — keep the original
                # contract of returning None (distinct from False).
                return None
            # Fixed pipeline settings (no reference blink/pose coeffs, no
            # enhancers, neutral expression scale).
            ref_eyeblink_coeff_path = None
            ref_pose_coeff_path = None
            pose_style = 0
            batch_size = 2
            input_yaw_list = None
            input_pitch_list = None
            input_roll_list = None
            background_enhancer = None
            enhancer = None
            expression_scale = 1.
            batch = get_data(first_coeff_path, audio_path, self.device, ref_eyeblink_coeff_path, still=still)
            coeff_path = self.audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
            data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
                                       batch_size, input_yaw_list, input_pitch_list, input_roll_list,
                                       expression_scale=expression_scale, still_mode=still, preprocess=preprocess, size=self.size)
            result = self.animate_from_coeff.generate(data, save_dir, image_path, crop_info,
                                                      enhancer=enhancer, background_enhancer=background_enhancer, preprocess=preprocess, img_size=self.size)
            shutil.move(result, os.path.join(output_folder, "output.mp4"))
            return True
        except Exception:
            # Was a bare ``except:`` which also swallowed KeyboardInterrupt /
            # SystemExit; narrowed to Exception. Best-effort contract kept:
            # any pipeline failure is reported as False, not raised.
            return False
        finally:
            # Cleanup runs on every exit path; ignore_errors avoids a
            # secondary crash when save_dir was never fully created.
            shutil.rmtree(save_dir, ignore_errors=True)

    # Correctly-spelled alias; ``genrate_video`` kept for existing callers.
    generate_video = genrate_video

    def __del__(self):
        """Drop model references so GPU memory can be reclaimed."""
        self.preprocess_model = None
        self.audio_to_coeff = None
        self.animate_from_coeff = None
        # Guarded: during interpreter shutdown module globals (torch, gc)
        # may already be torn down — never raise from __del__.
        try:
            import gc
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        except Exception:
            pass
if __name__ == "__main__":
    # Module is import-only; no CLI entry point is defined.
    # (Removed a stray trailing '|' artifact that made this a syntax error.)
    pass