import os
import shutil
import uuid

import torch

from scripts.utils.preprocess import CropAndExtract
from scripts.test_audio2coeff import Audio2Coeff
from scripts.facerender.animate import AnimateFromCoeff
from scripts.generate_batch import get_data
from scripts.generate_facerender_batch import get_facerender_data


class sad_talker:
    """Thin wrapper around the SadTalker pipeline.

    Crops/extracts a face from a still image, predicts motion coefficients
    from an audio clip, and renders a talking-head video.

    NOTE(review): class name kept lowercase (non-PEP8) for backward
    compatibility with existing callers.
    """

    def __init__(self):
        # Square output resolution of the rendered video, in pixels.
        self.size = 256
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Paths to model checkpoints and config files.
        # NOTE: the 'auido2*' spellings match the on-disk config filenames —
        # do not "fix" them here without renaming the files.
        self.sadtalker_paths = {
            'checkpoint': './models/SadTalker_V0.0.2_256.safetensors',
            'dir_of_BFM_fitting': './scripts/config',
            'audio2pose_yaml_path': './scripts/config/auido2pose.yaml',
            'audio2exp_yaml_path': './scripts/config/auido2exp.yaml',
            'use_safetensor': True,
            'mappingnet_checkpoint': './models/mapping_00109-model.pth.tar',
            'facerender_yaml': './scripts/config/facerender_still.yaml',
        }
        self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
        self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
        self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)

    def genrate_video(self, image_path, audio_path, output_folder, still=True):
        """Render a talking-head video for *image_path* driven by *audio_path*.

        The finished video is moved to ``<output_folder>/output.mp4``.

        Args:
            image_path: path to the source portrait image.
            audio_path: path to the driving audio file.
            output_folder: directory that receives ``output.mp4``.
            still: passed through to the coefficient/render stages
                (reduced head motion when True).

        Returns:
            True on success, None if face preprocessing failed to find a
            face, False if any other stage raised.

        NOTE(review): method name keeps the original 'genrate' typo for
        backward compatibility; new callers should use ``generate_video``.
        """
        preprocess = 'full'
        # Per-run scratch directory; created up front so cleanup in
        # ``finally`` is always safe (fixes a temp-dir leak on the
        # early-return path of the original code).
        save_dir = os.path.join('./outputs', str(uuid.uuid4()))
        try:
            first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
            os.makedirs(first_frame_dir, exist_ok=True)

            # Stage 1: crop the face and extract 3DMM coefficients
            # for the first frame.
            first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
                image_path, first_frame_dir, preprocess,
                source_image_flag=True, pic_size=self.size)
            if first_coeff_path is None:
                # No usable face found — preserve the original None return.
                return None

            # Fixed generation settings: no reference blink/pose clips,
            # no face/background enhancer, neutral pose style.
            ref_eyeblink_coeff_path = None
            ref_pose_coeff_path = None
            pose_style = 0
            batch_size = 2
            input_yaw_list = None
            input_pitch_list = None
            input_roll_list = None
            background_enhancer = None
            enhancer = None
            expression_scale = 1.

            # Stage 2: audio -> motion coefficients.
            batch = get_data(first_coeff_path, audio_path, self.device,
                             ref_eyeblink_coeff_path, still=still)
            coeff_path = self.audio_to_coeff.generate(
                batch, save_dir, pose_style, ref_pose_coeff_path)

            # Stage 3: coefficients -> rendered video.
            data = get_facerender_data(
                coeff_path, crop_pic_path, first_coeff_path, audio_path,
                batch_size, input_yaw_list, input_pitch_list, input_roll_list,
                expression_scale=expression_scale, still_mode=still,
                preprocess=preprocess, size=self.size)
            result = self.animate_from_coeff.generate(
                data, save_dir, image_path, crop_info,
                enhancer=enhancer, background_enhancer=background_enhancer,
                preprocess=preprocess, img_size=self.size)

            shutil.move(result, os.path.join(output_folder, "output.mp4"))
            return True
        except Exception:
            # Was a bare ``except:`` that also swallowed KeyboardInterrupt /
            # SystemExit; narrowed to Exception.  The best-effort contract is
            # preserved: signal failure to the caller instead of raising.
            return False
        finally:
            # Always remove the scratch directory; ignore_errors guards the
            # case where it was never fully created.
            shutil.rmtree(save_dir, ignore_errors=True)

    # Correctly spelled alias for new callers; same bound function object.
    generate_video = genrate_video

    def __del__(self):
        """Best-effort release of model references and CUDA cache at GC time."""
        # Dropping the references is enough for GC; the original's extra
        # ``del self.attr`` after assigning None was redundant.
        self.preprocess_model = None
        self.audio_to_coeff = None
        self.animate_from_coeff = None
        # Only touch the CUDA allocator when CUDA is actually available.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        import gc
        gc.collect()


if __name__ == "__main__":
    pass