| import torch | |
| import shutil | |
| import torch | |
| from scripts.utils.preprocess import CropAndExtract | |
| from scripts.test_audio2coeff import Audio2Coeff | |
| from scripts.facerender.animate import AnimateFromCoeff | |
| from scripts.generate_batch import get_data | |
| from scripts.generate_facerender_batch import get_facerender_data | |
| import uuid | |
| import os | |
class sad_talker:
    """High-level wrapper around the SadTalker talking-head pipeline.

    Loads the three pipeline stages once at construction time
    (face crop / 3DMM coefficient extraction, audio-to-coefficient,
    coefficient-to-video rendering) and exposes a single entry point
    that turns a portrait image plus an audio clip into an ``output.mp4``.
    """

    def __init__(self):
        # Target resolution for cropping and rendering.
        self.size = 256
        # Prefer GPU when available; all three sub-models share this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Checkpoint and config locations.
        # NOTE(review): the 'auido2*.yaml' spellings look like typos but must
        # match the filenames shipped with SadTalker -- confirm before renaming.
        self.sadtalker_paths = {
            'checkpoint': './models/SadTalker_V0.0.2_256.safetensors',
            'dir_of_BFM_fitting': './scripts/config',
            'audio2pose_yaml_path': './scripts/config/auido2pose.yaml',
            'audio2exp_yaml_path': './scripts/config/auido2exp.yaml',
            'use_safetensor': True,
            'mappingnet_checkpoint': './models/mapping_00109-model.pth.tar',
            'facerender_yaml': './scripts/config/facerender_still.yaml',
        }
        self.preprocess_model = CropAndExtract(self.sadtalker_paths, self.device)
        self.audio_to_coeff = Audio2Coeff(self.sadtalker_paths, self.device)
        self.animate_from_coeff = AnimateFromCoeff(self.sadtalker_paths, self.device)

    def genrate_video(self, image_path, audio_path, output_folder, still=True):
        """Generate a talking-head video from a portrait image and an audio clip.

        Args:
            image_path: Path to the source portrait image.
            audio_path: Path to the driving audio file.
            output_folder: Existing directory that receives the final ``output.mp4``.
            still: Passed through to the motion/render stages to keep the head
                pose mostly still (full-image mode).

        Returns:
            True on success, None when face preprocessing produced no
            coefficients (no usable face), False on any other error.
        """
        preprocess = 'full'
        # Unique scratch directory per invocation so concurrent calls cannot
        # collide. Defined BEFORE the try so the finally-cleanup can never hit
        # an unbound name (the original handler could).
        save_dir = os.path.join('./outputs', str(uuid.uuid4()))
        first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
        try:
            os.makedirs(first_frame_dir, exist_ok=True)

            # Stage 1: crop the face and extract 3DMM coefficients.
            first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
                image_path, first_frame_dir, preprocess,
                source_image_flag=True, pic_size=self.size)
            if first_coeff_path is None:
                # Fix: the original leaked save_dir on this early return;
                # the finally block now removes it.
                return None

            # Stage 2: map the audio onto motion coefficients.
            pose_style = 0
            batch = get_data(first_coeff_path, audio_path, self.device,
                             None,  # ref_eyeblink_coeff_path: no eye-blink reference
                             still=still)
            coeff_path = self.audio_to_coeff.generate(
                batch, save_dir, pose_style,
                None)  # ref_pose_coeff_path: no reference pose

            # Stage 3: render the animated video from the coefficients.
            data = get_facerender_data(
                coeff_path, crop_pic_path, first_coeff_path, audio_path,
                2,                 # batch_size
                None, None, None,  # input yaw / pitch / roll override lists
                expression_scale=1., still_mode=still,
                preprocess=preprocess, size=self.size)
            result = self.animate_from_coeff.generate(
                data, save_dir, image_path, crop_info,
                enhancer=None, background_enhancer=None,
                preprocess=preprocess, img_size=self.size)

            # Move the rendered file out of the scratch dir before cleanup.
            shutil.move(result, os.path.join(output_folder, "output.mp4"))
            return True
        except Exception:
            # Fix: narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return False
        finally:
            # Fix: best-effort cleanup on every path; the original rmtree in
            # the handler could itself raise (e.g. dir never created) and
            # mask the real error.
            shutil.rmtree(save_dir, ignore_errors=True)

    # Backward-compatible alias correcting the typo in the public name;
    # existing callers of `genrate_video` keep working.
    generate_video = genrate_video

    def __del__(self):
        """Best-effort release of model references and the CUDA cache.

        ``__del__`` may run during interpreter shutdown when modules are
        already torn down, so everything is guarded -- a destructor must
        never raise.
        """
        try:
            self.preprocess_model = None
            self.audio_to_coeff = None
            self.animate_from_coeff = None
            del self.preprocess_model
            del self.audio_to_coeff
            del self.animate_from_coeff
            torch.cuda.empty_cache()
            import gc
            gc.collect()
        except Exception:
            pass
| if __name__ == "__main__": | |
| pass |