Intelligent-Medical-Guidance-Large-Model
/
server
/digital_human
/modules
/musetalk
/utils
/preprocessing.py
| import pickle | |
| import cv2 | |
| import numpy as np | |
| import torch | |
| from face_detection import FaceAlignment, LandmarksType | |
| from mmpose.apis import inference_topdown | |
| from mmpose.structures import merge_data_samples | |
| from tqdm import tqdm | |
| # initialize the face detection model | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| fa = FaceAlignment(LandmarksType._2D, flip_input=False, device=device) | |
| # maker if the bbox is not sufficient | |
| coord_placeholder = (0.0, 0.0, 0.0, 0.0) | |
| def resize_landmark(landmark, w, h, new_w, new_h): | |
| w_ratio = new_w / w | |
| h_ratio = new_h / h | |
| landmark_norm = landmark / [w, h] | |
| landmark_resized = landmark_norm * [new_w, new_h] | |
| return landmark_resized | |
| def read_imgs(img_list): | |
| frames = [] | |
| print("reading images...") | |
| for img_path in tqdm(img_list): | |
| frame = cv2.imread(img_path) | |
| frames.append(frame) | |
| return frames | |
| def get_bbox_range(img_list, model, upperbondrange=0): | |
| frames = read_imgs(img_list) | |
| batch_size_fa = 1 | |
| batches = [frames[i : i + batch_size_fa] for i in range(0, len(frames), batch_size_fa)] | |
| coords_list = [] | |
| landmarks = [] | |
| if upperbondrange != 0: | |
| print("get key_landmark and face bounding boxes with the bbox_shift:", upperbondrange) | |
| else: | |
| print("get key_landmark and face bounding boxes with the default value") | |
| average_range_minus = [] | |
| average_range_plus = [] | |
| for fb in tqdm(batches): | |
| results = inference_topdown(model, np.asarray(fb)[0]) | |
| results = merge_data_samples(results) | |
| keypoints = results.pred_instances.keypoints | |
| face_land_mark = keypoints[0][23:91] | |
| face_land_mark = face_land_mark.astype(np.int32) | |
| # get bounding boxes by face detetion | |
| bbox = fa.get_detections_for_batch(np.asarray(fb)) | |
| # adjust the bounding box refer to landmark | |
| # Add the bounding box to a tuple and append it to the coordinates list | |
| for j, f in enumerate(bbox): | |
| if f is None: # no face in the image | |
| coords_list += [coord_placeholder] | |
| continue | |
| half_face_coord = face_land_mark[29] # np.mean([face_land_mark[28], face_land_mark[29]], axis=0) | |
| range_minus = (face_land_mark[30] - face_land_mark[29])[1] | |
| range_plus = (face_land_mark[29] - face_land_mark[28])[1] | |
| average_range_minus.append(range_minus) | |
| average_range_plus.append(range_plus) | |
| if upperbondrange != 0: | |
| half_face_coord[1] = upperbondrange + half_face_coord[1] # 手动调整 + 向下(偏29) - 向上(偏28) | |
| text_range = f"Total frame:「{len(frames)}」 Manually adjust range : [ -{int(sum(average_range_minus) / len(average_range_minus))}~{int(sum(average_range_plus) / len(average_range_plus))} ] , the current value: {upperbondrange}" | |
| return text_range | |
| def get_landmark_and_bbox(img_list, model, upperbondrange=0): | |
| frames = read_imgs(img_list) | |
| batch_size_fa = 1 | |
| batches = [frames[i : i + batch_size_fa] for i in range(0, len(frames), batch_size_fa)] | |
| coords_list = [] | |
| landmarks = [] | |
| if upperbondrange != 0: | |
| print("get key_landmark and face bounding boxes with the bbox_shift:", upperbondrange) | |
| else: | |
| print("get key_landmark and face bounding boxes with the default value") | |
| average_range_minus = [] | |
| average_range_plus = [] | |
| for fb in tqdm(batches): | |
| results = inference_topdown(model, np.asarray(fb)[0]) | |
| results = merge_data_samples(results) | |
| keypoints = results.pred_instances.keypoints | |
| face_land_mark = keypoints[0][23:91] | |
| face_land_mark = face_land_mark.astype(np.int32) | |
| # get bounding boxes by face detetion | |
| bbox = fa.get_detections_for_batch(np.asarray(fb)) | |
| # adjust the bounding box refer to landmark | |
| # Add the bounding box to a tuple and append it to the coordinates list | |
| for j, f in enumerate(bbox): | |
| if f is None: # no face in the image | |
| coords_list += [coord_placeholder] | |
| continue | |
| half_face_coord = face_land_mark[29] # np.mean([face_land_mark[28], face_land_mark[29]], axis=0) | |
| range_minus = (face_land_mark[30] - face_land_mark[29])[1] | |
| range_plus = (face_land_mark[29] - face_land_mark[28])[1] | |
| average_range_minus.append(range_minus) | |
| average_range_plus.append(range_plus) | |
| if upperbondrange != 0: | |
| half_face_coord[1] = upperbondrange + half_face_coord[1] # 手动调整 + 向下(偏29) - 向上(偏28) | |
| half_face_dist = np.max(face_land_mark[:, 1]) - half_face_coord[1] | |
| upper_bond = half_face_coord[1] - half_face_dist | |
| f_landmark = ( | |
| np.min(face_land_mark[:, 0]), | |
| int(upper_bond), | |
| np.max(face_land_mark[:, 0]), | |
| np.max(face_land_mark[:, 1]), | |
| ) | |
| x1, y1, x2, y2 = f_landmark | |
| if y2 - y1 <= 0 or x2 - x1 <= 0 or x1 < 0: # if the landmark bbox is not suitable, reuse the bbox | |
| coords_list += [f] | |
| w, h = f[2] - f[0], f[3] - f[1] | |
| print("error bbox:", f) | |
| else: | |
| coords_list += [f_landmark] | |
| print( | |
| "********************************************bbox_shift parameter adjustment**********************************************************" | |
| ) | |
| print( | |
| f"Total frame:「{len(frames)}」 Manually adjust range : [ -{int(sum(average_range_minus) / len(average_range_minus))}~{int(sum(average_range_plus) / len(average_range_plus))} ] , the current value: {upperbondrange}" | |
| ) | |
| print( | |
| "*************************************************************************************************************************************" | |
| ) | |
| return coords_list, frames | |
| if __name__ == "__main__": | |
| img_list = [ | |
| "./results/lyria/00000.png", | |
| "./results/lyria/00001.png", | |
| "./results/lyria/00002.png", | |
| "./results/lyria/00003.png", | |
| ] | |
| crop_coord_path = "./coord_face.pkl" | |
| coords_list, full_frames = get_landmark_and_bbox(img_list) | |
| with open(crop_coord_path, "wb") as f: | |
| pickle.dump(coords_list, f) | |
| for bbox, frame in zip(coords_list, full_frames): | |
| if bbox == coord_placeholder: | |
| continue | |
| x1, y1, x2, y2 = bbox | |
| crop_frame = frame[y1:y2, x1:x2] | |
| print("Cropped shape", crop_frame.shape) | |
| # cv2.imwrite(path.join(save_dir, '{}.png'.format(i)),full_frames[i][0][y1:y2, x1:x2]) | |
| print(coords_list) | |