Spaces:

Bread-F
/

Intelligent-Medical-Guidance-Large-Model

Running

Intelligent-Medical-Guidance-Large-Model / server /digital_human /modules /musetalk /utils /preprocessing.py

FAYO

model

1ef9436 12 months ago

6.64 kB

	import pickle

	import cv2
	import numpy as np
	import torch
	from face_detection import FaceAlignment, LandmarksType
	from mmpose.apis import inference_topdown
	from mmpose.structures import merge_data_samples
	from tqdm import tqdm

	# Pick the compute device once at import time: CUDA when torch reports it, otherwise CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Module-level face detector used by the bounding-box helpers in this file.
	fa = FaceAlignment(
	    LandmarksType._2D,
	    flip_input=False,
	    device=device,
	)

	# Marker stored in place of a bounding box when no face is detected in a frame.
	coord_placeholder = (0.0, 0.0, 0.0, 0.0)


	def resize_landmark(landmark, w, h, new_w, new_h):
	    """Rescale landmark coordinates from a ``w`` x ``h`` image to ``new_w`` x ``new_h``.

	    Args:
	        landmark: Array-like of ``(x, y)`` points; it must broadcast against
	            ``[w, h]`` (e.g. shape ``(N, 2)``).
	        w: Width of the image the landmarks were measured on.
	        h: Height of the image the landmarks were measured on.
	        new_w: Width of the target image.
	        new_h: Height of the target image.

	    Returns:
	        A NumPy array with x divided by ``w`` then multiplied by ``new_w`` and
	        y divided by ``h`` then multiplied by ``new_h``.

	    Raises:
	        ZeroDivisionError: If ``w`` or ``h`` is zero.
	    """
	    if w == 0 or h == 0:
	        raise ZeroDivisionError("source width and height must be non-zero")
	    # Coerce so plain Python lists work as well as ndarrays.
	    points = np.asarray(landmark)
	    # Normalise to the unit square first, then scale to the target size.
	    normalized = points / [w, h]
	    return normalized * [new_w, new_h]


	def read_imgs(img_list):
	    """Load every path in ``img_list`` with ``cv2.imread`` and return the frames in order.

	    Each entry is whatever ``cv2.imread`` returns for that path; no validation
	    is performed here.
	    """
	    print("reading images...")
	    return [cv2.imread(path) for path in tqdm(img_list)]


	def get_bbox_range(img_list, model, upperbondrange=0):
	    """Report the suggested adjustment range for the bbox shift over a set of images.

	    For every frame where the face detector returns a detection, the vertical
	    distances between face landmarks 28/29 and 29/30 are collected. Their
	    truncated means form the range shown in the returned message.

	    Args:
	        img_list: Image paths to read with ``read_imgs``.
	        model: Pose model passed to ``inference_topdown``.
	        upperbondrange: Current shift value; only echoed in the log line and
	            in the returned message.

	    Returns:
	        A human-readable string with the frame count, the suggested range and
	        the current value. When no face is detected in any frame (or
	        ``img_list`` is empty) both range bounds are reported as 0 instead of
	        raising ``ZeroDivisionError``.
	    """
	    frames = read_imgs(img_list)
	    if upperbondrange != 0:
	        print("get key_landmark and face bounding boxes with the bbox_shift:", upperbondrange)
	    else:
	        print("get key_landmark and face bounding boxes with the default value")
	    average_range_minus = []
	    average_range_plus = []
	    for frame in tqdm(frames):
	        # The detector API takes a batch; each frame is processed as a batch of one.
	        fb = [frame]
	        results = inference_topdown(model, np.asarray(fb)[0])
	        results = merge_data_samples(results)
	        keypoints = results.pred_instances.keypoints
	        # Rows 23..90 of the first instance's keypoints are used as the face landmarks.
	        face_land_mark = keypoints[0][23:91].astype(np.int32)

	        # get bounding boxes by face detection
	        bbox = fa.get_detections_for_batch(np.asarray(fb))

	        for f in bbox:
	            if f is None:  # no face in the image
	                continue
	            # Vertical gaps below (29 -> 30) and above (28 -> 29) landmark 29.
	            average_range_minus.append((face_land_mark[30] - face_land_mark[29])[1])
	            average_range_plus.append((face_land_mark[29] - face_land_mark[28])[1])

	    # Guard the means: with no detected face the lists are empty.
	    minus_hint = int(sum(average_range_minus) / len(average_range_minus)) if average_range_minus else 0
	    plus_hint = int(sum(average_range_plus) / len(average_range_plus)) if average_range_plus else 0
	    text_range = f"Total frame:「{len(frames)}」 Manually adjust range : [ -{minus_hint}~{plus_hint} ] , the current value: {upperbondrange}"
	    return text_range


	def get_landmark_and_bbox(img_list, model, upperbondrange=0):
	    """Compute one face crop box per image from pose landmarks and face detection.

	    Args:
	        img_list: Image paths to read with ``read_imgs``.
	        model: Pose model passed to ``inference_topdown``.
	        upperbondrange: Vertical shift added to the y coordinate of landmark 29
	            before the upper bound of the box is derived; 0 leaves it unchanged.

	    Returns:
	        A tuple ``(coords_list, frames)``. ``coords_list`` holds, per detection,
	        either ``coord_placeholder`` (no face found), the landmark-derived box
	        ``(x1, y1, x2, y2)``, or the detector's own box when the landmark box
	        is rejected. ``frames`` is the list returned by ``read_imgs``.

	    When no face is detected in any frame (or ``img_list`` is empty) the
	    summary line reports 0 for both range bounds instead of raising
	    ``ZeroDivisionError``, so the placeholder result is still returned.
	    """
	    frames = read_imgs(img_list)
	    coords_list = []
	    if upperbondrange != 0:
	        print("get key_landmark and face bounding boxes with the bbox_shift:", upperbondrange)
	    else:
	        print("get key_landmark and face bounding boxes with the default value")
	    average_range_minus = []
	    average_range_plus = []
	    for frame in tqdm(frames):
	        # The detector API takes a batch; each frame is processed as a batch of one.
	        fb = [frame]
	        results = inference_topdown(model, np.asarray(fb)[0])
	        results = merge_data_samples(results)
	        keypoints = results.pred_instances.keypoints
	        # Rows 23..90 of the first instance's keypoints are used as the face landmarks.
	        face_land_mark = keypoints[0][23:91].astype(np.int32)

	        # get bounding boxes by face detection
	        bbox = fa.get_detections_for_batch(np.asarray(fb))

	        # Derive the crop box from the landmarks, one entry per detection.
	        for f in bbox:
	            if f is None:  # no face in the image
	                coords_list.append(coord_placeholder)
	                continue

	            # Row view into face_land_mark: writing to it below also changes face_land_mark.
	            half_face_coord = face_land_mark[29]
	            range_minus = (face_land_mark[30] - face_land_mark[29])[1]
	            range_plus = (face_land_mark[29] - face_land_mark[28])[1]
	            average_range_minus.append(range_minus)
	            average_range_plus.append(range_plus)
	            if upperbondrange != 0:
	                # Manual adjustment: positive shifts down (towards 29), negative shifts up (towards 28).
	                half_face_coord[1] = upperbondrange + half_face_coord[1]
	            # Mirror the distance from landmark 29 to the lowest landmark upwards to get the top edge.
	            half_face_dist = np.max(face_land_mark[:, 1]) - half_face_coord[1]
	            upper_bond = half_face_coord[1] - half_face_dist

	            f_landmark = (
	                np.min(face_land_mark[:, 0]),
	                int(upper_bond),
	                np.max(face_land_mark[:, 0]),
	                np.max(face_land_mark[:, 1]),
	            )
	            x1, y1, x2, y2 = f_landmark

	            # NOTE(review): only x1 is checked for being negative; a negative y1 is
	            # accepted as-is - confirm callers handle it.
	            if y2 - y1 <= 0 or x2 - x1 <= 0 or x1 < 0:
	                # The landmark box is not usable, so fall back to the detector's box.
	                coords_list.append(f)
	                print("error bbox:", f)
	            else:
	                coords_list.append(f_landmark)

	    # Guard the means: with no detected face the lists are empty.
	    minus_hint = int(sum(average_range_minus) / len(average_range_minus)) if average_range_minus else 0
	    plus_hint = int(sum(average_range_plus) / len(average_range_plus)) if average_range_plus else 0
	    print(
	        "******************************************bbox_shift parameter adjustment********************************************************"
	    )
	    print(
	        f"Total frame:「{len(frames)}」 Manually adjust range : [ -{minus_hint}~{plus_hint} ] , the current value: {upperbondrange}"
	    )
	    print(
	        "*************************************************************************************************************************************"
	    )
	    return coords_list, frames


	if __name__ == "__main__":
	    # Smoke test: compute crop boxes for a few sample frames, pickle them and
	    # print the shape of each crop.
	    import argparse

	    from mmpose.apis import init_model

	    parser = argparse.ArgumentParser(description="Run landmark/bbox extraction on sample frames.")
	    parser.add_argument("config", help="path to the mmpose pose-estimator config file")
	    parser.add_argument("checkpoint", help="path to the pose-estimator checkpoint file")
	    args = parser.parse_args()

	    # get_landmark_and_bbox needs a pose model as its second argument.
	    model = init_model(args.config, args.checkpoint, device=device)

	    img_list = [
	        "./results/lyria/00000.png",
	        "./results/lyria/00001.png",
	        "./results/lyria/00002.png",
	        "./results/lyria/00003.png",
	    ]
	    crop_coord_path = "./coord_face.pkl"
	    coords_list, full_frames = get_landmark_and_bbox(img_list, model)
	    with open(crop_coord_path, "wb") as f:
	        pickle.dump(coords_list, f)

	    for bbox, frame in zip(coords_list, full_frames):
	        if bbox == coord_placeholder:  # no face was found in this frame
	            continue
	        x1, y1, x2, y2 = bbox
	        crop_frame = frame[y1:y2, x1:x2]
	        print("Cropped shape", crop_frame.shape)

	    print(coords_list)