see-through-demo

Runtime error

App Files Files Community

see-through-demo / inference /scripts /parse_live2d.py

24yearsold

update: add ComfyUI Node Extension mention to description

b55a1fc verified about 2 months ago

raw

history blame contribute delete

66.9 kB

	from PIL import Image

	import random
	import os
	import os.path as osp
	import numpy as np
	from tqdm import tqdm
	from einops import reduce
	import click
	import cv2
	import sys
	sys.path.append(osp.dirname(osp.dirname(osp.abspath(__file__))))

	from utils.io_utils import load_exec_list, pil_pad_square, pil_ensure_rgb, imglist2imgrid, find_all_files_with_name, dict2json, json2dict, load_image, save_tmp_img
	from live2d.scrap_model import Live2DScrapModel, compose_from_drawables, load_detected_character, init_drawable_visible_map, Drawable
	from utils.visualize import pil_draw_text, visualize_segs, VALID_FACE_GROUPS, FACE_LABEL2NAME, visualize_facedet_output, LEFT_EYEBROW, RIGHT_EYEBROW, show_factorization_on_image
	from utils.cv import mask2rle, rle2mask

	# Tag names handed to `apply_wdv3_tagger` through its `exclude_cls`
	# argument by the tagging commands in this file.
	exclude_cls = {
	    '1girl',
	    'solo',
	    'elf',
	    'virtual_youtuber',
	    'tachi-e',
	    'simple_background',
	    'white_background',
	    'transparent_background',
	    'smile',
	    'blush',
	    'closed_mouth',
	    'expressionless',
	    'looking_at_viewer',
	    'looking_to_the_side',
	    'straight-on',
	    'standing',
	    'full_body',
	    'holding',
	}

	# Root click command group; the commands in this file attach to it with
	# `@cli.command(...)`.
	@click.group()
	def cli():
	"""live2d scripts.
	"""


	@cli.command('build_live2d_exec_list')
	@click.option('--srcd')
	@click.option('--save_dir', default=None)
	@click.option('--filter_p', default=None)
	@click.option('--target_fno', default=-1)
	@click.option('--num_chunk', default=-1)
	@click.option('--save_name', default='exec_list')
	def build_live2d_exec_list(srcd, save_dir, filter_p, target_fno, num_chunk, save_name):
	    # Collect every file named 'final' under `srcd`, drop entries listed in
	    # `filter_p`, optionally keep only those whose parent directory name
	    # ends in '-<target_fno>', shuffle, and write the result to
	    # '<save_name>.txt' (plus '<save_name><i>.txt' for each chunk).

	    def _write_list(path, entries):
	        with open(path, 'w', encoding='utf8') as f:
	            f.write('\n'.join(entries))
	        print(f'exec list saved to {path}')

	    candidates = find_all_files_with_name(srcd, name='final', exclude_suffix=True)
	    filter_set = set() if filter_p is None else set(load_exec_list(filter_p))

	    tgt_list = []
	    for d in candidates:
	        if d in filter_set or osp.dirname(d) in filter_set:
	            continue
	        if not target_fno > 0:
	            # No frame-number filter requested: keep everything.
	            tgt_list.append(d)
	            continue
	        fno = osp.basename(osp.dirname(d)).split('-')[-1]
	        if not fno.isdigit():
	            print(f'{d} is not a valid path')
	            continue
	        if int(fno) == target_fno:
	            tgt_list.append(d)

	    random.shuffle(tgt_list)
	    print(f'num samples: {len(tgt_list)}')

	    if save_dir is None:
	        save_dir = srcd

	    _write_list(osp.join(save_dir, f'{save_name}.txt'), tgt_list)

	    # range() is empty for num_chunk <= 0, so no chunk files are written then.
	    for ii in range(num_chunk):
	        chunk = load_exec_list(tgt_list, ii, world_size=num_chunk)
	        _write_list(osp.join(save_dir, f'{save_name}{ii}.txt'), chunk)
	        print(f'chunk {ii} num samples: {len(chunk)}')



	@cli.command('further_extr')
	@click.option('--exec_list')
	@click.option('--rank_to_worldsize', default=None)
	@click.option('--save_name', default=None)
	def _further_extr(exec_list, rank_to_worldsize, save_name):
	    # CLI wrapper around `further_extr`. click invokes the callback with one
	    # keyword argument per option, so the parameters must carry the option
	    # names; the previous `(args, *kwargs)` signature could not accept them.
	    further_extr(exec_list, rank_to_worldsize=rank_to_worldsize, save_name=save_name)

	def _load_eye_parts(face_parsing_dir, eye_mesh_dict):
	    """Load the eye/eyebrow part images listed in `eye_mesh_dict`.

	    Returns a dict keyed by the short part name. A part that cannot be
	    loaded (no parts.json, no PNG, or an empty alpha mask) maps to
	    ``{'area': 0}``; otherwise the entry holds 'img', 'xyxy', 'mask',
	    'rect' and 'area'.
	    """
	    parts_path = osp.join(face_parsing_dir, 'parts.json')
	    parts = json2dict(parts_path) if osp.exists(parts_path) else None

	    eye_parts = {}
	    for mesh_name, part_name in eye_mesh_dict.items():
	        imgp = osp.join(face_parsing_dir, mesh_name + '.png')
	        if parts is None or not osp.exists(imgp):
	            eye_parts[part_name] = {'area': 0}
	            continue
	        img = np.array(Image.open(imgp))
	        mask = img[..., -1] > 15
	        if not mask.any():
	            # Nothing passes the alpha threshold: treat the part as missing
	            # instead of feeding an empty point set to cv2.boundingRect.
	            eye_parts[part_name] = {'area': 0}
	            continue
	        pd = parts[mesh_name]
	        x, y, w, h = pd['x'], pd['y'], pd['w'], pd['h']
	        rx, ry, rw, rh = cv2.boundingRect(cv2.findNonZero(mask.astype(np.uint8)))
	        eye_parts[part_name] = {
	            'img': img,
	            'xyxy': [x, y, x + w, y + h],
	            'mask': mask,
	            # tight box of the visible pixels, shifted by the part offset
	            'rect': [rx + x, ry + y, rx + x + rw, ry + y + rh],
	            'area': np.sum(mask),
	        }
	    return eye_parts


	def _split_front_back_hair(drawables):
	    """Retag every drawable whose tag contains 'hair' as 'front hair' or 'back hair'.

	    The split index is the largest face drawable index, capped by the
	    smallest nose/mouth index; hair with a larger index becomes front hair.
	    """
	    sentinel = len(drawables) + 1
	    nose_min_idx = mouth_min_idx = sentinel
	    for d in drawables:
	        if d.body_part_tag == 'nose':
	            nose_min_idx = min(d.idx, nose_min_idx)
	        elif d.body_part_tag == 'mouth':
	            mouth_min_idx = min(d.idx, mouth_min_idx)

	    hair_split_idx = -1
	    for d in drawables:
	        if d.body_part_tag == 'face':
	            hair_split_idx = max(hair_split_idx, min(d.idx, nose_min_idx, mouth_min_idx))

	    for d in drawables:
	        tag = d.body_part_tag
	        if tag is None or 'hair' not in tag:
	            continue
	        d.body_part_tag = 'front hair' if d.idx > hair_split_idx else 'back hair'


	def _union_part_xyxy(eye_parts, names, frame_w, frame_h):
	    """Return the union box of the named parts; [frame_w, frame_h, 0, 0] if none has a box."""
	    box = [frame_w, frame_h, 0, 0]
	    for name in names:
	        xyxy = eye_parts[name].get('xyxy')
	        if xyxy is None:
	            continue
	        box[0] = min(box[0], xyxy[0])
	        box[1] = min(box[1], xyxy[1])
	        box[2] = max(box[2], xyxy[2])
	        box[3] = max(box[3], xyxy[3])
	    return box


	def _rect_inside_part(inner, part):
	    """True when box `inner` lies strictly inside the 'xyxy' box of `part` (False if it has none)."""
	    outer = part.get('xyxy')
	    if outer is None:
	        return False
	    return inner[0] > outer[0] and inner[1] > outer[1] and inner[2] < outer[2] and inner[3] < outer[3]


	def _first_scored(eye_scores, *names):
	    """Return the first score entry among `names` whose area slot is not None, else None."""
	    for name in names:
	        if eye_scores[name][0] is not None:
	            return eye_scores[name]
	    return None


	def _has_overlap(scores):
	    """True when a score entry [area, union, intersection, part_area] has a positive intersection."""
	    return scores is not None and scores[2] is not None and scores[2] > 0


	def _classify_eye_drawable(d, eye_parts, eyel_xyxy, eyer_xyxy):
	    """Pick 'irides', 'eyebg', 'eyelash' or 'eyebrow' for a drawable tagged 'eyes'."""
	    eye_scores = {}
	    for name, part in eye_parts.items():
	        if part['area'] == 0:
	            eye_scores[name] = [None] * 4
	            continue
	        area, u_area, i_area = d.mask_union_intersection(part['mask'], part['xyxy'], final_vis_mask=True)
	        eye_scores[name] = [area, u_area, i_area, part['area']]

	    # Irides and eye background are taken from the same side: left first, then right.
	    irides_scores = bg_scores = None
	    if eye_scores['iridesl'][0] is not None:
	        irides_scores, bg_scores = eye_scores['iridesl'], eye_scores['eyebgsl']
	    elif eye_scores['iridesr'][0] is not None:
	        irides_scores, bg_scores = eye_scores['iridesr'], eye_scores['eyebgsr']
	    eyelash_scores = _first_scored(eye_scores, 'eyelashsr', 'eyelashsl')
	    eyebrow_scores = _first_scored(eye_scores, 'eyebrowr', 'eyebrowl')

	    # intersection / union per candidate label; key order decides ties in max()
	    scores = {'irides': 0, 'eyebg': 0, 'eyelash': 0, 'eyebrow': 0}
	    if _has_overlap(irides_scores) and _has_overlap(bg_scores):
	        scores['irides'] = irides_scores[2] / irides_scores[1]
	        scores['eyebg'] = bg_scores[2] / bg_scores[1]
	    if _has_overlap(eyelash_scores):
	        scores['eyelash'] = eyelash_scores[2] / eyelash_scores[1]
	    if _has_overlap(eyebrow_scores):
	        scores['eyebrow'] = eyebrow_scores[2] / eyebrow_scores[1]

	    best = max(scores, key=scores.get)
	    if scores[best] > 0:
	        return best

	    # No mask overlap at all: fall back to box geometry.
	    x1, y1, x2, y2 = d.xyxy
	    y = (y1 + y2) / 2
	    if y < eyel_xyxy[1] or y < eyer_xyxy[1]:
	        return 'eyebrow'
	    # The containment test needs the part boxes stored in `eye_parts`;
	    # `eye_scores` holds plain lists, which made this test always False.
	    if _rect_inside_part(d.xyxy, eye_parts['iridesl']) or _rect_inside_part(d.xyxy, eye_parts['iridesr']):
	        return 'irides'
	    if _rect_inside_part(d.xyxy, eye_parts['eyebgsl']) or _rect_inside_part(d.xyxy, eye_parts['eyebgsr']):
	        return 'eyebg'
	    return 'eyelash'


	def further_extr(exec_list, rank_to_worldsize=None, save_name=None):
	    """Refine body-part tags: split hair into front/back and 'eyes' into sub parts.

	    For every entry of the exec list the model's body parsing is loaded,
	    hair drawables are retagged 'front hair' / 'back hair', drawables tagged
	    'eyes' are retagged 'irides' / 'eyebg' / 'eyelash' / 'eyebrow' using the
	    part images under '<entry>/face_parsing', and the result is written back
	    with `save_body_parsing(save_name=save_name, ...)`.

	    Any exception is reported together with the failing entry and re-raised.
	    """
	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    # face_parsing file stem -> short part name
	    eye_mesh_dict = {
	        '1-2-3-2-2+eyebgs-l': 'eyebgsl',
	        '1-2-3-3-2+irides-l': 'iridesl',
	        '1-2-3-1-2+eyelashs-l': 'eyelashsl',
	        '1-2-3-2-1+eyebgs-r': 'eyebgsr',
	        '1-2-3-3-1+irides-r': 'iridesr',
	        '1-2-3-1-1+eyelashs-r': 'eyelashsr',
	        '1-2-1-1+eyebrows-r': 'eyebrowr',
	        '1-2-1-2+eyebrows-l': 'eyebrowl',
	    }

	    for p in tqdm(exec_list):
	        try:
	            eye_parts = _load_eye_parts(osp.join(p, 'face_parsing'), eye_mesh_dict)

	            lmodel = Live2DScrapModel(p, pad_to_square=False, crop_to_final=False)
	            lmodel.init_drawable_visible_map()
	            lmodel.load_body_parsing()

	            _split_front_back_hair(lmodel.drawables)

	            frame_h, frame_w = lmodel.final.shape[:2]
	            eyel_xyxy = _union_part_xyxy(eye_parts, ('iridesl', 'eyebgsl', 'eyelashsl'), frame_w, frame_h)
	            eyer_xyxy = _union_part_xyxy(eye_parts, ('iridesr', 'eyebgsr', 'eyelashsr'), frame_w, frame_h)

	            for d in lmodel.drawables:
	                if d.body_part_tag == 'eyes':
	                    d.body_part_tag = _classify_eye_drawable(d, eye_parts, eyel_xyxy, eyer_xyxy)

	            if lmodel._body_parsing is not None:
	                metadata = lmodel._body_parsing['metadata']
	            else:
	                metadata = {}
	            lmodel.save_body_parsing(save_name=save_name, metadata=metadata)

	        except Exception as e:
	            # Name the failing entry before propagating; the original message
	            # sat after a bare `raise` and was never printed.
	            print(f'failed to process {p}: {e}')
	            raise



	def propagate_invisible_parts(lmodel: Live2DScrapModel):
	    """Give untagged drawables the majority tag of their nearest tagged group.

	    Tagged drawables vote for their tag in the group named by the directory
	    part of their `did` ('_root' when there is none). Each untagged drawable
	    then walks from its own group up through the ancestor groups and takes
	    the most frequent tag of the first group that has any votes; its new tag
	    is counted as a vote in that group. Drawables whose `did` is None are
	    skipped, and drawables with no voted ancestor stay untagged.
	    """
	    voting_tree = {}
	    for d in lmodel.drawables:
	        if d.tag is None or d.did is None:
	            continue
	        parent = osp.dirname(d.did) or '_root'
	        votes = voting_tree.setdefault(parent, {})
	        votes[d.tag] = votes.get(d.tag, 0) + 1

	    for d in lmodel.drawables:
	        if d.tag is not None or d.did is None:
	            continue
	        parent = osp.dirname(d.did) or '_root'
	        target_tag = None
	        while True:
	            votes = voting_tree.get(parent)
	            if votes:
	                target_tag = max(votes, key=votes.get)
	                break
	            if parent == '_root':
	                break
	            # Keep climbing when this group has no votes. The original broke
	            # out here, which made the ancestor walk unreachable.
	            next_parent = osp.dirname(parent)
	            # dirname() is a fixed point on a filesystem-style root such as
	            # '/', so treat "no progress" like the empty string: go to '_root'.
	            parent = '_root' if next_parent in ('', parent) else next_parent
	        if target_tag is not None:
	            voting_tree[parent][target_tag] += 1
	            d.set_tag(target_tag)


	def assign_tag_by_path(lmodel: Live2DScrapModel):
	# Adjust each drawable's tag from keywords found in its lower-cased `did`
	# string, using `d.set_tag(...)`. Drawables whose `did` is None are skipped.
	# All keyword checks are plain substring tests (`'ear' in did_lower` is also
	# true for a did containing e.g. 'heart' or 'earring').
	# NOTE(review): indentation is missing in this copy of the file, so the exact
	# nesting of some branches below cannot be read off the text; confirm
	# against the original source before restructuring.

	# First pass: remember whether any drawable's did mentions 'arm'.
	did_contain_arms = False
	for d in lmodel.drawables:
	if d.did is None:
	continue
	if 'arm' in d.did.lower():
	did_contain_arms = True

	# Second pass: retag drawable by drawable.
	for d in lmodel.drawables:
	if d.did is None:
	continue
	did_lower = d.did.lower()
	# Drawables tagged 'objects' are never retagged.
	if d.tag == 'objects':
	continue
	# Untagged drawable: the first matching keyword decides the tag.
	if d.tag is None:
	if 'hair' in did_lower:
	d.set_tag('hair')
	elif 'arm' in did_lower:
	d.set_tag('handwear')
	elif 'mouth' in did_lower:
	d.set_tag('mouth')
	elif 'body' in did_lower:
	# Only 'body2' is mapped; other 'body' dids keep their tag here (the
	# 'topwear' fallback is commented out).
	if 'body2' in did_lower:
	d.set_tag('bottomwear')
	# else:
	# d.set_tag('topwear')
	elif 'face' in did_lower:
	d.set_tag('face')
	elif 'ear' in did_lower:
	d.set_tag('ears')
	elif 'eye' in did_lower:
	d.set_tag('eyes')
	elif 'leg' in did_lower:
	d.set_tag('legwear')
	# Tagged 'hair' but the did points at another part.
	elif d.tag == 'hair':
	if 'face' in did_lower:
	d.set_tag('face')
	elif 'arm' in did_lower:
	d.set_tag('handwear')
	elif 'body' in did_lower and 'hair' not in did_lower:
	d.set_tag('topwear')
	# Tagged 'handwear': only second-guessed when some did mentions 'arm'.
	elif d.tag == 'handwear':
	if did_contain_arms:
	if 'body' in did_lower and 'arm' not in did_lower:
	if 'body2' in did_lower:
	d.set_tag('bottomwear')
	# else:
	# d.set_tag('topwear')
	# NOTE(review): nesting level of this 'hair' check is ambiguous in this copy.
	if 'hair' in did_lower:
	d.set_tag('hair')
	elif d.tag == 'topwear':
	if 'hair' in did_lower:
	d.set_tag('headwear')
	elif 'arm' in did_lower:
	d.set_tag('handwear')
	elif d.tag == 'bottomwear':
	if 'hair' in did_lower:
	d.set_tag('headwear')
	# NOTE(review): it is unclear from this copy which `if` this `else` pairs with.
	else:
	if 'arm' in did_lower:
	d.set_tag('handwear')
	# Keyword overrides, tried in order (ear, neck, hand/arm, eye, mouth, nose);
	# each applies only when the current tag is in the listed set. 'face' is in
	# the set for ear, neck and nose but not for hand/arm, eye and mouth.
	if d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', 'face', None} and 'ear' in did_lower:
	d.set_tag('ears')
	elif d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', 'face', None} and 'neck' in did_lower:
	d.set_tag('neck')
	elif d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', None} and ('hand' in did_lower or 'arm' in did_lower):
	d.set_tag('handwear')
	elif d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', None} and 'eye' in did_lower:
	d.set_tag('eyes')
	elif d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', None} and 'mouth' in did_lower:
	d.set_tag('mouth')
	elif d.tag in {'legwear', 'topwear', 'bottomwear', 'hair', 'face', None} and 'nose' in did_lower:
	d.set_tag('nose')


	@cli.command('label_l2d_wsamsegs')
	@click.option('--exec_list')
	@click.option('--save_dir', default='')
	@click.option('--extr_more', is_flag=True, default=False, help='required if sam masks is 19 classes, further divide hair and eyes into sub parts')
	@click.option('--rank_to_worldsize', default='', type=str)
	def label_l2d_wsamsegs(exec_list, save_dir, extr_more, rank_to_worldsize):
	    # Label every visible drawable with the body part whose SAM mask covers
	    # the largest share of its visible area, then refine the tags by path
	    # keywords, propagate them to untagged drawables and save the result as
	    # 'body_parsing'. With --extr_more, `further_extr` runs afterwards.
	    # 'sam_masks.json' is assumed to hold one RLE mask per entry of
	    # VALID_BODY_PARTS_V2, in the same order: the argmax index over the
	    # masks is used directly to look up the part name.

	    from live2d.scrap_model import VALID_BODY_PARTS_V2
	    from utils.torch_utils import seed_everything

	    seed_everything(42)
	    exec_listp = exec_list

	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    if save_dir != '':
	        os.makedirs(save_dir, exist_ok=True)

	    mask_name = 'sam_masks.json'

	    for p in tqdm(exec_list):
	        try:
	            _, crop_xyxy, _ = load_detected_character(p)

	            lmodel = Live2DScrapModel(p, crop_xyxy=crop_xyxy, pad_to_square=False)
	            model_dir = lmodel.directory

	            if lmodel._body_parsing is not None:
	                metadata = lmodel._body_parsing['metadata']
	            else:
	                metadata = {}

	            masks_ann = json2dict(osp.join(model_dir, mask_name))
	            sam_masks = [rle2mask(m, to_bool=True) for m in masks_ann]

	            init_drawable_visible_map(lmodel.drawables)

	            for tg in lmodel.drawables:
	                if tg.final_visible_area < 1:
	                    continue
	                score_list = []
	                for m in sam_masks:
	                    _, _, i_area = tg.mask_union_intersection(m, final_vis_mask=True)
	                    if i_area is None:
	                        # no intersection reported: rank below every real overlap
	                        i_area = -1
	                    score_list.append(i_area / tg.final_visible_area)
	                best_match = np.argmax(np.array(score_list))
	                tg.body_part_tag = VALID_BODY_PARTS_V2[best_match]
	                if tg.body_part_tag == 'legwear':
	                    # Prefer 'footwear' when its mask covers more than half of
	                    # the drawable. The original indexed score_list with the
	                    # boolean `index(...) > 0.5`, i.e. always element 1.
	                    footwear_idx = VALID_BODY_PARTS_V2.index('footwear')
	                    if footwear_idx < len(score_list) and score_list[footwear_idx] > 0.5:
	                        tg.body_part_tag = 'footwear'

	            assign_tag_by_path(lmodel)
	            propagate_invisible_parts(lmodel)
	            lmodel.save_body_parsing(metadata=metadata, save_name='body_parsing')

	        except Exception as e:
	            # Report which entry failed, then propagate; the original message
	            # sat after a bare `raise` and was never printed.
	            print(f'Failed to process {p}: {e}')
	            raise

	    if extr_more:
	        # Pass the shard through so each rank refines the same entries it
	        # labelled above.
	        further_extr(exec_listp, rank_to_worldsize=rank_to_worldsize)


	@cli.command('gradcam_heatmap')
	@click.option('--image_file')
	@click.option('--savep', default=None)
	@click.option('--method', default='gradcam++')
	@click.option('--model_type', default='eva')
	@click.option('--gen_threshold', default=0.35)
	@click.option('--eigen_smooth', is_flag=True, default=False)
	@click.option('--aug_smooth', is_flag=True, default=False)
	@click.option('--device', default='cuda')
	def gradcam_heatmap(image_file, savep, method, model_type, gen_threshold, eigen_smooth, aug_smooth, device):
	    # Tag one image, render a CAM overlay for every general tag found, and
	    # save all overlays as a single grid image.

	    from annotators.wdv3_tagger import apply_wdv3_tagger, get_tagger_and_transform
	    from annotators.gradcam import apply_gradcam
	    from pytorch_grad_cam.utils.image import show_cam_on_image

	    if savep is None:
	        os.makedirs('workspace', exist_ok=True)
	        parent_name = osp.basename(osp.dirname(image_file))
	        savep = osp.join('workspace', parent_name + '_' + model_type + '_' + method + '.png')

	    img_input = Image.open(image_file)
	    # bounding box of the non-zero region of the last channel
	    bbox = img_input.split()[-1].getbbox()

	    # RGB -> crop to bbox -> pad to square -> 448x448
	    img_input = pil_ensure_rgb(img_input).crop(bbox)
	    img_input, _ = pil_pad_square(img_input)
	    img_input = img_input.resize((448, 448), resample=Image.Resampling.LANCZOS)

	    _, _, _, _, general = apply_wdv3_tagger(img_input, model_type=model_type, exclude_cls=exclude_cls, gen_threshold=gen_threshold)

	    _, transform, _ = get_tagger_and_transform(model_type)
	    # add a batch axis, then reverse the channel order
	    inputs = transform(img_input).unsqueeze(0)[:, [2, 1, 0]]

	    def _overlay(tag_name, tag_info):
	        # tag_info[1] is passed to apply_gradcam as the target class
	        cam = apply_gradcam(inputs, tag_info[1], method=method, model_type=model_type, eigen_smooth=eigen_smooth, aug_smooth=aug_smooth, device=device)
	        cam = cam[0, :]
	        vis = show_cam_on_image(np.array(img_input)[..., ::-1] / 255., cam)
	        return cv2.putText(vis, tag_name, (10, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)

	    imglist = [_overlay(k, v) for k, v in tqdm(general.items())]

	    rst = imglist2imgrid(imglist)
	    Image.fromarray(rst[..., ::-1]).save(savep)
	    print(f'result saved to {savep}')



	@cli.command('infer_bizarre_tagger')
	@click.option('--exec_list')
	@click.option('--detected_instanec_only', default=False, is_flag=True)
	@click.option('--rank_to_worldsize', default='', type=str)
	def infer_bizarre_tagger(exec_list, detected_instanec_only, rank_to_worldsize):
	    '''
	    apply pos estimator: bizarre tagger
	    '''
	    from annotators.bizarre_tagger import apply_pos_estimator

	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    for model_dir in tqdm(exec_list):
	        try:
	            # an entry may name a file inside the model directory
	            if osp.isfile(model_dir):
	                model_dir = osp.dirname(model_dir)

	            crop_xyxy = None
	            if detected_instanec_only:
	                instance_mask, crop_xyxy, _ = load_detected_character(model_dir)
	                if instance_mask is None:
	                    print(f'skip {model_dir}, no character instance detected')
	                    continue

	            lmodel = Live2DScrapModel(model_dir, crop_xyxy=crop_xyxy, crop_to_final=True, pad_to_square=False)
	            model_dir = lmodel.directory

	            img_input = pil_ensure_rgb(Image.fromarray(lmodel.final))
	            # last channel of the composed frame, scaled to [0, 1]
	            alpha = lmodel.final[..., -1].astype(np.float32) / 255.
	            kps, scores, _ = apply_pos_estimator(img_input, mask=alpha)

	            dict2json(
	                {
	                    'transform_stats': {'crop_xyxy': lmodel.final_bbox},
	                    'pos': list(kps),
	                    'scores': scores,
	                },
	                osp.join(model_dir, 'bizarre_pos.json'),
	            )

	        except Exception as e:
	            # best effort: report and move on to the next entry
	            print(f'failed to process {model_dir}: {e}')


	# CLI command: run LangSAM with a fixed set of text prompts on every model of
	# the exec list and write the results (masks as RLE) to
	# '<model_dir>/langsam_masks.json'.
	# NOTE(review): indentation is missing in this copy of the file; the comments
	# below describe the statements in order and do not assert their nesting.
	@cli.command('infer_langsam')
	@click.option('--exec_list')
	@click.option('--box_threshold', default=0.35, type=float)
	@click.option('--text_threshold', default=0.25, type=float)
	@click.option('--detected_instanec_only', default=False, is_flag=True)
	@click.option('--rank_to_worldsize', default='', type=str)
	@click.option('--skip_exists', default=False, is_flag=True)
	def infer_langsam(exec_list, box_threshold, text_threshold, detected_instanec_only, rank_to_worldsize, skip_exists):
	import torch
	import gc

	from annotators.lang_sam import LangSAM

	model = LangSAM(sam_type="sam2.1_hiera_large")
	exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	# `prompts` are predicted on the whole image; `prompt_list_head` on the head
	# crop when one can be derived from the face detection (see below).
	prompts = ['face', 'hair', 'hand', 'feet', 'leg', 'dress', 'shirt', 'skirt', 'jacket', 'neck', 'outfit', 'shoes']
	prompt_list_head = ['mouth', 'nose', 'ears']
	# prompt_list_head = ['hairband', 'crown']

	# skip_exists = True

	for model_dir in tqdm(exec_list[0:]):
	try:
	# an entry may name a file inside the model directory
	if osp.isfile(model_dir):
	model_dir = osp.dirname(model_dir)
	crop_xyxy = None
	if detected_instanec_only:
	instance_mask, crop_xyxy, score = load_detected_character(model_dir)
	if instance_mask is None:
	print(f'skip {model_dir}, no character instance detected')
	continue

	lmodel = Live2DScrapModel(model_dir, crop_xyxy=crop_xyxy, crop_to_final=True, pad_to_square=False)
	model_dir = lmodel.directory
	# ensure image is RGB
	img_input = pil_ensure_rgb(Image.fromarray(lmodel.final))


	# With --skip_exists an existing result file is loaded and extended;
	# otherwise a fresh result dict is started.
	savep = osp.join(model_dir, 'langsam_masks.json')
	if osp.exists(savep) and skip_exists:
	save_rst = json2dict(savep)
	else:
	save_rst = {'transform_stats': {'crop_xyxy': lmodel.final_bbox}, 'instances': {}}

	# Whole-image prompts; with --skip_exists only those not stored yet.
	if skip_exists:
	prompt_list = [k for k in prompts if (k not in save_rst['instances'])]
	else:
	prompt_list = prompts
	if len(prompt_list) > 0:
	rst = model.predict_multi_prompts(img_input, prompt_list, box_threshold=box_threshold, text_threshold=text_threshold)
	for p, ins in zip(prompt_list, rst):
	# drop a leading singleton axis from 3-D masks, then RLE-encode
	masks = [np.squeeze(m, 0) if m.ndim == 3 else m for m in ins['masks']]
	masks = [mask2rle(m) for m in masks]
	ins['boxes'] = [b for b in ins['boxes']]
	ins['masks'] = masks
	save_rst['instances'][p] = ins

	# Head prompts; with --skip_exists only those not stored yet.
	if skip_exists:
	prompt_list = [k for k in prompt_list_head if (k not in save_rst['instances'])]
	else:
	prompt_list = prompt_list_head

	crop_head_for_head_prompt = True
	if len(prompt_list) > 0:

	head_crop = head_pad = None
	head_input = img_input
	h, w = img_input.height, img_input.width
	if crop_head_for_head_prompt and lmodel.face_detected():
	facedet = lmodel.facedet[0]
	x1, y1, x2, y2 = facedet['bbox'][:4]
	# margin p = the longer side of the face box; the crop is the box grown
	# by p on every side and clamped to the image
	p = int(round(max(x2 - x1, y2 - y1) * 1.0))
	if p > 0:
	head_crop = [max(x1 - p, 0), max(y1 - p, 0), min(x2 + p, w), min(y2 + p, h)]
	hw, hh = head_crop[2] - head_crop[0], head_crop[3] - head_crop[1]
	# [left, top, right, bottom] margins between the crop and the full image
	head_pad = [head_crop[0], head_crop[1], w - head_crop[2], h - head_crop[3]]
	# crop equals the whole image, or is degenerate: predict on the full image
	if np.all(np.array(head_pad) == 0) or hw <= 0 or hh <= 0:
	head_pad = None
	else:
	head_input = head_input.crop(head_crop)

	rst = model.predict_multi_prompts(head_input, prompt_list, box_threshold=box_threshold, text_threshold=text_threshold)

	for p, ins in zip(prompt_list, rst):
	masks = [np.squeeze(m, 0) if m.ndim == 3 else m for m in ins['masks']]
	if head_pad is not None:
	# zero-pad crop-sized masks back to the full image size
	# (copyMakeBorder argument order is top, bottom, left, right)
	masks = [cv2.copyMakeBorder(m.astype(np.uint8), head_pad[1], head_pad[3], head_pad[0], head_pad[2], value=0, borderType=cv2.BORDER_CONSTANT) for m in masks]
	masks = [mask2rle(m) for m in masks]
	# NOTE(review): boxes are stored as returned by the predictor, without the
	# head-crop offset applied to the masks above; presumably crop-relative,
	# confirm whether consumers expect full-image coordinates.
	ins['boxes'] = [b for b in ins['boxes']]
	ins['masks'] = masks
	save_rst['instances'][p] = ins

	# from utils.visualize import visualize_segs_with_labels
	# from utils.cv import rle2mask
	# masks = []
	# for p in prompt_list:
	# msk = [rle2mask(m) for m in save_rst['instances'][p]['masks']]
	# if len(msk) > 0:
	# msk = np.logical_or.reduce(np.stack(msk, 0), axis=0)
	# else:
	# msk = np.zeros_like(lmodel.final[..., 0])
	# masks.append(msk)
	# t = json2dict(osp.join(model_dir, 'general_tags.json'))
	# print(t.keys())
	# print(has_animal_ear(t.keys()))
	# save_tmp_img(visualize_segs_with_labels(masks, lmodel.final, prompt_list))
	# pass

	savep = osp.join(model_dir, 'langsam_masks.json')
	dict2json(save_rst, savep)

	# pad to square with white background
	except Exception as e:
	# raise
	print(f'failed to process {model_dir}: {e}')
	# NOTE(review): whether this cleanup runs per iteration, only after a failure,
	# or once after the loop cannot be told from this copy; confirm.
	gc.collect()
	torch.cuda.empty_cache()
	torch.cuda.ipc_collect()

	@cli.command('parse_live2d')
	@click.option('--exec_list')
	@click.option('--method', default='gradcam')
	@click.option('--model_type', default='eva')
	@click.option('--gen_threshold', default=0.3)
	@click.option('--eigen_smooth', is_flag=True, default=False)
	@click.option('--aug_smooth', is_flag=True, default=False)
	@click.option('--save_gradcam_heatmap', is_flag=True, default=False)
	@click.option('--device', default='cuda')
	@click.option('--tag_only', default=False, is_flag=True)
	@click.option('--detected_instanec_only', default=False, is_flag=True)
	@click.option('--rank_to_worldsize', default='', type=str)
	def parse_live2d(exec_list, method, model_type, gen_threshold, eigen_smooth, aug_smooth, save_gradcam_heatmap, device, tag_only, detected_instanec_only, rank_to_worldsize):
	    # For every model: tag the composed frame, save 'general_tags.json', and
	    # (unless --tag_only) feed one CAM score map per tag into the model's tag
	    # statistics, which are saved at the end. Failures are reported per
	    # entry and do not stop the run.

	    from annotators.wdv3_tagger import apply_wdv3_tagger, get_tagger_and_transform
	    from annotators.gradcam import apply_gradcam
	    from pytorch_grad_cam.utils.image import show_cam_on_image

	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    for model_dir in tqdm(exec_list):
	        try:
	            crop_xyxy = None
	            if detected_instanec_only:
	                instance_mask, crop_xyxy, _ = load_detected_character(model_dir)
	                if instance_mask is None:
	                    print(f'skip {model_dir}, no character instance detected')
	                    continue

	            model = Live2DScrapModel(model_dir, target_frame_size=448, crop_to_final=True, pad_to_square=True, crop_xyxy=crop_xyxy, pad_drawable_img=False)
	            model_dir = model.directory

	            img_input = pil_ensure_rgb(Image.fromarray(model.final))
	            _, _, _, _, general = apply_wdv3_tagger(img_input, model_type=model_type, exclude_cls=exclude_cls, gen_threshold=gen_threshold)
	            dict2json(general, osp.join(model_dir, 'general_tags.json'))
	            if tag_only:
	                continue

	            model.init_drawable_visible_map()
	            _, transform, _ = get_tagger_and_transform(model_type)
	            # add a batch axis, then reverse the channel order
	            inputs = transform(img_input).unsqueeze(0)[:, [2, 1, 0]]

	            overlays = []
	            for cls_name, tag_info in general.items():
	                cls_idx = tag_info[1]
	                score_map = apply_gradcam(inputs, cls_idx, method=method, model_type=model_type, eigen_smooth=eigen_smooth, aug_smooth=aug_smooth, device=device)
	                model.update_tag_stats(score_map[0], cls_idx, cls_name, filter_scoremap=True)

	                if not save_gradcam_heatmap:
	                    continue
	                vis = show_cam_on_image(np.array(img_input)[..., ::-1] / 255., score_map[0])
	                vis = cv2.putText(vis, cls_name, (10, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)
	                overlays.append(vis)

	            if save_gradcam_heatmap:
	                grid = imglist2imgrid(overlays, cols=6)
	                heatmap_path = osp.join(model_dir, 'heatmap_' + model_type + '_' + method + '.png')
	                Image.fromarray(grid[..., ::-1]).save(heatmap_path)

	            model.save_tag_stats()
	        except Exception as e:
	            print(f'failed to process {model_dir}: {e}')

	# # assign drawable to the tag with maximum
	# avgscore_lst = []
	# for tag, tag_info in model.tag_stats.items():
	# avgscore_map = np.zeros_like(model.final[..., 0]).astype(np.float32)
	# for drawable in model.drawables:
	# if drawable.final_visible_area < 1:
	# continue
	# x1, y1, x2, y2 = drawable.xyxy
	# avgscore_map[y1: y2, x1: x2] += drawable.final_visible_mask.astype(np.float32) * drawable.tag_stats[tag]['avg_score']
	# avgscore_lst.append(avgscore_map)

	# avgscore_lst = np.stack(avgscore_lst).clip(0, 1)
	# concept_labels = list(model.tag_stats.keys())
	# vis = show_factorization_on_image(model.final[..., :3] / 255., avgscore_lst, concept_labels=concept_labels, image_weight=0.1, visible_mask=model.final_visible_mask[..., None])
	# Image.fromarray(vis).save(osp.join(model_dir, 'segmentation_' + model_type + '_' + method + '.png'))


	@cli.command('dump_body_tags')
	@click.option('--src_dir', default='workspace/tags_raw/bodyparts')
	@click.option('--savep', default='workspace/tagcluster_bodypart.json')
	def dump_body_tags(src_dir, savep):
	    # Read every file in `src_dir` as a list of tags (one per line) and dump
	    # {file name: [cleaned tags]} to `savep`. Cleaning: lower-case, skip
	    # lines starting with '#', keep the text before the first separator,
	    # replace spaces with underscores, drop empty results.
	    from utils.io_utils import dict2json

	    # Literal separators for str.split (not regular expressions). The second
	    # one is a backslash followed by '|'; it was written as '\|', an invalid
	    # escape sequence with the same value.
	    # NOTE(review): a bare '|' is not treated as a separator; confirm that
	    # the tag files really use '\|'.
	    spliters = [',', '\\|']
	    tag_set_cleaned = {}

	    for d in os.listdir(src_dir):
	        with open(osp.join(src_dir, d), 'r', encoding='utf8') as f:
	            lines = f.read().split('\n')

	        lines_lst = []
	        for l in lines:
	            l = l.strip().lower()
	            if l.startswith('#'):
	                continue
	            for s in spliters:
	                l = l.split(s)[0].strip()
	            if l:
	                lines_lst.append(l.replace(' ', '_'))
	        tag_set_cleaned[d] = lines_lst

	    dict2json(tag_set_cleaned, savep)


	# CLI command: run the anime face detector on the composed frame of every
	# model in the exec list and write the detections to '<model_dir>/facedet.json'.
	# NOTE(review): indentation is missing in this copy of the file; the comments
	# below describe the statements in order and do not assert their nesting.
	@cli.command('facedet')
	@click.option('--exec_list')
	@click.option('--twopass', is_flag=True, default=False)
	@click.option('--rank_to_worldsize', default='', type=str)
	@click.option('--skip_exists', default=False, is_flag=True)
	def facedet(exec_list, twopass, rank_to_worldsize, skip_exists):

	from annotators import anime_face_detector

	# A '.json' / '.json.gz' exec list is loaded as a dict and its keys are used
	# as the entries.
	if exec_list.endswith('.json') or exec_list.endswith('.json.gz'):
	exec_list = json2dict(exec_list)
	exec_list = list(exec_list.keys())
	exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	device = 'cuda' #@param ['cuda:0', 'cpu']
	model = 'yolov3' #@param ['yolov3', 'faster-rcnn']
	detector = anime_face_detector.create_detector(model, device=device)

	# With --skip_exists, drop entries whose directory already has a facedet.json.
	if skip_exists:
	new_exec_list = []
	for srcp in exec_list:
	if osp.isfile(srcp):
	srcp = osp.dirname(srcp)
	if osp.exists(osp.join(srcp, 'facedet.json')):
	print(f'skip {srcp} due to result exists')
	continue
	new_exec_list.append(srcp)
	exec_list = new_exec_list

	for srcp in tqdm(exec_list):
	try:
	if osp.isfile(srcp):
	srcp = osp.dirname(srcp)
	savep = osp.join(srcp, 'facedet.json')

	lmodel = Live2DScrapModel(srcp, crop_to_final=True, pad_to_square=False)
	image = Image.fromarray(lmodel.final)
	if twopass:
	lmodel.init_drawable_visible_map()

	# the detector is fed a BGR array
	image = pil_ensure_rgb(image)
	image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

	preds = detector(image)
	if len(preds) > 0:
	# keep working on the detection with the largest last bbox element
	# (presumably the confidence score: it is scaled by 100 below)
	pred = max(preds, key=lambda x: x['bbox'][-1])

	bbox = pred['bbox']
	keypoints = pred['keypoints']

	# Second pass: compose only the drawables that lie mostly inside the face box,
	# run the detector again on that crop, and take the eyebrow keypoints from it.
	if twopass:
	bbox_input = bbox.copy()
	# bbox_input[..., :4] = lmodel.processor.scale_coordinates(bbox_input[..., :4].reshape((2, 2))).flatten()
	x1, y1, x2, y2, _ = np.round(bbox_input).astype(np.int32)
	# vis_face_det = visualize_facedet_output(model.final, [facedet])[y1: y2, x1: x2, :3]
	# vis_face_det = np.concatenate([vis_face_det, np.full_like(vis_face_det[..., [0]], fill_value=255)], axis=2)

	xyxy = [x1, y1, x2, y2]
	valid_drawables = []
	for drawable in lmodel.drawables:
	if drawable.final_visible_area < 1:
	continue
	bbox_i, vis_mask = drawable.get_vis_mask(xyxy, final_vis_mask=True)
	# keep drawables with at least 80% of their visible area inside the face box
	if bbox_i is None or vis_mask.sum() / drawable.final_visible_area < 0.8:
	continue
	valid_drawables.append(drawable)
	face_crop = compose_from_drawables(valid_drawables, xyxy=xyxy)
	facedet2 = detector(face_crop, boxes=[np.array([0, 0, x2-x1, y2-y1, 1])])
	keypoints2 = facedet2[0]['keypoints']
	# NOTE(review): the second-pass keypoints are shifted by the face-box origin
	# plus lmodel.final_bbox, while the first-pass keypoints they are mixed with
	# are never shifted by final_bbox in this function, and final_bbox is only
	# None-checked further down; confirm the intended coordinate frame.
	px1 = x1 + lmodel.final_bbox[0]
	py1 = y1 + lmodel.final_bbox[1]
	keypoints2[:, 0] += px1
	keypoints2[:, 1] += py1
	keypoints[LEFT_EYEBROW] = keypoints2[LEFT_EYEBROW]
	keypoints[RIGHT_EYEBROW] = keypoints2[RIGHT_EYEBROW]
	# Image.fromarray(face_crop).save('local_tst.png')
	pass

	# Store the box as int32: rounded coordinates, last element as rounded
	# value * 100; coordinates are then shifted by final_bbox when it is set.
	bbox[-1] = np.round(bbox[-1] * 100)
	bbox[:-1] = np.round(bbox[:-1])
	pred['bbox'] = bbox.astype(np.int32)
	if lmodel.final_bbox is not None:
	pred['bbox'][[0, 2]] += lmodel.final_bbox[0]
	pred['bbox'][[1, 3]] += lmodel.final_bbox[1]

	# print(lmodel.final_bbox)
	# save_tmp_img(imread(osp.join(srcp, 'final.jxl'))[pred['bbox'][1]: pred['bbox'][3], pred['bbox'][0]: pred['bbox'][2]])
	# break
	# Same integer encoding for the keypoints: column 2 * 100, columns 0-1 rounded.
	keypoints[:, 2] = np.round(keypoints[:, 2] * 100)
	keypoints[:, :2] = np.round(keypoints[:, :2])
	pred['keypoints'] = [k for k in keypoints.astype(np.int32)]

	else:
	pass
	# NOTE(review): the whole `preds` list is written, but only the selected `pred`
	# was converted above.
	dict2json(preds, savep)
	except Exception as e:
	print(f'failed to process {srcp}: {e}')



	def hflip_aug_mask(mask: np.ndarray, x: int, aug=True):
	'''
	mask: (h, w) or (c, h, w)
	'''
	if mask.ndim == 3:
	if mask.shape[0] == 1:
	mask = mask[0]
	else:
	mask = np.logical_or.reduce(mask, axis=0)
	h, w = mask.shape[:2]
	mid = w // 2
	if x <= mid:
	x1 = 0
	x2 = x * 2
	else:
	x2 = w
	x1 = w - (w - x) * 2
	mask_or = mask[:, x1: x2]
	if aug:
	mask[:, x1: x2] = np.bitwise_or(mask_or, mask_or[:, ::-1])
	mask_l = mask.copy()
	mask_l[:, mid:] = 0
	mask[:, :mid] = 0
	# imglist2imgrid([mask_l.astype(np.uint8) * 255, mask.astype(np.uint8) * 255], output_type='pil').save('local_tst.png')
	return mask_l, mask


	def part_morph_transform(masks, target_cls, ksize=1, op='dilate'):
	    """Dilate or erode `masks[target_cls]` in place with an elliptical kernel.

	    The kernel is (2 * ksize + 1) pixels on each side, anchored at
	    (ksize, ksize). `op='dilate'` dilates; any other value erodes. The
	    result is written back to `masks[target_cls]` as a boolean array.
	    """
	    source = masks[target_cls].astype(np.uint8)
	    side = 2 * ksize + 1
	    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (side, side), (ksize, ksize))
	    morph = cv2.dilate if op == 'dilate' else cv2.erode
	    masks[target_cls] = morph(source, kernel).astype(bool)


	def split_lr_part(lmodel: Live2DScrapModel, target_ids: list):
	    """Split the drawables carrying one of `target_ids` into two groups by x position.

	    The horizontal centre of each drawable's visible mask is clustered with
	    2-means; drawables in the cluster with the smaller centre get
	    `target_ids[0]`, the others `target_ids[1]`. Returns False (changing
	    nothing) when fewer than two drawables match, True otherwise.
	    """
	    from sklearn.cluster import KMeans

	    members = []
	    centers = []
	    for d in lmodel.drawables:
	        if d.face_part_id not in target_ids:
	            continue
	        bx, _, bw, _ = cv2.boundingRect(cv2.findNonZero(d.final_visible_mask.astype(np.uint8)))
	        # the box is relative to the drawable; shift by the drawable's x offset
	        centers.append(bx + d.x + bw / 2)
	        members.append(d)

	    if len(members) < 2:
	        return False

	    xs = np.array(centers)
	    # express the centres relative to their mean before clustering
	    xs = xs[:, None] / (np.mean(xs) + 1e-6) - 1

	    km = KMeans(2, max_iter=50).fit(xs)
	    labels = km.predict(xs)
	    # make label 0 the cluster with the smaller centre
	    if km.cluster_centers_[0, 0] > km.cluster_centers_[1, 0]:
	        labels = 1 - labels

	    for d, side in zip(members, labels):
	        d.face_part_id = target_ids[side]
	    return True


	def split_lr_mask(masks: np.ndarray, split_channels):
	    '''
	    Unfinished stub: OR-merges the selected channels of `masks` and locates the
	    non-zero positions, but does nothing with them and returns None.
	    '''
	    merged = np.logical_or.reduce(masks[split_channels], axis=0)
	    np.where(merged > 0)  # result is currently unused
	    return None


	def find_brow(lmodel: Live2DScrapModel, brow_id, face_xyxy=None):
	    '''
	    Promote one drawable currently labelled as an eye to the brow label `brow_id`.

	    The eye label is taken to be brow_id + 2. Among the drawables carrying that label,
	    only those wider than half of the composed eye mask's bounding box are considered;
	    the one with the smallest top coordinate is re-labelled to brow_id.

	    Returns True when a drawable was re-labelled, False otherwise.
	    '''
	    eye_id = brow_id + 2
	    eye_mask = lmodel.compose_face_drawables(eye_id, mask_only=True, final_visible_mask=True, xyxy=face_xyxy)
	    _, _, eye_w, _ = cv2.boundingRect(cv2.findNonZero(eye_mask.astype(np.uint8)))

	    best, best_top = None, 100000
	    for cand in lmodel.drawables:
	        if cand.face_part_id == eye_id:
	            _, top, width, _ = cand.get_bbox(xyxy=face_xyxy)
	            # NOTE(review): eye_w is presumably 0 when no eye pixel is visible;
	            # confirm the division below is safe in that case.
	            if top < best_top and width / eye_w > 0.5:
	                best, best_top = cand, top

	    if best is None:
	        return False
	    best.face_part_id = brow_id
	    return True


	# CLI command `facedet_sam`: for every Live2D model directory in the exec list, run a
	# SemanticSam segmentation on the padded face crop and assign a `face_part_id` to each
	# drawable, then store the result through `lmodel.save_face_parsing`.
	#
	# Label ids as they are used in this function (read off the local names and comments below):
	#   0 background, 1 face, 2/3 brows, 4/5 eyes, 6 glasses, 7/8 ears, 10 nose, 11 mouth,
	#   14 neck, 16 cloth, 17 hair, 18 hat.
	#
	# Options:
	#   --exec_list          list of items to process; a .json / .json.gz dict is reduced to its keys
	#   --ckpt               checkpoint passed to init_model_from_pretrained
	#   --mask_decoder / --class_num   forwarded to the model as model_args
	#   --save_segs          also dump the raw segmentation masks next to the model
	#   --save_preview       also write face_parsing_preview.jpg
	#   --rank_to_worldsize  forwarded to load_exec_list
	#   --skip_exists        skip directories that already contain face_parsing.json
	#
	# Documented with comments instead of a docstring so the click help text stays unchanged.
	@cli.command('facedet_sam')
	@click.option('--exec_list')
	@click.option('--ckpt')
	@click.option('--mask_decoder', default='mlp_variant')
	@click.option('--class_num', default=19)
	@click.option('--save_segs', default=False, is_flag=True)
	@click.option('--save_preview', default=False, is_flag=True)
	@click.option('--rank_to_worldsize', default='', type=str)
	@click.option('--skip_exists', default=False, is_flag=True)
	def facedet_sam(exec_list, ckpt, mask_decoder, class_num, save_segs, save_preview, rank_to_worldsize, skip_exists):

	from modules.semanticsam import SemanticSam, Sam
	from utils.torch_utils import init_model_from_pretrained
	from utils.cv import batch_save_masks
	import torch

	# a json(.gz) exec list is a dict; only its keys are used
	if exec_list.endswith('.json') or exec_list.endswith('.json.gz'):
	exec_list = json2dict(exec_list)
	exec_list = list(exec_list.keys())

	exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
	device = 'cuda' #@param ['cuda:0', 'cpu']

	# if model is None:
	sam: SemanticSam = init_model_from_pretrained(
	pretrained_model_name_or_path=ckpt,
	module_cls=SemanticSam,
	model_args={'mask_decoder': mask_decoder, 'class_num': class_num},
	device=device
	).eval()

	# padding added on every side of the detected face box, as a fraction of the box height
	head_pad_ratio = 0.4

	# drop items whose directory already holds a face_parsing.json
	if skip_exists:
	new_exec_list = []
	for srcp in exec_list:
	if osp.isfile(srcp):
	srcp = osp.dirname(srcp)
	if osp.exists(osp.join(srcp, 'face_parsing.json')):
	print(f'skip {srcp} due to result exists')
	continue
	new_exec_list.append(srcp)
	exec_list = new_exec_list

	for sidx, srcp in enumerate(tqdm(exec_list[0:])):
	# 5 12
	try:
	# a file path is replaced by its containing directory
	if osp.isfile(srcp):
	srcp = osp.dirname(srcp)

	lmodel_dir = srcp
	lmodel = Live2DScrapModel(lmodel_dir, crop_to_final=True, pad_to_square=False)
	if not lmodel.face_detected():
	print(f'skip {srcp} due to no face detected')
	continue
	lmodel.init_drawable_visible_map()

	fh, fw = lmodel.final.shape[:2]

	# only the first face detection is used
	facedet = lmodel.facedet[0]
	x1, y1, x2, y2 = facedet['bbox'][:4]
	# save_tmp_img(lmodel.final[y1: y2, x1: x2])

	# grow the face box by head_pad pixels on every side, then clip it
	head_pad = 0
	if head_pad_ratio != 0:
	head_pad = head_pad_ratio * (y2 - y1)
	head_pad = int(round(head_pad))
	facedet['bbox'][:2] -= head_pad
	facedet['bbox'][2:4] += head_pad
	# NOTE(review): x and y are both clipped to min(fh, fw); on a non-square image this
	# may cut the box short along the longer side - confirm this is intended.
	facedet['bbox'] = np.clip(facedet['bbox'], 0, min(fh, fw))
	# bbox is unpacked as five values here; the fifth is discarded
	x1, y1, x2, y2, _ = facedet['bbox']
	# x1 -= head_pad ; y1 -= head_pad ; x2 += head_pad ; y2 += head_pad

	face_xyxy = [x1, y1, x2, y2]
	image = lmodel.final
	face_image = image[y1: y2, x1: x2, :3]
	ch, cw = face_image.shape[:2]

	# NOTE(review): this runs for every processed item and looks like a debugging
	# leftover - confirm it is wanted in batch runs.
	save_tmp_img(face_image)
	# first segmentation pass on the padded face crop; masks are thresholded at 0
	with torch.inference_mode():
	preds = sam.inference(face_image)[0]
	masks_np = (preds > 0).to(device='cpu', dtype=torch.bool).numpy()

	if save_segs:
	batch_save_masks(masks_np, osp.join(lmodel_dir, 'faceseg.json'), compress='gzip')
	batch_save_masks(masks_np[[10, 11]], osp.join(lmodel_dir, 'faceseg_nosemouth.json'), compress='gzip')

	# save_tmp_img(visualize_segs(masks_np[[1]], src_img=np.array(face_image), image_weight=0.3))

	# dilate (default op) the mouth, eye and ear masks in place
	part_morph_transform(masks_np, 11, ksize=2)
	part_morph_transform(masks_np, 4, ksize=3)
	part_morph_transform(masks_np, 5, ksize=3)
	part_morph_transform(masks_np, 7, ksize=2)
	part_morph_transform(masks_np, 8, ksize=2)

	# first labelling pass: each visible drawable gets the non-background class with the
	# highest 'ioa' (intersection divided by the drawable's area)
	neck_detected = False
	seg_areas = reduce(masks_np, 'b h w -> b', 'sum') + 1e-6
	for drawable in lmodel.drawables:
	if drawable.final_visible_area < 1:
	continue
	area, u_area, i_area = drawable.mask_union_intersection(masks_np, face_xyxy, final_vis_mask=True)
	if u_area is None or area == 0 or np.all(i_area[1:] == 0):
	continue
	u_area += 1e-6
	drawable.face_part_stats = {
	'union': u_area, 'intersection': i_area, 'iou': i_area / u_area, 'ioa': i_area / area, 'area': area, 'ios': i_area / seg_areas
	}
	drawable.face_part_id = np.argmax(drawable.face_part_stats['ioa'][1:]) + 1
	if drawable.face_part_id == 14:
	neck_detected = True

	# bounding box (bx, by, bw, bh) of everything currently labelled as face
	base_face_mask = lmodel.compose_face_drawables(1, mask_only=True, xyxy=face_xyxy)
	bx, by, bw, bh = cv2.boundingRect(cv2.findNonZero(base_face_mask.astype(np.uint8)))
	by2 = by + bh
	bx2 = bw + bx

	base_face_mask_vis = cv2.cvtColor(base_face_mask.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB)
	base_face_mask_vis = cv2.rectangle(base_face_mask_vis, (bx, by), (bx2, by2), color=(0, 255, 0), thickness=4)

	# combined width of everything labelled as an eye
	eyew = cv2.boundingRect(cv2.findNonZero(
	lmodel.compose_face_drawables([4, 5], mask_only=True, xyxy=face_xyxy).astype(np.uint8)
	))[2]
	eye_detected = eyew > 1
	# NOTE(review): if bw can be 0 (no face pixels found) this division fails - confirm.
	if eyew / bw > 0.5:
	split_lr_part(lmodel, (4, 5))
	leye_mask = lmodel.compose_face_drawables(4, mask_only=True, xyxy=face_xyxy)
	reye_mask = lmodel.compose_face_drawables(5, mask_only=True, xyxy=face_xyxy)

	leye_x, leye_y, leye_w, leye_h = cv2.boundingRect(cv2.findNonZero(leye_mask.astype(np.uint8)))
	reye_x, reye_y, reye_w, reye_h = cv2.boundingRect(cv2.findNonZero(reye_mask.astype(np.uint8)))
	eyel, eyer = min(leye_x, reye_x), max(leye_x + leye_w, reye_x + reye_w)
	eyew = eyer - eyel

	brow_potentials = []

	leye_detected, reye_detected = lmodel.face_part_detected([4, 5])
	beye_detected = leye_detected and reye_detected

	# re-assign eye lids & brows
	# base_face_draworder ends up as the lowest draw order among large face drawables
	base_face_draworder = 10000
	for d_id, drawable in enumerate(lmodel.drawables):
	if drawable.area < 1:
	continue
	# neck vs. cloth disambiguation based on the first-pass 'ioa' values
	if drawable.face_part_id == 14:
	if drawable.face_part_stats['ioa'][16] + 0.1 > drawable.face_part_stats['ioa'][14]:
	drawable.face_part_id = 16
	if not neck_detected and drawable.face_part_id == 16 and drawable.face_part_stats['ioa'][14] > 0.15:
	drawable.face_part_id = 14

	dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy)
	dx2 = dx + dw
	dy2 = dy + dh
	# cloth that starts above the vertical middle of the face box and overlaps hair becomes hair
	if drawable.face_part_id == 16 and dy < by + bh / 2:
	if drawable.face_part_stats['ioa'][17] > 0.15:
	drawable.face_part_id = 17

	if not drawable.face_part_id in {None, 1, 17, 4, 5}:
	continue

	# check if hair drawable is actually background
	if drawable.face_part_id == 17:
	if drawable.face_part_stats['ioa'][0] > 0.7 and drawable.face_part_stats['ioa'][17] < 0.3:
	drawable.face_part_id = None


	# NOTE(review): `dh > bw > 0.7` is a chained comparison (dh > bw and bw > 0.7);
	# possibly `dh / bh > 0.7` was meant - confirm.
	if drawable.face_part_id == 1 and dw / bw > 0.7 and dh > bw > 0.7:
	if drawable.draw_order < base_face_draworder:
	base_face_draworder = drawable.draw_order

	# only drawables strictly inside the face box are considered from here on
	if not (dx > bx and dx2 < bx2 and dy > by and dy2 < by2):
	continue

	# ... and only those that do not start below the bottom of the eyes
	if dy > max(leye_y + leye_h, reye_y + reye_h):
	continue

	if drawable.face_part_id == 17 and drawable.draw_order >= base_face_draworder:
	if dw / bw > 0.4 or dh / bh > 0.2:
	continue

	# re-assign glass
	if drawable.face_part_id in {4, 5} and beye_detected:
	if eye_detected and (dw / eyew > 0.6 or drawable.face_part_stats['ioa'][6] > 0.4):
	drawable.face_part_id = 6
	continue
	# wider-than-tall drawables are kept as brow candidates
	if dw > dh:
	brow_potentials.append(drawable)

	# collect the drawables used to recompose the face for the second segmentation pass
	facedrawable_wo_prehair = []
	for drawable in lmodel.drawables:
	# skip face-covering hairs
	if drawable.face_part_id == 17 and drawable.draw_order >= base_face_draworder:
	continue
	# skip glass, neck, hat, cloth
	if drawable.face_part_id in {6, 14, 16, 18}:
	continue

	dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy)
	dx2 = dx + dw ; dy2 = dy + dh
	# ix / iy: overlap of the drawable's box with the face box along x / y
	ix = min(dx2, bx2) - max(dx, bx)
	iy = min(dy2, by2) - max(dy, by)
	if dw > 0 and dh > 0 and ix / dw > 0.3 and iy / dh > 0.8:
	facedrawable_wo_prehair.append(drawable)

	for drawable in lmodel.drawables:
	if drawable.face_part_id is None:
	continue
	if drawable.face_part_id == 1:
	# re-assgin mouth tags
	if drawable.face_part_stats['ioa'][11] > 0.3:
	drawable.face_part_id = 11


	base_face_drawables: list[Drawable] = None
	base_face_drawables = set(facedrawable_wo_prehair + brow_potentials)
	base_face_drawables = list(base_face_drawables)
	base_face_drawables.sort(key=lambda x: x.draw_order)
	# reinit for those covered by hairs
	init_drawable_visible_map(base_face_drawables)
	# pil_ensure_rgb(compose_from_drawables(base_face_drawables[10:12], xyxy=face_xyxy, output_type='pil')).save('local_tst.png')

	# second segmentation pass on the face recomposed from the selected drawables
	base_face = compose_from_drawables(base_face_drawables, xyxy=face_xyxy)
	base_face = np.array(pil_ensure_rgb(Image.fromarray(base_face)))
	with torch.inference_mode():
	preds = sam.inference(base_face)[0]
	masks_np2 = (preds > 0).to(device='cpu', dtype=torch.bool).numpy()

	if save_segs:
	batch_save_masks(masks_np2, osp.join(lmodel_dir, 'faceseg2.json'), compress='gzip')

	# merge the passes: face, nose and mouth come from the first pass; brows and ears are
	# the union of both passes; eyes come from the first pass when both eyes were detected
	masks_np2[[1, 10, 11]] = masks_np[[1, 10, 11]]
	masks_np2[[2, 3, 7, 8]] = np.bitwise_or(masks_np2[[2, 3, 7, 8]], masks_np[[2, 3, 7, 8]])
	if beye_detected:
	masks_np2[[4, 5]] = masks_np[[4, 5]]

	# second labelling pass over the recomposed-face drawables, using the merged masks
	seg_areas = reduce(masks_np2, 'b h w -> b', 'sum') + 1e-6
	hair_mask = lmodel.compose_face_drawables([17], mask_only=True, xyxy=face_xyxy)
	for didx, drawable in enumerate(base_face_drawables):

	# NOTE(review): this skips list positions 10 and 11 of base_face_drawables, not
	# label ids 10 / 11; possibly a debugging leftover - confirm.
	if didx in [10, 11]:
	continue
	if drawable.final_visible_area < 1:
	continue

	area, u_area, i_area = drawable.mask_union_intersection(masks_np2, face_xyxy, final_vis_mask=True)
	if u_area is None or area == 0 or np.all(i_area[1:] == 0):
	continue
	u_area += 1e-6
	# the hair intersection from this pass is discarded
	i_area[17] = 0
	if np.all(i_area[1:] == 0):
	continue
	ori_face_part_stats = drawable.face_part_stats
	drawable.face_part_stats = {
	'union': u_area, 'intersection': i_area, 'iou': i_area / u_area, 'ioa': i_area / area, 'area': area, 'ios': i_area / seg_areas
	}
	# hair and face 'ioa' are carried over from the first pass when available
	if len(ori_face_part_stats) > 0:
	drawable.face_part_stats['ioa'][17] = ori_face_part_stats['ioa'][17]
	drawable.face_part_stats['ioa'][1] = ori_face_part_stats['ioa'][1]
	face_part_id = np.argmax(drawable.face_part_stats['ioa'][1:]) + 1
	face_part_id = int(face_part_id)

	# fix brows & ears attached to hair
	if face_part_id == 17:
	if drawable.face_part_stats['ioa'][2] > 0.5:
	face_part_id = 2
	elif drawable.face_part_stats['ioa'][3] > 0.5:
	face_part_id = 3
	elif drawable.face_part_stats['ioa'][7] > 0.5:
	face_part_id = 7
	elif drawable.face_part_stats['ioa'][8] > 0.5:
	face_part_id = 8

	# fix brows & ears attached to face
	if face_part_id == 1:
	if drawable.face_part_stats['ioa'][2] > 0.3:
	face_part_id = 2
	if drawable.face_part_stats['ioa'][3] > 0.3:
	face_part_id = 3

	# assign drawables not classified as hair in the first pass
	if drawable.face_part_id != 17:
	drawable.face_part_id = face_part_id
	# drawable was classiified as hair in the first pass and classified as ear now
	elif face_part_id in {7, 8} and drawable.draw_order <= base_face_draworder:
	drawable.face_part_id = face_part_id
	elif face_part_id in {2, 3}:
	drawable.face_part_id = face_part_id

	if drawable.face_part_id in {1, 17}:
	dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy)
	dx2 = dx + dw
	dy2 = dy + dh
	if not (dx > bx and dx2 < bx2 and dy > by and dy2 < by2):
	continue
	if dy > max(leye_y + leye_h, reye_y + reye_h):
	continue
	# if (dx - bx_mid) * (dx2 - bx_mid) < 0:
	# continue
	# eye lids
	if dw < max(leye_w, reye_w) * 1.1 and dh < max(leye_h / 2, reye_h / 2):
	if drawable.face_part_stats['ioa'][4] > 0.4 or np.any(drawable.bitwise_and(masks_np[4], face_xyxy)):
	drawable.face_part_id = 4
	elif drawable.face_part_stats['ioa'][5] > 0.4 or np.any(drawable.bitwise_and(masks_np[5], face_xyxy)):
	drawable.face_part_id = 5


	# a tall "mouth" drawable that touches the hair mask is re-labelled as hair
	if drawable.face_part_id == 11 and drawable.get_bbox(xyxy=face_xyxy)[3] / bh > 0.4:
	if np.any(drawable.bitwise_and(hair_mask, face_xyxy)):
	drawable.face_part_id = 17

	# left / right clean-up for eyes, brows and ears
	if eyew / bw > 0.5:
	split_lr_part(lmodel, (4, 5))
	lbrow_detected, rbrow_detected = lmodel.brow_detected()
	# exactly one brow found: split it if it is wide enough, otherwise look for the missing one
	if lbrow_detected ^ rbrow_detected:
	brow_mask = lmodel.compose_face_drawables([2, 3], mask_only=True, xyxy=face_xyxy)
	brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(brow_mask.astype(np.uint8)))
	if eye_detected and brw / eyew > 0.6:
	split_lr_part(lmodel, (2, 3))
	lbrow_detected = rbrow_detected = True
	if not lbrow_detected:
	lbrow_detected = find_brow(lmodel, 2)
	if not rbrow_detected:
	rbrow_detected = find_brow(lmodel, 3)

	if lbrow_detected and rbrow_detected:
	brow_mask = lmodel.compose_face_drawables([2, 3], mask_only=True, xyxy=face_xyxy)
	brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(brow_mask.astype(np.uint8)))
	if eye_detected and brw / eyew > 0.6:
	split_lr_part(lmodel, (2, 3))
	# lmodel.compose_face_drawables([2], xyxy=face_xyxy, output_type='pil', mask_only=True).save('local_tst.png')
	# lmodel.compose_face_drawables([3], xyxy=face_xyxy, output_type='pil', mask_only=True).save('local_tst.png')
	pass

	# the brx / brw names are reused below for the ear bounding box
	lear_detected, rear_detected = lmodel.face_part_detected([7, 8])
	if lear_detected ^ rear_detected:
	ear_mask = lmodel.compose_face_drawables([7, 8], mask_only=True, xyxy=face_xyxy)
	brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(ear_mask.astype(np.uint8)))
	if brw / bw > 0.75:
	split_lr_part(lmodel, (7, 8))
	elif lear_detected:
	ear_mask = lmodel.compose_face_drawables([7, 8], mask_only=True, xyxy=face_xyxy)
	brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(ear_mask.astype(np.uint8)))
	if brw / bw > 0.75:
	split_lr_part(lmodel, (7, 8))

	# neck clean-up: neck drawables overlapping the visible face form the base neck mask;
	# other neck drawables keep their label only if almost fully inside that mask
	neck_base_mask = None
	face_mask = lmodel.compose_face_drawables(1, mask_only=True, final_visible_mask=True, xyxy=face_xyxy)
	base_neck_ids = set()
	for d in lmodel.drawables:
	if d.face_part_id != 14:
	continue
	d_mask = d.get_full_mask(final_visible_mask=True, xyxy=face_xyxy)
	if np.any(np.bitwise_and(face_mask, d_mask)):
	if neck_base_mask is None:
	neck_base_mask = d_mask
	else:
	neck_base_mask = np.bitwise_or(neck_base_mask, d_mask)
	base_neck_ids.add(d.draw_order)
	continue
	if neck_base_mask is not None:
	# save_tmp_img(neck_base_mask, mask2img=True)
	for d in lmodel.drawables:
	if d.face_part_id != 14 or d.draw_order in base_neck_ids:
	continue
	area, u_area, i_area = d.mask_union_intersection(neck_base_mask[None], face_xyxy, final_vis_mask=True)
	if i_area[0] / (d.final_visible_area + 1e-6) < 0.95:
	d.face_part_id = None

	# fix hat assigned as cloth
	# (cloth whose mean row lies in the top third of the face crop becomes hat)
	for d in lmodel.drawables:
	if d.face_part_id == 16:
	msk = d.get_full_mask(True, face_xyxy)
	if np.any(msk):
	ymean = np.mean(np.where(msk)[0])
	if ymean < 1 / 3 * ch:
	d.face_part_id = 18

	nose_detected = lmodel.face_part_detected(10)
	mouth_detected = lmodel.face_part_detected(11)
	# if not nose_detected:
	# for d in lmodel.drawables:
	# if d.face_part_id != 1:
	# continue
	# if d.face_part_stats['ioa'][10] > 0.4:
	# d.face_part_id = 10
	# nose_detected = True

	# fall back to lmodel.maxios_mindrawable to pick a nose drawable when none was labelled
	if not nose_detected:
	d = lmodel.maxios_mindrawable(10, 0.7, ioa_thr=0.3)
	if d is not None:
	d.face_part_id = 10
	# save_tmp_img(compose_from_drawables([d], xyxy=face_xyxy))
	pass

	# optional preview grid: inputs, both segmentations, and one tile per face group
	if save_preview:
	vis_face_det = visualize_facedet_output(lmodel.final, [facedet])[y1: y2, x1: x2, :3]
	rst_preds = visualize_segs(masks_np, src_img=np.array(face_image), image_weight=0.3)
	rst_preds2 = visualize_segs(masks_np2, src_img=np.array(base_face), image_weight=0.3)
	vis_list = [face_image, rst_preds, vis_face_det, base_face, rst_preds2]
	for flabel, flist in VALID_FACE_GROUPS.items():
	rst = lmodel.compose_face_drawables(face_part_ids=flist, xyxy=face_xyxy)
	rst = Image.fromarray(rst)
	pil_draw_text(rst, flabel, point=(0, 0), stroke_width=2, font_size=int(bw / 5))
	rst = pil_ensure_rgb(rst)
	vis_list.append(rst)
	# save_tmp_img(masks_np[11], mask2img=True)
	imglist2imgrid(vis_list, cols=4, output_type='pil').save(osp.join(lmodel.directory, 'face_parsing_preview.jpg'), q=95)
	# imglist2imgrid(vis_list, cols=4, output_type='pil').save('local_tst.png')
	pass

	# shift the face box by the final_bbox offset before saving
	if lmodel.final_bbox is not None:
	face_xyxy[0] += lmodel.final_bbox[0]
	face_xyxy[2] += lmodel.final_bbox[0]
	face_xyxy[1] += lmodel.final_bbox[1]
	face_xyxy[3] += lmodel.final_bbox[1]
	lmodel.save_face_parsing(metadata=FACE_LABEL2NAME, face_seg_xyxy=face_xyxy)
	pass

	except Exception as e:
	# NOTE(review): the bare `raise` re-raises immediately, so the print below is
	# unreachable and one failing item aborts the whole run - confirm which is intended.
	raise
	print(f'failed to process {srcp}: {e}')



	# CLI command `instance_segmentation`: run apply_instance_segmentation on every image of
	# the exec list and write masks (RLE-encoded), scores and bboxes to `instances.json`
	# in the image's directory.
	@cli.command('instance_segmentation')
	@click.option('--exec_list')
	@click.option('--rank_to_worldsize', default='', type=str)
	def instance_segmentation(exec_list, rank_to_worldsize):

	    from annotators.animeinsseg.instance_segmentation import apply_instance_segmentation

	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    for img_path in tqdm(exec_list):
	        rgb = np.array(pil_ensure_rgb(Image.open(img_path)))
	        instances = apply_instance_segmentation(rgb)

	        # a missing mask set is stored as an empty list
	        raw_masks = instances.masks
	        if raw_masks is None:
	            raw_masks = []
	        payload = {
	            'masks': raw_masks,
	            'scores': instances.scores,
	            'bboxes': instances.bboxes,
	        }
	        payload['masks'] = [mask2rle(m) for m in raw_masks]

	        dict2json(payload, osp.join(osp.dirname(img_path), 'instances.json'))



	# CLI command `sam_infer_l2d`: compose every Live2D model of the exec list from its
	# drawables, run the SemanticSam checkpoint on the RGB channels and save the thresholded
	# masks as `sam_masks.json` in the model directory. Failures are printed per item and
	# the loop carries on.
	@cli.command('sam_infer_l2d')
	@click.option('--exec_list')
	@click.option('--ckpt', default='sam_l2d_19cls_iter2_18k')
	@click.option('--rank_to_worldsize', default='', type=str)
	def sam_infer_l2d(exec_list, ckpt, rank_to_worldsize):

	    from live2d.scrap_model import animal_ear_detected, Drawable, VALID_BODY_PARTS_V1, VALID_BODY_PARTS_V2
	    from utils.cv import (
	        fgbg_hist_matching, quantize_image, random_crop, rle2mask, mask2rle,
	        img_alpha_blending, resize_short_side_to, batch_save_masks, batch_load_masks,
	    )
	    from utils.torch_utils import seed_everything, init_model_from_pretrained
	    from utils.visualize import visualize_segs_with_labels
	    from modules.semanticsam import SemanticSam, Sam
	    import torch

	    # only one checkpoint alias is known
	    if ckpt != 'sam_l2d_19cls_iter2_18k':
	        raise Exception(f'Invalid ckpt: {ckpt}')

	    model: SemanticSam = init_model_from_pretrained(
	        pretrained_model_name_or_path='24yearsold/l2d_sam_iter2',
	        weights_name='checkpoint-18000.pt',
	        module_cls=SemanticSam,
	        download_from_hf=True,
	        model_args=dict(class_num=19)
	    ).to(device='cuda')

	    seed_everything(42)

	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

	    for model_path in tqdm(exec_list[0:]):
	        try:
	            # only the crop box of the detected character is used here
	            _, crop_xyxy, _ = load_detected_character(model_path)

	            lmodel = Live2DScrapModel(model_path, crop_xyxy=crop_xyxy, pad_to_square=False)
	            lmodel.init_drawable_visible_map()
	            composed = compose_from_drawables(lmodel.drawables)
	            save_path = osp.join(lmodel.directory, 'sam_masks.json')

	            with torch.inference_mode():
	                logits = model.inference(composed[..., :3])[0]
	                masks_np = (logits > 0).to(device='cpu', dtype=torch.bool).numpy()

	            batch_save_masks(masks_np, save_path)

	        except Exception as e:
	            print(f'Failed to process {model_path}: {e}')



	# CLI command `infer_synsample_tags`: for every path in the exec list and every requested
	# tag variant, tag the matching .png with apply_wdv3_tagger and write the general tags,
	# comma-separated, to a .txt file next to it. The 'fullpage' variant uses the path stem
	# itself; any other variant appends '_<variant>' to the stem. Missing images are skipped
	# and failures are printed per item.
	@cli.command('infer_synsample_tags')
	@click.option('--exec_list')
	@click.option('--tags', default='objects,fullpage')
	@click.option('--rank_to_worldsize', default='', type=str)
	def infer_synsample_tags(exec_list, tags, rank_to_worldsize):

	    # from annotators.animeinsseg.instance_segmentation import apply_instance_segmentation
	    from annotators.wdv3_tagger import apply_wdv3_tagger
	    import os.path as osp

	    variants = tags.split(',')
	    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
	    img_ext = '.png'

	    # Map every tag to its general tag and report conflicting definitions.
	    # The mapping is not consulted further down (the filter that used it is disabled).
	    tag2generaltag = {}
	    for general_tag, members in json2dict('assets/tagcluster_bodypart_v2.json').items():
	        for member in members:
	            if member in tag2generaltag and tag2generaltag[member] != general_tag:
	                print(f'conflict tag def: {member} - {general_tag}, ' + tag2generaltag[member])
	            tag2generaltag[member] = general_tag
	    valid_taglst = set(tag2generaltag) | {'smile'}

	    for src_path in tqdm(exec_list):
	        stem = osp.splitext(src_path)[0]

	        for variant in variants:
	            p = stem if variant == 'fullpage' else stem + f'_{variant}'

	            try:
	                imgp = p + img_ext
	                if osp.exists(imgp):
	                    img = pil_ensure_rgb(Image.open(imgp))
	                    img_input = img.resize((448, 448), resample=Image.Resampling.LANCZOS)

	                    # only the general tags of the tagger output are written out
	                    _, _, _, _, general = apply_wdv3_tagger(img_input, exclude_cls=exclude_cls)

	                    with open(p + '.txt', 'w', encoding='utf8') as f:
	                        f.write(','.join(list(general)))

	            except Exception as e:
	                print(f'failed on {p}: ', e)


	# Script entry point: hand control to the click command group `cli` when run directly.
	if __name__ == '__main__':
	cli()