Spaces:
Runtime error
Runtime error
| from PIL import Image | |
| import random | |
| import os | |
| import os.path as osp | |
| import numpy as np | |
| from tqdm import tqdm | |
| from einops import reduce | |
| import click | |
| import cv2 | |
| import sys | |
| sys.path.append(osp.dirname(osp.dirname(osp.abspath(__file__)))) | |
| from utils.io_utils import load_exec_list, pil_pad_square, pil_ensure_rgb, imglist2imgrid, find_all_files_with_name, dict2json, json2dict, load_image, save_tmp_img | |
| from live2d.scrap_model import Live2DScrapModel, compose_from_drawables, load_detected_character, init_drawable_visible_map, Drawable | |
| from utils.visualize import pil_draw_text, visualize_segs, VALID_FACE_GROUPS, FACE_LABEL2NAME, visualize_facedet_output, LEFT_EYEBROW, RIGHT_EYEBROW, show_factorization_on_image | |
| from utils.cv import mask2rle, rle2mask | |
# Tag names handed to `apply_wdv3_tagger(..., exclude_cls=exclude_cls)` by the
# tagging commands in this module.
exclude_cls = {
    # subject / count
    '1girl', 'solo', 'elf', 'virtual_youtuber',
    # expression
    'smile', 'blush', 'closed_mouth', 'expressionless',
    # gaze / pose / framing
    'looking_at_viewer', 'looking_to_the_side', 'straight-on',
    'standing', 'full_body', 'tachi-e', 'holding',
    # background
    'simple_background', 'white_background', 'transparent_background',
}
def cli():
    """live2d scripts.

    The body is empty: calling it does nothing and returns ``None``.
    """
    return None
def _write_exec_list(path, entries):
    """Write ``entries`` to ``path`` (UTF-8, one entry per line) and print the location."""
    with open(path, 'w', encoding='utf8') as f:
        f.write('\n'.join(entries))
    print(f'exec list saved to {path}')


def build_live2d_exec_list(srcd, save_dir, filter_p, target_fno, num_chunk, save_name):
    """Collect the ``final`` files under ``srcd`` and save them as exec list(s).

    Args:
        srcd: directory searched with ``find_all_files_with_name(name='final')``.
        save_dir: output directory; ``None`` means ``srcd``. Created if missing.
        filter_p: optional exec list; entries found in it (either the file
            itself or its parent directory) are left out.
        target_fno: when > 0, keep only files whose parent directory name ends
            with ``-<target_fno>``; parents without a numeric suffix are
            reported and skipped. ``None`` or <= 0 disables the filter.
        num_chunk: when > 0, additionally write ``<save_name><i>.txt`` for
            every chunk ``i`` produced by ``load_exec_list(..., world_size=num_chunk)``.
        save_name: stem of the output file(s).
    """
    filter_set = set(load_exec_list(filter_p)) if filter_p is not None else set()
    filter_by_fno = target_fno is not None and target_fno > 0

    tgt_list = []
    for d in find_all_files_with_name(srcd, name='final', exclude_suffix=True):
        parent_dir = osp.dirname(d)
        if d in filter_set or parent_dir in filter_set:
            continue
        if filter_by_fno:
            fno = osp.basename(parent_dir).split('-')[-1]
            if not fno.isdigit():
                print(f'{d} is not a valid path')
                continue
            if int(fno) != target_fno:
                continue
        tgt_list.append(d)

    # Shuffle the entries before they are written / chunked.
    random.shuffle(tgt_list)
    print(f'num samples: {len(tgt_list)}')

    if save_dir is None:
        save_dir = srcd
    if save_dir:
        # The original assumed the directory already existed.
        os.makedirs(save_dir, exist_ok=True)
    _write_exec_list(osp.join(save_dir, f'{save_name}.txt'), tgt_list)

    if num_chunk is not None and num_chunk > 0:
        for ii in range(num_chunk):
            chunk = load_exec_list(tgt_list, ii, world_size=num_chunk)
            _write_exec_list(osp.join(save_dir, f'{save_name}{ii}.txt'), chunk)
            print(f'chunk {ii} num samples: {len(chunk)}')
def _further_extr(*args, **kwargs):
    """Forward all positional and keyword arguments to ``further_extr``.

    Whatever ``further_extr`` returns is dropped; this wrapper returns ``None``.
    """
    _ = further_extr(*args, **kwargs)
# face_parsing mesh file stem -> short eye-part name used by further_extr.
_EYE_MESH_NAMES = {
    '1-2-3-2-2+eyebgs-l': 'eyebgsl',
    '1-2-3-3-2+irides-l': 'iridesl',
    '1-2-3-1-2+eyelashs-l': 'eyelashsl',
    '1-2-3-2-1+eyebgs-r': 'eyebgsr',
    '1-2-3-3-1+irides-r': 'iridesr',
    '1-2-3-1-1+eyelashs-r': 'eyelashsr',
    '1-2-1-1+eyebrows-r': 'eyebrowr',
    '1-2-1-2+eyebrows-l': 'eyebrowl'
}


def _load_eye_parts(face_parsing_dir, eye_mesh_dict):
    """Load the eye-part images listed in ``eye_mesh_dict`` from ``face_parsing_dir``.

    Returns a dict keyed by the short part name. A part that cannot be loaded
    (no ``parts.json``, no image, or an image with no pixel whose last channel
    exceeds 15) is represented as ``{'area': 0}``; otherwise the entry holds
    ``img``, ``xyxy`` (from ``parts.json``), ``mask``, ``rect`` and ``area``.
    """
    parts_json = osp.join(face_parsing_dir, 'parts.json')
    parts = json2dict(parts_json) if osp.exists(parts_json) else None

    eye_parts = {}
    for mesh_key, name in eye_mesh_dict.items():
        imgp = osp.join(face_parsing_dir, mesh_key + '.png')
        if parts is None or not osp.exists(imgp):
            eye_parts[name] = {'area': 0}
            continue
        img = np.array(Image.open(imgp))
        mask = img[..., -1] > 15
        nonzero = cv2.findNonZero(mask.astype(np.uint8))
        if nonzero is None:
            # findNonZero yields None for an empty mask; boundingRect would raise on it.
            eye_parts[name] = {'area': 0}
            continue
        pd = parts[mesh_key]
        x, y, w, h = pd['x'], pd['y'], pd['w'], pd['h']
        rx, ry, rw, rh = cv2.boundingRect(nonzero)
        eye_parts[name] = {
            'img': img,
            'xyxy': [x, y, x + w, y + h],
            'mask': mask,
            # tight box of the mask, shifted by the part offset, as x1, y1, x2, y2
            'rect': [rx + x, ry + y, rx + x + rw, ry + y + rh],
            'area': np.sum(mask)
        }
    return eye_parts


def _split_hair_front_back(drawables):
    """Retag every drawable whose tag contains ``'hair'`` as front or back hair.

    The split index is the largest ``min(face idx, lowest nose idx, lowest
    mouth idx)`` over all face drawables; hair with a larger ``idx`` becomes
    ``'front hair'``, everything else ``'back hair'``.
    """
    sentinel = len(drawables) + 1
    nose_min_idx = mouth_min_idx = sentinel
    for d in drawables:
        if d.body_part_tag == 'nose':
            nose_min_idx = min(d.idx, nose_min_idx)
        elif d.body_part_tag == 'mouth':
            mouth_min_idx = min(d.idx, mouth_min_idx)

    hair_split_idx = -1
    for d in drawables:
        if d.body_part_tag == 'face':
            hair_split_idx = max(hair_split_idx, min(d.idx, nose_min_idx, mouth_min_idx))

    for d in drawables:
        if d.body_part_tag is None or 'hair' not in d.body_part_tag:
            continue
        d.body_part_tag = 'front hair' if d.idx > hair_split_idx else 'back hair'


def _union_xyxy(eye_parts, names, width, height):
    """Return the union box of the listed parts' ``xyxy``; ``[width, height, 0, 0]`` if none has one."""
    box = [width, height, 0, 0]
    for name in names:
        xyxy = eye_parts[name].get('xyxy')
        if xyxy is None:
            continue
        box[0] = min(box[0], xyxy[0])
        box[1] = min(box[1], xyxy[1])
        box[2] = max(box[2], xyxy[2])
        box[3] = max(box[3], xyxy[3])
    return box


def _eye_iou(entry):
    """Intersection / union of an ``[area, u_area, i_area, part_area]`` entry, 0 when unusable."""
    if entry is None:
        return 0
    u_area, i_area = entry[1], entry[2]
    if u_area is None or i_area is None or i_area <= 0 or u_area <= 0:
        return 0
    return i_area / u_area


def _rect_strictly_inside(xyxy1, part):
    """True when ``xyxy1`` lies strictly inside ``part['xyxy']``; False if the part has no box."""
    if 'xyxy' not in part:
        return False
    xyxy2 = part['xyxy']
    return xyxy1[0] > xyxy2[0] and xyxy1[1] > xyxy2[1] and xyxy1[2] < xyxy2[2] and xyxy1[3] < xyxy2[3]


def _classify_eye_drawable(d, eye_parts, eyel_xyxy, eyer_xyxy):
    """Pick ``'irides'``, ``'eyebg'``, ``'eyelash'`` or ``'eyebrow'`` for one ``'eyes'`` drawable.

    The drawable is compared with every available eye-part mask through
    ``d.mask_union_intersection``. The part kind with the highest positive
    intersection-over-union wins. When nothing overlaps, the drawable's box
    position relative to the eye boxes / part boxes decides.
    """
    eye_scores = {}
    for ek, ed in eye_parts.items():
        if ed['area'] == 0:
            eye_scores[ek] = [None] * 4
            continue
        area, u_area, i_area = d.mask_union_intersection(ed['mask'], ed['xyxy'], final_vis_mask=True)
        eye_scores[ek] = [area, u_area, i_area, ed['area']]

    def _first_available(*names):
        for name in names:
            if eye_scores[name][0] is not None:
                return eye_scores[name]
        return None

    # NOTE(review): irides/eyebg prefer the left side and eyelash/eyebrow the
    # right side whenever that part exists, regardless of which side the
    # drawable overlaps. Kept as in the original; confirm this is intended.
    irides_scores = bg_scores = None
    if eye_scores['iridesl'][0] is not None:
        irides_scores, bg_scores = eye_scores['iridesl'], eye_scores['eyebgsl']
    elif eye_scores['iridesr'][0] is not None:
        irides_scores, bg_scores = eye_scores['iridesr'], eye_scores['eyebgsr']
    eyelash_scores = _first_available('eyelashsr', 'eyelashsl')
    eyebrow_scores = _first_available('eyebrowr', 'eyebrowl')

    scores = {'irides': 0, 'eyebg': 0, 'eyelash': 0, 'eyebrow': 0}
    iou_irides, iou_bg = _eye_iou(irides_scores), _eye_iou(bg_scores)
    # irides and eyebg are only scored together, as in the original condition.
    if iou_irides > 0 and iou_bg > 0:
        scores['irides'] = iou_irides
        scores['eyebg'] = iou_bg
    scores['eyelash'] = _eye_iou(eyelash_scores)
    scores['eyebrow'] = _eye_iou(eyebrow_scores)

    best = max(scores, key=scores.get)
    if scores[best] > 0:
        return best

    # No overlap with any part mask: fall back to box geometry.
    _, y1, _, y2 = d.xyxy
    y = (y1 + y2) / 2
    if y < eyel_xyxy[1] or y < eyer_xyxy[1]:
        return 'eyebrow'
    # The original passed eye_scores (lists) here, so these tests could never succeed.
    if _rect_strictly_inside(d.xyxy, eye_parts['iridesl']) or _rect_strictly_inside(d.xyxy, eye_parts['iridesr']):
        return 'irides'
    if _rect_strictly_inside(d.xyxy, eye_parts['eyebgsl']) or _rect_strictly_inside(d.xyxy, eye_parts['eyebgsr']):
        return 'eyebg'
    return 'eyelash'


def further_extr(exec_list, rank_to_worldsize=None, save_name=None):
    """Refine the body-part tags of every model in ``exec_list`` and save them.

    For each model directory this
      * loads the eye-part images from ``<dir>/face_parsing``,
      * splits hair drawables into ``'front hair'`` / ``'back hair'``,
      * relabels drawables tagged ``'eyes'`` as ``'irides'``, ``'eyebg'``,
        ``'eyelash'`` or ``'eyebrow'``,
      * writes the result with ``lmodel.save_body_parsing``.

    Args:
        exec_list: anything accepted by ``load_exec_list``.
        rank_to_worldsize: forwarded to ``load_exec_list``.
        save_name: forwarded to ``save_body_parsing``.

    Raises:
        Any exception raised while processing a model is re-raised after the
        failing path has been printed.
    """
    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
    for p in tqdm(exec_list):
        try:
            eye_parts = _load_eye_parts(osp.join(p, 'face_parsing'), _EYE_MESH_NAMES)

            lmodel = Live2DScrapModel(p, pad_to_square=False, crop_to_final=False)
            lmodel.init_drawable_visible_map()
            lmodel.load_body_parsing()

            _split_hair_front_back(lmodel.drawables)

            height, width = lmodel.final.shape[0], lmodel.final.shape[1]
            eyel_xyxy = _union_xyxy(eye_parts, ('iridesl', 'eyebgsl', 'eyelashsl'), width, height)
            eyer_xyxy = _union_xyxy(eye_parts, ('iridesr', 'eyebgsr', 'eyelashsr'), width, height)

            for d in lmodel.drawables:
                if d.body_part_tag != 'eyes':
                    continue
                d.body_part_tag = _classify_eye_drawable(d, eye_parts, eyel_xyxy, eyer_xyxy)

            if lmodel._body_parsing is not None:
                metadata = lmodel._body_parsing['metadata']
            else:
                metadata = {}
            lmodel.save_body_parsing(save_name=save_name, metadata=metadata)
        except Exception as e:
            # The original re-raised before its (unreachable) print; report first, then propagate.
            print(f'failed to process {p}: {e}')
            raise
def propagate_invisible_parts(lmodel: Live2DScrapModel):
    """Give untagged drawables the most common tag of their nearest tagged folder.

    Tagged drawables vote for their tag in the folder ``osp.dirname(d.did)``
    (``'_root'`` for an empty dirname). Each untagged drawable then walks from
    its own folder towards the root and takes the tag with the most votes from
    the first folder that has any; that assignment is itself counted as a
    vote. Drawables whose ``did`` is ``None`` are ignored, and a drawable with
    no tagged ancestor folder stays untagged.

    Args:
        lmodel: object exposing ``drawables``; each drawable needs ``did``,
            ``tag`` and ``set_tag``.
    """
    voting_tree = {}
    for d in lmodel.drawables:
        if d.tag is None or d.did is None:
            continue
        folder = osp.dirname(d.did) or '_root'
        folder_votes = voting_tree.setdefault(folder, {})
        folder_votes[d.tag] = folder_votes.get(d.tag, 0) + 1

    for d in lmodel.drawables:
        if d.tag is not None or d.did is None:
            continue
        folder = osp.dirname(d.did)
        while True:
            key = folder or '_root'
            folder_votes = voting_tree.get(key)
            if folder_votes:
                target_tag = max(folder_votes, key=folder_votes.get)
                folder_votes[target_tag] += 1
                d.set_tag(target_tag)
                break
            if key == '_root':
                break
            # The original stopped at the first folder without votes; keep
            # climbing instead so higher-level folders can supply the tag.
            upper = osp.dirname(folder)
            if upper == folder:
                # dirname() is a fixed point (e.g. '/'): nothing above to visit.
                break
            folder = upper
def assign_tag_by_path(lmodel: Live2DScrapModel):
    """Correct drawable tags using keywords found in the lower-cased ``did``.

    Drawables without a ``did`` and drawables tagged ``'objects'`` are left
    alone. For the rest, a first pass picks a tag from the path depending on
    the current tag, and a second pass overrides clothing / hair (and, for
    some keywords, face) tags when the path names ears, neck, hands / arms,
    eyes, mouth or nose. All matching is by plain substring.
    """
    did_contain_arms = any(
        d.did is not None and 'arm' in d.did.lower() for d in lmodel.drawables
    )

    # (keyword, tag) pairs tried in order for untagged drawables; the first
    # keyword found ends the search. 'body' only assigns a tag for 'body2'.
    untagged_rules = (
        ('hair', 'hair'),
        ('arm', 'handwear'),
        ('mouth', 'mouth'),
        ('body', None),
        ('face', 'face'),
        ('ear', 'ears'),
        ('eye', 'eyes'),
        ('leg', 'legwear'),
    )
    wear_or_hair = {'legwear', 'topwear', 'bottomwear', 'hair', None}
    wear_hair_or_face = wear_or_hair | {'face'}
    # (keywords, tags that may be overridden, new tag); first match wins.
    override_rules = (
        (('ear',), wear_hair_or_face, 'ears'),
        (('neck',), wear_hair_or_face, 'neck'),
        (('hand', 'arm'), wear_or_hair, 'handwear'),
        (('eye',), wear_or_hair, 'eyes'),
        (('mouth',), wear_or_hair, 'mouth'),
        (('nose',), wear_hair_or_face, 'nose'),
    )

    for d in lmodel.drawables:
        if d.did is None or d.tag == 'objects':
            continue
        path = d.did.lower()
        current = d.tag

        if current is None:
            for keyword, new_tag in untagged_rules:
                if keyword not in path:
                    continue
                if keyword == 'body':
                    if 'body2' in path:
                        d.set_tag('bottomwear')
                else:
                    d.set_tag(new_tag)
                break
        elif current == 'hair':
            if 'face' in path:
                d.set_tag('face')
            elif 'arm' in path:
                d.set_tag('handwear')
            elif 'body' in path and 'hair' not in path:
                d.set_tag('topwear')
        elif current == 'handwear':
            if did_contain_arms and 'body' in path and 'arm' not in path and 'body2' in path:
                d.set_tag('bottomwear')
            if 'hair' in path:
                d.set_tag('hair')
        elif current == 'topwear':
            if 'hair' in path:
                d.set_tag('headwear')
            elif 'arm' in path:
                d.set_tag('handwear')
        elif current == 'bottomwear':
            if 'hair' in path:
                d.set_tag('headwear')
        elif 'arm' in path:
            d.set_tag('handwear')

        # Second pass works on the tag as it stands after the first pass.
        current = d.tag
        for keywords, replaceable, new_tag in override_rules:
            if current in replaceable and any(k in path for k in keywords):
                d.set_tag(new_tag)
                break
def label_l2d_wsamsegs(exec_list, save_dir, extr_more, rank_to_worldsize):
    """Tag each drawable with the body part whose SAM mask covers it best.

    For every model directory the masks in ``sam_masks.json`` are decoded and
    each drawable with a visible area gets the entry of
    ``VALID_BODY_PARTS_V2`` whose mask covers the largest fraction of that
    area. A ``'legwear'`` result is turned into ``'footwear'`` when the
    footwear mask covers more than half of the drawable. Tags are then
    adjusted by ``assign_tag_by_path`` and ``propagate_invisible_parts`` and
    saved as ``body_parsing``.

    Args:
        exec_list: anything accepted by ``load_exec_list``.
        save_dir: created when non-empty; not otherwise used here.
        extr_more: when truthy, ``further_extr`` runs on ``exec_list`` afterwards.
        rank_to_worldsize: forwarded to ``load_exec_list``.

    Raises:
        Any exception raised while processing a model is re-raised after the
        failing path has been printed.
    """
    from live2d.scrap_model import VALID_BODY_PARTS_V2
    from utils.torch_utils import seed_everything

    seed_everything(42)
    exec_listp = exec_list
    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    mask_name = 'sam_masks.json'
    # assumes sam_masks.json stores one mask per VALID_BODY_PARTS_V2 entry, in
    # the same order -- TODO confirm against the script that writes it
    footwear_idx = VALID_BODY_PARTS_V2.index('footwear')

    for p in tqdm(exec_list):
        try:
            _, crop_xyxy, _ = load_detected_character(p)
            lmodel = Live2DScrapModel(p, crop_xyxy=crop_xyxy, pad_to_square=False)
            if lmodel._body_parsing is not None:
                metadata = lmodel._body_parsing['metadata']
            else:
                metadata = {}

            masks_ann = json2dict(osp.join(lmodel.directory, mask_name))
            sam_masks = [rle2mask(m, to_bool=True) for m in masks_ann]
            init_drawable_visible_map(lmodel.drawables)

            for tg in lmodel.drawables:
                if tg.final_visible_area < 1:
                    continue
                score_list = []
                for m in sam_masks:
                    _, _, i_area = tg.mask_union_intersection(m, final_vis_mask=True)
                    if i_area is None:
                        i_area = -1
                    # fraction of the drawable's visible area covered by this mask
                    score_list.append(i_area / tg.final_visible_area)

                tg.body_part_tag = VALID_BODY_PARTS_V2[int(np.argmax(np.array(score_list)))]
                # The original wrote score_list[index > 0.5], i.e. it indexed the
                # list with a boolean instead of comparing the footwear score.
                if tg.body_part_tag == 'legwear' and score_list[footwear_idx] > 0.5:
                    tg.body_part_tag = 'footwear'

            assign_tag_by_path(lmodel)
            propagate_invisible_parts(lmodel)
            lmodel.save_body_parsing(metadata=metadata, save_name='body_parsing')
        except Exception as e:
            # The original re-raised before its (unreachable) print; report first, then propagate.
            print(f'Failed to process {p}: {e}')
            raise

    if extr_more:
        # NOTE(review): rank_to_worldsize is not forwarded, so every rank
        # re-processes the whole list here -- confirm whether that is intended.
        further_extr(exec_listp)
def gradcam_heatmap(image_file, savep, method, model_type, gen_threshold, eigen_smooth, aug_smooth, device):
    """Save a grid with one Grad-CAM overlay per general tag of ``image_file``.

    The image is cropped to the bounding box of its last channel, converted to
    RGB, padded to a square and resized to 448x448 before tagging. When
    ``savep`` is ``None`` the grid goes to
    ``workspace/<parent dir name>_<model_type>_<method>.png``.
    """
    from annotators.wdv3_tagger import apply_wdv3_tagger, get_tagger_and_transform
    from annotators.gradcam import apply_gradcam
    from pytorch_grad_cam.utils.image import show_cam_on_image

    if savep is None:
        os.makedirs('workspace', exist_ok=True)
        stem = '_'.join([osp.basename(osp.dirname(image_file)), model_type, method])
        savep = osp.join('workspace', stem + '.png')

    source: Image.Image = Image.open(image_file)
    content_box = source.split()[-1].getbbox()
    # RGB conversion, crop to content, then square padding
    canvas, _ = pil_pad_square(pil_ensure_rgb(source).crop(content_box))
    canvas = canvas.resize((448, 448), resample=Image.Resampling.LANCZOS)

    caption, taglist, ratings, character, general = apply_wdv3_tagger(
        canvas, model_type=model_type, exclude_cls=exclude_cls, gen_threshold=gen_threshold)
    _, transform, labels = get_tagger_and_transform(model_type)
    # reverse the channel order of the transformed batch
    inputs = transform(canvas).unsqueeze(0)[:, [2, 1, 0]]

    overlays = []
    for tag_name, tag_info in tqdm(general.items()):
        cam = apply_gradcam(inputs, tag_info[1], method=method, model_type=model_type,
                            eigen_smooth=eigen_smooth, aug_smooth=aug_smooth, device=device)
        overlay = show_cam_on_image(np.array(canvas)[..., ::-1] / 255., cam[0, :])
        overlay = cv2.putText(overlay, tag_name, (10, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                              color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)
        overlays.append(overlay)

    grid = imglist2imgrid(overlays)
    Image.fromarray(grid[..., ::-1]).save(savep)
    print(f'result saved to {savep}')
def infer_bizarre_tagger(exec_list, detected_instanec_only, rank_to_worldsize):
    """Run the bizarre-tagger pose estimator on every model and save ``bizarre_pos.json``.

    A failure on one model is printed and the loop moves on to the next one.
    With ``detected_instanec_only`` set, models without a detected character
    instance are skipped.
    """
    from annotators.bizarre_tagger import apply_pos_estimator

    for model_dir in tqdm(load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)):
        try:
            if osp.isfile(model_dir):
                model_dir = osp.dirname(model_dir)

            crop_xyxy = None
            if detected_instanec_only:
                instance_mask, crop_xyxy, _ = load_detected_character(model_dir)
                if instance_mask is None:
                    print(f'skip {model_dir}, no character instance detected')
                    continue

            lmodel = Live2DScrapModel(model_dir, crop_xyxy=crop_xyxy, crop_to_final=True, pad_to_square=False)
            model_dir = lmodel.directory

            rgb_input = pil_ensure_rgb(Image.fromarray(lmodel.final))
            # last channel of the composite, scaled to [0, 1], is passed as the mask
            soft_mask = lmodel.final[..., -1].astype(np.float32) / 255.
            kps, scores, bbox = apply_pos_estimator(rgb_input, mask=soft_mask)

            result = {
                'transform_stats': {'crop_xyxy': lmodel.final_bbox},
                'pos': list(kps),
                'scores': scores,
            }
            dict2json(result, osp.join(model_dir, 'bizarre_pos.json'))
        except Exception as e:
            print(f'failed to process {model_dir}: {e}')
def _langsam_pack_instance(ins, pad=None):
    """Make one LangSAM prediction dict serialisable, in place, and return it.

    Masks with a leading singleton axis are squeezed, optionally zero-padded
    by ``pad = [left, top, right, bottom]`` pixels and RLE-encoded with
    ``mask2rle``; ``boxes`` is turned into a plain list.
    """
    masks = [np.squeeze(m, 0) if m.ndim == 3 else m for m in ins['masks']]
    if pad is not None:
        left, top, right, bottom = pad
        masks = [
            cv2.copyMakeBorder(m.astype(np.uint8), top, bottom, left, right,
                               value=0, borderType=cv2.BORDER_CONSTANT)
            for m in masks
        ]
    # NOTE(review): boxes predicted on a head crop are stored unshifted while
    # the masks are padded back to full size -- confirm consumers expect that.
    ins['boxes'] = list(ins['boxes'])
    ins['masks'] = [mask2rle(m) for m in masks]
    return ins


def _langsam_head_crop(lmodel, width, height):
    """Return ``(crop_box, pad)`` around the first detected face, or ``(None, None)``.

    The face box is enlarged on every side by its longer edge and clipped to
    the image. ``pad`` is ``[left, top, right, bottom]``, the border needed to
    restore a crop-sized mask to ``width`` x ``height``. No crop is returned
    when no face was detected, the margin is not positive, the crop is empty
    or it already spans the whole image.
    """
    if not lmodel.face_detected():
        return None, None
    x1, y1, x2, y2 = lmodel.facedet[0]['bbox'][:4]
    margin = int(round(max(x2 - x1, y2 - y1) * 1.0))
    if margin <= 0:
        return None, None
    crop = [int(v) for v in (max(x1 - margin, 0), max(y1 - margin, 0),
                             min(x2 + margin, width), min(y2 + margin, height))]
    crop_w, crop_h = crop[2] - crop[0], crop[3] - crop[1]
    pad = [crop[0], crop[1], width - crop[2], height - crop[3]]
    if crop_w <= 0 or crop_h <= 0 or not any(pad):
        return None, None
    return crop, pad


def infer_langsam(exec_list, box_threshold, text_threshold, detected_instanec_only, rank_to_worldsize, skip_exists):
    """Run LangSAM text-prompted segmentation on every model and save ``langsam_masks.json``.

    Body prompts are predicted on the whole composite; the head prompts
    (mouth, nose, ears) are predicted on a crop around the detected face when
    one is available, and their masks are padded back to the full size.

    Args:
        exec_list: anything accepted by ``load_exec_list``.
        box_threshold, text_threshold: forwarded to ``predict_multi_prompts``.
        detected_instanec_only: skip models without a detected character
            instance and crop the others to that instance.
        rank_to_worldsize: forwarded to ``load_exec_list``.
        skip_exists: reuse an existing result file and only predict the
            prompts it does not contain yet.

    A failure on one model is printed and processing continues. Garbage
    collection and CUDA cache cleanup run after every model, including
    skipped and failed ones.
    """
    import torch
    import gc
    from annotators.lang_sam import LangSAM

    model = LangSAM(sam_type="sam2.1_hiera_large")
    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
    prompts = ['face', 'hair', 'hand', 'feet', 'leg', 'dress', 'shirt', 'skirt', 'jacket', 'neck', 'outfit', 'shoes']
    prompt_list_head = ['mouth', 'nose', 'ears']
    crop_head_for_head_prompt = True

    for model_dir in tqdm(exec_list):
        try:
            if osp.isfile(model_dir):
                model_dir = osp.dirname(model_dir)
            crop_xyxy = None
            if detected_instanec_only:
                instance_mask, crop_xyxy, _ = load_detected_character(model_dir)
                if instance_mask is None:
                    print(f'skip {model_dir}, no character instance detected')
                    continue
            lmodel = Live2DScrapModel(model_dir, crop_xyxy=crop_xyxy, crop_to_final=True, pad_to_square=False)
            model_dir = lmodel.directory
            img_input = pil_ensure_rgb(Image.fromarray(lmodel.final))

            savep = osp.join(model_dir, 'langsam_masks.json')
            if skip_exists and osp.exists(savep):
                save_rst = json2dict(savep)
            else:
                save_rst = {'transform_stats': {'crop_xyxy': lmodel.final_bbox}, 'instances': {}}
            instances = save_rst['instances']

            # Whole-image prompts.
            if skip_exists:
                prompt_list = [k for k in prompts if k not in instances]
            else:
                prompt_list = prompts
            if len(prompt_list) > 0:
                rst = model.predict_multi_prompts(img_input, prompt_list, box_threshold=box_threshold, text_threshold=text_threshold)
                for prompt, ins in zip(prompt_list, rst):
                    instances[prompt] = _langsam_pack_instance(ins)

            # Head prompts, predicted on a face-centred crop when possible.
            if skip_exists:
                prompt_list = [k for k in prompt_list_head if k not in instances]
            else:
                prompt_list = prompt_list_head
            if len(prompt_list) > 0:
                head_input, head_pad = img_input, None
                if crop_head_for_head_prompt:
                    head_crop, head_pad = _langsam_head_crop(lmodel, img_input.width, img_input.height)
                    if head_crop is not None:
                        head_input = img_input.crop(head_crop)
                rst = model.predict_multi_prompts(head_input, prompt_list, box_threshold=box_threshold, text_threshold=text_threshold)
                for prompt, ins in zip(prompt_list, rst):
                    instances[prompt] = _langsam_pack_instance(ins, pad=head_pad)

            dict2json(save_rst, savep)
        except Exception as e:
            print(f'failed to process {model_dir}: {e}')
        finally:
            # Release Python and CUDA memory before the next model.
            gc.collect()
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
def parse_live2d(exec_list, method, model_type, gen_threshold, eigen_smooth, aug_smooth, save_gradcam_heatmap, device, tag_only, detected_instanec_only, rank_to_worldsize):
    """Tag every model with the WD v3 tagger and accumulate per-tag Grad-CAM statistics.

    ``general_tags.json`` is always written. Unless ``tag_only`` is set, a
    Grad-CAM score map is computed for every general tag and passed to
    ``model.update_tag_stats``, and the statistics are stored with
    ``model.save_tag_stats``. With ``save_gradcam_heatmap`` a preview grid
    ``heatmap_<model_type>_<method>.png`` is saved next to the model. A
    failure on one model is printed and the loop continues.
    """
    from annotators.wdv3_tagger import apply_wdv3_tagger, get_tagger_and_transform
    from annotators.gradcam import apply_gradcam
    from pytorch_grad_cam.utils.image import show_cam_on_image

    for model_dir in tqdm(load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)):
        try:
            crop_xyxy = None
            if detected_instanec_only:
                instance_mask, crop_xyxy, _ = load_detected_character(model_dir)
                if instance_mask is None:
                    print(f'skip {model_dir}, no character instance detected')
                    continue

            model = Live2DScrapModel(model_dir, target_frame_size=448, crop_to_final=True,
                                     pad_to_square=True, crop_xyxy=crop_xyxy, pad_drawable_img=False)
            model_dir = model.directory
            img_input = pil_ensure_rgb(Image.fromarray(model.final))

            caption, taglist, ratings, character, general = apply_wdv3_tagger(
                img_input, model_type=model_type, exclude_cls=exclude_cls, gen_threshold=gen_threshold)
            dict2json(general, osp.join(model_dir, 'general_tags.json'))
            if tag_only:
                continue

            model.init_drawable_visible_map()
            _, transform, labels = get_tagger_and_transform(model_type)
            # reverse the channel order of the transformed batch
            inputs = transform(img_input).unsqueeze(0)[:, [2, 1, 0]]

            previews = []
            for cls_name, cls_info in general.items():
                cls_idx = cls_info[1]
                score_map = apply_gradcam(inputs, cls_idx, method=method, model_type=model_type,
                                          eigen_smooth=eigen_smooth, aug_smooth=aug_smooth, device=device)
                model.update_tag_stats(score_map[0], cls_idx, cls_name, filter_scoremap=True)
                if not save_gradcam_heatmap:
                    continue
                overlay = show_cam_on_image(np.array(img_input)[..., ::-1] / 255., score_map[0])
                overlay = cv2.putText(overlay, cls_name, (10, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.9,
                                      color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)
                previews.append(overlay)

            if save_gradcam_heatmap:
                grid = imglist2imgrid(previews, cols=6)
                savep = osp.join(model_dir, 'heatmap_' + model_type + '_' + method + '.png')
                Image.fromarray(grid[..., ::-1]).save(savep)
            model.save_tag_stats()
        except Exception as e:
            print(f'failed to process {model_dir}: {e}')
| # # assign drawable to the tag with maximum | |
| # avgscore_lst = [] | |
| # for tag, tag_info in model.tag_stats.items(): | |
| # avgscore_map = np.zeros_like(model.final[..., 0]).astype(np.float32) | |
| # for drawable in model.drawables: | |
| # if drawable.final_visible_area < 1: | |
| # continue | |
| # x1, y1, x2, y2 = drawable.xyxy | |
| # avgscore_map[y1: y2, x1: x2] += drawable.final_visible_mask.astype(np.float32) * drawable.tag_stats[tag]['avg_score'] | |
| # avgscore_lst.append(avgscore_map) | |
| # avgscore_lst = np.stack(avgscore_lst).clip(0, 1) | |
| # concept_labels = list(model.tag_stats.keys()) | |
| # vis = show_factorization_on_image(model.final[..., :3] / 255., avgscore_lst, concept_labels=concept_labels, image_weight=0.1, visible_mask=model.final_visible_mask[..., None]) | |
| # Image.fromarray(vis).save(osp.join(model_dir, 'segmentation_' + model_type + '_' + method + '.png')) | |
def _clean_tag_lines(lines, splitters=(',', '|')):
    """Normalise raw tag lines: lower-case, drop ``#`` lines, keep the text before each splitter."""
    cleaned = []
    for line in lines:
        line = line.strip().lower()
        if line.startswith('#'):
            continue
        for sep in splitters:
            line = line.split(sep)[0].strip()
        if line:
            # spaces inside a tag become underscores
            cleaned.append('_'.join(line.split(' ')))
    return cleaned


def dump_body_tags(src_dir, savep):
    """Read every tag file in ``src_dir`` and dump ``{file name: cleaned tags}`` to ``savep``.

    Each line is stripped and lower-cased; lines starting with ``#`` are
    skipped; only the text before the first ``,`` / ``|`` is kept; empty
    results are dropped and spaces are replaced by underscores. Entries of
    ``src_dir`` that are not regular files are ignored.

    Args:
        src_dir: directory holding the UTF-8 tag files.
        savep: output path passed to ``dict2json``.
    """
    from utils.io_utils import dict2json

    tag_set_cleaned = {}
    for name in os.listdir(src_dir):
        p = osp.join(src_dir, name)
        if not osp.isfile(p):
            # open() would fail on a sub-directory
            continue
        with open(p, 'r', encoding='utf8') as f:
            lines = f.read().split('\n')
        tag_set_cleaned[name] = _clean_tag_lines(lines)
    dict2json(tag_set_cleaned, savep)
def facedet(exec_list, twopass, rank_to_worldsize, skip_exists):
    """Detect the face of every model and save the predictions to ``facedet.json``.

    ``exec_list`` may be a ``.json`` / ``.json.gz`` file whose keys are the
    paths, or anything ``load_exec_list`` accepts. Only the highest-scoring
    prediction is post-processed: its box and keypoints are rounded, scores
    are scaled by 100, and the box is shifted by ``lmodel.final_bbox``. With
    ``twopass`` the eyebrow keypoints are replaced by those of a second
    detection on a composite of the drawables lying mostly inside the face
    box. A failure on one model is printed and the loop continues.
    """
    from annotators import anime_face_detector

    if exec_list.endswith(('.json', '.json.gz')):
        exec_list = list(json2dict(exec_list).keys())
    exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)

    device = 'cuda'  #@param ['cuda:0', 'cpu']
    model = 'yolov3'  #@param ['yolov3', 'faster-rcnn']
    detector = anime_face_detector.create_detector(model, device=device)

    if skip_exists:
        pending = []
        for srcp in exec_list:
            if osp.isfile(srcp):
                srcp = osp.dirname(srcp)
            if osp.exists(osp.join(srcp, 'facedet.json')):
                print(f'skip {srcp} due to result exists')
                continue
            pending.append(srcp)
        exec_list = pending

    for srcp in tqdm(exec_list):
        try:
            if osp.isfile(srcp):
                srcp = osp.dirname(srcp)
            savep = osp.join(srcp, 'facedet.json')

            lmodel = Live2DScrapModel(srcp, crop_to_final=True, pad_to_square=False)
            image = Image.fromarray(lmodel.final)
            if twopass:
                lmodel.init_drawable_visible_map()
            image = cv2.cvtColor(np.array(pil_ensure_rgb(image)), cv2.COLOR_RGB2BGR)

            preds = detector(image)
            if len(preds) > 0:
                pred = max(preds, key=lambda x: x['bbox'][-1])
                bbox, keypoints = pred['bbox'], pred['keypoints']

                if twopass:
                    x1, y1, x2, y2, _ = np.round(bbox.copy()).astype(np.int32)
                    xyxy = [x1, y1, x2, y2]
                    # keep drawables with at least 80% of their visible area inside the face box
                    inside = []
                    for drawable in lmodel.drawables:
                        if drawable.final_visible_area < 1:
                            continue
                        bbox_i, vis_mask = drawable.get_vis_mask(xyxy, final_vis_mask=True)
                        if bbox_i is None or vis_mask.sum() / drawable.final_visible_area < 0.8:
                            continue
                        inside.append(drawable)
                    face_crop = compose_from_drawables(inside, xyxy=xyxy)
                    second = detector(face_crop, boxes=[np.array([0, 0, x2 - x1, y2 - y1, 1])])
                    keypoints2 = second[0]['keypoints']
                    keypoints2[:, 0] += x1 + lmodel.final_bbox[0]
                    keypoints2[:, 1] += y1 + lmodel.final_bbox[1]
                    keypoints[LEFT_EYEBROW] = keypoints2[LEFT_EYEBROW]
                    keypoints[RIGHT_EYEBROW] = keypoints2[RIGHT_EYEBROW]

                # score -> rounded percent, coordinates -> rounded integers
                bbox[-1] = np.round(bbox[-1] * 100)
                bbox[:-1] = np.round(bbox[:-1])
                pred['bbox'] = bbox.astype(np.int32)
                if lmodel.final_bbox is not None:
                    pred['bbox'][[0, 2]] += lmodel.final_bbox[0]
                    pred['bbox'][[1, 3]] += lmodel.final_bbox[1]
                keypoints[:, 2] = np.round(keypoints[:, 2] * 100)
                keypoints[:, :2] = np.round(keypoints[:, :2])
                pred['keypoints'] = list(keypoints.astype(np.int32))

            dict2json(preds, savep)
        except Exception as e:
            print(f'failed to process {srcp}: {e}')
def hflip_aug_mask(mask: np.ndarray, x: int, aug=True):
    """Split a mask into left and right halves, optionally mirroring it first.

    Args:
        mask: (h, w) or (c, h, w). A 3-D mask is collapsed to 2-D: the single
            channel is taken, or several channels are OR-ed together.
        x: column of the mirror axis.
        aug: when true, the widest column span centred on ``x`` that fits in
            the image is OR-ed with its horizontal flip.

    Returns:
        ``(mask_l, mask_r)``: copies of the (augmented) mask with the columns
        from ``w // 2`` onwards, respectively before ``w // 2``, set to zero.

    The input array is never modified; the original wrote into it for 2-D and
    single-channel inputs.
    """
    if mask.ndim == 3:
        if mask.shape[0] == 1:
            mask = mask[0]
        else:
            mask = np.logical_or.reduce(mask, axis=0)
    # work on a private copy so the caller's array stays untouched
    mask = np.array(mask, copy=True)

    w = mask.shape[1]
    mid = w // 2
    if x <= mid:
        x1, x2 = 0, x * 2
    else:
        x1, x2 = w - (w - x) * 2, w

    if aug:
        span = mask[:, x1: x2]
        mask[:, x1: x2] = np.bitwise_or(span, span[:, ::-1])

    mask_l = mask.copy()
    mask_l[:, mid:] = 0
    mask[:, :mid] = 0
    return mask_l, mask
def part_morph_transform(masks, target_cls, ksize=1, op='dilate'):
    """Dilate or erode ``masks[target_cls]`` in place with an elliptical kernel.

    The kernel is ``(2 * ksize + 1)`` pixels wide and high, anchored at
    ``(ksize, ksize)``. Any ``op`` other than ``'dilate'`` erodes. The result
    is written back as a boolean array; nothing is returned.
    """
    source = masks[target_cls].astype(np.uint8)
    side = 2 * ksize + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (side, side), (ksize, ksize))
    morph = cv2.dilate if op == 'dilate' else cv2.erode
    masks[target_cls] = morph(source, kernel).astype(bool)
def split_lr_part(lmodel: Live2DScrapModel, target_ids: list):
    """Split the drawables whose ``face_part_id`` is in ``target_ids`` into two groups by x position.

    The horizontal centre of each drawable's visible mask is clustered with
    2-means; the cluster with the smaller centre receives ``target_ids[0]``
    and the other ``target_ids[1]``.

    Returns:
        ``False`` when fewer than two drawables match, ``True`` otherwise.
    """
    from sklearn.cluster import MiniBatchKMeans, KMeans

    members, centres = [], []
    for d in lmodel.drawables:
        if d.face_part_id not in target_ids:
            continue
        bx, by, bw, bh = cv2.boundingRect(cv2.findNonZero(d.final_visible_mask.astype(np.uint8)))
        bx += d.x
        by += d.y
        centres.append(bx + bw / 2)
        members.append(d)
    if len(members) < 2:
        return False

    centres = np.array(centres)
    # normalise around the mean; the epsilon guards against a zero mean
    features = centres[:, None] / (np.mean(centres) + 1e-6) - 1
    kmeans = KMeans(2, max_iter=50).fit(features)
    labels = kmeans.predict(features)
    if kmeans.cluster_centers_[0, 0] > kmeans.cluster_centers_[1, 0]:
        # make label 0 the cluster with the smaller x
        labels = 1 - labels
    for d, side in zip(members, labels):
        d.face_part_id = target_ids[side]
    return True
def split_lr_mask(masks: np.ndarray, split_channels):
    """Unfinished stub: merges the selected channels and locates their pixels.

    The channels ``masks[split_channels]`` are OR-ed along axis 0 and the
    coordinates of the non-zero pixels are computed, but nothing is done with
    them yet; the function always returns None.
    """
    merged = np.logical_or.reduce(masks[split_channels], axis=0)
    xs = np.where(merged > 0)
    return None
def find_brow(lmodel: Live2DScrapModel, brow_id, face_xyxy=None):
    """Promote the topmost wide drawable labelled ``brow_id + 2`` to ``brow_id``.

    The width of the composed visible mask for part ``brow_id + 2`` is used as
    reference. Among the drawables carrying that part id, the one whose bbox
    has the smallest y and is wider than half the reference width is re-labelled
    to ``brow_id``.

    Returns True when a drawable was re-labelled, False otherwise.
    """
    eye_id = brow_id + 2
    eye_mask = lmodel.compose_face_drawables(eye_id, mask_only=True, final_visible_mask=True, xyxy=face_xyxy)
    eye_width = cv2.boundingRect(cv2.findNonZero(eye_mask.astype(np.uint8)))[2]

    best, best_top = None, 100000
    for cand in lmodel.drawables:
        if cand.face_part_id == eye_id:
            _, top, width, _ = cand.get_bbox(xyxy=face_xyxy)
            if top < best_top and width / eye_width > 0.5:
                best, best_top = cand, top

    if best is None:
        return False
    best.face_part_id = brow_id
    return True
| def facedet_sam(exec_list, ckpt, mask_decoder, class_num, save_segs, save_preview, rank_to_worldsize, skip_exists): | |
# Face-part parsing for scraped Live2D models.
# For every entry of exec_list: load a Live2DScrapModel, run a SemanticSam model on the
# padded face crop, give each drawable a face_part_id from its overlap with the predicted
# masks, run a second SemanticSam pass on a face composed from a subset of the drawables,
# apply a series of re-labelling heuristics and store the result with
# lmodel.save_face_parsing().
# NOTE(review): this copy of the file has lost all indentation, so the nesting of the
# statements below cannot be read off the text; the added comments describe the
# statement sequence only.
# NOTE(review): part ids as suggested by names/comments in this function - 1 base face,
# 2/3 brows, 4/5 eyes, 6 glass, 7/8 ears, 10 nose, 11 mouth, 14 neck, 16 cloth, 17 hair,
# 18 hat - confirm against FACE_LABEL2NAME.
| from modules.semanticsam import SemanticSam, Sam | |
| from utils.torch_utils import init_model_from_pretrained | |
| from utils.cv import batch_save_masks | |
| import torch | |
# exec_list may be a .json / .json.gz path; in that case its keys are used as the entries
| if exec_list.endswith('.json') or exec_list.endswith('.json.gz'): | |
| exec_list = json2dict(exec_list) | |
| exec_list = list(exec_list.keys()) | |
| exec_list = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize) | |
| device = 'cuda' #@param ['cuda:0', 'cpu'] | |
| # if model is None: | |
| sam: SemanticSam = init_model_from_pretrained( | |
| pretrained_model_name_or_path=ckpt, | |
| module_cls=SemanticSam, | |
| model_args={'mask_decoder': mask_decoder, 'class_num': class_num}, | |
| device=device | |
| ).eval() | |
# padding added around the detected face box, as a fraction of the box height
| head_pad_ratio = 0.4 | |
# optionally drop entries whose directory already contains face_parsing.json
| if skip_exists: | |
| new_exec_list = [] | |
| for srcp in exec_list: | |
| if osp.isfile(srcp): | |
| srcp = osp.dirname(srcp) | |
| if osp.exists(osp.join(srcp, 'face_parsing.json')): | |
| print(f'skip {srcp} due to result exists') | |
| continue | |
| new_exec_list.append(srcp) | |
| exec_list = new_exec_list | |
| for sidx, srcp in enumerate(tqdm(exec_list[0:])): | |
| # 5 12 | |
| try: | |
| if osp.isfile(srcp): | |
| srcp = osp.dirname(srcp) | |
| lmodel_dir = srcp | |
| lmodel = Live2DScrapModel(lmodel_dir, crop_to_final=True, pad_to_square=False) | |
| if not lmodel.face_detected(): | |
| print(f'skip {srcp} due to no face detected') | |
| continue | |
| lmodel.init_drawable_visible_map() | |
| fh, fw = lmodel.final.shape[:2] | |
# only the first face detection is used
| facedet = lmodel.facedet[0] | |
| x1, y1, x2, y2 = facedet['bbox'][:4] | |
| # save_tmp_img(lmodel.final[y1: y2, x1: x2]) | |
# grow the face box by head_pad on every side, then clip it
| head_pad = 0 | |
| if head_pad_ratio != 0: | |
| head_pad = head_pad_ratio * (y2 - y1) | |
| head_pad = int(round(head_pad)) | |
| facedet['bbox'][:2] -= head_pad | |
| facedet['bbox'][2:4] += head_pad | |
# NOTE(review): every bbox entry (five are unpacked on the next line) is clipped to
# [0, min(fh, fw)], x and y alike - confirm this is intended for non-square images.
| facedet['bbox'] = np.clip(facedet['bbox'], 0, min(fh, fw)) | |
| x1, y1, x2, y2, _ = facedet['bbox'] | |
| # x1 -= head_pad ; y1 -= head_pad ; x2 += head_pad ; y2 += head_pad | |
| face_xyxy = [x1, y1, x2, y2] | |
| image = lmodel.final | |
| face_image = image[y1: y2, x1: x2, :3] | |
| ch, cw = face_image.shape[:2] | |
# NOTE(review): unlike the other save_tmp_img calls in this function, this one is not commented out.
| save_tmp_img(face_image) | |
# first segmentation pass, on the padded face crop
| with torch.inference_mode(): | |
| preds = sam.inference(face_image)[0] | |
| masks_np = (preds > 0).to(device='cpu', dtype=torch.bool).numpy() | |
| if save_segs: | |
| batch_save_masks(masks_np, osp.join(lmodel_dir, 'faceseg.json'), compress='gzip') | |
| batch_save_masks(masks_np[[10, 11]], osp.join(lmodel_dir, 'faceseg_nosemouth.json'), compress='gzip') | |
| # save_tmp_img(visualize_segs(masks_np[[1]], src_img=np.array(face_image), image_weight=0.3)) | |
# dilate the masks of classes 11, 4, 5, 7 and 8 (part_morph_transform defaults to op='dilate')
| part_morph_transform(masks_np, 11, ksize=2) | |
| part_morph_transform(masks_np, 4, ksize=3) | |
| part_morph_transform(masks_np, 5, ksize=3) | |
| part_morph_transform(masks_np, 7, ksize=2) | |
| part_morph_transform(masks_np, 8, ksize=2) | |
| neck_detected = False | |
# per-class mask area; the epsilon keeps the 'ios' division below finite
| seg_areas = reduce(masks_np, 'b h w -> b', 'sum') + 1e-6 | |
# first labelling pass: each drawable gets the class (index >= 1) with the largest 'ioa'
| for drawable in lmodel.drawables: | |
| if drawable.final_visible_area < 1: | |
| continue | |
| area, u_area, i_area = drawable.mask_union_intersection(masks_np, face_xyxy, final_vis_mask=True) | |
| if u_area is None or area == 0 or np.all(i_area[1:] == 0): | |
| continue | |
| u_area += 1e-6 | |
| drawable.face_part_stats = { | |
| 'union': u_area, 'intersection': i_area, 'iou': i_area / u_area, 'ioa': i_area / area, 'area': area, 'ios': i_area / seg_areas | |
| } | |
| drawable.face_part_id = np.argmax(drawable.face_part_stats['ioa'][1:]) + 1 | |
| if drawable.face_part_id == 14: | |
| neck_detected = True | |
# bounding box (bx, by, bw, bh) of everything currently labelled 1
| base_face_mask = lmodel.compose_face_drawables(1, mask_only=True, xyxy=face_xyxy) | |
| bx, by, bw, bh = cv2.boundingRect(cv2.findNonZero(base_face_mask.astype(np.uint8))) | |
| by2 = by + bh | |
| bx2 = bw + bx | |
| base_face_mask_vis = cv2.cvtColor(base_face_mask.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB) | |
| base_face_mask_vis = cv2.rectangle(base_face_mask_vis, (bx, by), (bx2, by2), color=(0, 255, 0), thickness=4) | |
# width of the combined bounding box of parts 4 and 5
| eyew = cv2.boundingRect(cv2.findNonZero( | |
| lmodel.compose_face_drawables([4, 5], mask_only=True, xyxy=face_xyxy).astype(np.uint8) | |
| ))[2] | |
| eye_detected = eyew > 1 | |
| if eyew / bw > 0.5: | |
| split_lr_part(lmodel, (4, 5)) | |
| leye_mask = lmodel.compose_face_drawables(4, mask_only=True, xyxy=face_xyxy) | |
| reye_mask = lmodel.compose_face_drawables(5, mask_only=True, xyxy=face_xyxy) | |
| leye_x, leye_y, leye_w, leye_h = cv2.boundingRect(cv2.findNonZero(leye_mask.astype(np.uint8))) | |
| reye_x, reye_y, reye_w, reye_h = cv2.boundingRect(cv2.findNonZero(reye_mask.astype(np.uint8))) | |
| eyel, eyer = min(leye_x, reye_x), max(leye_x + leye_w, reye_x + reye_w) | |
| eyew = eyer - eyel | |
| brow_potentials = [] | |
| leye_detected, reye_detected = lmodel.face_part_detected([4, 5]) | |
| beye_detected = leye_detected and reye_detected | |
| # re-assign eye lids & brows | |
# smallest draw_order seen among large drawables labelled 1 (updated inside the loop below)
| base_face_draworder = 10000 | |
| for d_id, drawable in enumerate(lmodel.drawables): | |
| if drawable.area < 1: | |
| continue | |
| if drawable.face_part_id == 14: | |
| if drawable.face_part_stats['ioa'][16] + 0.1 > drawable.face_part_stats['ioa'][14]: | |
| drawable.face_part_id = 16 | |
| if not neck_detected and drawable.face_part_id == 16 and drawable.face_part_stats['ioa'][14] > 0.15: | |
| drawable.face_part_id = 14 | |
| dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy) | |
| dx2 = dx + dw | |
| dy2 = dy + dh | |
| if drawable.face_part_id == 16 and dy < by + bh / 2: | |
| if drawable.face_part_stats['ioa'][17] > 0.15: | |
| drawable.face_part_id = 17 | |
| if not drawable.face_part_id in {None, 1, 17, 4, 5}: | |
| continue | |
| # check if hair drawable is actually background | |
| if drawable.face_part_id == 17: | |
| if drawable.face_part_stats['ioa'][0] > 0.7 and drawable.face_part_stats['ioa'][17] < 0.3: | |
| drawable.face_part_id = None | |
# NOTE(review): `dh > bw > 0.7` is a chained comparison (dh > bw and bw > 0.7) - confirm
# this is intended rather than a ratio such as the `dw / bw` test next to it.
| if drawable.face_part_id == 1 and dw / bw > 0.7 and dh > bw > 0.7: | |
| if drawable.draw_order < base_face_draworder: | |
| base_face_draworder = drawable.draw_order | |
| if not (dx > bx and dx2 < bx2 and dy > by and dy2 < by2): | |
| continue | |
| if dy > max(leye_y + leye_h, reye_y + reye_h): | |
| continue | |
| if drawable.face_part_id == 17 and drawable.draw_order >= base_face_draworder: | |
| if dw / bw > 0.4 or dh / bh > 0.2: | |
| continue | |
| # re-assign glass | |
| if drawable.face_part_id in {4, 5} and beye_detected: | |
| if eye_detected and (dw / eyew > 0.6 or drawable.face_part_stats['ioa'][6] > 0.4): | |
| drawable.face_part_id = 6 | |
| continue | |
| if dw > dh: | |
| brow_potentials.append(drawable) | |
# collect the drawables that overlap the base-face box, leaving out the ones skipped below
| facedrawable_wo_prehair = [] | |
| for drawable in lmodel.drawables: | |
| # skip face-covering hairs | |
| if drawable.face_part_id == 17 and drawable.draw_order >= base_face_draworder: | |
| continue | |
| # skip glass, neck, hat, cloth | |
| if drawable.face_part_id in {6, 14, 16, 18}: | |
| continue | |
| dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy) | |
| dx2 = dx + dw ; dy2 = dy + dh | |
| ix = min(dx2, bx2) - max(dx, bx) | |
| iy = min(dy2, by2) - max(dy, by) | |
| if dw > 0 and dh > 0 and ix / dw > 0.3 and iy / dh > 0.8: | |
| facedrawable_wo_prehair.append(drawable) | |
| for drawable in lmodel.drawables: | |
| if drawable.face_part_id is None: | |
| continue | |
| if drawable.face_part_id == 1: | |
| # re-assign mouth tags | |
| if drawable.face_part_stats['ioa'][11] > 0.3: | |
| drawable.face_part_id = 11 | |
# de-duplicate the collected drawables and order them by draw_order
| base_face_drawables: list[Drawable] = None | |
| base_face_drawables = set(facedrawable_wo_prehair + brow_potentials) | |
| base_face_drawables = list(base_face_drawables) | |
| base_face_drawables.sort(key=lambda x: x.draw_order) | |
| # reinit for those covered by hairs | |
| init_drawable_visible_map(base_face_drawables) | |
| # pil_ensure_rgb(compose_from_drawables(base_face_drawables[10:12], xyxy=face_xyxy, output_type='pil')).save('local_tst.png') | |
| base_face = compose_from_drawables(base_face_drawables, xyxy=face_xyxy) | |
| base_face = np.array(pil_ensure_rgb(Image.fromarray(base_face))) | |
# second segmentation pass, on the image composed from base_face_drawables
| with torch.inference_mode(): | |
| preds = sam.inference(base_face)[0] | |
| masks_np2 = (preds > 0).to(device='cpu', dtype=torch.bool).numpy() | |
| if save_segs: | |
| batch_save_masks(masks_np2, osp.join(lmodel_dir, 'faceseg2.json'), compress='gzip') | |
# merge both passes: classes 1, 10, 11 are taken from the first pass, classes 2, 3, 7, 8
# are the union of both passes, and 4, 5 come from the first pass when both eyes were detected
| masks_np2[[1, 10, 11]] = masks_np[[1, 10, 11]] | |
| masks_np2[[2, 3, 7, 8]] = np.bitwise_or(masks_np2[[2, 3, 7, 8]], masks_np[[2, 3, 7, 8]]) | |
| if beye_detected: | |
| masks_np2[[4, 5]] = masks_np[[4, 5]] | |
| seg_areas = reduce(masks_np2, 'b h w -> b', 'sum') + 1e-6 | |
| hair_mask = lmodel.compose_face_drawables([17], mask_only=True, xyxy=face_xyxy) | |
# second labelling pass over base_face_drawables, using the merged masks
| for didx, drawable in enumerate(base_face_drawables): | |
# NOTE(review): list positions 10 and 11 are skipped unconditionally; this looks like
# debugging residue (compare the commented-out [10:12] preview above) - confirm.
| if didx in [10, 11]: | |
| continue | |
| if drawable.final_visible_area < 1: | |
| continue | |
| area, u_area, i_area = drawable.mask_union_intersection(masks_np2, face_xyxy, final_vis_mask=True) | |
| if u_area is None or area == 0 or np.all(i_area[1:] == 0): | |
| continue | |
| u_area += 1e-6 | |
# the class-17 intersection from this pass is discarded
| i_area[17] = 0 | |
| if np.all(i_area[1:] == 0): | |
| continue | |
| ori_face_part_stats = drawable.face_part_stats | |
| drawable.face_part_stats = { | |
| 'union': u_area, 'intersection': i_area, 'iou': i_area / u_area, 'ioa': i_area / area, 'area': area, 'ios': i_area / seg_areas | |
| } | |
# carry over the first-pass 'ioa' for classes 17 and 1 when earlier stats exist
| if len(ori_face_part_stats) > 0: | |
| drawable.face_part_stats['ioa'][17] = ori_face_part_stats['ioa'][17] | |
| drawable.face_part_stats['ioa'][1] = ori_face_part_stats['ioa'][1] | |
| face_part_id = np.argmax(drawable.face_part_stats['ioa'][1:]) + 1 | |
| face_part_id = int(face_part_id) | |
| # fix brows & ears attached to hair | |
| if face_part_id == 17: | |
| if drawable.face_part_stats['ioa'][2] > 0.5: | |
| face_part_id = 2 | |
| elif drawable.face_part_stats['ioa'][3] > 0.5: | |
| face_part_id = 3 | |
| elif drawable.face_part_stats['ioa'][7] > 0.5: | |
| face_part_id = 7 | |
| elif drawable.face_part_stats['ioa'][8] > 0.5: | |
| face_part_id = 8 | |
| # fix brows & ears attached to face | |
| if face_part_id == 1: | |
| if drawable.face_part_stats['ioa'][2] > 0.3: | |
| face_part_id = 2 | |
| if drawable.face_part_stats['ioa'][3] > 0.3: | |
| face_part_id = 3 | |
| # assign drawables not classified as hair in the first pass | |
| if drawable.face_part_id != 17: | |
| drawable.face_part_id = face_part_id | |
| # drawable was classified as hair in the first pass and classified as ear now | |
| elif face_part_id in {7, 8} and drawable.draw_order <= base_face_draworder: | |
| drawable.face_part_id = face_part_id | |
| elif face_part_id in {2, 3}: | |
| drawable.face_part_id = face_part_id | |
| if drawable.face_part_id in {1, 17}: | |
| dx, dy, dw, dh = drawable.get_bbox(xyxy=face_xyxy) | |
| dx2 = dx + dw | |
| dy2 = dy + dh | |
| if not (dx > bx and dx2 < bx2 and dy > by and dy2 < by2): | |
| continue | |
| if dy > max(leye_y + leye_h, reye_y + reye_h): | |
| continue | |
| # if (dx - bx_mid) * (dx2 - bx_mid) < 0: | |
| # continue | |
| # eye lids | |
| if dw < max(leye_w, reye_w) * 1.1 and dh < max(leye_h / 2, reye_h / 2): | |
| if drawable.face_part_stats['ioa'][4] > 0.4 or np.any(drawable.bitwise_and(masks_np[4], face_xyxy)): | |
| drawable.face_part_id = 4 | |
| elif drawable.face_part_stats['ioa'][5] > 0.4 or np.any(drawable.bitwise_and(masks_np[5], face_xyxy)): | |
| drawable.face_part_id = 5 | |
# a tall drawable labelled 11 that touches the hair mask is re-labelled 17
| if drawable.face_part_id == 11 and drawable.get_bbox(xyxy=face_xyxy)[3] / bh > 0.4: | |
| if np.any(drawable.bitwise_and(hair_mask, face_xyxy)): | |
| drawable.face_part_id = 17 | |
| if eyew / bw > 0.5: | |
| split_lr_part(lmodel, (4, 5)) | |
# brows: split into 2/3 when the combined brow box is wide relative to the eyes, and
# fall back to find_brow() for a side that is still missing
| lbrow_detected, rbrow_detected = lmodel.brow_detected() | |
| if lbrow_detected ^ rbrow_detected: | |
| brow_mask = lmodel.compose_face_drawables([2, 3], mask_only=True, xyxy=face_xyxy) | |
| brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(brow_mask.astype(np.uint8))) | |
| if eye_detected and brw / eyew > 0.6: | |
| split_lr_part(lmodel, (2, 3)) | |
| lbrow_detected = rbrow_detected = True | |
| if not lbrow_detected: | |
| lbrow_detected = find_brow(lmodel, 2) | |
| if not rbrow_detected: | |
| rbrow_detected = find_brow(lmodel, 3) | |
| if lbrow_detected and rbrow_detected: | |
| brow_mask = lmodel.compose_face_drawables([2, 3], mask_only=True, xyxy=face_xyxy) | |
| brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(brow_mask.astype(np.uint8))) | |
| if eye_detected and brw / eyew > 0.6: | |
| split_lr_part(lmodel, (2, 3)) | |
| # lmodel.compose_face_drawables([2], xyxy=face_xyxy, output_type='pil', mask_only=True).save('local_tst.png') | |
| # lmodel.compose_face_drawables([3], xyxy=face_xyxy, output_type='pil', mask_only=True).save('local_tst.png') | |
| pass | |
# ears: same left/right split when the combined ear box is wide relative to the face
# (brx/bry/brw/brh are reused here for the ear bounding box)
| lear_detected, rear_detected = lmodel.face_part_detected([7, 8]) | |
| if lear_detected ^ rear_detected: | |
| ear_mask = lmodel.compose_face_drawables([7, 8], mask_only=True, xyxy=face_xyxy) | |
| brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(ear_mask.astype(np.uint8))) | |
| if brw / bw > 0.75: | |
| split_lr_part(lmodel, (7, 8)) | |
| elif lear_detected: | |
| ear_mask = lmodel.compose_face_drawables([7, 8], mask_only=True, xyxy=face_xyxy) | |
| brx, bry, brw, brh = cv2.boundingRect(cv2.findNonZero(ear_mask.astype(np.uint8))) | |
| if brw / bw > 0.75: | |
| split_lr_part(lmodel, (7, 8)) | |
# neck: drawables labelled 14 that touch the face mask form the base neck mask
| neck_base_mask = None | |
| face_mask = lmodel.compose_face_drawables(1, mask_only=True, final_visible_mask=True, xyxy=face_xyxy) | |
| base_neck_ids = set() | |
| for d in lmodel.drawables: | |
| if d.face_part_id != 14: | |
| continue | |
| d_mask = d.get_full_mask(final_visible_mask=True, xyxy=face_xyxy) | |
| if np.any(np.bitwise_and(face_mask, d_mask)): | |
| if neck_base_mask is None: | |
| neck_base_mask = d_mask | |
| else: | |
| neck_base_mask = np.bitwise_or(neck_base_mask, d_mask) | |
| base_neck_ids.add(d.draw_order) | |
| continue | |
# other drawables labelled 14 lose their label unless at least 95% of their visible
# area lies inside the base neck mask
| if neck_base_mask is not None: | |
| # save_tmp_img(neck_base_mask, mask2img=True) | |
| for d in lmodel.drawables: | |
| if d.face_part_id != 14 or d.draw_order in base_neck_ids: | |
| continue | |
| area, u_area, i_area = d.mask_union_intersection(neck_base_mask[None], face_xyxy, final_vis_mask=True) | |
| if i_area[0] / (d.final_visible_area + 1e-6) < 0.95: | |
| d.face_part_id = None | |
| # fix hat assigned as cloth | |
| for d in lmodel.drawables: | |
| if d.face_part_id == 16: | |
| msk = d.get_full_mask(True, face_xyxy) | |
| if np.any(msk): | |
# mean row of the mask; a drawable centred in the top third of the crop becomes 18
| ymean = np.mean(np.where(msk)[0]) | |
| if ymean < 1 / 3 * ch: | |
| d.face_part_id = 18 | |
| nose_detected = lmodel.face_part_detected(10) | |
| mouth_detected = lmodel.face_part_detected(11) | |
| # if not nose_detected: | |
| # for d in lmodel.drawables: | |
| # if d.face_part_id != 1: | |
| # continue | |
| # if d.face_part_stats['ioa'][10] > 0.4: | |
| # d.face_part_id = 10 | |
| # nose_detected = True | |
# fallback when no drawable is labelled 10: take the candidate returned by maxios_mindrawable
| if not nose_detected: | |
| d = lmodel.maxios_mindrawable(10, 0.7, ioa_thr=0.3) | |
| if d is not None: | |
| d.face_part_id = 10 | |
| # save_tmp_img(compose_from_drawables([d], xyxy=face_xyxy)) | |
| pass | |
# optional preview grid, saved as face_parsing_preview.jpg in the model directory
| if save_preview: | |
| vis_face_det = visualize_facedet_output(lmodel.final, [facedet])[y1: y2, x1: x2, :3] | |
| rst_preds = visualize_segs(masks_np, src_img=np.array(face_image), image_weight=0.3) | |
| rst_preds2 = visualize_segs(masks_np2, src_img=np.array(base_face), image_weight=0.3) | |
| vis_list = [face_image, rst_preds, vis_face_det, base_face, rst_preds2] | |
| for flabel, flist in VALID_FACE_GROUPS.items(): | |
| rst = lmodel.compose_face_drawables(face_part_ids=flist, xyxy=face_xyxy) | |
| rst = Image.fromarray(rst) | |
| pil_draw_text(rst, flabel, point=(0, 0), stroke_width=2, font_size=int(bw / 5)) | |
| rst = pil_ensure_rgb(rst) | |
| vis_list.append(rst) | |
| # save_tmp_img(masks_np[11], mask2img=True) | |
| imglist2imgrid(vis_list, cols=4, output_type='pil').save(osp.join(lmodel.directory, 'face_parsing_preview.jpg'), q=95) | |
| # imglist2imgrid(vis_list, cols=4, output_type='pil').save('local_tst.png') | |
| pass | |
# shift the face box by the final_bbox offset before saving
| if lmodel.final_bbox is not None: | |
| face_xyxy[0] += lmodel.final_bbox[0] | |
| face_xyxy[2] += lmodel.final_bbox[0] | |
| face_xyxy[1] += lmodel.final_bbox[1] | |
| face_xyxy[3] += lmodel.final_bbox[1] | |
| lmodel.save_face_parsing(metadata=FACE_LABEL2NAME, face_seg_xyxy=face_xyxy) | |
| pass | |
| except Exception as e: | |
# NOTE(review): the bare raise comes first, so the print after it is never reached.
| raise | |
| print(f'failed to process {srcp}: {e}') | |
def instance_segmentation(exec_list, rank_to_worldsize):
    """Run instance segmentation on every listed image and store the result.

    For each image path the detected masks (RLE-encoded with ``mask2rle``),
    scores and bboxes are written to ``instances.json`` in the directory of the
    image. A ``None`` mask result is stored as an empty list.
    """
    from annotators.animeinsseg.instance_segmentation import apply_instance_segmentation

    for img_path in tqdm(load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)):
        rgb = np.array(pil_ensure_rgb(Image.open(img_path)))
        result = apply_instance_segmentation(rgb)
        raw_masks = [] if result.masks is None else result.masks
        payload = {
            'masks': [mask2rle(m) for m in raw_masks],
            'scores': result.scores,
            'bboxes': result.bboxes,
        }
        dict2json(payload, osp.join(osp.dirname(img_path), 'instances.json'))
def sam_infer_l2d(exec_list, ckpt, rank_to_worldsize):
    """Run the SemanticSam body-part model on each listed Live2D model.

    Only ``ckpt == 'sam_l2d_19cls_iter2_18k'`` is accepted; any other value
    raises ``Exception``. For every entry the drawables are composed into one
    image, the model is run on its first three channels, and the thresholded
    masks are saved as ``sam_masks.json`` in the model directory. A failure on
    one entry is printed and the loop moves on.
    """
    from live2d.scrap_model import animal_ear_detected, Drawable, VALID_BODY_PARTS_V1, VALID_BODY_PARTS_V2
    from utils.cv import fgbg_hist_matching, quantize_image, random_crop, rle2mask, mask2rle, img_alpha_blending, resize_short_side_to, batch_save_masks, batch_load_masks
    from utils.torch_utils import seed_everything, init_model_from_pretrained
    from utils.visualize import visualize_segs_with_labels
    from modules.semanticsam import SemanticSam, Sam
    import torch

    if ckpt != 'sam_l2d_19cls_iter2_18k':
        raise Exception(f'Invalid ckpt: {ckpt}')

    model: SemanticSam = init_model_from_pretrained(
        pretrained_model_name_or_path='24yearsold/l2d_sam_iter2',
        weights_name='checkpoint-18000.pt',
        module_cls=SemanticSam,
        download_from_hf=True,
        model_args=dict(class_num=19)
    ).to(device='cuda')

    seed_everything(42)

    for p in tqdm(load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)):
        try:
            # only the crop box of the detected character is needed here
            _, crop_xyxy, _ = load_detected_character(p)
            lmodel = Live2DScrapModel(p, crop_xyxy=crop_xyxy, pad_to_square=False)
            lmodel.init_drawable_visible_map()
            composed = compose_from_drawables(lmodel.drawables)
            with torch.inference_mode():
                logits = model.inference(composed[..., :3])[0]
                masks_np = (logits > 0).to(device='cpu', dtype=torch.bool).numpy()
            batch_save_masks(masks_np, osp.join(lmodel.directory, 'sam_masks.json'))
        except Exception as e:
            print(f'Failed to process {p}: {e}')
def infer_synsample_tags(exec_list, tags, rank_to_worldsize):
    """Tag rendered sample images with the wdv3 tagger and write ``.txt`` files.

    ``tags`` is a comma-separated list of suffixes. For every path in
    ``exec_list`` and every suffix, the image ``<stem>_<suffix>.png`` (or
    ``<stem>.png`` for the suffix ``fullpage``) is resized to 448x448 and
    tagged; the general tags are written comma-joined to the matching ``.txt``
    file. Missing images are skipped, and a failure on one image is printed
    without stopping the run.
    """
    # from annotators.animeinsseg.instance_segmentation import apply_instance_segmentation
    from annotators.wdv3_tagger import apply_wdv3_tagger
    import os.path as osp

    suffixes = tags.split(',')
    sources = load_exec_list(exec_list, rank_to_worldsize=rank_to_worldsize)
    img_ext = '.png'

    # Build the tag -> general-tag lookup only to report conflicting definitions.
    tag2generaltag = {}
    for general_tag, members in json2dict('assets/tagcluster_bodypart_v2.json').items():
        for tag in members:
            previous = tag2generaltag.get(tag)
            if tag in tag2generaltag and previous != general_tag:
                print(f'conflict tag def: {tag} - {general_tag}, ' + previous)
            tag2generaltag[tag] = general_tag

    for src in tqdm(sources):
        stem_base = osp.splitext(src)[0]
        for suffix in suffixes:
            stem = stem_base if suffix == 'fullpage' else stem_base + f'_{suffix}'
            try:
                img_path = stem + img_ext
                if not osp.exists(img_path):
                    continue
                img = pil_ensure_rgb(Image.open(img_path))
                img_input = img.resize((448, 448), resample=Image.Resampling.LANCZOS)
                _, _, _, _, general = apply_wdv3_tagger(img_input, exclude_cls=exclude_cls)
                with open(stem + '.txt', 'w', encoding='utf8') as f:
                    f.write(','.join(list(general)))
            except Exception as e:
                print(f'failed on {stem}: ', e)
# Run the command-line entry point when this file is executed as a script.
if __name__ == '__main__':
    cli()