Spaces:
Build error
Build error
| import gzip | |
| import json | |
| import os | |
| import glob | |
| import random | |
| import math | |
| import numpy as np | |
| import torch | |
| import torch.utils.data as data | |
| from importlib_resources import open_binary | |
| from scipy.io import loadmat | |
| from tabulate import tabulate | |
| import itertools | |
| import json | |
| from scipy import ndimage | |
| import xml.etree.ElementTree as ET | |
| from csv import DictReader | |
| from pycocotools.mask import decode as decode_RLE | |
| import os | |
| import sys | |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../../')) | |
| # import stacked_hourglass.res | |
| # from stacked_hourglass.datasets.common import DataInfo | |
| from src.configs.anipose_data_info import COMPLETE_DATA_INFO | |
| from src.stacked_hourglass.utils.imutils import load_image, draw_labelmap, draw_multiple_labelmaps | |
| from src.stacked_hourglass.utils.misc import to_torch | |
| from src.stacked_hourglass.utils.transforms import shufflelr, crop, color_normalize, fliplr, transform | |
| import src.stacked_hourglass.datasets.utils_stanext as utils_stanext | |
| from src.stacked_hourglass.utils.visualization import save_input_image_with_keypoints | |
| # from configs.dog_breeds.dog_breed_class import COMPLETE_ABBREV_DICT, COMPLETE_SUMMARY_BREEDS, SIM_MATRIX_RAW, SIM_ABBREV_INDICES | |
class AniPose(data.Dataset):
    """Keypoint dataset built from Animal Pose XML annotations of dogs.

    Annotation files are collected from two folders below ``top_folder``
    ('PASCAL2011_animal_annotation/dog' and 'animalpose_anno2/dog').  Each
    item is a cropped, colour-normalised image together with one heatmap
    per joint of ``DATA_INFO`` and a dictionary of meta information.

    Only ``dataset_mode='keyp_only'`` is implemented; the segmentation
    modes ('keyp_and_seg', 'complete') raise ``NotImplementedError`` when
    an item is requested.
    """

    DATA_INFO = COMPLETE_DATA_INFO

    # Suggested joints to use for average PCK calculations.
    ACC_JOINTS = list(range(20))  # don't know ...

    # Coordinate given to joints that do not appear in an annotation file.
    # Target generation skips joints whose y coordinate is not positive, so
    # this value keeps unannotated joints out of the heatmaps.
    _MISSING_KEYPOINT_COORD = -1000

    def __init__(self, image_path=None, is_train=True, inp_res=256, out_res=64, sigma=1,
                 scale_factor=0.25, rot_factor=30, label_type='Gaussian',
                 do_augment='default', shorten_dataset_to=None, dataset_mode='keyp_only'):
        """Collect the annotation files and store the sampling settings.

        Args:
            image_path: Unused; the data folders are hard-coded below.
            is_train: Serve the training split if true, else the test split.
            inp_res: Side length (pixels) of the cropped network input.
            out_res: Side length (pixels) of the target heatmaps.
            sigma: Passed to ``draw_labelmap`` when drawing the heatmaps.
            scale_factor: Strength of the random scale augmentation.
            rot_factor: Strength of the random rotation augmentation.
            label_type: Passed to ``draw_labelmap`` as ``type``.
            do_augment: 'yes', 'no' or 'default' (augment only when
                ``is_train`` is true).
            shorten_dataset_to: Unused.
            dataset_mode: 'keyp_only', 'keyp_and_seg' or 'complete'.

        Raises:
            ValueError: If ``do_augment`` is not one of the accepted strings.
        """
        self.is_train = is_train    # training set or test set
        if do_augment == 'yes':
            self.do_augment = True
        elif do_augment == 'no':
            self.do_augment = False
        elif do_augment == 'default':
            self.do_augment = bool(self.is_train)
        else:
            raise ValueError(
                "do_augment must be 'yes', 'no' or 'default', got %r" % (do_augment,))
        self.inp_res = inp_res
        self.out_res = out_res
        self.sigma = sigma
        self.scale_factor = scale_factor
        self.rot_factor = rot_factor
        self.label_type = label_type
        self.dataset_mode = dataset_mode
        # Both of these modes need a segmentation map, which is not available here.
        self.calc_seg = self.dataset_mode in ('complete', 'keyp_and_seg')
        self.kp_dict = self.keyp_name_to_ind()

        self.top_folder = '/ps/scratch/nrueegg/new_projects/Animals/data/animal_pose_dataset/'
        self.folder_imgs_0 = '/ps/project/datasets/VOCdevkit/VOC2012/JPEGImages/'
        self.folder_imgs_1 = os.path.join(self.top_folder, 'animalpose_image_part2', 'dog')
        self.folder_annot_0 = os.path.join(self.top_folder, 'PASCAL2011_animal_annotation', 'dog')
        self.folder_annot_1 = os.path.join(self.top_folder, 'animalpose_anno2', 'dog')

        # NOTE(review): glob returns files in file-system order, so the split
        # below is only reproducible on the same storage.  It is deliberately
        # left unsorted because sorting would change which files end up in the
        # test split used by existing checkpoints -- confirm before changing.
        all_annot_files_0 = glob.glob(self.folder_annot_0 + '/*.xml')     # 1571
        all_annot_files_1 = glob.glob(self.folder_annot_1 + '/*.xml')     # 200
        all_annot_files = all_annot_files_0 + all_annot_files_1

        # old for hg_anipose_v0
        #   self.train_name_list = all_annot_files
        #   self.test_name_list = all_annot_files[0:50] + all_annot_files[200:250]
        # new for hg_anipose_v1: the last 50 files form the test split
        self.train_name_list = all_annot_files[:-50]
        self.test_name_list = all_annot_files[-50:]

        print('anipose dataset size: ')
        print(len(self.train_name_list))
        print(len(self.test_name_list))

    # -----------------------------------------
    def read_content(self, xml_file, annot_type='animal_pose'):
        """Parse one annotation XML file into a dictionary.

        Args:
            xml_file: Path of the XML file, examples:
                animal_pose:     .../animalpose_anno2/cat/ca137.xml
                animal_pose_voc: .../PASCAL2011_animal_annotation/cat/2008_005380_1.xml
                voc:             .../VOC2012/Annotations/2011_000192.xml
            annot_type: 'animal_pose', 'animal_pose_voc' or 'voc'.

        Returns:
            For the two animal-pose types, a dict that contains (depending on
            the tags present in the file) 'image', 'category', 'voc_id',
            'visible_bounds', 'polylinesegments', 'keypoints_xyzvis' (an
            ``(n_kp, 4)`` array holding x, y, z and the visibility flag) and
            'keypoints_names'.  For 'voc' an empty dict, since that format is
            not parsed yet.

        Raises:
            ValueError: If ``annot_type`` is none of the supported values.
        """
        if annot_type == 'voc':
            print('not yet read')
            return {}
        if annot_type not in ('animal_pose', 'animal_pose_voc'):
            # The original code dropped into a debugger here and then failed
            # on an unbound variable; fail with a clear error instead.
            raise ValueError('this annot_type does not exist: %r' % (annot_type,))

        my_dict = {}
        root = ET.parse(xml_file).getroot()
        for child in root:
            tag = child.tag
            if tag in ('image', 'category', 'voc_id'):    # 'voc_id': animal_pose_voc only
                my_dict[tag] = child.text
            elif tag == 'visible_bounds':
                my_dict['visible_bounds'] = child.attrib
            elif tag == 'keypoints':
                xyzvis = np.zeros((len(child), 4))
                kp_names = []
                for ind_kp, kp in enumerate(child):
                    attrib = kp.attrib
                    xyzvis[ind_kp, 0] = float(attrib['x'])
                    xyzvis[ind_kp, 1] = float(attrib['y'])
                    xyzvis[ind_kp, 2] = float(attrib['z'])
                    xyzvis[ind_kp, 3] = float(attrib['visible'])
                    kp_names.append(attrib['name'])
                my_dict['keypoints_xyzvis'] = xyzvis
                my_dict['keypoints_names'] = kp_names
            elif tag == 'polylinesegments':    # animal_pose_voc only
                my_dict['polylinesegments'] = child[0].attrib
            else:
                print('tag does not exist: ' + child.tag)
        return my_dict

    def keyp_name_to_ind(self):
        """Return a dict mapping keypoint names to indices in ``DATA_INFO.joint_names``.

        Every joint is registered under its original and its lower-case name.
        The aliases 'l_ear', 'r_ear' and 'tail' additionally point to
        'l_earbase', 'r_earbase' and 'tailbase'.
        """
        aliases = {'l_earbase': 'l_ear', 'r_earbase': 'r_ear', 'tailbase': 'tail'}
        kps_dict = {}
        for ind_kp, kp in enumerate(self.DATA_INFO.joint_names):
            kp_lower = kp.lower()
            kps_dict[kp] = ind_kp
            kps_dict[kp_lower] = ind_kp
            if kp_lower in aliases:
                kps_dict[aliases[kp_lower]] = ind_kp
        return kps_dict

    def _annotation_to_points(self, annot_dict):
        """Return an ``(n_keyp, 3)`` tensor of zero-based x, y and visibility."""
        n_keyp = self.DATA_INFO.n_keyp
        xyzvis = annot_dict['keypoints_xyzvis']
        vis_np = np.zeros((n_keyp))
        pts_np = np.ones((n_keyp, 2)) * self._MISSING_KEYPOINT_COORD
        for ind_key, key in enumerate(annot_dict['keypoints_names']):
            ind_new = self.kp_dict[key.lower()]
            vis_np[ind_new] = xyzvis[ind_key, 3]
            # remark: the first training run (animalpose_hg8_v0) was without
            # subtracting 1, which would be important!
            pts_np[ind_new] = xyzvis[ind_key, 0:2] - 1
        return torch.Tensor(np.concatenate((pts_np, vis_np[:, None]), axis=1))

    @staticmethod
    def _bbox_to_center_scale(bounds, keyword_ymin):
        """Return the crop centre (tensor) and scale (float) for a 'visible_bounds' entry."""
        # Corner coordinates are shifted by one, like the keypoints.
        x0 = float(bounds['xmin']) - 1
        y0 = float(bounds[keyword_ymin]) - 1
        width = float(bounds['width'])
        height = float(bounds['height'])
        center = torch.Tensor([x0 + 0.5 * width, y0 + 0.5 * height])
        # maximum side of the bbox will be 200
        scale = max(width, height) / 200. * 256. / 200.
        return center, scale

    def __getitem__(self, index):
        """Load, augment and crop one sample.

        Args:
            index: Position within the train or test list (see ``is_train``).

        Returns:
            Tuple ``(inp, target, meta)``: ``inp`` is the cropped and
            colour-normalised image, ``target`` holds one
            ``out_res x out_res`` heatmap per joint, and ``meta`` is a dict
            with the keys 'index', 'center', 'scale', 'pts', 'tpts' and
            'target_weight'.

        Raises:
            NotImplementedError: For the dataset modes 'keyp_and_seg' and
                'complete' (no segmentation data is available).
            ValueError: For any other dataset mode than 'keyp_only'.
        """
        # Fail before doing any file I/O; the original raised the same
        # exception types, only later in the method.
        if self.calc_seg:
            raise NotImplementedError(
                "dataset_mode %r needs segmentation maps, which are not "
                "available for this dataset" % (self.dataset_mode,))
        if self.dataset_mode != 'keyp_only':
            raise ValueError('unknown dataset_mode: %r' % (self.dataset_mode,))

        name_list = self.train_name_list if self.is_train else self.test_name_list
        xml_path = name_list[index]
        annot_dict = self.read_content(xml_path, annot_type='animal_pose_voc')

        if xml_path.split('/')[-3] == 'PASCAL2011_animal_annotation':
            img_path = os.path.join(self.folder_imgs_0, annot_dict['image'] + '.jpg')
            keyword_ymin = 'ymin'
        else:
            img_path = os.path.join(self.folder_imgs_1, annot_dict['image'])
            # NOTE(review): presumably these annotation files store the top
            # edge of the box under the attribute name 'xmax' -- confirm
            # against the data.
            keyword_ymin = 'xmax'

        sf = self.scale_factor
        rf = self.rot_factor

        pts = self._annotation_to_points(annot_dict)
        c, s = self._bbox_to_center_scale(annot_dict['visible_bounds'], keyword_ymin)

        # For single-person pose estimation with a centered/scaled figure
        nparts = pts.size(0)
        img = load_image(img_path)  # CxHxW

        r = 0
        if self.do_augment:
            # The order of the random draws is kept as in the original code so
            # that seeded runs produce the same samples.
            s = s*torch.randn(1).mul_(sf).add_(1).clamp(1-sf, 1+sf)[0]
            r = torch.randn(1).mul_(rf).clamp(-2*rf, 2*rf)[0] if random.random() <= 0.6 else 0
            # Flip
            if random.random() <= 0.5:
                img = fliplr(img)
                # remark: for BITE we figured out that a -1 was missing in the
                # point mirroring term.  With image coordinates 0, 1, 2, 3
                # (image size 4) the former 0 has to map to 3 and not to 4.
                pts = shufflelr(pts, img.size(2)-1, self.DATA_INFO.hflip_indices)
                c[0] = img.size(2) - c[0] - 1
            # Color
            img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.DATA_INFO.rgb_mean, self.DATA_INFO.rgb_stddev)

        # Generate ground truth
        tpts = pts.clone()
        target_weight = tpts[:, 2].clone().view(nparts, 1)
        target = torch.zeros(nparts, self.out_res, self.out_res)
        for i in range(nparts):
            # The test is on the y coordinate and not on the visibility flag
            # (the original author calls testing tpts[i, 2] "evil").
            if tpts[i, 1] > 0:
                # this pytorch function (transforms) assumes coordinates
                # which start at 1 instead of 0!
                tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2]+1, c, s, [self.out_res, self.out_res], rot=r, as_int=False)) - 1
                target[i], vis = draw_labelmap(target[i], tpts[i], self.sigma, type=self.label_type)
                target_weight[i, 0] *= vis

        # Meta info
        meta = {'index': index, 'center': c, 'scale': s,
                'pts': pts, 'tpts': tpts, 'target_weight': target_weight}
        return inp, target, meta

    def __len__(self):
        """Return the number of samples in the active (train or test) split."""
        if self.is_train:
            return len(self.train_name_list)
        return len(self.test_name_list)