File size: 2,594 Bytes
55e58d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
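'''
Assign a semantic label to every object in the scene.

Each object's feature is the average of the visual features of its
representative masks; the label whose text feature is most similar to it is
chosen, and the resulting predictions are saved to ``prediction.npz``.
'''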
from utils.config import get_dataset, get_args
import os
import numpy as np
def sigmoid(x):
    '''
    Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The naive formula evaluates ``exp(-x)``, which overflows for large
    negative ``x`` (RuntimeWarning, or FloatingPointError under
    ``np.errstate(over='raise')``). This version only ever exponentiates
    non-positive values, so it cannot overflow.

    Args:
        x: A scalar, NumPy array, or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    '''
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x). For x < 0 the algebraically equal form
    # e^x / (1 + e^x) is used, where e^x == decay.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps 0-d results to a NumPy scalar and
    # leaves n-d arrays as they are.
    return result[()]
def cos_sim(x_cos):
    '''
    Linearly rescale ``x_cos`` by computing ``(1 + x_cos) / 2``.

    For an input in [-1, 1] (e.g. a cosine value) the result lies in [0, 1].
    '''
    shifted = x_cos + 1
    return shifted / 2
def main(args):
    '''
    Label every object in the scene and save the predictions.

    Loads ``object_dict.npy`` and ``open-vocabulary_features.npy`` from
    ``{dataset.object_dict_dir}/{args.config}``. For each object that has
    representative masks, the features of those masks are averaged, compared
    against every label text feature, and the best-matching label is stored
    together with its rescaled similarity score and the object's point mask.
    The result is written to ``{dataset.object_dict_dir}/prediction.npz``.

    Args:
        args: Parsed arguments; passed to ``get_dataset`` and read for
            ``args.config``.
    '''
    dataset = get_dataset(args)
    num_points = dataset.get_scene_points().shape[0]

    # Label name -> text feature; the stacked rows and the name list share
    # the same ordering because both come from the same dict.
    text_features_by_label = dataset.get_label_features()
    text_feature_matrix = np.stack(list(text_features_by_label.values()))
    label_names = list(text_features_by_label.keys())

    # NOTE(review): allow_pickle=True executes pickled content on load; this
    # presumably only reads files produced by earlier pipeline stages - confirm.
    objects = np.load(f'{dataset.object_dict_dir}/{args.config}/object_dict.npy', allow_pickle=True).item()
    mask_features = np.load(f'{dataset.object_dict_dir}/{args.config}/open-vocabulary_features.npy', allow_pickle=True).item()
    name_to_id = dataset.get_label_id()[0]

    num_objects = len(objects)
    masks = np.zeros((num_points, num_objects), dtype=bool)
    scores = np.ones(num_objects)
    classes = np.zeros(num_objects, dtype=np.int32)
    chosen_names = [''] * num_objects
    pred_dict = {
        "pred_masks": masks,
        "pred_score": scores,
        "pred_classes": classes,
        "pred_descriptions": chosen_names,
    }

    for column, obj in enumerate(objects.values()):
        repre_masks = obj['repre_mask_list']
        # NOTE(review): objects without representative masks are skipped and
        # keep the defaults (empty mask, score 1.0, class 0, '') - confirm
        # this is what downstream evaluation expects.
        if len(repre_masks) == 0:
            continue

        # Object feature = mean of the features of its representative masks;
        # each feature is looked up by a key built from the first two entries
        # of the mask info.
        stacked = np.stack(
            [mask_features[f'{mask_info[0]}_{mask_info[1]}'] for mask_info in repre_masks]
        )
        object_feature = np.mean(stacked, axis=0, keepdims=True)

        # Dot product against every label text feature, rescaled by cos_sim.
        # (Presumably the features are normalised so this is a cosine
        # similarity mapped to [0, 1] - confirm.)
        similarity = cos_sim(np.dot(object_feature, text_feature_matrix.T))
        per_label_score = np.max(similarity, axis=0)
        best = np.argmax(per_label_score)
        best_name = label_names[best]

        scores[column] = per_label_score[best]
        classes[column] = name_to_id[best_name]
        chosen_names[column] = best_name

        # Mark the object's points in its own column of the mask matrix.
        column_mask = np.zeros(num_points, dtype=bool)
        column_mask[list(obj['point_ids'])] = True
        masks[:, column] = column_mask

    np.savez(f'{dataset.object_dict_dir}/prediction.npz', **pred_dict)
if __name__ == '__main__':
    # Script entry point: parse the arguments and run the labelling.
    main(get_args())