"""Extract CLIP text features for a label set, then dispatch per-sequence
open-vocabulary labeling jobs (semantics/itw-ovq.py) across workers."""
import os
import pickle
import time
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm

import open_clip
from open_clip import tokenizer

from utils.config import get_args


def load_clip():
    """Load the ViT-H-14 CLIP model (LAION-2B weights) onto the GPU in eval mode.

    Returns:
        The open_clip model, on CUDA, with gradients irrelevant (eval mode).
    """
    print('[INFO] loading CLIP model...')
    # The two transform return values (preprocess fns) are unused here.
    model, _, _ = open_clip.create_model_and_transforms(
        "ViT-H-14", pretrained="laion2b_s32b_b79k")
    model.cuda()
    model.eval()
    print('[INFO]', ' finish loading CLIP model...')
    return model


def extract_text_feature(descriptions, clip_model, target_path):
    """Encode each description with CLIP, L2-normalize, and save to disk.

    Args:
        descriptions: list of label strings to encode.
        clip_model: a loaded open_clip model (on CUDA).
        target_path: directory in which 'text_features.npy' is written.

    Returns:
        Dict mapping each description string to its (D,) float32 numpy feature.
    """
    text_tokens = tokenizer.tokenize(descriptions).cuda()
    with torch.no_grad():
        text_features = clip_model.encode_text(text_tokens).float()
        # Normalize to unit length so downstream similarity is a plain dot product.
        text_features /= text_features.norm(dim=-1, keepdim=True)
    text_features = text_features.cpu().numpy()
    text_features_dict = {
        description: text_features[i]
        for i, description in enumerate(descriptions)
    }
    # np.save on a dict pickles it; load with np.load(..., allow_pickle=True).
    np.save(os.path.join(target_path, "text_features.npy"), text_features_dict)
    return text_features_dict


def execute_commands(commands_list, command_type, process_num):
    """Run shell commands in parallel through a process pool with a progress bar.

    Args:
        commands_list: shell command strings, each executed via os.system.
        command_type: human-readable tag used only for start/finish logging.
        process_num: number of worker processes.
    """
    print('====> Start', command_type)
    from multiprocessing import Pool
    # Context manager guarantees the pool is cleaned up; the redundant
    # terminate() after close()/join() in the old version is dropped.
    with Pool(process_num) as pool:
        for _ in tqdm(pool.imap_unordered(os.system, commands_list),
                      total=len(commands_list)):
            pass
        pool.close()
        pool.join()
    print('====> Finish', command_type)


def parallel_compute(general_command, command_name, resource_type, cuda_list, seq_name_list):
    """Shard sequences across resources and run `general_command` on each shard.

    Args:
        general_command: command template. For 'cuda' it must contain a single
            '%s' placeholder that receives a '+'-joined sequence-name shard;
            for 'cpu' it is used verbatim with '--seq_name <name>' appended.
        command_name: label forwarded to execute_commands for logging.
        resource_type: 'cuda' (one shard per GPU id) or 'cpu' (one job per sequence).
        cuda_list: GPU ids; its length also sets the pool size in BOTH branches
            (the 'cpu' branch deliberately reuses it as the process count).
        seq_name_list: sequence names to process.
    """
    cuda_num = len(cuda_list)
    if resource_type == 'cuda':
        commands = []
        # Round-robin: GPU i handles seq_name_list[i::cuda_num].
        for i, cuda_id in enumerate(cuda_list):
            process_seq_name = seq_name_list[i::cuda_num]
            if len(process_seq_name) == 0:
                continue
            process_seq_name = '+'.join(process_seq_name)
            command = f'CUDA_VISIBLE_DEVICES={cuda_id} {general_command % process_seq_name}'
            commands.append(command)
        execute_commands(commands, command_name, cuda_num)
    elif resource_type == 'cpu':
        commands = [f'{general_command} --seq_name {seq_name}'
                    for seq_name in seq_name_list]
        execute_commands(commands, command_name, cuda_num)


def main(args):
    CUDA_LIST = args.devices
    config = "itw"
    root = f'data/{config}/processed'

    seq_name_list = os.listdir(root)
    # Guard: seq_name_list[0] below would otherwise raise an opaque IndexError.
    if not seq_name_list:
        raise RuntimeError(f'No processed sequences found under {root}')

    with open("labels.txt", "r") as f:
        labels = f.read().split(";")

    # CLIP is loaded here (not at import time, as the old version did) so
    # importing this module no longer requires a GPU or the model weights.
    clip_model = load_clip()
    # NOTE(review): features are written only into the FIRST sequence's
    # directory — presumably downstream readers look there; verify.
    extract_text_feature(labels, clip_model, f"{root}/{seq_name_list[0]}")

    # Step 6: Get labels for each 3D instance.
    parallel_compute(f'PYTHONPATH=. python semantics/itw-ovq.py --config {config}',
                     'get text labels', 'cpu', CUDA_LIST, seq_name_list)


if __name__ == '__main__':
    args = get_args()
    main(args)