import open_clip
from open_clip import tokenizer
import torch
import numpy as np
from evaluation.constants import MATTERPORT_LABELS, SCANNET_LABELS, SCANNETPP_LABELS, SCANNET18_LABELS, SCANNETPP84_LABELS, SCANNETPP84_IDS, ARKIT_LABELS, ARKIT_IDS

def load_clip():
    """Create the OpenCLIP ViT-H-14 model and prepare it for inference.

    The model is built with the ``laion2b_s32b_b79k`` pretrained tag,
    moved to the CUDA device and switched to eval mode. Progress messages
    are printed before and after loading.

    Returns:
        The model object returned by
        ``open_clip.create_model_and_transforms``.
    """
    print('[INFO] loading CLIP model...')
    # The factory returns a 3-tuple; only the model is needed here, the two
    # remaining entries are discarded.
    clip_model, _unused_a, _unused_b = open_clip.create_model_and_transforms(
        "ViT-H-14",
        pretrained="laion2b_s32b_b79k",
    )
    clip_model.cuda()
    clip_model.eval()
    print('[INFO]', ' finish loading CLIP model...')
    return clip_model

def extract_text_feature(save_path, descriptions):
    """Encode text descriptions with CLIP and save them as a dict.

    Uses the module-level ``model`` and the ``open_clip`` tokenizer. Each
    feature vector is L2-normalised along the last dimension before being
    stored.

    Args:
        save_path: Destination handed to ``np.save``. When it is a
            filesystem path, its parent directory is created if missing.
        descriptions: Sequence of text strings to encode; each string
            becomes a key of the saved dictionary.

    Returns:
        None. A ``{description: feature}`` dictionary is written via
        ``np.save``.
    """
    import os

    text_tokens = tokenizer.tokenize(descriptions).cuda()
    with torch.no_grad():
        text_features = model.encode_text(text_tokens).float()
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
        text_features = text_features.cpu().numpy()

    # Row i of the feature matrix corresponds to descriptions[i].
    text_features_dict = {
        description: feature
        for description, feature in zip(descriptions, text_features)
    }

    # np.save does not create missing directories; make sure the target
    # folder exists so the encoding work above is not lost to a
    # FileNotFoundError. File-like objects are passed through untouched.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # NOTE: a dict is stored by np.save as a pickled object array, so
    # readers presumably need np.load(..., allow_pickle=True).item().
    np.save(save_path, text_features_dict)

def get_text_feature(text):
    """Encode ``text`` with the module-level CLIP model.

    The input is tokenized, moved to CUDA and passed through
    ``model.encode_text`` with gradients disabled. The features are cast
    to float and returned as-is; no L2 normalisation is applied here.

    Args:
        text: Input forwarded to ``tokenizer.tokenize``.

    Returns:
        A NumPy array holding the encoded text features.
    """
    tokens = tokenizer.tokenize(text).cuda()
    with torch.no_grad():
        encoded = model.encode_text(tokens)
        features = encoded.float()
    return features.cpu().numpy()

# Module-level driver: load the CLIP model once (the feature functions read
# it through the global name ``model``), then export the text features of
# every label set to its own .npy file.
model = load_clip()

for _out_path, _labels in (
    ('data/text_features/scannet.npy', SCANNET_LABELS),
    ('data/text_features/scannetpp.npy', SCANNETPP_LABELS),
    ('data/text_features/matterport3d.npy', MATTERPORT_LABELS),
    ('data/text_features/scannet18.npy', SCANNET18_LABELS),
    ('data/text_features/scannetpp84.npy', SCANNETPP84_LABELS),
    ('data/text_features/arkit.npy', ARKIT_LABELS),
):
    extract_text_feature(_out_path, _labels)

# Drop the loop temporaries so the module namespace is unchanged.
del _out_path, _labels