import axengine as axe
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

# Load the Hugging Face tokenizer and the FP32 reference model.
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5')
model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5')
model.eval()

# Load the quantized model compiled for the AXera NPU.
model_axe = axe.InferenceSession('./bge-small-en-v1.5_u16_npu3.axmodel')

sentences_1 = ["I really love math"]
sentences_2 = ["I pretty like mathematics"]

# Tokenize, padding to the fixed sequence length the .axmodel expects.
encoded_input1 = tokenizer(sentences_1, padding='max_length', max_length=512,
                           truncation=True, return_tensors='pt')
encoded_input2 = tokenizer(sentences_2, padding='max_length', max_length=512,
                           truncation=True, return_tensors='pt')

# Run the NPU model on int32 input_ids; output 0 is the last hidden state.
model_features_axe1 = model_axe.run(
    None, {'input_ids': encoded_input1.input_ids.numpy().astype(np.int32)})
model_features_axe2 = model_axe.run(
    None, {'input_ids': encoded_input2.input_ids.numpy().astype(np.int32)})

# Perform pooling. In this case, CLS pooling: take the hidden state of the
# first ([CLS]) token, then L2-normalize so a dot product gives cosine similarity.
embeddings_1 = model_features_axe1[0][:, 0]
embeddings_1 /= np.linalg.norm(embeddings_1, axis=1, keepdims=True)
embeddings_2 = model_features_axe2[0][:, 0]
embeddings_2 /= np.linalg.norm(embeddings_2, axis=1, keepdims=True)

similarity = embeddings_1 @ embeddings_2.T
print("similarity:", similarity)

# Ground truth: run the same inputs through the FP32 PyTorch model
# and apply identical CLS pooling and normalization.
with torch.no_grad():
    model_output1 = model(**encoded_input1)
    model_output2 = model(**encoded_input2)

embeddings_gt1 = model_output1[0].detach().cpu().numpy()[:, 0]
embeddings_gt1 /= np.linalg.norm(embeddings_gt1, axis=1, keepdims=True)
embeddings_gt2 = model_output2[0].detach().cpu().numpy()[:, 0]
embeddings_gt2 /= np.linalg.norm(embeddings_gt2, axis=1, keepdims=True)

similarity = embeddings_gt1 @ embeddings_gt2.T
print("gt similarity:", similarity)
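
# --- Optional accuracy check (a minimal sketch, not part of the original script) ---
# Since the script computes both NPU and FP32 embeddings for the same sentences,
# one can also compare them directly to quantify the quantization error.
# `cosine` is a hypothetical helper name introduced here for illustration;
# both inputs are already L2-normalized above, so a plain dot product is the
# cosine similarity, and a value near 1.0 suggests the u16-quantized .axmodel
# closely matches the FP32 reference.
def cosine(a, b):
    # a, b: shape (1, hidden_dim), already L2-normalized
    return float(np.dot(a[0], b[0]))

print("npu vs fp32 (sentence 1):", cosine(embeddings_1, embeddings_gt1))
print("npu vs fp32 (sentence 2):", cosine(embeddings_2, embeddings_gt2))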