Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import torch | |
| from src.tools.api import get_openai_embedding | |
def get_top_k_indices(emb: torch.FloatTensor,
                      candidate_embs: torch.FloatTensor,
                      return_similarity=False, k=-1) -> list:
    '''
    Rank candidates by their dot-product score against a query embedding.

    The score vector is ``emb @ candidate_embs.T`` flattened to 1-D, so this
    presumably expects a single query (TODO confirm with callers).

    Args:
        emb (torch.Tensor): embedding of the query
        candidate_embs (torch.Tensor): embeddings of the candidates
        return_similarity (bool): if True, also return the scores of the
            selected candidates
        k (int): number of candidates to return.
            If k <= 0, rank all candidates

    Returns:
        list: candidate indices ordered from highest to lowest score. When
            return_similarity is True, a tuple ``(indices, scores)`` is
            returned instead, where scores is a CPU tensor aligned with
            indices.
    '''
    # Do the matmul on the GPU when one is available; results go back to CPU.
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scores = (emb.to(target) @ candidate_embs.to(target).T).cpu().view(-1)

    order = (
        # Clamp k so asking for more than the pool size does not raise.
        torch.topk(scores, k=min(k, len(scores)), dim=-1, sorted=True).indices
        if k > 0
        else torch.argsort(scores, dim=-1, descending=True)
    )
    ranked = order.view(-1).cpu().tolist()

    return (ranked, scores[ranked]) if return_similarity else ranked
def sentence_emb_similarity(s1, s2):
    '''
    Score two sentences by the matrix product of their OpenAI embeddings.

    Each sentence is embedded with ``get_openai_embedding`` and the result
    is ``emb1 @ emb2.T`` flattened to 1-D. NOTE(review): this equals cosine
    similarity only if the embeddings are unit-normalized -- confirm.

    Args:
        s1 (str): sentence 1
        s2 (str): sentence 2

    Returns:
        torch.Tensor: flattened 1-D CPU tensor holding the product.
    '''
    # Use the GPU for the product when available; the result returns to CPU.
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    first, second = (
        get_openai_embedding(sentence).to(target) for sentence in (s1, s2)
    )
    product = first @ second.T
    return product.view(-1).cpu()
def normalize(x: torch.FloatTensor) -> torch.FloatTensor:
    '''
    Min-max rescale a tensor so that its values span [0, 1].

    Args:
        x (torch.Tensor): tensor to normalize

    Returns:
        torch.Tensor: tensor with the same shape as x, computed as
            (x - min) / (max - min) over all elements. If every element is
            equal, the result is all zeros instead of NaN. An empty tensor
            is returned as an empty copy.
    '''
    if x.numel() == 0:
        # min()/max() raise on empty tensors; there is nothing to rescale.
        return x.clone()

    lowest = x.min()
    span = x.max() - lowest
    # A constant input gives span == 0 and would produce 0/0 = NaN.
    # Dividing by 1 instead yields all zeros for that case.
    safe_span = torch.where(span == 0, torch.ones_like(span), span)
    return (x - lowest) / safe_span