Hugging Face Spaces page capture — Space build status: Runtime error.
import os

from dotenv import load_dotenv
import torch
from transformers import AutoTokenizer, AutoModel

# Load HF_TOKEN from a local .env file (empty string when absent).
load_dotenv()
huggingface_token = os.environ.get("HF_TOKEN", "")

# FIX: tokenizer and model must come from the SAME checkpoint. The original
# loaded the tokenizer from "xValentim/vector-search-bert-based" while the
# model came from the repo below — mismatched vocabularies misalign token ids
# with the model's embedding matrix and can crash at inference (the Space
# reported "Runtime error"). `low_cpu_mem_usage` is also a model-loading
# option, not a tokenizer one, so it is only passed to the model.
MODEL_NAME = "jegorkitskerkin/robbert-v2-dutch-base-mqa-finetuned"

auto_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
# Inference only: disable dropout so embeddings are deterministic.
model.eval()
def get_embeddings(text_list):
    """Embed `text_list` and return the vectors as one flat 1D list of floats.

    `text_list` may be a single string or a list of strings (whatever the
    tokenizer accepts). Inputs are padded/truncated to at most 500 tokens.

    NOTE(review): the result is flattened, so when `text_list` contains more
    than one text the per-text vectors are concatenated into a single list —
    callers appear to expect a single text. TODO confirm against callers.
    """
    encoded_input = auto_tokenizer(
        text_list,
        padding=True,
        truncation=True,
        max_length=500,
        return_tensors="pt",
        add_special_tokens=True,
    )
    # FIX: inference only — torch.no_grad() skips building the autograd
    # graph, cutting memory use; the original kept gradients and only
    # detached afterwards.
    with torch.no_grad():
        model_output = model(**encoded_input)
        embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
    # Make 1D vector (flatten batch x hidden into one list).
    return embeddings.numpy().flatten().tolist()
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over real (non-padding) positions.

    `model_output[0]` is the last hidden state, shape (batch, seq, hidden);
    `attention_mask` is (batch, seq) with 1 for real tokens, 0 for padding.
    Returns a (batch, hidden) tensor of masked mean embeddings.
    """
    hidden_states = model_output[0]
    # Broadcast the mask across the hidden dimension so padded positions
    # contribute nothing to the sum.
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    summed = (hidden_states * mask).sum(dim=1)
    # Clamp the token count to avoid division by zero for all-padding rows.
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts