---
library_name: transformers
tags: []
---
# Introduction
We introduce **Elb**edding, *TBD*
For more technical details, refer to our paper: *TBD*
# Model Details
- Base Decoder-only LLM: *TBD*
- Pooling Type: Last EOS Token
- Maximum context length: 512
- Embedding Dimension: 4096
# How to use with 🤗 Transformers?
```python
from typing import List

import torch
from transformers import AutoModel, AutoTokenizer


def get_detailed_instruct(queries: List[str]) -> List[str]:
    # Queries must carry the instruction prefix the model was trained with.
    return [f"Instruct: Retrieve semantically similar text.\nQuery: {query}" for query in queries]


def tokenize(sentences: List[str], tokenizer: AutoTokenizer):
    # Append the EOS token; its hidden state is pooled into the embedding.
    texts = [x + tokenizer.eos_token for x in sentences]
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512).to("cuda")
    # Guarantee the final position is the EOS token (covers truncated inputs).
    inputs.input_ids[:, -1] = tokenizer.eos_token_id
    inputs.pop("token_type_ids", None)
    return inputs


def pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor, do_normalize: bool = True) -> torch.Tensor:
    # With left padding, the last position of every sequence is the EOS token.
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        embeddings = last_hidden_state[:, -1]
    else:
        # With right padding, pick each sequence's last non-padding position.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_state.shape[0]
        embeddings = last_hidden_state[torch.arange(batch_size, device=last_hidden_state.device).long(), sequence_lengths.long()]
    if do_normalize:
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return embeddings


model = AutoModel.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
model = model.to("cuda")
model.eval()

sentences = ["Hi how are you doing?"]
# sentences = get_detailed_instruct(sentences)  # if the sentences are queries
sentences_inputs = tokenize(sentences=sentences, tokenizer=tokenizer)
with torch.no_grad():
    sentences_outputs = model(**sentences_inputs)
embeddings = pool(
    last_hidden_state=sentences_outputs.last_hidden_state,
    attention_mask=sentences_inputs.attention_mask,
)
print(embeddings)
```
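Since the pooled embeddings are L2-normalized, retrieval scores can be computed with a plain dot product. Below is a minimal sketch that continues the snippet above; the query and document texts are placeholders:

```python
# Continuing from the snippet above: score one query against two documents.
queries = get_detailed_instruct(["How is the weather today?"])  # instruction on the query side only
documents = ["The weather is lovely today.", "The stock market closed higher."]

query_inputs = tokenize(sentences=queries, tokenizer=tokenizer)
document_inputs = tokenize(sentences=documents, tokenizer=tokenizer)
with torch.no_grad():
    query_embeddings = pool(model(**query_inputs).last_hidden_state, query_inputs.attention_mask)
    document_embeddings = pool(model(**document_inputs).last_hidden_state, document_inputs.attention_mask)

# With normalized embeddings, the dot product equals cosine similarity.
scores = query_embeddings @ document_embeddings.T
print(scores)
```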
# How to use with Sentence Transformers?
```python
from typing import List

from sentence_transformers import SentenceTransformer


def get_detailed_instruct(queries: List[str]) -> List[str]:
    # Queries must carry the instruction prefix the model was trained with.
    return [f"Instruct: Retrieve semantically similar text.\nQuery: {query}" for query in queries]


model = SentenceTransformer("lamarr-llm-development/elbedding", trust_remote_code=True)

sentences = ["Hi how are you doing?"]
# sentences = get_detailed_instruct(sentences)  # if the sentences are queries
embeddings = model.encode(sentences=sentences, normalize_embeddings=True)
print(embeddings)
```
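For retrieval with Sentence Transformers, encode queries with the instruction prefix and documents without, then score with cosine similarity. A minimal sketch continuing the snippet above, with placeholder texts:

```python
from sentence_transformers import util

# Queries get the instruction prefix; documents are encoded as raw text.
query_embeddings = model.encode(get_detailed_instruct(["How is the weather today?"]), normalize_embeddings=True)
document_embeddings = model.encode(
    ["The weather is lovely today.", "The stock market closed higher."],
    normalize_embeddings=True,
)
print(util.cos_sim(query_embeddings, document_embeddings))
```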
## Supported Languages
*TBD*
## MTEB Benchmark Evaluation
*TBD*
## FAQ
**Do I need to add instructions to the query?**
Yes. The model is trained with an instruction prefix on the query side, so queries without it will see degraded retrieval performance. Documents, on the other hand, do not need any instruction.
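Concretely, the `get_detailed_instruct` helper from the snippets above produces the prompt format the model expects on the query side:

```python
# Query side: instruction prefix is required.
print(get_detailed_instruct(["How is the weather today?"])[0])
# Instruct: Retrieve semantically similar text.
# Query: How is the weather today?

# Document side: no prefix, encode the raw text.
```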
## Citation
*TBD*
## Limitations
*TBD*