Update README.md
Browse files
README.md
CHANGED
|
@@ -14,7 +14,7 @@ For more technical details, refer to our paper: ...
|
|
| 14 |
- Maximum context length: 512
|
| 15 |
- Embedding Dimension: 4096
|
| 16 |
|
| 17 |
-
# How to use?
|
| 18 |
|
| 19 |
```python
|
| 20 |
from typing import List
|
|
@@ -46,31 +46,35 @@ def pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor, do_norma
|
|
| 46 |
return embeddings
|
| 47 |
|
| 48 |
|
| 49 |
-
model = AutoModel.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/
|
| 50 |
-
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/
|
| 51 |
|
| 52 |
model = model.to("cuda")
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
passages = [
|
| 64 |
-
"Konrad Adenauer (geboren am 5. Januar 1876 in Köln; gestorben am 19. April 1967 in Rhöndorf) war ein deutscher Politiker und der erste Bundeskanzler der Bundesrepublik Deutschland von 1949 bis 1963. Er war einer der Gründerväter der Bundesrepublik von Deutschland und spielte eine Schlüsselrolle beim Wiederaufbau nach dem Zweiten Weltkrieg.",
|
| 65 |
-
"Nürnberg ist eine Stadt im deutschen Bundesland Bayern. Es ist bekannt für seine historische Altstadt, mittelalterliche Befestigungsanlagen und seinen jährlichen Weihnachtsmarkt. Nürnberg ist auch für seine Bratwurst bekannt, eine Wurstsorte, die in Deutschland ein beliebtes Streetfood ist."
|
| 66 |
-
]
|
| 67 |
-
passages_inputs = tokenize(sentences=passages, tokenizer=tokenizer)
|
| 68 |
-
passages_outputs = model(**passages_inputs)
|
| 69 |
-
passages_embs = pool(last_hidden_state=passages_outputs.last_hidden_state, attention_mask=passages_inputs.attention_mask)
|
| 70 |
-
|
| 71 |
-
scores = (queries_embs @ passages_embs.T) * 100
|
| 72 |
-
print(scores.tolist())
|
| 73 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
## Supported Languages
|
| 76 |
...
|
|
|
|
| 14 |
- Maximum context length: 512
|
| 15 |
- Embedding Dimension: 4096
|
| 16 |
|
| 17 |
+
# How to use with Transformers?
|
| 18 |
|
| 19 |
```python
|
| 20 |
from typing import List
|
|
|
|
| 46 |
return embeddings
|
| 47 |
|
| 48 |
|
| 49 |
+
model = AutoModel.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
|
| 50 |
+
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="lamarr-llm-development/elbedding", trust_remote_code=True)
|
| 51 |
|
| 52 |
model = model.to("cuda")
|
| 53 |
|
| 54 |
+
sentences = ["Hi how are you doing?"]
|
| 55 |
+
# sentences = get_detailed_instruct(sentences)  # uncomment if the sentences are queries
|
| 56 |
+
sentences_inputs = tokenize(sentences=sentences, tokenizer=tokenizer)
|
| 57 |
+
sentences_outputs = model(**sentences_inputs)
|
| 58 |
+
embeddings = pool(
|
| 59 |
+
last_hidden_state=sentences_outputs.last_hidden_state,
|
| 60 |
+
attention_mask=sentences_inputs.attention_mask,
|
| 61 |
+
)
|
| 62 |
+
print(embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
```
|
| 64 |
+
# How to use with Sentence Transformers?
|
| 65 |
+
|
| 66 |
+
```python
|
| 67 |
+
from sentence_transformers import SentenceTransformer
|
| 68 |
+
|
| 69 |
+
model = SentenceTransformer("lamarr-llm-development/elbedding", trust_remote_code=True)
|
| 70 |
+
|
| 71 |
+
# If the sentences are queries, apply `sentences = get_detailed_instruct(sentences)` after `sentences` is defined below.
|
| 72 |
+
sentences = ["Hi how are you doing?"]
|
| 73 |
+
embeddings = model.encode(sentences=sentences, normalize_embeddings=True)
|
| 74 |
+
print(embeddings)
|
| 75 |
+
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
|
| 79 |
## Supported Languages
|
| 80 |
...
|