Update README.md
Browse files
README.md
CHANGED
|
@@ -133,7 +133,7 @@ attn_implementation = "eager" # Or "flash_attention_2"
|
|
| 133 |
model = SentenceTransformer(
|
| 134 |
"nvidia/llama-embed-nemotron-8b",
|
| 135 |
trust_remote_code=True,
|
| 136 |
-
model_kwargs={"attn_implementation": attn_implementation, "torch_dtype": "
|
| 137 |
tokenizer_kwargs={"padding_side": "left"},
|
| 138 |
)
|
| 139 |
|
|
@@ -152,7 +152,7 @@ document_embeddings = model.encode_document(documents)
|
|
| 152 |
scores = (query_embeddings @ document_embeddings.T)
|
| 153 |
|
| 154 |
print(scores.tolist())
|
| 155 |
-
# [[0.
|
| 156 |
```
|
| 157 |
|
| 158 |
Or using Hugging Face Transformers like here:
|
|
|
|
| 133 |
model = SentenceTransformer(
|
| 134 |
"nvidia/llama-embed-nemotron-8b",
|
| 135 |
trust_remote_code=True,
|
| 136 |
+
model_kwargs={"attn_implementation": attn_implementation, "torch_dtype": "bfloat16"},
|
| 137 |
tokenizer_kwargs={"padding_side": "left"},
|
| 138 |
)
|
| 139 |
|
|
|
|
| 152 |
scores = (query_embeddings @ document_embeddings.T)
|
| 153 |
|
| 154 |
print(scores.tolist())
|
| 155 |
+
# [[0.3770667314529419, 0.05808388814330101]]
|
| 156 |
```
|
| 157 |
|
| 158 |
Or using Hugging Face Transformers like here:
|