ybabakhin committed on
Commit
5129df8
·
verified ·
1 Parent(s): 07320ee

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -133,7 +133,7 @@ attn_implementation = "eager" # Or "flash_attention_2"
133
  model = SentenceTransformer(
134
  "nvidia/llama-embed-nemotron-8b",
135
  trust_remote_code=True,
136
- model_kwargs={"attn_implementation": attn_implementation, "torch_dtype": "float32"},
137
  tokenizer_kwargs={"padding_side": "left"},
138
  )
139
 
@@ -152,7 +152,7 @@ document_embeddings = model.encode_document(documents)
152
  scores = (query_embeddings @ document_embeddings.T)
153
 
154
  print(scores.tolist())
155
- # [[0.37646484375, 0.057891845703125]]
156
  ```
157
 
158
  Or using Hugging Face Transformers like here:
 
133
  model = SentenceTransformer(
134
  "nvidia/llama-embed-nemotron-8b",
135
  trust_remote_code=True,
136
+ model_kwargs={"attn_implementation": attn_implementation, "torch_dtype": "bfloat16"},
137
  tokenizer_kwargs={"padding_side": "left"},
138
  )
139
 
 
152
  scores = (query_embeddings @ document_embeddings.T)
153
 
154
  print(scores.tolist())
155
+ # [[0.3770667314529419, 0.05808388814330101]]
156
  ```
157
 
158
  Or using Hugging Face Transformers like here: