patch inference on CPU & Windows + Update README snippets
#2
by
tomaarsen
HF Staff
- opened
- README.md +31 -16
- config.json +0 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
|
@@ -6,6 +6,8 @@ base_model:
|
|
| 6 |
- answerdotai/ModernBERT-base
|
| 7 |
pipeline_tag: sentence-similarity
|
| 8 |
library_name: transformers
|
|
|
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
# gte-reranker-modernbert-base
|
|
@@ -32,28 +34,39 @@ The `gte-modernbert` models demonstrate competitive performance in several text
|
|
| 32 |
|
| 33 |
## Usage
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
```python
|
| 37 |
-
# Requires transformers>=4.
|
| 38 |
import torch
|
| 39 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 40 |
|
| 41 |
-
model_name_or_path =
|
| 42 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
|
| 43 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 44 |
-
model_name_or_path,
|
| 45 |
-
torch_dtype=torch.float16
|
| 46 |
)
|
| 47 |
model.eval()
|
| 48 |
|
| 49 |
-
pairs = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
with torch.no_grad():
|
| 52 |
inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
|
| 53 |
scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
|
| 54 |
print(scores)
|
| 55 |
|
| 56 |
-
# tensor([
|
| 57 |
```
|
| 58 |
Use with `sentence-transformers`:
|
| 59 |
|
|
@@ -63,22 +76,24 @@ pip install sentence-transformers
|
|
| 63 |
```
|
| 64 |
|
| 65 |
```python
|
| 66 |
-
# Requires
|
| 67 |
from sentence_transformers import CrossEncoder
|
| 68 |
|
| 69 |
-
model_name_or_path = 'Alibaba-NLP/gte-reranker-modernbert-base'
|
| 70 |
-
|
| 71 |
model = CrossEncoder(
|
| 72 |
-
|
| 73 |
automodel_args={"torch_dtype": "auto"},
|
| 74 |
-
trust_remote_code=True,
|
| 75 |
)
|
| 76 |
|
| 77 |
-
pairs = [
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
| 82 |
```
|
| 83 |
|
| 84 |
## Training Details
|
|
|
|
| 6 |
- answerdotai/ModernBERT-base
|
| 7 |
pipeline_tag: sentence-similarity
|
| 8 |
library_name: transformers
|
| 9 |
+
tags:
|
| 10 |
+
- sentence-transformers
|
| 11 |
---
|
| 12 |
|
| 13 |
# gte-reranker-modernbert-base
|
|
|
|
| 34 |
|
| 35 |
## Usage
|
| 36 |
|
| 37 |
+
> [!TIP]
|
| 38 |
+
> For both `transformers` and `sentence-transformers`, the efficient Flash Attention 2 is used automatically when your GPU supports it and `flash_attn` is installed. Installing `flash_attn` is optional.
|
| 39 |
+
>
|
| 40 |
+
> ```bash
|
| 41 |
+
> pip install flash_attn
|
| 42 |
+
> ```
|
| 43 |
+
|
| 44 |
+
Use with `transformers`:
|
| 45 |
```python
|
| 46 |
+
# Requires transformers>=4.48.0
|
| 47 |
import torch
|
| 48 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 49 |
|
| 50 |
+
model_name_or_path = "Alibaba-NLP/gte-reranker-modernbert-base"
|
| 51 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
|
| 52 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 53 |
+
model_name_or_path,
|
| 54 |
+
torch_dtype=torch.float16,
|
| 55 |
)
|
| 56 |
model.eval()
|
| 57 |
|
| 58 |
+
pairs = [
|
| 59 |
+
["what is the capital of China?", "Beijing"],
|
| 60 |
+
["how to implement quick sort in python?", "Introduction of quick sort"],
|
| 61 |
+
["how to implement quick sort in python?", "The weather is nice today"],
|
| 62 |
+
]
|
| 63 |
|
| 64 |
with torch.no_grad():
|
| 65 |
inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
|
| 66 |
scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
|
| 67 |
print(scores)
|
| 68 |
|
| 69 |
+
# tensor([ 2.1387, 2.4609, -1.6729])
|
| 70 |
```
|
| 71 |
Use with `sentence-transformers`:
|
| 72 |
|
|
|
|
| 76 |
```
|
| 77 |
|
| 78 |
```python
|
| 79 |
+
# Requires transformers>=4.48.0
|
| 80 |
from sentence_transformers import CrossEncoder
|
| 81 |
|
|
|
|
|
|
|
| 82 |
model = CrossEncoder(
|
| 83 |
+
"Alibaba-NLP/gte-reranker-modernbert-base",
|
| 84 |
automodel_args={"torch_dtype": "auto"},
|
|
|
|
| 85 |
)
|
| 86 |
|
| 87 |
+
pairs = [
|
| 88 |
+
["what is the capital of China?", "Beijing"],
|
| 89 |
+
["how to implement quick sort in python?","Introduction of quick sort"],
|
| 90 |
+
["how to implement quick sort in python?", "The weather is nice today"],
|
| 91 |
+
]
|
| 92 |
|
| 93 |
+
scores = model.predict(pairs)
|
| 94 |
+
print(scores)
|
| 95 |
+
# [0.8945664 0.9213594 0.15742092]
|
| 96 |
+
# NOTE: Sentence Transformers applies a Sigmoid to the single-logit output by default, hence the scores are in the [0, 1] range.
|
| 97 |
```
|
| 98 |
|
| 99 |
## Training Details
|
config.json
CHANGED
|
@@ -42,7 +42,6 @@
|
|
| 42 |
"num_hidden_layers": 22,
|
| 43 |
"pad_token_id": 50283,
|
| 44 |
"position_embedding_type": "absolute",
|
| 45 |
-
"reference_compile": true,
|
| 46 |
"sep_token_id": 50282,
|
| 47 |
"sparse_pred_ignore_index": -100,
|
| 48 |
"sparse_prediction": false,
|
|
|
|
| 42 |
"num_hidden_layers": 22,
|
| 43 |
"pad_token_id": 50283,
|
| 44 |
"position_embedding_type": "absolute",
|
|
|
|
| 45 |
"sep_token_id": 50282,
|
| 46 |
"sparse_pred_ignore_index": -100,
|
| 47 |
"sparse_prediction": false,
|
tokenizer_config.json
CHANGED
|
@@ -938,7 +938,7 @@
|
|
| 938 |
"input_ids",
|
| 939 |
"attention_mask"
|
| 940 |
],
|
| 941 |
-
"model_max_length":
|
| 942 |
"pad_to_multiple_of": null,
|
| 943 |
"pad_token": "[PAD]",
|
| 944 |
"pad_token_type_id": 0,
|
|
|
|
| 938 |
"input_ids",
|
| 939 |
"attention_mask"
|
| 940 |
],
|
| 941 |
+
"model_max_length": 8192,
|
| 942 |
"pad_to_multiple_of": null,
|
| 943 |
"pad_token": "[PAD]",
|
| 944 |
"pad_token_type_id": 0,
|