Commit
·
c1b87e2
1
Parent(s):
4c939c0
Update README.md
Browse files
README.md
CHANGED
|
@@ -17,15 +17,27 @@ via Contrastive Fine-Tuning of mMiniLMv2 without References
|
|
| 17 |
## How to use
|
| 18 |
|
| 19 |
```python
|
|
|
|
| 20 |
from transformers import AutoTokenizer, AutoModel
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained("aixplain/NoRef-ER")
|
| 23 |
model = AutoModel.from_pretrained("aixplain/NoRef-ER")
|
| 24 |
|
| 25 |
-
|
|
|
|
| 26 |
"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced.",
|
| 27 |
"In Italy, pizzas serves in formal settings, such as at an restaurant, is presented unslicing."
|
| 28 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
scores = model.score(**tokens)
|
| 30 |
```
|
| 31 |
|
|
|
|
| 17 |
## How to use
|
| 18 |
|
| 19 |
```python
|
| 20 |
+
import re
|
| 21 |
from transformers import AutoTokenizer, AutoModel
|
| 22 |
|
| 23 |
+
def preprocess(text: str) -> str:
    """Normalize ``text`` before it is tokenized.

    Steps, in order:
      1. Lowercase the whole string.
      2. Remove every span that opens with ``(`` or ``[`` and runs to the
         nearest ``)`` or ``]`` (non-greedy; ``.`` does not cross newlines,
         so a span is only removed when it closes on the same line).
      3. Remove every remaining character that is neither a word character
         nor whitespace (i.e. punctuation).

    Args:
        text: Raw input sentence.

    Returns:
        The cleaned string. Whitespace is left untouched, so removing a
        bracketed span can leave two adjacent spaces behind.
    """
    text = text.lower()
    # Bracketed/parenthesized asides are dropped together with their contents.
    text = re.sub(r'[\(\[].*?[\)\]]', '', text)
    # Strip punctuation; \w keeps letters, digits and underscores.
    text = re.sub(r'[^\w\s]', '', text)
    return text
|
| 28 |
+
|
| 29 |
# Load the tokenizer and the model from the same "aixplain/NoRef-ER" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("aixplain/NoRef-ER")
|
| 30 |
model = AutoModel.from_pretrained("aixplain/NoRef-ER")
|
| 31 |
|
| 32 |
+
# Preprocess: every input sentence is passed through preprocess() before tokenizing.
# NOTE(review): the second sentence looks deliberately ungrammatical (example input) - confirm before "fixing" it.
|
| 33 |
+
texts = [
|
| 34 |
"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced.",
|
| 35 |
"In Italy, pizzas serves in formal settings, such as at an restaurant, is presented unslicing."
|
| 36 |
+
]
|
| 37 |
+
preprocessed_texts = [preprocess(text) for text in texts]
|
| 38 |
+
# Tokenize the preprocessed batch with padding enabled, returning PyTorch ("pt") tensors.
|
| 39 |
+
tokens = tokenizer(preprocessed_texts, padding=True, return_tensors="pt")
|
| 40 |
+
# Score: the tokenizer output is unpacked as keyword arguments to model.score().
|
| 41 |
scores = model.score(**tokens)
|
| 42 |
```
|
| 43 |
|