Update README.md
Browse files
README.md
CHANGED
|
@@ -21,4 +21,24 @@ Result from default T5 tokenizer (just as an example):
|
|
| 21 |
Result from this tokenizer:
|
| 22 |
```
|
| 23 |
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
| 24 |
```
|
|
|
|
| 21 |
Result from this tokenizer:
|
| 22 |
```
|
| 23 |
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
# How to use
|
| 27 |
+
|
| 28 |
+
```python
|
| 29 |
+
from transformers import AutoTokenizer
|
| 30 |
+
tokenizer = AutoTokenizer.from_pretrained("InfAI/sparql-tokenizer")
|
| 31 |
+
tokenizer.tokenize("SELECT ?answer WHERE { wd:Q825946 wdt:P371 ?X . ?X wdt:P2048 ?answer}")
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
```
|
| 35 |
+
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
```python
|
| 39 |
+
tokenizer("SELECT ?answer WHERE { wd:Q825946 wdt:P371 ?X . ?X wdt:P2048 ?answer}")
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
```
|
| 43 |
+
{'input_ids': [441, 444, 431, 422, 606, 1388, 720, 1791, 456, 418, 456, 3657, 444, 185], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
| 44 |
```
|