G2PTL Update
Browse files
README.md
CHANGED
|
@@ -4,11 +4,11 @@ license: apache-2.0
|
|
| 4 |
---
|
| 5 |
|
| 6 |
|
| 7 |
-
# G2PTL
|
| 8 |
|
| 9 |
## Introduction
|
| 10 |
|
| 11 |
-
G2PTL: A Geography-Graph Pre-trained model for address.
|
| 12 |
|
| 13 |
|
| 14 |
## Model description
|
|
@@ -47,8 +47,8 @@ You can use this model directly with a pipeline for masked language modeling:
|
|
| 47 |
|
| 48 |
```Python
|
| 49 |
>>> from transformers import pipeline, AutoModel, AutoTokenizer
|
| 50 |
-
>>> model = AutoModel.from_pretrained('
|
| 51 |
-
>>> tokenizer = AutoTokenizer.from_pretrained('
|
| 52 |
|
| 53 |
>>> mask_filler = pipeline(task= 'fill-mask', model= model,tokenizer = tokenizer)
|
| 54 |
>>> mask_filler("浙江省杭州市[MASK]杭区五常街道阿里巴巴西溪园区")
|
|
@@ -80,8 +80,8 @@ You can also use this model for multiple [MASK] filling in PyTorch:
|
|
| 80 |
```python
|
| 81 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 82 |
import torch
|
| 83 |
-
model = AutoModel.from_pretrained('
|
| 84 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
| 85 |
model.eval()
|
| 86 |
text = ['浙江省杭州市[MASK][MASK][MASK]五常街道阿里巴巴西溪园区']
|
| 87 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
@@ -101,8 +101,8 @@ Here is how to use this model to get the HTC output of a given text in PyTorch:
|
|
| 101 |
|
| 102 |
```python
|
| 103 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 104 |
-
model = AutoModel.from_pretrained('
|
| 105 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
| 106 |
model.eval()
|
| 107 |
text = "浙江省杭州市五常街道阿里巴巴西溪园区"
|
| 108 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
@@ -119,8 +119,8 @@ Here is how to use this model to get the features/embeddings of a given text in
|
|
| 119 |
|
| 120 |
```python
|
| 121 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 122 |
-
model = AutoModel.from_pretrained('
|
| 123 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
| 124 |
model.eval()
|
| 125 |
text = "浙江省杭州市余杭区五常街道阿里巴巴西溪园区"
|
| 126 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
@@ -133,8 +133,8 @@ Here is how to use this model to get cosine similarity between two address texts
|
|
| 133 |
```python
|
| 134 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 135 |
import torch
|
| 136 |
-
model = AutoModel.from_pretrained('
|
| 137 |
-
tokenizer = AutoTokenizer.from_pretrained('
|
| 138 |
model.eval()
|
| 139 |
text = ["浙江省杭州市余杭区五常街道阿里巴巴西溪园区", "浙江省杭州市阿里巴巴西溪园区"]
|
| 140 |
encoded_input = tokenizer(text, return_tensors='pt', padding=True)
|
|
|
|
| 4 |
---
|
| 5 |
|
| 6 |
|
| 7 |
+
# G2PTL-1
|
| 8 |
|
| 9 |
## Introduction
|
| 10 |
|
| 11 |
+
G2PTL-1: A Geography-Graph Pre-trained model for address. This is the first version of G2PTL (v1.0).
|
| 12 |
|
| 13 |
|
| 14 |
## Model description
|
|
|
|
| 47 |
|
| 48 |
```Python
|
| 49 |
>>> from transformers import pipeline, AutoModel, AutoTokenizer
|
| 50 |
+
>>> model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 51 |
+
>>> tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 52 |
|
| 53 |
>>> mask_filler = pipeline(task= 'fill-mask', model= model,tokenizer = tokenizer)
|
| 54 |
>>> mask_filler("浙江省杭州市[MASK]杭区五常街道阿里巴巴西溪园区")
|
|
|
|
| 80 |
```python
|
| 81 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 82 |
import torch
|
| 83 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 84 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 85 |
model.eval()
|
| 86 |
text = ['浙江省杭州市[MASK][MASK][MASK]五常街道阿里巴巴西溪园区']
|
| 87 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
|
| 101 |
|
| 102 |
```python
|
| 103 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 104 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 105 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 106 |
model.eval()
|
| 107 |
text = "浙江省杭州市五常街道阿里巴巴西溪园区"
|
| 108 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
|
| 119 |
|
| 120 |
```python
|
| 121 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 122 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 123 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 124 |
model.eval()
|
| 125 |
text = "浙江省杭州市余杭区五常街道阿里巴巴西溪园区"
|
| 126 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
|
| 133 |
```python
|
| 134 |
from transformers import pipeline, AutoModel, AutoTokenizer
|
| 135 |
import torch
|
| 136 |
+
model = AutoModel.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 137 |
+
tokenizer = AutoTokenizer.from_pretrained('Cainiao-AI/G2PTL', trust_remote_code=True)
|
| 138 |
model.eval()
|
| 139 |
text = ["浙江省杭州市余杭区五常街道阿里巴巴西溪园区", "浙江省杭州市阿里巴巴西溪园区"]
|
| 140 |
encoded_input = tokenizer(text, return_tensors='pt', padding=True)
|