Update README.md
Browse files
README.md
CHANGED
|
@@ -5,7 +5,7 @@ language:
|
|
| 5 |
language_details: "yue-Hant-HK; zh-Hant-HK"
|
| 6 |
license: cc-by-4.0
|
| 7 |
datasets:
|
| 8 |
-
-
|
| 9 |
metrics:
|
| 10 |
- accuracy
|
| 11 |
- exact_match
|
|
@@ -50,8 +50,8 @@ This model is also available at Zenodo: https://doi.org/10.5281/zenodo.16889492
|
|
| 50 |
```python
|
| 51 |
from transformers import ElectraTokenizer, ElectraForMaskedLM, pipeline
|
| 52 |
|
| 53 |
-
tokenizer = ElectraTokenizer.from_pretrained("
|
| 54 |
-
model = ElectraForMaskedLM.from_pretrained("
|
| 55 |
|
| 56 |
unmasker = pipeline("fill-mask", model=model, tokenizer=tokenizer)
|
| 57 |
unmasker("從中環[MASK]到尖沙咀。")
|
|
@@ -62,8 +62,8 @@ unmasker("從中環[MASK]到尖沙咀。")
|
|
| 62 |
```python
|
| 63 |
from transformers import ElectraTokenizer, ElectraForPreTraining
|
| 64 |
|
| 65 |
-
tokenizer = ElectraTokenizer.from_pretrained("
|
| 66 |
-
model = ElectraForPreTraining.from_pretrained("
|
| 67 |
|
| 68 |
inputs = tokenizer("從中環坐車到[MASK]。", return_tensors="pt")
|
| 69 |
outputs = model(**inputs) # logits for replaced token detection
|
|
|
|
| 5 |
language_details: "yue-Hant-HK; zh-Hant-HK"
|
| 6 |
license: cc-by-4.0
|
| 7 |
datasets:
|
| 8 |
+
- IKMLab-team/hk_content_corpus
|
| 9 |
metrics:
|
| 10 |
- accuracy
|
| 11 |
- exact_match
|
|
|
|
| 50 |
```python
|
| 51 |
from transformers import ElectraTokenizer, ElectraForMaskedLM, pipeline
|
| 52 |
|
| 53 |
+
tokenizer = ElectraTokenizer.from_pretrained("IKMLab-team/HKELECTRA", subfolder="generator/small")
|
| 54 |
+
model = ElectraForMaskedLM.from_pretrained("IKMLab-team/HKELECTRA", subfolder="generator/small")
|
| 55 |
|
| 56 |
unmasker = pipeline("fill-mask", model=model, tokenizer=tokenizer)
|
| 57 |
unmasker("從中環[MASK]到尖沙咀。")
|
|
|
|
| 62 |
```python
|
| 63 |
from transformers import ElectraTokenizer, ElectraForPreTraining
|
| 64 |
|
| 65 |
+
tokenizer = ElectraTokenizer.from_pretrained("IKMLab-team/HKELECTRA", subfolder="discriminator/small")
|
| 66 |
+
model = ElectraForPreTraining.from_pretrained("IKMLab-team/HKELECTRA", subfolder="discriminator/small")
|
| 67 |
|
| 68 |
inputs = tokenizer("從中環坐車到[MASK]。", return_tensors="pt")
|
| 69 |
outputs = model(**inputs) # logits for replaced token detection
|