Instructions to use huuminh365/CustomBERT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use huuminh365/CustomBERT with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="huuminh365/CustomBERT")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("huuminh365/CustomBERT")
model = AutoModelForMaskedLM.from_pretrained("huuminh365/CustomBERT")
```
- Notebooks
- Google Colab
- Kaggle
Commit ·
c7cf1e8
1
Parent(s): 087b7ec
update tokenizer.py
Browse files
- tokenizer.py +3 -2
tokenizer.py
CHANGED
|
@@ -327,6 +327,9 @@ class PhobertTokenizer(PreTrainedTokenizer):
|
|
| 327 |
"""
|
| 328 |
Loads a pre-existing dictionary from a text file and adds its symbols to this instance.
|
| 329 |
"""
|
|
|
|
|
|
|
|
|
|
| 330 |
if isinstance(f, str):
|
| 331 |
try:
|
| 332 |
with open(f, "r", encoding="utf-8") as fd:
|
|
@@ -345,5 +348,3 @@ class PhobertTokenizer(PreTrainedTokenizer):
|
|
| 345 |
raise ValueError("Incorrect dictionary format, expected '<token> <cnt>'")
|
| 346 |
word = line[:idx]
|
| 347 |
self.encoder[word] = len(self.encoder)
|
| 348 |
-
for word in LATEX_VOC:
|
| 349 |
-
self.encoder[word] = len(self.encoder)
|
|
|
|
| 327 |
"""
|
| 328 |
Loads a pre-existing dictionary from a text file and adds its symbols to this instance.
|
| 329 |
"""
|
| 330 |
+
|
| 331 |
+
for word in LATEX_VOC:
|
| 332 |
+
self.encoder[word] = len(self.encoder)
|
| 333 |
if isinstance(f, str):
|
| 334 |
try:
|
| 335 |
with open(f, "r", encoding="utf-8") as fd:
|
|
|
|
| 348 |
raise ValueError("Incorrect dictionary format, expected '<token> <cnt>'")
|
| 349 |
word = line[:idx]
|
| 350 |
self.encoder[word] = len(self.encoder)
|
|
|
|
|
|