Instructions to use paulhindemith/fasttext-jp-embedding with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use paulhindemith/fasttext-jp-embedding with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="paulhindemith/fasttext-jp-embedding", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("paulhindemith/fasttext-jp-embedding", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Commit ·
8acc99c
1
Parent(s): 1680e48
commit files to HF hub
Browse files
- fasttext_jp_tokenizer.py +4 -1
fasttext_jp_tokenizer.py
CHANGED
|
@@ -100,7 +100,10 @@ class FastTextJpTokenizer(MeCabTokenizer):
|
|
| 100 |
Returns:
|
| 101 |
int: ID
|
| 102 |
"""
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
def _convert_id_to_token(self, index: int) -> str:
|
| 106 |
"""IDから単語
|
|
|
|
| 100 |
Returns:
|
| 101 |
int: ID
|
| 102 |
"""
|
| 103 |
+
id = self.stoi.get(token)
|
| 104 |
+
if id is not None:
|
| 105 |
+
return id
|
| 106 |
+
return self.stoi[self.unk_token]
|
| 107 |
|
| 108 |
def _convert_id_to_token(self, index: int) -> str:
|
| 109 |
"""IDから単語
|