Instructions to use GenerTeam/GENERanno-eukaryote-1.2b-cds-annotator-preview with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use GenerTeam/GENERanno-eukaryote-1.2b-cds-annotator-preview with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="GenerTeam/GENERanno-eukaryote-1.2b-cds-annotator-preview", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForTokenClassification

model = AutoModelForTokenClassification.from_pretrained("GenerTeam/GENERanno-eukaryote-1.2b-cds-annotator-preview", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
Update tokenizer.py
Browse files
- tokenizer.py +5 -6
tokenizer.py
CHANGED
|
@@ -53,12 +53,11 @@ class DNAKmerTokenizer(PreTrainedTokenizer):
|
|
| 53 |
"|".join(re.escape(token) for token in self.special_tokens)
|
| 54 |
)
|
| 55 |
self.dna_pattern = re.compile(f"[A-Z]{{{self.k}}}|[A-Z]+")
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
self.mask_token_id = self._convert_token_to_id(self.mask_token)
|
| 62 |
super().__init__(**kwargs)
|
| 63 |
|
| 64 |
@property
|
|
|
|
| 53 |
"|".join(re.escape(token) for token in self.special_tokens)
|
| 54 |
)
|
| 55 |
self.dna_pattern = re.compile(f"[A-Z]{{{self.k}}}|[A-Z]+")
|
| 56 |
+
kwargs.setdefault("bos_token", "<s>")
|
| 57 |
+
kwargs.setdefault("eos_token", "</s>")
|
| 58 |
+
kwargs.setdefault("mask_token", "<mask>")
|
| 59 |
+
kwargs.setdefault("unk_token", "<oov>")
|
| 60 |
+
kwargs.setdefault("pad_token", "<pad>")
|
|
|
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
@property
|