Instructions to use ctheodoris/Geneformer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ctheodoris/Geneformer with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="ctheodoris/Geneformer")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("ctheodoris/Geneformer")
model = AutoModelForMaskedLM.from_pretrained("ctheodoris/Geneformer")
```
- Inference
- Notebooks
- Google Colab
- Kaggle
Fix typo
#301
by hchen725 - opened
- geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
|
@@ -366,7 +366,7 @@ class TranscriptomeTokenizer:
|
|
| 366 |
example["length_uncropped"] = len(example["input_ids"])
|
| 367 |
|
| 368 |
# Truncate/Crop input_ids to input size
|
| 369 |
-
if
|
| 370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
| 371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
| 372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|
|
|
|
| 366 |
example["length_uncropped"] = len(example["input_ids"])
|
| 367 |
|
| 368 |
# Truncate/Crop input_ids to input size
|
| 369 |
+
if self.special_token:
|
| 370 |
example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
|
| 371 |
example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
|
| 372 |
example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))
|