Instructions to use ctheodoris/Geneformer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ctheodoris/Geneformer with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="ctheodoris/Geneformer")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("ctheodoris/Geneformer")
model = AutoModelForMaskedLM.from_pretrained("ctheodoris/Geneformer")
- Inference
- Notebooks
- Google Colab
- Kaggle
Get the gene keys and gene list keys from the token dictionary instead of medians
#304
by hchen725 - opened
- geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
|
@@ -132,7 +132,7 @@ class TranscriptomeTokenizer:
|
|
| 132 |
self.gene_token_dict = pickle.load(f)
|
| 133 |
|
| 134 |
# gene keys for full vocabulary
|
| 135 |
-
self.gene_keys = list(self.gene_median_dict.keys())
|
| 136 |
|
| 137 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
| 138 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|
|
|
|
| 132 |
self.gene_token_dict = pickle.load(f)
|
| 133 |
|
| 134 |
# gene keys for full vocabulary
|
| 135 |
+
self.gene_keys = list(self.gene_token_dict.keys())
|
| 136 |
|
| 137 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
| 138 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|