ctheodoris
/

Geneformer

Model card | Files and versions

Fix typo: use `self.special_token` instead of `tk.special_token` in the tokenizer

#301

by hchen725 - opened Feb 23, 2024

base: refs/heads/main

←

from: refs/pr/301

Discussion Files changed

Files changed (1) hide show

geneformer/tokenizer.py +1 -1

geneformer/tokenizer.py CHANGED Viewed

@@ -366,7 +366,7 @@ class TranscriptomeTokenizer:
                 example["length_uncropped"] = len(example["input_ids"])
             # Truncate/Crop input_ids to input size
-            if tk.special_token:
                 example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
                 example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
                 example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))

                 example["length_uncropped"] = len(example["input_ids"])
             # Truncate/Crop input_ids to input size
+            if self.special_token:
                 example["input_ids"] = example["input_ids"][0:self.input_size-2] # truncate to leave space for CLS and SEP token
                 example["input_ids"] = np.insert(example["input_ids"], 0, self.gene_token_dict.get("<cls>"))
                 example["input_ids"] = np.insert(example["input_ids"], len(example["input_ids"]), self.gene_token_dict.get("<sep>"))