Move V1 auto-formatting (token dictionary override and emb_mode adjustment) to after validate_options()

Files changed (3)

geneformer/classifier.py CHANGED Viewed

@@ -234,10 +234,6 @@ class Classifier:
         self.token_dictionary_file = token_dictionary_file
         self.nproc = nproc
         self.ngpu = ngpu
-        if self.model_version == "V1":
-            from . import TOKEN_DICTIONARY_FILE_30M
-            self.token_dictionary_file = TOKEN_DICTIONARY_FILE_30M
         if self.training_args is None:
             logger.warning(
@@ -258,7 +254,10 @@ class Classifier:
                 ] = self.cell_state_dict["states"]
         # load token dictionary (Ensembl IDs:token)
-        if self.token_dictionary_file is None:
             self.token_dictionary_file = TOKEN_DICTIONARY_FILE
         with open(self.token_dictionary_file, "rb") as f:
             self.gene_token_dict = pickle.load(f)

         self.token_dictionary_file = token_dictionary_file
         self.nproc = nproc
         self.ngpu = ngpu
         if self.training_args is None:
             logger.warning(
                 ] = self.cell_state_dict["states"]
         # load token dictionary (Ensembl IDs:token)
+        if self.model_version == "V1":
+            from . import TOKEN_DICTIONARY_FILE_30M
+            self.token_dictionary_file = TOKEN_DICTIONARY_FILE_30M
+        elif self.token_dictionary_file is None:
             self.token_dictionary_file = TOKEN_DICTIONARY_FILE
         with open(self.token_dictionary_file, "rb") as f:
             self.gene_token_dict = pickle.load(f)

geneformer/emb_extractor.py CHANGED Viewed

@@ -518,6 +518,8 @@ class EmbExtractor:
             self.summary_stat = summary_stat
             self.exact_summary_stat = None
         if self.model_version == "V1":
             from . import TOKEN_DICTIONARY_FILE_30M
             self.token_dictionary_file = TOKEN_DICTIONARY_FILE_30M
@@ -527,8 +529,6 @@ class EmbExtractor:
                     "model_version selected as V1 so changing emb_mode from 'cls' to 'cell' as V1 models do not have a <cls> token."
                 )
-        self.validate_options()
         # load token dictionary (Ensembl IDs:token)
         if self.token_dictionary_file is None:
             token_dictionary_file = TOKEN_DICTIONARY_FILE

             self.summary_stat = summary_stat
             self.exact_summary_stat = None
+        self.validate_options()
         if self.model_version == "V1":
             from . import TOKEN_DICTIONARY_FILE_30M
             self.token_dictionary_file = TOKEN_DICTIONARY_FILE_30M
                     "model_version selected as V1 so changing emb_mode from 'cls' to 'cell' as V1 models do not have a <cls> token."
                 )
         # load token dictionary (Ensembl IDs:token)
         if self.token_dictionary_file is None:
             token_dictionary_file = TOKEN_DICTIONARY_FILE

geneformer/in_silico_perturber.py CHANGED Viewed

@@ -231,7 +231,9 @@ class InSilicoPerturber:
         self.nproc = nproc
         self.model_version = model_version
         self.token_dictionary_file = token_dictionary_file
-        self.clear_mem_ncells = clear_mem_ncells
         if self.model_version == "V1":
             from . import TOKEN_DICTIONARY_FILE_30M
@@ -245,10 +247,8 @@ class InSilicoPerturber:
                 self.emb_mode = "cell_and_gene"
                 logger.warning(
                     "model_version selected as V1 so changing emb_mode from 'cls_and_gene' to 'cell_and_gene' as V1 models do not have a <cls> token."
-                )
-        self.validate_options()
         # load token dictionary (Ensembl IDs:token)
         if self.token_dictionary_file is None:
             token_dictionary_file = TOKEN_DICTIONARY_FILE

         self.nproc = nproc
         self.model_version = model_version
         self.token_dictionary_file = token_dictionary_file
+        self.clear_mem_ncells = clear_mem_ncells
+        self.validate_options()
         if self.model_version == "V1":
             from . import TOKEN_DICTIONARY_FILE_30M
                 self.emb_mode = "cell_and_gene"
                 logger.warning(
                     "model_version selected as V1 so changing emb_mode from 'cls_and_gene' to 'cell_and_gene' as V1 models do not have a <cls> token."
+                )
         # load token dictionary (Ensembl IDs:token)
         if self.token_dictionary_file is None:
             token_dictionary_file = TOKEN_DICTIONARY_FILE