Christina Theodoris committed on
Commit ·
e78c44d
1
Parent(s): 2181aa4
Modify tokenizer to allow renaming attr names btwn loom and .dataset
Browse files
- geneformer/tokenizer.py +4 -3
geneformer/tokenizer.py
CHANGED
|
@@ -106,7 +106,8 @@ class TranscriptomeTokenizer:
|
|
| 106 |
|
| 107 |
def tokenize_files(self, loom_data_directory):
|
| 108 |
tokenized_cells = []
|
| 109 |
-
|
|
|
|
| 110 |
|
| 111 |
# loops through directories to tokenize .loom files
|
| 112 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
|
@@ -115,8 +116,8 @@ class TranscriptomeTokenizer:
|
|
| 115 |
loom_file_path
|
| 116 |
)
|
| 117 |
tokenized_cells += file_tokenized_cells
|
| 118 |
-
for k in
|
| 119 |
-
cell_metadata[k] += file_cell_metadata[k]
|
| 120 |
|
| 121 |
return tokenized_cells, cell_metadata
|
| 122 |
|
|
|
|
| 106 |
|
| 107 |
def tokenize_files(self, loom_data_directory):
|
| 108 |
tokenized_cells = []
|
| 109 |
+
loom_cell_attr = [attr_key for attr_key in self.custom_attr_name_dict.keys()]
|
| 110 |
+
cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
|
| 111 |
|
| 112 |
# loops through directories to tokenize .loom files
|
| 113 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
|
|
|
| 116 |
loom_file_path
|
| 117 |
)
|
| 118 |
tokenized_cells += file_tokenized_cells
|
| 119 |
+
for k in loom_cell_attr:
|
| 120 |
+
cell_metadata[self.custom_attr_name_dict[k]] += file_cell_metadata[k]
|
| 121 |
|
| 122 |
return tokenized_cells, cell_metadata
|
| 123 |
|