Christina Theodoris
committed on
Commit
·
abdf980
1
Parent(s):
50e921d
Add error for no files found and suppress loompy import warning
Browse files — geneformer/tokenizer.py +13 -0
geneformer/tokenizer.py
CHANGED
|
@@ -17,10 +17,17 @@ Usage:
|
|
| 17 |
import pickle
|
| 18 |
from pathlib import Path
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
import loompy as lp
|
| 21 |
import numpy as np
|
| 22 |
from datasets import Dataset
|
| 23 |
|
|
|
|
|
|
|
| 24 |
GENE_MEDIAN_FILE = Path(__file__).parent / "gene_median_dictionary.pkl"
|
| 25 |
TOKEN_DICTIONARY_FILE = Path(__file__).parent / "token_dictionary.pkl"
|
| 26 |
|
|
@@ -111,7 +118,9 @@ class TranscriptomeTokenizer:
|
|
| 111 |
cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
|
| 112 |
|
| 113 |
# loops through directories to tokenize .loom files
|
|
|
|
| 114 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
|
|
|
| 115 |
print(f"Tokenizing {loom_file_path}")
|
| 116 |
file_tokenized_cells, file_cell_metadata = self.tokenize_file(
|
| 117 |
loom_file_path
|
|
@@ -123,6 +132,10 @@ class TranscriptomeTokenizer:
|
|
| 123 |
else:
|
| 124 |
cell_metadata = None
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
return tokenized_cells, cell_metadata
|
| 127 |
|
| 128 |
def tokenize_file(self, loom_file_path):
|
|
|
|
| 17 |
import pickle
|
| 18 |
from pathlib import Path
|
| 19 |
|
| 20 |
+
import logging
|
| 21 |
+
|
| 22 |
+
import warnings
|
| 23 |
+
warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
|
| 24 |
+
|
| 25 |
import loompy as lp
|
| 26 |
import numpy as np
|
| 27 |
from datasets import Dataset
|
| 28 |
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
GENE_MEDIAN_FILE = Path(__file__).parent / "gene_median_dictionary.pkl"
|
| 32 |
TOKEN_DICTIONARY_FILE = Path(__file__).parent / "token_dictionary.pkl"
|
| 33 |
|
|
|
|
| 118 |
cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
|
| 119 |
|
| 120 |
# loops through directories to tokenize .loom files
|
| 121 |
+
file_found = 0
|
| 122 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
| 123 |
+
file_found = 1
|
| 124 |
print(f"Tokenizing {loom_file_path}")
|
| 125 |
file_tokenized_cells, file_cell_metadata = self.tokenize_file(
|
| 126 |
loom_file_path
|
|
|
|
| 132 |
else:
|
| 133 |
cell_metadata = None
|
| 134 |
|
| 135 |
+
if file_found == 0:
|
| 136 |
+
logger.error(
|
| 137 |
+
f"No .loom files found in directory {loom_data_directory}.")
|
| 138 |
+
raise
|
| 139 |
return tokenized_cells, cell_metadata
|
| 140 |
|
| 141 |
def tokenize_file(self, loom_file_path):
|