Update tokenizer to use total counts
Browse files - geneformer/tokenizer.py +6 -1
geneformer/tokenizer.py
CHANGED
|
@@ -183,7 +183,12 @@ class TranscriptomeTokenizer:
|
|
| 183 |
filter_pass_loc, coding_miRNA_loc # filter cells and genes
|
| 184 |
]
|
| 185 |
|
| 186 |
-
X_norm = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
tokenized_cells += [
|
| 189 |
tokenize_cell(X_norm[i, ...].A.flatten(), coding_miRNA_tokens)
|
|
|
|
| 183 |
filter_pass_loc, coding_miRNA_loc # filter cells and genes
|
| 184 |
]
|
| 185 |
|
| 186 |
+
X_norm = (
|
| 187 |
+
adata_filter.X
|
| 188 |
+
/ adata.obs["n_counts"].values.reshape(-1, 1)
|
| 189 |
+
* 10_000
|
| 190 |
+
/ norm_factor_vector
|
| 191 |
+
).tocsr()
|
| 192 |
|
| 193 |
tokenized_cells += [
|
| 194 |
tokenize_cell(X_norm[i, ...].A.flatten(), coding_miRNA_tokens)
|