Fixed issue: loom and h5ad now produce the same checksums
Browse files - geneformer/tokenizer.py +9 -9
geneformer/tokenizer.py
CHANGED
|
@@ -194,11 +194,11 @@ class TranscriptomeTokenizer:
|
|
| 194 |
else:
|
| 195 |
var_exists = True
|
| 196 |
|
| 197 |
-
if var_exists
|
| 198 |
filter_pass_loc = np.where(
|
| 199 |
-
[
|
| 200 |
)[0]
|
| 201 |
-
elif
|
| 202 |
print(
|
| 203 |
f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
| 204 |
)
|
|
@@ -208,10 +208,10 @@ class TranscriptomeTokenizer:
|
|
| 208 |
|
| 209 |
for i in range(0, len(filter_pass_loc), chunk_size):
|
| 210 |
idx = filter_pass_loc[i:i+chunk_size]
|
| 211 |
-
X = adata[idx].X
|
| 212 |
|
| 213 |
-
X_view =
|
| 214 |
-
|
|
|
|
| 215 |
X_norm = sp.csr_matrix(X_norm)
|
| 216 |
|
| 217 |
tokenized_cells += [
|
|
@@ -258,11 +258,11 @@ class TranscriptomeTokenizer:
|
|
| 258 |
else:
|
| 259 |
var_exists = True
|
| 260 |
|
| 261 |
-
if var_exists
|
| 262 |
filter_pass_loc = np.where(
|
| 263 |
-
[
|
| 264 |
)[0]
|
| 265 |
-
elif
|
| 266 |
print(
|
| 267 |
f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
| 268 |
)
|
|
|
|
| 194 |
else:
|
| 195 |
var_exists = True
|
| 196 |
|
| 197 |
+
if var_exists:
|
| 198 |
filter_pass_loc = np.where(
|
| 199 |
+
[i == 1 for i in adata.obs["filter_pass"]]
|
| 200 |
)[0]
|
| 201 |
+
elif not var_exists:
|
| 202 |
print(
|
| 203 |
f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
| 204 |
)
|
|
|
|
| 208 |
|
| 209 |
for i in range(0, len(filter_pass_loc), chunk_size):
|
| 210 |
idx = filter_pass_loc[i:i+chunk_size]
|
|
|
|
| 211 |
|
| 212 |
+
X_view = adata[idx, coding_miRNA_loc].X
|
| 213 |
+
n_counts = adata[idx].obs['n_counts'].values[:, None]
|
| 214 |
+
X_norm = (X_view / n_counts * target_sum / norm_factor_vector)
|
| 215 |
X_norm = sp.csr_matrix(X_norm)
|
| 216 |
|
| 217 |
tokenized_cells += [
|
|
|
|
| 258 |
else:
|
| 259 |
var_exists = True
|
| 260 |
|
| 261 |
+
if var_exists:
|
| 262 |
filter_pass_loc = np.where(
|
| 263 |
+
[i == 1 for i in data.ca["filter_pass"]]
|
| 264 |
)[0]
|
| 265 |
+
elif not var_exists:
|
| 266 |
print(
|
| 267 |
f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
| 268 |
)
|