Commit
·
188029e
1
Parent(s):
f4fea1e
Rename isp stats methods to clarify mode.
Browse files
geneformer/in_silico_perturber_stats.py
CHANGED
|
@@ -67,7 +67,8 @@ def n_detections(token, dict_list):
|
|
| 67 |
def get_fdr(pvalues):
|
| 68 |
return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
|
| 69 |
|
| 70 |
-
def isp_stats(cos_sims_df, dict_list):
|
|
|
|
| 71 |
random_tuples = []
|
| 72 |
for i in trange(cos_sims_df.shape[0]):
|
| 73 |
token = cos_sims_df["Gene"][i]
|
|
@@ -131,6 +132,7 @@ def isp_stats(cos_sims_df, dict_list):
|
|
| 131 |
|
| 132 |
return cos_sims_full_df
|
| 133 |
|
|
|
|
| 134 |
def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
|
| 135 |
cos_sims_full_df = cos_sims_df.copy()
|
| 136 |
|
|
@@ -293,7 +295,7 @@ class InSilicoPerturberStats:
|
|
| 293 |
if self.mode not in ["goal_state_shift", "vs_null"]:
|
| 294 |
logger.error(
|
| 295 |
"Currently, only modes available are stats for goal_state_shift \
|
| 296 |
-
and comparing
|
| 297 |
raise
|
| 298 |
|
| 299 |
self.gene_token_id_dict = invert_dict(self.gene_token_dict)
|
|
@@ -314,7 +316,7 @@ class InSilicoPerturberStats:
|
|
| 314 |
|
| 315 |
dict_list = read_dictionaries(input_data_directory, "cell")
|
| 316 |
if self.mode == "goal_state_shift":
|
| 317 |
-
cos_sims_df = isp_stats(cos_sims_df_initial, dict_list)
|
| 318 |
|
| 319 |
# quantify number of detections of each gene
|
| 320 |
cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]
|
|
|
|
| 67 |
def get_fdr(pvalues):
|
| 68 |
return list(smt.multipletests(pvalues, alpha=0.05, method="fdr_bh")[1])
|
| 69 |
|
| 70 |
+
# stats comparing cos sim shifts towards goal state of test perturbations vs random perturbations
|
| 71 |
+
def isp_stats_to_goal_state(cos_sims_df, dict_list):
|
| 72 |
random_tuples = []
|
| 73 |
for i in trange(cos_sims_df.shape[0]):
|
| 74 |
token = cos_sims_df["Gene"][i]
|
|
|
|
| 132 |
|
| 133 |
return cos_sims_full_df
|
| 134 |
|
| 135 |
+
# stats comparing cos sim shifts of test perturbations vs null distribution
|
| 136 |
def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
|
| 137 |
cos_sims_full_df = cos_sims_df.copy()
|
| 138 |
|
|
|
|
| 295 |
if self.mode not in ["goal_state_shift", "vs_null"]:
|
| 296 |
logger.error(
|
| 297 |
"Currently, only modes available are stats for goal_state_shift \
|
| 298 |
+
and vs_null (comparing to null distribution).")
|
| 299 |
raise
|
| 300 |
|
| 301 |
self.gene_token_id_dict = invert_dict(self.gene_token_dict)
|
|
|
|
| 316 |
|
| 317 |
dict_list = read_dictionaries(input_data_directory, "cell")
|
| 318 |
if self.mode == "goal_state_shift":
|
| 319 |
+
cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
|
| 320 |
|
| 321 |
# quantify number of detections of each gene
|
| 322 |
cos_sims_df["N_Detections"] = [n_detections(i, dict_list) for i in cos_sims_df["Gene"]]
|