Spaces:
Sleeping
Sleeping
fangjiang commited on
Commit ·
9011565
1
Parent(s): 4162a00
update cytof classes and utils
Browse files- app.py +6 -6
- cytof/__pycache__/__init__.cpython-38.pyc +0 -0
- cytof/__pycache__/classes.cpython-38.pyc +0 -0
- cytof/__pycache__/hyperion_preprocess.cpython-38.pyc +0 -0
- cytof/__pycache__/hyperion_segmentation.cpython-38.pyc +0 -0
- cytof/__pycache__/segmentation_functions.cpython-38.pyc +0 -0
- cytof/__pycache__/utils.cpython-38.pyc +0 -0
- cytof/classes.py +155 -56
- cytof/utils.py +14 -9
app.py
CHANGED
|
@@ -378,12 +378,13 @@ hr {
|
|
| 378 |
"""
|
| 379 |
|
| 380 |
with gr.Blocks() as demo:
|
| 381 |
-
gr.HTML(custom_css)
|
| 382 |
-
|
| 383 |
cytof_state = gr.State(CytofImage())
|
|
|
|
| 384 |
# used in scenarios where users define/remove channels multiple times
|
| 385 |
cytof_original_state = gr.State(CytofImage())
|
| 386 |
-
|
|
|
|
|
|
|
| 387 |
gr.Markdown('<div class="h-1">Step 1. Upload images</div>')
|
| 388 |
gr.Markdown('<div class="h-2">You may upload one or two files depending on your use case.</div>')
|
| 389 |
gr.Markdown('<div class="h-2 bold">Case 1: Upload a single file</div>')
|
|
@@ -515,7 +516,7 @@ with gr.Blocks() as demo:
|
|
| 515 |
with gr.Column(scale=2):
|
| 516 |
gr.Markdown('<div class="h-2">This analysis measures the degree of co-expression within a pair of neighborhoods.</div>')
|
| 517 |
gr.Markdown('<div class="h-2">Select the clustering method:</div>')
|
| 518 |
-
info_text = gr.Markdown(update_info_text('
|
| 519 |
cluster_method = gr.Radio(['k-neighbor', 'distance'], value='k-neighbor', elem_classes='test', label='')
|
| 520 |
cluster_threshold = gr.Slider(minimum=1, maximum=100, step=1, value=30, interactive=True, label='Clustering threshold')
|
| 521 |
spatial_btn = gr.Button('Run spatial interaction analysis')
|
|
@@ -531,8 +532,7 @@ with gr.Blocks() as demo:
|
|
| 531 |
gr.Markdown('<br>')
|
| 532 |
gr.Markdown('<div class="h-1">Step 6. Visualize positive markers</div>')
|
| 533 |
gr.Markdown('<div class="h-2">Select two markers for side-by-side comparison to visualize their positive states in cells. This serves two purposes: </div>')
|
| 534 |
-
gr.Markdown('<div class="h-2 bold">(1) Validate the co-expression analysis results.</div>')
|
| 535 |
-
gr.Markdown('<div class="h-2 bold">(2) Validate teh spatial interaction analysis results.</div>')
|
| 536 |
|
| 537 |
|
| 538 |
with gr.Row(): # two marker positive visualization - dropdown options
|
|
|
|
| 378 |
"""
|
| 379 |
|
| 380 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
| 381 |
cytof_state = gr.State(CytofImage())
|
| 382 |
+
|
| 383 |
# used in scenarios where users define/remove channels multiple times
|
| 384 |
cytof_original_state = gr.State(CytofImage())
|
| 385 |
+
|
| 386 |
+
gr.HTML(custom_css)
|
| 387 |
+
|
| 388 |
gr.Markdown('<div class="h-1">Step 1. Upload images</div>')
|
| 389 |
gr.Markdown('<div class="h-2">You may upload one or two files depending on your use case.</div>')
|
| 390 |
gr.Markdown('<div class="h-2 bold">Case 1: Upload a single file</div>')
|
|
|
|
| 516 |
with gr.Column(scale=2):
|
| 517 |
gr.Markdown('<div class="h-2">This analysis measures the degree of co-expression within a pair of neighborhoods.</div>')
|
| 518 |
gr.Markdown('<div class="h-2">Select the clustering method:</div>')
|
| 519 |
+
info_text = gr.Markdown(update_info_text('k-neighbor'))
|
| 520 |
cluster_method = gr.Radio(['k-neighbor', 'distance'], value='k-neighbor', elem_classes='test', label='')
|
| 521 |
cluster_threshold = gr.Slider(minimum=1, maximum=100, step=1, value=30, interactive=True, label='Clustering threshold')
|
| 522 |
spatial_btn = gr.Button('Run spatial interaction analysis')
|
|
|
|
| 532 |
gr.Markdown('<br>')
|
| 533 |
gr.Markdown('<div class="h-1">Step 6. Visualize positive markers</div>')
|
| 534 |
gr.Markdown('<div class="h-2">Select two markers for side-by-side comparison to visualize their positive states in cells. This serves two purposes: </div>')
|
| 535 |
+
gr.Markdown('<div class="h-2 bold">(1) Validate the co-expression analysis results. (2) Validate teh spatial interaction analysis results.</div>')
|
|
|
|
| 536 |
|
| 537 |
|
| 538 |
with gr.Row(): # two marker positive visualization - dropdown options
|
cytof/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (233 Bytes). View file
|
|
|
cytof/__pycache__/classes.cpython-38.pyc
ADDED
|
Binary file (57.5 kB). View file
|
|
|
cytof/__pycache__/hyperion_preprocess.cpython-38.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
cytof/__pycache__/hyperion_segmentation.cpython-38.pyc
ADDED
|
Binary file (11.7 kB). View file
|
|
|
cytof/__pycache__/segmentation_functions.cpython-38.pyc
ADDED
|
Binary file (21.6 kB). View file
|
|
|
cytof/__pycache__/utils.cpython-38.pyc
ADDED
|
Binary file (15.6 kB). View file
|
|
|
cytof/classes.py
CHANGED
|
@@ -137,7 +137,7 @@ class CytofImage():
|
|
| 137 |
self.df = pd.concat([self.df, df2])
|
| 138 |
|
| 139 |
def quality_control(self, thres: int = 50) -> None:
|
| 140 |
-
setattr(self, "keep",
|
| 141 |
if (max(self.df['X']) < thres) \
|
| 142 |
or (max(self.df['Y']) < thres):
|
| 143 |
print("At least one dimension of the image {}-{} is smaller than {}, exclude from analyzing" \
|
|
@@ -488,7 +488,6 @@ class CytofImage():
|
|
| 488 |
# attach quantile dictionary to self
|
| 489 |
self.dict_quantiles = quantiles
|
| 490 |
|
| 491 |
-
print('dict quantiles:', quantiles)
|
| 492 |
# return quantiles
|
| 493 |
|
| 494 |
def _vis_normalization(self, savename: Optional[str] = None):
|
|
@@ -979,8 +978,19 @@ class CytofImageTiff(CytofImage):
|
|
| 979 |
return new_instance
|
| 980 |
|
| 981 |
def quality_control(self, thres: int = 50) -> None:
|
| 982 |
-
setattr(self, "keep",
|
| 983 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
print(f"At least one dimension of the image {self.slide}-{self.roi} is smaller than {thres}, \
|
| 985 |
hence exclude from analyzing" )
|
| 986 |
self.keep = False
|
|
@@ -1157,7 +1167,7 @@ def apply_threshold_to_column(column, threshold):
|
|
| 1157 |
class CytofCohort():
|
| 1158 |
def __init__(self, cytof_images: Optional[dict] = None,
|
| 1159 |
df_cohort: Optional[pd.DataFrame] = None,
|
| 1160 |
-
dir_out: str = "./",
|
| 1161 |
cohort_name: str = "cohort1"):
|
| 1162 |
"""
|
| 1163 |
cytof_images:
|
|
@@ -1170,13 +1180,15 @@ class CytofCohort():
|
|
| 1170 |
"cell_sum": ["cell_sum", "cell_morphology"],
|
| 1171 |
"cell_ave": ["cell_ave", "cell_morphology"],
|
| 1172 |
"cell_sum_only": ["cell_sum"],
|
| 1173 |
-
"cell_ave_only": ["cell_ave"]
|
| 1174 |
-
}
|
| 1175 |
|
| 1176 |
-
self.name
|
| 1177 |
-
self.dir_out = os.path.join(dir_out, self.name)
|
| 1178 |
-
if
|
| 1179 |
-
os.makedirs(self.dir_out)
|
|
|
|
|
|
|
| 1180 |
def __getitem__(self, key):
|
| 1181 |
'Extracts a particular cytof image from the cohort'
|
| 1182 |
return self.cytof_images[key]
|
|
@@ -1187,12 +1199,16 @@ class CytofCohort():
|
|
| 1187 |
def __repr__(self):
|
| 1188 |
return f"CytofCohort(name={self.name})"
|
| 1189 |
|
| 1190 |
-
def save_cytof_cohort(self
|
| 1191 |
-
|
| 1192 |
-
|
| 1193 |
-
|
| 1194 |
-
|
| 1195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
def batch_process_feature(self):
|
| 1197 |
"""
|
| 1198 |
Batch process: if the CytofCohort is initialized by a dictionary of CytofImages
|
|
@@ -1204,8 +1220,9 @@ class CytofCohort():
|
|
| 1204 |
setattr(self, "dict_feat", cytof_img.features)
|
| 1205 |
if not hasattr(self, "markers"):
|
| 1206 |
setattr(self, "markers", cytof_img.markers)
|
|
|
|
|
|
|
| 1207 |
|
| 1208 |
-
print('dict quantiles in batch process:', cytof_img.dict_quantiles)
|
| 1209 |
try:
|
| 1210 |
qs &= set(list(cytof_img.dict_quantiles.keys()))
|
| 1211 |
except:
|
|
@@ -1226,24 +1243,36 @@ class CytofCohort():
|
|
| 1226 |
def batch_process(self, params: Dict):
|
| 1227 |
sys.path.append("../CLIscripts")
|
| 1228 |
from process_single_roi import process_single, SetParameters
|
|
|
|
|
|
|
| 1229 |
for i, (slide, roi, fname) in self.df_cohort.iterrows():
|
| 1230 |
paramsi = SetParameters(filename=fname,
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1245 |
|
| 1246 |
-
self.batch_process_feature()
|
| 1247 |
|
| 1248 |
def get_feature(self,
|
| 1249 |
normq: int = 75,
|
|
@@ -1310,12 +1339,12 @@ class CytofCohort():
|
|
| 1310 |
normq: int = 75,
|
| 1311 |
feat_type: str = "normed_scaled",
|
| 1312 |
feat_set: str = "all",
|
| 1313 |
-
|
| 1314 |
verbose: bool = False):
|
| 1315 |
|
| 1316 |
assert feat_type in ["normed_scaled", "normed", ""], f"feature type {feat_type} not supported!"
|
| 1317 |
-
assert (
|
| 1318 |
-
assert feat_set in self.feat_sets.keys(), f"feature set {feat_set} not supported!"
|
| 1319 |
|
| 1320 |
description = "original" if feat_type=="" else f"{normq}{feat_type}"
|
| 1321 |
n_attr = f"df_feature{feat_type}" if feat_type=="" else f"df_feature_{normq}{feat_type}" # the attribute name to achieve from cytof_img
|
|
@@ -1330,15 +1359,22 @@ class CytofCohort():
|
|
| 1330 |
if "morphology" in y:
|
| 1331 |
feat_names += self.dict_feat[y]
|
| 1332 |
else:
|
| 1333 |
-
if
|
| 1334 |
-
feat_names += self.dict_feat[y]
|
| 1335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1336 |
else: # only features correspond to markers kept (markers are a subset of self.markers)
|
| 1337 |
-
ids = [self.
|
| 1338 |
feat_names += [self.dict_feat[y][x] for x in ids]
|
| 1339 |
-
|
|
|
|
| 1340 |
df_feature = getattr(self, n_attr)[feat_names]
|
| 1341 |
-
return df_feature,
|
| 1342 |
|
| 1343 |
###############################################################
|
| 1344 |
################## PhenoGraph Clustering ######################
|
|
@@ -1347,21 +1383,46 @@ class CytofCohort():
|
|
| 1347 |
normq:int = 75,
|
| 1348 |
feat_type:str = "normed_scaled",
|
| 1349 |
feat_set: str = "all",
|
| 1350 |
-
|
| 1351 |
k: int = None,
|
| 1352 |
save_vis: bool = False,
|
| 1353 |
verbose:bool = True):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
|
| 1355 |
-
if
|
| 1356 |
-
|
| 1357 |
else:
|
| 1358 |
-
|
| 1359 |
|
| 1360 |
assert feat_type in ["normed_scaled", "normed", ""], f"feature type {feat_type} not supported!"
|
| 1361 |
-
df_feature,
|
| 1362 |
feat_type=feat_type,
|
| 1363 |
feat_set=feat_set,
|
| 1364 |
-
|
| 1365 |
verbose=verbose)
|
| 1366 |
# set number of nearest neighbors k and run PhenoGraph for phenotype clustering
|
| 1367 |
k = k if k else int(df_feature.shape[0] / 100)
|
|
@@ -1381,13 +1442,13 @@ class CytofCohort():
|
|
| 1381 |
if not hasattr(self, "phenograph"):
|
| 1382 |
setattr(self, "phenograph", {})
|
| 1383 |
key_pheno = f"{description}_{feat_set}_feature_{k}"
|
| 1384 |
-
key_pheno += f"{
|
| 1385 |
|
| 1386 |
|
| 1387 |
N = len(np.unique(communities))
|
| 1388 |
self.phenograph[key_pheno] = {
|
| 1389 |
"data": df_feature,
|
| 1390 |
-
"markers":
|
| 1391 |
"features": feat_names,
|
| 1392 |
"description": {"normalization": description, "feature_set": feat_set}, # normalization and/or scaling | set of feature (in self.feat_sets)
|
| 1393 |
"communities": communities,
|
|
@@ -1428,7 +1489,8 @@ class CytofCohort():
|
|
| 1428 |
save_vis: bool = False,
|
| 1429 |
show_plots: bool = False,
|
| 1430 |
plot_together: bool = True,
|
| 1431 |
-
fig_width: int = 5 # only when plot_together is True
|
|
|
|
| 1432 |
):
|
| 1433 |
assert level.upper() in ["COHORT", "SLIDE", "ROI"], "Only 'cohort', 'slide' and 'roi' are accetable values for level"
|
| 1434 |
this_pheno = self.phenograph[key_pheno]
|
|
@@ -1485,15 +1547,17 @@ class CytofCohort():
|
|
| 1485 |
fig, axs = plt.subplots(1,ncol, figsize=(ncol*fig_width, fig_width))
|
| 1486 |
proj_2d = proj_2ds[key]
|
| 1487 |
commu = commus[key]
|
|
|
|
| 1488 |
# Visualize 1: plot 2d projection together
|
| 1489 |
print("Visualization in 2d - {}-{}".format(level, key))
|
| 1490 |
savename = os.path.join(vis_savedir, f"cluster_scatter_{level}_{key}.png") if (save_vis and not plot_together) else None
|
| 1491 |
ax = axs[0] if plot_together else None
|
| 1492 |
-
fig_scatter = visualize_scatter(data=proj_2d, communities=commu, n_community=n_community,
|
| 1493 |
-
title=key, savename=savename, show=show_plots, ax=ax)
|
| 1494 |
-
figs_scatter[key] = fig_scatter
|
| 1495 |
|
| 1496 |
figs_exps[key] = {}
|
|
|
|
| 1497 |
# Visualize 2: protein expression
|
| 1498 |
for axid, acm_tpe in enumerate(accumul_type):
|
| 1499 |
ids = [i for (i, x) in enumerate(feat_names) if re.search(".{}".format(acm_tpe), x)]
|
|
@@ -1526,10 +1590,10 @@ class CytofCohort():
|
|
| 1526 |
if (save_vis and not plot_together) else None
|
| 1527 |
vis_exp = cluster_protein_exp_norm if normalize else cluster_protein_exp
|
| 1528 |
ax = axs[axid+1] if plot_together else None
|
| 1529 |
-
fig_exps = visualize_expression(data=vis_exp, markers=markers,
|
| 1530 |
group_ids=group_ids, title="{} - {}-{}".format(level, acm_tpe, key),
|
| 1531 |
savename=savename, show=show_plots, ax=ax)
|
| 1532 |
-
figs_exps[key][acm_tpe] = fig_exps
|
| 1533 |
cluster_protein_exps[key] = vis_exp
|
| 1534 |
plt.tight_layout()
|
| 1535 |
if plot_together:
|
|
@@ -1892,3 +1956,38 @@ class CytofCohort():
|
|
| 1892 |
slide_co_expression_dict[slide_key] = (edge_percentage_norm, df_expected.columns)
|
| 1893 |
|
| 1894 |
return slide_co_expression_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
self.df = pd.concat([self.df, df2])
|
| 138 |
|
| 139 |
def quality_control(self, thres: int = 50) -> None:
|
| 140 |
+
setattr(self, "keep", True)
|
| 141 |
if (max(self.df['X']) < thres) \
|
| 142 |
or (max(self.df['Y']) < thres):
|
| 143 |
print("At least one dimension of the image {}-{} is smaller than {}, exclude from analyzing" \
|
|
|
|
| 488 |
# attach quantile dictionary to self
|
| 489 |
self.dict_quantiles = quantiles
|
| 490 |
|
|
|
|
| 491 |
# return quantiles
|
| 492 |
|
| 493 |
def _vis_normalization(self, savename: Optional[str] = None):
|
|
|
|
| 978 |
return new_instance
|
| 979 |
|
| 980 |
def quality_control(self, thres: int = 50) -> None:
    """Flag this TIFF image for exclusion when its spatial extent is too small.

    Sets ``self.keep`` to ``True``, then flips it to ``False`` (printing a
    console message) if any spatial dimension of ``self.image`` is below
    ``thres``.

    Parameters
    ----------
    thres : int, optional
        Minimum acceptable size (in pixels) for each spatial dimension,
        by default 50.
    """
    self.keep = True

    shape = self.image.shape
    if len(shape) == 2:
        # Height and width only: check both dimensions.
        spatial_dims = shape
    else:
        # NOTE(review): assumes the channel axis is the smallest dimension;
        # every axis equal to that minimum is dropped from the size check.
        smallest = min(shape)
        spatial_dims = [d for d in shape if d != smallest]

    if any(d < thres for d in spatial_dims):
        print(f"At least one dimension of the image {self.slide}-{self.roi} is smaller than {thres}, \
              hence exclude from analyzing" )
        self.keep = False
|
|
|
|
| 1167 |
class CytofCohort():
|
| 1168 |
def __init__(self, cytof_images: Optional[dict] = None,
|
| 1169 |
df_cohort: Optional[pd.DataFrame] = None,
|
| 1170 |
+
dir_out: Optional[str] = "./",
|
| 1171 |
cohort_name: str = "cohort1"):
|
| 1172 |
"""
|
| 1173 |
cytof_images:
|
|
|
|
| 1180 |
"cell_sum": ["cell_sum", "cell_morphology"],
|
| 1181 |
"cell_ave": ["cell_ave", "cell_morphology"],
|
| 1182 |
"cell_sum_only": ["cell_sum"],
|
| 1183 |
+
"cell_ave_only": ["cell_ave"],
|
| 1184 |
+
} # need at least cell sum/ave; cannot be just morphology; can't make cluster v. channel heatmap
|
| 1185 |
|
| 1186 |
+
self.name = cohort_name
|
| 1187 |
+
self.dir_out = os.path.join(dir_out, self.name) if isinstance(dir_out, str) else None
|
| 1188 |
+
if self.dir_out:
|
| 1189 |
+
os.makedirs(self.dir_out, exist_ok=True)
|
| 1190 |
+
print('Output folder created:', self.dir_out)
|
| 1191 |
+
|
| 1192 |
def __getitem__(self, key):
|
| 1193 |
'Extracts a particular cytof image from the cohort'
|
| 1194 |
return self.cytof_images[key]
|
|
|
|
| 1199 |
def __repr__(self):
|
| 1200 |
return f"CytofCohort(name={self.name})"
|
| 1201 |
|
| 1202 |
+
def save_cytof_cohort(self):
    """Pickle this cohort to ``<dir_out>/<name>.pkl`` and return the path.

    Returns
    -------
    str
        Full path of the written pickle file.

    Raises
    ------
    FileNotFoundError
        If ``self.dir_out`` was not set at construction time.
    """
    if not self.dir_out:
        raise FileNotFoundError('self.dir_out not specified')

    save_path = f'{os.path.join(self.dir_out, self.name)}.pkl'
    # Use a context manager so the file handle is closed even if pickling
    # raises (the previous version left the handle open).
    with open(save_path, "wb") as fh:
        pkl.dump(self, fh)

    return save_path
|
| 1210 |
+
|
| 1211 |
+
|
| 1212 |
def batch_process_feature(self):
|
| 1213 |
"""
|
| 1214 |
Batch process: if the CytofCohort is initialized by a dictionary of CytofImages
|
|
|
|
| 1220 |
setattr(self, "dict_feat", cytof_img.features)
|
| 1221 |
if not hasattr(self, "markers"):
|
| 1222 |
setattr(self, "markers", cytof_img.markers)
|
| 1223 |
+
if not hasattr(self, "channels"):
|
| 1224 |
+
setattr(self, "channels", cytof_img.channels)
|
| 1225 |
|
|
|
|
| 1226 |
try:
|
| 1227 |
qs &= set(list(cytof_img.dict_quantiles.keys()))
|
| 1228 |
except:
|
|
|
|
| 1243 |
def batch_process(self, params: Dict):
    """Process every ROI listed in ``self.df_cohort`` into a CytofImage.

    Each row of ``self.df_cohort`` (slide, roi, filename) is handed to the
    CLI-script pipeline; successfully processed images are stored in
    ``self.cytof_images`` keyed ``"<slide>_<roi>"``. Rows that fail are
    skipped with a printed message, and ``self.df_cohort`` is rebuilt to
    contain only the rows that succeeded.

    Parameters
    ----------
    params : Dict
        Optional overrides for the per-ROI processing parameters
        (e.g. ``quality_control_thres``, ``channels_remove``, ``cell_radius``).
    """
    sys.path.append("../CLIscripts")
    from process_single_roi import process_single, SetParameters

    processed_rows = []  # indices of rows that processed without error

    for idx, (slide, roi, fname) in self.df_cohort.iterrows():
        roi_params = SetParameters(
            filename=fname,
            outdir=self.dir_out,
            label_marker_file=params.get('label_marker_file', None),
            slide=slide,
            roi=roi,
            quality_control_thres=params.get("quality_control_thres", 50),
            channels_remove=params.get("channels_remove", None),
            channels_dict=params.get("channels_dict", None),
            use_membrane=params.get("use_membrane", True),
            cell_radius=params.get("cell_radius", 5),
            normalize_qs=params.get("normalize_qs", 75),
            iltype=params.get('iltype', None),
        )

        try:
            image = process_single(roi_params, downstream_analysis=False, verbose=False)
        except Exception as e:
            # Best-effort batch: a single bad ROI must not abort the cohort.
            print(f"Skipping {slide}_{roi} due to error: {e}")
            continue

        self.cytof_images[f"{slide}_{roi}"] = image
        processed_rows.append(idx)

    # Keep only the successfully processed rows.
    self.df_cohort = self.df_cohort.loc[processed_rows].reset_index(drop=True)
|
| 1275 |
|
|
|
|
| 1276 |
|
| 1277 |
def get_feature(self,
|
| 1278 |
normq: int = 75,
|
|
|
|
| 1339 |
normq: int = 75,
|
| 1340 |
feat_type: str = "normed_scaled",
|
| 1341 |
feat_set: str = "all",
|
| 1342 |
+
channels: Union[str, List] = "all",
|
| 1343 |
verbose: bool = False):
|
| 1344 |
|
| 1345 |
assert feat_type in ["normed_scaled", "normed", ""], f"feature type {feat_type} not supported!"
|
| 1346 |
+
assert (channels == "all" or set(channels).issubset(set(self.channels))), f"input channels {channels} not a subset of self.channels"
|
| 1347 |
+
assert feat_set in self.feat_sets.keys() , f"feature set {feat_set} not supported!"
|
| 1348 |
|
| 1349 |
description = "original" if feat_type=="" else f"{normq}{feat_type}"
|
| 1350 |
n_attr = f"df_feature{feat_type}" if feat_type=="" else f"df_feature_{normq}{feat_type}" # the attribute name to achieve from cytof_img
|
|
|
|
| 1359 |
if "morphology" in y:
|
| 1360 |
feat_names += self.dict_feat[y]
|
| 1361 |
else:
|
| 1362 |
+
if channels == "all": # features extracted from all channels are kept
|
| 1363 |
+
feat_names += self.dict_feat[y]
|
| 1364 |
+
channels_return = self.channels.copy() # return all channel names except nuclei and membrane
|
| 1365 |
+
channels_return.remove('nuclei') # all instances have nuclei channel
|
| 1366 |
+
try:
|
| 1367 |
+
channels_return.remove('membrane') # some might not have membrane
|
| 1368 |
+
except ValueError:
|
| 1369 |
+
pass
|
| 1370 |
+
|
| 1371 |
else: # only features correspond to markers kept (markers are a subset of self.markers)
|
| 1372 |
+
ids = [self.channels.index(x) for x in channels]
|
| 1373 |
feat_names += [self.dict_feat[y][x] for x in ids]
|
| 1374 |
+
channels_return = channels.copy() # return only subset
|
| 1375 |
+
|
| 1376 |
df_feature = getattr(self, n_attr)[feat_names]
|
| 1377 |
+
return df_feature, channels_return, feat_names, description, n_attr
|
| 1378 |
|
| 1379 |
###############################################################
|
| 1380 |
################## PhenoGraph Clustering ######################
|
|
|
|
| 1383 |
normq:int = 75,
|
| 1384 |
feat_type:str = "normed_scaled",
|
| 1385 |
feat_set: str = "all",
|
| 1386 |
+
pheno_channels: Union[str, List] = "all",
|
| 1387 |
k: int = None,
|
| 1388 |
save_vis: bool = False,
|
| 1389 |
verbose:bool = True):
|
| 1390 |
+
"""performs PhenoGraph clustering on normalized and/or scaled features
|
| 1391 |
+
|
| 1392 |
+
Parameters
|
| 1393 |
+
----------
|
| 1394 |
+
normq : int, optional
|
| 1395 |
+
xth quantile of normalization; for finding df_feature attribute, by default 75
|
| 1396 |
+
feat_type : str, optional
|
| 1397 |
+
for finding df_feature attribute for PhenoGraph, by default "normed_scaled"
|
| 1398 |
+
feat_set : str, optional
|
| 1399 |
+
element in [cell_sum, cell_ave, cell_sum_only, cell_ave_only, all]; all will include all aforementioned feature sets, by default "all"
|
| 1400 |
+
pheno_channels : Union[str, List], optional
|
| 1401 |
+
list of channels used for PhenoGraph, by default "all"
|
| 1402 |
+
k : int, optional
|
| 1403 |
+
k neighbors, by default None
|
| 1404 |
+
save_vis : bool, optional
|
| 1405 |
+
whether to save visualization, by default False
|
| 1406 |
+
verbose : bool, optional
|
| 1407 |
+
whether to print progress details, by default True
|
| 1408 |
+
|
| 1409 |
+
Returns
|
| 1410 |
+
-------
|
| 1411 |
+
key_pheno
|
| 1412 |
+
string literal that can be indexed in self.phenograph
|
| 1413 |
+
"""
|
| 1414 |
+
|
| 1415 |
|
| 1416 |
+
if pheno_channels == "all":
|
| 1417 |
+
pheno_channels_ = "_all"
|
| 1418 |
else:
|
| 1419 |
+
pheno_channels_ = "_subset1"
|
| 1420 |
|
| 1421 |
assert feat_type in ["normed_scaled", "normed", ""], f"feature type {feat_type} not supported!"
|
| 1422 |
+
df_feature, channels, feat_names, description, n_attr = self._get_feature_subset(normq=normq,
|
| 1423 |
feat_type=feat_type,
|
| 1424 |
feat_set=feat_set,
|
| 1425 |
+
channels=pheno_channels,
|
| 1426 |
verbose=verbose)
|
| 1427 |
# set number of nearest neighbors k and run PhenoGraph for phenotype clustering
|
| 1428 |
k = k if k else int(df_feature.shape[0] / 100)
|
|
|
|
| 1442 |
if not hasattr(self, "phenograph"):
|
| 1443 |
setattr(self, "phenograph", {})
|
| 1444 |
key_pheno = f"{description}_{feat_set}_feature_{k}"
|
| 1445 |
+
key_pheno += f"{pheno_channels_}_markers"
|
| 1446 |
|
| 1447 |
|
| 1448 |
N = len(np.unique(communities))
|
| 1449 |
self.phenograph[key_pheno] = {
|
| 1450 |
"data": df_feature,
|
| 1451 |
+
"markers": channels, # preserve key for downstream
|
| 1452 |
"features": feat_names,
|
| 1453 |
"description": {"normalization": description, "feature_set": feat_set}, # normalization and/or scaling | set of feature (in self.feat_sets)
|
| 1454 |
"communities": communities,
|
|
|
|
| 1489 |
save_vis: bool = False,
|
| 1490 |
show_plots: bool = False,
|
| 1491 |
plot_together: bool = True,
|
| 1492 |
+
fig_width: int = 5, # only when plot_together is True,
|
| 1493 |
+
scatter_dot_size: int = 2
|
| 1494 |
):
|
| 1495 |
assert level.upper() in ["COHORT", "SLIDE", "ROI"], "Only 'cohort', 'slide' and 'roi' are accetable values for level"
|
| 1496 |
this_pheno = self.phenograph[key_pheno]
|
|
|
|
| 1547 |
fig, axs = plt.subplots(1,ncol, figsize=(ncol*fig_width, fig_width))
|
| 1548 |
proj_2d = proj_2ds[key]
|
| 1549 |
commu = commus[key]
|
| 1550 |
+
|
| 1551 |
# Visualize 1: plot 2d projection together
|
| 1552 |
print("Visualization in 2d - {}-{}".format(level, key))
|
| 1553 |
savename = os.path.join(vis_savedir, f"cluster_scatter_{level}_{key}.png") if (save_vis and not plot_together) else None
|
| 1554 |
ax = axs[0] if plot_together else None
|
| 1555 |
+
fig_scatter, ax_scatter = visualize_scatter(data=proj_2d, communities=commu, n_community=n_community,
|
| 1556 |
+
title=key, scatter_dot_size=scatter_dot_size, savename=savename, show=show_plots, ax=ax)
|
| 1557 |
+
figs_scatter[key] = (fig_scatter, ax_scatter)
|
| 1558 |
|
| 1559 |
figs_exps[key] = {}
|
| 1560 |
+
|
| 1561 |
# Visualize 2: protein expression
|
| 1562 |
for axid, acm_tpe in enumerate(accumul_type):
|
| 1563 |
ids = [i for (i, x) in enumerate(feat_names) if re.search(".{}".format(acm_tpe), x)]
|
|
|
|
| 1590 |
if (save_vis and not plot_together) else None
|
| 1591 |
vis_exp = cluster_protein_exp_norm if normalize else cluster_protein_exp
|
| 1592 |
ax = axs[axid+1] if plot_together else None
|
| 1593 |
+
fig_exps, ax_exps = visualize_expression(data=vis_exp, markers=markers,
|
| 1594 |
group_ids=group_ids, title="{} - {}-{}".format(level, acm_tpe, key),
|
| 1595 |
savename=savename, show=show_plots, ax=ax)
|
| 1596 |
+
figs_exps[key][acm_tpe] = (fig_exps, ax_exps)
|
| 1597 |
cluster_protein_exps[key] = vis_exp
|
| 1598 |
plt.tight_layout()
|
| 1599 |
if plot_together:
|
|
|
|
| 1956 |
slide_co_expression_dict[slide_key] = (edge_percentage_norm, df_expected.columns)
|
| 1957 |
|
| 1958 |
return slide_co_expression_dict
|
| 1959 |
+
|
| 1960 |
+
|
| 1961 |
+
def cohort_interaction_graphs(self, feature_name, accumul_type, method: str = "distance", threshold=50):
    """Aggregate per-ROI marker interaction graphs into one cohort table.

    For every ROI in ``self.cytof_images``, computes the log10 odds ratio of
    observed vs. expected interaction probability between marker pairs,
    stacks the per-ROI tables into one long DataFrame, and masks entries
    whose interaction could not be observed.

    Parameters
    ----------
    feature_name : forwarded to each image's ``roi_interaction_graphs``.
    accumul_type : forwarded to each image's ``roi_interaction_graphs``.
    method : str, optional
        Either "distance" or "k-neighbor", by default "distance".
    threshold : optional
        Clustering threshold forwarded to ``roi_interaction_graphs``, by default 50.

    Returns
    -------
    (pd.DataFrame, pd.Index)
        Long-format cohort table (columns: 'marker', one column per marker,
        'roi_id') and the marker index of the last ROI processed.
    """
    assert method in ["distance", "k-neighbor"], "Method can be either 'distance' or 'k-neighbor'!"

    eps = 1e-6
    per_roi_frames = []  # ROI-level interaction tables

    for roi_key, image in self.cytof_images.items():
        print(f"Processing ROI {roi_key}")
        expected, observed = image.roi_interaction_graphs(
            feature_name=feature_name, accumul_type=accumul_type,
            method=method, threshold=threshold, return_components=False)

        markers = expected.columns

        # Log odds ratio of observed vs expected interaction probability;
        # eps guards against division by zero and log of zero.
        log_odds = np.log10(observed.values / (expected.values + eps) + eps)

        # A zero observation yields exactly log10(eps). "Not observed" means
        # the interaction cannot be determined, not a strong negative one,
        # so mark it 0 here (converted to NaN below).
        log_odds[log_odds == np.log10(eps)] = 0

        frame = pd.DataFrame(log_odds, index=markers, columns=markers)
        frame["roi_id"] = roi_key
        per_roi_frames.append(frame)

    combined = pd.concat(per_roi_frames, axis=0)
    combined = combined.reset_index(names='marker')

    # Cohort-specific: 0 marked "not observed"; averaging over it would skew
    # the table, so convert to NaN.
    combined = combined.replace(0, np.nan)

    return combined, markers
|
cytof/utils.py
CHANGED
|
@@ -358,7 +358,7 @@ def check_feature_distribution(feature_summary_df, features):
|
|
| 358 |
# return None
|
| 359 |
# return fig
|
| 360 |
|
| 361 |
-
def visualize_scatter(data, communities, n_community, title, figsize=(5,5), savename=None, show=False, ax=None):
|
| 362 |
"""
|
| 363 |
data = data to visualize (N, 2)
|
| 364 |
communities = group indices correspond to each sample in data (N, 1) or (N, )
|
|
@@ -372,10 +372,15 @@ def visualize_scatter(data, communities, n_community, title, figsize=(5,5), save
|
|
| 372 |
else:
|
| 373 |
fig = None
|
| 374 |
ax.set_title(title)
|
| 375 |
-
sns.scatterplot(x=data[:,0],
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
ax.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
|
| 381 |
# plt.axis('tight')
|
|
@@ -387,7 +392,7 @@ def visualize_scatter(data, communities, n_community, title, figsize=(5,5), save
|
|
| 387 |
plt.show()
|
| 388 |
if clos:
|
| 389 |
plt.close('all')
|
| 390 |
-
return fig
|
| 391 |
|
| 392 |
def visualize_expression(data, markers, group_ids, title, figsize=(5,5), savename=None, show=False, ax=None):
|
| 393 |
clos = not show and ax is None
|
|
@@ -403,8 +408,8 @@ def visualize_expression(data, markers, group_ids, title, figsize=(5,5), savenam
|
|
| 403 |
yticklabels=group_ids,
|
| 404 |
ax=ax
|
| 405 |
)
|
| 406 |
-
ax.set_xlabel("Markers")
|
| 407 |
-
ax.set_ylabel("
|
| 408 |
ax.set_title("normalized expression - {}".format(title))
|
| 409 |
ax.xaxis.set_tick_params(labelsize=8)
|
| 410 |
if savename is not None:
|
|
@@ -414,7 +419,7 @@ def visualize_expression(data, markers, group_ids, title, figsize=(5,5), savenam
|
|
| 414 |
plt.show()
|
| 415 |
if clos:
|
| 416 |
plt.close('all')
|
| 417 |
-
return fig
|
| 418 |
|
| 419 |
def _get_thresholds(df_feature: pd.DataFrame,
|
| 420 |
features: List[str],
|
|
|
|
| 358 |
# return None
|
| 359 |
# return fig
|
| 360 |
|
| 361 |
+
def visualize_scatter(data, communities, n_community, title, scatter_dot_size, figsize=(5,5), savename=None, show=False, ax=None):
|
| 362 |
"""
|
| 363 |
data = data to visualize (N, 2)
|
| 364 |
communities = group indices correspond to each sample in data (N, 1) or (N, )
|
|
|
|
| 372 |
else:
|
| 373 |
fig = None
|
| 374 |
ax.set_title(title)
|
| 375 |
+
sns.scatterplot(x=data[:,0],
|
| 376 |
+
y=data[:,1],
|
| 377 |
+
hue=communities,
|
| 378 |
+
palette='tab20',
|
| 379 |
+
s=scatter_dot_size,
|
| 380 |
+
alpha=0.9,
|
| 381 |
+
linewidth=0,
|
| 382 |
+
hue_order=np.arange(n_community), ax=ax
|
| 383 |
+
)
|
| 384 |
|
| 385 |
ax.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
|
| 386 |
# plt.axis('tight')
|
|
|
|
| 392 |
plt.show()
|
| 393 |
if clos:
|
| 394 |
plt.close('all')
|
| 395 |
+
return fig, ax
|
| 396 |
|
| 397 |
def visualize_expression(data, markers, group_ids, title, figsize=(5,5), savename=None, show=False, ax=None):
|
| 398 |
clos = not show and ax is None
|
|
|
|
| 408 |
yticklabels=group_ids,
|
| 409 |
ax=ax
|
| 410 |
)
|
| 411 |
+
# ax.set_xlabel("Markers")
|
| 412 |
+
ax.set_ylabel("PhenoGraph clusters")
|
| 413 |
ax.set_title("normalized expression - {}".format(title))
|
| 414 |
ax.xaxis.set_tick_params(labelsize=8)
|
| 415 |
if savename is not None:
|
|
|
|
| 419 |
plt.show()
|
| 420 |
if clos:
|
| 421 |
plt.close('all')
|
| 422 |
+
return fig, ax
|
| 423 |
|
| 424 |
def _get_thresholds(df_feature: pd.DataFrame,
|
| 425 |
features: List[str],
|