Spaces:
Sleeping
Sleeping
add minor improvements on loading and saving
Browse files- cytof/classes.py +10 -231
cytof/classes.py
CHANGED
|
@@ -37,17 +37,6 @@ from sklearn.cluster import KMeans
|
|
| 37 |
from itertools import product
|
| 38 |
|
| 39 |
|
| 40 |
-
# SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 41 |
-
# sys.path.append(os.path.dirname(SCRIPT_DIR))
|
| 42 |
-
|
| 43 |
-
# # from hyperion_segmentation import cytof_nuclei_segmentation, cytof_cell_segmentation, visualize_segmentation
|
| 44 |
-
# # from utils import (save_multi_channel_img, generate_color_dict, show_color_table,
|
| 45 |
-
# # visualize_scatter, visualize_expression, _get_thresholds, _generate_summary)
|
| 46 |
-
|
| 47 |
-
# from cytof.hyperion_segmentation import cytof_nuclei_segmentation, cytof_cell_segmentation, visualize_segmentation
|
| 48 |
-
# from cytof.utils import (save_multi_channel_img, generate_color_dict, show_color_table,
|
| 49 |
-
# visualize_scatter, visualize_expression, _get_thresholds, _generate_summary)
|
| 50 |
-
|
| 51 |
## added for test
|
| 52 |
import platform
|
| 53 |
from pathlib import Path
|
|
@@ -61,18 +50,6 @@ from hyperion_segmentation import cytof_nuclei_segmentation, cytof_cell_segmenta
|
|
| 61 |
from cytof.utils import (save_multi_channel_img, generate_color_dict, show_color_table,
|
| 62 |
visualize_scatter, visualize_expression, _get_thresholds, _generate_summary)
|
| 63 |
|
| 64 |
-
# def _get_colors(n):
|
| 65 |
-
# base_colors = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1),
|
| 66 |
-
# (0, 1, 1), (1, 0, 1), (1, 1, 0),
|
| 67 |
-
# (1, 1, 1)])
|
| 68 |
-
|
| 69 |
-
# n0 = len(base_colors)
|
| 70 |
-
# if n <= n0:
|
| 71 |
-
# colours = base_colors[:n]
|
| 72 |
-
# else:
|
| 73 |
-
# colours = np.vstack((base_colors, cm.rainbow(np.linspace(0, 1, n-n0))[:,:-1]))
|
| 74 |
-
# return colours
|
| 75 |
-
|
| 76 |
def get_name(dfrow):
|
| 77 |
return os.path.join(dfrow['path'], dfrow['ROI'])
|
| 78 |
|
|
@@ -118,6 +95,9 @@ class CytofImage():
|
|
| 118 |
return f"CytofImage(slide={self.slide}, roi={self.roi})"
|
| 119 |
|
| 120 |
def save_cytof(self, savename: str):
|
|
|
|
|
|
|
|
|
|
| 121 |
pkl.dump(self, open(savename, "wb"))
|
| 122 |
|
| 123 |
def get_markers(self, imarker0: Optional[str] = None):
|
|
@@ -599,11 +579,6 @@ class CytofImage():
|
|
| 599 |
else:
|
| 600 |
channels = self.channels
|
| 601 |
'''assert all([x.lower() in channels_temp for x in channels]), "Not all provided channels are available!"'''
|
| 602 |
-
# for (i, chn) in enumerate(channels):
|
| 603 |
-
# savename = os.path.join(savedir, f"{chn}.tiff")
|
| 604 |
-
# im_temp = self.image[..., i]
|
| 605 |
-
# im_temp_ = np.clip(im_temp / np.quantile(im_temp, 0.99), 0, 1)
|
| 606 |
-
# save_multi_channel_img((im_temp_ * 255).astype(np.uint8), savename)
|
| 607 |
for chn in channels:
|
| 608 |
savename = os.path.join(savedir, f"{chn}{ext}")
|
| 609 |
# i = channels_temp.index(chn.lower())
|
|
@@ -1190,7 +1165,6 @@ class CytofCohort():
|
|
| 1190 |
"""
|
| 1191 |
self.cytof_images = cytof_images or {}
|
| 1192 |
self.df_cohort = df_cohort# or None# pd.read_csv(file_cohort) # the slide-ROI
|
| 1193 |
-
# self.df_io = None pd.read_csv(file_io) # the input-output correspondence file
|
| 1194 |
self.feat_sets = {
|
| 1195 |
"all": ["cell_sum", "cell_ave", "cell_morphology"],
|
| 1196 |
"cell_sum": ["cell_sum", "cell_morphology"],
|
|
@@ -1214,6 +1188,9 @@ class CytofCohort():
|
|
| 1214 |
return f"CytofCohort(name={self.name})"
|
| 1215 |
|
| 1216 |
def save_cytof_cohort(self, savename):
|
|
|
|
|
|
|
|
|
|
| 1217 |
pkl.dump(self, open(savename, "wb"))
|
| 1218 |
|
| 1219 |
def batch_process_feature(self):
|
|
@@ -1247,8 +1224,8 @@ class CytofCohort():
|
|
| 1247 |
|
| 1248 |
|
| 1249 |
def batch_process(self, params: Dict):
|
| 1250 |
-
sys.path.append("../
|
| 1251 |
-
from
|
| 1252 |
for i, (slide, roi, fname) in self.df_cohort.iterrows():
|
| 1253 |
paramsi = SetParameters(filename=fname,
|
| 1254 |
outdir=self.dir_out,
|
|
@@ -1395,9 +1372,8 @@ class CytofCohort():
|
|
| 1395 |
kmeans = KMeans(n_clusters=k, random_state=42).fit(df_feature)
|
| 1396 |
communities = kmeans.labels_
|
| 1397 |
else:
|
| 1398 |
-
communities, graph, Q = phenograph.cluster(df_feature, k=k,
|
| 1399 |
|
| 1400 |
-
print('Performing dimensionality reduction with UMAP now...')
|
| 1401 |
# project to 2D using UMAP
|
| 1402 |
umap_2d = umap.UMAP(n_components=2, init='random', random_state=0)
|
| 1403 |
proj_2d = umap_2d.fit_transform(df_feature)
|
|
@@ -1699,14 +1675,12 @@ class CytofCohort():
|
|
| 1699 |
thres = kwars.get(_, default_thres[_])
|
| 1700 |
"""print("{}: {}".format(_, thres))"""
|
| 1701 |
networks = self._gather_roi_kneighbor_graphs(key_pheno, method=method, **{_: thres})
|
| 1702 |
-
# networks = _gather_roi_kneighbor_graphs(self, key_pheno, method=method, **{_: thres})
|
| 1703 |
|
| 1704 |
if level == "slide":
|
| 1705 |
keys = ['edge_nums', 'expected_percentage', 'num_cell']
|
| 1706 |
for slide in self.df_cohort['Slide'].unique():
|
| 1707 |
cond = self.df_cohort['Slide'] == slide
|
| 1708 |
df_slide = self.df_cohort.loc[cond, :]
|
| 1709 |
-
# rois = df_slide.apply(lambda row: get_name(row), axis=1).values
|
| 1710 |
rois = df_slide['ROI'].values
|
| 1711 |
'''keys = list(networks.values())[0].keys()'''
|
| 1712 |
networks[slide] = {}
|
|
@@ -1756,6 +1730,7 @@ class CytofCohort():
|
|
| 1756 |
figsize=(6, 6), row_cluster=False, col_cluster=False)
|
| 1757 |
plt.title(f_key)
|
| 1758 |
plt.show()
|
|
|
|
| 1759 |
# IMPORTANT: attach to individual ROIs
|
| 1760 |
self.attach_individual_roi_pheno(key_pheno, override=True)
|
| 1761 |
return interacts, clustergrid
|
|
@@ -1792,7 +1767,6 @@ class CytofCohort():
|
|
| 1792 |
visualize=vis_thres,
|
| 1793 |
verbose=verbose)
|
| 1794 |
setattr(self, "marker_thresholds", thres)
|
| 1795 |
-
print('done getting threshold for summary')
|
| 1796 |
|
| 1797 |
# split to each ROI
|
| 1798 |
_attr_marker_pos, seen = [], 0
|
|
@@ -1818,29 +1792,6 @@ class CytofCohort():
|
|
| 1818 |
seen += 1
|
| 1819 |
return _attr_marker_pos
|
| 1820 |
|
| 1821 |
-
def get_roi_co_exp_compoent(self, feature_name, accumul_type):
|
| 1822 |
-
"""
|
| 1823 |
-
Creates the components for ROI level co-expression. This is expected to be used in cohort analysis
|
| 1824 |
-
"""
|
| 1825 |
-
|
| 1826 |
-
# initialize dictionaries to record components of the log odds-ratio
|
| 1827 |
-
co_positive_counts_dict = dict() # theta in manuscript
|
| 1828 |
-
expected_counts_dict = dict() # E in manuscript
|
| 1829 |
-
num_cells_dict = dict()
|
| 1830 |
-
|
| 1831 |
-
for i, cytof_img in enumerate(self.cytof_images.values()):
|
| 1832 |
-
slide, roi = cytof_img.slide, cytof_img.roi
|
| 1833 |
-
|
| 1834 |
-
# compute the co-expression at the ROI level, but does not compute the final probability
|
| 1835 |
-
df_co_pos_prob, df_expected_prob, n_cell = cytof_img.roi_co_expression(feature_name=feature_name, accumul_type=accumul_type, return_components=True)
|
| 1836 |
-
|
| 1837 |
-
# store the counts and number of cells separately
|
| 1838 |
-
co_positive_counts_dict[roi] = df_co_pos_prob
|
| 1839 |
-
expected_counts_dict[roi] = df_expected_prob
|
| 1840 |
-
num_cells_dict[roi] = n_cell
|
| 1841 |
-
|
| 1842 |
-
return co_positive_counts_dict, expected_counts_dict, num_cells_dict
|
| 1843 |
-
|
| 1844 |
def co_expression_analysis(self,
|
| 1845 |
normq: int = 75,
|
| 1846 |
feat_type: str = "normed",
|
|
@@ -1941,175 +1892,3 @@ class CytofCohort():
|
|
| 1941 |
slide_co_expression_dict[slide_key] = (edge_percentage_norm, df_expected.columns)
|
| 1942 |
|
| 1943 |
return slide_co_expression_dict
|
| 1944 |
-
|
| 1945 |
-
|
| 1946 |
-
def _gather_roi_co_exp(self,
|
| 1947 |
-
feat_name: str,
|
| 1948 |
-
accumul_type: str = "sum"):
|
| 1949 |
-
"""roi level co-expression analysis"""
|
| 1950 |
-
n_attr = f"df_feature_{feat_name}"
|
| 1951 |
-
expected_percentages = {}
|
| 1952 |
-
edge_percentages = {}
|
| 1953 |
-
num_cells = {}
|
| 1954 |
-
df_slide_roi = self.df_cohort
|
| 1955 |
-
|
| 1956 |
-
for i, cytof_img in enumerate(self.cytof_images.values()):
|
| 1957 |
-
slide, roi = cytof_img.slide, cytof_img.roi
|
| 1958 |
-
df_feat = getattr(cytof_img , n_attr)
|
| 1959 |
-
|
| 1960 |
-
if i == 0:
|
| 1961 |
-
# all gene (marker) columns
|
| 1962 |
-
marker_col_all = [x for x in df_feat.columns if f"cell_{accumul_type}" in x]
|
| 1963 |
-
ids = [cytof_img.channels.index(x.split(f"_cell_{accumul_type}")[0]) for x in marker_col_all]
|
| 1964 |
-
marker_all = list(np.array(cytof_img.markers, dtype=object)[np.array(ids)])
|
| 1965 |
-
n_marker = len(marker_col_all)
|
| 1966 |
-
n_cell = len(df_feat)
|
| 1967 |
-
|
| 1968 |
-
# corresponding marker positive info file
|
| 1969 |
-
if not hasattr(cytof_img, f"cell_count_{feat_name}_{accumul_type}"):
|
| 1970 |
-
print('no marker positive analysis found. Generating analysis...')
|
| 1971 |
-
self.generate_summary()
|
| 1972 |
-
|
| 1973 |
-
df_info_cell = getattr(cytof_img, f"cell_count_{feat_name}_{accumul_type}")
|
| 1974 |
-
pos_nums = df_info_cell["positive counts"].values
|
| 1975 |
-
pos_ratios = df_info_cell["positive ratio"].values
|
| 1976 |
-
thresholds = df_info_cell["threshold"].values
|
| 1977 |
-
|
| 1978 |
-
# create new expected_percentage matrix for each ROI
|
| 1979 |
-
expected_percentage = np.zeros((n_marker, n_marker))
|
| 1980 |
-
edge_percentage = np.zeros_like(expected_percentage)
|
| 1981 |
-
|
| 1982 |
-
"""expected_percentage
|
| 1983 |
-
an N by N matrix, where N represent for the number of total gene (marker)
|
| 1984 |
-
each ij-th element represents for the percentage that both the i-th and the j-th gene is "positive"
|
| 1985 |
-
based on the threshold defined previously"""
|
| 1986 |
-
for ii in range(n_marker):
|
| 1987 |
-
for jj in range(n_marker):
|
| 1988 |
-
expected_percentage[ii, jj] = pos_nums[ii] * pos_nums[jj]
|
| 1989 |
-
expected_percentages[roi] = expected_percentage
|
| 1990 |
-
|
| 1991 |
-
"""edge_percentage
|
| 1992 |
-
an N by N matrix, where N represent for the number of gene (marker)
|
| 1993 |
-
each ij-th element represents for the percentage of cells that show positive in both i-th and j-th gene
|
| 1994 |
-
"""
|
| 1995 |
-
edge_nums = np.zeros_like(expected_percentage)
|
| 1996 |
-
for ii in range(n_marker):
|
| 1997 |
-
_x = df_feat[marker_col_all[ii]].values > thresholds[ii]
|
| 1998 |
-
# _x = df_feat[marker_col_all[ii]].values > thresholds[marker_idx[ii]]
|
| 1999 |
-
for jj in range(n_marker):
|
| 2000 |
-
_y = df_feat[marker_col_all[jj]].values > thresholds[jj]
|
| 2001 |
-
# _y = df_feat[marker_col_all[jj]].values > thresholds[marker_idx[jj]]
|
| 2002 |
-
edge_nums[ii, jj] = np.sum(np.all([_x, _y], axis=0)) # / n_cell
|
| 2003 |
-
edge_percentages[roi] = edge_nums
|
| 2004 |
-
num_cells[roi] = n_cell
|
| 2005 |
-
|
| 2006 |
-
return expected_percentages, edge_percentages, num_cells, marker_all, marker_col_all
|
| 2007 |
-
|
| 2008 |
-
def _co_expression_analysis_(self,
|
| 2009 |
-
normq: int = 75,
|
| 2010 |
-
feat_type: str = "normed",
|
| 2011 |
-
co_exp_markers: Union[str, List] = "all",
|
| 2012 |
-
accumul_type: Union[str, List[str]] = "sum",
|
| 2013 |
-
level: str = "slide",
|
| 2014 |
-
verbose: bool = False,
|
| 2015 |
-
clustergrid=None):
|
| 2016 |
-
|
| 2017 |
-
assert level in ["slide", "roi"], "Only slide or roi levels are accepted!"
|
| 2018 |
-
assert feat_type in ["original", "normed", "scaled"]
|
| 2019 |
-
if feat_type == "original":
|
| 2020 |
-
feat_name = ""
|
| 2021 |
-
elif feat_type == "normed":
|
| 2022 |
-
feat_name = f"{normq}normed"
|
| 2023 |
-
else:
|
| 2024 |
-
feat_name = f"{normq}normed_scaled"
|
| 2025 |
-
if verbose:
|
| 2026 |
-
print(feat_name)
|
| 2027 |
-
|
| 2028 |
-
expected_percentages, edge_percentages, num_cells, marker_all, marker_col_all = \
|
| 2029 |
-
self._gather_roi_co_exp(feat_name)
|
| 2030 |
-
|
| 2031 |
-
|
| 2032 |
-
if co_exp_markers != "all":
|
| 2033 |
-
# assert (isinstance(co_exp_markers, list) and all([x in cytof_img.markers for x in co_exp_markers]))
|
| 2034 |
-
assert (isinstance(co_exp_markers, list) and all([x in marker_all for x in co_exp_markers]))
|
| 2035 |
-
marker_idx = np.array([marker_all.index(x) for x in co_exp_markers])
|
| 2036 |
-
marker_all = [marker_all[x] for x in marker_idx]
|
| 2037 |
-
marker_col_all = [marker_col_all[x] for x in marker_idx]
|
| 2038 |
-
else:
|
| 2039 |
-
marker_idx = np.arange(len(marker_all))
|
| 2040 |
-
|
| 2041 |
-
df_slide_roi = self.df_cohort
|
| 2042 |
-
if level == "slide":
|
| 2043 |
-
# expected_percentages, edge_percentages = {}, {}
|
| 2044 |
-
for slide in df_slide_roi["Slide"].unique(): ## for each slide
|
| 2045 |
-
print('co-exp slide:', slide)
|
| 2046 |
-
print('exp-perc keys before:', expected_percentages.keys())
|
| 2047 |
-
for seen_roi, f_roi in enumerate(df_slide_roi.loc[df_slide_roi["Slide"] == slide, "ROI"]): ## for each ROI
|
| 2048 |
-
roi = f_roi.replace(".txt", "")
|
| 2049 |
-
print('roi finding:', roi)
|
| 2050 |
-
if roi not in expected_percentages:
|
| 2051 |
-
continue
|
| 2052 |
-
if seen_roi == 0:
|
| 2053 |
-
expected_percentages[slide] = expected_percentages[roi]
|
| 2054 |
-
edge_percentages[slide] = edge_percentages[roi]
|
| 2055 |
-
num_cells[slide] = num_cells[roi]
|
| 2056 |
-
else:
|
| 2057 |
-
expected_percentages[slide] += expected_percentages[roi]
|
| 2058 |
-
edge_percentages[slide] += edge_percentages[roi]
|
| 2059 |
-
num_cells[slide] += num_cells[roi]
|
| 2060 |
-
expected_percentages.pop(roi)
|
| 2061 |
-
edge_percentages.pop(roi)
|
| 2062 |
-
num_cells.pop(roi)
|
| 2063 |
-
|
| 2064 |
-
print('exp-perc keys after:', expected_percentages.keys())
|
| 2065 |
-
# print('exp_after:', expected_percentages)
|
| 2066 |
-
|
| 2067 |
-
co_exps = {}
|
| 2068 |
-
for key, expected_percentage in expected_percentages.items():
|
| 2069 |
-
print('key for co-exp:', key)
|
| 2070 |
-
# print(type(expected_percentage), type(num_cells[key]))
|
| 2071 |
-
# print('exp_perc:', expected_percentage)
|
| 2072 |
-
expected_percentage = expected_percentage / num_cells[key] ** 2
|
| 2073 |
-
edge_percentage = edge_percentages[key] / num_cells[key]
|
| 2074 |
-
|
| 2075 |
-
# Normalize
|
| 2076 |
-
edge_percentage_norm = np.log10(edge_percentage / expected_percentage + 0.1)
|
| 2077 |
-
|
| 2078 |
-
# Fix Nan
|
| 2079 |
-
edge_percentage_norm[np.isnan(edge_percentage_norm)] = np.log10(1 + 0.1)
|
| 2080 |
-
|
| 2081 |
-
co_exps[key] = edge_percentage_norm
|
| 2082 |
-
|
| 2083 |
-
# plot
|
| 2084 |
-
for f_key, edge_percentage_norm in co_exps.items():
|
| 2085 |
-
# fig, ax = plt.subplots(1,1, figsize=(6,6))
|
| 2086 |
-
# # ax = sns.heatmap(edge_percentage_norm, center=np.log10(1 + 0.1),
|
| 2087 |
-
# sns.heatmap(edge_percentage_norm[marker_idx, :][:, marker_idx], center=np.log10(1 + 0.1),
|
| 2088 |
-
# cmap='RdBu_r', vmin=-1, vmax=3, xticklabels=marker_all, yticklabels=marker_all, ax=ax)
|
| 2089 |
-
# ax.set_aspect('equal')
|
| 2090 |
-
# plt.title(f_key)
|
| 2091 |
-
# plt.show()
|
| 2092 |
-
|
| 2093 |
-
if clustergrid is None:
|
| 2094 |
-
plt.figure()
|
| 2095 |
-
clustergrid = sns.clustermap(edge_percentage_norm[marker_idx, :][:, marker_idx],
|
| 2096 |
-
# clustergrid = sns.clustermap(edge_percentage_norm,
|
| 2097 |
-
center=np.log10(1 + 0.1), cmap='RdBu_r', vmin=-1, vmax=3,
|
| 2098 |
-
xticklabels=marker_all, yticklabels=marker_all)
|
| 2099 |
-
if verbose:
|
| 2100 |
-
plt.title(f_key)
|
| 2101 |
-
plt.show()
|
| 2102 |
-
|
| 2103 |
-
else:
|
| 2104 |
-
if verbose:
|
| 2105 |
-
plt.figure()
|
| 2106 |
-
sns.clustermap(edge_percentage_norm[marker_idx, :][:, marker_idx] \
|
| 2107 |
-
[clustergrid.dendrogram_row.reordered_ind, :]\
|
| 2108 |
-
[:, clustergrid.dendrogram_row.reordered_ind],
|
| 2109 |
-
center=np.log10(1 + 0.1), cmap='RdBu_r', vmin=-1, vmax=3,
|
| 2110 |
-
xticklabels=np.array(marker_all)[clustergrid.dendrogram_row.reordered_ind],
|
| 2111 |
-
yticklabels=np.array(marker_all)[clustergrid.dendrogram_row.reordered_ind],
|
| 2112 |
-
figsize=(6, 6), row_cluster=False, col_cluster=False)
|
| 2113 |
-
plt.title(f_key)
|
| 2114 |
-
plt.show()
|
| 2115 |
-
return co_exps, marker_all, marker_idx, clustergrid
|
|
|
|
| 37 |
from itertools import product
|
| 38 |
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
## added for test
|
| 41 |
import platform
|
| 42 |
from pathlib import Path
|
|
|
|
| 50 |
from cytof.utils import (save_multi_channel_img, generate_color_dict, show_color_table,
|
| 51 |
visualize_scatter, visualize_expression, _get_thresholds, _generate_summary)
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def get_name(dfrow):
|
| 54 |
return os.path.join(dfrow['path'], dfrow['ROI'])
|
| 55 |
|
|
|
|
| 95 |
return f"CytofImage(slide={self.slide}, roi={self.roi})"
|
| 96 |
|
| 97 |
def save_cytof(self, savename: str):
|
| 98 |
+
directory = os.path.dirname(savename)
|
| 99 |
+
if not os.path.exists(directory):
|
| 100 |
+
os.makedirs(directory)
|
| 101 |
pkl.dump(self, open(savename, "wb"))
|
| 102 |
|
| 103 |
def get_markers(self, imarker0: Optional[str] = None):
|
|
|
|
| 579 |
else:
|
| 580 |
channels = self.channels
|
| 581 |
'''assert all([x.lower() in channels_temp for x in channels]), "Not all provided channels are available!"'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
for chn in channels:
|
| 583 |
savename = os.path.join(savedir, f"{chn}{ext}")
|
| 584 |
# i = channels_temp.index(chn.lower())
|
|
|
|
| 1165 |
"""
|
| 1166 |
self.cytof_images = cytof_images or {}
|
| 1167 |
self.df_cohort = df_cohort# or None# pd.read_csv(file_cohort) # the slide-ROI
|
|
|
|
| 1168 |
self.feat_sets = {
|
| 1169 |
"all": ["cell_sum", "cell_ave", "cell_morphology"],
|
| 1170 |
"cell_sum": ["cell_sum", "cell_morphology"],
|
|
|
|
| 1188 |
return f"CytofCohort(name={self.name})"
|
| 1189 |
|
| 1190 |
def save_cytof_cohort(self, savename):
|
| 1191 |
+
directory = os.path.dirname(savename)
|
| 1192 |
+
if not os.path.exists(directory):
|
| 1193 |
+
os.makedirs(directory)
|
| 1194 |
pkl.dump(self, open(savename, "wb"))
|
| 1195 |
|
| 1196 |
def batch_process_feature(self):
|
|
|
|
| 1224 |
|
| 1225 |
|
| 1226 |
def batch_process(self, params: Dict):
|
| 1227 |
+
sys.path.append("../CLIscripts")
|
| 1228 |
+
from process_single_roi import process_single, SetParameters
|
| 1229 |
for i, (slide, roi, fname) in self.df_cohort.iterrows():
|
| 1230 |
paramsi = SetParameters(filename=fname,
|
| 1231 |
outdir=self.dir_out,
|
|
|
|
| 1372 |
kmeans = KMeans(n_clusters=k, random_state=42).fit(df_feature)
|
| 1373 |
communities = kmeans.labels_
|
| 1374 |
else:
|
| 1375 |
+
communities, graph, Q = phenograph.cluster(df_feature, k=k, n_jobs=-1) # run PhenoGraph
|
| 1376 |
|
|
|
|
| 1377 |
# project to 2D using UMAP
|
| 1378 |
umap_2d = umap.UMAP(n_components=2, init='random', random_state=0)
|
| 1379 |
proj_2d = umap_2d.fit_transform(df_feature)
|
|
|
|
| 1675 |
thres = kwars.get(_, default_thres[_])
|
| 1676 |
"""print("{}: {}".format(_, thres))"""
|
| 1677 |
networks = self._gather_roi_kneighbor_graphs(key_pheno, method=method, **{_: thres})
|
|
|
|
| 1678 |
|
| 1679 |
if level == "slide":
|
| 1680 |
keys = ['edge_nums', 'expected_percentage', 'num_cell']
|
| 1681 |
for slide in self.df_cohort['Slide'].unique():
|
| 1682 |
cond = self.df_cohort['Slide'] == slide
|
| 1683 |
df_slide = self.df_cohort.loc[cond, :]
|
|
|
|
| 1684 |
rois = df_slide['ROI'].values
|
| 1685 |
'''keys = list(networks.values())[0].keys()'''
|
| 1686 |
networks[slide] = {}
|
|
|
|
| 1730 |
figsize=(6, 6), row_cluster=False, col_cluster=False)
|
| 1731 |
plt.title(f_key)
|
| 1732 |
plt.show()
|
| 1733 |
+
|
| 1734 |
# IMPORTANT: attach to individual ROIs
|
| 1735 |
self.attach_individual_roi_pheno(key_pheno, override=True)
|
| 1736 |
return interacts, clustergrid
|
|
|
|
| 1767 |
visualize=vis_thres,
|
| 1768 |
verbose=verbose)
|
| 1769 |
setattr(self, "marker_thresholds", thres)
|
|
|
|
| 1770 |
|
| 1771 |
# split to each ROI
|
| 1772 |
_attr_marker_pos, seen = [], 0
|
|
|
|
| 1792 |
seen += 1
|
| 1793 |
return _attr_marker_pos
|
| 1794 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1795 |
def co_expression_analysis(self,
|
| 1796 |
normq: int = 75,
|
| 1797 |
feat_type: str = "normed",
|
|
|
|
| 1892 |
slide_co_expression_dict[slide_key] = (edge_percentage_norm, df_expected.columns)
|
| 1893 |
|
| 1894 |
return slide_co_expression_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|