"""Gradio front end for a multiplexed-imaging (CyTOF / IMC) analysis workflow.

The callbacks below wrap ``CytofImage`` / ``CytofCohort`` operations; the
``gr.Blocks`` section at the bottom wires them to the UI in seven steps:
upload, channel definition, segmentation, feature extraction, downstream
analysis, marker visualization and PhenoGraph clustering.
"""

# give some time reference to the user (printed before the slow imports on purpose)
print('Importing Gradio app packages... (first launch takes about 3-5 minutes)')

import gradio as gr
import yaml
import skimage
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import os
import seaborn as sns

from cytof import classes
from classes import CytofImage, CytofCohort, CytofImageTiff
from cytof.hyperion_preprocess import cytof_read_data_roi
from cytof.utils import show_color_table

OUTDIR = './output'

# small constant that keeps the log-odds computation away from x/0 and log(0)
_EPSILON = 1e-6


def _as_list(selection):
    """Normalize a dropdown value (None, a single string, or a list) to a list."""
    if selection is None:
        return []
    if isinstance(selection, str):
        return [selection]
    return list(selection)


def cytof_tiff_eval(file_path, marker_path, cytof_state):
    """Load the uploaded image file and visualize its individual channels.

    Args:
        file_path: path of the uploaded TXT/CSV/TIFF file.
        marker_path: path of the optional marker file; ``None`` means the
            upload is treated as a TXT/CSV file.
        cytof_state: previous state value; unused, kept because the Gradio
            event passes it in.

    Returns:
        ``(msg, viz, cytof_img)``: a status message, the channel
        visualization returned by ``check_channels`` and the loaded image.
    """
    # set to generic names because uploaded filenames are unpredictable
    slide = 'slide0'
    roi = 'roi1'

    # read in the data
    cytof_img, _ = cytof_read_data_roi(file_path, slide, roi)

    if marker_path is None:
        # case 1. user uploaded TXT/CSV: derive markers, preprocess, build image
        cytof_img.get_markers()
        cytof_img.preprocess()
        cytof_img.get_image()
    else:
        # case 2. user uploaded TIFF plus a marker file
        with open(marker_path, "rb") as marker_file:
            # NOTE(security): yaml.Loader can construct arbitrary Python
            # objects and this file is user-uploaded. Consider
            # yaml.safe_load once the marker-file format is confirmed to
            # contain only plain mappings/lists.
            labels_markers = yaml.load(marker_file, Loader=yaml.Loader)
        cytof_img.set_markers(**labels_markers)

    viz = cytof_img.check_channels(ncols=3, savedir='.')

    msg = f'Your uploaded TIFF has {len(cytof_img.markers)} markers'

    return msg, viz, cytof_img


def channel_select(cytof_img):
    """Return three multiselect dropdowns listing the image channels.

    One for defining unwanted channels, one for nuclei, one for membrane.
    """
    return (
        gr.Dropdown(choices=cytof_img.channels, multiselect=True),
        gr.Dropdown(choices=cytof_img.channels, multiselect=True),
        gr.Dropdown(choices=cytof_img.channels, multiselect=True),
    )


def nuclei_select(cytof_img):
    """Return two multiselect dropdowns (nuclei, membrane) listing the channels."""
    return (
        gr.Dropdown(choices=cytof_img.channels, multiselect=True),
        gr.Dropdown(choices=cytof_img.channels, multiselect=True),
    )


def modify_channels(cytof_img, unwanted_channels, nuc_channels, mem_channels):
    """Apply the user's channel choices to a copy of the image.

    Three steps: 1) remove unwanted channels, 2) define nuclei channels,
    3) define membrane channels.

    Returns:
        ``(msg, cytof_img_updated)``: a summary message and the modified copy.
    """
    cytof_img_updated = cytof_img.copy()
    cytof_img_updated.remove_special_channels(unwanted_channels)

    # define and remove nuclei channels
    nuclei_define = {'nuclei': nuc_channels}
    channels_rm = cytof_img_updated.define_special_channels(nuclei_define)
    cytof_img_updated.remove_special_channels(channels_rm)

    # define and keep membrane channels
    membrane_define = {'membrane': mem_channels}
    cytof_img_updated.define_special_channels(membrane_define)

    # only get image when need to derive from df. CytofImageTiff has an
    # inherent image attribute, so the exact-type check is deliberate
    # (presumably CytofImageTiff subclasses CytofImage - confirm before
    # switching to isinstance).
    if type(cytof_img_updated) is CytofImage:
        cytof_img_updated.get_image()

    # the membrane selection is optional, so tolerate an empty/None value
    # when building the message
    nuclei_channel_str = ', '.join(_as_list(channels_rm))
    membrane_channel_str = ', '.join(_as_list(mem_channels))
    msg = 'Your remaining channels are: ' + ', '.join(cytof_img_updated.channels) + '.\n\n Nuclei channels: ' + nuclei_channel_str + '.\n\n Membrane channels: ' + membrane_channel_str
    return msg, cytof_img_updated


def update_dropdown_options(cytof_img, selected_self, selected_other1, selected_other2):
    """Remove the options selected in one dropdown from the other two dropdowns.

    Each returned dropdown offers every channel not selected anywhere plus
    its own current selection (which stays selected).

    Returns:
        Updated dropdowns for ``selected_other1`` and ``selected_other2``.
    """
    selected_self = _as_list(selected_self)
    selected_other1 = _as_list(selected_other1)
    selected_other2 = _as_list(selected_other2)

    # Filter through a set instead of list.remove(): a channel that shows up
    # in two selections (or is no longer a channel) must not raise ValueError.
    unavailable = set(selected_self) | set(selected_other1) | set(selected_other2)
    updated_choices = [ch for ch in cytof_img.channels if ch not in unavailable]

    return (
        gr.Dropdown(choices=updated_choices + selected_other1, value=selected_other1, multiselect=True),
        gr.Dropdown(choices=updated_choices + selected_other2, value=selected_other2, multiselect=True),
    )


def cell_seg(cytof_img, radius):
    """Segment nuclei and cells, and plot the marked cell image.

    Returns:
        ``(fig, cytof_img)``: a plotly figure and the image that was segmented.
    """
    # check if membrane channel available
    use_membrane = 'membrane' in cytof_img.channels
    cytof_img.get_seg(use_membrane=use_membrane, radius=radius, show_process=False)

    # visualize nuclei and cells segmentation; the nuclei result is not
    # displayed, the call is kept in case it has side effects on cytof_img
    cytof_img.visualize_seg(segtype="nuclei", show=False)
    marked_image_cell = cytof_img.visualize_seg(segtype="cell", show=False)

    # visualizing nuclei and/or membrane, plus the first marker in channels
    marker_visualized = cytof_img.channels[0]

    # similar to plt.imshow()
    fig = px.imshow(marked_image_cell)

    # add empty scatter traces so that they show up as legend entries
    legend_entries = (
        ('white', 'membrane boundaries'),
        ('yellow', 'nucleus boundaries'),
        ('red', 'nucleus'),
        ('green', marker_visualized),
    )
    for color, name in legend_entries:
        fig.add_trace(go.Scatter(x=[None], y=[None], mode='markers', marker=dict(color=color), name=name))
    fig.update_layout(legend=dict(orientation="v", bgcolor='lightgray'))

    return fig, cytof_img


def feature_extraction(cytof_img, cohort_state, percentile_threshold):
    """Extract and normalize cell features, then wrap the image in a cohort.

    Args:
        cytof_img: segmented image.
        cohort_state: previous cohort state; unused, kept because the Gradio
            event passes it in.
        percentile_threshold: quantile used for feature normalization.

    Returns:
        ``(cytof_img, cytof_cohort, df_feature)`` where ``df_feature`` is the
        normalized feature table without its ``filename`` column.
    """
    # extract and normalize all features
    cytof_img.extract_features(filename=cytof_img.filename)
    cytof_img.feature_quantile_normalization(qs=[percentile_threshold])

    # create dir if not exist
    os.makedirs(OUTDIR, exist_ok=True)
    cytof_img.export_feature(f"df_feature_{percentile_threshold}normed", os.path.join(OUTDIR, f"feature_{percentile_threshold}normed.csv"))
    df_feature = getattr(cytof_img, f"df_feature_{percentile_threshold}normed")

    # each file upload in Gradio will always have the same filename
    # also the temp path created by Gradio is too long to be visually satisfying.
    df_feature = df_feature.loc[:, df_feature.columns != 'filename']

    # calculates quantiles between each marker and cell
    # NOTE(review): fixed at 75 regardless of percentile_threshold - confirm intended
    cytof_img.calculate_quantiles(qs=[75])

    dict_cytof_img = {f"{cytof_img.slide}_{cytof_img.roi}": cytof_img}

    # convert to cohort and prepare downstream analysis
    cytof_cohort = CytofCohort(cytof_images=dict_cytof_img, dir_out=OUTDIR)
    cytof_cohort.batch_process_feature()
    cytof_cohort.generate_summary()

    return cytof_img, cytof_cohort, df_feature


def _log_odds_ratio(observed, expected, epsilon=_EPSILON):
    """Return log10(observed / expected), with epsilon guarding x/0 and log(0).

    Entries whose observed/expected ratio is exactly 0 come out as
    log10(epsilon); they are reset to 0 because "nothing observed" means the
    relation cannot be determined, not that it is strongly negative.
    """
    log_odds = np.log10(observed / (expected + epsilon) + epsilon)
    log_odds[log_odds == np.log10(epsilon)] = 0
    return log_odds


def _log_odds_clustermap(df_observed, df_expected, cmap, vmin, vmax):
    """Draw a clustermap of the observed/expected log-odds and return its Figure.

    Tick labels come from the columns of ``df_expected``.
    """
    log_odds = _log_odds_ratio(df_observed.values, df_expected.values)

    # do some post processing
    marker_all_clean = [m.replace('_cell_sum', '') for m in df_expected.columns]

    clustergrid = sns.clustermap(log_odds,
                                 center=np.log10(1 + _EPSILON), cmap=cmap, vmin=vmin, vmax=vmax,
                                 xticklabels=marker_all_clean, yticklabels=marker_all_clean)

    # retrieve matplotlib.Figure object from clustermap
    return clustergrid.ax_heatmap.get_figure()


def co_expression(cytof_img, percentile_threshold):
    """Plot the marker co-expression log-odds clustermap for the image.

    Returns:
        ``(fig, cytof_img)``.
    """
    feat_name = f"{percentile_threshold}normed"
    df_co_pos_prob, df_expected_prob = cytof_img.roi_co_expression(feature_name=feat_name, accumul_type='sum', return_components=False)

    fig = _log_odds_clustermap(df_co_pos_prob, df_expected_prob, cmap='RdBu_r', vmin=-1, vmax=3)
    return fig, cytof_img


def spatial_interaction(cytof_img, percentile_threshold, method, cluster_threshold):
    """Plot the marker spatial-interaction log-odds clustermap for the image.

    Args:
        method: neighborhood definition chosen in the UI ('k-neighbor' or 'distance').
        cluster_threshold: threshold passed to ``roi_interaction_graphs``.

    Returns:
        ``(fig, cytof_img)``.
    """
    feat_name = f"{percentile_threshold}normed"
    df_expected_prob, df_cell_interaction_prob = cytof_img.roi_interaction_graphs(feature_name=feat_name, accumul_type='sum', method=method, threshold=cluster_threshold)

    fig = _log_odds_clustermap(df_cell_interaction_prob, df_expected_prob, cmap='bwr', vmin=-2, vmax=2)
    return fig, cytof_img


def _marker_options(cytof_img):
    """Return the image channels without the special 'nuclei'/'membrane' entries."""
    options = cytof_img.channels.copy()

    # nuclei is guaranteed to exist after defining channels
    options.remove('nuclei')

    # search for channel "membrane" and delete, skip if cannot find
    try:
        options.remove('membrane')
    except ValueError:
        pass

    return options


def get_marker_pos_options(cytof_img):
    """Return two dropdowns offering the markers available for comparison."""
    options = _marker_options(cytof_img)
    return gr.Dropdown(choices=options, interactive=True), gr.Dropdown(choices=options, interactive=True)


def _positive_marker_cell_stain(cytof_img, marker, percentile_threshold):
    """Return the cell-level image from ``visualize_marker_positive`` for ``marker``."""
    _, stain_cell, _ = cytof_img.visualize_marker_positive(
        marker=marker,
        feature_type="normed",
        accumul_type="sum",
        normq=percentile_threshold,
        show_boundary=True,
        color_list=[(0, 0, 1), (0, 1, 0)],  # negative, positive
        color_bound=(0, 0, 0),
        show_colortable=False)
    return stain_cell


def _side_by_side_images(left_image, right_image, left_title, right_title):
    """Return a plotly figure showing two images next to each other with linked axes."""
    fig = make_subplots(rows=1, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=(left_title, right_title))
    fig.add_trace(px.imshow(left_image).data[0], row=1, col=1)
    fig.add_trace(px.imshow(right_image).data[0], row=1, col=2)

    # Synchronize axes
    fig.update_xaxes(matches='x')
    fig.update_yaxes(matches='y')
    fig.update_layout(title_text=" ")
    return fig


def viz_pos_marker_pair(cytof_img, marker1, marker2, percentile_threshold):
    """Plot the positive cells of two markers side by side."""
    stain_cell1 = _positive_marker_cell_stain(cytof_img, marker1, percentile_threshold)
    stain_cell2 = _positive_marker_cell_stain(cytof_img, marker2, percentile_threshold)

    return _side_by_side_images(stain_cell1, stain_cell2, f"positive {marker1} cells", f"positive {marker2} cells")


def phenograph(cytof_cohort):
    """Run PhenoGraph clustering on the cohort and return its scatter figure.

    Returns:
        ``(umap, cytof_cohort)``.
    """
    key_pheno = cytof_cohort.clustering_phenograph()

    df_feats, commus, cluster_protein_exps, figs, figs_scatter, figs_exps = cytof_cohort.vis_phenograph(
        key_pheno=key_pheno,
        level="cohort",
        save_vis=False,
        show_plots=False,
        plot_together=False)

    umap = figs_scatter['cohort']

    return umap, cytof_cohort


def cluster_interaction_fn(cytof_img, cytof_cohort):
    """Plot the spatial interaction between PhenoGraph clusters for the image's slide.

    Returns:
        ``(fig, cytof_img, cytof_cohort)``.
    """
    # avoid calling the clustering algorithm again. cohort is guaranteed to have one phenograph
    key_pheno = list(cytof_cohort.phenograph.keys())[0]

    interacts, _ = cytof_cohort.cluster_interaction_analysis(key_pheno)
    interact = interacts[cytof_img.slide]
    cluster_ids = np.arange(interact.shape[0])
    clustergrid_interaction = sns.clustermap(interact, center=np.log10(1 + _EPSILON),
                                             cmap='RdBu_r', vmin=-1, vmax=1,
                                             xticklabels=cluster_ids,
                                             yticklabels=cluster_ids)

    # retrieve matplotlib.Figure object from the clustermap drawn above.
    # (Previously the figure of the grid returned by
    # cluster_interaction_analysis was used, leaving this customized plot unused.)
    fig = clustergrid_interaction.ax_heatmap.get_figure()

    return fig, cytof_img, cytof_cohort


def get_cluster_pos_options(cytof_img):
    """Return one dropdown offering the markers available for cluster comparison."""
    return gr.Dropdown(choices=_marker_options(cytof_img), interactive=True)


def viz_cluster_positive(marker, percentile_threshold, cytof_img, cytof_cohort):
    """Plot a marker's positive cells next to the PhenoGraph cluster assignment.

    Returns:
        ``(fig, cytof_img, cytof_cohort)``.
    """
    # avoid calling the clustering algorithm again. cohort is guaranteed to have one phenograph
    key_pheno = list(cytof_cohort.phenograph.keys())[0]

    # marker positive cell
    stain_cell1 = _positive_marker_cell_stain(cytof_img, marker, percentile_threshold)

    # attach PhenoGraph results to individual ROIs
    cytof_cohort.attach_individual_roi_pheno(key_pheno, override=True)

    # PhenoGraph clustering visualization
    pheno_stain_nuclei, pheno_stain_cell, color_dict = cytof_img.visualize_pheno(key_pheno=key_pheno)

    fig = _side_by_side_images(stain_cell1, pheno_stain_cell, f"positive {marker} cells", "PhenoGraph clusters on cells")

    return fig, cytof_img, cytof_cohort


# Gradio App template
with gr.Blocks() as demo:
    cytof_state = gr.State(CytofImage())

    # used in scenarios where users define/remove channels multiple times
    cytof_original_state = gr.State(CytofImage())

    gr.Markdown("# Step 1. Upload images")
    gr.Markdown('You may upload one or two files depending on your use case.')
    gr.Markdown('Case 1: A single TXT or CSV file that contains information about antibodies, rare heavy metal isotopes, and image channel names. Make sure files are following the CyTOF, IMC, or multiplex data convention. Leave the `Marker File` upload section blank.')
    gr.Markdown('Case 2: Multiple file uploads required. First, a TIFF file containing Regions of Interest (ROIs) stored as multiplexed images. Then, upload a `Marker File` listing the channels to identify the antibodies.')

    with gr.Row():  # first row where 1) asks for TIFF upload and 2) displays marker info
        img_path = gr.File(file_types=[".tiff", '.tif', '.txt', '.csv'], label='(Required) A file containing Regions of Interest (ROIs) of multiplexed imaging slides.')
        img_info = gr.Textbox(label='Marker information', info='Ensure the number of markers displayed below matches the expected number.')

    with gr.Row(equal_height=True):  # second row where 1) asks for marker file upload and 2) displays the visualization of individual channels
        with gr.Column():
            marker_path = gr.File(file_types=['.txt'], label='(Optional) Marker File. A list used to identify the antibodies in each TIFF layer. Upload one TXT file.')
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Upload")
        img_viz = gr.Plot(label="Visualization of individual channels")

    gr.Markdown("# Step 2. Modify existing channels")
    gr.Markdown("After visualizing the individual channels, did you notice any that should not be included in the next steps? Remove those if so.")
    gr.Markdown("Define channels designed to visualize nuclei. Optionally, define channels designed to visualize membranes.")

    with gr.Row(equal_height=True):  # third row selects nuclei channels
        with gr.Column():
            selected_unwanted_channel = gr.Dropdown(label='(Optional) Select the unwanted channel', interactive=True)
            selected_nuclei = gr.Dropdown(label='(Required) Select the nuclei channel', interactive=True)
            selected_membrane = gr.Dropdown(label='(Optional) Select the membrane channel', interactive=True)
            define_btn = gr.Button('Modify channels')
        channel_feedback = gr.Textbox(label='Channels info update')

    # upload the file and gather channel info, then populate the
    # unwanted_channel, nuclei, and membrane components
    submit_btn.click(
        fn=cytof_tiff_eval, inputs=[img_path, marker_path, cytof_original_state], outputs=[img_info, img_viz, cytof_original_state],
        api_name='upload'
    ).success(
        fn=channel_select, inputs=cytof_original_state, outputs=[selected_unwanted_channel, selected_nuclei, selected_membrane]
    )

    # keep the three dropdowns mutually exclusive; api_name used to identify in the endpoints
    selected_unwanted_channel.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_unwanted_channel, selected_nuclei, selected_membrane], outputs=[selected_nuclei, selected_membrane], api_name='dropdown_monitor1')
    selected_nuclei.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_nuclei, selected_membrane, selected_unwanted_channel], outputs=[selected_membrane, selected_unwanted_channel], api_name='dropdown_monitor2')
    selected_membrane.change(fn=update_dropdown_options, inputs=[cytof_original_state, selected_membrane, selected_nuclei, selected_unwanted_channel], outputs=[selected_nuclei, selected_unwanted_channel], api_name='dropdown_monitor3')

    # modifies the channels per user input
    define_btn.click(fn=modify_channels, inputs=[cytof_original_state, selected_unwanted_channel, selected_nuclei, selected_membrane], outputs=[channel_feedback, cytof_state])

    gr.Markdown('# Step 3. Perform cell segmentation based on the defined nuclei and membrane channels')

    with gr.Row():  # This row defines cell radius and performs segmentation
        with gr.Column():
            cell_radius = gr.Number(value=5, precision=0, label='Cell size', info='Please enter the desired radius for cell segmentation (in pixels; default value: 5)')
            seg_btn = gr.Button("Segment")
        seg_viz = gr.Plot(label="Visualization of the segmentation. Hover over graph to zoom, pan, save, etc.")
    seg_btn.click(fn=cell_seg, inputs=[cytof_state, cell_radius], outputs=[seg_viz, cytof_state])

    gr.Markdown('# Step 4. Extract cell features')

    cohort_state = gr.State(CytofCohort())
    with gr.Row():  # feature extraction related functions
        with gr.Column():
            gr.CheckboxGroup(choices=['Yes', 'Yes', 'Yes'], label='Note: This step will take significantly longer than the previous ones. A 130MB IMC file takes about 14 minutes to compute. Did you read this note?')
            norm_percentile = gr.Slider(minimum=50, maximum=99, step=1, value=75, interactive=True, label='Normalized quantification percentile')
            extract_btn = gr.Button('Extract')
        feat_df = gr.DataFrame(headers=['id', 'coordinate_x', 'coordinate_y', 'area_nuclei'], label='Feature extraction summary')

    extract_btn.click(fn=feature_extraction, inputs=[cytof_state, cohort_state, norm_percentile],
                      outputs=[cytof_state, cohort_state, feat_df])

    gr.Markdown('# Step 5. Downstream analysis')

    with gr.Row():  # show co-expression and spatial analysis
        with gr.Column():
            co_exp_viz = gr.Plot(label="Visualization of cell coexpression of markers")
            co_exp_btn = gr.Button('Run co-expression analysis')

        with gr.Column():
            spatial_viz = gr.Plot(label="Visualization of cell spatial interaction of markers")
            cluster_method = gr.Radio(label='Select the clustering method', value='k-neighbor', choices=['k-neighbor', 'distance'], info='K-neighbor: classifies the threshold number of surrounding cells as neighborhood pairs. Distance: classifies cells within threshold distance as neighborhood pairs.')
            cluster_threshold = gr.Slider(minimum=1, maximum=100, step=1, value=30, interactive=True, label='Clustering threshold')
            spatial_btn = gr.Button('Run spatial interaction analysis')

    co_exp_btn.click(fn=co_expression, inputs=[cytof_state, norm_percentile], outputs=[co_exp_viz, cytof_state])
    # spatial_btn logic is wired in step 6 because it also populates the
    # marker-positive dropdown options defined there

    gr.Markdown('# Step 6. Visualize positive markers')
    gr.Markdown('Select two markers for side-by-side comparison to visualize their positive states in cells. This serves two purposes. 1) Validate the co-expression analysis results. High expression level should mean a similar number of positive markers within the two slides, whereas low expression level mean a large difference of in the number of positive markers. 2) Validate the spatial interaction analysis results. High interaction means the two positive markers are in close proximity of each other (proximity is previously defined in `clustering threshold`), and vice versa.')

    with gr.Row():  # two marker positive visualization - dropdown options
        selected_marker1 = gr.Dropdown(label='Select one marker', info='Select a marker to visualize', interactive=True)
        selected_marker2 = gr.Dropdown(label='Select another marker', info='Selecting the same marker as the previous one is allowed', interactive=True)
        pos_viz_btn = gr.Button('Visualize these two markers')

    with gr.Row():  # two marker positive visualization - visualization
        marker_pos_viz = gr.Plot(label="Visualization of the two markers. Hover over graph to zoom, pan, save, etc.")

    spatial_btn.click(
        fn=spatial_interaction, inputs=[cytof_state, norm_percentile, cluster_method, cluster_threshold], outputs=[spatial_viz, cytof_state]
    ).success(
        fn=get_marker_pos_options, inputs=[cytof_state], outputs=[selected_marker1, selected_marker2]
    )
    pos_viz_btn.click(fn=viz_pos_marker_pair, inputs=[cytof_state, selected_marker1, selected_marker2, norm_percentile], outputs=[marker_pos_viz])

    gr.Markdown('# Step 7. Phenogrpah Clustering')
    gr.Markdown('Cells can be clustered into sub-groups based on the extracted single-cell data. Time reference: a 300MB IMC file takes about 2 minutes to compute.')

    with gr.Row():  # add two plots to visualize phenograph results
        phenograph_umap = gr.Plot(label="UMAP results")
        cluster_interaction = gr.Plot(label="Spatial interaction of clusters")

    with gr.Row(equal_height=False):  # action components
        umap_btn = gr.Button('Run Phenograph clustering')
        cluster_interact_btn = gr.Button('Run clustering interaction')

    cluster_interact_btn.click(cluster_interaction_fn, inputs=[cytof_state, cohort_state], outputs=[cluster_interaction, cytof_state, cohort_state])

    with gr.Row():
        with gr.Column():
            selected_cluster_marker = gr.Dropdown(label='Select one marker', info='Select a marker to visualize', interactive=True)
            cluster_positive_btn = gr.Button('Compare clusters and positive markers')

        with gr.Column():
            cluster_v_positive = gr.Plot(label="Cluster assignment vs. positive cells. Hover over graph to zoom, pan, save, etc.")

    umap_btn.click(
        fn=phenograph, inputs=[cohort_state], outputs=[phenograph_umap, cohort_state]
    ).success(
        fn=get_cluster_pos_options, inputs=[cytof_state], outputs=[selected_cluster_marker], api_name='selectClusterMarker'
    )
    cluster_positive_btn.click(fn=viz_cluster_positive, inputs=[selected_cluster_marker, norm_percentile, cytof_state, cohort_state], outputs=[cluster_v_positive, cytof_state, cohort_state])

    # clear everything if clicked
    clear_components = [img_path, marker_path, img_info, img_viz, channel_feedback, seg_viz, feat_df, co_exp_viz, spatial_viz, marker_pos_viz, phenograph_umap, cluster_interaction, cluster_v_positive]
    clear_btn.click(lambda: [None] * len(clear_components), outputs=clear_components)


if __name__ == "__main__":
    demo.launch()