Spaces:

LaboLabrie
/

CycIF

Sleeping

App Files Files Community

KashyapiNagaHarshitha commited on Jul 18, 2024

Commit

6372547

verified ·

1 Parent(s): ea9e947

Upload Step5_Marker_Threshold_Classification.py

Browse files

Files changed (1) hide show

Step5_Marker_Threshold_Classification.py +1508 -0

Step5_Marker_Threshold_Classification.py ADDED Viewed

	@@ -0,0 +1,1508 @@

+#!/usr/bin/env python
+# coding: utf-8
+# # IV. MARKER THRESHOLDS NOTEBOOK
+# ## IV.1. PACKAGES IMPORT
+import os
+import random
+import re
+import pandas as pd
+import numpy as np
+import seaborn as sb
+import matplotlib.pyplot as plt
+import matplotlib.colors as mplc
+import subprocess
+import warnings
+import panel as pn
+import json
+from scipy import signal
+from scipy.stats import pearsonr
+import plotly.figure_factory as ff
+import plotly
+import plotly.graph_objs as go
+from plotly.subplots import make_subplots
+from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
+import plotly.express as px
+import sys
+sys.setrecursionlimit(5000)
+from my_modules import *
+#Silence FutureWarnings & UserWarnings
+warnings.filterwarnings('ignore', category= FutureWarning)
+warnings.filterwarnings('ignore', category= UserWarning)
+# ## IV.2. *DIRECTORIES
+# Set base directory
+#input_path = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431'
+#set_path = 'test'
+present_dir = os.path.dirname(os.path.realpath(__file__))
+stored_variables_path = os.path.join(present_dir,'stored_variables.json')
+with open(stored_variables_path, 'r') as file:
+        stored_vars = json.load(file)
+        directory = stored_vars['base_dir']
+        input_path = os.path.join(present_dir,directory)
+        set_path = stored_vars['set_path']
+        selected_metadata_files = stored_vars['selected_metadata_files']
+        ls_samples = stored_vars['ls_samples']
+base_dir = input_path
+set_name = set_path
+project_name = set_name                 # Project name
+step_suffix = 'mt'                      # Curent part (here part IV)
+previous_step_suffix_long = "_zscore"   # Previous part (here ZSCORE NOTEBOOK)
+# Initial input data directory
+input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)
+# Marker threshold (mt) output directory
+output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
+# Marker threshold (mt) images subdirectory
+output_images_dir = os.path.join(output_data_dir,"images")
+# Data and Metadata directories
+# Metadata directories
+metadata_dir = os.path.join(base_dir, project_name + "_metadata")
+# images subdirectory
+metadata_images_dir = os.path.join(metadata_dir,"images")
+# Create directories if they don't already exist
+#for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
+#   if not os.path.exists(d):
+        #print("Creation of the" , d, "directory...")
+#        os.makedirs(d)
+    #else :
+    #    print("The", d, "directory already exists !")
+#os.chdir(input_data_dir)
+# Verify paths
+#print('base_dir :', base_dir)
+#print('input_data_dir :', input_data_dir)
+#print('output_data_dir :', output_data_dir)
+#print('output_images_dir :', output_images_dir)
+#print('metadata_dir :', metadata_dir)
+#print('metadata_images_dir :', metadata_images_dir)
+# ## IV.3. FILES
+# ### IV.3.1. METADATA
+filename = "marker_intensity_metadata.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: "+filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+# Open, read in information
+metadata = pd.read_csv(filename)
+# Verify size with verify_line_no() function in my_modules.py
+#verify_line_no(filename, metadata.shape[0] + 1)
+# Verify headers
+exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
+compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
+metadata = metadata.dropna()
+metadata.head()
+# ### IV.3.2. NOT_INTENSITIES
+filename = "not_intensities.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: "+filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+not_intensities = []
+with open(filename, 'r') as fh:
+    not_intensities = fh.read().strip().split("\n")
+    # take str, strip whitespace, split on new line character
+# Verify size
+#print("\nVerifying data read from file is the correct length...\n")
+#verify_line_no(filename, len(not_intensities))
+# Print to console
+#print("not_intensities =\n", not_intensities)
+# ### IV.3.3. FULL_TO_SHORT_COLUMN_NAMES
+filename = "full_to_short_column_names.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: " + filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+# Open, read in information
+df = pd.read_csv(filename, header = 0)
+# Verify size
+print("Verifying data read from file is the correct length...\n")
+#verify_line_no(filename, df.shape[0] + 1)
+# Turn into dictionary
+full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]
+#print('full_to_short_names =\n',full_to_short_names)
+# ### IV.3.4. SHORT_TO_FULL_COLUMN_NAMES
+filename = "short_to_full_column_names.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: " + filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+# Open, read in information
+df = pd.read_csv(filename, header = 0)
+# Verify size
+#print("Verifying data read from file is the correct length...\n")
+#verify_line_no(filename, df.shape[0] + 1)
+# Turn into dictionary
+short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
+# Print information
+#print('short_to_full_names =\n',short_to_full_names)
+# ### IV.3.10. DATA
+# List files in the directory
+# Check if the directory exists
+if os.path.exists(input_data_dir):
+    # List files in the directory
+    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_zscore.csv")]
+#    print("The following CSV files were detected:")
+#    print([sample for sample in ls_samples])
+#else:
+#    print(f"The directory {input_data_dir} does not exist.")
+# Import all the others files
+dfs = {}
+# Set variable to hold default header values
+# First gather information on expected headers using first file in ls_samples
+# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
+df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)
+expected_headers = df.columns.values
+#print('Header order should be :\n', expected_headers, '\n')
+###############################
+# !! This may take a while !! #
+###############################
+for sample in ls_samples:
+    file_path = os.path.join(input_data_dir,sample)
+    try:
+        # Read the CSV file
+        df = pd.read_csv(file_path, index_col=0)
+        # Check if the DataFrame is empty, if so, don't continue trying to process df and remove it
+        if not df.empty:
+            # Reorder the columns to match the expected headers list
+            df = df.reindex(columns=expected_headers)
+#            print(sample, "file is processed !\n")
+            #print(df)
+    except pd.errors.EmptyDataError:
+#       print(f'\nEmpty data error in {sample} file. Removing from analysis...')
+        ls_samples.remove(sample)
+    # Add df to dfs
+    dfs[sample] = df
+#print(dfs)
+# Merge dfs into one df
+df = pd.concat(dfs.values(), ignore_index=False , sort = False)
+del dfs
+print(df.head())
+intial_df = pn.pane.DataFrame(df.head(40), width = 2500)
+# ### Marker Classification
+# ## IV.5. *DOTPLOTS
+df
+# Load existing data from stored_variables.json with error handling
+try:
+    with open(stored_variables_path, 'r') as file:
+        data = json.load(file)
+except json.JSONDecodeError as e:
+#    print(f"Error reading JSON file: {e}")
+    data = {}
+# Debug: Print loaded data to verify keys
+#print(data)
+df
+df.head()
+# ### IV.7.2. DOTPLOTS-DETERMINED THRESHOLD
+#Empty dict in stored_variables to store the cell type classification for each marker
+#stored_variables_path = '/Users/harshithakolipaka/Downloads/stored_variables.json'
+try:
+    with open(stored_variables_path, 'r') as f:
+        stored_variables = json.load(f)
+except FileNotFoundError:
+    stored_variables = {}
+# Check if 'thresholds' field is present, if not, add it
+if 'cell_type_classification' not in stored_variables:
+    cell_type_classification = {}
+    stored_variables['cell_type_classification'] = cell_type_classification
+    with open(stored_variables_path, 'w') as f:
+        json.dump(stored_variables, f, indent=4)
+#Empty dict in stored_variables to store the cell subtype classification for each marker
+#stored_variables_path = '/Users/harshithakolipaka/Downloads/stored_variables.json'
+try:
+    with open(stored_variables_path, 'r') as f:
+        stored_variables = json.load(f)
+except FileNotFoundError:
+    stored_variables = {}
+# Check if 'thresholds' field is present, if not, add it
+if 'cell_subtype_classification' not in stored_variables:
+    cell_type_classification = {}
+    stored_variables['cell_subtype_classification'] = cell_type_classification
+    with open(stored_variables_path, 'w') as f:
+        json.dump(stored_variables, f, indent=4)
+df
+data = df
+import json
+import panel as pn
+# Load existing stored variables
+with open(stored_variables_path, 'r') as f:
+    stored_variables = json.load(f)
+# Initialize a dictionary to hold threshold inputs
+threshold_inputs = {}
+# Create widgets for each marker to get threshold inputs from the user
+for marker in stored_variables['markers']:
+    threshold_inputs[marker] = pn.widgets.FloatInput(name=f'{marker} Threshold', value=0.0, step=0.1)
+# Load stored_variables.json
+#stored_variables_path = '/Users/harshithakolipaka/Downloads/stored_variables.json'
+try:
+    with open(stored_variables_path, 'r') as f:
+        stored_variables = json.load(f)
+except FileNotFoundError:
+    stored_variables = {}
+# Check if 'thresholds' field is present, if not, add it
+if 'thresholds' not in stored_variables:
+    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}
+    stored_variables['thresholds'] = thresholds
+    with open(stored_variables_path, 'w') as f:
+        json.dump(stored_variables, f, indent=4)
+# Save button to save thresholds to stored_variables.json
+def save_thresholds(event):
+    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}
+    stored_variables['thresholds'] = thresholds
+    with open(stored_variables_path, 'w') as f:
+        json.dump(stored_variables, f, indent=4)
+    pn.state.notifications.success('Thresholds saved successfully!')
+save_button2 = pn.widgets.Button(name='Save Thresholds', button_type='primary')
+save_button2.on_click(save_thresholds)
+# Create a GridSpec layout
+grid = pn.GridSpec()
+# Add the widgets to the grid with five per row
+row = 0
+col = 0
+for marker in stored_variables['markers']:
+    grid[row, col] = threshold_inputs[marker]
+    col += 1
+    if col == 5:
+        col = 0
+        row += 1
+# Add the save button at the end
+grid[row + 1, :5] = save_button2
+# Panel layout
+threshold_panel = pn.Column(
+    pn.pane.Markdown("## Define Thresholds for Markers"),
+    grid)
+import pandas as pd
+import json
+# Load stored variables from the JSON file
+with open(stored_variables_path, 'r') as file:
+    stored_variables = json.load(file)
+# Step 1: Identify intensities
+intensities = list(df.columns)
+def assign_cell_type(row):
+    for intensity in intensities:
+        marker = intensity.split('_')[0]  # Extract marker from intensity name
+        if marker in stored_variables['thresholds']:
+            threshold = stored_variables['thresholds'][marker]
+            if row[intensity] > threshold:
+                for cell_type, markers in stored_variables['cell_type_classification'].items():
+                    if marker in markers:
+                        return cell_type
+        return 'STROMA'  # Default if no condition matches
+# Step 5: Apply the classification function to the DataFrame
+df['cell_type'] = df.apply(lambda row: assign_cell_type(row), axis=1)
+df.head()
+# Count the rows whose cell_type contains each of the four cell type names
+present_stroma = df['cell_type'].str.contains('STROMA').sum()
+present_cancer = df['cell_type'].str.contains('CANCER').sum()
+present_immune = df['cell_type'].str.contains('IMMUNE').sum()
+present_endothelial = df['cell_type'].str.contains('ENDOTHELIAL').sum()
+# Print the result
+#print(present_stroma)
+#print(present_cancer)
+#print(present_immune)
+#print(present_endothelial)
+#print(len(df))
+df.head(30)
+df
+# ## IV.8. *HEATMAPS
+#print(df.columns)
+# Assuming df_merged is your DataFrame
+if 'Sample_ID.1' in df.columns:
+    df = df.rename(columns={'Sample_ID.1': 'Sample_ID'})
+ #   print("After renaming Sample_ID", df.columns)
+# Selecting a subset of rows from the DataFrame df based on the 'Sample_ID' column
+# and then randomly choosing 20,000 rows from that subset to create the DataFrame test_dfkeep = ['TMA.csv']
+with open(stored_variables_path, 'r') as file:
+        ls_samples = stored_vars['ls_samples']
+keep = ls_samples
+keep_cell_type = ['ENDOTHELIAL','CANCER', 'STROMA', 'IMMUNE']
+#if 'Sample_ID' in df.columns:
+#   print("The",df.loc[df['cell_type'].isin(keep_cell_type)])
+test2_df = df.loc[(df['cell_type'].isin(keep_cell_type))
+                & (df['Sample_ID'].isin(keep)), :].copy()
+#print(test2_df.head())
+random_rows = np.random.choice(len(test2_df),20000)
+df2 = test2_df.iloc[random_rows,:].copy()
+df2
+#print(df2)
+# ### COLORS
+# #### SAMPLES COLORS
+color_values = sb.color_palette("husl",n_colors = len(ls_samples))
+sb.palplot(sb.color_palette(color_values))
+TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
+TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = "gray")
+sb.palplot(sb.color_palette(TMA_color_values))
+# Store in a dictionary
+color_dict = dict()
+color_dict = dict(zip(df.Sample_ID.unique(), color_values))
+# Replace all TMA samples' colors with gray
+i = 0
+for key in color_dict.keys():
+    if 'TMA' in key:
+        color_dict[key] = TMA_color_values[i]
+        i +=1
+color_dict
+color_df_sample = color_dict_to_df(color_dict, "Sample_ID")
+# Save to file in metadatadirectory
+filename = "sample_color_data.csv"
+filename = os.path.join(metadata_dir, filename)
+color_df_sample.to_csv(filename, index = False)
+color_df_sample
+# Legend of sample info only
+g  = plt.figure(figsize = (1,1)).add_subplot(111)
+g.axis('off')
+handles = []
+for item in color_dict.keys():
+        h = g.bar(0,0, color = color_dict[item],
+                  label = item, linewidth =0)
+        handles.append(h)
+first_legend = plt.legend(handles=handles, loc='upper right', title = 'Sample')
+filename = "Sample_legend.png"
+filename = os.path.join(metadata_images_dir, filename)
+plt.savefig(filename, bbox_inches = 'tight')
+filename = "sample_color_data.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: " + filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+# Open, read in information
+df = pd.read_csv(filename, header = 0)
+df = df.drop(columns = ['hex'])
+# our tuple of float values for rgb, (r, g, b) was read in
+# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
+# substrings and convert them back into floats
+df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
+# Verify size
+#print("Verifying data read from file is the correct length...\n")
+#verify_line_no(filename, df.shape[0] + 1)
+# Turn into dictionary
+sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()
+# Print information
+#print('sample_color_dict =\n',sample_color_dict)
+# #### CELL TYPES COLORS
+# Define your custom colors for each cell type
+custom_colors = {
+    'CANCER': (0.1333, 0.5451, 0.1333),
+    'STROMA': (0.4, 0.4, 0.4),
+    'IMMUNE': (1, 1, 0),
+    'ENDOTHELIAL': (0.502, 0, 0.502)
+}
+# Retrieve the list of cell types
+cell_types = list(custom_colors.keys())
+# Extract the corresponding colors from the dictionary
+color_values = [custom_colors[cell] for cell in cell_types]
+# Display the colors
+sb.palplot(sb.color_palette(color_values))
+# Store in a dictionary
+celltype_color_dict = dict(zip(cell_types, color_values))
+celltype_color_dict
+# Save color information (mapping and legend) to metadata directory
+# Create dataframe
+celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
+celltype_color_df.head()
+# Save to file in metadatadirectory
+filename = "celltype_color_data.csv"
+filename = os.path.join(metadata_dir, filename)
+celltype_color_df.to_csv(filename, index = False)
+#print("File" + filename + " was created!")
+# Legend of cell type info only
+g  = plt.figure(figsize = (1,1)).add_subplot(111)
+g.axis('off')
+handles = []
+for item in celltype_color_dict.keys():
+        h = g.bar(0,0, color = celltype_color_dict[item],
+                  label = item, linewidth =0)
+        handles.append(h)
+first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell type'),
+filename = "Celltype_legend.png"
+filename = os.path.join(metadata_images_dir, filename)
+plt.savefig(filename, bbox_inches = 'tight')
+filename = "celltype_color_data.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: "+filename)
+#else :
+#    print("The",filename,"file was imported for further analysis!")
+# Open, read in information
+df = pd.read_csv(filename, header = 0)
+df = df.drop(columns = ['hex'])
+# our tuple of float values for rgb, (r, g, b) was read in
+# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
+# substrings and convert them back into floats
+df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
+# Verify size
+#print("Verifying data read from file is the correct length...\n")
+#verify_line_no(filename, df.shape[0] + 1)
+# Turn into dictionary
+cell_type_color_dict = df.set_index('cell_type')['rgb'].to_dict()
+# Print information
+#print('cell_type_color_dict =\n',cell_type_color_dict)
+# Colors dictionaries
+sample_row_colors =df2.Sample_ID.map(sample_color_dict)
+#print(sample_row_colors[1:5])
+cell_type_row_colors = df2.cell_type.map(cell_type_color_dict)
+#print(cell_type_row_colors[1:5])
+# ## Cell Subtype Colours
+import pandas as pd
+import os
+def rgb_tuple_from_str(rgb_str):
+    # Cleaning the string to remove any unexpected 'np.float64'
+    rgb_str = rgb_str.replace("(","").replace(")","").replace(" ","").replace("np.float64", "")
+    try:
+        rgb = list(map(float, rgb_str.split(",")))
+        return tuple(rgb)
+    except ValueError as e:
+#        print(f"Error converting {rgb_str} to floats: {e}")
+        return None  # or handle the error as needed
+filename = "cellsubtype_color_data.csv"
+filename = os.path.join(metadata_dir, filename)
+# Check file exists
+#if not os.path.exists(filename):
+#    print("WARNING: Could not find desired file: " + filename)
+#else:
+#    print("The", filename, "file was imported for further analysis!")
+# Open, read in information
+df = pd.read_csv(filename, header=0)
+df = df.drop(columns=['hex'])
+# Clean the 'rgb' column to remove unexpected strings
+df['rgb'] = df['rgb'].str.replace("np.float64", "", regex=False)
+# Apply the function to convert string to tuple of floats
+df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
+# Verify size
+#print("Verifying data read from file is the correct length...\n")
+# verify_line_no(filename, df.shape[0] + 1)
+# Turn into dictionary
+cell_subtype_color_dict = df.set_index('cell_subtype')['rgb'].to_dict()
+# Print information
+#print('cell_subtype_color_dict =\n', cell_subtype_color_dict)
+df2
+# Colors dictionaries
+sample_row_colors =df2.Sample_ID.map(sample_color_dict)
+#print(sample_row_colors[1:5])
+cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
+#print(cell_subtype_row_colors[1:5])
+# #### Cell Type
+df
+#print(f"Loaded sample files: {ls_samples}")
+selected_intensities = list(df.columns)
+selected_intensities = list(df.columns)
+#print(selected_intensities)
+df
+df2
+df = df2
+df
+import json
+import pandas as pd
+import numpy as np
+import panel as pn
+import plotly.graph_objects as go
+pn.extension('plotly')
+# Load the selected intensities from the JSON file
+with open(stored_variables_path, 'r') as f:
+    json_data = json.load(f)
+ls_samples = json_data["ls_samples"]
+#print(f"Loaded sample files: {ls_samples}")
+# Checkbox group to select files
+checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=ls_samples)
+# Initially empty dropdowns for X and Y axis selection
+x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
+y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])
+# Input field for the number of random samples
+random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)
+# Sliders for interactive X and Y lines
+x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', start=0, end=1, step=0.01)
+y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', start=0, end=1, step=0.01)
+# Placeholder for the dot plot
+plot_placeholder = pn.pane.Plotly()
+# Placeholder for the digital reconstruction plot
+reconstruction_placeholder = pn.pane.Plotly()
+# Function to create the dot plot
+def create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos):
+    if not selected_files:
+#        print("No files selected.")
+        return go.Figure()
+    keep = selected_files
+    test2_df = df.loc[df['Sample_ID'].isin(keep), :].copy()
+#    print(f"Number of samples in test2_df: {len(test2_df)}")
+    if len(test2_df) > n_samples:
+        random_rows = np.random.choice(len(test2_df), n_samples)
+        test_df = test2_df.iloc[random_rows, :].copy()
+    else:
+        test_df = test2_df
+#    print(f"Number of samples in test_df: {len(test_df)}")
+    if x_axis not in test_df.columns or y_axis not in test_df.columns:
+#        print(f"Selected axes {x_axis} or {y_axis} not in DataFrame columns.")
+        return go.Figure()
+    fig = go.Figure()
+    title = 'Threshold'
+    fig.add_trace(go.Scatter(
+        x=test_df[x_axis],
+        y=test_df[y_axis],
+        mode='markers',
+        marker=dict(color='LightSkyBlue', size=2)
+    ))
+    # Add vertical and horizontal lines
+    fig.add_vline(x=x_line_pos, line_width=2, line_dash="dash", line_color="red")
+    fig.add_hline(y=y_line_pos, line_width=2, line_dash="dash", line_color="red")
+    fig.update_layout(
+        title=title,
+        plot_bgcolor='white',
+        autosize=True,
+        margin=dict(l=20, r=20, t=40, b=20),
+        xaxis=dict(title=x_axis, linecolor='black', range=[test_df[x_axis].min(), test_df[x_axis].max()]),
+        yaxis=dict(title=y_axis, linecolor='black', range=[test_df[y_axis].min(), test_df[y_axis].max()])
+    )
+    return fig
+def assign_cell_types_again():
+    with open(stored_variables_path, 'r') as file:
+        stored_variables = json.load(file)
+    intensities = list(df.columns)
+    def assign_cell_type(row):
+        for intensity in intensities:
+            marker = intensity.split('_')[0]  # Extract marker from intensity name
+            if marker in stored_variables['thresholds']:
+                threshold = stored_variables['thresholds'][marker]
+                if row[intensity] > threshold:
+                    for cell_type, markers in stored_variables['cell_type_classification'].items():
+                        if marker in markers:
+                            return cell_type
+        return 'STROMA'  # Default if no condition matches
+    df['cell_type'] = df.apply(lambda row: assign_cell_type(row), axis=1)
+    return df
+# Function to create the digital reconstruction plot
+def create_reconstruction_plot(selected_files):
+    if not selected_files:
+#       print("No files selected.")
+        return go.Figure()
+    df = assign_cell_types_again()
+    fig = go.Figure()
+    for sample in selected_files:
+        sample_id = sample
+        sample_id2 = sample.split('_')[0]
+        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_type']]
+        title = sample_id2 + " Background Subtracted XY Map cell types"
+        for celltype in df.loc[df['Sample_ID'] == sample_id, 'cell_type'].unique():
+            fig.add_scatter(
+                mode='markers',
+                marker=dict(size=3, opacity=0.5, color='rgb' + str(cell_type_color_dict[celltype])),
+                x=location_colors.loc[location_colors['cell_type'] == celltype, 'Nuc_X'],
+                y=location_colors.loc[location_colors['cell_type'] == celltype, 'Nuc_Y_Inv'],
+                name=celltype
+            )
+        fig.update_layout(
+            title=title,
+            plot_bgcolor='white',
+            autosize=True,
+            margin=dict(l=20, r=20, t=40, b=20),
+            legend=dict(
+                title='Cell Types',
+                font=dict(
+                    family='Arial',
+                    size=12,
+                    color='black'
+                ),
+                bgcolor='white',
+                bordercolor='black',
+                borderwidth=0.4,
+                itemsizing='constant'
+            ),
+            xaxis=dict(title='Nuc_X', linecolor='black', range=[location_colors['Nuc_X'].min(), location_colors['Nuc_X'].max()]),
+            yaxis=dict(title='Nuc_Y_Inv', linecolor='black', range=[location_colors['Nuc_Y_Inv'].min(), location_colors['Nuc_Y_Inv'].max()])
+        )
+    return fig
+def update_dropdown_options(event):
+    selected_files = checkbox_group.value
+#    print(f"Selected files in update_dropdown_options: {selected_files}")
+    if selected_files:
+        keep = selected_files
+        test2_df = df.loc[df['Sample_ID'].isin(keep), :].copy()
+        selected_intensities = list(test2_df.columns)
+        selected_intensities = [col for col in selected_intensities if '_Intensity_Average' in col]
+#        print(f"Updated dropdown options: {selected_intensities}")
+        x_axis_dropdown.options = selected_intensities
+        y_axis_dropdown.options = selected_intensities
+    else:
+        x_axis_dropdown.options = []
+        y_axis_dropdown.options = []
+def update_slider_ranges(event):
+    selected_files = checkbox_group.value
+    x_axis = x_axis_dropdown.value
+    y_axis = y_axis_dropdown.value
+#    print("Axis:",x_axis,y_axis)
+    if selected_files and x_axis and y_axis:
+        keep = selected_files
+        test2_df = df.loc[df['Sample_ID'].isin(keep), :].copy()
+        x_range = (test2_df[x_axis].min(), test2_df[x_axis].max())
+        y_range = (test2_df[y_axis].min(), test2_df[y_axis].max())
+#        print("Ranges:",x_range,y_range)
+        x_line_slider.start = -abs(x_range[1])
+        x_line_slider.end = abs(x_range[1])
+        y_line_slider.start = -abs(y_range[1])
+        y_line_slider.end = abs(y_range[1])
+        x_line_slider.value = 0
+        y_line_slider.value = 0
+def on_value_change(event):
+    selected_files = checkbox_group.value
+    x_axis = x_axis_dropdown.value
+    y_axis = y_axis_dropdown.value
+    n_samples = random_sample_input.value
+    x_line_pos = x_line_slider.value
+    y_line_pos = y_line_slider.value
+#    print(f"Selected files: {selected_files}")
+#    print(f"X-Axis: {x_axis}, Y-Axis: {y_axis}, Number of samples: {n_samples}, X Line: {x_line_pos}, Y Line: {y_line_pos}")
+    plot = create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos)
+    reconstruction_plot = create_reconstruction_plot(selected_files)
+    plot_placeholder.object = plot
+    reconstruction_placeholder.object = reconstruction_plot
+# Link value changes to function
+checkbox_group.param.watch(update_dropdown_options, 'value')
+checkbox_group.param.watch(update_slider_ranges, 'value')
+x_axis_dropdown.param.watch(update_slider_ranges, 'value')
+y_axis_dropdown.param.watch(update_slider_ranges, 'value')
+x_axis_dropdown.param.watch(on_value_change, 'value')
+y_axis_dropdown.param.watch(on_value_change, 'value')
+random_sample_input.param.watch(on_value_change, 'value')
+x_line_slider.param.watch(on_value_change, 'value')
+y_line_slider.param.watch(on_value_change, 'value')
+# Layout
+plot_with_reconstruction = pn.Column(
+    "## Select Files to Construct Dot Plot",
+    checkbox_group,
+    x_axis_dropdown,
+    y_axis_dropdown,
+    random_sample_input,
+    pn.Row(x_line_slider, y_line_slider),
+    pn.Row(
+        pn.Column(
+            "## Dot Plot",
+            pn.Column(plot_placeholder)),
+        pn.Column(
+            "## Digital Reconstruction Plot",
+            reconstruction_placeholder),
+))
+# Serve the app
+#plot_with_reconstruction.show()
+# ## MAKE HEATMAPS
+# ### Cell Subtype
+# Create data structure to hold everything we need for row/column annotations
+# annotations is a dictionary
+## IMPORTANT - if you use 'annotations', it MUST have both 'rows' and 'cols'
+## objects inside. These can be empty lists, but they must be there!
+anns = {}
+# create a data structure to hold everything we need for only row annotations
+# row_annotations is a list, where each item therein is a dictioary corresponding
+# to all of the data pertaining to that particular annotation
+# Adding each item (e.g., Sample, then Cluster), one at a time to ensure ordering
+# is as anticipated on figure
+row_annotations = []
+row_annotations.append({'label':'Sample',
+                        'type':'row',
+                        'mapping':sample_row_colors,
+                        'dict':sample_color_dict,
+                        'location':'center left',
+                        'bbox_to_anchor':(0.1, 0.9)})
+row_annotations.append({'label':'Cell type',
+                        'type':'row',
+                        'mapping':cell_type_row_colors,
+                        'dict':cell_type_color_dict,
+                        'location':'center left',
+                        'bbox_to_anchor':(0.17, 0.9)})
+anns['rows'] = row_annotations
+# Now we repeat the process for column annotations
+col_annotations = []
+anns['cols'] = col_annotations
+# To simplify marker display in the following figures (heatmap, etc)
+figure_marker_names = {key: value.split('_')[0] for key, value in full_to_short_names.items()}
+not_intensities
+df2
+df2.drop('cell_subtype', axis = 'columns')
+not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
+                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID','cell_type', 'cell_subtype', 'cluster','ID',
+                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']
+df2 = assign_cell_types_again()
+df2.drop('cell_subtype', axis = 'columns')
+df2.head()
# Build a small random subset of cells (test_df) from df
data = df
# Re-read the sample list from stored_variables.json. The file used to be
# opened here without being parsed, so the value came from the copy loaded at
# the top of the script instead of from the file.
with open(stored_variables_path, 'r') as file:
    ls_samples = json.load(file)['ls_samples']
keep = list(ls_samples)
keep_cell_type = ['STROMA','CANCER','IMMUNE','ENDOTHELIAL']
# Keep only the rows whose cell type AND sample are both in the lists above
cell_type_condition = data['cell_type'].isin(keep_cell_type)
sample_id_condition = data['Sample_ID'].isin(keep)
combined_condition = cell_type_condition & sample_id_condition
test2_df = data.loc[combined_condition].copy()
# Draw at most 1,000 distinct rows. replace=False avoids duplicated cells, and
# capping the size at len(test2_df) keeps np.random.choice from raising when
# the filtered subset is small or empty.
n_random_rows = min(1000, len(test2_df))
random_rows = np.random.choice(len(test2_df), n_random_rows, replace=False)
test_df = test2_df.iloc[random_rows,:].copy()
+import json
+import panel as pn
+import param
+import pandas as pd
# Enable the Tabulator widget for Panel
pn.extension('tabulator')
# The classification settings live in the same JSON file as the stored variables
file_path = stored_variables_path


def load_data():
    """Return the parsed content of ``file_path``.

    A JSON decoding error is reported on stdout and an empty dict is returned
    instead; any other error (e.g. a missing file) propagates to the caller.
    """
    try:
        with open(file_path, 'r') as file:
            content = json.load(file)
    except json.JSONDecodeError as e:
        print(f"Error reading JSON file: {e}")
        content = {}
    return content


data = load_data()
# Markers, cell types and cell subtypes listed in the file (empty list if absent)
markers, cell_types, cell_subtypes = (
    data.get(key, []) for key in ('markers', 'cell_type', 'cell_subtype')
)
# Sanitize option names
def sanitize_options(options):
    """Return a new list with every option name rewritten character by character.

    Spaces become '_', '+' becomes 'plus', 'α' becomes 'a' and apostrophes are
    removed. None of the replacement texts contains a character that is itself
    replaced, so a single translation pass gives the same result as applying
    the four substitutions one after another.
    """
    substitutions = str.maketrans({' ': '_', '+': 'plus', 'α': 'a', "'": None})
    return [opt.translate(substitutions) for opt in options]
# Sanitized versions of the cell type and cell subtype names loaded above
sanitized_cell_types, sanitized_cell_subtypes = map(sanitize_options, (cell_types, cell_subtypes))
# Helper function to create the (item x marker) selection DataFrame
def create_classification_df(items, item_label):
    """Build the table used to tick which markers define each item.

    Parameters
    ----------
    items : iterable of str
        Names to list, one per row (cell types or cell subtypes).
    item_label : str
        Name of the first column, which holds the item names.

    Returns
    -------
    pandas.DataFrame
        One row per item with the columns ``[item_label] + markers``; every
        marker column is initialised to ``False``.

    The rows are built directly instead of going through a dynamically created
    ``param.Parameterized`` class: the resulting frame is the same, and an
    empty ``items`` now yields an empty frame with the expected columns rather
    than a KeyError on the column selection.
    """
    columns = [item_label, *markers]
    rows = [{item_label: item, **dict.fromkeys(markers, False)} for item in items]
    return pd.DataFrame(rows, columns=columns)
# One selection table per classification level
cell_type_df = create_classification_df(items=sanitized_cell_types, item_label='CELL_TYPE')
cell_subtype_df = create_classification_df(items=sanitized_cell_subtypes, item_label='CELL_SUBTYPE')
# Every marker column is rendered with Tabulator's tick/cross formatter
tabulator_formatters = {}
for marker in markers:
    tabulator_formatters[marker] = {'type': 'tickCross'}
# Tabulator widgets showing the two tables
cell_type_table = pn.widgets.Tabulator(
    cell_type_df,
    formatters=tabulator_formatters,
)
cell_subtype_table = pn.widgets.Tabulator(
    cell_subtype_df,
    formatters=tabulator_formatters,
)
# Save function shared by the cell type and cell subtype tables
def save_data(table, classification_key, item_label):
    """Store the marker selection of ``table`` in the JSON file at ``file_path``.

    Parameters
    ----------
    table : object with a ``value`` attribute holding a pandas.DataFrame
        The Tabulator widget (or equivalent) being saved.
    classification_key : str
        Key of the JSON document under which the selection is written.
    item_label : str
        Name of the column holding the item (cell type / subtype) names.

    The selection is a dict mapping every item to the list of markers ticked on
    its row. Before writing, the file is read again and merged into the
    module-level ``data`` snapshot: other callbacks write their own keys to the
    same file, and dumping the stale snapshot alone would erase them.
    """
    # Tick/cross glyphs, if present in the table values, are mapped to booleans
    df_bool = table.value.replace({'✔': True, '✘': False})
    classification = {
        row[item_label]: [marker for marker in markers if row[marker]]
        for _, row in df_bool.iterrows()
    }
    try:
        with open(file_path, 'r') as file:
            latest = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # Nothing usable on disk: fall back to the in-memory snapshot only
        latest = {}
    data.update(latest)
    data[classification_key] = classification
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=4)
# Button callbacks: each one saves the current content of its own table
def save_cell_type_selections(event):
    """Save the cell type table under 'cell_type_classification'."""
    save_data(table=cell_type_table,
              classification_key='cell_type_classification',
              item_label='CELL_TYPE')


def save_cell_subtype_selections(event):
    """Save the cell subtype table under 'cell_subtype_classification'."""
    save_data(table=cell_subtype_table,
              classification_key='cell_subtype_classification',
              item_label='CELL_SUBTYPE')


# Save buttons, each wired to its callback
save_cell_type_button = pn.widgets.Button(
    name='Save Cell Type Selections', button_type='primary')
save_cell_type_button.on_click(save_cell_type_selections)
save_cell_subtype_button = pn.widgets.Button(
    name='Save Cell Subtype Selections', button_type='primary')
save_cell_subtype_button.on_click(save_cell_subtype_selections)
# Each classification view stacks a heading, the table and its save button
cell_type_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Type Classification"), cell_type_table, save_cell_type_button)
cell_subtype_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Subtype Classification"), cell_subtype_table, save_cell_subtype_button)
+import json
+import panel as pn
# Load the stored variables once. The file was previously read a second time
# inside a try/except whose FileNotFoundError branch could never run, because
# the unguarded read just before it would already have failed.
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)
# Thresholds saved earlier, if any
saved_subtype_thresholds = stored_variables.get('subtype_thresholds', {})
# One numeric input per marker. Each input starts from the saved threshold
# (0.0 when none is stored) so that pressing "Save" without editing does not
# reset previously saved thresholds to zero.
subtype_threshold_inputs = {}
for marker in stored_variables['markers']:
    saved_value = saved_subtype_thresholds.get(marker)
    subtype_threshold_inputs[marker] = pn.widgets.FloatInput(
        name=f'{marker} Threshold',
        value=0.0 if saved_value is None else saved_value,
        step=0.1)
# If the 'subtype_thresholds' field is absent, create it from the input
# defaults and write it to the file
if 'subtype_thresholds' not in stored_variables:
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Callback of the save button: write the thresholds to stored_variables.json
def save_thresholds(event):
    """Write the current value of every threshold input to the JSON file.

    The values are stored under the 'subtype_thresholds' key. The file is read
    again first and merged into the module-level ``stored_variables`` dict, so
    that keys written to the file by other callbacks since it was last loaded
    are kept instead of being overwritten by a stale copy.
    """
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
    try:
        with open(stored_variables_path, 'r') as f:
            latest = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Nothing usable on disk: keep working from the in-memory copy
        latest = {}
    stored_variables.update(latest)
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
save_button = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button.on_click(save_thresholds)
# Lay the threshold inputs out on a grid, five per row
subtype_grid = pn.GridSpec()
threshold_marker_names = stored_variables['markers']
for position, marker in enumerate(threshold_marker_names):
    grid_row, grid_col = divmod(position, 5)
    subtype_grid[grid_row, grid_col] = subtype_threshold_inputs[marker]
# The save button spans the five columns, one row below the row in which the
# next input would have been placed
subtype_grid[len(threshold_marker_names) // 5 + 1, :5] = save_button
# Heading followed by the grid
subtype_threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    subtype_grid,
)
# Reload the stored variables so the code below sees what is currently on disk
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)
# Every column of df is a candidate; columns whose prefix has no threshold are
# ignored by assign_cell_subtypes
intensities = df.columns.tolist()
def assign_cell_subtypes(row):
    """Return the cell subtype of one row of df.

    The columns listed in ``intensities`` are scanned in order. The marker name
    is the part of the column name before the first underscore. For the first
    marker that has a threshold, whose value in ``row`` exceeds that threshold
    and that is listed for a subtype in
    ``stored_variables['cell_subtype_classification']``, that subtype is
    returned. 'DC' is returned when no marker qualifies.

    The default used to be returned from inside the loop, so only the first
    thresholded marker was ever tested (and None was returned when no marker
    had a threshold); it is now returned after all markers have been checked,
    like assign_cell_subtype does.
    """
    thresholds = stored_variables['subtype_thresholds']
    for intensity in intensities:
        marker = intensity.split('_')[0]  # Extract marker from intensity name
        if marker not in thresholds:
            continue
        if row[intensity] > thresholds[marker]:
            for cell_subtype, markers in stored_variables['cell_subtype_classification'].items():
                if marker in markers:
                    return cell_subtype
    return 'DC'  # Default value if no conditions match
# Re-assign the cell types, then derive a subtype for every row
df = assign_cell_types_again()
df['cell_subtype'] = df.apply(assign_cell_subtypes, axis=1)
# Colour dictionary for the cell subtypes. Twelve base colours (RGB channels
# between 0 and 1) are assigned to the subtype names in order; the last two
# names wrap around and reuse the first two colours.
_subtype_base_colors = [
    (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
    (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
    (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
    (0.2, 0.6274509803921569, 0.17254901960784313),
    (0.984313725490196, 0.6039215686274509, 0.6),
    (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
    (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
    (1.0, 0.4980392156862745, 0.0),
    (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
    (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
    (1.0, 1.0, 0.6),
    (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
]
_subtype_color_order = [
    'DC', 'B', 'TCD4', 'Exhausted TCD4', 'Exhausted TCD8', 'TCD8', 'M1', 'M2',
    'Treg', 'Other CD45+', 'Cancer', 'myCAF αSMA+', 'Stroma', 'Endothelial',
]
# Each value is the text 'rgb' followed by the tuple's printed form
cell_subtype_color_dict = {
    subtype: f"rgb{_subtype_base_colors[position % len(_subtype_base_colors)]}"
    for position, subtype in enumerate(_subtype_color_order)
}
# Read a stored-variables JSON file
def load_stored_variables(path):
    """Parse the JSON file at ``path`` and return its content."""
    with open(path, 'r') as file:
        content = json.load(file)
    return content


# Columns of df whose name contains '_Intensity_Average'
subtype_intensities = list(filter(lambda column: '_Intensity_Average' in column, df.columns))
# Assign cell subtype based on thresholds and classifications
def assign_cell_subtype(row, stored_variables=None):
    """Return the cell subtype of one row of df.

    Parameters
    ----------
    row : mapping indexed by column name
        One row of df.
    stored_variables : dict, optional
        Parsed content of the stored-variables file. When omitted it is loaded
        from ``stored_variables_path``; pass it explicitly when classifying
        many rows so the file is not re-read for each of them.

    Returns
    -------
    str
        The subtype listed in ``stored_variables['cell_subtype_classification']``
        for the first column of ``subtype_intensities`` whose marker (the part
        of the column name before the first underscore) has a threshold that
        the row value exceeds and that belongs to a subtype; 'DC' otherwise.
    """
    if stored_variables is None:
        stored_variables = load_stored_variables(stored_variables_path)
    for subtype_intensity in subtype_intensities:
        marker = subtype_intensity.split('_')[0]
        if marker not in stored_variables['subtype_thresholds']:
            continue
        subtype_threshold = stored_variables['subtype_thresholds'][marker]
        if row[subtype_intensity] > subtype_threshold:
            for cell_subtype, markers in stored_variables['cell_subtype_classification'].items():
                if marker in markers:
                    return cell_subtype  # Return the assigned subtype
    return 'DC'  # Default value if no conditions match


# Main function to assign cell subtypes to DataFrame
def assign_cell_subtypes_again():
    """Recompute df['cell_subtype'] from the settings on disk and return df.

    The settings file is read once per call and shared by all rows; it used to
    be opened and parsed again for every single row.
    """
    settings = load_stored_variables(stored_variables_path)
    df['cell_subtype'] = df.apply(lambda row: assign_cell_subtype(row, settings), axis=1)
    return df
+import json
+import pandas as pd
+import numpy as np
+import panel as pn
+import plotly.graph_objects as go
pn.extension('plotly')
# Read the list of sample files from the stored variables
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
subtype_ls_samples = json_data["ls_samples"]
# Checkbox group used to pick the files to plot
subtype_checkbox_group = pn.widgets.CheckBoxGroup(
    name='Select Files',
    options=subtype_ls_samples,
)
# X and Y axis selectors; they start without options
subtype_x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
subtype_y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])
# Number of rows to draw at random for the dot plot
subtype_random_sample_input = pn.widgets.IntInput(
    name='Number of Random Samples',
    value=20000,
    step=100,
)
# Sliders positioning the vertical (X) and horizontal (Y) lines; both start
# with the same 0..1 range in steps of 0.01
_line_slider_range = dict(start=0, end=1, step=0.01)
subtype_x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', **_line_slider_range)
subtype_y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', **_line_slider_range)
# Empty Plotly panes: one for the dot plot, one for the digital reconstruction
subtype_plot_placeholder = pn.pane.Plotly()
subtype_reconstruction_placeholder = pn.pane.Plotly()
def update_color_dict():
    """Return the cell subtype colours as Plotly 'rgb(r, g, b)' strings.

    The colours are defined with channels between 0 and 1. Plotly reads the
    channels of an 'rgb(...)' string on the 0-255 scale, so each channel is
    scaled and rounded to an integer; writing the 0-1 values into the string
    unchanged, as was done before, produces nearly black colours.

    Returns
    -------
    dict
        Maps each subtype name to a string such as 'rgb(255, 127, 0)'.
    """
    unit_rgb_by_subtype = {
        'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
        'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
        'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
        'Exhausted TCD4': (0.2, 0.6274509803921569, 0.17254901960784313),
        'Exhausted TCD8': (0.984313725490196, 0.6039215686274509, 0.6),
        'TCD8': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
        'M1': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
        'M2': (1.0, 0.4980392156862745, 0.0),
        'Treg': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
        'Other CD45+': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
        'Cancer': (1.0, 1.0, 0.6),
        'myCAF αSMA+': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
        'Stroma': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
        'Endothelial': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
    }
    # round() rather than int(): some products land just below the integer
    return {
        subtype: "rgb({}, {}, {})".format(*(round(channel * 255) for channel in unit_rgb))
        for subtype, unit_rgb in unit_rgb_by_subtype.items()
    }
# Function to create the dot plot
def create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos):
    """Scatter two columns of df for the selected samples, with two threshold lines.

    Parameters
    ----------
    subtype_selected_files : list
        Sample_ID values to keep.
    subtype_x_axis, subtype_y_axis : str
        Names of the df columns plotted on each axis.
    subtype_n_samples : int or None
        Maximum number of rows to plot. When the selection holds more rows,
        that many distinct rows are drawn at random. None or a negative value
        disables the subsampling.
    subtype_x_line_pos, subtype_y_line_pos : float
        Positions of the vertical and horizontal dashed lines.

    Returns
    -------
    plotly.graph_objects.Figure
        The dot plot, or an empty figure when no file is selected, an axis is
        not a column of df, or no row is left to plot.
    """
    if not subtype_selected_files:
        return go.Figure()
    subtype_test_df = df.loc[df['Sample_ID'].isin(subtype_selected_files), :]
    if subtype_x_axis not in subtype_test_df.columns or subtype_y_axis not in subtype_test_df.columns:
        return go.Figure()
    if subtype_n_samples is not None and 0 <= subtype_n_samples < len(subtype_test_df):
        # replace=False: a cell is plotted at most once
        subtype_random_rows = np.random.choice(len(subtype_test_df), subtype_n_samples, replace=False)
        subtype_test_df = subtype_test_df.iloc[subtype_random_rows, :]
    if subtype_test_df.empty:
        # min()/max() of an empty column are NaN and cannot be used as axis ranges
        return go.Figure()
    x_values = subtype_test_df[subtype_x_axis]
    y_values = subtype_test_df[subtype_y_axis]
    fig = go.Figure()
    title = 'Threshold'
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))
    # Add vertical and horizontal lines
    fig.add_vline(x=subtype_x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=subtype_y_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=subtype_x_axis, linecolor='black', range=[x_values.min(), x_values.max()]),
        yaxis=dict(title=subtype_y_axis, linecolor='black', range=[y_values.min(), y_values.max()])
    )
    return fig
def create_subtype_reconstruction_plot(subtype_selected_files):
    """Plot the cells of the selected samples at their XY position, coloured by subtype.

    Parameters
    ----------
    subtype_selected_files : list of str
        Sample_ID values to draw. All of them are drawn on the same figure.

    Returns
    -------
    plotly.graph_objects.Figure
        One scatter trace per (sample, subtype) pair, or an empty figure when
        no file is selected.

    Cell subtypes are recomputed on every call through
    assign_cell_subtypes_again().
    """
    if not subtype_selected_files:
        return go.Figure()
    base_colors = update_color_dict()
    # The colour dictionary is keyed by display names ('Exhausted TCD4'), while
    # the classification tables are built from names passed through
    # sanitize_options ('Exhausted_TCD4'). Index the colours under both
    # spellings so that either form of a subtype name finds its colour.
    color_lookup = dict(zip(sanitize_options(list(base_colors)), base_colors.values()))
    color_lookup.update(base_colors)
    fallback_color = 'rgb(128, 128, 128)'  # grey for subtypes without a colour
    df = assign_cell_subtypes_again()
    subtype_fig = go.Figure()
    for sample_id in subtype_selected_files:
        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_subtype']]
        # sort=False keeps the subtypes in their order of first appearance
        for cellsubtype, cells in location_colors.groupby('cell_subtype', sort=False):
            subtype_fig.add_scatter(
                mode='markers',
                marker=dict(size=2, opacity=0.5, color=color_lookup.get(cellsubtype, fallback_color)),
                x=cells['Nuc_X'],
                y=cells['Nuc_Y_Inv'],
                name=cellsubtype
            )
    # The title lists every plotted sample (the part of each name before the
    # first underscore); with a single sample it is that sample's prefix alone
    sample_prefixes = [sample.split('_')[0] for sample in subtype_selected_files]
    title = ", ".join(sample_prefixes) + " Background Subtracted XY Map cell subtypes"
    # Axes and layout do not depend on the sample, so they are set once
    subtype_fig.update_xaxes(title_text='Nuc_X', linecolor='black')
    subtype_fig.update_yaxes(title_text='Nuc_Y_Inv', linecolor='black')
    subtype_fig.update_layout(
        title=title,
        plot_bgcolor='white',
        legend=dict(
            title='Cell Subtypes',  # Legend title
            font=dict(
                family='Arial',
                size=12,
                color='black'
            ),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant'
        )
    )
    return subtype_fig
def update_subtype_dropdown_options(event):
    """Fill the X/Y axis dropdowns when the file selection changes.

    With at least one file selected, both dropdowns offer every column of df
    whose name contains '_Intensity_Average'; with no selection they are
    emptied. The column names do not depend on which rows are selected, so they
    are read from df directly instead of from a filtered copy of the frame.
    """
    if subtype_checkbox_group.value:
        subtype_selected_intensities = [col for col in df.columns if '_Intensity_Average' in col]
    else:
        subtype_selected_intensities = []
    subtype_x_axis_dropdown.options = subtype_selected_intensities
    subtype_y_axis_dropdown.options = subtype_selected_intensities
def update_subtype_slider_ranges(event):
    """Resize the line sliders to the data of the current selection.

    For each axis the slider range becomes [-|max|, +|max|], where max is the
    largest value of the chosen column among the selected samples, and the
    slider is reset to 0. Nothing happens until files and both axes are chosen.
    A slider is left untouched when its bound is not a positive finite number
    (no matching row, only missing values, or a maximum of 0), because that
    would give it NaN or zero-width limits.
    """
    subtype_selected_files = subtype_checkbox_group.value
    subtype_x_axis = subtype_x_axis_dropdown.value
    subtype_y_axis = subtype_y_axis_dropdown.value
    if not (subtype_selected_files and subtype_x_axis and subtype_y_axis):
        return
    in_selection = df['Sample_ID'].isin(subtype_selected_files)
    resized = []
    for axis_name, slider in ((subtype_x_axis, subtype_x_line_slider),
                              (subtype_y_axis, subtype_y_line_slider)):
        bound = float(abs(df.loc[in_selection, axis_name].max()))
        if np.isfinite(bound) and bound > 0:
            resized.append((slider, bound))
    # Set all limits first and reset the values afterwards, so that a redraw
    # triggered by a value change sees both sliders with their new limits
    for slider, bound in resized:
        slider.start = -bound
        slider.end = bound
    for slider, _ in resized:
        slider.value = 0
def on_subtype_value_change(event):
    """Rebuild the dot plot and the reconstruction plot from the widget values.

    Both figures are built first and only then placed in their panes.
    """
    chosen_files = subtype_checkbox_group.value
    dot_plot_args = (
        chosen_files,
        subtype_x_axis_dropdown.value,
        subtype_y_axis_dropdown.value,
        subtype_random_sample_input.value,
        subtype_x_line_slider.value,
        subtype_y_line_slider.value,
    )
    new_dot_plot = create_subtype_dot_plot(*dot_plot_args)
    new_reconstruction = create_subtype_reconstruction_plot(chosen_files)
    subtype_plot_placeholder.object = new_dot_plot
    subtype_reconstruction_placeholder.object = new_reconstruction
# Wire the callbacks to the 'value' of the widgets. The registration order is
# kept: dropdown options first, then slider ranges, then the plot refresh.
subtype_checkbox_group.param.watch(update_subtype_dropdown_options, 'value')
for _range_source in (subtype_checkbox_group, subtype_x_axis_dropdown, subtype_y_axis_dropdown):
    _range_source.param.watch(update_subtype_slider_ranges, 'value')
for _plot_source in (subtype_x_axis_dropdown, subtype_y_axis_dropdown, subtype_random_sample_input,
                     subtype_x_line_slider, subtype_y_line_slider):
    _plot_source.param.watch(on_subtype_value_change, 'value')
# Layout: controls on top, the two plots side by side underneath
plot_with_subtype_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    subtype_checkbox_group,
    subtype_x_axis_dropdown,
    subtype_y_axis_dropdown,
    subtype_random_sample_input,
    pn.Row(subtype_x_line_slider, subtype_y_line_slider),
    pn.Row(
        pn.Column("## Dot Plot", pn.Column(subtype_plot_placeholder)),
        pn.Column("## Cell Subtype Digital Reconstruction Plot", subtype_reconstruction_placeholder),
    ),
)
# Axis names currently selected in the dropdowns
subtype_x_axis, subtype_y_axis = subtype_x_axis_dropdown.value, subtype_y_axis_dropdown.value
# Strip and lower-case the subtype labels of df2 and the keys of the colour
# dictionary so that the two can be matched
df2['cell_subtype'] = df2['cell_subtype'].str.strip().str.lower()
cell_subtype_color_dict = {
    name.strip().lower(): color for name, color in cell_subtype_color_dict.items()
}
# Colour of every row of df2, looked up from its subtype
cell_subtype_row_colors = df2['cell_subtype'].map(cell_subtype_color_dict)
# Drop the first three characters (the 'rgb' prefix) of every colour.
# NOTE(review): what remains is still a string such as '(0.2, 0.6, 0.1)', not
# a tuple of numbers - confirm this is what the consumers expect.
cell_subtype_color_dict = {name: color[3:] for name, color in cell_subtype_color_dict.items()}
# Row colours for df
sample_row_colors = df['Sample_ID'].map(sample_color_dict)
# NOTE(review): the dictionary keys are lower-cased at this point; labels in df
# that are not lower-case will not match and map to NaN - TODO confirm.
cell_subtype_row_colors = df['cell_subtype'].map(cell_subtype_color_dict)
# Number of cells for each (cell_type, cell_subtype) pair
counts = df.groupby(['cell_type', 'cell_subtype']).size().rename('count').reset_index()
total = counts['count'].sum()
# Share of each pair in the total number of cells, in percent
counts['percentage'] = counts.groupby('cell_subtype')['count'].transform(
    lambda group_counts: group_counts / total * 100)
# ## IV.10. SAVE
# One CSV file per Sample_ID. The file is written in 'w' mode whether or not it
# already exists, so an existing file is simply overwritten.
for sample in ls_samples:
    sample_id = sample
    filename = os.path.join(output_data_dir, "".join((sample_id, "_", step_suffix, ".csv")))
    df_save = df.loc[df['Sample_ID'] == sample_id, :]
    df_save.to_csv(filename, index=True, index_label='ID', mode='w')
# All samples together in a single CSV file
filename = os.path.join(output_data_dir, "".join(("all_Samples_", project_name, ".csv")))
df.to_csv(filename, index=True, index_label='ID')
# ## Panel App
# Widget wrapping the metadata table
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
# Content of each tab
metadata_tab = pn.Column(pn.pane.Markdown("## Initial DataFrame"), intial_df)
dotplot_tab = pn.Column(plot_with_reconstruction)
celltype_classification_tab = pn.Column(cell_type_classification_app_main, threshold_panel)
cellsubtype_classification_tab = pn.Column(cell_subtype_classification_app_main, subtype_threshold_panel)
subtype_dotplot_tab = pn.Column(plot_with_subtype_reconstruction)
# (title, content) pairs, in display order
_app_tabs = [
    ("Metadata", metadata_tab),
    ("Classify-Celltype-Marker", celltype_classification_tab),
    ("Cell_Types", dotplot_tab),
    ("Classify-Cell Subtype-Marker", cellsubtype_classification_tab),
    ("Cell-Subtypes", subtype_dotplot_tab),
]
# Assemble the template around the tabs and show the application
app4_5 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Marker Threshold & Classification",
    main=[pn.Tabs(*_app_tabs)],
)
app4_5.show()