Spaces:
Running
Running
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.express as px | |
| from collections import Counter | |
# Palette of CSS colour names handed out, in order, to highlighted groups.
# Names deliberately left out of the palette (kept here for reference):
#   darkgray, darkgrey, slategray, slategrey, lightslategray, lightslategrey,
#   lightgray, lightgrey, gray, grey, dimgray, dimgrey, darkslategray,
#   darkslategrey, aliceblue, black, beige, antiquewhite, bisque,
#   blanchedalmond
# NOTE(review): presumably the greys are excluded because "lightgrey" is the
# colour used for unselected points -- confirm.
css_colors = [
    "darkmagenta",
    "darkolivegreen",
    "darkorange",
    "darkorchid",
    "darkred",
    "darksalmon",
    "darkseagreen",
    "darkslateblue",
    "darkturquoise",
    "darkviolet",
    "deeppink",
    "deepskyblue",
    "dodgerblue",
    "firebrick",
    "coral",
    "magenta",
    "maroon",
    "mediumaquamarine",
    "mediumblue",
    "mediumorchid",
    "mediumpurple",
    "mediumseagreen",
    "mediumslateblue",
    "mediumspringgreen",
    "mediumturquoise",
    "mediumvioletred",
    "midnightblue",
    "mintcream",
    "mistyrose",
    "moccasin",
    "navajowhite",
    "navy",
    "oldlace",
    "olive",
    "olivedrab",
    "orange",
    "orangered",
    "orchid",
    "aqua",
    "aquamarine",
    "azure",
    "blue",
    "blueviolet",
    "brown",
    "burlywood",
    "cadetblue",
    "chartreuse",
    "chocolate",
    "cornflowerblue",
    "cornsilk",
    "crimson",
    "cyan",
    "darkblue",
    "darkcyan",
    "darkgoldenrod",
    "darkgreen",
    "darkkhaki",
    "floralwhite",
    "forestgreen",
    "fuchsia",
    "gainsboro",
    "ghostwhite",
    "gold",
    "goldenrod",
    "green",
    "greenyellow",
    "honeydew",
    "hotpink",
    "indianred",
    "indigo",
    "ivory",
    "khaki",
    "lavender",
    "lavenderblush",
    "lawngreen",
    "lemonchiffon",
    "lightblue",
    "lightcoral",
    "lightcyan",
    "lightgoldenrodyellow",
    "lightgreen",
    "lightpink",
    "lightsalmon",
    "lightseagreen",
    "lightskyblue",
    "lightsteelblue",
    "lightyellow",
    "lime",
    "limegreen",
    "linen",
    "palegoldenrod",
    "palegreen",
    "paleturquoise",
    "palevioletred",
    "papayawhip",
    "peachpuff",
    "peru",
    "pink",
    "plum",
    "powderblue",
    "purple",
    "red",
    "rosybrown",
    "royalblue",
    "rebeccapurple",
    "saddlebrown",
    "salmon",
    "sandybrown",
    "seagreen",
    "seashell",
    "sienna",
    "silver",
    "skyblue",
    "slateblue",
    "snow",
    "springgreen",
    "steelblue",
    "tan",
    "teal",
    "thistle",
    "tomato",
    "turquoise",
    "violet",
    "wheat",
    "white",
    "whitesmoke",
    "yellow",
    "yellowgreen",
]
| # Read data | |
# data/inventory.txt names one tab-separated table per line; every listed
# table is read and the results are stacked into the single `data` frame that
# the plotting helpers below read ("Name", "Celltype", "x", "y" columns).
embedding_tables = []
with open("data/inventory.txt", "r") as fin:
    for line in fin:
        table_path = line.strip()
        if not table_path:
            # A blank line would otherwise reach read_csv as an empty path
            # and abort start-up with a file-not-found error.
            continue
        embedding_tables.append(pd.read_csv(table_path, sep="\t"))
data = pd.concat(embedding_tables)

# Labels containing "CCI" or "BTO" are not offered in the cell-type dropdown.
unique_celltypes = sorted(
    label
    for label in data["Celltype"].unique()
    if "CCI" not in label and "BTO" not in label
)

# Per-cell-type SAFE summary tables. Each file is keyed by "Label", which is
# renamed to "Celltype" so the tables can be joined with the protein counts.
max_safe_scores = pd.read_csv("data/max_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Max SAFE Score", "Label": "Celltype"}
)
mean_safe_scores = pd.read_csv("data/mean_safe_scores.csv", sep="\t").rename(
    columns={"Score": "Mean SAFE Score", "Label": "Celltype"}
)
neighborhood_enrichment = pd.read_csv(
    "data/safe_neighborhoods_enriched.csv", sep="\t"
).rename(columns={"Label": "Celltype"})

# Default (inner) merges: only cell types present in all three tables remain.
safe_scores = max_safe_scores.merge(mean_safe_scores, on="Celltype")
safe_scores = safe_scores.merge(neighborhood_enrichment, on="Celltype")
print(safe_scores)
| # Helper functions | |
def plot_protein_emb(protein):
    """Draw the embedding scatter with one protein's contexts highlighted.

    Every row of the module-level ``data`` frame is plotted. Rows whose
    "Name" equals ``protein`` (ignoring case and surrounding whitespace) are
    drawn as large stars coloured by their "Celltype"; all other rows are
    small light-grey circles.

    Args:
        protein: Protein name entered by the user. ``None`` is treated as an
            empty query.

    Returns:
        A ``(fig, protein_context_df)`` tuple: the Plotly figure and the
        "Name", "Celltype", "x", "y" columns of the highlighted rows.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False, "Selected": False}

    # Normalise once; stray whitespace around the typed name must not
    # prevent a match.
    query = (protein or "").strip().lower()

    p_data = data.copy()
    p_data["Selected"] = [
        context if name == query else "Not Selected"
        for name, context in zip(p_data["Name"].str.lower(), p_data["Celltype"].tolist())
    ]
    p_data["Size"] = [1 if label == "Not Selected" else 10 for label in p_data["Selected"].tolist()]
    symbol_map = {size: "circle" if size == 1 else "star" for size in p_data["Size"].unique()}

    # Colours are assigned by position and wrap around, so a protein present
    # in more contexts than there are palette entries still gets an explicit
    # colour for every context.
    p_celltypes = p_data["Selected"].unique()
    color_map = {
        label: css_colors[position % len(css_colors)]
        for position, label in enumerate(p_celltypes)
        if label != "Not Selected"
    }
    color_map["Not Selected"] = "lightgrey"

    fig = px.scatter(
        p_data,
        x="x",
        y="y",
        color="Selected",
        color_discrete_map=color_map,
        symbol="Size",
        symbol_map=symbol_map,
        size="Size",
        opacity=0.8,
        hover_data=hover_keys,
    )
    # Both backgrounds are given as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second positional dict is not
    # applied as a layout update.
    fig.update_layout(
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        showlegend=False,
    )
    fig.update_xaxes(title_text="", showticklabels=False)
    fig.update_yaxes(title_text="", showticklabels=False)
    fig.update_traces(marker=dict(line=dict(width=0)))

    protein_context_df = p_data[p_data["Selected"] != "Not Selected"][["Name", "Celltype", "x", "y"]]
    return fig, protein_context_df
def get_protein_counts(df):
    """Count rows per cell type and attach the SAFE summary columns.

    Args:
        df: Frame with a "Celltype" column; each row counts once.

    Returns:
        A frame with "Celltype", "Activated Proteins" (the row count) and
        the columns of the module-level ``safe_scores`` table. The counts are
        sorted by "Celltype" and then inner-joined, so cell types without an
        entry in ``safe_scores`` are dropped. The result is also printed.
    """
    tally = Counter(df["Celltype"].tolist())
    per_celltype = pd.DataFrame(
        list(tally.items()),
        columns=["Celltype", "Activated Proteins"],
    )
    ordered = per_celltype.sort_values(by="Celltype")
    with_scores = ordered.merge(safe_scores, on="Celltype")
    print(with_scores)
    return with_scores
def plot_celltype_emb(celltype):
    """Draw the embedding scatter with the chosen cell types highlighted.

    Args:
        celltype: List of selected cell-type labels. If it contains "All",
            every row of the module-level ``data`` frame is coloured by its
            own "Celltype". Otherwise only rows whose "Celltype" matches a
            selected label (ignoring case) are coloured and the rest are
            light grey. ``None`` is treated as an empty selection.

    Returns:
        A ``(fig, activated_proteins_df)`` tuple: the Plotly figure and the
        per-cell-type summary produced by ``get_protein_counts`` for the
        plotted (or highlighted) rows.
    """
    hover_keys = {"Name": True, "Celltype": True, "x": False, "y": False}
    requested = list(celltype or [])

    if "All" in requested:
        fig = px.scatter(data, x="x", y="y", color="Celltype", opacity=0.4, hover_data=hover_keys)
        activated_proteins_df = get_protein_counts(data)
    else:
        hover_keys["Selected"] = False
        c_data = data.copy()

        # One colour per requested label, in request order, wrapping around
        # when more labels are requested than the palette holds.
        color_by_key = {
            label.lower(): css_colors[position % len(css_colors)]
            for position, label in enumerate(requested)
        }

        # Compare lower-cased on BOTH sides. Lower-casing only the request
        # would make any label containing an upper-case letter unmatchable.
        labels = c_data["Celltype"]
        is_hit = labels.str.lower().isin(list(color_by_key))
        c_data["Selected"] = [
            label if hit else "Not Selected"
            for label, hit in zip(labels.tolist(), is_hit.tolist())
        ]

        # Key the colour map by the labels exactly as they appear in the
        # "Selected" column, which is what Plotly looks up.
        color_map = {label: color_by_key[label.lower()] for label in labels[is_hit].unique()}
        color_map["Not Selected"] = "lightgrey"

        fig = px.scatter(
            c_data,
            x="x",
            y="y",
            color="Selected",
            color_discrete_map=color_map,
            opacity=0.8,
            hover_data=hover_keys,
        )
        activated_proteins_df = get_protein_counts(c_data[c_data["Selected"] != "Not Selected"])

    # Both backgrounds are given as keywords: update_layout's second
    # positional parameter is `overwrite`, so a second positional dict is not
    # applied as a layout update.
    fig.update_layout(
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
        showlegend=False,
    )
    fig.update_xaxes(title_text="", showticklabels=False)
    fig.update_yaxes(title_text="", showticklabels=False)
    return fig, activated_proteins_df
| # Create gradio interface | |
# Page layout: title and introduction, then one tab per lookup mode (by
# protein, by cell type), then a footer with project links. Components are
# created in display order.
# NOTE(review): container nesting below is reconstructed from the statement
# order -- confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("<center><h1>Contextualizing Protein Representations with PINNACLE</h1></center>")
    gr.Markdown(
        "Protein interaction networks are a critical component to study the function and therapeutic potential of proteins. "
        "However, accurately modeling protein interactions across diverse biological contexts, such as tissues and cell types, "
        "remains a significant challenge for existing algorithms. Here, we introduce <b>PINNACLE</b>, a flexible geometric deep learning approach "
        "that trains on contextualized protein interaction networks to generate context-aware protein representations. Leveraging a "
        "multi-organ single cell transcriptomic atlas of humans, <b>PINNACLE provides 394,760 protein representations split across 156 cell-type "
        "contexts from 24 tissues and organs</b>. Our contextualized protein representations, infused with cellular and tissue organization, "
        "can easily be adapted for diverse downstream tasks."
    )
    gr.Markdown(" For more information, please check out our manuscript and documentation (links provided at the bottom of the page)!")

    with gr.Tabs():
        # Tab 1: highlight every context in which a single protein appears.
        with gr.TabItem("Protein"):
            with gr.Column():
                gr.Markdown("<center><h3>Select protein of interest to examine across biological contexts</h3></center>")
                protein = gr.Textbox(
                    info="Enter a protein name (in HGNC symbol)",
                    lines=1,
                    value="TNF",
                    label="Protein",
                )
                protein_submit_btn = gr.Button("Submit")
                gr.Markdown("<center><h3>Contextualized protein representations</h3></center>")
                protein_plot = gr.Plot()
                with gr.Accordion(label="Protein Contexts", open=False):
                    # NOTE(review): `overflow_row_behaviour` may not be
                    # accepted by every Gradio release -- confirm against the
                    # pinned version before upgrading.
                    protein_context_df = gr.Dataframe(
                        headers=["Protein", "Celltype", "x", "y"],
                        overflow_row_behaviour="paginate",
                    )

        # Tab 2: highlight all proteins belonging to the chosen cell types.
        with gr.TabItem("Cell Type"):
            with gr.Column():
                gr.Markdown("<center><h3>Select biological context by specifying cell type of interest</h3></center>")
                celltype = gr.Dropdown(
                    ["All"] + unique_celltypes,
                    info="Please select from the following cell types.",
                    value=["All"],
                    multiselect=True,
                    label="Cell Type",
                )
                celltype_submit_btn = gr.Button("Submit")
                gr.Markdown("<center><h3>Contextualized protein representations</h3></center>")
                celltype_plot = gr.Plot()
                with gr.Accordion(label="Cell Type Context", open=False):
                    activated_proteins_df = gr.Dataframe(
                        headers=["Celltype", "Activated Proteins"],
                        overflow_row_behaviour="paginate",
                    )

    gr.Markdown(
        "<p style='text-align: center'><a href='https://github.com/mims-harvard/PINNACLE'>Github Repo</a>"
        "| <a href='https://zitniklab.hms.harvard.edu/projects/PINNACLE/'>Documentation</a> "
        "| <a href='https://www.nature.com/articles/s41592-024-02341-3/'>Publication</a></p>"
    )

    # Each Submit button redraws its tab's plot and refreshes the table
    # beneath it from the matching helper's two return values.
    protein_submit_btn.click(
        plot_protein_emb,
        inputs=[protein],
        outputs=[protein_plot, protein_context_df],
    )
    celltype_submit_btn.click(
        plot_celltype_emb,
        inputs=[celltype],
        outputs=[celltype_plot, activated_proteins_df],
    )

# Launch
if __name__ == "__main__":
    demo.launch()