Spaces:
Running
Running
| import streamlit as st | |
# Use the wide page layout. NOTE(review): this call sits directly after the
# streamlit import and before any other st.* call; presumably because
# Streamlit expects page config to be set first - confirm before moving it.
st.set_page_config(layout="wide")
| import pandas as pd | |
| import numpy as np | |
| from zipfile import ZipFile | |
| import plotly.express as px | |
| import plotly.graph_objs as go | |
# Zip archive that load_LLR() reads; it looks up "<uniprot_id>_LLR.csv" members inside it.
LLR_FILE='ALL_hum_isoforms_ESM1b_LLR.zip'
# Isoform table indexed by its first CSV column. The index values are passed to
# load_LLR() as uniprot ids and the `txt` column is matched against the
# selectbox choice further down.
df=pd.read_csv('isoform_list.csv',index_col=0)
# All index values of the isoform table as a plain list.
uids=list(df.index.values)
# ClinVar annotations; plot_interactive() reads the LLR_file_id,
# ClinicalSignificance and variant columns.
clinvar = pd.read_csv('clinvar.csv.gz')
def load_LLR(uniprot_id):
    '''Load the LLRs for a given uniprot id from the LLR_FILE zip archive.

    The CSV member is looked up as
    "<first archive entry><uniprot_id>_LLR.csv", i.e. the first name returned
    by ``namelist()`` is used as the folder prefix inside the archive.

    Returns a dataframe read with the first CSV column as index. Per the
    original author's notes this is a 20xL table:
    rows are indexed by AA change
    (AAorder=['K','R','H','E','D','N','Q','T','S','C','G','A','V','L','I','M','P','Y','F','W']),
    columns are indexed by WT_AA+position, e.g. "G 12".

    Raises KeyError (from ZipFile.open) when the archive has no member for
    the requested id.

    Usage example: load_LLR('P01116') or load_LLR('P01116-2')
    '''
    with ZipFile(LLR_FILE) as myzip:
        member = myzip.namelist()[0] + uniprot_id + '_LLR.csv'
        # Open the member in its own context manager so the handle is closed
        # as soon as pandas has finished parsing it.
        with myzip.open(member) as data:
            return pd.read_csv(data, index_col=0)
def meltLLR(LLR,gene_prefix=None,ignore_pos=False):
    '''Reshape a wide LLR table into one row per variant.

    LLR: dataframe whose column labels look like "G 12" and whose index holds
        the substituted amino acid (the shape load_LLR returns).
    gene_prefix: if given, every variant id is prefixed with "<gene_prefix>_".
    ignore_pos: if True, the integer `pos` column is not added.

    Returns a dataframe indexed by variant id (column label with spaces
    removed + substituted amino acid, e.g. "G12D") with a `score` column and,
    unless ignore_pos is set, a `pos` column.
    '''
    # Name the melted columns explicitly: the default var_name follows
    # LLR.columns.name, which would break the lookups below when it is set.
    melted = LLR.melt(ignore_index=False, var_name='variable', value_name='value')
    melted['variant'] = [
        ''.join(label.split(' ')) + aa
        for label, aa in zip(melted['variable'], melted.index)
    ]
    melted['score'] = melted['value']
    melted = melted.set_index('variant')
    if not ignore_pos:
        # Strip the first and last character of the variant id to get the
        # position; assumes single-letter residue codes on both ends.
        melted['pos'] = [int(variant[1:-1]) for variant in melted.index]
    melted = melted.drop(columns=['variable', 'value'])
    if gene_prefix is not None:
        melted.index = gene_prefix + '_' + melted.index
    return melted
# Marker colours for the ClinVar overlay. Set explicitly so they match the
# "red: pathogenic, green: benign" wording of the checkbox label instead of
# depending on trace order and the template's colorway.
_PATHOGENIC_COLOR = '#EF553B'
_BENIGN_COLOR = '#00CC96'


def _clinvar_cells(llr, variants):
    '''Return (x, y, score) lists for the heatmap cells whose variant id is in `variants`.'''
    xs, ys, scores = [], [], []
    for col in llr.columns:
        # Column labels look like "G 12": drop the separator at position 1 so
        # that appending the substituted residue yields ids such as "G12D".
        stem = col[0] + col[2:]
        for aa in llr.index:
            if stem + aa in variants:
                xs.append(col)
                ys.append(aa)
                scores.append(llr.loc[aa, col])
    return xs, ys, scores


def _clinvar_scatter(llr, variants, color):
    '''Build the marker trace that highlights `variants` on top of the heatmap.'''
    xs, ys, scores = _clinvar_cells(llr, variants)
    return go.Scatter(
        x=xs,
        y=ys,
        customdata=scores,
        mode='markers',
        marker=dict(size=8, color=color),
        showlegend=False,
        hovertemplate="<b>%{x} %{y}</b> (%{customdata:.2f})<extra></extra>",
    )


def plot_interactive(uniprot_id, show_clinvar=False, title=None):
    '''Build the interactive LLR heatmap for one isoform.

    uniprot_id: id passed to load_LLR() and matched against
        clinvar.LLR_file_id.
    show_clinvar: if True, overlay markers for ClinVar variants whose
        ClinicalSignificance is exactly 'Benign' or 'Pathogenic'.
    title: figure title. When omitted, the module-level `selection` value is
        used, which is what the original code always did.

    Returns the plotly figure.
    '''
    primaryLLR = load_LLR(uniprot_id)
    if title is None:
        # Backward-compatible fallback: `selection` is assigned by the
        # Streamlit script at module level before this function is called.
        title = selection

    fig = px.imshow(
        primaryLLR.values,
        x=primaryLLR.columns,
        y=primaryLLR.index,
        color_continuous_scale='Viridis_r',
        zmax=0,
        zmin=-20,
        labels=dict(y="Amino acid change", x="Protein sequence", color="LLR"),
        template='plotly_white',
        title=title,
    )
    # Initial x window is [0, 99]; the range slider exposes the rest of the axis.
    fig.update_xaxes(tickangle=-90, range=[0, 99], rangeslider=dict(visible=True), dtick=1)
    fig.update_yaxes(dtick=1)
    fig.update_layout(
        {
            'plot_bgcolor': 'rgba(0, 0, 0, 0)',
            'paper_bgcolor': 'rgba(0, 0, 0, 0)',
        },
        font={'family': 'Arial', 'size': 11},
        hoverlabel=dict(font=dict(family='Arial', size=14)),
    )
    fig.update_traces(
        hovertemplate="<b>%{x} %{y}</b> (%{z:.2f})<extra></extra>"
    )

    if show_clinvar:
        iso_clinvar = clinvar[clinvar.LLR_file_id == uniprot_id]
        benign = set(iso_clinvar[iso_clinvar.ClinicalSignificance == 'Benign'].variant.values)
        pathogenic = set(iso_clinvar[iso_clinvar.ClinicalSignificance == 'Pathogenic'].variant.values)
        # A variant listed under both labels is drawn as benign only, as in
        # the original if/elif ordering.
        pathogenic -= benign
        # Pathogenic trace is added first, benign second (original order).
        fig.add_trace(_clinvar_scatter(primaryLLR, pathogenic, _PATHOGENIC_COLOR))
        fig.add_trace(_clinvar_scatter(primaryLLR, benign, _BENIGN_COLOR))
    return fig
# Position of the isoform that is pre-selected when the page loads.
DEFAULT_SELECTION_INDEX = 6251

# Offer the `txt` labels explicitly (the same column the lookup below uses) and
# clamp the default position so a shorter isoform list cannot push it out of range.
selection = st.selectbox(
    "uniprot_id:",
    df['txt'],
    index=max(0, min(DEFAULT_SELECTION_INDEX, len(df) - 1)),
)
# Map the chosen label back to its index value; the first match wins.
uid = df.index[df['txt'] == selection][0]
show_clinvar = st.checkbox('show ClinVar annotations (red: pathogenic, green: benign)',value=False)

fig = plot_interactive(uid, show_clinvar=show_clinvar)
# NOTE(review): use_container_width=True below presumably overrides the fixed
# width set here, leaving only the height in effect - confirm against the
# Streamlit version in use.
fig.update_layout(width=800, height=600, autosize=False)
st.plotly_chart(fig, use_container_width=True)

# The download is the long-format (one row per variant) table for the isoform.
st.download_button(
    label="Download data as CSV",
    data=meltLLR(load_LLR(uid)).to_csv(),
    file_name=selection+'.csv',
    mime='text/csv',
)

st.markdown("""
To obtain ESM effect scores for non-missense mutations (e.g. indels) or non-human proteins,
please use the [esm-variants command-line tool](https://github.com/ntranoslab/esm-variants).
""")