Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import streamlit as st | |
| import scanpy as sc | |
| #import mpld3 | |
| import matplotlib.pyplot as plt | |
| #from mpl_toolkits.axes_grid1 import make_axes_locatable | |
| #import matplotlib.gridspec as gridspec | |
| #from sunbird.categorical_encoding import frequency_encoding | |
| import seaborn as sns | |
# Global matplotlib defaults: automatic layout and a near-zero axes line width.
plt.rcParams.update({'figure.autolayout': True, 'axes.linewidth': 0.0001})
from functions import pathway_analyses
# sc.settings.set_figure_params(dpi=80, facecolor='white', fontsize=4)
sc.settings.set_figure_params(dpi=80, facecolor='white', fontsize=12)

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(layout="wide")

# CSS override that enlarges the expander header text.
EXPANDER_HEADER_CSS = """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
"""
st.markdown(EXPANDER_HEADER_CSS, unsafe_allow_html=True)

# CSS override that sets the title font size.
TITLE_CSS = """
<style>
div.stTitle {
font-size:40px;
}
</style>"""
m = st.markdown(TITLE_CSS, unsafe_allow_html=True)

# Disable the st.pyplot "global use" warning.
# NOTE(review): this option may not exist on recent Streamlit releases -
# confirm against the pinned Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)
# Load data
cwd = os.getcwd() + '/'  # +'data/'


# @st.cache_data
def get_data():
    """Populate ``st.session_state`` with the dataset and its lookup tables.

    After this call the following session keys exist:

    * ``adata_annot`` - AnnData read from ``multiregion_brainaging_annotated.h5ad``
      in the current working directory.
    * ``genes_list``  - sorted unique entries of ``adata_annot.var.index``.
    * ``cell_type``   - sorted unique values of ``adata_annot.obs['new_anno']``.
    * ``broad_type``  - sorted unique values of ``adata_annot.obs['broad_celltype']``.
    * ``go_table``    - DataFrame with one column per GO pathway, holding that
      pathway's genes (shorter columns are padded with missing values).
    * ``path_ways``   - the column index of ``go_table``.

    The h5ad file is read at most once per session: when the AnnData object
    is already cached it is reused to rebuild any missing derived list,
    instead of relying on a local variable that is only bound on a fresh load.
    """
    state = st.session_state

    derived_keys = ('genes_list', 'cell_type', 'broad_type')
    if 'adata_annot' not in state:
        state['adata_annot'] = sc.read_h5ad(cwd + 'multiregion_brainaging_annotated.h5ad')

    if any(key not in state for key in derived_keys):
        adata_annot = state['adata_annot']
        if 'genes_list' not in state:
            state['genes_list'] = sorted(adata_annot.var.index.unique())
        if 'cell_type' not in state:
            state['cell_type'] = sorted(adata_annot.obs['new_anno'].unique())
        if 'broad_type' not in state:
            state['broad_type'] = sorted(adata_annot.obs['broad_celltype'].unique())

    # Also load GO terms.
    if 'go_table' not in state or 'path_ways' not in state:
        # NOTE(review): read_pathways is assumed to return a DataFrame whose
        # column 0 holds the pathway name and whose remaining columns hold
        # gene symbols - confirm against functions.pathway_analyses.
        bp = pathway_analyses.read_pathways('pathway_databases/GO_Biological_Process_2021.txt')
        go_bp_paths = bp.set_index(0).fillna("")
        genes_by_path = {
            path: [gene for gene in row.values() if gene != ""]
            for path, row in go_bp_paths.to_dict(orient='index').items()
        }
        # Build with pathways as rows, then transpose so each pathway becomes
        # a column; from_dict pads the shorter gene lists.
        gene_set_by_path = pd.DataFrame.from_dict(genes_by_path, orient='index').transpose()
        state['path_ways'] = gene_set_by_path.columns
        state['go_table'] = gene_set_by_path
# done load Data
# Page title (earlier wording: 'Single nuclei atlas of human aging in brain regions').
st.title('Brain Age Browser')

# Fill st.session_state with the dataset and lookup tables before any widget reads them.
get_data()

# One tab per view; the names tab1 / tab2 / readme are used by the sections below.
tab_labels = ["Gene Expression by CellType", "Age associations for multiple genes", "README"]
tab1, tab2, readme = st.tabs(tab_labels)

# NOTE(review): `data` is not referenced anywhere in the visible source.
data = np.random.randn(10, 1)
# Tab 1: expression of a single gene, overall and split by age group / cell type.
with tab1:
    with st.form(key='columns_in_form'):
        c1, c2 = st.columns(2)
        with c1:
            selected_gene = st.selectbox(
                'Please select a gene',
                st.session_state['genes_list'])
        with c2:
            selected_celltype = st.selectbox(
                'Please select CellType',
                st.session_state['cell_type'])
        Updated = st.form_submit_button(label='Go')

    # Only draw after an explicit submit with both selections available.
    if Updated and selected_gene is not None and selected_celltype is not None:
        adata_all = st.session_state['adata_annot']

        # Top row: annotation UMAP (left) and expression UMAP for the gene (right).
        col1, col2 = st.columns([1, 1])
        with col1:
            fig11, axx11 = plt.subplots(figsize=(5, 5))
            sc.pl.umap(adata_all, color='new_anno', title='', legend_loc='on data',
                       legend_fontsize='8', frameon=False, show=False, ax=axx11)
            st.pyplot(fig11)
            # Close after rendering so figures do not pile up across reruns.
            plt.close(fig11)
        with col2:
            fig12, axx12 = plt.subplots(figsize=(5, 5))
            sc.pl.umap(adata_all, color=selected_gene, title='', legend_loc='best',
                       frameon=False, show=False, legend_fontsize='xx-small', ax=axx12)
            axx12.set_title(selected_gene, fontsize=12)
            st.pyplot(fig12)
            plt.close(fig12)

        # Subset young and old, then narrow each to the selected cell type.
        adata_Young = adata_all[adata_all.obs['Age_group'] == 'young']
        adata_Old = adata_all[adata_all.obs['Age_group'] == 'old']
        adata_YoungAst = adata_Young[adata_Young.obs['new_anno'] == selected_celltype]
        adata_OldAst = adata_Old[adata_Old.obs['new_anno'] == selected_celltype]

        dot_size = .05
        font_sz = 4
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(3, 3))

        # (axes, cells to plot, colorbar location); only the bottom row gets a colorbar.
        panels = (
            (ax1, adata_Young, None),
            (ax2, adata_YoungAst, None),
            (ax3, adata_Old, "bottom"),
            (ax4, adata_OldAst, "bottom"),
        )
        for panel_ax, subset, cbar_loc in panels:
            if subset.n_obs == 0:
                # No cell matches this age group / cell type combination:
                # show a placeholder instead of handing scanpy an empty selection.
                panel_ax.text(0.5, 0.5, 'no cells', ha='center', va='center',
                              fontsize=font_sz, transform=panel_ax.transAxes)
            else:
                sc.pl.umap(subset, color=selected_gene, title="", legend_loc='right margin',
                           color_map='viridis', frameon=True, show=False, size=dot_size,
                           legend_fontsize='xx-small', colorbar_loc=cbar_loc, ax=panel_ax)
            panel_ax.get_xaxis().set_visible(False)

        # Column titles on the top row, row labels on the left column.
        # Labels are set after plotting so they are not overwritten by sc.pl.umap.
        ax1.set_title('All', fontsize=font_sz)
        ax1.set_ylabel('Young', fontsize=font_sz)
        ax2.set_title(selected_celltype, fontsize=font_sz)
        ax2.set_ylabel('', fontsize=0)
        ax3.set_ylabel('Old', fontsize=font_sz)
        for right_ax in (ax2, ax4):
            right_ax.get_yaxis().set_visible(False)

        # NOTE(review): the coefficient estimate and p-value below are literal
        # text and therefore identical for every gene - confirm whether they
        # should be looked up per gene.
        fig.suptitle(selected_gene + "\ncoefficient estimate: 0.24 | BH-FDR p=7.91x$10^{-3}$", fontsize=font_sz)
        st.pyplot(fig)
        plt.close(fig)
# Tab 2: dot plot and heatmap for a user-chosen gene list or a GO term's gene set.
with tab2:
    with st.form(key='multiselect_form'):
        c1, c2, c3 = st.columns([4, 4, 2])
        with c1:
            multi_genes = st.multiselect(
                'Select Genes List',
                st.session_state['genes_list'])
        with c2:
            go_term = st.selectbox(
                'Select GO Term',
                st.session_state['path_ways'])
        with c3:
            Choice = st.radio(
                "",
                ('Gene Set', 'GO Term'))
        Updated_tab2 = st.form_submit_button(label='Show Results')

    if Updated_tab2:
        # Resolve the requested genes from whichever input the radio selects.
        if Choice == 'Gene Set':
            requested_genes = list(multi_genes)
        elif go_term is not None:
            requested_genes = st.session_state['go_table'].loc[:, go_term].dropna().tolist()
        else:
            requested_genes = []

        # Keep only genes present in the dataset before plotting; GO gene sets
        # come from a separate file and may name genes the dataset lacks.
        known_genes = set(st.session_state['genes_list'])
        skipped_genes = sorted(g for g in requested_genes if g not in known_genes)
        multi_genes = sorted(g for g in requested_genes if g in known_genes)

        if skipped_genes:
            preview = ', '.join(str(g) for g in skipped_genes[:10])
            suffix = ', ...' if len(skipped_genes) > 10 else ''
            st.warning(f"{len(skipped_genes)} gene(s) not present in the dataset were skipped: {preview}{suffix}")

        if not multi_genes:
            # st.multiselect returns an empty list (never None) when nothing is chosen.
            st.warning('No genes to display. Please select at least one gene, or a GO term with genes present in the dataset.')
        else:
            adata_all = st.session_state['adata_annot']

            # Dot plot. scanpy builds its own figure here, so the figure is
            # taken from pyplot's current figure right after the call.
            axx11 = sc.pl.dotplot(adata_all, multi_genes, 'new_anno',
                                  size_title='Fraction of\n Expressing Cells',
                                  colorbar_title='Mean\nExpression', cmap='BuPu',
                                  swap_axes=True, show=False, vmax=5)
            fig11 = plt.gcf()
            st.pyplot(fig11)
            # Close after rendering so figures do not pile up across reruns.
            plt.close(fig11)

            # Heatmap. show=False stops scanpy from calling plt.show() itself;
            # the figure is rendered by st.pyplot below.
            axx12 = sc.pl.heatmap(adata_all, multi_genes, groupby='new_anno', vmin=-1, vmax=1,
                                  cmap='BuPu', dendrogram=True, swap_axes=True,
                                  show_gene_labels=True, var_group_rotation=45, show=False)
            plt.xticks(rotation=45)
            fig12 = plt.gcf()
            st.pyplot(fig12)
            plt.close(fig12)
# README tab: two expanders filled with static markdown paragraphs.
# NOTE(review): the text below talks about sgRNA guide tables and a sidebar
# checkbox, none of which appear in the other tabs of this script - it looks
# carried over from a different application; confirm and replace.
with readme:
    expander = st.expander("How to use this app")
    usage_paragraphs = (
        'Please select **Results Menue** checkbox from the sidebar',
        'Select a Gene from the dropdown list',
        'A table showing all reference gudies from three LISTS will appear in the main panel',
        'To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox',
        'Results are shown as two tables, **MATCHED** and **MUTATED** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files',
        '**MATCHED** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**',
        '**MUTATED** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**',
    )
    for paragraph in usage_paragraphs:
        expander.markdown(paragraph)

    expander1 = st.expander('Introduction')
    intro_paragraphs = (
        " This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.\n",
        'Merged bam file was converted to fasta file using following steps:',
        '- samtools mpileup to generate bcf file',
        '- bcftools to generate vcf file',
        '- bcftools consensus to generate fasta file',
        'A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.',
    )
    for paragraph in intro_paragraphs:
        expander1.markdown(paragraph)
# CSS override that sets the font size of the tab labels; injected as raw HTML.
css = (
    '\n'
    '<style>\n'
    '.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {\n'
    'font-size:1.5rem;\n'
    '}\n'
    '</style>\n'
)
st.markdown(css, unsafe_allow_html=True)