Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import zipfile | |
| import urllib.request | |
| import glob | |
| import SigProfilerMatrixGenerator | |
| from SigProfilerMatrixGenerator import install as genInstall | |
| import shutil | |
| import os | |
| import re | |
| from SigProfilerExtractor import sigpro as sig | |
| from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as datadump | |
| import sys | |
| import numpy as np | |
| import pandas as pd | |
| import base64 | |
| import streamlit.components.v1 as components | |
| from liftover import get_lifter | |
# Coordinate lifter from hg38 to hg19, built once at import time; it is applied
# to the Chrom/Pos columns of uploaded spreadsheets further down in this file.
converter = get_lifter('hg38', 'hg19')
# Working directory at start-up.
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
curdir= os.getcwd()
def remove_old_vcf():
    """Delete every ``*.vcf`` file found directly in ``input/`` and ``input/input/``.

    Paths are relative to the current working directory. Missing directories
    simply yield no matches, so the call is a no-op in that case.
    """
    for pattern in ('input/*.vcf', 'input/input/*.vcf'):
        for stale_vcf in glob.glob(pattern):
            os.remove(stale_vcf)
def show_pdf(file_path):
    """Render the PDF stored at *file_path* inline on the Streamlit page.

    The file is read in binary mode, base64-encoded and embedded in an
    ``<iframe>`` through a ``data:application/pdf`` URI.
    """
    with open(file_path, "rb") as pdf_handle:
        raw_pdf = pdf_handle.read()
    base64_pdf = base64.b64encode(raw_pdf).decode('utf-8')
    pdf_display = (
        '<iframe src="data:application/pdf;base64,'
        + base64_pdf
        + '" width="1500" height="1000" type="application/pdf"></iframe>'
    )
    st.markdown(pdf_display, unsafe_allow_html=True)
def _download_anchor(payload, filename, label):
    """Return an HTML ``<a>`` element that downloads *payload* as *filename*.

    Args:
        payload: Raw bytes to embed as a base64 ``data:`` URI.
        filename: Name suggested to the browser via the ``download`` attribute.
        label: Visible link text.

    The file name and label come from user uploads, so both are HTML-escaped
    before being placed in markup that is rendered with
    ``unsafe_allow_html=True``.
    """
    import html  # local import: the module header does not import ``html``

    encoded = base64.b64encode(payload).decode()
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{html.escape(filename, quote=True)}">'
        f'{html.escape(label)}</a>'
    )


def showdl_counts(file_to_lookat, to_dl_sbs96, to_dl_sbs1536):
    """Render download links for the SBS-96 and SBS-1536 count tables.

    Args:
        file_to_lookat: Uploaded files; only their ``.name`` is used.
        to_dl_sbs96: Per-file SBS-96 table bytes (``[]`` marks "no result").
        to_dl_sbs1536: Per-file SBS-1536 table bytes, parallel to the above.
    """
    for uploaded, sbs96, sbs1536 in zip(file_to_lookat, to_dl_sbs96, to_dl_sbs1536):
        if sbs96 == []:  # ``[]`` is the "nothing to download" sentinel
            continue
        name = uploaded.name
        links = (
            (sbs96, f'{name}96SBS.txt',
             f'Download {name} Single Base Substition (96) table'),
            (sbs1536, f'{name}1536SBS.txt',
             f'Download {name} Single Base Substition (1536) table'),
        )
        for payload, filename, label in links:
            st.markdown(_download_anchor(payload, filename, label), unsafe_allow_html=True)


def showdl(file_to_lookat,to_dl_sbs,to_dl_indel,to_dl_dbs,to_dl_sbs_text,to_dl_indel_text,to_dl_dbs_text,to_dl_sbs_summary_text,to_dl_id_summary_text,to_dl_dbs_summary_text):
    """Render download links for the SBS, indel and DBS signature results.

    All ``to_dl_*`` arguments are lists parallel to *file_to_lookat*. For each
    mutation class the plot PDF, the sample table and the summary table are
    offered, unless the plot entry is the ``[]`` sentinel that ``dl`` returns
    when that class produced no output.

    Links are emitted class by class (all SBS links, then all indel links,
    then all DBS links), matching the order of the three loops this replaces.
    """
    # (file suffix, class title, summary prefix, plots, tables, summaries)
    sections = (
        ('SBS', 'Single Base Substition', 'Summary',
         to_dl_sbs, to_dl_sbs_text, to_dl_sbs_summary_text),
        ('Indel', 'indel', 'summary',
         to_dl_indel, to_dl_indel_text, to_dl_id_summary_text),
        ('DBS', 'Double Base Substitution', 'summary',
         to_dl_dbs, to_dl_dbs_text, to_dl_dbs_summary_text),
    )
    for suffix, title, summary_word, plots, tables, summaries in sections:
        for uploaded, plot, table, summary in zip(file_to_lookat, plots, tables, summaries):
            if plot == []:  # no output for this mutation class / file
                continue
            name = uploaded.name
            links = (
                (plot, f'{name}{suffix}.pdf', f'Download {name} {title} pdf'),
                (table, f'{name}{suffix}.txt', f'Download {name} {title} table'),
                # Summary tables get their own file name so they no longer
                # collide with the plain table download of the same class.
                (summary, f'{name}{suffix}_summary.txt',
                 f'Download {name} {summary_word} {title} table'),
            )
            for payload, filename, label in links:
                st.markdown(_download_anchor(payload, filename, label), unsafe_allow_html=True)
def dl_counts(valforkey):
    """Read the count tables ``sbs96.txt`` and ``sbs1536.txt`` from the cwd.

    Args:
        valforkey: Unused; kept so existing callers keep working.

    Returns:
        A ``(sbs96_bytes, sbs1536_bytes)`` tuple with the raw file contents.

    Raises:
        FileNotFoundError: If either file does not exist.
    """
    # The ``with`` blocks close the files; no explicit close() is needed.
    with open("sbs96.txt", "rb") as sbs96_file:
        sbs96_all_bytes = sbs96_file.read()
    with open("sbs1536.txt", "rb") as sbs1536_file:
        sbs1536_all_bytes = sbs1536_file.read()
    return sbs96_all_bytes, sbs1536_all_bytes
def _read_bytes(path):
    """Return the entire contents of *path* as bytes."""
    with open(path, "rb") as handle:
        return handle.read()


def _summarise_samples(samples_path, summary_path):
    """Write per-mutation totals for *samples_path* to *summary_path*; return its bytes.

    The counts are taken from the second column of the table. Mutation types
    are collapsed by stripping a leading ``<char>[`` and a trailing
    ``]<char>`` (so ``A[C>A]G`` becomes ``C>A``); types without that flanking
    notation are left unchanged.
    """
    counts = pd.read_table(samples_path)
    counts['nums'] = counts.iloc[:, 1]
    counts['mutation_simple'] = counts['MutationType'].apply(
        lambda mutation: re.sub(r'].$', '', re.sub(r'^.\[', '', mutation))
    )
    # Sum only the numeric column instead of summing (and string-concatenating)
    # every column of each group and discarding all but one afterwards.
    summary = counts.groupby('mutation_simple')[['nums']].sum()
    summary.to_csv(summary_path, sep='\t', header=False, index=True)
    return _read_bytes(summary_path)


def _collect_context(context, summary_path):
    """Return ``(plot_pdf, samples_txt, summary_txt)`` bytes for one context.

    *context* is the SigProfiler output folder name (``SBS96``, ``ID83`` or
    ``DBS78``). Three empty lists are returned when the decomposition plot for
    that context does not exist; callers use ``[]`` as the "no output" marker.
    """
    base = f'output/{context}'
    plot_path = (
        f'{base}/Suggested_Solution/COSMIC_{context}_Decomposed_Solution/'
        f'{context}_Decomposition_Plots.pdf'
    )
    if not os.path.exists(plot_path):
        return [], [], []
    samples_path = f'{base}/Samples.txt'
    return (
        _read_bytes(plot_path),
        _read_bytes(samples_path),
        _summarise_samples(samples_path, summary_path),
    )


def dl(valforkey):
    """Collect the signature results from ``output/`` and then clean up.

    Args:
        valforkey: Unused; kept so existing callers keep working.

    Returns:
        A 9-tuple ``(sbs_pdf, id_pdf, dbs_pdf, sbs_txt, id_txt, dbs_txt,
        sbs_summary, id_summary, dbs_summary)``. Each element is ``bytes``, or
        ``[]`` when the corresponding context produced no decomposition plot.

    Side effects:
        Writes ``sbs_summary.txt``, ``id_summary.txt`` and ``dbs_summary.txt``
        to the working directory, reports found SBS96 PDFs via ``st.write``,
        and deletes the ``output`` and ``input`` directories.
    """
    sbs_pdfs = glob.glob(
        'output/SBS96/Suggested_Solution/COSMIC_SBS96_Decomposed_Solution/*pdf'
    )
    for found in sbs_pdfs:
        st.write('pdf file with sbs96 output is here: ' + found)

    PDFbyte1, Txtbyte1, sbs_summary = _collect_context('SBS96', 'sbs_summary.txt')
    PDFbyte2, Txtbyte2, id_summary = _collect_context('ID83', 'id_summary.txt')
    PDFbyte3, Txtbyte3, dbs_summary = _collect_context('DBS78', 'dbs_summary.txt')

    # Portable replacement for ``rm -r``; a missing directory is ignored just
    # as the unchecked shell call's failure was.
    shutil.rmtree('output', ignore_errors=True)
    shutil.rmtree('input', ignore_errors=True)

    return (
        PDFbyte1, PDFbyte2, PDFbyte3,
        Txtbyte1, Txtbyte2, Txtbyte3,
        sbs_summary, id_summary, dbs_summary,
    )
| #st.write(glob.glob(os.path.join(os.path.dirname(SigProfilerMatrixGenerator.__file__),'references/*txt'))) | |
# Main page: upload form, optional reference download, per-file analysis and
# download links.
# NOTE(review): this section was reconstructed from a listing that had lost its
# indentation. Two placements are assumptions to confirm: that the processing
# runs inside the form block, and that the COSMIC iframe is shown after every
# submission (counts-only mode included).
with st.form('get signature'):
    dirtest = os.path.dirname(SigProfilerMatrixGenerator.__file__)
    reference_glob = os.path.join(dirtest, 'references/chromosomes/tsb/GRCh3[78]/')

    # A reference is considered installed when any *.txt chromosome file exists.
    refdownload = not glob.glob(reference_glob + '*txt')
    if refdownload:
        st.write('There is no reference genome, we need to download this')
    else:
        st.write('using reference from here:' + glob.glob(reference_glob)[0])

    no_profiles_only_counts = st.radio('Do Not Perform COSMIC profile analysis', [False, True])
    # NOTE(review): this choice only affects the informational message below.
    # Counts mode always uses GRCh37, and VCF uploads are not lifted over.
    referencegenome = st.radio('reference', ['hg19', 'GRCh38'])
    file_to_lookat = st.file_uploader('VCF upload here', type=[".vcf", "xlsx"], accept_multiple_files=True)
    remove_old_vcf()
    sub = st.form_submit_button('submit input')

    if file_to_lookat and sub:
        if refdownload:
            if referencegenome == 'GRCh38':
                st.write('using liftover with hg19 instead of downloading Grch38')
            with st.spinner('downloading hg19 reference'):
                urllib.request.urlretrieve('https://dl.dropboxusercontent.com/s/et97ewsct862x7m/references.zip?dl=0', 'references.zip')
                with zipfile.ZipFile('references.zip', 'r') as zip_ref:
                    zip_ref.extractall(dirtest)

        # Result lists; every list receives exactly one entry per uploaded
        # file so that index j always refers to file_to_lookat[j].
        to_dl_sbs = []
        to_dl_indel = []
        to_dl_dbs = []
        to_dl_sbs_text = []
        to_dl_indel_text = []
        to_dl_dbs_text = []
        to_dl_sbs_summary_text = []
        to_dl_id_summary_text = []
        to_dl_dbs_summary_text = []
        to_dl_sbs96 = []
        to_dl_sbs1536 = []
        # Same order as the tuple returned by dl().
        signature_buckets = (
            to_dl_sbs, to_dl_indel, to_dl_dbs,
            to_dl_sbs_text, to_dl_indel_text, to_dl_dbs_text,
            to_dl_sbs_summary_text, to_dl_id_summary_text, to_dl_dbs_summary_text,
        )

        for j, uploaded in enumerate(file_to_lookat):
            # dl() deletes input/ after each file, so recreate it every time.
            os.makedirs('input/input', exist_ok=True)
            remove_old_vcf()
            # Only the final path component is used, so a crafted upload name
            # cannot escape the input directory.
            upload_name = os.path.basename(uploaded.name)

            if upload_name.endswith('vcf'):
                with open(os.path.join("input", upload_name), "wb") as f:
                    f.write(uploaded.read())
            else:
                # Spreadsheet upload: the first data row is dropped and the
                # remaining coordinates are lifted from hg38 to hg19.
                table_of_penn_file = pd.read_excel(uploaded).iloc[1:, :].copy()
                hits = [
                    converter[chrom][int(pos)]
                    for chrom, pos in zip(table_of_penn_file['Chrom'], table_of_penn_file['Pos'])
                ]
                mapped = [bool(hit) for hit in hits]
                if not all(mapped):
                    # An empty hit list used to raise a bare IndexError.
                    st.warning(f'{mapped.count(False)} variant(s) in {upload_name} could not be lifted over to hg19 and were skipped')
                    table_of_penn_file = table_of_penn_file.loc[mapped]
                    hits = [hit for hit in hits if hit]
                tovcf = pd.DataFrame(
                    {
                        'Chrom': [hit[0][0] for hit in hits],
                        'Pos': [hit[0][1] for hit in hits],
                        'db': '.',
                        'ref': table_of_penn_file['Ref'],
                        'alt': table_of_penn_file['Alt'],
                    },
                    index=table_of_penn_file.index,
                )
                nameuse = re.sub('xlsx$', 'vcf', upload_name)
                # Written straight into input/ instead of via a copy in the cwd.
                tovcf.to_csv(os.path.join("input", nameuse), sep='\t', header=False, index=False)
                st.write('file after liftover:')
                st.write(tovcf)

            if no_profiles_only_counts:
                refgen = "GRCh37"
                project = "input"
                project_name = project.split("/")[-1]
                with st.spinner('computing counts only'):
                    data = datadump.SigProfilerMatrixGeneratorFunc(project_name, refgen, project, exome=False, bed_file=None, chrom_based=False, plot=False, gs=False)
                data['96'].to_csv('sbs96.txt', sep='\t', header=False, index=True)
                data['1536'].to_csv('sbs1536.txt', sep='\t', header=False, index=True)
                sbs96_result, sbs1536_result = dl_counts(j)
                to_dl_sbs96.append(sbs96_result)
                to_dl_sbs1536.append(sbs1536_result)
            else:
                with st.spinner('computing signatures'):
                    sig.sigProfilerExtractor("vcf", "output", "input", minimum_signatures=1, maximum_signatures=3,nmf_test_conv= 1000,nmf_tolerance= 1e-10,max_nmf_iterations=100000,min_nmf_iterations= 1000)
                if glob.glob('output/SBS96/Suggested_Solution/COSMIC_SBS96_Decomposed_Solution/*pdf'):
                    results = dl(j)
                else:
                    # Keep the lists aligned with file_to_lookat: showdl skips
                    # entries equal to [], so later files keep their own name.
                    st.warning(f'no SBS96 signature output was produced for {upload_name}')
                    results = tuple([] for _ in signature_buckets)
                for bucket, value in zip(signature_buckets, results):
                    bucket.append(value)

            remove_old_vcf()

        if no_profiles_only_counts:
            showdl_counts(file_to_lookat, to_dl_sbs96, to_dl_sbs1536)
        else:
            showdl(file_to_lookat,to_dl_sbs,to_dl_indel,to_dl_dbs,to_dl_sbs_text,to_dl_indel_text,to_dl_dbs_text,to_dl_sbs_summary_text,to_dl_id_summary_text,to_dl_dbs_summary_text)
        components.iframe("https://cancer.sanger.ac.uk/signatures/sbs/", height=3000,width=800)