Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode | |
| from iteration_utilities import duplicates | |
| from iteration_utilities import unique_everseen | |
| import os | |
# Switch the Streamlit page to the "wide" layout.
st.set_page_config(layout="wide")
# Inject a CSS rule that sets the font size of elements carrying the
# `streamlit-expanderHeader` class to x-large. unsafe_allow_html is needed so
# the <style> tag is emitted as HTML rather than shown as text.
st.markdown(
    """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
""",
    unsafe_allow_html=True,
)
def _styled_paragraph(color, font_px, text):
    """Wrap *text* in a sans-serif <p> element with the given colour and font size (px)."""
    opening = '<p style="font-family:sans-serif; color:' + color + '; font-size: ' + font_px + 'px;">'
    return opening + text + '</p>'


# HTML snippets for the user-facing notices of the app. The wording is kept
# exactly as it was; only the shared <p style=...> wrapper is factored out.
caution = _styled_paragraph(
    'Red', '18',
    'Please note that Only one Guide (from pair) is found. Please see guides not found section for other guide',
)
caution1 = _styled_paragraph(
    'Red', '18',
    'Please note that Each mutated guide is reported as a sepearte line. sgID_1/2, sgRNA_1/2, chr_sgRNA_1/2 and position_sgRNA_1/2 represent values for reference/mutated guide',
)
caution2 = _styled_paragraph(
    'Red', '18',
    'Please Select a single/multiple guides and then select Check Box A, B or C Otherwise code will through error',
)
table_edit = _styled_paragraph(
    'Green', '16',
    'About Table: Please note that table can be <b>sorted by clicking on any column</b> and <b>Multiple rows can be selected</b> (by clicking check box in first column) to save only those rows.',
)
caution_genes = _styled_paragraph(
    'Red', '16',
    'Please make sure that desired genes from all three lists should be selected to generate Order Ready Table.',
)
# READ INPUT FILES
# All tables are read from the "data/" folder under the current working directory.
cwd = os.getcwd() + '/' + 'data/'


def _read_data_csv(file_name):
    """Read one CSV from the data folder without using any column as the index."""
    return pd.read_csv(cwd + file_name, index_col=False)


def _read_by_gene(file_name):
    """Read one CSV from the data folder and order its rows by the 'gene' column."""
    return _read_data_csv(file_name).sort_values('gene')


def _read_found(file_name, trim_chr=False):
    """Read a 'found' table sorted by gene, with the misspelled 'strnad' column renamed.

    With ``trim_chr=True`` every 'chr' value is cut at its first space.
    """
    table = _read_by_gene(file_name)
    if trim_chr:
        table['chr'] = [label.split(' ')[0] for label in table['chr']]
    # 'strnad' was misspelled in the LR-Guides pipeline output.
    return table.rename(columns={'strnad': 'strand'})


def _split_by_targeting(guide_table):
    """Return (regular, non-targeting) sorted 'sgID_AB' series of a guide list."""
    is_non_targeting = guide_table['gene'].str.contains('non-targeting')
    regular_ids = guide_table[~is_non_targeting]['sgID_AB'].sort_values()
    control_ids = guide_table[is_non_targeting]['sgID_AB'].sort_values()
    return regular_ids, control_ids


# Here, the gene column is modified for non-targeting guides in the format
# sgID_1|sgID_2 for coherent downstream manipulation.
listA = _read_data_csv("guides_a_new.csv")
listB = _read_data_csv("guides_b_new.csv")
listC = _read_data_csv("guides_c_new.csv")
lista_sz, listb_sz, listc_sz = listA.shape[0], listB.shape[0], listC.shape[0]

variantsa1 = listA['gene'].unique()
variantsb1 = listB['gene'].unique()
variantsc1 = listC['gene'].unique()
# Make a comprehensive list of genes in all 3 lists (please note that
# non-targeting guide names are not the same across the three lists).
con = np.concatenate((variantsa1, variantsb1, variantsc1))
variants_s = sorted(np.unique(con))

# Now read the GRCh38 and LR guides for set A as identified by the LR-Guides pipeline.
# Format is: gene (as many entries as number of guides found, both matched and
# mutated), ref_guide, chr, position, mutated_guide (can also be the same as the
# reference), strand, num_mismatch (excluding leading G). Please note that each
# guide has a trailing NGG.
listA_found_ref = _read_found("seta_found_ref1.csv", trim_chr=True)
lsita_ref_found_sz = listA_found_ref.shape[0]
# This (and every such) file has 2 columns (gene as given in sgID_1/2, ref_guide).
listA_notfound_ref = _read_by_gene("seta_notfound_ref1.csv")
lsita_ref_notfound_sz = listA_notfound_ref.shape[0]
# LR guides
listA_found_lr = _read_found("seta_found_LR1.csv")
lsita_lr_found_sz = listA_found_lr.shape[0]
listA_notfound_lr = _read_by_gene("seta_notfound_LR1.csv")
lsita_lr_notfound_sz = listA_notfound_lr.shape[0]

# Also read GRCh38 and LR guides for set B.
# The reference table of set B is sorted by gene a second time after the 'chr'
# clean-up, exactly as before.
listB_found_ref = _read_found("setb_found_ref1.csv", trim_chr=True).sort_values('gene')
lsitb_ref_found_sz = listB_found_ref.shape[0]
listB_notfound_ref = _read_by_gene("setb_notfound_ref1.csv")
lsitb_ref_notfound_sz = listB_notfound_ref.shape[0]
listB_found_lr = _read_found("setb_found_LR1.csv")
lsitb_lr_found_sz = listB_found_lr.shape[0]
listB_notfound_lr = _read_by_gene("setb_notfound_LR1.csv")
lsitb_lr_notfound_sz = listB_notfound_lr.shape[0]

# Also read GRCh38 and LR guides for set C.
listC_found_ref = _read_found("setc_found_ref1.csv", trim_chr=True)
lsitc_ref_found_sz = listC_found_ref.shape[0]
listC_notfound_ref = _read_by_gene("setc_notfound_ref1.csv")
lsitc_ref_notfound_sz = listC_notfound_ref.shape[0]
listC_found_lr = _read_found("setc_found_LR1.csv")
lsitc_lr_found_sz = listC_found_lr.shape[0]
listC_notfound_lr = _read_by_gene("setc_notfound_LR1.csv")
lsitc_lr_notfound_sz = listC_notfound_lr.shape[0]

# This is for the all-guides order table: split each list into its regular and
# non-targeting sgID_AB identifiers.
set_start = 0
regular_lista, non_targeting_lista = _split_by_targeting(listA)
set_end = regular_lista.shape[0]  # 18905
regular_listb, non_targeting_listb = _split_by_targeting(listB)
regular_listc, non_targeting_listc = _split_by_targeting(listC)
| #GENERAL FUNCTIONS | |
def transform(df,str):
    """Let the user pick columns of ``df`` and return ``df`` restricted to them.

    A Streamlit multiselect labelled ``str`` is shown with every column of
    ``df`` offered and pre-selected; the returned frame keeps the chosen columns.
    """
    offered = df.columns.tolist()
    preselected = df.columns.tolist()
    chosen = st.multiselect(str, offered, preselected)
    return df[chosen]
def convert_df(df):
    """Serialise ``df`` to CSV text (index column included) and return it as UTF-8 bytes."""
    csv_text = df.to_csv()
    return bytes(csv_text, 'utf-8')
def convert_df1(df):
    """Serialise ``df`` to CSV text without the index column and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return bytes(csv_text, 'utf-8')
| #########TABLE DISPLAY | |
def tbl_disp(dat,var,ref,key,flg=1):
    """Render ``dat`` as an interactive AgGrid table with CSV download buttons.

    Parameters:
        dat: DataFrame to show; its index is reset in place.
        var, ref: strings joined as ``var + '_' + ref + '.csv'`` to form the
            download file name.
        key: accepted but not used in this function.
        flg: when equal to 1 a "full table" download button is shown; when
            truthy a "selected rows" download button is shown once rows are
            selected.

    Returns:
        A DataFrame built from the selected grid rows, or None when the grid
        reports no selection.
    """
    dat.reset_index(drop=True, inplace=True)

    # The full-table CSV is always built; the button is only shown for flg == 1.
    full_table_csv = convert_df(dat)
    if flg==1:
        st.download_button(
            label="Download Full Table as CSV file",
            data=full_table_csv,
            file_name=var+'_'+ref+'.csv',
            mime='text/csv',
        )

    # Grid configuration: no pagination, multi-row checkbox selection with a
    # header checkbox on the "gene" column, and the side bar enabled.
    builder = GridOptionsBuilder.from_dataframe(dat)
    builder.configure_pagination(enabled=False)
    builder.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    builder.configure_selection(selection_mode="multiple", use_checkbox=True)
    builder.configure_column("gene", headerCheckboxSelection = True)
    builder.configure_side_bar()
    grid_options = builder.build()

    grid_response = AgGrid(
        dat,
        height=200,
        gridOptions=grid_options,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    selected = grid_response['selected_rows']
    if not selected:
        return None

    dfs = pd.DataFrame(selected)
    # The first column of the selection frame is left out of the export.
    selected_csv = convert_df1(dfs[dfs.columns[1:]])
    if flg:
        st.download_button(
            label="Download Selected data as CSV",
            data=selected_csv,
            file_name=var+'_'+ref+'.csv',
            mime='text/csv',
        )
    return dfs
def _pair_odd_rows(t, columns):
    """Expand an odd-length match table into an even-length one for assemble_tbl.

    Walks the rows of ``t`` in order. When a row and its successor agree on
    gene, chr and position, both rows are kept as a pair; otherwise the row is
    written twice. The result starts from an empty frame with ``columns`` and
    has a fresh 0..n-1 index.
    """
    pieces = [pd.DataFrame(columns=columns)]
    last = t.shape[0] - 1
    i = 0
    while i <= last:
        same_site = (
            i < last
            and t.iloc[i]['gene'] == t.iloc[i + 1]['gene']
            and t.iloc[i]['chr'] == t.iloc[i + 1]['chr']
            and t.iloc[i]['position'] == t.iloc[i + 1]['position']
        )
        if same_site:
            # Keep BOTH rows of the pair. The previous code appended row i
            # twice here, silently dropping row i+1.
            pieces.append(t.iloc[[i]])
            pieces.append(t.iloc[[i + 1]])
            i += 2
        else:
            # No partner row: repeat the entry.
            pieces.append(t.iloc[[i]])
            pieces.append(t.iloc[[i]])
            i += 1
    return pd.concat(pieces, ignore_index=True)


def get_lists(ref_list,list_found_ref,list_notfound_ref):
    """Look up the guides named in ``ref_list`` in one set's found/not-found tables.

    Parameters:
        ref_list: DataFrame with an 'sgID_AB' column holding 'sgID_1|sgID_2'
            strings.
        list_found_ref: DataFrame with the columns gene, ref_guide, chr,
            position, mutated_guide, strand and num_mismatch; its 'gene'
            column is compared against the individual guide IDs.
        list_notfound_ref: DataFrame with a 'gene' column listing guide IDs
            that were not found.

    Returns:
        A 6-tuple:
        0. the first row (at most) of the paired table built by assemble_tbl
           from the zero-mismatch rows, or an empty frame when there are none;
        1. the table built by get_mutated_res from the rows with
           num_mismatch > 0 (empty frame when there are none);
        2. the not-found rows for the first two guide IDs of ``ref_list``;
        3. the zero-mismatch rows whose 'chr' contains 'chr';
        4. the rows with num_mismatch > 0 whose 'chr' contains 'chr', sorted
           by position;
        5. guideflg1: 0 when table 3 is non-empty, otherwise 1.

    Raises:
        IndexError: if ``ref_list`` is empty or an 'sgID_AB' value has no '|'.
        ValueError: from pandas.concat if ``ref_list`` is empty.
    """
    found_columns = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
    pair_columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    # Split every 'sgID_1|sgID_2' value into its two guide IDs, keeping order.
    a_ref = []
    for pair_id in ref_list.sgID_AB.values:
        ids = pair_id.split('|')
        a_ref.extend([ids[0], ids[1]])

    # Collect the found-table rows of every guide ID.
    list_concatenated_found_ref = pd.concat(
        [list_found_ref[list_found_ref['gene'] == guide_id] for guide_id in a_ref]
    )

    # Match list: zero mismatches only, and only rows whose 'chr' contains
    # 'chr' (this removes the alternate loci data).
    list_concatenated_match_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
    list_concatenated_match_ref = list_concatenated_match_ref[list_concatenated_match_ref['chr'].str.contains('chr')]

    # Build the table that holds both sgRNAs in one row.
    dft = pd.DataFrame(columns=found_columns)
    guideflg1 = 1
    if list_concatenated_match_ref.shape[0] > 0:
        guideflg1 = 0
        t = list_concatenated_match_ref.reset_index(drop=True)
        if t.shape[0] == 1:
            # A single hit is paired with itself.
            dft = assemble_tbl(t.loc[t.index.repeat(2)].reset_index(drop=True))
        elif t.shape[0] % 2 == 0:
            dft = assemble_tbl(t)
        else:
            dft = assemble_tbl(_pair_odd_rows(t, found_columns))

    # Mutated list: at least one mismatch, alternate loci removed as above.
    list_concatenated_mutated_ref = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch > 0]
    list_concatenated_mutated_ref = list_concatenated_mutated_ref.sort_values('position')
    list_concatenated_mutated_ref = list_concatenated_mutated_ref[list_concatenated_mutated_ref['chr'].str.contains('chr')]
    dft_mut = pd.DataFrame(columns=pair_columns)
    if list_concatenated_mutated_ref.shape[0] > 0:
        dft_mut = get_mutated_res(list_concatenated_mutated_ref)

    # Not-found check: only the first two guide IDs are looked up.
    seta_notfound0_ref = list_notfound_ref[list_notfound_ref['gene'] == a_ref[0]]
    seta_notfound1_ref = list_notfound_ref[list_notfound_ref['gene'] == a_ref[1]]
    list_concatenated_notfound_ref = pd.concat([seta_notfound0_ref, seta_notfound1_ref])

    return dft.iloc[:1], dft_mut, list_concatenated_notfound_ref, list_concatenated_match_ref, list_concatenated_mutated_ref, guideflg1
| ########### | |
def get_mutated_res(list_concatenated_mutated_ref):
    """Turn each mutated-guide row into one reference/mutated guide pair row.

    The input must have exactly seven columns, read positionally as guide ID,
    reference guide, chr, position, mutated guide, strand and mismatch count.
    For every input row one output row is produced in which sgID, chr and
    position are the same on both sides, sgRNA_1 comes from the reference
    guide and sgRNA_2 from the mutated guide. Both sequences are rewritten as
    'G' followed by characters 1..19 of the original, and sgID_1_2 is the
    guide ID joined to itself with '|'.

    Returns a DataFrame with the nine pair columns; every output row carries
    index label 0, and the frame is empty when the input has no rows.
    """
    pair_columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']
    source = list_concatenated_mutated_ref.reset_index(drop=True)
    collected = pd.DataFrame(columns=pair_columns)

    for row_no in range(source.shape[0]):
        row = source.iloc[[row_no]].reset_index(drop=True)
        row.columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
        pair = pd.DataFrame({
            'sgID_1': row['sgID_1'],
            # Force a leading G on both sequences and keep 20 characters in total.
            'sgRNA_1': 'G' + row['sgRNA_1'].str.slice(1, 20),
            'chr_sgRNA_1': row['chr_sgRNA_1'],
            'position_sgRNA_1': row['position_sgRNA_1'],
            'sgID_2': row['sgID_1'],
            'sgRNA_2': 'G' + row['mutated_guide'].str.slice(1, 20),
            'chr_sgRNA_2': row['chr_sgRNA_1'],
            'position_sgRNA_2': row['position_sgRNA_1'],
            'sgID_1_2': row['sgID_1'] + "|" + row['sgID_1'],
        })
        collected = pd.concat([collected, pair])

    return collected
def not_found_check(set12,set34,set56,listA_notfound_lr,listB_notfound_lr,listC_notfound_lr):
    """Report which guides of three 'sgID_1|sgID_2' pairs appear in the not-found tables.

    ``set12`` is checked against ``listA_notfound_lr``, ``set34`` against
    ``listB_notfound_lr`` and ``set56`` against ``listC_notfound_lr``; each
    table is searched by its 'gene' column.

    Returns a 6-tuple of 0/1 flags ordered (A first, A second, B first,
    B second, C first, C second), where 1 means the guide ID has at least one
    row in the corresponding not-found table.
    """
    lookups = (
        (set12, listA_notfound_lr),
        (set34, listB_notfound_lr),
        (set56, listC_notfound_lr),
    )
    flags = []
    for pair_id, missing_table in lookups:
        for slot in (0, 1):
            hits = missing_table[missing_table['gene'] == pair_id.split('|')[slot]]
            flags.append(1 if hits.shape[0] > 0 else 0)
    return tuple(flags)
def order_ready_tbl_CHM13(set12,set34,set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr,ref_sel):
    """Build and display the "order ready" guide table for the selected guide pairs.

    Parameters:
        set12, set34, set56: Series of 'sgID_1|sgID_2' strings for list A, B
            and C. They are re-indexed from 0 and read position by position,
            so entry i of each series is processed together.
        listA_found_lr ... listC_notfound_lr: found / not-found tables of each
            list, passed on to get_lists and not_found_check.
        ref_sel: text inserted into the heading written above the table.

    For every position i the function reads the matching rows from the
    module-level tables listA, listB and listC, calls get_lists once per list
    and then either appends one row to the order table (tagged with a
    'guide_type' label such as '1-2', '1-3', '3-4', '5-6') or appends the three
    reference rows to the "not found in any list" table. Both tables are
    rendered with tbl_disp; nothing is returned.

    NOTE(review): the nesting below was reconstructed from a copy of this file
    whose indentation was lost. `elif resb.shape[0]>0` is treated as part of
    the outermost if/elif chain - confirm against the original file.
    """
    dft_order_table=pd.DataFrame(columns=['gene','guide_type','sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2'])
    dft_notfound_all=pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B'])
    # dft_a / dft_b / dft_c are created but never read in this function.
    dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
    dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
    dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B'])
    # Re-index from 0 so that set12[i] / set34[i] / set56[i] address the i-th entry.
    set12=set12.reset_index(drop = True)
    set34=set34.reset_index(drop = True)
    set56=set56.reset_index(drop = True)
    for i in range(set12.shape[0]):
        # Text before the first '_' of the list-A pair ID is used as the gene name.
        gene_n=set12[i].split('_')[0]
        # f = (A first, A second, B first, B second, C first, C second);
        # 1 means that guide ID is present in the list's not-found table.
        f=not_found_check(set12[i],set34[i],set56[i],listA_notfound_lr,listB_notfound_lr,listC_notfound_lr)
        # list A
        ref_listA=listA[listA['sgID_AB']==set12.iloc[i]][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']]
        ref_listA = ref_listA[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']]
        resa,res_muta,res_notfounda,list_matcha,list_mutateda,gflga1=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
        # list B (no 'gene' column is selected here, unlike list A)
        ref_listB=listB[listB['sgID_AB']==set34.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
        ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
        resb,res_mutb,res_notfoundb,list_matchb,list_mutatedb,gflgb1=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
        # list C
        ref_listC=listC[listC['sgID_AB']==set56.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
        ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
        resc,res_mutc,res_notfoundc,list_matchc,list_mutatedc,gflgc1=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
        # gflga1 == 0: get_lists found at least one zero-mismatch row for list A.
        if gflga1==0:
            # Also verify that both guides are different
            if resa['sgID_1'][0] != resa['sgID_2'][0]:
                resa['gene']=gene_n
                resa['guide_type']='1-2'
                dft_order_table=pd.concat([dft_order_table, resa]) #dft_order_table.concat(resa)
            else: # both IDs are the same guide, so take the second guide from list B
                # At least one list-B guide is not flagged as not found.
                if f[2]==0 or f[3] == 0:
                    if not resb.empty: #second guide is from set B
                        # Replace the second guide of the row with list B's first guide.
                        resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resb[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
                        resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
                        if f[2]==0:
                            resa['gene']=gene_n
                            # First digit: 1 when A's first guide is not flagged, else 2.
                            if f[0]==0:
                                resa['guide_type']="1-3"
                            else:
                                resa['guide_type']="2-3"
                            dft_order_table=pd.concat([dft_order_table,resa])
                        else: # f[2] != 0, hence f[3] == 0
                            resa['gene']=gene_n
                            if f[0]==0:
                                resa['guide_type']="1-4"
                            else:
                                resa['guide_type']="2-4"
                            dft_order_table=pd.concat([dft_order_table,resa])
                    else:
                        dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                        dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                        dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
                else:
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
        # NOTE(review): get_lists only fills its first return value when it also
        # returns flag 0, so this branch looks unreachable - confirm.
        elif resa.shape[0] >0: #at least one guide is from set A
            if f[2]==0 or f[3] == 0:
                if not resb.empty: #second guide is from set B
                    resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resb[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
                    resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
                    if f[2]==0:
                        resa['gene']=gene_n
                        resa['guide_type']=str(gflga1)+"-3"
                        dft_order_table=pd.concat([dft_order_table,resa])
                    else: # f[2] != 0, hence f[3] == 0
                        resa['gene']=gene_n
                        resa['guide_type']=str(gflga1)+"-4"
                        dft_order_table=pd.concat([dft_order_table,resa])
                else:
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
            elif f[4]==0 or f[5] == 0:
                if not resc.empty: #second guide is from set C
                    resa[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resc[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
                    resa['sgID_1_2'] = resa['sgID_1']+"|"+resa['sgID_2']
                    if f[4]==0:
                        resa['gene']=gene_n
                        resa['guide_type']=str(gflga1)+"-5"
                        dft_order_table=pd.concat([dft_order_table,resa])
                    else: # f[4] != 0, hence f[5] == 0
                        resa['gene']=gene_n
                        resa['guide_type']=str(gflga1)+"-6"
                        dft_order_table=pd.concat([dft_order_table,resa])
                else:
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
        elif resb.shape[0]>0: #at least one guide
            if gflgb1==0:
                # List B supplies the pair only when its two guide IDs differ.
                if resb['sgID_1'][0] != resb['sgID_2'][0]:
                    resb['gene']=gene_n
                    resb['guide_type']='3-4'
                    dft_order_table=pd.concat([dft_order_table,resb])
                else:
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
            elif f[4]==0 or f[5] == 0:
                # Unlike the set-A branches, resc is not checked for emptiness here.
                resb[['sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2']] = resc[['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1']]
                resb['sgID_1_2'] = resb['sgID_1']+"|"+resb['sgID_2']
                if f[4]==0:
                    resb['gene']=gene_n
                    resb['guide_type']=str(gflgb1+2)+"-5"
                    dft_order_table=pd.concat([dft_order_table,resb])
                else: # f[4] != 0, hence f[5] == 0
                    resb['gene']=gene_n
                    resb['guide_type']=str(gflgb1+2)+"-6"
                    dft_order_table=pd.concat([dft_order_table,resb])
            else:
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
        elif resc.shape[0]>0: #at least one guide
            if gflgc1==0:
                # List C supplies the pair only when its two guide IDs differ.
                if resc['sgID_1'][0] != resc['sgID_2'][0]:
                    resc['gene']=gene_n
                    resc['guide_type']='5-6'
                    dft_order_table=pd.concat([dft_order_table,resc])
                else:
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                    dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
            else:
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
                dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
        else:
            # Nothing usable in any of the three lists for this position.
            dft_notfound_all=pd.concat([dft_notfound_all,ref_listA], ignore_index = True)
            dft_notfound_all=pd.concat([dft_notfound_all,ref_listB], ignore_index = True)
            dft_notfound_all=pd.concat([dft_notfound_all,ref_listC], ignore_index = True)
    if dft_order_table.shape[0]>0:
        st.write('**Please note that for guides matching to multiple locations (an example is ABCC6), only first pair is returned**')
        # szd = number of requested pairs that produced no order-table row.
        szt=set12.shape[0]
        szf=dft_order_table.shape[0]
        szd=szt-szf
        if szd>0:
            st.write('Order Ready '+ref_sel+' guides List: '+str(szd)+'/'+str(szt)+' **guides were not found**')
            tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
        else:
            st.write('Order Ready '+ref_sel+' guides List')
            tbl_disp(dft_order_table,'select_genes','SetA_CHM13',5)
    else:
        st.write('**No guides found in ListA, ListB and ListC**')
    if dft_notfound_all.shape[0]>0:
        st.write('**Guides not found in any lists**')
        tbl_disp(dft_notfound_all,'select_genes','SetA_CHM13',6)
def assemble_tbl(t):
    """Pair the first half of the rows of ``t`` with the second half, one pair per row.

    ``t`` must have exactly seven columns, read positionally as guide ID,
    guide sequence, chr, position, mutated guide, strand and mismatch count.
    Row i (for i below half the row count, rounded down) supplies the *_1
    columns and row i + half supplies the *_2 columns. Both sequences are cut
    to their first 20 characters and sgID_1_2 is 'sgID_1|sgID_2'.

    Returns a DataFrame with the nine pair columns; every output row carries
    index label 0.
    """
    pair_columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']
    half = int(t.shape[0]/2)
    assembled = pd.DataFrame(columns=pair_columns)

    for offset in range(half):
        first = t.iloc[[offset]].reset_index(drop=True)
        first.columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','mutated_guide', 'strand', 'num_mismatch']
        second = t.iloc[[half + offset]].reset_index(drop=True)
        second.columns = ['sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2','mutated_guide2', 'strand2', 'num_mismatch2']
        pair = pd.DataFrame({
            'sgID_1': first['sgID_1'],
            'sgRNA_1': first['sgRNA_1'].str.slice(0, 20),
            'chr_sgRNA_1': first['chr_sgRNA_1'],
            'position_sgRNA_1': first['position_sgRNA_1'],
            'sgID_2': second['sgID_2'],
            'sgRNA_2': second['sgRNA_2'].str.slice(0, 20),
            'chr_sgRNA_2': second['chr_sgRNA_2'],
            'position_sgRNA_2': second['position_sgRNA_2'],
            'sgID_1_2': first['sgID_1'] + "|" + second['sgID_2'],
        })
        assembled = pd.concat([assembled, pair])

    return assembled
| #Get non-targeting lists | |
def get_lists_non_targeting(ref_list,list_found_ref,list_notfound_ref):
    """Look up the guides named in ``ref_list`` and also report alternate-loci hits.

    Parameters:
        ref_list: DataFrame with an 'sgID_AB' column of 'sgID_1|sgID_2' strings.
        list_found_ref: DataFrame with the columns gene, ref_guide, chr,
            position, mutated_guide, strand and num_mismatch; 'gene' is
            compared against the individual guide IDs.
        list_notfound_ref: DataFrame with a 'gene' column of guide IDs that
            were not found.

    Returns an 8-tuple:
        0. paired table built by assemble_tbl from the zero-mismatch rows whose
           'chr' contains 'chr' (empty frame when there are none);
        1. table built by get_mutated_res from the rows with num_mismatch > 0
           whose 'chr' contains 'chr' (empty frame when there are none);
        2. not-found rows of the first two guide IDs;
        3. zero-mismatch rows whose 'chr' contains 'chr';
        4. rows with num_mismatch > 0 whose 'chr' contains 'chr', sorted by position;
        5. zero-mismatch rows whose 'chr' does not contain 'chr';
        6. rows with num_mismatch > 0 whose 'chr' does not contain 'chr';
        7. guideflg1: 1 if the second guide ID is listed as not found, else 2
           if the first one is, else 0.
    """
    hit_columns = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
    pair_columns = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1','sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    # Individual guide IDs, two per 'sgID_1|sgID_2' entry, in order.
    guide_ids = []
    for pair_id in ref_list.sgID_AB.values:
        guide_ids.append(pair_id.split('|')[0])
        guide_ids.append(pair_id.split('|')[1])

    hits = pd.concat([list_found_ref[list_found_ref['gene'] == guide_id] for guide_id in guide_ids])

    # Zero-mismatch hits, split into alternate loci and 'chr' contigs.
    exact_hits = hits[hits.num_mismatch == 0]
    exact_alt = exact_hits[~exact_hits['chr'].str.contains('chr')]
    exact_main = exact_hits[exact_hits['chr'].str.contains('chr')]

    # Table with both sgRNAs in one row.
    paired = pd.DataFrame(columns=hit_columns)
    if exact_main.shape[0] > 0:
        rows = exact_main.reset_index(drop=True)
        n_rows = rows.shape[0]
        if n_rows == 1:
            # A single hit is paired with itself.
            paired = assemble_tbl(rows.loc[rows.index.repeat(2)].reset_index(drop=True))
        elif n_rows % 2 == 0:
            paired = assemble_tbl(rows)
        else:
            # Odd count: keep neighbouring rows that agree on gene, chr and
            # position as a pair, repeat every other row.
            doubled = pd.DataFrame(columns=hit_columns)
            cursor = 0
            while cursor < n_rows:
                is_pair = cursor < n_rows - 1 and all(
                    rows.iloc[cursor][field] == rows.iloc[cursor + 1][field]
                    for field in ('gene', 'chr', 'position')
                )
                partner, step = (cursor + 1, 2) if is_pair else (cursor, 1)
                doubled = pd.concat([doubled, rows.iloc[[cursor]]], ignore_index = True)
                doubled = pd.concat([doubled, rows.iloc[[partner]]], ignore_index = True)
                cursor += step
            paired = assemble_tbl(doubled)

    # Hits with at least one mismatch, split the same way.
    mutated_hits = hits[hits.num_mismatch > 0].sort_values('position')
    mutated_alt = mutated_hits[~mutated_hits['chr'].str.contains('chr')]
    mutated_main = mutated_hits[mutated_hits['chr'].str.contains('chr')]
    mutated_pairs = pd.DataFrame(columns=pair_columns)
    if mutated_main.shape[0] > 0:
        mutated_pairs = get_mutated_res(mutated_main)

    # Not-found check for the first two guide IDs; the flag tells which guide
    # is listed as missing (the second guide wins when both are).
    missing_first = list_notfound_ref[list_notfound_ref['gene'] == guide_ids[0]]
    missing_second = list_notfound_ref[list_notfound_ref['gene'] == guide_ids[1]]
    guideflg1 = 0
    if missing_first.shape[0] > 0:
        guideflg1 = 2
    if missing_second.shape[0] > 0:
        guideflg1 = 1
    missing = pd.concat([missing_first, missing_second])

    return paired, mutated_pairs, missing, exact_main, mutated_main, exact_alt, mutated_alt, guideflg1
| ########### | |
| #Get All Guides Stats | |
| #def process_all_guides(glist,list,ref_type,guide_type): | |
def process_all_guides(glist,for_list,f_list,nf_list):
    """Look up every guide pair listed in ``glist`` and render the results.

    Parameters
    ----------
    glist : DataFrame
        Must have a ``gene`` column; each value is compared against
        ``for_list['sgID_AB']`` to select the reference guide pair.
    for_list : DataFrame
        Guide library with the columns ``sgID_AB``, ``guide_type``,
        ``protospacer_A`` and ``protospacer_B``.
    f_list, nf_list :
        Passed through unchanged to ``get_lists_non_targeting``
        (presumably the "found" / "not found" search tables of the selected
        reference -- confirm against that helper).

    Returns
    -------
    None. All output is written to the page with ``st.write`` / ``tbl_disp``.
    """
    guide_columns = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
    pair_columns = ['sgID_AB','guide_type','protospacer_A','protospacer_B']

    # One list of non-empty per-guide frames per reported category; each list
    # is concatenated once after the loop instead of re-copying on every pass.
    matched_parts, matched_alt_parts = [], []
    mutated_parts, mutated_alt_parts = [], []
    notfound_parts = []

    for sg_id in glist['gene']:
        ref_pair = for_list[for_list['sgID_AB']==sg_id][pair_columns]
        # The paired-result tables and the guide flag returned by the helper
        # are not displayed by this function, so they are discarded here.
        (_res, _res_mut, res_notfound, list_match, list_mutated,
         list_match_alt, list_mutated_alt, _guide_flag) = get_lists_non_targeting(ref_pair,f_list,nf_list)
        for bucket, frame in (
            (notfound_parts, res_notfound),
            (matched_parts, list_match),
            (mutated_parts, list_mutated),
            # Fix: the original appended ``list_mutated`` here, so the
            # "Matched Guides to Alt Loci" table showed the wrong rows.
            (matched_alt_parts, list_match_alt),
            (mutated_alt_parts, list_mutated_alt),
        ):
            if frame.shape[0]>0:
                bucket.append(frame)

    def _stack(columns, parts):
        # Lead with an empty template so the expected columns exist even when
        # no guide contributed rows to this category.
        return pd.concat([pd.DataFrame(columns=columns), *parts])

    df_matched_guides_ref = _stack(guide_columns, matched_parts)
    df_matched_alt_ref = _stack(guide_columns, matched_alt_parts)
    df_mutated_guides_ref = _stack(guide_columns, mutated_parts)
    df_mutated_guides_alt_ref = _stack(guide_columns, mutated_alt_parts)
    dft_notfoundc = _stack(['gene','ref_guide'], notfound_parts)

    # NOTE(review): every table below is labelled 'SetC_GRCh38' regardless of
    # which list / reference was processed -- kept as in the original; confirm
    # what tbl_disp does with that argument.
    if df_matched_guides_ref.shape[0]>0:
        gl=df_matched_guides_ref['gene']
        dupesm=gl[gl.duplicated()]
        if dupesm.shape[0]>0:
            st.write('**Matched Guides**: '+str(df_matched_guides_ref.shape[0])+' and: '+str(dupesm.shape[0])+' are repeated guides (matched to multiple locations)')
            tbl_disp(df_matched_guides_ref,'select_genes','SetC_GRCh38',17)
            tbl_disp(dupesm,'select_genes','SetC_GRCh38',17)
        else:
            st.write('**Matched Guides**: '+str(df_matched_guides_ref.shape[0]))
            tbl_disp(df_matched_guides_ref,'select_genes','SetC_GRCh38',17)
    if df_matched_alt_ref.shape[0]>0:
        st.write('**Matched Guides to Alt Loci**: '+str(df_matched_alt_ref.shape[0]))
        tbl_disp(df_matched_alt_ref,'select_genes','SetC_GRCh38',17)
    if df_mutated_guides_ref.shape[0]>0:
        gl=df_mutated_guides_ref['gene']
        dupesmu=gl[gl.duplicated()]
        if dupesmu.shape[0]>0:
            st.write('**Mutated Guides (some might have >1 guides)**: '+str(df_mutated_guides_ref.shape[0])+' and: '+str(dupesmu.shape[0])+' are repeated guides')
        else:
            st.write('**Mutated Guides (some might have >1 guides)**: '+str(df_mutated_guides_ref.shape[0]))
        tbl_disp(df_mutated_guides_ref,'select_genes','SetC_GRCh38',18)
    if df_mutated_guides_alt_ref.shape[0]>0:
        st.write('**Mutated Guides to Alt Loci**: '+str(df_mutated_guides_alt_ref.shape[0]))
        tbl_disp(df_mutated_guides_alt_ref,'select_genes','SetC_GRCh38',18)
    if dft_notfoundc.shape[0]>0:
        st.write('**Guides Not Found**: '+str(dft_notfoundc.shape[0]))
        tbl_disp(dft_notfoundc,'select_genes','SetC_GRCh38',19)
| #CALC BASED ON LIST, GUIDE TYPE AND REFERENCE | |
| #END GENERAL FUNCTIONS | |
# Page header and sidebar navigation. ``Calc`` selects which of the four
# views (ReadME / Single-Multiple / All / Not_Found) is rendered below.
st.title('Long Read Guides Search')
st.write('**Important:** Please note that **MTMR3** is not present in guides_c list, so we have **removed it from list a and list b**')
Calc = st.sidebar.radio(
    "",
    ('ReadME', 'Single/Multiple Guides','All','Not_Found'))
if Calc == 'ReadME':
    # Usage instructions. The original emitted the final paragraph twice;
    # the duplicate has been removed.
    expander = st.expander("How to use this app")
    for paragraph in (
        'Please select **Single Gene** OR **Multiple Genes** Menue checkbox from the sidebar',
        'Select a Gene (from genes dropdown list) OR Multiple genes (from table)',
        'A table showing all reference gudies from three LISTS will appear in the main panel. **Please not some of the genes (for example A1BG and GJB7) have multiple guide pairs and all of these are selected.**',
        'To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox',
        'Results are shown as two tables, **Matched** and **Mutated** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files',
        '**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**',
    ):
        expander.markdown(paragraph)
    # Background on how the reference data was produced.
    expander1 = st.expander('Introduction')
    expander1.markdown(
        ' This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.\n'
    )
    for paragraph in (
        'Merged bam file was converted to fasta file using following steps:',
        '- samtools mpileup to generate bcf file',
        '- bcftools to generate vcf file',
        '- bcftools consensus to generate fasta file',
        'A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.',
    ):
        expander1.markdown(paragraph)
| elif Calc=='Single/Multiple Guides': | |
| flg_a_fount=0 | |
| flg_b_fount=0 | |
| flg_c_fount=0 | |
| #st.write('**General Stats:**') | |
| #st.write('**GRCh38 Stats: Guides Found: **'+str(lsita_ref_found_sz)+"/"+str(lista_sz)) | |
| with st.form(key='columns_in_form'): | |
| c2, c3 = st.columns(2) | |
| with c2: | |
| multi_genes = st.multiselect( | |
| 'Please select genes list to start processing', | |
| variants_s) | |
| Updated=st.form_submit_button(label = 'Update') | |
| listA_concatenated_orig = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']) | |
| reflistA_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']) | |
| reflistB_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']) | |
| reflistC_concatenated = pd.DataFrame(columns=['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']) | |
| for variant in multi_genes: | |
| ref_listA=listA[listA['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listA = ref_listA[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| reflistA_concatenated=pd.concat([reflistA_concatenated,ref_listA]) | |
| ref_listB=listB[listB['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listB = ref_listB[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| reflistB_concatenated=pd.concat([reflistB_concatenated,ref_listB]) | |
| ref_listC=listC[listC['gene']==variant][['gene','guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listC = ref_listC[['gene','sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| reflistC_concatenated=pd.concat([reflistC_concatenated,ref_listC]) | |
| listA_concatenated_orig = pd.concat([listA_concatenated_orig,ref_listA,ref_listB,ref_listC]) | |
| if listA_concatenated_orig.shape[0] > 0: | |
| #st.markdown(table_edit,unsafe_allow_html=True) | |
| st.write('**Input** Guides (all 6 from 3 sets).') | |
| st.write('**Please Select Guides common to ALL 3 Lists to procede further Processing**') | |
| st.markdown(caution_genes,unsafe_allow_html=True) | |
| with st.form(key='columns_in_form_a'): | |
| c2, c3 = st.columns([10,2]) | |
| with c2: | |
| get_table_order=tbl_disp(listA_concatenated_orig,'variant','ref_guides',111,0) | |
| with c3: | |
| ref_sel = st.radio("Select Reference", | |
| ('CHM13','GRCh38'), | |
| horizontal=True) | |
| Updated1=st.form_submit_button(label = 'Generate Order Ready Table') | |
| if not isinstance(get_table_order, type(None)): # and Updated1:# and get_table_order.shape[0]>0: | |
| if ref_sel=='GRCh38': | |
| list_founda=listA_found_ref | |
| list_notfounda=listA_notfound_ref | |
| list_foundb=listB_found_ref | |
| list_notfoundb=listB_notfound_ref | |
| list_foundc=listC_found_ref | |
| list_notfoundc=listC_notfound_ref | |
| else: | |
| list_founda=listA_found_lr | |
| list_notfounda=listA_notfound_lr | |
| list_foundb=listB_found_lr | |
| list_notfoundb=listB_notfound_lr | |
| list_foundc=listC_found_lr | |
| list_notfoundc=listC_notfound_lr | |
| variant_set12=get_table_order[get_table_order['guide_type']=='1-2']['sgID_AB'] | |
| variant_set34=get_table_order[get_table_order['guide_type']=='3-4']['sgID_AB'] | |
| variant_set56=get_table_order[get_table_order['guide_type']=='5-6']['sgID_AB'] | |
| #st.table(variant_set12) | |
| #st.write(variant_set12) | |
| if variant_set12.shape[0]==variant_set34.shape[0]==variant_set56.shape[0]: | |
| #########Here we call order ready table | |
| #order_ready_tbl_GRCh38(variant_set12,variant_set34,variant_set56) | |
| #order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,listA_found_lr,listA_notfound_lr,listB_found_lr,listB_notfound_lr,listC_found_lr,listC_notfound_lr) | |
| order_ready_tbl_CHM13(variant_set12,variant_set34,variant_set56,list_founda,list_notfounda,list_foundb,list_notfoundb,list_foundc,list_notfoundc,ref_sel) | |
| ########END ORDER READY TABLE | |
| elif variant_set12.shape[0]!=variant_set34.shape[0]: | |
| st.markdown("""**<span style='color:red'>SetA and SetB</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True) | |
| elif variant_set12.shape[0]!=variant_set56.shape[0]: | |
| st.markdown("""**<span style='color:red'>SetA and SetC</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True) | |
| elif variant_set34.shape[0]!=variant_set56.shape[0]: | |
| st.markdown("""**<span style='color:red'>SetB and SetC</span> guides are not same, Please correct the problem and re-run**""",unsafe_allow_html=True) | |
| else: | |
| st.markdown("""**<span style='color:red'>Probably Mixed guides are selected from three lists, Please correct the problem and re-run</span>**""",unsafe_allow_html=True) | |
| else: | |
| st.write('**Please select guides and Press Update Button to Begin Processing**') | |
| if 'get_table_order' in locals(): | |
| if not isinstance(get_table_order, type(None)): | |
| st.write('**For List wise results, Please select a List**') | |
| reflistA_concatenated=get_table_order[get_table_order['guide_type']=='1-2'] | |
| reflistA_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True) | |
| reflistB_concatenated=get_table_order[get_table_order['guide_type']=='3-4'] | |
| reflistB_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True) | |
| reflistC_concatenated=get_table_order[get_table_order['guide_type']=='5-6'] | |
| reflistC_concatenated.drop("_selectedRowNodeInfo",axis=1,inplace=True) | |
| #st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**') | |
| with st.form(key='columns_in_form_lists'): | |
| c2, c3= st.columns([10,1])#([10,10]) | |
| with c2: | |
| List_Selected = st.selectbox('Please select list', | |
| ('','ListA','ListB','ListC')) | |
| Show_ListResults=st.form_submit_button(label = 'GO') | |
| #ListARes = st.checkbox('Results For SetA',key=300) | |
| if List_Selected=='ListA':# and not isinstance(get_table, type(None)):#get_table!=None: | |
| ref_list= listA | |
| st.write('**Please select Guides From Table Below to processes from ListA**') | |
| with st.form(key='columns_in_form_listsA'): | |
| c2, c3= st.columns([100,2])#([10,10]) | |
| with c2: | |
| get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0) | |
| #List_Selected = st.selectbox('Please select list', | |
| #('ListA','ListB','ListC')) | |
| Show_ListResults=st.form_submit_button(label = 'Show ListA Results') | |
| #st.write('**Please select Guides From Table Below to processes from ListA**') | |
| #get_table=tbl_disp(reflistA_concatenated,variant,'ref_guides',2,0) | |
| if not isinstance(get_table, type(None)): | |
| if ref_sel=='GRCh38': | |
| list_found=listA_found_ref | |
| list_notfound=listA_notfound_ref | |
| else: | |
| list_found=listA_found_lr | |
| list_notfound=listA_notfound_lr | |
| variant_set=get_table['sgID_AB'] | |
| dft_a = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B']) | |
| dft_resa=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_res_muta=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_notfounda=pd.DataFrame(columns=['gene','ref_guide']) | |
| df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| #CHECK FOR GRCh38 | |
| for i in range(variant_set.shape[0]): | |
| #ref_listA=listA[listA['sgID_AB']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listA=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| #st.table(ref_listA) | |
| res,res_mut,res_notfound,list_match,list_mutated,gflga1=get_lists(ref_listA,list_found,list_notfound) | |
| #dft_a=dft_a.append(ref_listA) | |
| if res.shape[0]>0: | |
| dft_resa=pd.concat([dft_resa,res]) | |
| if res_mut.shape[0]>0: | |
| dft_res_muta=pd.concat([dft_res_muta,res_mut]) | |
| if res_notfound.shape[0]>0: | |
| dft_notfounda= pd.concat([dft_notfounda,res_notfound]) | |
| if list_match.shape[0]>0: | |
| df_matched_guides_ref= pd.concat([df_matched_guides_ref,list_match]) | |
| if list_mutated.shape[0]>0: | |
| df_mutated_guides_ref= pd.concat([df_mutated_guides_ref,list_mutated]) | |
| #st.write('Selected Reference Guides for **Set A**') | |
| #tbl_disp(dft_a,'All','ReferenceGuides',0) | |
| st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**') | |
| if dft_resa.shape[0]>0: | |
| st.write('Matched to '+ref_sel+' Reference Guides for **Set A**') | |
| tbl_disp(dft_resa,'select_genes','SetA_GRCh38',3) | |
| elif dft_res_muta.shape[0]>0: | |
| st.write('None of the guides Matched, So reporting **Mutated to** '+ref_sel+' Reference Guides for **Set A**') | |
| st.markdown(caution1,unsafe_allow_html=True) | |
| tbl_disp(dft_res_muta,'select_genes','SetA_Mutated_GRCh38',4) | |
| if dft_notfounda.shape[0]>0: | |
| st.write('**SetA Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**') | |
| #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38') | |
| st.table(dft_notfounda) | |
| #ListBRes = st.checkbox('Results For SetB',key=40) | |
| if List_Selected=='ListB': # and not isinstance(get_table, type(None)):#get_table!=None: | |
| ref_list= listB | |
| st.write('**Please select Guides From Table Below to processes from ListB**') | |
| with st.form(key='columns_in_form_listsA'): | |
| c2, c3= st.columns([100,2])#([10,10]) | |
| with c2: | |
| get_table=tbl_disp(reflistB_concatenated,variant,'ref_guides',2,0) | |
| Show_ListResults=st.form_submit_button(label = 'Show ListB Results') | |
| if not isinstance(get_table, type(None)): | |
| if ref_sel=='GRCh38': | |
| list_found=listB_found_ref | |
| list_notfound=listB_notfound_ref | |
| else: | |
| list_found=listB_found_lr | |
| list_notfound=listB_notfound_lr | |
| #variant_set=get_table[['gene']] | |
| variant_set=get_table['sgID_AB'] | |
| dft_b = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B']) | |
| dft_resb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_res_mutb=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_notfoundb=pd.DataFrame(columns=['gene','ref_guide']) | |
| df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| #CHECK FOR GRCh38 | |
| for i in range(variant_set.shape[0]): | |
| #ref_listB=listB[listB['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listB=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listB =ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| res,res_mut,res_notfound,list_match,list_mutated,gflgb1=get_lists(ref_listB,list_found,list_notfound) | |
| #dft_b=dft_b.append(ref_listB) | |
| if res.shape[0]>0: | |
| dft_resb=pd.concat([dft_resb,res]) | |
| if res_mut.shape[0]>0: | |
| dft_res_mutb=pd.concat([dft_res_mutb,res_mut]) | |
| if res_notfound.shape[0]>0: | |
| dft_notfoundb= pd.concat([dft_notfoundb,res_notfound]) | |
| if list_match.shape[0]>0: | |
| df_matched_guides_ref= pd.concat([df_matched_guides_ref,list_match]) | |
| if list_mutated.shape[0]>0: | |
| df_mutated_guides_ref= pd.concat([df_mutated_guides_ref,list_mutated]) | |
| #st.write('Selected Reference Guides for **Set B**') | |
| #tbl_disp(dft_b,'All','ReferenceGuides',0) | |
| st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**') | |
| if dft_resb.shape[0]>0: | |
| st.write('Matched to '+ref_sel+' Reference Guides for **Set B**') | |
| tbl_disp(dft_resb,'select_genes','SetB_GRCh38',10) | |
| elif dft_res_mutb.shape[0]>0: | |
| st.write('None of the guides Matched, So reporting **Mutated to '+ref_sel+' Reference Guides for **Set B**') | |
| st.markdown(caution1,unsafe_allow_html=True) | |
| tbl_disp(dft_res_mutb,'select_genes','SetB_Mutated_GRCh38',11) | |
| if dft_notfoundb.shape[0]>0: | |
| st.write('**SetB Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**') | |
| #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38') | |
| st.table(dft_notfoundb) | |
| #ListCRes = st.checkbox('Results For SetC',key=50) | |
| if List_Selected=='ListC': # and not isinstance(get_table, type(None)):#get_table!=None: | |
| ref_list= listC | |
| st.write('**Please select Guides From Table Below to processes from ListC**') | |
| with st.form(key='columns_in_form_listsA'): | |
| c2, c3= st.columns([100,2])#([10,10]) | |
| with c2: | |
| get_table=tbl_disp(reflistC_concatenated,variant,'ref_guides',2,0) | |
| Show_ListResults=st.form_submit_button(label = 'Show ListC Results') | |
| if not isinstance(get_table, type(None)): | |
| if ref_sel=='GRCh38': | |
| list_found=listC_found_ref | |
| list_notfound=listC_notfound_ref | |
| else: | |
| list_found=listC_found_lr | |
| list_notfound=listC_notfound_lr | |
| variant_set=get_table['sgID_AB'] | |
| dft_c = pd.DataFrame(columns=['gene','guide_type','protospacer_A','protospacer_B']) | |
| dft_resc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_res_mutc=pd.DataFrame(columns=['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1', 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']) | |
| dft_notfoundc=pd.DataFrame(columns=['gene','ref_guide']) | |
| df_matched_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| df_mutated_guides_ref = pd.DataFrame(columns=['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']) | |
| #CHECK FOR GRCh38 | |
| for i in range(variant_set.shape[0]): | |
| #ref_listC=listC[listC['gene']==variant_set.iloc[i]['gene']][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listC=ref_list[ref_list['sgID_AB']==variant_set.iloc[i]][['guide_type','protospacer_A','protospacer_B','sgID_AB']] | |
| ref_listC =ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']] | |
| #ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B'] | |
| res,res_mut,res_notfound,list_match,list_mutated,gflgc1=get_lists(ref_listC,list_found,list_notfound) | |
| #dft_c=dft_c.append(ref_listC) | |
| if res.shape[0]>0: | |
| dft_resc=pd.concat([dft_resc,res]) | |
| if res_mut.shape[0]>0: | |
| dft_res_mutc=pd.concat([dft_res_mutc,res_mut]) | |
| if res_notfound.shape[0]>0: | |
| dft_notfoundc= pd.concat([dft_notfoundc,res_notfound]) | |
| if list_match.shape[0]>0: | |
| df_matched_guides_ref= pd.concat([df_matched_guides_ref,list_match]) | |
| if list_mutated.shape[0]>0: | |
| df_mutated_guides_ref= pd.concat([df_mutated_guides_ref,list_mutated]) | |
| #st.write('Selected Reference Guides for **Set C**') | |
| #tbl_disp(dft_c,'All','ReferenceGuides',0) | |
| st.write('**Important:** If a guides is **not** in **found, mutated and not_found list (such as GSTT1), then it is found in Alternative Loci and Removed**') | |
| if dft_resc.shape[0]>0: | |
| st.write('Matched to '+ref_sel+' Reference Guides for **Set C**') | |
| tbl_disp(dft_resc,'select_genes','SetC_GRCh38',17) | |
| elif dft_res_mutc.shape[0]>0: | |
| st.write('None of the guides Matched, So reporting **Mutated to '+ref_sel+' Reference Guides for **Set C**') | |
| st.markdown(caution1,unsafe_allow_html=True) | |
| tbl_disp(dft_res_mutc,'select_genes','SetC_Mutated_GRCh38',18) | |
| if dft_notfoundc.shape[0]>0: | |
| st.write('**SetC Guides Not Found in '+ref_sel+' (None of the guides are Matched/Mutated)**') | |
| #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38') | |
| st.table(dft_notfoundc) | |
| elif Calc=='Not_Found': | |
| ListAResNotFound = st.checkbox('Results For SetA',key=30) | |
| if ListAResNotFound and listA_notfound_lr.shape[0]>0: | |
| listA_notfound_LR_sorted=listA_notfound_lr.sort_values('gene') | |
| sz1a=listA_notfound_LR_sorted.shape[0] | |
| vaild_guides_a = listA_notfound_LR_sorted[~listA_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz2a=vaild_guides_a.shape[0] | |
| st.write(str(sz2a)+"/"+str(sz1a)+' Guides Not Found') | |
| tbl_disp(vaild_guides_a,'all_not_found','SetA_KOLF2.1',23,0) | |
| #now get gene names only | |
| genesa=vaild_guides_a['gene'].str.split('_').str[0] | |
| genesa1=genesa[genesa.duplicated(keep=False)] | |
| genesa2=genesa1.unique() | |
| pair_lista=[] | |
| for g in genesa2: | |
| g1=vaild_guides_a[vaild_guides_a['gene'].str.contains(g)] | |
| g2=g1.reset_index(drop=True) | |
| pair_lista.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]]) | |
| pair_missmatch_a = pd.DataFrame(pair_lista, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2']) | |
| sz22a=pair_missmatch_a.shape[0] | |
| st.write(str(sz22a)+"/"+str(sz2a)+' Paired Guides Not Found') | |
| tbl_disp(pair_missmatch_a,'all_not_found','SetA_KOLF2.1',23,0) | |
| non_targeting_guides_a = listA_notfound_LR_sorted[listA_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz3a=non_targeting_guides_a.shape[0] | |
| st.write(str(sz3a)+"/"+str(sz1a)+' no-targeting Guides Not Found') | |
| tbl_disp(non_targeting_guides_a,'all_not_found','SetA_KOLF2.1',23,0) | |
| ListBResNotFound = st.checkbox('Results For SetB',key=40) | |
| if ListBResNotFound: | |
| listB_notfound_LR_sorted=listB_notfound_lr.sort_values('gene') | |
| sz1b=listB_notfound_LR_sorted.shape[0] | |
| vaild_guides_b = listB_notfound_LR_sorted[~listB_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz2b=vaild_guides_b.shape[0] | |
| st.write(str(sz2b)+"/"+str(sz1b)+' Guides Not Found') | |
| tbl_disp(vaild_guides_b,'all_not_found','SetA_KOLF2.1',23,0) | |
| #now get gene names only | |
| genesb=vaild_guides_b['gene'].str.split('_').str[0] | |
| genesb1=genesb[genesb.duplicated(keep=False)] | |
| genesb2=genesb1.unique() | |
| pair_listb=[] | |
| for g in genesb2: | |
| g1=vaild_guides_b[vaild_guides_b['gene'].str.contains(g)] | |
| g2=g1.reset_index(drop=True) | |
| pair_listb.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]]) | |
| pair_missmatch_b = pd.DataFrame(pair_listb, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2']) | |
| sz22b=pair_missmatch_b.shape[0] | |
| st.write(str(sz22b)+"/"+str(sz2b)+' Paired Guides Not Found') | |
| tbl_disp(pair_missmatch_b,'all_not_found','SetA_KOLF2.1',23,0) | |
| non_targeting_guides_b = listB_notfound_LR_sorted[listB_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz3b=non_targeting_guides_b.shape[0] | |
| st.write(str(sz3b)+"/"+str(sz1b)+' no-targeting Guides Not Found') | |
| tbl_disp(non_targeting_guides_b,'all_not_found','SetA_KOLF2.1',23,0) | |
| ListCResNotFound = st.checkbox('Results For SetC',key=50) | |
| if ListCResNotFound: | |
| listC_notfound_LR_sorted=listC_notfound_lr.sort_values('gene') | |
| sz1c=listC_notfound_LR_sorted.shape[0] | |
| vaild_guides_c = listC_notfound_LR_sorted[~listC_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz2c=vaild_guides_c.shape[0] | |
| st.write(str(sz2c)+"/"+str(sz1c)+' Guides Not Found') | |
| tbl_disp(vaild_guides_c,'all_not_found','SetA_KOLF2.1',23,0) | |
| #now get gene names only | |
| genesc=vaild_guides_c['gene'].str.split('_').str[0] | |
| genesc1=genesc[genesc.duplicated(keep=False)] | |
| genesc2=genesc1.unique() | |
| pair_listc=[] | |
| for g in genesc2: | |
| g1=vaild_guides_c[vaild_guides_c['gene'].str.contains(g)] | |
| g2=g1.reset_index(drop=True) | |
| pair_listc.append([g2.gene[0],g2.ref_guide[0],g2.gene[1],g2.ref_guide[1]]) | |
| pair_missmatch_c = pd.DataFrame(pair_listc, columns=['sgID_1','sgRNA_1','sgID_2','sgRNA_2']) | |
| sz22c=pair_missmatch_c.shape[0] | |
| st.write(str(sz22c)+"/"+str(sz2c)+' Paired Guides Not Found') | |
| tbl_disp(pair_missmatch_c,'all_not_found','SetA_KOLF2.1',23,0) | |
| non_targeting_guides_c = listC_notfound_LR_sorted[listC_notfound_LR_sorted['gene'].str.contains("non")] | |
| sz3c=non_targeting_guides_c.shape[0] | |
| st.write(str(sz3c)+"/"+str(sz1c)+' no-targeting Guides Not Found') | |
| tbl_disp(non_targeting_guides_c,'all_not_found','SetA_KOLF2.1',23,0) | |
| else: | |
| guidetype = st.radio("Select Guide Type",('Non-targetting','Regular'),horizontal=True) | |
| if guidetype=='Non-targetting': | |
| with st.form(key='columns_in_form_non'): | |
| c2, c3 = st.columns([5,5])#([10,10]) | |
| with c2: | |
| guides_List = st.selectbox('Please select list', | |
| ('ListA','ListB','ListC')) | |
| with c3: | |
| ref_type_sel_non = st.radio("Select Reference", | |
| ('CHM13','GRCh38'), | |
| horizontal=True) | |
| Show_Results_non=st.form_submit_button(label = 'Non-targeting Guides Results') | |
| if Show_Results_non and guides_List=='ListA': | |
| for_list=listA | |
| if ref_type_sel_non=='GRCh38': | |
| f_list=listA_found_ref | |
| nf_list=listA_notfound_ref | |
| else: | |
| f_list=listA_found_lr | |
| nf_list=listA_notfound_lr | |
| st.write('Total: '+str(len(non_targeting_lista))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_lista))+' single guides in ListA') | |
| process_all_guides(pd.DataFrame(pd.Series(non_targeting_lista,name='gene')),for_list,f_list,nf_list) | |
| if Show_Results_non and guides_List=='ListB': | |
| for_list=listB | |
| if ref_type_sel_non=='GRCh38': | |
| f_list=listB_found_ref | |
| nf_list=listB_notfound_ref | |
| else: | |
| f_list=listB_found_lr | |
| nf_list=listB_notfound_lr | |
| st.write('Total: '+str(len(non_targeting_listb))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_listb))+' single guides in ListA') | |
| process_all_guides(pd.DataFrame(pd.Series(non_targeting_listb,name='gene')),for_list,f_list,nf_list) | |
| if Show_Results_non and guides_List=='ListC': | |
| for_list=listC | |
| if ref_type_sel_non=='GRCh38': | |
| f_list=listC_found_ref | |
| nf_list=listC_notfound_ref | |
| else: | |
| f_list=listC_found_lr | |
| nf_list=listC_notfound_lr | |
| st.write('Total: '+str(len(non_targeting_listc))+' Non-targeting Guide pairs and '+str(2*len(non_targeting_listc))+' single guides in ListA') | |
| process_all_guides(pd.DataFrame(pd.Series(non_targeting_listc,name='gene')),for_list,f_list,nf_list) | |
| elif guidetype=='Regular': | |
| st.write('**Maximum End Index=** '+str(regular_lista.shape[0])) | |
| with st.form(key='columns_in_form_regular'): | |
| c2, c3, c4 = st.columns([5,5,5])#([10,10]) | |
| with c2: | |
| set_start = int(st.text_input('Start Index', '0')) | |
| with c3: | |
| set_end = int(st.text_input('End Index', str(regular_lista.shape[0]))) | |
| with c4: | |
| ref_type_sel = st.radio("Select Reference", | |
| ('CHM13','GRCh38'), | |
| horizontal=True) | |
| Show_Results=st.form_submit_button(label = 'Show Regular Guides Results') | |
| if Show_Results:# and guides_List=="ListA": | |
| regular_listc=regular_listc[set_start:set_end] | |
| regular_listb=regular_listb.iloc[set_start:set_end] | |
| regular_lista=regular_lista.iloc[set_start:set_end] | |
| if ref_type_sel=='GRCh38': | |
| list_founda=listA_found_ref | |
| list_notfounda=listA_notfound_ref | |
| list_foundb=listB_found_ref | |
| list_notfoundb=listB_notfound_ref | |
| list_foundc=listC_found_ref | |
| list_notfoundc=listC_notfound_ref | |
| else: | |
| list_founda=listA_found_lr | |
| list_notfounda=listA_notfound_lr | |
| list_foundb=listB_found_lr | |
| list_notfoundb=listB_notfound_lr | |
| list_foundc=listC_found_lr | |
| list_notfoundc=listC_notfound_lr | |
| dupesq=list(duplicates(listA['gene'])) | |
| non_targetinga=variantsa1[pd.Series(variantsa1).str.contains('non-targeting')] | |
| regulara=variantsa1[~pd.Series(variantsa1).str.contains('non-targeting')] | |
| st.write('Total: '+str(len(regulara))+' Regular Guide (unique genes only) **Excluding:** '+str(len(non_targetinga))+' Non-targeting pairs **and** '+str(len(dupesq))+' Repeated entries (same gene names)') | |
| order_ready_tbl_CHM13(regular_lista,regular_listb,regular_listc,list_founda,list_notfounda,list_foundb,list_notfoundb,list_foundc,list_notfoundc,ref_type_sel) | |