"""Streamlit front end for TIGER Cas13 efficacy prediction."""
import os
import tempfile

import pandas as pd
import streamlit as st

import tiger

ENTRY_METHODS = dict(
    manual='Manual entry of single transcript',
    fasta="Fasta file upload (supports multiple transcripts if they have unique ID's)"
)

# containers (created in the order they should appear on the page)
DOCUMENTATION = st.container()
MODE_SELECTION = st.container()
TRANSCRIPT_ENTRY = st.container()
RUNTIME = st.container()
RESULTS = st.container()


@st.cache_data
def convert_df(df):
    """Return ``df`` serialized as UTF-8 encoded CSV bytes.

    Cached so the conversion is not recomputed on every rerun.
    """
    return df.to_csv().encode('utf-8')


def mode_change_callback():
    """Keep the off-target checkbox consistent with the selected run mode.

    When the mode is ``tiger.RUN_MODES['all']`` the off-target checkbox is
    cleared and disabled; for every other mode it is enabled.
    """
    if st.session_state.mode == tiger.RUN_MODES['all']:
        st.session_state.check_off_targets = False
        st.session_state.disable_off_target_checkbox = True
    else:
        st.session_state.disable_off_target_checkbox = False


def _report_entry_error(message):
    """Write a validation message into the transcript-entry container."""
    with TRANSCRIPT_ENTRY:
        st.write(message)


def _load_fasta_upload(uploaded):
    """Load transcripts from an uploaded fasta file.

    ``tiger.load_transcripts`` is given file paths, so the upload is first
    written to disk. The file goes into a private temporary directory under a
    fixed name: the client-supplied file name is never used as a path (it
    could otherwise overwrite files in the working directory), and the
    directory is removed as soon as loading finishes.

    Args:
        uploaded: the object stored by ``st.file_uploader``; its ``name`` and
            ``getvalue()`` are read.

    Returns:
        Whatever ``tiger.load_transcripts`` returns for the written file.

    Raises:
        UnicodeDecodeError: if the uploaded bytes are not valid UTF-8.
    """
    # Keep only the original extension, in case the loader inspects it.
    extension = os.path.splitext(os.path.basename(uploaded.name))[1]
    with tempfile.TemporaryDirectory() as tmp_dir:
        fasta_path = os.path.join(tmp_dir, 'upload' + extension)
        with open(fasta_path, 'w', encoding='utf-8') as f:
            f.write(uploaded.getvalue().decode('utf-8'))
        # NOTE(review): assumes load_transcripts reads the file eagerly, since
        # the file is deleted when this ``with`` block exits -- confirm.
        return tiger.load_transcripts([fasta_path], enforce_unique_ids=False)


def run():
    """Collect the requested transcripts, validate them, and run the model.

    Reads ``entry_method``, ``manual_entry``/``fasta_entry``, ``mode`` and
    ``check_off_targets`` from ``st.session_state``. On a validation failure a
    message is written to the transcript-entry container and the function
    returns without touching the stored results. On success the two values
    returned by ``tiger.tiger_exhibit`` are stored in
    ``st.session_state.on_target`` and ``st.session_state.off_target``.
    """
    entry_method = st.session_state.entry_method

    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # manual entry
    if entry_method == ENTRY_METHODS['manual']:
        # NOTE(review): IDs are a regular column here, while the duplicate
        # check below inspects the index -- confirm this matches the layout
        # returned by tiger.load_transcripts.
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry]
        })

    # fasta file upload
    elif entry_method == ENTRY_METHODS['fasta']:
        if st.session_state.fasta_entry is not None:
            transcripts = _load_fasta_upload(st.session_state.fasta_entry)

    # nothing to predict on (no file uploaded, or no records in the file)
    if transcripts.empty:
        _report_entry_error('No transcripts were provided')
        return

    # make sure all transcripts have unique identifiers
    if transcripts.index.has_duplicates:
        _report_entry_error("Duplicate transcript ID's detected in fasta file")
        return

    # convert to upper case as used by tokenizer
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(str.upper)

    # make sure every transcript only uses characters known to the tokenizer
    allowed = set(tiger.NUCLEOTIDE_TOKENS.keys())
    valid = transcripts[tiger.SEQ_COL].apply(allowed.issuperset)
    if not valid.all():
        _report_entry_error('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
        return

    # make sure all transcripts satisfy length requirements
    too_short = transcripts[tiger.SEQ_COL].apply(len) < tiger.TARGET_LEN
    if too_short.any():
        _report_entry_error(f'Transcript(s) must be at least {tiger.TARGET_LEN:d} bases.')
        return

    # the radio widget stores the display label; map it back to the mode key
    mode_by_label = {label: mode for mode, label in tiger.RUN_MODES.items()}

    # run model
    st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
        transcripts=transcripts,
        mode=mode_by_label[st.session_state.mode],
        # status=RUNTIME,
        check_off_targets=st.session_state.check_off_targets
    )


if __name__ == '__main__':

    # app initialization
    if 'mode' not in st.session_state:
        st.session_state.mode = tiger.RUN_MODES['all']
        st.session_state.disable_off_target_checkbox = True
    if 'entry_method' not in st.session_state:
        st.session_state.entry_method = ENTRY_METHODS['manual']
    if 'run' not in st.session_state:
        st.session_state.run = False
    if 'on_target' not in st.session_state:
        st.session_state.on_target = None
    if 'off_target' not in st.session_state:
        st.session_state.off_target = None

    # title and documentation
    with DOCUMENTATION:
        st.title('TIGER Cas13 Efficacy Prediction')

    # mode selection
    with MODE_SELECTION:
        col1, col2 = st.columns([0.65, 0.35])
        with col1:
            st.radio(
                label='What do you want to predict?',
                options=tuple(tiger.RUN_MODES.values()),
                key='mode',
                on_change=mode_change_callback
            )
        with col2:
            st.checkbox(
                label='Find off-target effects (slow)',
                key='check_off_targets',
                disabled=st.session_state.disable_off_target_checkbox
            )

    # transcript entry
    with TRANSCRIPT_ENTRY:
        st.selectbox(
            label='How would you like to provide transcripts of interest?',
            options=tuple(ENTRY_METHODS.values()),
            key='entry_method',
        )
        if st.session_state.entry_method == ENTRY_METHODS['manual']:
            st.text_input(
                label='Enter a target transcript:',
                key='manual_entry',
                placeholder='Upper or lower case',
            )
        elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
            st.file_uploader(
                label='Upload a fasta file:',
                key='fasta_entry',
            )

    # runtime
    with RUNTIME:
        st.button(label='Get predictions!', on_click=run)

    # results
    with RESULTS:

        # on-target results
        if st.session_state.on_target is not None:
            st.write('On-target predictions:', st.session_state.on_target)
            st.download_button(
                label='Download on-target predictions',
                data=convert_df(st.session_state.on_target),
                file_name='on_target.csv',
                mime='text/csv'
            )

        # off-target results
        if st.session_state.off_target is not None:
            if len(st.session_state.off_target) > 0:
                st.write('Off-target predictions:', st.session_state.off_target)
                st.download_button(
                    label='Download off-target predictions',
                    data=convert_df(st.session_state.off_target),
                    file_name='off_target.csv',
                    mime='text/csv'
                )
            else:
                st.write('We did not find any off-target effects!')