Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import os
import tempfile

import pandas as pd
import streamlit as st

import tiger
# Labels shown in the "entry method" select box, keyed by a short internal name.
# The callbacks and process_input() compare the widget value against these labels.
ENTRY_METHODS = {
    'manual': 'Manual entry of single transcript',
    'fasta': "Fasta file upload (supports multiple transcripts if they have unique ID's)",
}
# Page layout containers. They are created in the order listed here
# (documentation, mode selection, transcript entry, results); the rest of the
# file fills them via `with <CONTAINER>:` blocks.
DOCUMENTATION, MODE_SELECTION, TRANSCRIPT_ENTRY, RESULTS = (
    st.container() for _ in range(4)
)
def convert_df(df):
    """Return *df* serialized as CSV (pandas defaults) and encoded as UTF-8 bytes.

    NOTE: nothing in this function caches the result; if the conversion must
    not be recomputed on every Streamlit rerun, a cache decorator has to be
    applied to it.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
def mode_change_callback():
    """React to a change of the prediction-mode radio button.

    When the selected mode is ``tiger.RUN_MODES['all']`` the off-target
    checkbox is cleared and disabled; for any other mode it is enabled again
    (its current value is left untouched).
    """
    state = st.session_state
    all_mode_selected = state.mode == tiger.RUN_MODES['all']

    if all_mode_selected:
        # un-tick the box before locking it so a stale True cannot linger
        state.check_off_targets = False
    state.disable_off_target_checkbox = all_mode_selected
def entry_method_change_callback():
    """Enable the input widget matching the chosen entry method, disable the other.

    If the selected value is neither of the two known entry-method labels the
    disabled flags are left unchanged.
    """
    state = st.session_state
    selected = state.entry_method

    if selected not in (ENTRY_METHODS['manual'], ENTRY_METHODS['fasta']):
        return

    manual_selected = selected == ENTRY_METHODS['manual']
    state.manual_entry_disabled = not manual_selected
    state.fasta_entry_disabled = manual_selected
def _report_input_error(message):
    """Write a validation message into the transcript-entry container."""
    with TRANSCRIPT_ENTRY:
        st.write(message)


def _load_uploaded_fasta(uploaded_file):
    """Load transcripts from an uploaded fasta file through a private temporary copy.

    The upload is written into a fresh temporary directory that is removed as
    soon as ``tiger.load_transcripts`` returns, so nothing is left behind in
    the working directory and no existing file can be overwritten.
    """
    # The file name comes from the client: keep only its base name so it can
    # never point outside the temporary directory. The name itself is kept
    # in case the loader inspects the file extension (assumption -- confirm
    # against tiger.load_transcripts).
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ('', '.', '..'):
        safe_name = 'upload.fasta'

    with tempfile.TemporaryDirectory() as tmp_dir:
        fasta_path = os.path.join(tmp_dir, safe_name)
        # write the raw bytes: no decode/encode round trip that could raise
        # on non-UTF-8 content or alter line endings
        with open(fasta_path, 'wb') as f:
            f.write(uploaded_file.getvalue())
        return tiger.load_transcripts([fasta_path], enforce_unique_ids=False)


def process_input():
    """Validate the submitted transcripts and, if they pass, flag the model to run.

    Reads the chosen entry method and the matching widget value from
    ``st.session_state``, stores the resulting table in
    ``st.session_state.transcripts`` and sets ``st.session_state.run = True``
    only when every check succeeds. On the first failed check a message is
    written to the transcript-entry container and the function returns without
    touching ``run``.

    Checks, in order: a fasta file was actually uploaded (fasta mode), at least
    one transcript is present, transcript IDs are unique, every sequence has at
    least ``tiger.TARGET_LEN`` bases, and (after upper-casing) every character
    is a key of ``tiger.NUCLEOTIDE_TOKENS``.
    """
    state = st.session_state

    # initialize transcript DataFrame so a failed submission never leaves stale data
    state.transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # manual entry
    if state.entry_method == ENTRY_METHODS['manual']:
        state.transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [state.manual_entry],
        })

    # fasta file upload
    elif state.entry_method == ENTRY_METHODS['fasta']:
        if state.fasta_entry is None:
            _report_input_error('Please upload a fasta file.')
            return
        state.transcripts = _load_uploaded_fasta(state.fasta_entry)

    # an empty table would pass every check below vacuously, so reject it here
    if state.transcripts.empty:
        _report_input_error('No transcripts were found in the provided input.')
        return

    # make sure all transcripts have unique identifiers
    if state.transcripts.index.has_duplicates:
        _report_input_error("Duplicate transcript ID's detected in fasta file")
        return

    # make sure all transcripts satisfy length requirements
    sequences = state.transcripts[tiger.SEQ_COL]
    if (sequences.apply(len) < tiger.TARGET_LEN).any():
        _report_input_error('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
        return

    # convert to upper case as used by tokenizer
    state.transcripts[tiger.SEQ_COL] = sequences.apply(str.upper)

    # make sure all transcripts only contain characters known to the tokenizer
    allowed = set(tiger.NUCLEOTIDE_TOKENS.keys())
    valid = state.transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(allowed))
    if not valid.all():
        _report_input_error('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
        return

    # everything looks good, so run the model
    state.run = True
if __name__ == '__main__':
    session = st.session_state

    # First script run of a session: seed the state the widgets below read.
    if 'mode' not in session:
        session.mode = tiger.RUN_MODES['all']
        session.disable_off_target_checkbox = True
    if 'entry_method' not in session:
        session.entry_method = ENTRY_METHODS['manual']
        session.manual_entry_disabled = False
        session.fasta_entry_disabled = True
    if 'run' not in session:
        session.run = False

    # Title and documentation.
    with DOCUMENTATION:
        st.title('TIGER Cas13 Efficacy Prediction')

    # Mode selection: radio on the left, off-target checkbox on the right.
    with MODE_SELECTION:
        mode_column, off_target_column = st.columns([0.65, 0.35])
        with mode_column:
            st.radio(
                label='What do you want to predict?',
                options=tuple(tiger.RUN_MODES.values()),
                key='mode',
                on_change=mode_change_callback,
            )
        with off_target_column:
            st.checkbox(
                label='Find off-target effects (slow)',
                key='check_off_targets',
                disabled=session.disable_off_target_checkbox,
            )

    # Transcript entry: method selector plus the two alternative inputs.
    with TRANSCRIPT_ENTRY:
        st.selectbox(
            label='How would you like to provide transcripts of interest?',
            options=ENTRY_METHODS.values(),
            key='entry_method',
            on_change=entry_method_change_callback,
        )
        st.text_input(
            label='Enter a target transcript:',
            key='manual_entry',
            placeholder='Upper or lower case',
            disabled=session.manual_entry_disabled,
        )
        st.file_uploader(
            label='Upload a fasta file:',
            key='fasta_entry',
            disabled=session.fasta_entry_disabled,
        )
        # NOTE(review): the source's indentation was lost; the submit button is
        # assumed to live inside the transcript-entry container -- confirm.
        st.button(label='Get predictions!', on_click=process_input)

    # Results: process_input() raises the `run` flag; consume it exactly once.
    with RESULTS:
        if session.run:
            session.run = False
            print('RUNNING')

            # TODO: the model call below is disabled in this revision.
            # on_target, off_target = tiger.tiger_exhibit(
            #     transcripts=st.session_state.transcripts,
            #     mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
            #     status=st.empty(),
            #     progress_bar=st.progress(0),
            #     check_off_targets=st.session_state.check_off_targets
            # )

            # Older result-rendering code kept for reference. It uses names
            # (src_seq, option, status_bar, status_text) that are not defined
            # anywhere in the visible part of this file.
            # # valid input
            # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
            #     on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
            #                                           status_bar, status_text, check_off_targets=option == 'On and Off Target')
            #     on_target.rename(columns={'Guide': '23 nt guide sequence'}, inplace=True)
            #     if len(on_target) > 0:
            #         if on_target.iloc[0]['On-target ID'] == 0:
            #             on_target.drop(['On-target ID'], axis=1, inplace=True)
            #         st.write('On-target predictions: ', on_target)
            #         st.download_button(label='Download', data=convert_df(on_target), file_name='on_target.csv', mime='text/csv')
            #     if option == 'On and Off Target' and len(off_target) > 0:
            #         off_target.rename(columns={'Guide': '23 nt guide sequence'}, inplace=True)
            #         st.write('Off-target predictions: ', off_target)
            #         st.download_button(label='Download', data=convert_df(off_target), file_name='off_target.csv', mime='text/csv')
            #     elif option == 'On and Off Target' and len(off_target) == 0:
            #         st.write('We did not find any off-target effects!')
            #