Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

App Files Files Community

tiger / app.py

astirn

progress

e9d876c over 2 years ago

raw

history blame

6.89 kB

	import tiger
	import pandas as pd
	import streamlit as st

	# Labels for the supported transcript entry methods, keyed by a short
	# internal name. The label text is what gets compared against the selected
	# entry method elsewhere in this file.
	ENTRY_METHODS = {
	    'manual': 'Manual entry of single transcript',
	    'fasta': "Fasta file upload (supports multiple transcripts if they have unique ID's)",
	}

	# page containers, created in this order: documentation, mode selection,
	# transcript entry, results
	DOCUMENTATION, MODE_SELECTION, TRANSCRIPT_ENTRY, RESULTS = (
	    st.container(),
	    st.container(),
	    st.container(),
	    st.container(),
	)


	@st.cache_data
	def convert_df(df):
	    """Serialize ``df`` to CSV and return it as UTF-8 encoded bytes.

	    The function is wrapped in ``st.cache_data`` so the conversion is cached
	    instead of being recomputed on every rerun.

	    Args:
	        df: object providing ``to_csv()`` (a DataFrame).

	    Returns:
	        The text returned by ``df.to_csv()``, encoded as UTF-8 bytes.
	    """
	    csv_text = df.to_csv()
	    return csv_text.encode('utf-8')


	def mode_change_callback():
	    """Keep the off-target checkbox state in step with the selected run mode.

	    When the selected mode equals ``tiger.RUN_MODES['all']`` the off-target
	    checkbox is disabled and its value is reset to ``False``. For any other
	    mode the checkbox is enabled and its value is left untouched.
	    """
	    state = st.session_state
	    all_mode_selected = bool(state.mode == tiger.RUN_MODES['all'])
	    if all_mode_selected:
	        # clear the value whenever the checkbox is about to be disabled
	        state.check_off_targets = False
	    state.disable_off_target_checkbox = all_mode_selected


	def entry_method_change_callback():
	    """Enable the input widget matching the selected entry method.

	    Manual entry enables the text input and disables the fasta uploader;
	    fasta entry does the opposite. Any other value leaves both flags as
	    they are.
	    """
	    state = st.session_state
	    selected = state.entry_method
	    manual_selected = selected == ENTRY_METHODS['manual']

	    # unknown entry method: nothing to update
	    if not manual_selected and selected != ENTRY_METHODS['fasta']:
	        return

	    state.manual_entry_disabled = not manual_selected
	    state.fasta_entry_disabled = bool(manual_selected)


	def process_input():
	    """Collect, validate and normalize the submitted transcripts.

	    Reads the selected entry method from ``st.session_state`` and stores the
	    resulting transcripts in ``st.session_state.transcripts``. When every
	    check passes, ``st.session_state.run`` is set to ``True`` so the main
	    script runs the model. On any validation failure a message is written
	    into ``TRANSCRIPT_ENTRY`` and the function returns without setting the
	    run flag.

	    Checks, in order: at least one transcript was provided, transcript
	    index has no duplicates, every sequence has at least
	    ``tiger.TARGET_LEN`` characters, and (after upper-casing) every
	    character is a key of ``tiger.NUCLEOTIDE_TOKENS``.

	    Returns:
	        None. Results are communicated through ``st.session_state``.
	    """
	    # local imports keep this function self-contained
	    import os
	    import tempfile

	    # initialize transcript DataFrame
	    st.session_state.transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

	    # manual entry
	    if st.session_state.entry_method == ENTRY_METHODS['manual']:
	        st.session_state.transcripts = pd.DataFrame({
	            tiger.ID_COL: ['ManualEntry'],
	            tiger.SEQ_COL: [st.session_state.manual_entry]
	        })

	    # fasta file upload
	    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
	        upload = st.session_state.fasta_entry
	        if upload is None:
	            with TRANSCRIPT_ENTRY:
	                st.write('Please upload a fasta file.')
	            return

	        # The upload's name is supplied by the client, so never use it as a
	        # path in the working directory (it could overwrite app files).
	        # Keep only its base name, inside a throw-away directory that is
	        # removed once the transcripts have been loaded.
	        file_name = os.path.basename(upload.name)
	        if file_name in ('', '.', '..'):
	            file_name = 'upload.fasta'
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            fasta_path = os.path.join(tmp_dir, file_name)
	            # write the raw bytes: avoids a decode/encode round trip that
	            # depended on the platform's default text encoding
	            with open(fasta_path, 'wb') as f:
	                f.write(upload.getvalue())
	            st.session_state.transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)

	    # nothing to predict on (e.g. a fasta file without records); an empty
	    # table would otherwise pass every check below
	    if st.session_state.transcripts.empty:
	        with TRANSCRIPT_ENTRY:
	            st.write('No transcripts were provided.')
	        return

	    # make sure all transcripts have unique identifiers
	    # NOTE(review): this inspects the DataFrame index; presumably
	    # tiger.load_transcripts indexes by transcript ID -- confirm
	    if st.session_state.transcripts.index.has_duplicates:
	        with TRANSCRIPT_ENTRY:
	            st.write("Duplicate transcript ID's detected in fasta file")
	        return

	    # make sure all transcripts satisfy length requirements
	    too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(len) < tiger.TARGET_LEN
	    if too_short.any():
	        with TRANSCRIPT_ENTRY:
	            st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
	        return

	    # convert to upper case as used by tokenizer
	    st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(str.upper)

	    # make sure all transcripts only contain characters known to the tokenizer
	    allowed = set(tiger.NUCLEOTIDE_TOKENS.keys())
	    valid = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(allowed))
	    if not valid.all():
	        with TRANSCRIPT_ENTRY:
	            st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
	        return

	    # debug aid: echo the accepted transcripts to the server console
	    print(st.session_state.transcripts)

	    # everything looks good, so run the model
	    st.session_state.run = True


	# Script body: initializes session state, lays out the widgets inside the
	# module-level containers, and reacts to the run flag set by process_input.
	if __name__ == '__main__':

	    # app initialization: seed session state on first use only, so values
	    # already present in st.session_state are not overwritten
	    if 'mode' not in st.session_state:
	        st.session_state.mode = tiger.RUN_MODES['all']
	        st.session_state.disable_off_target_checkbox = True
	    if 'entry_method' not in st.session_state:
	        st.session_state.entry_method = ENTRY_METHODS['manual']
	        st.session_state.manual_entry_disabled = False
	        st.session_state.fasta_entry_disabled = True
	    if 'run' not in st.session_state:
	        st.session_state.run = False

	    # title and documentation
	    with DOCUMENTATION:
	        st.title('TIGER Cas13 Efficacy Prediction')

	    # mode selection: run-mode radio on the left, off-target checkbox on the right
	    with MODE_SELECTION:
	        col1, col2 = st.columns([0.65, 0.35])
	        with col1:
	            st.radio(
	                label='What do you want to predict?',
	                options=tuple(tiger.RUN_MODES.values()),
	                key='mode',
	                on_change=mode_change_callback
	            )
	        with col2:
	            st.checkbox(
	                label='Find off-target effects (slow)',
	                key='check_off_targets',
	                disabled=st.session_state.disable_off_target_checkbox
	            )

	    # transcript entry: method selector plus both input widgets; the
	    # disabled flags come from entry_method_change_callback
	    with TRANSCRIPT_ENTRY:
	        st.selectbox(
	            label='How would you like to provide transcripts of interest?',
	            options=ENTRY_METHODS.values(),
	            key='entry_method',
	            on_change=entry_method_change_callback
	        )
	        st.text_input(
	            label='Enter a target transcript:',
	            key='manual_entry',
	            placeholder='Upper or lower case',
	            disabled=st.session_state.manual_entry_disabled
	        )
	        st.file_uploader(
	            label='Upload a fasta file:',
	            key='fasta_entry',
	            disabled=st.session_state.fasta_entry_disabled
	        )
	        # NOTE(review): nesting reconstructed from a flattened listing; the
	        # button is assumed to belong to this container -- confirm
	        st.button(label='Get predictions!', on_click=process_input)

	    with RESULTS:
	        if st.session_state.run:
	            # consume the flag so the next rerun does not trigger another run
	            st.session_state.run = False
	            # TODO: the model call below is commented out; for now only a
	            # marker is printed to the console
	            print('RUNNING')
	            # on_target, off_target = tiger.tiger_exhibit(
	            #     transcripts=st.session_state.transcripts,
	            #     mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
	            #     status=st.empty(),
	            #     progress_bar=st.progress(0),
	            #     check_off_targets=st.session_state.check_off_targets
	            # )

	    # Commented-out results handling, kept for reference:
	    # # valid input
	    # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
	    #     on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
	    #                                           status_bar, status_text, check_off_targets=option == 'On and Off Target')
	    #     on_target.rename(columns={'Guide': '23 nt guide sequence'}, inplace=True)
	    #     if len(on_target) > 0:
	    #         if on_target.iloc[0]['On-target ID'] == 0:
	    #             on_target.drop(['On-target ID'], axis=1, inplace=True)
	    #         st.write('On-target predictions: ', on_target)
	    #         st.download_button(label='Download', data=convert_df(on_target), file_name='on_target.csv', mime='text/csv')
	    #     if option == 'On and Off Target' and len(off_target) > 0:
	    #         off_target.rename(columns={'Guide': '23 nt guide sequence'}, inplace=True)
	    #         st.write('Off-target predictions: ', off_target)
	    #         st.download_button(label='Download', data=convert_df(off_target), file_name='off_target.csv', mime='text/csv')
	    #     elif option == 'On and Off Target' and len(off_target) == 0:
	    #         st.write('We did not find any off-target effects!')
	    #