Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

App Files Files Community

astirn committed on Feb 26, 2023

Commit

6009ef4

2 Parent(s): 8c452e8 9f169cd

Merge branch 'main' of https://huggingface.co/spaces/Knowles-Lab/tiger into main

Browse files

Files changed (2) hide show

app.py +62 -13
tiger.py +10 -5

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import pandas as pd
 import streamlit as st
-from tiger import tiger_exhibit, TARGET_LEN, NUCLEOTIDE_TOKENS
 @st.cache
@@ -8,23 +9,71 @@ def convert_df(df):
     # IMPORTANT: Cache the conversion to prevent computation on every rerun
     return df.to_csv().encode('utf-8')
 # title and instructions
 st.title('TIGER Cas13 Efficacy Prediction')
-st.session_state['userInput'] = ''
-st.session_state['userInput'] = st.text_input(
     label='Enter a target transcript:',
-    # value='ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC',
     placeholder='Upper or lower case')
-# input is too short
-if len(st.session_state['userInput']) < TARGET_LEN:
-    transcript_len = len(st.session_state['userInput'])
-    st.write('Transcript length ({:d}) must be at least {:d} bases.'.format(transcript_len, TARGET_LEN))
 # valid input
-elif all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in st.session_state['userInput']]):
-    on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[st.session_state['userInput']])))
     st.write('On-target predictions: ', on_target)
     st.download_button(label='Download', data=convert_df(on_target), file_name='on_target.csv', mime='text/csv')
     if len(off_target) > 0:
@@ -34,5 +83,5 @@ elif all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in st
         st.write('We did not find any off-target effects!')
 # invalid input
-else:
-    st.write('Nucleotides other than ACGT detected!')

 import pandas as pd
 import streamlit as st
+import os, shutil
+from tiger import tiger_exhibit, load_transcripts, TARGET_LEN, NUCLEOTIDE_TOKENS
 @st.cache
     # IMPORTANT: Cache the conversion to prevent computation on every rerun
     return df.to_csv().encode('utf-8')
 # title and instructions
 st.title('TIGER Cas13 Efficacy Prediction')
+st.session_state["fasta_seq"] = ""
+st.session_state["text_seq"] = ""
+status_bar, status_text = None, None
+# UserInput Form from text input
+text_form = st.form("text")
+text_input = text_form.text_input(
     label='Enter a target transcript:',
+    #value='ATGCAGGACGCGGAGAACGTGGCGGTGCCCGAGGCGGCCGAGGAGCGCGC',
     placeholder='Upper or lower case')
+if text_input:
+    # input is too short
+    if len(text_input) < TARGET_LEN:
+        transcript_len = len(text_input)
+        text_form.write('Transcript length ({:d}) must be at least {:d} bases.'.format(transcript_len, TARGET_LEN))
+    else:
+        st.session_state["text_seq"] = text_input
+text_calc = text_form.form_submit_button(label="calculate")
+#status bar
+status_text_textform = text_form.empty()
+status_bar_textform = text_form.progress(0)
+# UserInput Form from file
+fasta_form = st.form("fasta")
+fasta = fasta_form.file_uploader(label="upload fasta file")
+if fasta:
+    if os.path.exists("temp"):
+        shutil.rmtree("temp")
+    os.makedirs("temp")
+    fname = fasta.name
+    st.write(fname)
+    fpath = os.path.join("temp", fname)
+    with open(fpath, "w") as f:
+        f.write(fasta.getvalue().decode("utf-8"))
+    transcript_tbl = load_transcripts([fpath])
+    fasta_form.text("fasta file contents")
+    fasta_form.write(transcript_tbl)
+    seq = transcript_tbl['seq'][0]
+    st.session_state["fasta_seq"] = seq
+fasta_calc = fasta_form.form_submit_button(label="calculate")
+status_text_fastaform = fasta_form.empty()
+status_bar_fastaform = fasta_form.progress(0)
+#st.write(text_calc)
+#st.write(fasta_calc)
+#Calculation
+if text_calc:
+    src_seq = st.session_state["text_seq"]
+    status_text = status_text_textform
+    status_bar= status_bar_textform
+elif fasta_calc:
+    src_seq = st.session_state["fasta_seq"]
+    status_text = status_text_fastaform
+    status_bar= status_bar_fastaform
+else:
+    src_seq = ""
+#st.write(src_seq)
 # valid input
+if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
+    on_target, off_target = tiger_exhibit(pd.DataFrame(dict(id=['ManualEntry'], seq=[src_seq])),
+            status_bar, status_text)
     st.write('On-target predictions: ', on_target)
     st.download_button(label='Download', data=convert_df(on_target), file_name='on_target.csv', mime='text/csv')
     if len(off_target) > 0:
         st.write('We did not find any off-target effects!')
 # invalid input
+#else:
+#    st.write('Nucleotides other than ACGT detected!')

tiger.py CHANGED Viewed

@@ -24,7 +24,6 @@ for gpu in tf.config.list_physical_devices('GPU'):
 if len(tf.config.list_physical_devices('GPU')) > 0:
     tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
 def load_transcripts(fasta_files):
     # load all transcripts from fasta files into a DataFrame
@@ -95,7 +94,7 @@ def process_data(transcript_seq: str):
         tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
         tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
         ], axis=-1)
     return target_seq, guide_seq, model_inputs
@@ -112,7 +111,7 @@ def predict_on_target(transcript_seq: str, model: tf.keras.Model):
     return predictions
-def find_off_targets(top_guides: pd.DataFrame):
     # load reference transcripts
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
@@ -166,6 +165,9 @@ def find_off_targets(top_guides: pd.DataFrame):
             off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
         # progress update
         print('\rPercent complete: {:.2f}%'.format(100 * min(i / len(reference_transcripts), 1)), end='')
     print('')
@@ -186,7 +188,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
     return off_targets.sort_values('Normalized LFC')
-def tiger_exhibit(transcripts: pd.DataFrame):
     # load model
     if os.path.exists('model'):
@@ -204,11 +206,14 @@ def tiger_exhibit(transcripts: pd.DataFrame):
         on_target_predictions = pd.concat([on_target_predictions, df.iloc[:NUM_TOP_GUIDES]])
         # progress update
         print('\rPercent complete: {:.2f}%'.format(100 * min((i + 1) / len(transcripts), 1)), end='')
     print('')
     # predict off-target effects for top guides
-    off_targets = find_off_targets(on_target_predictions)
     off_target_predictions = predict_off_target(off_targets, model=tiger)
     # reverse guide sequences

 if len(tf.config.list_physical_devices('GPU')) > 0:
     tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
 def load_transcripts(fasta_files):
     # load all transcripts from fasta files into a DataFrame
         tf.reshape(one_hot_encode_sequence(target_seq, add_context_padding=False), [len(target_seq), -1]),
         tf.reshape(one_hot_encode_sequence(guide_seq, add_context_padding=True), [len(guide_seq), -1]),
         ], axis=-1)
+    print(model_inputs)
     return target_seq, guide_seq, model_inputs
     return predictions
+def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
     # load reference transcripts
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
             off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
         # progress update
+        if status_bar:
+            status_text.text("Scanning for off-targets Percent complete: {:.2f}%".format(int(100 * min(i / len(reference_transcripts), 1))))
+            status_bar.progress(int(100 * min(i / len(reference_transcripts), 1)))
         print('\rPercent complete: {:.2f}%'.format(100 * min(i / len(reference_transcripts), 1)), end='')
     print('')
     return off_targets.sort_values('Normalized LFC')
+def tiger_exhibit(transcripts: pd.DataFrame, status_bar=None, status_text=None):
     # load model
     if os.path.exists('model'):
         on_target_predictions = pd.concat([on_target_predictions, df.iloc[:NUM_TOP_GUIDES]])
         # progress update
+        if status_bar:
+            status_text.text("Scanning for on-targets Percent complete: {:.2f}%".format(100 * min((i + 1) / len(transcripts), 1)))
+            status_bar.progress(int(100 * min((i + 1) / len(transcripts), 1)))
         print('\rPercent complete: {:.2f}%'.format(100 * min((i + 1) / len(transcripts), 1)), end='')
     print('')
     # predict off-target effects for top guides
+    off_targets = find_off_targets(on_target_predictions,  status_bar, status_text)
     off_target_predictions = predict_off_target(off_targets, model=tiger)
     # reverse guide sequences