Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

App Files Community

astirn committed on Jul 7, 2023

Commit

e43faaa

1 Parent(s): 97f8ff7

very close

Browse files

Files changed (2) hide show

app.py +58 -86
tiger.py +5 -5

app.py CHANGED Viewed

@@ -29,23 +29,14 @@ def mode_change_callback():
         st.session_state.disable_off_target_checkbox = False
-def entry_method_change_callback():
-    if st.session_state.entry_method == ENTRY_METHODS['manual']:
-        st.session_state.manual_entry_disabled = False
-        st.session_state.fasta_entry_disabled = True
-    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
-        st.session_state.manual_entry_disabled = True
-        st.session_state.fasta_entry_disabled = False
-def process_input():
     # initialize transcript DataFrame
-    st.session_state.transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])
     # manual entry
     if st.session_state.entry_method == ENTRY_METHODS['manual']:
-        st.session_state.transcripts = pd.DataFrame({
             tiger.ID_COL: ['ManualEntry'],
             tiger.SEQ_COL: [st.session_state.manual_entry]
         })
@@ -56,33 +47,38 @@ def process_input():
             fasta_path = st.session_state.fasta_entry.name
             with open(fasta_path, 'w') as f:
                 f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
-            st.session_state.transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
     # make sure all transcripts have unique identifiers
-    if st.session_state.transcripts.index.has_duplicates:
         with TRANSCRIPT_ENTRY:
             st.write("Duplicate transcript ID's detected in fasta file")
         return
-    # make sure all transcripts satisfy length requirements
-    too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)
-    if any(too_short):
-        with TRANSCRIPT_ENTRY:
-            st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
-        return
     # convert to upper case as used by tokenizer
-    st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
     # make sure all transcripts only contain nucleotides A, C, G, T, and wildcard N
-    valid = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))
     if not all(valid):
         with TRANSCRIPT_ENTRY:
             st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
         return
-    # everything looks good, so run the model
-    st.session_state.run = True
 if __name__ == '__main__':
@@ -91,18 +87,14 @@ if __name__ == '__main__':
     if 'mode' not in st.session_state:
         st.session_state.mode = tiger.RUN_MODES['all']
         st.session_state.disable_off_target_checkbox = True
-    if 'off_targets_checked' not in st.session_state:
-        st.session_state.off_targets_checked = False
     if 'entry_method' not in st.session_state:
         st.session_state.entry_method = ENTRY_METHODS['manual']
-        st.session_state.manual_entry_disabled = False
-        st.session_state.fasta_entry_disabled = True
     if 'run' not in st.session_state:
         st.session_state.run = False
-    if 'results_ready' not in st.session_state:
-        st.session_state.results_ready = False
-        st.session_state.on_target = pd.DataFrame()
-        st.session_state.off_target = pd.DataFrame()
     # title and documentation
     with DOCUMENTATION:
@@ -131,65 +123,45 @@ if __name__ == '__main__':
             label='How would you like to provide transcripts of interest?',
             options=ENTRY_METHODS.values(),
             key='entry_method',
-            on_change=entry_method_change_callback
-        )
-        st.text_input(
-            label='Enter a target transcript:',
-            key='manual_entry',
-            placeholder='Upper or lower case',
-            disabled=st.session_state.manual_entry_disabled
-        )
-        st.file_uploader(
-            label='Upload a fasta file:',
-            key='fasta_entry',
-            disabled=st.session_state.fasta_entry_disabled
         )
-        st.button(label='Get predictions!', on_click=process_input)
     with RUNTIME:
-        # check and clear run signal and existing results
-        if st.session_state.run:
-            st.session_state.run = False
-            st.session_state.results_ready = False
-            # run model and signal results are ready
-            st.session_state.off_targets_checked = st.session_state.check_off_targets
-            st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
-                transcripts=st.session_state.transcripts,
-                mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
-                status=st.empty(),
-                progress_bar=st.progress(0),
-                check_off_targets=st.session_state.off_targets_checked
-            )
-            st.session_state.results_ready = True
     with RESULTS:
-        on_target_results = st.empty()
-        off_target_results = st.empty()
-        # display results if they are ready
-        if st.session_state.results_ready:
-            with on_target_results:
-                st.write('On-target predictions:', st.session_state.on_target)
                 st.download_button(
-                    label='Download on-target predictions',
-                    data=convert_df(st.session_state.on_target),
-                    file_name='on_target.csv',
                     mime='text/csv'
                 )
-            with off_target_results:
-                if st.session_state.off_targets_checked and len(st.session_state.off_target) > 0:
-                    st.write('Off-target predictions:', st.session_state.off_target)
-                    st.download_button(
-                        label='Download off-target predictions',
-                        data=convert_df(st.session_state.off_target),
-                        file_name='off_target.csv',
-                        mime='text/csv'
-                    )
-                elif st.session_state.off_targets_checked and len(st.session_state.off_target) == 0:
-                    st.write('We did not find any off-target effects!')
-        # otherwise, clear our results
-        else:
-            on_target_results.empty()
-            off_target_results.empty()

         st.session_state.disable_off_target_checkbox = False
+def run():
     # initialize transcript DataFrame
+    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])
     # manual entry
     if st.session_state.entry_method == ENTRY_METHODS['manual']:
+        transcripts = pd.DataFrame({
             tiger.ID_COL: ['ManualEntry'],
             tiger.SEQ_COL: [st.session_state.manual_entry]
         })
             fasta_path = st.session_state.fasta_entry.name
             with open(fasta_path, 'w') as f:
                 f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
+            transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
     # make sure all transcripts have unique identifiers
+    if transcripts.index.has_duplicates:
         with TRANSCRIPT_ENTRY:
             st.write("Duplicate transcript ID's detected in fasta file")
         return
     # convert to upper case as used by tokenizer
+    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
     # make sure all transcripts only contain nucleotides A, C, G, T, and wildcard N
+    valid = transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))
     if not all(valid):
         with TRANSCRIPT_ENTRY:
             st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
         return
+    # make sure all transcripts satisfy length requirements
+    too_short = transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)
+    if any(too_short):
+        with TRANSCRIPT_ENTRY:
+            st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
+        return
+    # run model
+    st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
+        transcripts=transcripts,
+        mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
+        # status=RUNTIME,
+        check_off_targets=st.session_state.check_off_targets
+    )
 if __name__ == '__main__':
     if 'mode' not in st.session_state:
         st.session_state.mode = tiger.RUN_MODES['all']
         st.session_state.disable_off_target_checkbox = True
     if 'entry_method' not in st.session_state:
         st.session_state.entry_method = ENTRY_METHODS['manual']
     if 'run' not in st.session_state:
         st.session_state.run = False
+    if 'on_target' not in st.session_state:
+        st.session_state.on_target = None
+    if 'off_target' not in st.session_state:
+        st.session_state.off_target = None
     # title and documentation
     with DOCUMENTATION:
             label='How would you like to provide transcripts of interest?',
             options=ENTRY_METHODS.values(),
             key='entry_method',
         )
+        if st.session_state.entry_method == ENTRY_METHODS['manual']:
+            st.text_input(
+                label='Enter a target transcript:',
+                key='manual_entry',
+                placeholder='Upper or lower case',
+            )
+        elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
+            st.file_uploader(
+                label='Upload a fasta file:',
+                key='fasta_entry',
+            )
+    # runtime
     with RUNTIME:
+        st.button(label='Get predictions!', on_click=run)
+    # results
     with RESULTS:
+        # on-target results
+        if st.session_state.on_target is not None:
+            st.write('On-target predictions:', st.session_state.on_target)
+            st.download_button(
+                label='Download on-target predictions',
+                data=convert_df(st.session_state.on_target),
+                file_name='on_target.csv',
+                mime='text/csv'
+            )
+        # off-target results
+        if st.session_state.off_target is not None:
+            if len(st.session_state.off_target) > 0:
+                st.write('Off-target predictions:', st.session_state.off_target)
                 st.download_button(
+                    label='Download off-target predictions',
+                    data=convert_df(st.session_state.off_target),
+                    file_name='off_target.csv',
                     mime='text/csv'
                 )
+            else:
+                st.write('We did not find any off-target effects!')

tiger.py CHANGED Viewed

@@ -204,7 +204,7 @@ def top_guides_per_transcript(predictions: pd.DataFrame):
     return top_guides.reset_index(drop=True)
-def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):
     # load reference transcripts
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
@@ -262,9 +262,9 @@ def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):
         update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
         print('\r' + update_text, end='')
         if status is not None:
-            status.text(update_text)
-        if progress_bar is not None:
-            progress_bar.progress(percent_complete / 100)
     print('')
     return off_targets
@@ -301,7 +301,7 @@ def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool,
     on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)
     # initialize other outputs
-    off_target_predictions = pd.DataFrame()
     if mode == 'all' and not check_off_targets:
         pass  # nothing to do!

     return top_guides.reset_index(drop=True)
+def find_off_targets(top_guides: pd.DataFrame, status=None):
     # load reference transcripts
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
         update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
         print('\r' + update_text, end='')
         if status is not None:
+            with status:
+                st.text(update_text)
+                st.progress(percent_complete / 100)
     print('')
     return off_targets
     on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)
     # initialize other outputs
+    off_target_predictions = None
     if mode == 'all' and not check_off_targets:
         pass  # nothing to do!