Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

App Files Files Community

astirn committed on Jul 5, 2023

Commit

79e1e8e

1 Parent(s): ecb653b

cleanup and bug fixes

Browse files

Files changed (1) hide show

tiger.py +15 -14

tiger.py CHANGED Viewed

@@ -20,8 +20,7 @@ SEQ_COL = 'Sequence'
 TARGET_COL = 'Target Sequence'
 GUIDE_COL = 'Guide Sequence'
 SCORE_COL = 'Guide Score'
-RUN_MODE_ALL_PM = 'All on-target guides per transcript'
-RUN_MODE_TITRATION = 'Top guides per transcript'
 REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
 BATCH_SIZE_COMPUTE = 500
 BATCH_SIZE_SCAN = 20
@@ -167,10 +166,10 @@ def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model,
         # progress update
         percent_complete = 100 * min((i + 1) / len(transcripts), 1)
         update_text = 'Evaluating on-target guides for each transcript: {:.2f}%'.format(percent_complete)
         if status_bar:
             status_text.text()
-            status_bar.progress(int(100 * min((i + 1) / len(transcripts), 1)))
-        print('\r' + update_text, end='')
     print('')
     return predictions
@@ -178,6 +177,7 @@ def get_on_target_predictions(transcripts: pd.DataFrame, model: tf.keras.Model,
 def top_guides_per_transcript(predictions: pd.DataFrame):
     top_guides = pd.DataFrame()
     for transcript in predictions[ID_COL].unique():
         df = predictions.loc[predictions[ID_COL] == transcript]
@@ -193,7 +193,7 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
     # one-hot encode guides to form a filter
-    guide_filter = one_hot_encode_sequence(sequence_complement(top_guides['Guide']), add_context_padding=False)
     guide_filter = tf.transpose(guide_filter, [1, 2, 0])
     # loop over transcripts in batches
@@ -241,10 +241,12 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
             off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
         # progress update
         if status_bar:
-            status_text.text("Scanning for off-targets Percent complete: {:.2f}%".format(int(100 * min(i / len(reference_transcripts), 1))))
-            status_bar.progress(int(100 * min(i / len(reference_transcripts), 1)))
-        print('\rPercent complete: {:.2f}%'.format(100 * min(i / len(reference_transcripts), 1)), end='')
     print('')
     return off_targets
@@ -264,7 +266,7 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
     return off_targets.sort_values(SCORE_COL)
-def tiger_exhibit(transcripts: pd.DataFrame, run_mode: str, check_off_targets: bool, status_bar=None, status_text=None):
     # load model
     if os.path.exists('model'):
@@ -279,10 +281,10 @@ def tiger_exhibit(transcripts: pd.DataFrame, run_mode: str, check_off_targets: b
     # initialize other outputs
     off_target_predictions = pd.DataFrame()
-    if run_mode == RUN_MODE_ALL_PM:
         return on_target_predictions, off_target_predictions
-    elif run_mode == RUN_MODE_TITRATION:  # TODO: and titration candidates
         on_target_predictions = top_guides_per_transcript(on_target_predictions)
     else:
@@ -305,6 +307,7 @@ if __name__ == '__main__':
     # common arguments
     parser = argparse.ArgumentParser()
     parser.add_argument('--check_off_targets', action='store_true', default=False)
     parser.add_argument('--fasta_path', type=str, default=None)
     args = parser.parse_args()
@@ -334,9 +337,7 @@ if __name__ == '__main__':
         # run batch
         idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
-        df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop],
-                                                    run_mode=RUN_MODE_TITRATION,
-                                                    check_off_targets=args.check_off_targets)
         # save batch results
         df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')

 TARGET_COL = 'Target Sequence'
 GUIDE_COL = 'Guide Sequence'
 SCORE_COL = 'Guide Score'
+RUN_MODES = dict(all='All on-target guides per transcript', titration='Top guides per transcript')
 REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
 BATCH_SIZE_COMPUTE = 500
 BATCH_SIZE_SCAN = 20
         # progress update
         percent_complete = 100 * min((i + 1) / len(transcripts), 1)
         update_text = 'Evaluating on-target guides for each transcript: {:.2f}%'.format(percent_complete)
+        print('\r' + update_text, end='')
         if status_bar:
             status_text.text()
+            status_bar.progress(percent_complete)
     print('')
     return predictions
 def top_guides_per_transcript(predictions: pd.DataFrame):
+    # select and sort top guides for each transcript
     top_guides = pd.DataFrame()
     for transcript in predictions[ID_COL].unique():
         df = predictions.loc[predictions[ID_COL] == transcript]
     reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
     # one-hot encode guides to form a filter
+    guide_filter = one_hot_encode_sequence(sequence_complement(top_guides[GUIDE_COL]), add_context_padding=False)
     guide_filter = tf.transpose(guide_filter, [1, 2, 0])
     # loop over transcripts in batches
             off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
         # progress update
+        percent_complete = 100 * min((i + 1) / len(reference_transcripts), 1)
+        update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
+        print('\r' + update_text, end='')
         if status_bar:
+            status_text.text()
+            status_bar.progress(percent_complete)
     print('')
     return off_targets
     return off_targets.sort_values(SCORE_COL)
+def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool, status_bar=None, status_text=None):
     # load model
     if os.path.exists('model'):
     # initialize other outputs
     off_target_predictions = pd.DataFrame()
+    if mode == 'all':
         return on_target_predictions, off_target_predictions
+    elif mode == 'titration':  # TODO: and titration candidates
         on_target_predictions = top_guides_per_transcript(on_target_predictions)
     else:
     # common arguments
     parser = argparse.ArgumentParser()
+    parser.add_argument('--mode', type=str, default='titration')
     parser.add_argument('--check_off_targets', action='store_true', default=False)
     parser.add_argument('--fasta_path', type=str, default=None)
     args = parser.parse_args()
         # run batch
         idx_stop = min(idx + BATCH_SIZE_TRANSCRIPTS, len(df_transcripts))
+        df_on_target, df_off_target = tiger_exhibit(df_transcripts[idx:idx_stop], args.mode, args.check_off_targets)
         # save batch results
         df_on_target.to_csv('on_target.csv', header=batch == 1, index=False, mode='a')