astirn committed on
Commit
546d4e5
·
1 Parent(s): dc6d628

some changes needed to support titration

Browse files
Files changed (1) hide show
  1. tiger.py +13 -10
tiger.py CHANGED
@@ -241,16 +241,19 @@ def find_off_targets(top_guides: pd.DataFrame, status_update_fn=None):
241
  for row in dict_off_targets:
242
  start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
243
  del row['Guide Midpoint']
 
 
244
  if start_location < CONTEXT_5P:
245
- row[SEQ_COL] = row[SEQ_COL][0:GUIDE_LEN + CONTEXT_3P]
246
- row[SEQ_COL] = 'N' * (TARGET_LEN - len(row[SEQ_COL])) + row[SEQ_COL]
247
- elif start_location + GUIDE_LEN + CONTEXT_3P > len(row[SEQ_COL]):
248
- row[SEQ_COL] = row[SEQ_COL][start_location - CONTEXT_5P:]
249
- row[SEQ_COL] = row[SEQ_COL] + 'N' * (TARGET_LEN - len(row[SEQ_COL]))
250
  else:
251
- row[SEQ_COL] = row[SEQ_COL][start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
252
- if row[MM_COL] == 0 and 'N' not in row[SEQ_COL]:
253
- assert row[GUIDE_COL] == sequence_complement([row[SEQ_COL][CONTEXT_5P:TARGET_LEN - CONTEXT_3P]])[0]
 
254
 
255
  # append new off-targets
256
  off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
@@ -272,14 +275,14 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
272
 
273
  # compute off-target predictions
274
  model_inputs = tf.concat([
275
- tf.reshape(one_hot_encode_sequence(off_targets[SEQ_COL], add_context_padding=False), [len(off_targets), -1]),
276
  tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
277
  ], axis=-1)
278
  lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
279
  off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
280
 
281
  # trim context sequence
282
- off_targets[SEQ_COL] = off_targets[SEQ_COL].apply(lambda seq: seq[CONTEXT_5P:len(seq) - CONTEXT_3P])
283
 
284
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
285
 
 
241
  for row in dict_off_targets:
242
  start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
243
  del row['Guide Midpoint']
244
+ target = row[SEQ_COL]
245
+ del row[SEQ_COL]
246
  if start_location < CONTEXT_5P:
247
+ target = target[0:GUIDE_LEN + CONTEXT_3P]
248
+ target = 'N' * (TARGET_LEN - len(target)) + target
249
+ elif start_location + GUIDE_LEN + CONTEXT_3P > len(target):
250
+ target = target[start_location - CONTEXT_5P:]
251
+ target = target + 'N' * (TARGET_LEN - len(target))
252
  else:
253
+ target = target[start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
254
+ if row[MM_COL] == 0 and 'N' not in row[TARGET_COL]:
255
+ assert row[GUIDE_COL] == sequence_complement([target[CONTEXT_5P:TARGET_LEN - CONTEXT_3P]])[0]
256
+ row[TARGET_COL] = target
257
 
258
  # append new off-targets
259
  off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
 
275
 
276
  # compute off-target predictions
277
  model_inputs = tf.concat([
278
+ tf.reshape(one_hot_encode_sequence(off_targets[TARGET_COL], add_context_padding=False), [len(off_targets), -1]),
279
  tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
280
  ], axis=-1)
281
  lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
282
  off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
283
 
284
  # trim context sequence
285
+ off_targets[TARGET_COL] = off_targets[TARGET_COL].apply(lambda seq: seq[CONTEXT_5P:len(seq) - CONTEXT_3P])
286
 
287
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
288