astirn committed on
Commit
546d4e5
·
1 Parent(s): dc6d628

some changes needed to support titration

Browse files
Files changed (1) hide show
  1. tiger.py +13 -10
tiger.py CHANGED
@@ -241,16 +241,19 @@ def find_off_targets(top_guides: pd.DataFrame, status_update_fn=None):
241
  for row in dict_off_targets:
242
  start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
243
  del row['Guide Midpoint']
 
 
244
  if start_location < CONTEXT_5P:
245
- row[SEQ_COL] = row[SEQ_COL][0:GUIDE_LEN + CONTEXT_3P]
246
- row[SEQ_COL] = 'N' * (TARGET_LEN - len(row[SEQ_COL])) + row[SEQ_COL]
247
- elif start_location + GUIDE_LEN + CONTEXT_3P > len(row[SEQ_COL]):
248
- row[SEQ_COL] = row[SEQ_COL][start_location - CONTEXT_5P:]
249
- row[SEQ_COL] = row[SEQ_COL] + 'N' * (TARGET_LEN - len(row[SEQ_COL]))
250
  else:
251
- row[SEQ_COL] = row[SEQ_COL][start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
252
- if row[MM_COL] == 0 and 'N' not in row[SEQ_COL]:
253
- assert row[GUIDE_COL] == sequence_complement([row[SEQ_COL][CONTEXT_5P:TARGET_LEN - CONTEXT_3P]])[0]
 
254
 
255
  # append new off-targets
256
  off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
@@ -272,14 +275,14 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
272
 
273
  # compute off-target predictions
274
  model_inputs = tf.concat([
275
- tf.reshape(one_hot_encode_sequence(off_targets[SEQ_COL], add_context_padding=False), [len(off_targets), -1]),
276
  tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
277
  ], axis=-1)
278
  lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
279
  off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
280
 
281
  # trim context sequence
282
- off_targets[SEQ_COL] = off_targets[SEQ_COL].apply(lambda seq: seq[CONTEXT_5P:len(seq) - CONTEXT_3P])
283
 
284
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
285
 
 
241
  for row in dict_off_targets:
242
  start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
243
  del row['Guide Midpoint']
244
+ target = row[SEQ_COL]
245
+ del row[SEQ_COL]
246
  if start_location < CONTEXT_5P:
247
+ target = target[0:GUIDE_LEN + CONTEXT_3P]
248
+ target = 'N' * (TARGET_LEN - len(target)) + target
249
+ elif start_location + GUIDE_LEN + CONTEXT_3P > len(target):
250
+ target = target[start_location - CONTEXT_5P:]
251
+ target = target + 'N' * (TARGET_LEN - len(target))
252
  else:
253
+ target = target[start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
254
+ if row[MM_COL] == 0 and 'N' not in row[TARGET_COL]:
255
+ assert row[GUIDE_COL] == sequence_complement([target[CONTEXT_5P:TARGET_LEN - CONTEXT_3P]])[0]
256
+ row[TARGET_COL] = target
257
 
258
  # append new off-targets
259
  off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
 
275
 
276
  # compute off-target predictions
277
  model_inputs = tf.concat([
278
+ tf.reshape(one_hot_encode_sequence(off_targets[TARGET_COL], add_context_padding=False), [len(off_targets), -1]),
279
  tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
280
  ], axis=-1)
281
  lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
282
  off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
283
 
284
  # trim context sequence
285
+ off_targets[TARGET_COL] = off_targets[TARGET_COL].apply(lambda seq: seq[CONTEXT_5P:len(seq) - CONTEXT_3P])
286
 
287
  return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
288