Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
some changes needed to support titration
Browse files
tiger.py
CHANGED
|
@@ -241,16 +241,19 @@ def find_off_targets(top_guides: pd.DataFrame, status_update_fn=None):
|
|
| 241 |
for row in dict_off_targets:
|
| 242 |
start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
|
| 243 |
del row['Guide Midpoint']
|
|
|
|
|
|
|
| 244 |
if start_location < CONTEXT_5P:
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
elif start_location + GUIDE_LEN + CONTEXT_3P > len(
|
| 248 |
-
|
| 249 |
-
|
| 250 |
else:
|
| 251 |
-
|
| 252 |
-
if row[MM_COL] == 0 and 'N' not in row[
|
| 253 |
-
assert row[GUIDE_COL] == sequence_complement([
|
|
|
|
| 254 |
|
| 255 |
# append new off-targets
|
| 256 |
off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
|
|
@@ -272,14 +275,14 @@ def predict_off_target(off_targets: pd.DataFrame, model: tf.keras.Model):
|
|
| 272 |
|
| 273 |
# compute off-target predictions
|
| 274 |
model_inputs = tf.concat([
|
| 275 |
-
tf.reshape(one_hot_encode_sequence(off_targets[
|
| 276 |
tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
|
| 277 |
], axis=-1)
|
| 278 |
lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
|
| 279 |
off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
|
| 280 |
|
| 281 |
# trim context sequence
|
| 282 |
-
off_targets[
|
| 283 |
|
| 284 |
return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
|
| 285 |
|
|
|
|
| 241 |
for row in dict_off_targets:
|
| 242 |
start_location = row['Guide Midpoint'] - (GUIDE_LEN // 2)
|
| 243 |
del row['Guide Midpoint']
|
| 244 |
+
target = row[SEQ_COL]
|
| 245 |
+
del row[SEQ_COL]
|
| 246 |
if start_location < CONTEXT_5P:
|
| 247 |
+
target = target[0:GUIDE_LEN + CONTEXT_3P]
|
| 248 |
+
target = 'N' * (TARGET_LEN - len(target)) + target
|
| 249 |
+
elif start_location + GUIDE_LEN + CONTEXT_3P > len(target):
|
| 250 |
+
target = target[start_location - CONTEXT_5P:]
|
| 251 |
+
target = target + 'N' * (TARGET_LEN - len(target))
|
| 252 |
else:
|
| 253 |
+
target = target[start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
|
| 254 |
+
if row[MM_COL] == 0 and 'N' not in row[TARGET_COL]:
|
| 255 |
+
assert row[GUIDE_COL] == sequence_complement([target[CONTEXT_5P:TARGET_LEN - CONTEXT_3P]])[0]
|
| 256 |
+
row[TARGET_COL] = target
|
| 257 |
|
| 258 |
# append new off-targets
|
| 259 |
off_targets = pd.concat([off_targets, pd.DataFrame(dict_off_targets)])
|
|
|
|
| 275 |
|
| 276 |
# compute off-target predictions
|
| 277 |
model_inputs = tf.concat([
|
| 278 |
+
tf.reshape(one_hot_encode_sequence(off_targets[TARGET_COL], add_context_padding=False), [len(off_targets), -1]),
|
| 279 |
tf.reshape(one_hot_encode_sequence(off_targets[GUIDE_COL], add_context_padding=True), [len(off_targets), -1]),
|
| 280 |
], axis=-1)
|
| 281 |
lfc_estimate = model.predict(model_inputs, batch_size=BATCH_SIZE_COMPUTE, verbose=False)
|
| 282 |
off_targets[SCORE_COL] = prediction_transform(tf.squeeze(lfc_estimate).numpy())
|
| 283 |
|
| 284 |
# trim context sequence
|
| 285 |
+
off_targets[TARGET_COL] = off_targets[TARGET_COL].apply(lambda seq: seq[CONTEXT_5P:len(seq) - CONTEXT_3P])
|
| 286 |
|
| 287 |
return off_targets.sort_values(SCORE_COL, ascending=False).reset_index(drop=True)
|
| 288 |
|