Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

astirn committed on Jan 6, 2023

Commit

a690e02

1 Parent(s): cbbf8c9

insufficient target context cases handled

Files changed (1) hide show

tiger.py CHANGED Viewed

@@ -9,7 +9,7 @@ GUIDE_LEN = 23
 CONTEXT_5P = 3
 CONTEXT_3P = 0
 TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
-NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
 NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
 NUM_TOP_GUIDES = 10
 NUM_MISMATCHES = 3
@@ -118,9 +118,16 @@ def find_off_targets(guides, batch_size=1000):
     # trim transcripts to targets
     dict_off_targets = df_off_targets.to_dict('records')
     for row in dict_off_targets:
-        start_location = row['Midpoint'] - (GUIDE_LEN // 2) - CONTEXT_5P
-        row['Target'] = row['Target'][start_location:start_location + TARGET_LEN]
-        if row['Mismatches'] == 0:
             assert row['Guide'] == sequence_complement([row['Target'][CONTEXT_5P:TARGET_LEN-CONTEXT_3P]])[0]
     df_off_targets = pd.DataFrame(dict_off_targets)

 CONTEXT_5P = 3
 CONTEXT_3P = 0
 TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
+NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
 NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
 NUM_TOP_GUIDES = 10
 NUM_MISMATCHES = 3
     # trim transcripts to targets
     dict_off_targets = df_off_targets.to_dict('records')
     for row in dict_off_targets:
+        start_location = row['Midpoint'] - (GUIDE_LEN // 2)
+        if start_location < CONTEXT_5P:
+            row['Target'] = row['Target'][0:GUIDE_LEN + CONTEXT_3P]
+            row['Target'] = 'N' * (TARGET_LEN - len(row['Target'])) + row['Target']
+        elif start_location + GUIDE_LEN + CONTEXT_3P > len(row['Target']):
+            row['Target'] = row['Target'][start_location - CONTEXT_5P:]
+            row['Target'] = row['Target'] + 'N' * (TARGET_LEN - len(row['Target']))
+        else:
+            row['Target'] = row['Target'][start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
+        if row['Mismatches'] == 0 and 'N' not in row['Target']:
             assert row['Guide'] == sequence_complement([row['Target'][CONTEXT_5P:TARGET_LEN-CONTEXT_3P]])[0]
     df_off_targets = pd.DataFrame(dict_off_targets)