astirn committed on
Commit
a690e02
·
1 Parent(s): cbbf8c9

insufficient target context cases handled

Browse files
Files changed (1) hide show
  1. tiger.py +11 -4
tiger.py CHANGED
@@ -9,7 +9,7 @@ GUIDE_LEN = 23
9
  CONTEXT_5P = 3
10
  CONTEXT_3P = 0
11
  TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
12
- NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
13
  NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
14
  NUM_TOP_GUIDES = 10
15
  NUM_MISMATCHES = 3
@@ -118,9 +118,16 @@ def find_off_targets(guides, batch_size=1000):
118
  # trim transcripts to targets
119
  dict_off_targets = df_off_targets.to_dict('records')
120
  for row in dict_off_targets:
121
- start_location = row['Midpoint'] - (GUIDE_LEN // 2) - CONTEXT_5P
122
- row['Target'] = row['Target'][start_location:start_location + TARGET_LEN]
123
- if row['Mismatches'] == 0:
 
 
 
 
 
 
 
124
  assert row['Guide'] == sequence_complement([row['Target'][CONTEXT_5P:TARGET_LEN-CONTEXT_3P]])[0]
125
  df_off_targets = pd.DataFrame(dict_off_targets)
126
 
 
9
  CONTEXT_5P = 3
10
  CONTEXT_3P = 0
11
  TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
12
+ NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
13
  NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
14
  NUM_TOP_GUIDES = 10
15
  NUM_MISMATCHES = 3
 
118
  # trim transcripts to targets
119
  dict_off_targets = df_off_targets.to_dict('records')
120
  for row in dict_off_targets:
121
+ start_location = row['Midpoint'] - (GUIDE_LEN // 2)
122
+ if start_location < CONTEXT_5P:
123
+ row['Target'] = row['Target'][0:GUIDE_LEN + CONTEXT_3P]
124
+ row['Target'] = 'N' * (TARGET_LEN - len(row['Target'])) + row['Target']
125
+ elif start_location + GUIDE_LEN + CONTEXT_3P > len(row['Target']):
126
+ row['Target'] = row['Target'][start_location - CONTEXT_5P:]
127
+ row['Target'] = row['Target'] + 'N' * (TARGET_LEN - len(row['Target']))
128
+ else:
129
+ row['Target'] = row['Target'][start_location - CONTEXT_5P:start_location + GUIDE_LEN + CONTEXT_3P]
130
+ if row['Mismatches'] == 0 and 'N' not in row['Target']:
131
  assert row['Guide'] == sequence_complement([row['Target'][CONTEXT_5P:TARGET_LEN-CONTEXT_3P]])[0]
132
  df_off_targets = pd.DataFrame(dict_off_targets)
133