astirn committed on
Commit
7233b48
·
1 Parent(s): 79e1e8e
Files changed (1) hide show
  1. tiger.py +20 -11
tiger.py CHANGED
@@ -7,25 +7,35 @@ import pandas as pd
7
  import tensorflow as tf
8
  from Bio import SeqIO
9
 
10
- GUIDE_LEN = 23
11
- CONTEXT_5P = 3
12
- CONTEXT_3P = 0
13
- TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
14
- NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
15
- NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
16
- NUM_TOP_GUIDES = 10
17
- NUM_MISMATCHES = 3
18
  ID_COL = 'Transcript ID'
19
  SEQ_COL = 'Sequence'
20
  TARGET_COL = 'Target Sequence'
21
  GUIDE_COL = 'Guide Sequence'
22
  SCORE_COL = 'Guide Score'
23
- RUN_MODES = dict(all='All on-target guides per transcript', titration='Top guides per transcript')
 
 
 
 
 
 
 
 
 
 
 
 
24
  REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
 
 
25
  BATCH_SIZE_COMPUTE = 500
26
  BATCH_SIZE_SCAN = 20
27
  BATCH_SIZE_TRANSCRIPTS = 50
28
- UNIT_INTERVAL_MAP = 'exp-lin-exp'
 
 
 
29
 
30
  # configure GPUs
31
  for gpu in tf.config.list_physical_devices('GPU'):
@@ -198,7 +208,6 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
198
 
199
  # loop over transcripts in batches
200
  i = 0
201
- print('Scanning for off-targets')
202
  off_targets = pd.DataFrame()
203
  while i < len(reference_transcripts):
204
  # select batch
 
7
  import tensorflow as tf
8
  from Bio import SeqIO
9
 
10
+ # column names
 
 
 
 
 
 
 
11
  ID_COL = 'Transcript ID'
12
  SEQ_COL = 'Sequence'
13
  TARGET_COL = 'Target Sequence'
14
  GUIDE_COL = 'Guide Sequence'
15
  SCORE_COL = 'Guide Score'
16
+
17
+ # nucleotide tokens
18
+ NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
19
+ NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
20
+
21
+ # model hyper-parameters
22
+ GUIDE_LEN = 23
23
+ CONTEXT_5P = 3
24
+ CONTEXT_3P = 0
25
+ TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
26
+ UNIT_INTERVAL_MAP = 'exp-lin-exp'
27
+
28
+ # reference transcript files
29
  REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
30
+
31
+ # application configuration
32
  BATCH_SIZE_COMPUTE = 500
33
  BATCH_SIZE_SCAN = 20
34
  BATCH_SIZE_TRANSCRIPTS = 50
35
+ NUM_TOP_GUIDES = 10
36
+ NUM_MISMATCHES = 3
37
+ RUN_MODES = dict(all='All on-target guides per transcript', titration='Top guides per transcript')
38
+
39
 
40
  # configure GPUs
41
  for gpu in tf.config.list_physical_devices('GPU'):
 
208
 
209
  # loop over transcripts in batches
210
  i = 0
 
211
  off_targets = pd.DataFrame()
212
  while i < len(reference_transcripts):
213
  # select batch