Spaces:
Running
on
CPU Upgrade
cleanup
Browse files
tiger.py
CHANGED
|
@@ -7,25 +7,35 @@ import pandas as pd
|
|
| 7 |
import tensorflow as tf
|
| 8 |
from Bio import SeqIO
|
| 9 |
|
| 10 |
-
|
| 11 |
-
CONTEXT_5P = 3
|
| 12 |
-
CONTEXT_3P = 0
|
| 13 |
-
TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
|
| 14 |
-
NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
|
| 15 |
-
NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
|
| 16 |
-
NUM_TOP_GUIDES = 10
|
| 17 |
-
NUM_MISMATCHES = 3
|
| 18 |
ID_COL = 'Transcript ID'
|
| 19 |
SEQ_COL = 'Sequence'
|
| 20 |
TARGET_COL = 'Target Sequence'
|
| 21 |
GUIDE_COL = 'Guide Sequence'
|
| 22 |
SCORE_COL = 'Guide Score'
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
|
|
|
|
|
|
|
| 25 |
BATCH_SIZE_COMPUTE = 500
|
| 26 |
BATCH_SIZE_SCAN = 20
|
| 27 |
BATCH_SIZE_TRANSCRIPTS = 50
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# configure GPUs
|
| 31 |
for gpu in tf.config.list_physical_devices('GPU'):
|
|
@@ -198,7 +208,6 @@ def find_off_targets(top_guides: pd.DataFrame, status_bar, status_text):
|
|
| 198 |
|
| 199 |
# loop over transcripts in batches
|
| 200 |
i = 0
|
| 201 |
-
print('Scanning for off-targets')
|
| 202 |
off_targets = pd.DataFrame()
|
| 203 |
while i < len(reference_transcripts):
|
| 204 |
# select batch
|
|
|
|
| 7 |
import tensorflow as tf
|
| 8 |
from Bio import SeqIO
|
| 9 |
|
| 10 |
+
# column names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
ID_COL = 'Transcript ID'
|
| 12 |
SEQ_COL = 'Sequence'
|
| 13 |
TARGET_COL = 'Target Sequence'
|
| 14 |
GUIDE_COL = 'Guide Sequence'
|
| 15 |
SCORE_COL = 'Guide Score'
|
| 16 |
+
|
| 17 |
+
# nucleotide tokens
|
| 18 |
+
NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T', 'N'], [0, 1, 2, 3, 255]))
|
| 19 |
+
NUCLEOTIDE_COMPLEMENT = dict(zip(['A', 'C', 'G', 'T'], ['T', 'G', 'C', 'A']))
|
| 20 |
+
|
| 21 |
+
# model hyper-parameters
|
| 22 |
+
GUIDE_LEN = 23
|
| 23 |
+
CONTEXT_5P = 3
|
| 24 |
+
CONTEXT_3P = 0
|
| 25 |
+
TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
|
| 26 |
+
UNIT_INTERVAL_MAP = 'exp-lin-exp'
|
| 27 |
+
|
| 28 |
+
# reference transcript files
|
| 29 |
REFERENCE_TRANSCRIPTS = ('gencode.v19.pc_transcripts.fa.gz', 'gencode.v19.lncRNA_transcripts.fa.gz')
|
| 30 |
+
|
| 31 |
+
# application configuration
|
| 32 |
BATCH_SIZE_COMPUTE = 500
|
| 33 |
BATCH_SIZE_SCAN = 20
|
| 34 |
BATCH_SIZE_TRANSCRIPTS = 50
|
| 35 |
+
NUM_TOP_GUIDES = 10
|
| 36 |
+
NUM_MISMATCHES = 3
|
| 37 |
+
RUN_MODES = dict(all='All on-target guides per transcript', titration='Top guides per transcript')
|
| 38 |
+
|
| 39 |
|
| 40 |
# configure GPUs
|
| 41 |
for gpu in tf.config.list_physical_devices('GPU'):
|
|
|
|
| 208 |
|
| 209 |
# loop over transcripts in batches
|
| 210 |
i = 0
|
|
|
|
| 211 |
off_targets = pd.DataFrame()
|
| 212 |
while i < len(reference_transcripts):
|
| 213 |
# select batch
|