astirn committed on
Commit
59874d6
·
1 Parent(s): 34274e5

context len added

Browse files
model/fingerprint.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8a1907e870a9801ec2efed2ace1024a45d6bdf1c842493d025a3b685e8677e9
3
  size 53
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01a50341063d589fc4efcbb7dc7354318f9dcdba65575e608759284dcc0d8162
3
  size 53
model/keras_metadata.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fdfca7eaf886b5bd775516684b3d32446ba66beb9b042601f0d63c84674f23e
3
- size 13587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c7badc5998ecd142564cb70002b001ee812d404f4ac30976bb33c1233ab898a
3
+ size 13592
model/saved_model.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f09bd71084c78af0ec4b66df96688667eae44ec08da3563856a52d9bdaed04b8
3
  size 214038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c94e1de45290f8663320886419bd4cf611aa7fa00fce146bc0d96d35b8b5e39
3
  size 214038
model/variables/variables.data-00000-of-00001 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80dcef3ab68a09f348ae6842d28426a4b8f08db2ff441bf1c353634836dbe81a
3
- size 489607
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f573e7920d24eacd8c00c32f2995392f038629a4ce5ee27d6454448025276e
3
+ size 522375
model/variables/variables.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb901920518efdffd73312c61f080e3d7f959414e5627a7113c959c150140db8
3
  size 869
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:612540024fe115acd056ffc34e9d73b223a14c8620f06c6caee871a3a61f8941
3
  size 869
tiger.py CHANGED
@@ -3,15 +3,11 @@ import tensorflow as tf
3
  import pandas as pd
4
 
5
  GUIDE_LEN = 23
 
 
 
6
  NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
7
 
8
- # load model
9
- if os.path.exists('model'):
10
- tiger = tf.keras.models.load_model('model')
11
- else:
12
- print('no saved model!')
13
- exit()
14
-
15
 
16
  def process_data(transcript_seq: str):
17
 
@@ -19,8 +15,7 @@ def process_data(transcript_seq: str):
19
  transcript_seq = transcript_seq.upper()
20
 
21
  # get all target sites
22
- num_target_sites = len(transcript_seq) - GUIDE_LEN + 1
23
- target_seq = [transcript_seq[i:i + GUIDE_LEN] for i in range(num_target_sites)]
24
 
25
  # get one-hot encodings
26
  nucleotide_table = tf.lookup.StaticVocabularyTable(
@@ -29,13 +24,20 @@ def process_data(transcript_seq: str):
29
  values=tf.constant(list(NUCLEOTIDE_TOKENS.values()), dtype=tf.int64)),
30
  num_oov_buckets=1)
31
  target_tokens = nucleotide_table.lookup(tf.stack([list(t) for t in target_seq], axis=0))
32
- target_one_hot = tf.reshape(tf.one_hot(target_tokens, depth=4), [num_target_sites, -1])
33
 
34
  return target_seq, target_one_hot
35
 
36
 
37
  def tiger_predict(transcript_seq: str):
38
 
 
 
 
 
 
 
 
39
  # parse transcript sequence into 23-nt target sequences and their one-hot encodings
40
  target_seq, target_seq_one_hot = process_data(transcript_seq)
41
 
 
3
  import pandas as pd
4
 
5
  GUIDE_LEN = 23
6
+ CONTEXT_5P = 3
7
+ CONTEXT_3P = 0
8
+ TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
9
  NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
10
 
 
 
 
 
 
 
 
11
 
12
  def process_data(transcript_seq: str):
13
 
 
15
  transcript_seq = transcript_seq.upper()
16
 
17
  # get all target sites
18
+ target_seq = [transcript_seq[i: i + TARGET_LEN] for i in range(len(transcript_seq) - TARGET_LEN)]
 
19
 
20
  # get one-hot encodings
21
  nucleotide_table = tf.lookup.StaticVocabularyTable(
 
24
  values=tf.constant(list(NUCLEOTIDE_TOKENS.values()), dtype=tf.int64)),
25
  num_oov_buckets=1)
26
  target_tokens = nucleotide_table.lookup(tf.stack([list(t) for t in target_seq], axis=0))
27
+ target_one_hot = tf.reshape(tf.one_hot(target_tokens, depth=4), [len(target_seq), -1])
28
 
29
  return target_seq, target_one_hot
30
 
31
 
32
  def tiger_predict(transcript_seq: str):
33
 
34
+ # load model
35
+ if os.path.exists('model'):
36
+ tiger = tf.keras.models.load_model('model')
37
+ else:
38
+ print('no saved model!')
39
+ exit()
40
+
41
  # parse transcript sequence into 23-nt target sequences and their one-hot encodings
42
  target_seq, target_seq_one_hot = process_data(transcript_seq)
43