Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

astirn committed on Jan 2, 2023

Commit

59874d6

1 Parent(s): 34274e5

context len added

Files changed (6) hide show

model/fingerprint.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8a1907e870a9801ec2efed2ace1024a45d6bdf1c842493d025a3b685e8677e9
 size 53

 version https://git-lfs.github.com/spec/v1
+oid sha256:01a50341063d589fc4efcbb7dc7354318f9dcdba65575e608759284dcc0d8162
 size 53

model/keras_metadata.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fdfca7eaf886b5bd775516684b3d32446ba66beb9b042601f0d63c84674f23e
-size 13587

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c7badc5998ecd142564cb70002b001ee812d404f4ac30976bb33c1233ab898a
+size 13592

model/saved_model.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f09bd71084c78af0ec4b66df96688667eae44ec08da3563856a52d9bdaed04b8
 size 214038

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c94e1de45290f8663320886419bd4cf611aa7fa00fce146bc0d96d35b8b5e39
 size 214038

model/variables/variables.data-00000-of-00001 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80dcef3ab68a09f348ae6842d28426a4b8f08db2ff441bf1c353634836dbe81a
-size 489607

 version https://git-lfs.github.com/spec/v1
+oid sha256:96f573e7920d24eacd8c00c32f2995392f038629a4ce5ee27d6454448025276e
+size 522375

model/variables/variables.index CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb901920518efdffd73312c61f080e3d7f959414e5627a7113c959c150140db8
 size 869

 version https://git-lfs.github.com/spec/v1
+oid sha256:612540024fe115acd056ffc34e9d73b223a14c8620f06c6caee871a3a61f8941
 size 869

tiger.py CHANGED Viewed

@@ -3,15 +3,11 @@ import tensorflow as tf
 import pandas as pd
 GUIDE_LEN = 23
 NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
-# load model
-if os.path.exists('model'):
-    tiger = tf.keras.models.load_model('model')
-else:
-    print('no saved model!')
-    exit()
 def process_data(transcript_seq: str):
@@ -19,8 +15,7 @@ def process_data(transcript_seq: str):
     transcript_seq = transcript_seq.upper()
     # get all target sites
-    num_target_sites = len(transcript_seq) - GUIDE_LEN + 1
-    target_seq = [transcript_seq[i:i + GUIDE_LEN] for i in range(num_target_sites)]
     # get one-hot encodings
     nucleotide_table = tf.lookup.StaticVocabularyTable(
@@ -29,13 +24,20 @@ def process_data(transcript_seq: str):
             values=tf.constant(list(NUCLEOTIDE_TOKENS.values()), dtype=tf.int64)),
         num_oov_buckets=1)
     target_tokens = nucleotide_table.lookup(tf.stack([list(t) for t in target_seq], axis=0))
-    target_one_hot = tf.reshape(tf.one_hot(target_tokens, depth=4), [num_target_sites, -1])
     return target_seq, target_one_hot
 def tiger_predict(transcript_seq: str):
     # parse transcript sequence into 23-nt target sequences and their one-hot encodings
     target_seq, target_seq_one_hot = process_data(transcript_seq)

 import pandas as pd
 GUIDE_LEN = 23
+CONTEXT_5P = 3
+CONTEXT_3P = 0
+TARGET_LEN = CONTEXT_5P + GUIDE_LEN + CONTEXT_3P
 NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
 def process_data(transcript_seq: str):
     transcript_seq = transcript_seq.upper()
     # get all target sites
+    target_seq = [transcript_seq[i: i + TARGET_LEN] for i in range(len(transcript_seq) - TARGET_LEN)]
     # get one-hot encodings
     nucleotide_table = tf.lookup.StaticVocabularyTable(
             values=tf.constant(list(NUCLEOTIDE_TOKENS.values()), dtype=tf.int64)),
         num_oov_buckets=1)
     target_tokens = nucleotide_table.lookup(tf.stack([list(t) for t in target_seq], axis=0))
+    target_one_hot = tf.reshape(tf.one_hot(target_tokens, depth=4), [len(target_seq), -1])
     return target_seq, target_one_hot
 def tiger_predict(transcript_seq: str):
+    # load model
+    if os.path.exists('model'):
+        tiger = tf.keras.models.load_model('model')
+    else:
+        print('no saved model!')
+        exit()
     # parse transcript sequence into 23-nt target sequences and their one-hot encodings
     target_seq, target_seq_one_hot = process_data(transcript_seq)