Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,395 Bytes
89be9f9 814d067 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
import tensorflow as tf
import pandas as pd
GUIDE_LEN = 23
NUCLEOTIDE_TOKENS = dict(zip(['A', 'C', 'G', 'T'], [0, 1, 2, 3]))
# load model
if os.path.exists('model'):
tiger = tf.keras.models.load_model('model')
else:
print('no saved model!')
exit()
def process_data(x):
x = [item.upper() for item in x]
number_of_input = len(x) - GUIDE_LEN + 1
input_gens = []
for i in range(number_of_input):
input_gens.append("".join(x[i:i + GUIDE_LEN]))
merged_token = []
token_x = [NUCLEOTIDE_TOKENS[item] for item in x]
for i in range(number_of_input):
merged_token.extend(token_x[i:i + GUIDE_LEN])
one_hot_x = tf.one_hot(merged_token, depth=4)
model_input_x = tf.reshape(one_hot_x, [-1, GUIDE_LEN * 4])
return input_gens, model_input_x
def tiger_predict(transcript_seq: str):
# parse transcript sequence into 23-nt target sequences and their one-hot encodings
target_seq, target_seq_one_hot = process_data(transcript_seq)
# get predictions
normalized_lfc = tiger.predict_step(target_seq_one_hot)
predictions = pd.DataFrame({'Target site': target_seq, 'Normalized LFC': tf.squeeze(normalized_lfc).numpy()})
return predictions
if __name__ == '__main__':
# simple test case
transcript_sequence = 'ACGTACGTACGTACGTACGTACGTACGTACGT'
df = tiger_predict(transcript_sequence)
print(df)
|