S Santhosh Kumar committed on
Commit
df11f35
·
1 Parent(s): 35cd332

Add application file

Browse files
Files changed (4) hide show
  1. app.py +39 -0
  2. best_model.h5 +0 -0
  3. requirements.txt +4 -0
  4. utils.py +42 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import numpy as np
from utils import dnaseq_features
from keras.models import load_model

st.set_page_config(page_title = "A Simple App for predicting TFBS in a DNA sequence")


@st.cache_resource
def _load_tfbs_model():
    """Load the trained Keras model from ``best_model.h5``.

    Streamlit re-executes this script on every widget interaction; caching
    the loader keeps the model from being re-read from disk on each rerun.
    """
    return load_model('best_model.h5')


model = _load_tfbs_model()

with st.container():
    st.title('Simple Model Serving Web App for TFBS prediction')
    st.caption('Get TFBS Predictions From The Latest Model.')

# Create a horizontal line, and then a new container.
st.markdown("---")

with st.container():

    dna_seq = st.text_area("Input DNA sequence", 'ATAGAGAC...')

    trigger = st.button('Make Prediction')

    if trigger:

        st.info("Loading the data for predictions")

        # Encode only after the button is pressed, so a half-typed sequence
        # cannot raise while the page is merely being re-rendered.
        try:
            dna_ohe_feat, _, ds_val = dnaseq_features(seq=dna_seq)
        except (KeyError, ValueError) as err:
            # KeyError: a character outside the encoder's alphabet;
            # ValueError: windows that could not be stacked into one array.
            st.error(f"Could not encode the DNA sequence: {err}")
        else:
            if np.size(dna_ohe_feat) == 0:
                # Nothing to score; do not hand an empty array to the model.
                st.warning(
                    "The sequence is too short to build a complete window "
                    "for prediction."
                )
            else:
                predicted_labels = model.predict(dna_ohe_feat)

                # One result per encoded window: show the window's bases,
                # then whether class 1 has the highest score for it.
                for segment, scores in zip(ds_val, predicted_labels):
                    st.write(segment)
                    if np.argmax(scores) == 1:
                        st.success("TFBS found :thumbsup:")
                    else:
                        st.error('TFBS not found :thumbsdown:')
best_model.h5 ADDED
Binary file (192 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ numpy
2
+ streamlit
3
+ keras
4
+ tensorflow
utils.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utilities for preparing DNA sequence features for ML applications
2
+ import numpy as np
3
+
4
# Prepare DNA sequence features for ML applications
def dnaseq_features(seq, seg_len=101, max_segs=3):
    """Split a DNA sequence into fixed-length windows and one-hot encode them.

    The sequence is cut into consecutive, non-overlapping windows of
    ``seg_len`` characters starting at position 0. Trailing characters that
    do not fill a complete window are ignored. Each window is encoded over
    the alphabet ``ACGT`` (column order A, C, G, T), case-insensitively.

    Args:
        seq: DNA sequence as a string.
        seg_len: Number of characters per window (default 101).
        max_segs: Maximum number of windows to return (default 3). Pass
            ``None`` to return every complete window in ``seq``.

    Returns:
        A tuple ``(matrix, index, values)``:
        * ``matrix`` -- integer array of shape ``(n_windows, seg_len, 4)``;
          row ``k`` is the one-hot encoding of ``values[k]``.
        * ``index`` -- list of identifiers ``"seq_<start>:<end>"``.
        * ``values`` -- list of the window substrings exactly as given.
        When ``seq`` holds no complete window, ``matrix`` has shape
        ``(0, seg_len, 4)`` and both lists are empty.

    Raises:
        ValueError: If ``seg_len`` is less than 1 or ``max_segs`` is negative.
        KeyError: If a window contains a character other than A, C, G or T.
    """
    if seg_len < 1:
        raise ValueError("seg_len must be a positive integer")
    if max_segs is not None and max_segs < 0:
        raise ValueError("max_segs must be non-negative or None")

    seq_name = 'seq'
    abc = 'ACGT'
    # Built once; the mapping does not depend on the window being encoded.
    char_to_int = {c: i for i, c in enumerate(abc)}

    # Only complete windows are encoded, so a short or exactly divisible
    # sequence never produces ragged or empty rows.
    n_windows = len(seq) // seg_len
    if max_segs is not None:
        n_windows = min(n_windows, max_segs)

    index = []
    values = []
    matrix = np.zeros((n_windows, seg_len, len(abc)), dtype=int)
    positions = np.arange(seg_len)

    for k in range(n_windows):
        a = k * seg_len
        b = a + seg_len
        window = seq[a:b]
        index.append(f"{seq_name}_{a}:{b}")
        values.append(window)

        # Rows are written in window order so that matrix[k] lines up with
        # index[k] and values[k].
        int_enc = [char_to_int[char] for char in window.upper()]
        matrix[k, positions, int_enc] = 1

    return matrix, index, values