Spaces:

SthanikamSanthosh
/

predict-tabs

Sleeping

S Santhosh Kumar committed on Mar 21, 2024

Commit

df11f35

1 Parent(s): 35cd332

Add application file

Files changed (4) hide show

app.py ADDED Viewed

"""Streamlit front end: split a DNA sequence into windows and report, per window,
whether the bundled Keras model predicts a transcription-factor binding site."""
import numpy as np
import streamlit as st
from keras.models import load_model

from utils import dnaseq_features

# Kept as the first Streamlit call in the script, as in the original layout.
st.set_page_config(page_title="A Simple App for predicting TFBS in a DNA sequence")


@st.cache_resource
def _get_model():
    """Load the Keras model once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; without the
    cache the model file would be read from disk again on each button click.
    """
    return load_model('best_model.h5')


model = _get_model()

with st.container():
    st.title('Simple Model Serving Web App for TFBS prediction')
    st.caption('Get TFBS Predictions From The Latest Model.')

# Create a horizontal line, and then a new container.
st.markdown("---")

with st.container():
    dna_seq = st.text_area("Input DNA sequence", 'ATAGAGAC...')
    if st.button('Make Prediction'):
        st.info("Loading the data for predictions")
        # Encode only after the button is pressed so the placeholder text is
        # never pushed through feature extraction on page load.
        try:
            dna_ohe_feat, _ds_index, ds_val = dnaseq_features(seq=dna_seq)
        except Exception as err:  # UI boundary: report the problem instead of crashing the page
            st.error(f"Could not encode the sequence: {err}")
        else:
            if dna_ohe_feat.size == 0:
                # Nothing was encoded, so there is nothing to send to the model.
                st.warning("The sequence is too short to form a complete window; nothing to predict.")
            else:
                predicted_labels = model.predict(dna_ohe_feat)
                for segment, scores in zip(ds_val, predicted_labels):
                    st.write(segment)
                    # NOTE(review): assumes output class index 1 means "TFBS present",
                    # as the original condition implies -- confirm against training labels.
                    if np.argmax(scores) == 1:
                        st.success("TFBS found :thumbsup:")
                    else:
                        st.error('TFBS not found :thumbsdown:')

best_model.h5 ADDED Viewed

Binary file (192 kB). View file

requirements.txt ADDED Viewed

+numpy
+streamlit
+keras
+tensorflow

utils.py ADDED Viewed

# Helpers for preparing DNA sequence features for ML applications.
import numpy as np

# Channel order of the one-hot encoding: column 0 is A, 1 is C, 2 is G, 3 is T.
_BASES = "ACGT"
_BASE_TO_INT = {base: pos for pos, base in enumerate(_BASES)}


def dnaseq_features(seq, seg_len=101, seq_name="seq"):
    """One-hot encode a DNA string as consecutive fixed-width windows.

    Whitespace (including newlines from pasted text) is removed and the
    sequence is upper-cased. It is then cut, from the start, into as many
    complete, non-overlapping windows of ``seg_len`` bases as fit. Trailing
    bases that do not fill a whole window are dropped and are not validated.

    Args:
        seq: DNA sequence as a string.
        seg_len: Number of bases per window (default 101).
        seq_name: Prefix used when building each window's label.

    Returns:
        A tuple ``(matrix, index, values)`` where
        ``matrix`` is an integer array of shape ``(n_windows, seg_len, 4)``
        whose last axis follows the ``ACGT`` order,
        ``index`` is a list of labels of the form ``"<seq_name>_<start>:<stop>"``,
        and ``values`` is the list of window strings. Row ``i`` of ``matrix``
        corresponds to ``index[i]`` and ``values[i]``. When the input is
        shorter than one window, ``matrix`` has zero rows and both lists are
        empty.

    Raises:
        ValueError: If ``seg_len`` is not positive, or a window contains a
            character other than A, C, G or T.
    """
    if seg_len <= 0:
        raise ValueError(f"seg_len must be a positive integer, got {seg_len!r}")

    cleaned = "".join(seq.split()).upper()
    n_windows = len(cleaned) // seg_len

    index = []
    values = []
    matrix = np.zeros((n_windows, seg_len, len(_BASES)), dtype=int)
    positions = np.arange(seg_len)

    for row in range(n_windows):
        start = row * seg_len
        stop = start + seg_len
        window = cleaned[start:stop]
        try:
            codes = [_BASE_TO_INT[base] for base in window]
        except KeyError as err:
            raise ValueError(
                f"Unsupported character {err.args[0]!r} in window {start}:{stop}; "
                f"only {_BASES} are allowed"
            ) from None
        # Rows are written in input order so they stay aligned with index/values.
        matrix[row, positions, codes] = 1
        index.append(f"{seq_name}_{start}:{stop}")
        values.append(window)

    return matrix, index, values