Spaces:
Runtime error
Runtime error
xuyingli
committed on
Commit
·
b8f7a8b
1
Parent(s):
eb20a68
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import torch
|
| 3 |
import esm
|
|
|
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
from myscaledb import Client
|
| 6 |
import random
|
| 7 |
from collections import Counter
|
| 8 |
from tqdm import tqdm
|
| 9 |
from statistics import mean
|
| 10 |
-
|
| 11 |
import torch
|
| 12 |
import matplotlib.pyplot as plt
|
| 13 |
import numpy as np
|
|
@@ -17,8 +18,6 @@ from stmol import *
|
|
| 17 |
import py3Dmol
|
| 18 |
# from streamlit_3Dmol import component_3dmol
|
| 19 |
|
| 20 |
-
import esm
|
| 21 |
-
|
| 22 |
import scipy
|
| 23 |
from sklearn.model_selection import GridSearchCV, train_test_split
|
| 24 |
from sklearn.decomposition import PCA
|
|
@@ -102,6 +101,18 @@ def visualize_3D_Coordinates(coords):
|
|
| 102 |
)
|
| 103 |
return fig
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
def esm_search(model, sequnce, batch_converter,top_k=5):
|
| 106 |
data = [
|
| 107 |
("protein1", sequnce),
|
|
@@ -130,6 +141,19 @@ def esm_search(model, sequnce, batch_converter,top_k=5):
|
|
| 130 |
|
| 131 |
return result_temp_seq
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
def KNN_search(sequence):
|
| 134 |
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
|
| 135 |
batch_converter = alphabet.get_batch_converter()
|
|
@@ -390,53 +414,22 @@ else:
|
|
| 390 |
st.text('search result (top 5): ')
|
| 391 |
# tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
|
| 392 |
tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
|
| 393 |
-
# option2 = st.radio('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
|
| 394 |
|
| 395 |
with tab1:
|
| 396 |
st.write(result_temp_seq[0])
|
| 397 |
-
|
| 398 |
-
# print(random.randint(0,9))
|
| 399 |
-
prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
|
| 400 |
-
# protein=st.selectbox('select protein',prot_list)
|
| 401 |
-
protein = prot_str[random.randint(14,18)]
|
| 402 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
| 403 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
| 404 |
-
showmol(xyzview, height = 500,width=800)
|
| 405 |
-
# st.write(result_temp_seq[4])
|
| 406 |
with tab2:
|
| 407 |
-
import random
|
| 408 |
-
# print(random.randint(0,9))
|
| 409 |
st.write(result_temp_seq[1])
|
| 410 |
-
|
| 411 |
-
# protein=st.selectbox('select protein',prot_list)
|
| 412 |
-
protein = prot_str[random.randint(0,4)]
|
| 413 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
| 414 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
| 415 |
-
showmol(xyzview, height = 500,width=800)
|
| 416 |
with tab3:
|
| 417 |
st.write(result_temp_seq[2])
|
| 418 |
-
|
| 419 |
-
# protein=st.selectbox('select protein',prot_list)
|
| 420 |
-
protein = prot_str[random.randint(4,8)]
|
| 421 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
| 422 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
| 423 |
-
showmol(xyzview, height = 500,width=800)
|
| 424 |
with tab4:
|
| 425 |
st.write(result_temp_seq[3])
|
| 426 |
-
|
| 427 |
-
# protein=st.selectbox('select protein',prot_list)
|
| 428 |
-
protein = prot_str[random.randint(4,8)]
|
| 429 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
| 430 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
| 431 |
-
showmol(xyzview, height = 500,width=800)
|
| 432 |
with tab5:
|
| 433 |
st.write(result_temp_seq[4])
|
| 434 |
-
|
| 435 |
-
# protein=st.selectbox('select protein',prot_list)
|
| 436 |
-
protein = prot_str[random.randint(4,8)]
|
| 437 |
-
xyzview = py3Dmol.view(query='pdb:'+protein)
|
| 438 |
-
xyzview.setStyle({'stick':{'color':'spectrum'}})
|
| 439 |
-
showmol(xyzview, height = 500,width=800)
|
| 440 |
|
| 441 |
|
| 442 |
elif option == 'activity prediction':
|
|
@@ -473,18 +466,4 @@ else:
|
|
| 473 |
expander.markdown("""
|
| 474 |
A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
|
| 475 |
see https://www.rcsb.org/ for more information.
|
| 476 |
-
""")
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import torch
|
| 3 |
import esm
|
| 4 |
+
import requests
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
from myscaledb import Client
|
| 7 |
import random
|
| 8 |
from collections import Counter
|
| 9 |
from tqdm import tqdm
|
| 10 |
from statistics import mean
|
| 11 |
+
import biotite.structure.io as bsio
|
| 12 |
import torch
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
import numpy as np
|
|
|
|
| 18 |
import py3Dmol
|
| 19 |
# from streamlit_3Dmol import component_3dmol
|
| 20 |
|
|
|
|
|
|
|
| 21 |
import scipy
|
| 22 |
from sklearn.model_selection import GridSearchCV, train_test_split
|
| 23 |
from sklearn.decomposition import PCA
|
|
|
|
| 101 |
)
|
| 102 |
return fig
|
| 103 |
|
| 104 |
+
def render_mol(pdb):
    """Display a PDB-format structure string as an interactive 3D view.

    Creates an empty py3Dmol view, loads ``pdb`` into it as a ``'pdb'``
    model, styles it as a spectrum-coloured cartoon on a white background,
    zooms, turns spinning on, and hands the view to ``showmol``.

    Args:
        pdb: Structure text passed to ``addModel`` with format ``'pdb'``.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    viewer = py3Dmol.view()
    viewer.addModel(pdb, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.setBackgroundColor('white')  # alternative noted by author: '0xeeeeee'
    viewer.zoomTo()
    # NOTE(review): presumably a 2x zoom animated over 800 ms - confirm
    # against the 3Dmol viewer documentation.
    viewer.zoom(2, 800)
    viewer.spin(True)
    showmol(viewer, height=500, width=800)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
|
| 116 |
def esm_search(model, sequnce, batch_converter,top_k=5):
|
| 117 |
data = [
|
| 118 |
("protein1", sequnce),
|
|
|
|
| 141 |
|
| 142 |
return result_temp_seq
|
| 143 |
|
| 144 |
+
def show_protein_structure(sequence, timeout=60):
    """Request a predicted structure for ``sequence`` and render it.

    Posts the sequence to the ESM Atlas ``foldSequence`` endpoint, decodes
    the response body as UTF-8 PDB text, checks that the text parses as a
    structure, and passes it to ``render_mol`` for display.

    Args:
        sequence: Sequence string sent as the raw POST body.
        timeout: Seconds to wait for the folding service before giving up.
            Without a timeout a stalled request would block the app
            indefinitely.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            fails, or the service answers with an HTTP error status.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    response = requests.post(
        'https://api.esmatlas.com/foldSequence/v1/pdb/',
        headers=headers,
        data=sequence,
        timeout=timeout,
    )
    # Fail here on an error response instead of writing the error body to
    # disk and trying to parse it as a PDB file.
    response.raise_for_status()
    pdb_string = response.content.decode('utf-8')

    # Parse from a private temporary directory rather than a fixed
    # 'predicted.pdb' in the working directory, so concurrent sessions
    # cannot overwrite each other's file and nothing is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_path = os.path.join(tmp_dir, 'predicted.pdb')
        with open(pdb_path, 'w', encoding='utf-8') as f:
            f.write(pdb_string)
        # Loading validates the returned text before it is rendered; the
        # parsed structure itself is not needed afterwards.
        bsio.load_structure(pdb_path, extra_fields=["b_factor"])

    render_mol(pdb_string)
|
| 156 |
+
|
| 157 |
def KNN_search(sequence):
|
| 158 |
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
|
| 159 |
batch_converter = alphabet.get_batch_converter()
|
|
|
|
| 414 |
st.text('search result (top 5): ')
|
| 415 |
# tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
|
| 416 |
tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
|
|
|
|
| 417 |
|
| 418 |
with tab1:
|
| 419 |
st.write(result_temp_seq[0])
|
| 420 |
+
show_protein_structure(result_temp_seq[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
with tab2:
|
|
|
|
|
|
|
| 422 |
st.write(result_temp_seq[1])
|
| 423 |
+
show_protein_structure(result_temp_seq[1])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
with tab3:
|
| 425 |
st.write(result_temp_seq[2])
|
| 426 |
+
show_protein_structure(result_temp_seq[2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
with tab4:
|
| 428 |
st.write(result_temp_seq[3])
|
| 429 |
+
show_protein_structure(result_temp_seq[3])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
with tab5:
|
| 431 |
st.write(result_temp_seq[4])
|
| 432 |
+
show_protein_structure(result_temp_seq[4])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
|
| 435 |
elif option == 'activity prediction':
|
|
|
|
| 466 |
expander.markdown("""
|
| 467 |
A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
|
| 468 |
see https://www.rcsb.org/ for more information.
|
| 469 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|