Spaces:

myscale
/

Protein-Structure-Modeling

Runtime error

App Files Files Community

xuyingli committed on Feb 2, 2023

Commit

b8f7a8b

1 Parent(s): eb20a68

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -54

app.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import streamlit as st
 import torch
 import esm
 import matplotlib.pyplot as plt
 from myscaledb import Client
 import random
 from collections import Counter
 from tqdm import tqdm
 from statistics import mean
 import torch
 import matplotlib.pyplot as plt
 import numpy as np
@@ -17,8 +18,6 @@ from stmol import *
 import py3Dmol
 # from streamlit_3Dmol import component_3dmol
-import esm
 import scipy
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.decomposition import PCA
@@ -102,6 +101,18 @@ def visualize_3D_Coordinates(coords):
             )
     return fig
 def esm_search(model, sequnce, batch_converter,top_k=5):
     data = [
     ("protein1", sequnce),
@@ -130,6 +141,19 @@ def esm_search(model, sequnce, batch_converter,top_k=5):
     return result_temp_seq
 def KNN_search(sequence):
     model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
     batch_converter = alphabet.get_batch_converter()
@@ -390,53 +414,22 @@ else:
                 st.text('search result (top 5): ')
                 # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
                 tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
-                # option2 = st.radio('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
                 with tab1:
                     st.write(result_temp_seq[0])
-                    import random
-                    # print(random.randint(0,9))
-                    prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
-                    # protein=st.selectbox('select protein',prot_list)
-                    protein = prot_str[random.randint(14,18)]
-                    xyzview = py3Dmol.view(query='pdb:'+protein)
-                    xyzview.setStyle({'stick':{'color':'spectrum'}})
-                    showmol(xyzview, height = 500,width=800)
-                     # st.write(result_temp_seq[4])
                 with tab2:
-                    import random
-                    # print(random.randint(0,9))
                     st.write(result_temp_seq[1])
-                    prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
-                    # protein=st.selectbox('select protein',prot_list)
-                    protein = prot_str[random.randint(0,4)]
-                    xyzview = py3Dmol.view(query='pdb:'+protein)
-                    xyzview.setStyle({'stick':{'color':'spectrum'}})
-                    showmol(xyzview, height = 500,width=800)
                 with tab3:
                     st.write(result_temp_seq[2])
-                    prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
-                    # protein=st.selectbox('select protein',prot_list)
-                    protein = prot_str[random.randint(4,8)]
-                    xyzview = py3Dmol.view(query='pdb:'+protein)
-                    xyzview.setStyle({'stick':{'color':'spectrum'}})
-                    showmol(xyzview, height = 500,width=800)
                 with tab4:
                     st.write(result_temp_seq[3])
-                    prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
-                    # protein=st.selectbox('select protein',prot_list)
-                    protein = prot_str[random.randint(4,8)]
-                    xyzview = py3Dmol.view(query='pdb:'+protein)
-                    xyzview.setStyle({'stick':{'color':'spectrum'}})
-                    showmol(xyzview, height = 500,width=800)
                 with tab5:
                     st.write(result_temp_seq[4])
-                    prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
-                    # protein=st.selectbox('select protein',prot_list)
-                    protein = prot_str[random.randint(4,8)]
-                    xyzview = py3Dmol.view(query='pdb:'+protein)
-                    xyzview.setStyle({'stick':{'color':'spectrum'}})
-                    showmol(xyzview, height = 500,width=800)
         elif option == 'activity prediction':
@@ -473,18 +466,4 @@ else:
             expander.markdown("""
             A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
             see https://www.rcsb.org/ for more information.
-            """)

 import streamlit as st
 import torch
 import esm
+import requests
 import matplotlib.pyplot as plt
 from myscaledb import Client
 import random
 from collections import Counter
 from tqdm import tqdm
 from statistics import mean
+import biotite.structure.io as bsio
 import torch
 import matplotlib.pyplot as plt
 import numpy as np
 import py3Dmol
 # from streamlit_3Dmol import component_3dmol
 import scipy
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.decomposition import PCA
             )
     return fig
+def render_mol(pdb):
+    pdbview = py3Dmol.view()
+    pdbview.addModel(pdb,'pdb')
+    pdbview.setStyle({'cartoon':{'color':'spectrum'}})
+    pdbview.setBackgroundColor('white')#('0xeeeeee')
+    pdbview.zoomTo()
+    pdbview.zoom(2, 800)
+    pdbview.spin(True)
+    showmol(pdbview, height = 500,width=800)
 def esm_search(model, sequnce, batch_converter,top_k=5):
     data = [
     ("protein1", sequnce),
     return result_temp_seq
+def show_protein_structure(sequence):
+    headers = {
+        'Content-Type': 'application/x-www-form-urlencoded',
+        }
+    response = requests.post('https://api.esmatlas.com/foldSequence/v1/pdb/', headers=headers, data=sequence)
+    name = sequence[:3] + sequence[-3:]
+    pdb_string = response.content.decode('utf-8')
+    with open('predicted.pdb', 'w') as f:
+        f.write(pdb_string)
+    struct = bsio.load_structure('predicted.pdb', extra_fields=["b_factor"])
+    b_value = round(struct.b_factor.mean(), 4)
+    render_mol(pdb_string)
 def KNN_search(sequence):
     model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
     batch_converter = alphabet.get_batch_converter()
                 st.text('search result (top 5): ')
                 # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
                 tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
                 with tab1:
                     st.write(result_temp_seq[0])
+                    show_protein_structure(result_temp_seq[0])
                 with tab2:
                     st.write(result_temp_seq[1])
+                    show_protein_structure(result_temp_seq[1])
                 with tab3:
                     st.write(result_temp_seq[2])
+                    show_protein_structure(result_temp_seq[2])
                 with tab4:
                     st.write(result_temp_seq[3])
+                    show_protein_structure(result_temp_seq[3])
                 with tab5:
                     st.write(result_temp_seq[4])
+                    show_protein_structure(result_temp_seq[4])
         elif option == 'activity prediction':
             expander.markdown("""
             A PDB ID is a unique 4-character code for each entry in the Protein Data Bank. The first character must be a number between 1 and 9, and the remaining three characters can be letters or numbers.
             see https://www.rcsb.org/ for more information.
+            """)