test seqvec encoder
Browse files
app.py
CHANGED
|
@@ -23,15 +23,13 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
| 23 |
|
| 24 |
st.set_page_config(layout="wide")
|
| 25 |
|
| 26 |
-
st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions
|
| 27 |
st.markdown('')
|
| 28 |
st.markdown(
|
| 29 |
"""
|
| 30 |
-
🧬 Github: [ml-jku/hyper-dti](https://https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL)\n
|
| 31 |
"""
|
| 32 |
)
|
| 33 |
-
#st.error('WARNING! This app is currently under development and should not be used!')
|
| 34 |
-
#st.divider()
|
| 35 |
|
| 36 |
def about_page():
|
| 37 |
st.markdown(
|
|
@@ -57,9 +55,9 @@ def about_page():
|
|
| 57 |
|
| 58 |
|
| 59 |
def retrieval():
|
| 60 |
-
st.markdown('##
|
| 61 |
|
| 62 |
-
st.write('
|
| 63 |
|
| 64 |
col1, col2 = st.columns(2)
|
| 65 |
with col1:
|
|
@@ -71,7 +69,7 @@ def retrieval():
|
|
| 71 |
with col1:
|
| 72 |
ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
|
| 73 |
sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
|
| 74 |
-
if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA' or sequence == ex_target:
|
| 75 |
st.image('figures/ex_protein.jpeg', use_column_width='always')
|
| 76 |
elif sequence:
|
| 77 |
st.error('Visualization coming soon...')
|
|
@@ -84,12 +82,22 @@ def retrieval():
|
|
| 84 |
if selected_encoder == 'SeqVec':
|
| 85 |
st.image('figures/protein_encoder_done.png')
|
| 86 |
with st.spinner('Encoding in progress...'):
|
| 87 |
-
|
| 88 |
-
|
| 89 |
with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
|
| 90 |
test_set = pickle.load(handle)
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
st.success('Encoding complete.')
|
| 94 |
else:
|
| 95 |
query_embedding = None
|
|
|
|
| 23 |
|
| 24 |
st.set_page_config(layout="wide")
|
| 25 |
|
| 26 |
+
st.title('HyperDTI: Robust Task-Conditioned Modeling of Drug-Target Interactions\n')
|
| 27 |
st.markdown('')
|
| 28 |
st.markdown(
|
| 29 |
"""
|
| 30 |
+
🧬 Github: [ml-jku/hyper-dti](https://github.com/ml-jku/hyper-dti) 📝 NeurIPS 2022 AI4Science workshop paper: [OpenReview](https://openreview.net/forum?id=dIX34JWnIAL) TBA Journal of Chemical Information and Modeling. \n
|
| 31 |
"""
|
| 32 |
)
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def about_page():
|
| 35 |
st.markdown(
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def retrieval():
|
| 58 |
+
st.markdown('## Retrieval of most active drug compounds')
|
| 59 |
|
| 60 |
+
st.write('Use HyperPCM to generate a QSAR model for a selected query protein target and retrieve the top-k drug compounds predicted to have the highest activity toward the given protein target from the Lenselink datasets.')
|
| 61 |
|
| 62 |
col1, col2 = st.columns(2)
|
| 63 |
with col1:
|
|
|
|
| 69 |
with col1:
|
| 70 |
ex_target = 'YTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGTILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKTVNVCNWI'
|
| 71 |
sequence = st.text_input('Enter amino-acid sequence', value=ex_target, placeholder=ex_target)
|
| 72 |
+
if sequence == 'HXHVWPVQDAKARFSEFLDACITEGPQIVSRRGAEEAVLVPIGEWRRLQAAA': # or sequence == ex_target:
|
| 73 |
st.image('figures/ex_protein.jpeg', use_column_width='always')
|
| 74 |
elif sequence:
|
| 75 |
st.error('Visualization coming soon...')
|
|
|
|
| 82 |
if selected_encoder == 'SeqVec':
|
| 83 |
st.image('figures/protein_encoder_done.png')
|
| 84 |
with st.spinner('Encoding in progress...'):
|
| 85 |
+
|
|
|
|
| 86 |
with open(os.path.join(data_path, f'Lenselink/processed/SeqVec_encoding_test.pickle'), 'rb') as handle:
|
| 87 |
test_set = pickle.load(handle)
|
| 88 |
+
|
| 89 |
+
print(sequence in test_set.keys())
|
| 90 |
+
print(sequence in test_set.keys().values)
|
| 91 |
+
if sequence in test_set.keys():
|
| 92 |
+
query_embedding = test_set[sequence]
|
| 93 |
+
else:
|
| 94 |
+
from bio_embeddings.embed import SeqVecEmbedder
|
| 95 |
+
encoder = SeqVecEmbedder()
|
| 96 |
+
embeddings = encoder.embed_batch([sequence])
|
| 97 |
+
for emb in embeddings:
|
| 98 |
+
query_embedding = encoder.reduce_per_protein(emb)
|
| 99 |
+
break
|
| 100 |
+
|
| 101 |
st.success('Encoding complete.')
|
| 102 |
else:
|
| 103 |
query_embedding = None
|