Update app.py
Browse files
app.py
CHANGED
|
@@ -15,8 +15,8 @@ import urllib.parse
|
|
| 15 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
| 16 |
|
| 17 |
st.set_page_config(layout='wide')
|
| 18 |
-
st.sidebar.title('🔮 GenPro2 Protein Generator
|
| 19 |
-
st.sidebar.write('GenPro2 is an end-to-end
|
| 20 |
|
| 21 |
def generate_sequence_from_words(words, length):
|
| 22 |
seed = ' '.join(words).encode('utf-8')
|
|
@@ -36,7 +36,7 @@ def render_mol(pdb):
|
|
| 36 |
|
| 37 |
def perform_blast_analysis(sequence):
|
| 38 |
st.subheader('Protein Analysis')
|
| 39 |
-
with st.spinner("Analyzing generated protein... This may take a
|
| 40 |
progress_bar = st.progress(0)
|
| 41 |
for i in range(100):
|
| 42 |
progress_bar.progress(i + 1)
|
|
@@ -61,18 +61,14 @@ def perform_blast_analysis(sequence):
|
|
| 61 |
identity_percentage = (hsp.identities / alignment.length) * 100
|
| 62 |
|
| 63 |
st.write(f"**Top Match:** {protein_name}")
|
| 64 |
-
st.write(f"**Organism:** {organism}")
|
| 65 |
st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
|
| 66 |
-
|
| 67 |
|
| 68 |
# Fetch protein function (if available)
|
| 69 |
if hasattr(alignment, 'description') and alignment.description:
|
| 70 |
st.write(f"**Potential Function:** {alignment.description}")
|
| 71 |
|
| 72 |
-
# Link to BLAST results
|
| 73 |
-
blast_link = f"https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome"
|
| 74 |
-
st.markdown(f"[View full BLAST results (may require re-running the search)]({blast_link})")
|
| 75 |
-
else:
|
| 76 |
st.write("No significant matches found. This might be a unique protein sequence!")
|
| 77 |
except Exception as e:
|
| 78 |
st.error(f"An error occurred during protein analysis: {str(e)}")
|
|
@@ -113,7 +109,7 @@ def update(sequence, word1, word2, word3, sequence_length):
|
|
| 113 |
st.write("Please try again later or contact support if the issue persists.")
|
| 114 |
|
| 115 |
def share_on_twitter(word1, word2, word3, length, plddt):
|
| 116 |
-
tweet_text = f"I generated a new protein using GenPro2
|
| 117 |
tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
|
| 118 |
return tweet_url
|
| 119 |
|
|
@@ -151,8 +147,8 @@ if st.session_state.structure_info:
|
|
| 151 |
st.subheader(f'Predicted protein structure using seed: {info["word1"]}, {info["word2"]}, and {info["word3"]} + length {info["sequence_length"]}')
|
| 152 |
render_mol(info['pdb_string'])
|
| 153 |
|
| 154 |
-
st.subheader('plDDT Score')
|
| 155 |
-
st.write('plDDT is
|
| 156 |
plddt_score = int(info["b_value"] * 100)
|
| 157 |
st.info(f'Average plDDT: {plddt_score}%')
|
| 158 |
|
|
@@ -162,32 +158,27 @@ if st.session_state.structure_info:
|
|
| 162 |
<div style='background-color: #e6f2ff; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
|
| 163 |
<ol>
|
| 164 |
<li>Take a screenshot of the protein structure above.</li>
|
| 165 |
-
<li>Click the 'Share on X' button below to open a pre-filled
|
| 166 |
-
<li>Attach your screenshot to
|
| 167 |
</ol>
|
| 168 |
</div>
|
| 169 |
""", unsafe_allow_html=True)
|
| 170 |
|
| 171 |
-
st.write("1. Take a screenshot of the protein structure above.")
|
| 172 |
-
st.write("2. Click the 'Share Results' button below to open a pre-filled tweet.")
|
| 173 |
-
st.write("3. Attach your protein screenshot to the post.")
|
| 174 |
-
|
| 175 |
tweet_url = share_on_twitter(info["word1"], info["word2"], info["word3"], info["sequence_length"], plddt_score)
|
| 176 |
st.markdown(f"[Share Results]({tweet_url})")
|
| 177 |
|
| 178 |
st.markdown("""
|
| 179 |
-
##
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
5. Click the "Analyze Protein" button to get more information about your generated protein.
|
| 186 |
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
|
| 187 |
-
Enjoy exploring the world of protein sequences!
|
| 188 |
""")
|
| 189 |
|
| 190 |
-
|
| 191 |
col1, col2 = st.columns(2)
|
| 192 |
with col1:
|
| 193 |
if st.button('Analyze Protein'):
|
|
@@ -200,14 +191,4 @@ if st.session_state.structure_info:
|
|
| 200 |
file_name='predicted.pdb',
|
| 201 |
mime='text/plain',
|
| 202 |
)
|
| 203 |
-
|
| 204 |
-
## What to do next:
|
| 205 |
-
If you find interesting results from the sequence folding, you can explore further:
|
| 206 |
-
1. Learn more about protein structures and sequences.
|
| 207 |
-
2. Visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
|
| 208 |
-
3. Compare your folded structure with known functional proteins by downloading your results.
|
| 209 |
-
4. Read about similar proteins to gain insights into potential functions.
|
| 210 |
-
5. Click the "Analyze Protein" button to get more information about your generated protein.
|
| 211 |
-
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.
|
| 212 |
-
Enjoy exploring the world of protein sequences! Share your high-confidence protein images with us on X [*@WandsAI*](https://x.com/wandsai)!
|
| 213 |
-
""")
|
|
|
|
| 15 |
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
| 16 |
|
| 17 |
st.set_page_config(layout='wide')
|
| 18 |
+
st.sidebar.title('🔮 GenPro2 Protein Generator, Structure Predictor, and Analysis Tool')
|
| 19 |
+
st.sidebar.write('GenPro2 is an end-to-end protein sequence generator, structure predictor, and analysis tool based on [*ESMFold*](https://esmatlas.com/about), the ESM-2 language model, and known proteins.')
|
| 20 |
|
| 21 |
def generate_sequence_from_words(words, length):
|
| 22 |
seed = ' '.join(words).encode('utf-8')
|
|
|
|
| 36 |
|
| 37 |
def perform_blast_analysis(sequence):
|
| 38 |
st.subheader('Protein Analysis')
|
| 39 |
+
with st.spinner("Analyzing generated protein... This may take several minutes. Stay tuned!"):
|
| 40 |
progress_bar = st.progress(0)
|
| 41 |
for i in range(100):
|
| 42 |
progress_bar.progress(i + 1)
|
|
|
|
| 61 |
identity_percentage = (hsp.identities / alignment.length) * 100
|
| 62 |
|
| 63 |
st.write(f"**Top Match:** {protein_name}")
|
| 64 |
+
st.write(f"**Organism Code:** {organism}")
|
| 65 |
st.write(f"**Sequence Identity:** {identity_percentage:.2f}%")
|
| 66 |
+
|
| 67 |
|
| 68 |
# Fetch protein function (if available)
|
| 69 |
if hasattr(alignment, 'description') and alignment.description:
|
| 70 |
st.write(f"**Potential Function:** {alignment.description}")
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
st.write("No significant matches found. This might be a unique protein sequence!")
|
| 73 |
except Exception as e:
|
| 74 |
st.error(f"An error occurred during protein analysis: {str(e)}")
|
|
|
|
| 109 |
st.write("Please try again later or contact support if the issue persists.")
|
| 110 |
|
| 111 |
def share_on_twitter(word1, word2, word3, length, plddt):
|
| 112 |
+
tweet_text = f"I just generated a new protein using #GenPro2 from the seed-words '{word1}', '{word2}', and '{word3}' + sequence length {length}! Its Predictive Protein Score is: {plddt}%. -- made by @WandsAI"
|
| 113 |
tweet_url = f"https://twitter.com/intent/tweet?text={urllib.parse.quote(tweet_text)}"
|
| 114 |
return tweet_url
|
| 115 |
|
|
|
|
| 147 |
st.subheader(f'Predicted protein structure using seed: {info["word1"]}, {info["word2"]}, and {info["word3"]} + length {info["sequence_length"]}')
|
| 148 |
render_mol(info['pdb_string'])
|
| 149 |
|
| 150 |
+
st.subheader('plDDT Confidence Score')
|
| 151 |
+
st.write('plDDT is a benchmark for scoring the confidence in a prediction on a scale from 0-100%. 70% or more is really good!')
|
| 152 |
plddt_score = int(info["b_value"] * 100)
|
| 153 |
st.info(f'Average plDDT: {plddt_score}%')
|
| 154 |
|
|
|
|
| 158 |
<div style='background-color: #e6f2ff; padding: 10px; border-radius: 5px; font-size: 0.8em;'>
|
| 159 |
<ol>
|
| 160 |
<li>Take a screenshot of the protein structure above.</li>
|
| 161 |
+
<li>Click the 'Share on X' button below to open a post pre-filled with your protein seed-words and score.</li>
|
| 162 |
+
<li>Attach your screenshot to your post before posting.</li>
|
| 163 |
</ol>
|
| 164 |
</div>
|
| 165 |
""", unsafe_allow_html=True)
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
tweet_url = share_on_twitter(info["word1"], info["word2"], info["word3"], info["sequence_length"], plddt_score)
|
| 168 |
st.markdown(f"[Share Results]({tweet_url})")
|
| 169 |
|
| 170 |
st.markdown("""
|
| 171 |
+
## Think you might have discovered a useful and unique protein? Here is what to do next:
|
| 172 |
+
1. Analyze your protein using [BLAST](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome).
|
| 173 |
+
2. Download protein data and visit the [Protein Data Bank (PDB)](https://www.rcsb.org/) for known protein structures.
|
| 174 |
+
3. Compare your folded protein structure and data with known functional proteins.
|
| 175 |
+
|
| 176 |
+
|
|
|
|
| 177 |
**Remember, this folding is based on randomly generated sequences. Interpret the results with caution.**
|
| 178 |
+
Enjoy exploring the world of protein sequences!
|
| 179 |
""")
|
| 180 |
|
| 181 |
+
|
| 182 |
col1, col2 = st.columns(2)
|
| 183 |
with col1:
|
| 184 |
if st.button('Analyze Protein'):
|
|
|
|
| 191 |
file_name='predicted.pdb',
|
| 192 |
mime='text/plain',
|
| 193 |
)
|
| 194 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|