| import streamlit as st |
| import random |
| import hashlib |
| import py3Dmol |
| import requests |
| import io |
| from Bio import PDB |
|
|
def generate_sequence_from_words(words, length):
    """Return a reproducible pseudo-random amino-acid sequence seeded by words.

    The words are joined with single spaces, hashed with MD5, and the hex
    digest seeds a private random number generator. MD5 is used here only to
    derive a stable seed, not for anything security-related.

    Args:
        words: Iterable of strings used to derive the seed.
        length: Number of residues to generate.

    Returns:
        A string of ``length`` characters drawn from the 20 one-letter
        amino-acid codes. The same words and length always give the same
        sequence.
    """
    seed = ' '.join(words).encode('utf-8')
    # A dedicated generator keeps the sequence identical to what seeding the
    # module-level RNG would produce, without resetting the global random
    # state that other code in the process may rely on.
    rng = random.Random(hashlib.md5(seed).hexdigest())
    amino_acids = "ACDEFGHIKLMNPQRSTVWY"
    return ''.join(rng.choice(amino_acids) for _ in range(length))
|
|
def predict_structure(sequence, timeout=(10, 600)):
    """Submit a sequence to the remote prediction endpoint and return its JSON.

    Args:
        sequence: Amino-acid sequence sent as the single query of the batch.
        timeout: Timeout passed to ``requests.post``, either one number of
            seconds or a ``(connect, read)`` tuple. The generous read
            default allows for a prediction that takes several minutes
            while still preventing an indefinite hang.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``. Every failure (connection error, timeout,
        non-200 status, body that is not JSON) is reported through
        ``st.error`` instead of being raised.
    """
    # NOTE(review): the endpoint URL and payload schema are kept exactly as
    # they were; confirm them against the service's current API.
    url = "https://api.colabfold.com/batch"
    payload = {
        "queries": [["query", sequence]],
        "num_relax": 0,
        "use_templates": False,
        "num_models": 1,
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Covers DNS failures, refused connections, and timeouts.
        st.error(f"Error in structure prediction: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"Error in structure prediction: {response.text}")
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response with an HTML or empty body is not a usable result.
        st.error("Error in structure prediction: the server returned a response that is not valid JSON.")
        return None
|
|
def visualize_protein(pdb_string):
    """Build a py3Dmol viewer showing the given PDB text as a cartoon.

    Args:
        pdb_string: Structure data in PDB format.

    Returns:
        An 800x400 ``py3Dmol.view`` with the model loaded, styled as a
        spectrum-coloured cartoon, and zoomed to fit.
    """
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    viewer = py3Dmol.view(width=800, height=400)
    viewer.addModel(pdb_string, 'pdb')
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()
    return viewer
|
|
# Page heading.
st.title("Protein Sequence Generator and Structure Predictor")

# Three seed words, one text box each, created in display order.
st.write("Enter three random words to seed your protein sequence:")
word1, word2, word3 = [
    st.text_input(label) for label in ("Word 1", "Word 2", "Word 3")
]

# Requested sequence length: 50 to 200 in steps of 10, starting at 100.
sequence_length = st.number_input(
    "Enter desired sequence length", min_value=50, max_value=200, value=100, step=10
)
|
|
# Run the whole pipeline (generate -> predict -> render) only when the button
# is pressed.
if st.button("Generate Sequence and Predict Structure"):
    if word1 and word2 and word3:
        words = [word1, word2, word3]
        sequence = generate_sequence_from_words(words, sequence_length)
        st.write(f"Generated sequence inspired by '{word1}', '{word2}', and '{word3}' with length '{sequence_length}':")
        st.code(sequence)

        st.header("Protein Structure Prediction")
        with st.spinner("Predicting protein structure... This may take a few minutes."):
            prediction = predict_structure(sequence)

        # The rendering code reads the first element of a list of dicts.
        # Validate that shape first so an unexpected payload (None, empty
        # list, dict, ...) ends in the error message below rather than an
        # IndexError/KeyError/TypeError.
        first_model = None
        if isinstance(prediction, list) and prediction and isinstance(prediction[0], dict):
            first_model = prediction[0]

        if first_model is not None and 'pdb_string' in first_model:
            view = visualize_protein(first_model['pdb_string'])

            # py3Dmol provides no `show3d` function, so embed the HTML the
            # viewer generates for itself. `_make_html` is the same method
            # py3Dmol uses for its notebook HTML representation.
            st.components.v1.html(view._make_html(), height=400)

            # Per-residue confidence scores, when the response includes them.
            plddt_scores = first_model.get('plddt', [])
            if plddt_scores:
                avg_plddt = sum(plddt_scores) / len(plddt_scores)
                st.write(f"Average pLDDT score: {avg_plddt:.2f}")
                st.write("pLDDT > 90: Very high confidence")
                st.write("90 > pLDDT > 70: Confident")
                st.write("70 > pLDDT > 50: Low confidence")
                st.write("pLDDT < 50: Very low confidence")
        else:
            st.error("Failed to predict structure. Please try again.")
    else:
        st.error("Please enter all three words.")
|
|
# Closing guidance rendered below the tool. The blank line after the list
# ends the list, so the final sentence renders as its own paragraph instead
# of being folded into item 4.
st.markdown("""
## What to do next:
1. Experiment with different seed words and sequence lengths.
2. Learn about how protein sequences relate to their predicted structures.
3. Remember that these are computational predictions and may not represent the actual biological structure.
4. For real protein structures, visit the [Protein Data Bank (PDB)](https://www.rcsb.org/).

Enjoy exploring the world of protein sequences and predicted structures!
""")