# -*- coding: utf-8 -*-
"""binder_design.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1fsVz3x72UZlRP7L2VCBwFIKHwGRpbqAS
# AfDesign - peptide binder design
For a given protein target and protein binder length, generate/hallucinate a protein binder sequence AlphaFold thinks will bind to the target structure. To do this, we maximize number of contacts at the interface and maximize pLDDT of the binder.
**WARNING**
1. This notebook is in active development and was designed for demonstration purposes only.
2. Using AfDesign as the only "loss" function for design might be a bad idea, you may find adversarial sequences (aka. sequences that trick AlphaFold).
"""
#@title **setup**
import os

# One-time environment bootstrap: the presence of the "params" directory is
# used as the marker that these commands have already been run.
if not os.path.isdir("params"):
    for setup_cmd in (
        # get code
        "pip -q install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1",
        # for debugging
        "ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign",
        # download params
        "mkdir params",
        "apt-get install aria2 -qq",
        "aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar",
        "tar -xf alphafold_params_2022-12-06.tar -C params",
    ):
        # Commands run sequentially through the shell; exit codes are ignored.
        os.system(setup_cmd)

import warnings

# Silence FutureWarning messages for the rest of the session.
warnings.simplefilter("ignore", FutureWarning)
import os
from colabdesign import mk_afdesign_model, clear_mem
from colabdesign.shared.utils import copy_dict
from colabdesign.af.alphafold.common import residue_constants
from IPython.display import HTML
from google.colab import files
import numpy as np
#########################
def get_pdb(pdb_code=""):
    """Resolve ``pdb_code`` to the path of a local PDB file.

    Resolution order:

    1. ``None`` or ``""``: prompt for an upload through ``files.upload()``
       and store the first uploaded file as ``tmp.pdb``.
    2. An existing file path: returned unchanged.
    3. A 4-character string: fetched from ``files.rcsb.org`` as
       ``<pdb_code>.pdb``.
    4. Anything else: fetched from ``alphafold.ebi.ac.uk`` as
       ``AF-<pdb_code>-F1-model_v3.pdb``.

    Args:
        pdb_code: Identifier, local path, or empty/``None`` to upload.

    Returns:
        Path (relative to the working directory, or ``pdb_code`` itself) of
        the PDB file.

    Raises:
        ValueError: If the upload dialog returned no file.
        FileNotFoundError: If the download did not produce the expected file.
    """
    # Local import so the function carries its own dependency.
    import subprocess

    if pdb_code is None or pdb_code == "":
        upload_dict = files.upload()
        if not upload_dict:
            # Previously surfaced as an opaque IndexError.
            raise ValueError("no file was uploaded")
        pdb_string = next(iter(upload_dict.values()))
        with open("tmp.pdb", "wb") as out:
            out.write(pdb_string)
        return "tmp.pdb"

    if os.path.isfile(pdb_code):
        return pdb_code

    if len(pdb_code) == 4:
        url = f"https://files.rcsb.org/view/{pdb_code}.pdb"
        filename = f"{pdb_code}.pdb"
    else:
        url = f"https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb"
        filename = f"AF-{pdb_code}-F1-model_v3.pdb"

    # Mirror wget's -nc (no-clobber): reuse a file that is already present.
    if not os.path.isfile(filename):
        try:
            # Argument list instead of a shell string, so characters in
            # pdb_code are never interpreted by a shell.
            subprocess.run(["wget", "-qnc", url], check=False)
        except OSError as err:
            raise FileNotFoundError(f"could not run wget to fetch {url}") from err
        if not os.path.isfile(filename):
            # Fail here with the URL rather than later when the path is opened.
            raise FileNotFoundError(f"failed to download {url}")
    return filename
#@title **prep inputs**
import re
#@markdown ---
#@markdown **target info**
pdb = "5F9R" #@param {type:"string"}
#@markdown - enter PDB code or UniProt code (to fetch AlphaFoldDB model) or leave blank to upload your own
target_chain = "B" #@param {type:"string"}
target_hotspot = "" #@param {type:"string"}
# An empty form field is passed on as None.
target_hotspot = target_hotspot or None
#@markdown - restrict loss to predefined positions on target (eg. "1-10,12,15")
target_flexible = False #@param {type:"boolean"}
#@markdown - allow backbone of target structure to be flexible
#@markdown ---
#@markdown **binder info**
binder_len = 25 #@param {type:"integer"}
#@markdown - length of binder to hallucinate
binder_seq = "" #@param {type:"string"}
# Upper-case the input and drop every character outside A-Z.
binder_seq = re.sub("[^A-Z]", "", binder_seq.upper())
if binder_seq:
    # A supplied sequence overrides the requested binder length.
    binder_len = len(binder_seq)
else:
    binder_seq = None
#@markdown - if defined, will initialize design with this sequence
binder_chain = "" #@param {type:"string"}
binder_chain = binder_chain or None
#@markdown - if defined, supervised loss is used (binder_len is ignored)
#@markdown ---
#@markdown **model config**
use_multimer = False #@param {type:"boolean"}
#@markdown - use alphafold-multimer for design
num_recycles = 1 #@param ["0", "1", "3", "6"] {type:"raw"}
num_models = "1" #@param ["1", "2", "3", "4", "5", "all"]
if num_models == "all":
    num_models = 5
else:
    num_models = int(num_models)
#@markdown - number of trained models to use during optimization

# Settings forwarded to model.prep_inputs(); also compared against the
# previous run's settings to decide whether the model must be rebuilt.
x = dict(
    pdb_filename=get_pdb(pdb),
    chain=target_chain,
    binder_len=binder_len,
    binder_chain=binder_chain,
    hotspot=target_hotspot,
    use_multimer=use_multimer,
    rm_target_seq=target_flexible,
)

# Rebuild only on the first run or when any of the settings above changed.
if not ("x_prev" in dir() and x == x_prev):
    clear_mem()
    model = mk_afdesign_model(
        protocol="binder",
        use_multimer=x["use_multimer"],
        num_recycles=num_recycles,
        recycle_mode="sample",
    )
    model.prep_inputs(ignore_missing=False, **x)
    x_prev = copy_dict(x)

print("target length:", model._target_len)
print("binder length:", model._binder_len)
# Adopt the length the model reports.
binder_len = model._binder_len
#@title **run AfDesign**
from scipy.special import softmax
optimizer = "pssm_semigreedy" #@param ["pssm_semigreedy", "3stage", "semigreedy", "pssm", "logits", "soft", "hard"]
#@markdown - `pssm_semigreedy` - uses the designed PSSM to bias semigreedy opt. (Recommended)
#@markdown - `3stage` - gradient based optimization (GD) (logits → soft → hard)
#@markdown - `pssm` - GD optimize (logits → soft) to get a sequence profile (PSSM).
#@markdown - `semigreedy` - tries X random mutations, accepts those that decrease loss
#@markdown - `logits` - GD optimize logits inputs (continuous)
#@markdown - `soft` - GD optimize softmax(logits) inputs (probabilities)
#@markdown - `hard` - GD optimize one_hot(logits) inputs (discrete)
#@markdown WARNING: The output sequence from `pssm`,`logits`,`soft` is not one_hot. To get a valid sequence use the other optimizers, or redesign the output backbone with another protocol like ProteinMPNN.
#@markdown ----
#@markdown #### advanced GD settings
GD_method = "sgd" #@param ["adabelief", "adafactor", "adagrad", "adam", "adamw", "fromage", "lamb", "lars", "noisy_sgd", "dpsgd", "radam", "rmsprop", "sgd", "sm3", "yogi"]
learning_rate = 0.1 #@param {type:"raw"}
norm_seq_grad = True #@param {type:"boolean"}
dropout = True #@param {type:"boolean"}

# Reset the design state (seeded with binder_seq when one was provided) and
# configure the gradient-descent optimizer chosen in the form above.
model.restart(seq=binder_seq)
model.set_optimizer(optimizer=GD_method,
                    learning_rate=learning_rate,
                    norm_seq_grad=norm_seq_grad)

# Use only the first `num_models` model names.
models = model._model_names[:num_models]
# Keyword arguments shared by every design call below.
flags = {"num_recycles": num_recycles,
         "models": models,
         "dropout": dropout}

# Dispatch table for the plain gradient-descent optimizers.
O = {"logits": model.design_logits,
     "soft": model.design_soft,
     "hard": model.design_hard}

# Exactly one branch runs; each one leaves `pssm` set (None when no profile
# is produced) for the plotting cell that follows.
if optimizer == "3stage":
    model.design_3stage(120, 60, 10, **flags)
    pssm = softmax(model._tmp["seq_logits"], -1)
elif optimizer == "pssm_semigreedy":
    model.design_pssm_semigreedy(120, 32, **flags)
    # Normalize over the last axis, like every other branch (this one used
    # axis 1). Assumes the last axis of the logits indexes amino acids, as
    # the other branches imply - TODO confirm.
    pssm = softmax(model._tmp["seq_logits"], -1)
elif optimizer == "semigreedy":
    model.design_pssm_semigreedy(0, 32, **flags)
    pssm = None
elif optimizer == "pssm":
    model.design_logits(120, e_soft=1.0, num_models=1, ramp_recycles=True, **flags)
    model.design_soft(32, num_models=1, **flags)
    # Final steps: dropout off, keep the best result, use all selected models.
    flags.update({"dropout": False, "save_best": True})
    model.design_soft(10, num_models=num_models, **flags)
    pssm = softmax(model.aux["seq"]["logits"], -1)
elif optimizer in O:
    O[optimizer](120, num_models=1, ramp_recycles=True, **flags)
    # Final steps: dropout off, keep the best result, use all selected models.
    flags.update({"dropout": False, "save_best": True})
    O[optimizer](10, num_models=num_models, **flags)
    pssm = softmax(model.aux["seq"]["logits"], -1)

model.save_pdb(f"{model.protocol}.pdb")
#@title display hallucinated protein {run: "auto"}
color = "pLDDT" #@param ["chain", "pLDDT", "rainbow"]
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}
color_HP = False #@param {type:"boolean"}
animate = True #@param {type:"boolean"}
model.plot_pdb(show_sidechains=show_sidechains,
               show_mainchains=show_mainchains,
               color=color, color_HP=color_HP, animate=animate)

# A bare expression is only rendered when it is the last statement of a
# notebook cell; in this merged script its value would be dropped, so the
# animation is displayed explicitly.
from IPython.display import display
display(HTML(model.animate(dpi=100)))

model.save_pdb(f"{model.protocol}.pdb")
# Same reason as above: print the sequences instead of discarding them.
print(model.get_seqs())
#@markdown ### Amino acid probabilities
import plotly.express as px
# Not referenced in the visible code; the y-axis labels below come from
# residue_constants.restypes.
alphabet = "ACDEFGHIKLMNPQRSTVWY"
# Plot only when an optimizer cell has run and produced a profile
# (the "semigreedy" optimizer sets pssm to None).
if "pssm" in dir() and pssm is not None:
    # Average over the first axis, then transpose so amino acids are rows.
    fig = px.imshow(pssm.mean(0).T,
                    labels=dict(x="positions", y="amino acids", color="probability"),
                    y=residue_constants.restypes,
                    zmin=0,
                    zmax=1,
                    template="simple_white",
                    )
    fig.update_xaxes(side="top")
    fig.show()

# log
# Printed explicitly: as a bare expression the value would be discarded when
# this file runs as a script (a stray trailing "|" also made the original
# line a syntax error).
print(model._tmp["best"]["aux"]["log"])