# -*- coding: utf-8 -*-
"""binder_design.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1fsVz3x72UZlRP7L2VCBwFIKHwGRpbqAS

# AfDesign - peptide binder design

For a given protein target and protein binder length, generate/hallucinate a
protein binder sequence AlphaFold thinks will bind to the target structure.
To do this, we maximize the number of contacts at the interface and maximize
the pLDDT of the binder.

**WARNING**
1. This notebook is in active development and was designed for demonstration
   purposes only.
2. Using AfDesign as the only "loss" function for design might be a bad idea;
   you may find adversarial sequences (aka. sequences that trick AlphaFold).
"""
#@title **setup**
import os

# First run only: install ColabDesign and fetch the AlphaFold parameters.
# The "params" directory doubles as the sentinel that setup already happened.
if not os.path.isdir("params"):
    setup_commands = (
        # pinned ColabDesign release
        "pip -q install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1",
        # symlink the installed package into the cwd for debugging
        "ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign",
        # download and unpack the AlphaFold model parameters
        "mkdir params",
        "apt-get install aria2 -qq",
        "aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar",
        "tar -xf alphafold_params_2022-12-06.tar -C params",
    )
    for command in setup_commands:
        os.system(command)
| import warnings | |
| warnings.simplefilter(action='ignore', category=FutureWarning) | |
| import os | |
| from colabdesign import mk_afdesign_model, clear_mem | |
| from colabdesign.shared.utils import copy_dict | |
| from colabdesign.af.alphafold.common import residue_constants | |
| from IPython.display import HTML | |
| from google.colab import files | |
| import numpy as np | |
| ######################### | |
| def get_pdb(pdb_code=""): | |
| if pdb_code is None or pdb_code == "": | |
| upload_dict = files.upload() | |
| pdb_string = upload_dict[list(upload_dict.keys())[0]] | |
| with open("tmp.pdb","wb") as out: out.write(pdb_string) | |
| return "tmp.pdb" | |
| elif os.path.isfile(pdb_code): | |
| return pdb_code | |
| elif len(pdb_code) == 4: | |
| os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb") | |
| return f"{pdb_code}.pdb" | |
| else: | |
| os.system(f"wget -qnc https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb") | |
| return f"AF-{pdb_code}-F1-model_v3.pdb" | |
#@title **prep inputs**
import re
#@markdown ---
#@markdown **target info**
pdb = "5F9R" #@param {type:"string"}
#@markdown - enter PDB code or UniProt code (to fetch AlphaFoldDB model) or leave blank to upload your own
target_chain = "B" #@param {type:"string"}
target_hotspot = "" #@param {type:"string"}
if target_hotspot == "": target_hotspot = None
#@markdown - restrict loss to predefined positions on target (eg. "1-10,12,15")
target_flexible = False #@param {type:"boolean"}
#@markdown - allow backbone of target structure to be flexible
#@markdown ---
#@markdown **binder info**
binder_len = 25 #@param {type:"integer"}
#@markdown - length of binder to hallucinate
binder_seq = "" #@param {type:"string"}
# keep only uppercase A-Z characters (one-letter amino-acid codes)
binder_seq = re.sub("[^A-Z]", "", binder_seq.upper())
if len(binder_seq) > 0:
    # a user-supplied sequence overrides the requested binder length
    binder_len = len(binder_seq)
else:
    binder_seq = None
#@markdown - if defined, will initialize design with this sequence
binder_chain = "" #@param {type:"string"}
if binder_chain == "": binder_chain = None
#@markdown - if defined, supervised loss is used (binder_len is ignored)
#@markdown ---
#@markdown **model config**
use_multimer = False #@param {type:"boolean"}
#@markdown - use alphafold-multimer for design
num_recycles = 1 #@param ["0", "1", "3", "6"] {type:"raw"}
num_models = "1" #@param ["1", "2", "3", "4", "5", "all"]
num_models = 5 if num_models == "all" else int(num_models)
#@markdown - number of trained models to use during optimization

# Everything that defines the design problem, gathered so we can detect
# whether the inputs changed between cell executions.
x = {"pdb_filename":pdb,
     "chain":target_chain,
     "binder_len":binder_len,
     "binder_chain":binder_chain,
     "hotspot":target_hotspot,
     "use_multimer":use_multimer,
     "rm_target_seq":target_flexible}

# resolve the PDB code / path / accession to a local file
x["pdb_filename"] = get_pdb(x["pdb_filename"])

# Rebuild the model only when the inputs changed since the previous run of
# this cell (avoids an expensive re-prep on repeated execution).
if "x_prev" not in dir() or x != x_prev:
    clear_mem()
    model = mk_afdesign_model(protocol="binder",
                              use_multimer=x["use_multimer"],
                              num_recycles=num_recycles,
                              recycle_mode="sample")
    model.prep_inputs(**x,
                      ignore_missing=False)
    x_prev = copy_dict(x)
    print("target length:", model._target_len)
    print("binder length:", model._binder_len)
    binder_len = model._binder_len
#@title **run AfDesign**
from scipy.special import softmax

optimizer = "pssm_semigreedy" #@param ["pssm_semigreedy", "3stage", "semigreedy", "pssm", "logits", "soft", "hard"]
#@markdown - `pssm_semigreedy` - uses the designed PSSM to bias semigreedy opt. (Recommended)
#@markdown - `3stage` - gradient based optimization (GD) (logits → soft → hard)
#@markdown - `pssm` - GD optimize (logits → soft) to get a sequence profile (PSSM).
#@markdown - `semigreedy` - tries X random mutations, accepts those that decrease loss
#@markdown - `logits` - GD optimize logits inputs (continuous)
#@markdown - `soft` - GD optimize softmax(logits) inputs (probabilities)
#@markdown - `hard` - GD optimize one_hot(logits) inputs (discrete)
#@markdown WARNING: The output sequence from `pssm`,`logits`,`soft` is not one_hot. To get a valid sequence use the other optimizers, or redesign the output backbone with another protocol like ProteinMPNN.
#@markdown ----
#@markdown #### advanced GD settings
GD_method = "sgd" #@param ["adabelief", "adafactor", "adagrad", "adam", "adamw", "fromage", "lamb", "lars", "noisy_sgd", "dpsgd", "radam", "rmsprop", "sgd", "sm3", "yogi"]
learning_rate = 0.1 #@param {type:"raw"}
norm_seq_grad = True #@param {type:"boolean"}
dropout = True #@param {type:"boolean"}

# (re)initialize the design, optionally from a user-supplied sequence
model.restart(seq=binder_seq)
model.set_optimizer(optimizer=GD_method,
                    learning_rate=learning_rate,
                    norm_seq_grad=norm_seq_grad)

models = model._model_names[:num_models]

flags = {"num_recycles":num_recycles,
         "models":models,
         "dropout":dropout}

if optimizer == "3stage":
    model.design_3stage(120, 60, 10, **flags)
    pssm = softmax(model._tmp["seq_logits"], -1)

if optimizer == "pssm_semigreedy":
    model.design_pssm_semigreedy(120, 32, **flags)
    # BUGFIX: softmax was taken over axis 1 here while every other branch
    # uses axis -1 (the amino-acid dimension); made consistent.
    pssm = softmax(model._tmp["seq_logits"], -1)

if optimizer == "semigreedy":
    # 0 soft iterations -> pure semigreedy optimization, no PSSM produced
    model.design_pssm_semigreedy(0, 32, **flags)
    pssm = None

if optimizer == "pssm":
    model.design_logits(120, e_soft=1.0, num_models=1, ramp_recycles=True, **flags)
    model.design_soft(32, num_models=1, **flags)
    flags.update({"dropout":False,"save_best":True})
    model.design_soft(10, num_models=num_models, **flags)
    pssm = softmax(model.aux["seq"]["logits"], -1)

# the plain gradient-descent optimizers share one code path
O = {"logits":model.design_logits,
     "soft":model.design_soft,
     "hard":model.design_hard}
if optimizer in O:
    O[optimizer](120, num_models=1, ramp_recycles=True, **flags)
    flags.update({"dropout":False,"save_best":True})
    O[optimizer](10, num_models=num_models, **flags)
    pssm = softmax(model.aux["seq"]["logits"], -1)

model.save_pdb(f"{model.protocol}.pdb")
#@title display hallucinated protein {run: "auto"}
color = "pLDDT" #@param ["chain", "pLDDT", "rainbow"]
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}
color_HP = False #@param {type:"boolean"}
animate = True #@param {type:"boolean"}

# static 3D view of the designed target/binder complex
model.plot_pdb(show_sidechains=show_sidechains,
               show_mainchains=show_mainchains,
               color=color, color_HP=color_HP, animate=animate)

# animation of the design trajectory, rendered inline
HTML(model.animate(dpi=100))

model.save_pdb(f"{model.protocol}.pdb")
# print the designed sequence(s)
model.get_seqs()
#@markdown ### Amino acid probabilities
import plotly.express as px

# Heatmap of the designed sequence profile: 20 amino acids (rows, in the
# AlphaFold residue order) x binder positions (columns).
# NOTE: removed the unused `alphabet` variable — the axis labels come from
# residue_constants.restypes, not from an alphabetical ordering.
if "pssm" in dir() and pssm is not None:
    fig = px.imshow(pssm.mean(0).T,
                    labels=dict(x="positions", y="amino acids", color="probability"),
                    y=residue_constants.restypes,
                    zmin=0,
                    zmax=1,
                    template="simple_white",
                    )
    fig.update_xaxes(side="top")
    fig.show()

# log of the best design step's auxiliary outputs
model._tmp["best"]["aux"]["log"]