#!/usr/bin/env python3
#---------------------------------------------------------------------
#--- Predictor of a protein solubility change given a mutation ---
#--- by Jan Velecky velda@mail.muni.cz ---
#--- Loschmidt Laboratories, 2023-25 ---
#--- example use: python3 wrapper.py -h ---
#---------------------------------------------------------------------
import argparse
from functools import partial
import Bio.PDB.Polypeptide as AA
from code.data_preprocessing import get_PDB
# ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
def Type_index(value):  # test 1UCY 1A
    """Argparse type: validate a residue index with an optional insertion code.

    When the last character of *value* is not a digit it is treated as an
    insertion code and only the part before it has to parse as an integer
    (e.g. ``1A``); otherwise the whole string has to parse as an integer.

    Returns:
        The original *value*, unchanged (it is validated, not converted).

    Raises:
        argparse.ArgumentTypeError: if the numeric part is not an integer
            (this includes an empty string).
    """
    try:
        # a trailing non-digit is an insertion code; only the rest must be numeric
        numeric_part = value if value[-1].isdigit() else value[:-1]
        int(numeric_part)
    except (IndexError, ValueError, TypeError, AttributeError):
        # only input-shaped failures are reported; KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by the former bare ``except``
        raise argparse.ArgumentTypeError("%s is not a valid residue index" % value)
    return value
def Type_char(value):
    """Argparse type accepting exactly one character.

    Returns *value* unchanged when its length is 1; otherwise raises
    ``argparse.ArgumentTypeError``.
    """
    if len(value) == 1:
        return value
    raise argparse.ArgumentTypeError("'%s' is not a character" % value)
def Type_aminoAcid(value):
    """Argparse type: normalise an amino-acid code to an upper-case 1-letter code.

    The input is upper-cased. A single letter is first expanded with
    ``AA.one_to_three``; the resulting code must pass ``AA.is_aa`` and is
    then converted back with ``AA.three_to_one``.

    Returns:
        The upper-cased result of ``AA.three_to_one``.

    Raises:
        argparse.ArgumentTypeError: if ``AA.is_aa`` rejects the code.
    """
    # NOTE(review): newer Biopython releases reportedly dropped
    # one_to_three/three_to_one from Bio.PDB.Polypeptide -- confirm against
    # the version this project pins.
    code = str.upper(value)
    if len(code) == 1:
        try:
            code = AA.one_to_three(code)
        except Exception:
            # Conversion failed: keep the single letter and let the is_aa
            # check below decide. ``Exception`` (instead of a bare except)
            # keeps KeyboardInterrupt/SystemExit propagating.
            pass
    if not AA.is_aa(code):
        raise argparse.ArgumentTypeError("'%s' is not a valid amino-acid" % value)
    return str.upper(AA.three_to_one(code))
def parseList(s, type):
    """Split *s* on commas and convert each piece with the callable *type*.

    The parameter is called ``type`` because callers pass it by keyword.
    Returns the converted items as a list, in input order.
    """
    pieces = s.split(',')
    return list(map(type, pieces))
def Type_listOf(basetype):
    """Build an argparse type for a comma-separated list of *basetype* items.

    Returns ``parseList`` with its ``type`` argument bound to *basetype*.
    """
    list_parser = partial(parseList, type=basetype)
    return list_parser
def Type_PDB(pdb_code):
    """Argparse type: resolve a PDB code through ``get_PDB``.

    ``get_PDB`` is called with the lower-cased code. Any exception it raises
    is re-raised as ``argparse.ArgumentTypeError`` wrapping that exception.

    Returns:
        A ``(pdb_code, pdb_path)`` tuple: the code as given by the user and
        whatever ``get_PDB`` returned for it.
    """
    try:
        pdb_path = get_PDB(pdb_code.lower())
    except Exception as err:
        raise argparse.ArgumentTypeError(err)
    else:
        return pdb_code, pdb_path
def check_mutList(loc_list, orig_list, mut_list):
    """Validate a multi-point mutant specification and return the mutation list.

    A single substituent is broadcast to every position. After that, the
    positions, wild-type residues and substituents must all have the same
    length.

    Args:
        loc_list: mutated positions.
        orig_list: wild-type residues, one per position.
        mut_list: substituents, either one per position or a single one.

    Returns:
        The substituent list, one entry per position. The caller's
        ``mut_list`` is never modified; a broadcast result is a new list.

    Raises:
        ValueError: if the three lists cannot be made the same length.
    """
    if len(mut_list) == 1 and len(loc_list) != 1:
        # apply the mutation to all positions; build a new list instead of
        # using ``*=``, which would mutate the caller's argument in place
        mut_list = mut_list * len(loc_list)
    if len(loc_list) == len(mut_list) == len(orig_list):
        return mut_list
    raise ValueError("Inconsistent multi-point mutant specification")
# ----------------------------------------------END OF DATA TYPES ------------------------------------------
# Command-line parser for the wrapper. The help texts are rendered verbatim
# (RawTextHelpFormatter), so the newlines inside them are significant.
# NOTE(review): "preditor" in the description looks like a typo in the
# user-visible text -- left untouched here.
argParser = argparse.ArgumentParser(
    add_help=True,
    prefix_chars="+-",
    conflict_handler='resolve',  # overwrite conflicts
    formatter_class=argparse.RawTextHelpFormatter,
    description=(
        "Solubility change preditor:\n"
        "Predicts the change in the solubility of a given protein variant\n"
    ),
    epilog=(
        "\n"
        "Amino acids can be specified by both 1- or 3-letter code.\n"
        "example of use: ./wrapper.py 1EER F,R 48,150 D --verbose\n"
    ),
    # exit_on_error=False # would be nice to use it for prompting a user for required params, but was implemented buggy: https://bugs.python.org/issue41255
)
# Positional arguments are registered in the order they are expected on the
# command line: pdb-code [chain] wild-type location mutation.
argParser.add_argument('input', type=Type_PDB, metavar='pdb-code', help="PDB code")
# we only process one chain in the end
argParser.add_argument(
    'chain',
    type=Type_char,
    nargs="?",
    default='A',
    help="target chain character; default=A",
)
argParser.add_argument(
    'orig',
    type=Type_listOf(Type_aminoAcid),
    metavar='wild-type',
    help="wild-type residue(s) amino-acid[n]",
)
argParser.add_argument(
    'loc',
    type=Type_listOf(Type_index),
    metavar='location',
    help="mutated position(s) integer[n]",
)
argParser.add_argument(
    'mut',
    type=Type_listOf(Type_aminoAcid),
    metavar='mutation',
    help="1 or n substituents amino-acid|amino-acid[n]",
)
# Optional flags.
argParser.add_argument(
    '-v', '--verbose',
    action="store_true",
    #help="show ouputs from the underlaying tools"
)
# --ver is accepted but hidden from the help output.
argParser.add_argument('--ver', help=argparse.SUPPRESS, default=None)
# ---------------------------------------------- DATA PREDICTION ------------------------------------------
def check_input(args):
    """Placeholder hook for validating *args*; it currently does nothing and returns None."""
    return None
def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_output=False):
    """Predict the solubility change of a protein variant and format the result.

    An ``EnsemblePredictor`` is created with *weights* and *version*, and its
    ``predict_change(pdb_path, chain, orig, loc, mut)`` supplies an assessment
    code (``'+'``, ``'N'`` or ``'-'``) together with a numeric score.

    Args:
        pdb_path, chain, orig, loc, mut: forwarded unchanged to
            ``predict_change``.
        weights, version: forwarded unchanged to ``EnsemblePredictor``.
        rich_output: when true, return an HTML ``<span>`` coloured by the
            score instead of a plain-text line.

    Returns:
        A one-line description of the predicted score and its assessment.

    Raises:
        ValueError: if ``predict_change`` raises ``KeyError``, which is
            reported as a non-existing position in the PDB.
    """
    from code.predictor import EnsemblePredictor  # expensive import left for after the argument check

    pred_model = EnsemblePredictor(weights=weights, version=version)
    try:
        assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
    except KeyError as e:
        # chain explicitly so the original lookup failure stays in the traceback
        raise ValueError("Non-existing position in the PDB: %s" % str(e)) from e

    # translate the assessment code into its human-readable label
    assesment = {
        '+': 'solubilizing ( > 0.5)',
        'N': 'neutral (score ≈ 0.5)',
        '-': 'desolubilizing (score < 0.5)',
    }[assesment]
    # (a leftover debug ``print(weights)`` used to sit here; it wrote "None"
    # to stdout ahead of every result and has been removed)

    if not rich_output:
        return "Predicted solubiliztation score: %g (%s)" % (prediction, assesment)

    # ternary gradient orange-black-blue
    # NOTE(review): presumably the score lies in [0, 1]; the blue weight
    # reaches 3 at a score of 1, so channel values can exceed 255 -- confirm
    # that the consumer clamps them.
    ORANGE = (255, 165, 0)
    BLUESH = (100, 100, 255)
    color = tuple((o*max(0, 1-prediction*2) + b*max(0, (2*prediction)**2-1) for o, b in zip(ORANGE, BLUESH)))
    return f"Predicted solubiliztation score: <span style='color: rgb{color}'>{prediction:.2f} {assesment}</span>"
if __name__ == '__main__':
    # ---------------------------------------- ARGUMENTS PROCESSING ----------------------------------------
    args = argParser.parse_args()

    # positional arguments parsing (argparse can't cope with nested positional arguments)
    try:
        checked_mut = check_mutList(args.loc, args.orig, args.mut)
    except Exception as e:
        # report the inconsistency through argparse's own error handling
        argParser.error(str(e))
    else:
        args.mut = checked_mut

    pdb_code, pdb_path = args.input
    chain = args.chain

    # import code.data_preprocessing.model as modeling
    # if(args.verbose):
    #     modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE

    # ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
    print()  # blank separator line, emitted before the prediction is run
    result = predict(pdb_path, chain, args.orig, args.loc, args.mut)
    print(result)