SoluProtMutDemo / wrapper.py
vvelda's picture
Improvement of the input arguments check
eb602a3 verified
#!/usr/bin/env python3
#---------------------------------------------------------------------
#--- Predictor of a protein solubility change given a mutation ---
#--- by Jan Velecky velda@mail.muni.cz ---
#--- Loschmidt Laboratories, 2023-25 ---
#--- example use: python3 wrapper.py -h ---
#---------------------------------------------------------------------
import argparse
from functools import partial
import Bio.PDB.Polypeptide as AA
from code.data_preprocessing import get_PDB
# ---------------------------------------- DATA TYPES FOR ARGPARSE ----------------------------------------
def Type_index(value):  # test 1UCY 1A
    """Argparse type: validate a residue index and return it unchanged.

    Accepted forms are a plain integer ("48") or an integer followed by a
    single trailing non-digit character, treated as an insertion code ("1A").
    The value is returned as given (a string on the command line), so the
    insertion code is preserved for later lookup.

    Raises:
        argparse.ArgumentTypeError: if the numeric part cannot be parsed as
            an integer (this includes the empty string).
    """
    try:
        # A trailing non-digit is the insertion code; only the part before it
        # has to be an integer.
        numeric_part = value if value[-1].isdigit() else value[:-1]
        int(numeric_part)
    except (IndexError, ValueError, TypeError, AttributeError):
        # Only the failures the check itself can produce are translated;
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        raise argparse.ArgumentTypeError("%s is not a valid residue index" % value) from None
    return value
def Type_char(value):
    """Argparse type: accept a value of length exactly one and return it as is.

    Raises:
        argparse.ArgumentTypeError: if ``value`` does not have length 1.
    """
    if len(value) == 1:
        return value
    raise argparse.ArgumentTypeError("'%s' is not a character" % value)
def Type_aminoAcid(value):
    """Argparse type: normalise an amino-acid code to its upper-case 1-letter form.

    Both 1-letter and 3-letter codes are accepted, case-insensitively.

    Raises:
        argparse.ArgumentTypeError: if the code is not recognised as a
            standard amino acid. The message quotes the input as typed.
    """
    code = str.upper(value)
    if len(code) == 1:
        try:
            code = AA.one_to_three(code)
        except (KeyError, ValueError):
            # Unknown 1-letter code: keep it unchanged so that the validity
            # check below rejects it with the regular error message.
            # NOTE(review): assumes Biopython signals an unknown letter with
            # KeyError (dict lookup) — confirm for the pinned version.
            pass
    # standard=True restricts the check to the residues three_to_one() can
    # translate; the default also accepts non-standard residues, for which
    # the conversion below would escape as an unhandled lookup error.
    if not AA.is_aa(code, standard=True):
        raise argparse.ArgumentTypeError("'%s' is not a valid amino-acid" % value)
    return str.upper(AA.three_to_one(code))
def parseList(s, type):
    """Split ``s`` on commas and convert every item with ``type``.

    ``type`` is any one-argument callable; whatever it raises for an invalid
    item propagates to the caller. The parameter name shadows the builtin but
    is kept because callers pass it by keyword.

    Returns:
        list: the converted items, in input order.
    """
    return list(map(type, s.split(',')))
def Type_listOf(basetype):
    """Build an argparse type that parses a comma-separated list of ``basetype``.

    Returns:
        A ``functools.partial`` of ``parseList`` with ``type`` bound to
        ``basetype``; calling it with a string yields the converted list.
    """
    list_parser = partial(parseList, type=basetype)
    return list_parser
def Type_PDB(pdb_code):
    """Argparse type: resolve a PDB code through ``get_PDB``.

    The code is lower-cased before being handed to ``get_PDB``; the returned
    tuple keeps the code exactly as the user typed it.

    Returns:
        tuple: ``(pdb_code, pdb_path)`` where ``pdb_path`` is whatever
        ``get_PDB`` returned (presumably the local structure file — verify
        against ``code.data_preprocessing``).

    Raises:
        argparse.ArgumentTypeError: wrapping any exception raised while
            resolving the code.
    """
    try:
        pdb_path = get_PDB(pdb_code.lower())
    except Exception as err:
        # Surface every retrieval failure as a regular argparse usage error.
        raise argparse.ArgumentTypeError(err)
    else:
        return pdb_code, pdb_path
def check_mutList(loc_list, orig_list, mut_list):
    """Check a multi-point mutant specification and return the mutation list.

    A single mutation is broadcast to all positions. After broadcasting, the
    positions, wild-type residues and mutations must all have the same length.

    Args:
        loc_list: mutated positions.
        orig_list: wild-type residues, one per position.
        mut_list: substituting residues, either one per position or a single
            one to be applied everywhere.

    Returns:
        The mutation list with one entry per position. The input ``mut_list``
        is never modified; a new list is returned when broadcasting happens.

    Raises:
        ValueError: if the three lists cannot be brought to the same length.
    """
    if len(mut_list) == 1 and len(loc_list) != 1:
        # Apply the single mutation to all positions. Build a new list rather
        # than using ``*=`` so the caller's list is not changed in place.
        mut_list = mut_list * len(loc_list)
    if len(loc_list) == len(mut_list) == len(orig_list):
        return mut_list
    raise ValueError("Inconsistent multi-point mutant specification")
# ----------------------------------------------END OF DATA TYPES ------------------------------------------
# Command-line interface.
# Positional order: pdb-code [chain] wild-type location mutation
argParser = argparse.ArgumentParser(
    add_help=True,
    description="""Solubility change predictor:
Predicts the change in the solubility of a given protein variant
""",
    epilog="""
Amino acids can be specified by both 1- or 3-letter code.
example of use: ./wrapper.py 1EER F,R 48,150 D --verbose
""",
    conflict_handler='resolve',  # overwrite conflicts
    formatter_class=argparse.RawTextHelpFormatter,  # keep the line breaks of description/epilog in -h
    prefix_chars="+-",
    # exit_on_error=False # would be nice to use it for prompting a user for required params, but was implemented buggy: https://bugs.python.org/issue41255
)
# The PDB code is resolved while parsing: Type_PDB yields (pdb_code, pdb_path).
argParser.add_argument(
    'input',
    metavar='pdb-code',
    type=Type_PDB,
    help="PDB code"
)
argParser.add_argument(  # we only process one chain in the end
    'chain', nargs="?",
    default='A',
    type=Type_char,
    help="target chain character; default=A"
)
# The three list arguments below are comma-separated on the command line.
argParser.add_argument(
    'orig',
    metavar='wild-type',
    type=Type_listOf(Type_aminoAcid),
    help="wild-type residue(s) amino-acid[n]"
)
argParser.add_argument(
    'loc',
    metavar='location',
    type=Type_listOf(Type_index),
    help="mutated position(s) integer[n]"
)
argParser.add_argument(
    'mut',
    metavar='mutation',
    type=Type_listOf(Type_aminoAcid),
    help="1 or n substituents amino-acid|amino-acid[n]"
)
argParser.add_argument(
    '-v', '--verbose',
    action="store_true",
    # help (currently disabled): "show outputs from the underlying tools"
)
# Hidden option: accepted by the parser but not listed in the help output.
argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
# ---------------------------------------------- DATA PREDICTION ------------------------------------------
def check_input(args):
    """Placeholder for additional argument checks; currently does nothing.

    Returns:
        None
    """
def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_output=False):
    """Predict the solubility change of a mutant and format it as a message.

    Args:
        pdb_path: structure location, passed through to the predictor.
        chain: target chain identifier.
        orig: wild-type residue(s).
        loc: mutated position(s).
        mut: substituting residue(s).
        weights: forwarded to ``EnsemblePredictor``.
        version: forwarded to ``EnsemblePredictor``.
        rich_output: when true, return an HTML ``<span>`` coloured by the
            score instead of a plain-text line.

    Returns:
        str: the score together with its verbal assessment.

    Raises:
        ValueError: if the predictor raises ``KeyError``, which is reported
            as a position that does not exist in the PDB.
        KeyError: if the predictor returns an assessment other than
            '+', 'N' or '-'.
    """
    from code.predictor import EnsemblePredictor  # expensive import left for after the argument check

    pred_model = EnsemblePredictor(weights=weights, version=version)
    try:
        assesment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
    except KeyError as e:
        # Keep the original lookup failure attached as the cause.
        raise ValueError("Non-existing position in the PDB: %s" % str(e)) from e

    labels = {
        '+': 'solubilizing (score > 0.5)',
        'N': 'neutral (score ≈ 0.5)',
        '-': 'desolubilizing (score < 0.5)',
    }
    assesment = labels[assesment]

    if weights is not None:
        # Diagnostic only: goes to stderr so it never mixes with the result
        # that callers print on stdout.
        import sys
        print(weights, file=sys.stderr)

    if rich_output:
        # ternary gradient orange-black-blue:
        #   orange weight is 1 at score 0 and falls to 0 at score 0.5;
        #   blue weight is 0 up to score 0.5 and grows quadratically above it.
        # NOTE(review): channels are floats and the blue branch exceeds 255
        # for scores near 1 — confirm the renderer clamps rgb() values.
        ORANGE = (255, 165, 0)
        BLUESH = (100, 100, 255)
        orange_weight = max(0, 1 - prediction * 2)
        blue_weight = max(0, (2 * prediction) ** 2 - 1)
        color = tuple(o * orange_weight + b * blue_weight for o, b in zip(ORANGE, BLUESH))
        return f"Predicted solubilization score: <span style='color: rgb{color}'>{prediction:.2f} {assesment}</span>"
    return "Predicted solubilization score: %g (%s)" % (prediction, assesment)
if __name__ == '__main__':
    # ---------------------------------------- ARGUMENTS PROCESSING ----------------------------------------
    args = argParser.parse_args()

    # The mutation list is reconciled with the positions after parsing
    # (argparse can't cope with nested positional arguments). Any failure is
    # reported as a usage error; argParser.error() terminates the program.
    try:
        expanded_mut = check_mutList(args.loc, args.orig, args.mut)
    except Exception as err:
        argParser.error(str(err))
    else:
        args.mut = expanded_mut

    pdb_code, pdb_path = args.input
    chain = args.chain
    # NOTE(review): args.verbose and args.ver are parsed but not used below;
    # the hook that raised the modeling verbosity level is disabled — confirm
    # whether either option should be forwarded.

    # ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
    print()
    outcome = predict(pdb_path, chain, args.orig, args.loc, args.mut)
    print(outcome)