Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| #--------------------------------------------------------------------- | |
| #--- Predictor of a protein solubility change given a mutation --- | |
| #--- by Jan Velecky velda@mail.muni.cz --- | |
| #--- Loschmidt Laboratories, 2023-25 --- | |
| #--- example use: python3 wrapper.py -h --- | |
| #--------------------------------------------------------------------- | |
| import argparse | |
| from functools import partial | |
| import Bio.PDB.Polypeptide as AA | |
| from code.data_preprocessing import get_PDB | |
| # ---------------------------------------- DATA TYPES FOR ARGPARSE ---------------------------------------- | |
def Type_index(value):  # test 1UCY 1A
    """Argparse type: validate a residue index given on the command line.

    Accepts a plain integer ("48") or an integer followed by one trailing
    non-digit character, which is treated as an insertion code ("1A").
    The value is only checked, not converted: the original string is
    returned unchanged.

    Raises:
        argparse.ArgumentTypeError: if the numeric part cannot be parsed as
            an integer (this includes the empty string).
    """
    try:
        # A trailing non-digit is an insertion code; leave it out of the
        # integer check.  Slicing (instead of indexing) keeps the empty
        # string on the regular error path.
        numeric_part = value if value[-1:].isdigit() else value[:-1]
        int(numeric_part)
    except (TypeError, ValueError):
        raise argparse.ArgumentTypeError("%s is not a valid residue index" % value) from None
    return value
def Type_char(value):
    """Argparse type: accept a value of length exactly 1 and return it as is.

    Raises:
        argparse.ArgumentTypeError: if ``len(value)`` is not 1.
    """
    if len(value) == 1:
        return value
    raise argparse.ArgumentTypeError("'%s' is not a character" % value)
def Type_aminoAcid(value):
    """Argparse type: normalise an amino-acid code to an upper-case 1-letter code.

    The input is upper-cased first.  A 1-letter input is expanded with
    ``AA.one_to_three``; the resulting name must then pass ``AA.is_aa``
    and is finally converted back with ``AA.three_to_one``.

    Raises:
        argparse.ArgumentTypeError: if the value is not recognised as an
            amino acid.  The message quotes the value as the user typed it.
    """
    code = str.upper(value)
    if len(code) == 1:
        try:
            code = AA.one_to_three(code)
        except KeyError:
            # Unknown 1-letter code: keep it as is, so that the is_aa()
            # check below rejects it with the regular error message.
            # Only the lookup failure is silenced; anything else (e.g. a
            # missing helper in the installed Biopython) stays visible.
            pass
    if not AA.is_aa(code):
        raise argparse.ArgumentTypeError("'%s' is not a valid amino-acid" % value)
    try:
        return str.upper(AA.three_to_one(code))
    except KeyError:
        # NOTE(review): is_aa() presumably also accepts residue names that
        # have no entry in the 1-letter table (non-standard residues) --
        # confirm against the installed Biopython.  Report such a value as
        # invalid instead of letting a KeyError escape from argparse.
        raise argparse.ArgumentTypeError("'%s' is not a valid amino-acid" % value) from None
def parseList(s, type):
    """Split ``s`` on commas and convert each piece with the callable ``type``.

    Returns the converted pieces as a list, in their original order.
    Exceptions raised by ``type`` propagate to the caller.
    """
    # NOTE: the parameter name shadows the builtin ``type``; it is kept
    # because it is passed by keyword (``type=...``) elsewhere in this file.
    converted = []
    for piece in s.split(','):
        converted.append(type(piece))
    return converted
def Type_listOf(basetype):
    """Build an argparse type for a comma-separated list of ``basetype`` items.

    The returned callable takes the raw argument string and hands it to
    ``parseList`` with ``basetype`` as the per-item converter.
    """
    list_parser = partial(parseList, type=basetype)
    return list_parser
def Type_PDB(pdb_code):
    """Argparse type: resolve a PDB code through ``get_PDB``.

    ``get_PDB`` is called with the lower-cased code.  Any exception it
    raises is re-raised as ``argparse.ArgumentTypeError`` so that argparse
    reports it as a usage error.

    Returns:
        tuple: ``(pdb_code, pdb_path)`` -- the code exactly as given and
        the value returned by ``get_PDB``.
    """
    try:
        resolved_path = get_PDB(pdb_code.lower())
    except Exception as err:
        raise argparse.ArgumentTypeError(err)
    else:
        return pdb_code, resolved_path
def check_mutList(loc_list, orig_list, mut_list):
    """Check a multi-point mutant specification and expand a shared mutation.

    If exactly one mutation is given for a different number of positions,
    that mutation is applied to all positions.  Afterwards the positions,
    wild-type residues and mutations must all have the same length.

    The caller's ``mut_list`` is never modified; when an expansion is
    needed a new list is returned.

    Returns:
        list: the mutation list, one entry per position.

    Raises:
        ValueError: if the three lists cannot be brought to equal length.
    """
    if len(mut_list) == 1 and len(loc_list) != 1:
        # apply mutation to all positions (builds a new list instead of
        # growing the argument in place)
        mut_list = mut_list * len(loc_list)
    if not (len(loc_list) == len(mut_list) == len(orig_list)):
        raise ValueError("Inconsistent multi-point mutant specification")
    return mut_list
| # ----------------------------------------------END OF DATA TYPES ------------------------------------------ | |
# Command-line parser.  Description and epilog are shown verbatim
# (RawTextHelpFormatter keeps their line breaks); both '-' and '+' are
# accepted as option prefixes.
argParser = argparse.ArgumentParser(
    add_help=True,
    description="""Solubility change predictor:
Predicts the change in the solubility of a given protein variant
""",
    epilog="""
Amino acids can be specified by both 1- or 3-letter code.
example of use: ./wrapper.py 1EER F,R 48,150 D --verbose
""",
    conflict_handler='resolve',  # overwrite conflicts
    formatter_class=argparse.RawTextHelpFormatter,
    prefix_chars="+-",
    # exit_on_error=False # would be nice to use it for prompting a user for required params, but was implemented buggy: https://bugs.python.org/issue41255
)
# Positional arguments, in command-line order:
#   pdb-code [chain] wild-type location mutation
argParser.add_argument('input', metavar='pdb-code', type=Type_PDB, help="PDB code")
# The chain may be omitted; we only process one chain in the end.
argParser.add_argument('chain', nargs="?", default='A', type=Type_char,
                       help="target chain character; default=A")
# The three lists below are comma-separated on the command line.
argParser.add_argument('orig', metavar='wild-type', type=Type_listOf(Type_aminoAcid),
                       help="wild-type residue(s) amino-acid[n]")
argParser.add_argument('loc', metavar='location', type=Type_listOf(Type_index),
                       help="mutated position(s) integer[n]")
argParser.add_argument('mut', metavar='mutation', type=Type_listOf(Type_aminoAcid),
                       help="1 or n substituents amino-acid|amino-acid[n]")
# Optional switches.  The help text of --verbose is intentionally left
# disabled: #help="show outputs from the underlying tools"
argParser.add_argument('-v', '--verbose', action="store_true")
# Hidden option (not listed in --help).
argParser.add_argument('--ver', default=None, help=argparse.SUPPRESS)
| # ---------------------------------------------- DATA PREDICTION ------------------------------------------ | |
def check_input(args):
    """Placeholder for extra validation of the parsed arguments.

    Currently does nothing and returns ``None``.
    """
    return None
def _score_color(prediction):
    """Map a prediction score to an ``(r, g, b)`` tuple of ints in 0..255."""
    # ternary gradient orange-black-blue
    ORANGE = (255, 165, 0)
    BLUESH = (100, 100, 255)
    orange_weight = max(0, 1 - prediction * 2)
    blue_weight = max(0, (2 * prediction) ** 2 - 1)
    # Clamp and round every channel so that the CSS rgb() value is always
    # made of in-range integers.
    return tuple(
        round(min(255, max(0, o * orange_weight + b * blue_weight)))
        for o, b in zip(ORANGE, BLUESH)
    )


def predict(pdb_path, chain, orig, loc, mut, weights=None, version=None, rich_output=False):
    """Run the ensemble predictor for one variant and format the result as text.

    Args:
        pdb_path, chain, orig, loc, mut: forwarded unchanged to
            ``EnsemblePredictor.predict_change``.
        weights, version: forwarded unchanged to the ``EnsemblePredictor``
            constructor.
        rich_output: if true, return an HTML ``<span>`` coloured by the
            score; otherwise return plain text.

    Returns:
        str: a one-line message with the score and its verbal assessment.
        NOTE(review): the score is presumably a float in [0, 1] with 0.5
        meaning neutral (as the labels below suggest) -- confirm against
        the predictor.

    Raises:
        ValueError: if ``predict_change`` raises ``KeyError``, which is
            reported as a non-existing position in the PDB.
    """
    from code.predictor import EnsemblePredictor  # expensive import left for after the argument check

    pred_model = EnsemblePredictor(weights=weights, version=version)
    try:
        assessment, prediction = pred_model.predict_change(pdb_path, chain, orig, loc, mut)
    except KeyError as e:
        raise ValueError("Non-existing position in the PDB: %s" % str(e)) from e

    # Translate the one-character class returned by the model to a label.
    labels = {
        '+': 'solubilizing (score > 0.5)',
        'N': 'neutral (score ≈ 0.5)',
        '-': 'desolubilizing (score < 0.5)',
    }
    assessment = labels[assessment]

    if rich_output:
        color = _score_color(prediction)
        return f"Predicted solubilization score: <span style='color: rgb{color}'>{prediction:.2f} {assessment}</span>"
    return "Predicted solubilization score: %g (%s)" % (prediction, assessment)
if __name__ == '__main__':
    # ---------------------------------------- ARGUMENTS PROCESSING ----------------------------------------
    args = argParser.parse_args()

    # The mutation list is reconciled with the positions after parsing
    # (argparse can't cope with nested positional arguments); a mismatch is
    # reported as a regular usage error.
    try:
        checked_mut = check_mutList(args.loc, args.orig, args.mut)
    except Exception as err:
        argParser.error(str(err))
    else:
        args.mut = checked_mut

    # args.input is the (code, path) pair produced by the 'input' argument type.
    pdb_code, pdb_path = args.input

    # Disabled hook that would forward --verbose to the modeling module:
    # import code.data_preprocessing.model as modeling
    # if(args.verbose):
    #     modeling.VERBOSE_LEVEL = modeling.VERBOSE_VERBOSE

    # ------------------------------------- PREPROCESSING & INFERENCE --------------------------------------
    print()
    message = predict(pdb_path, args.chain, args.orig, args.loc, args.mut)
    print(message)