# SMILES2PEPTIDE / aminoacid_selective.py
# yzhang@u.duke.nus.edu
# add sequence 2 smiles feature
# 4868d91
#!/usr/bin/env python
"""Definitions and properties of amino-acids for p2smi"""
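# Amino-acid table: Fmoc building blocks first, then the natural L-amino-acids
# (plus glycine), then their D-enantiomers: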
# Lookup table of amino-acid building blocks, keyed by residue name.
#
# Every entry carries the same nine keys:
#   Code       - three-letter code (upper-case initial for L / Fmoc entries,
#                all lower-case for the D-enantiomers)
#   Letter     - unique single-character code
#   Formula    - molecular formula string
#   MolWeight  - molecular weight
#   SMILES     - SMILES of the free amino acid
#   nterm / cterm / disulphide / ester
#              - either False or a SMILES string carrying an attachment-point
#                marker (`*`, `[*:1]` or `[*:2]`); presumably consumed by the
#                peptide-assembly code to build the corresponding linkage -
#                confirm against the caller.
#
# NOTE(review): MolWeight is a float on the Fmoc entries and a str on all
# other entries; callers should coerce with float() until this is unified.
# NOTE(review): all eleven Fmoc entries share the same Formula ("C28H29NO5")
# and MolWeight (221.141578848), which cannot be right for distinct
# molecules - they look like copy-pasted placeholders; confirm and replace.
specific_aminos = {
    # --- Fmoc building blocks -------------------------------------------
    "Fmoc-Aib-OH": {
        "Code": "Aib",
        "Formula": "C28H29NO5",
        "Letter": "Ŷ",
        "MolWeight": 221.141578848,
        "SMILES": "CC(C)(N)C(=O)O",
        "cterm": "NC(C)(C)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
        "nterm": "[*:1]NC(C)(C)C(=O)O",
    },
    "Fmoc-Asp(OtBu)-(Dmb)Gly-OH": {
        "Code": "Dtg",
        "Formula": "C28H29NO5",
        "Letter": "Ĝ",
        "MolWeight": 221.141578848,
        "SMILES": "N[C@@H](CC(=O)OC(C)(C)C)C(=O)N(CC1=C(C=C(C=C1)OC)OC)CC(=O)O",
        "nterm": "[*:1]N[C@@H](CC(=O)OC(C)(C)C)C(=O)N(CC1=C(C=C(C=C1)OC)OC)CC(=O)O",
        "cterm": "N[C@@H](CC(=O)OC(C)(C)C)C(=O)N(CC1=C(C=C(C=C1)OC)OC)CC(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "Fmoc-Cys(Mmt)-OH": {
        "Code": "Cmt",
        "Formula": "C28H29NO5",
        "Letter": "Ĉ",
        "MolWeight": 221.141578848,
        # Side chain is written before the stereocentre here, so the L-form
        # needs [C@H]; [C@@H] in this atom order is the D-enantiomer and
        # contradicted the nterm/cterm strings below.
        "SMILES": "COC1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3)SC[C@H](N)C(=O)O",
        "nterm": "N([*:1])[C@@H](CSC(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=C(OC)C=C3)C(=O)O",
        "cterm": "N[C@@H](CSC(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=C(OC)C=C3)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "Fmoc-Glu(OAll)-OH": {
        "Code": "Eal",
        "Formula": "C28H29NO5",
        "Letter": "Ė",
        "MolWeight": 221.141578848,
        # Side chain first => [C@H] is the L-form (matches nterm/cterm).
        "SMILES": "C=CCOC(=O)CC[C@H](N)C(=O)O",
        "nterm": "[*:1]N[C@@H](CCC(=O)OCC=C)C(=O)O",
        "cterm": "N[C@@H](CCC(=O)OCC=C)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "Fmoc-Lys(palmitoyl-Glu-OtBu)-OH": {
        "Code": "Kpg",
        "Formula": "C28H29NO5",
        "Letter": "Ƙ",
        "MolWeight": 221.141578848,
        # NOTE(review): the chain lengths look off by one against the entry
        # name - three CH2 before the side-chain N (L-Lysine below has
        # four) and three CH2 between the amide C=O and the Glu
        # stereocentre. Left unchanged; verify against a reference
        # structure before relying on this entry.
        "SMILES": "N[C@@H](CCCNC(=O)CCC[C@@H](NC(=O)CCCCCCCCCCCCCCCC)C(=O)OC(C)(C)C)C(=O)O",
        "nterm": "[*:1]N[C@@H](CCCN(C(=O)CCC[C@@H](NC(=O)CCCCCCCCCCCCCCCC)C(=O)OC(C)(C)C))C(=O)O",
        "cterm": "N[C@@H](CCCN(C(=O)CCC[C@@H](NC(=O)CCCCCCCCCCCCCCCC)C(=O)OC(C)(C)C))C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "Fmoc-Thr(PO(OBzl)OH)-OH": {
        "Code": "Tpb",
        "Formula": "C28H29NO5",
        "Letter": "Ṯ",
        "MolWeight": 221.141578848,
        # The beta carbon lists CH3 before O (the reverse of the L-Threonine
        # entry), so it needs [C@@H] to keep the same configuration as
        # L-Thr; [C@H] in this order encodes allo-Thr.
        "SMILES": "N[C@@H]([C@@H](C)OP(=O)(O)OCc1ccccc1)C(=O)O",
        "disulphide": False,
        "ester": False,
        "nterm": "[*:1]N[C@@H]([C@@H](C)OP(=O)(O)OCC1=CC=CC=C1)C(=O)O",
        "cterm": "N[C@@H]([C@@H](C)OP(=O)(O)OCC1=CC=CC=C1)C(=O)[*:2]",
    },
    "Fmoc-Cycloleucine": {
        "Code": "Cyl",
        "Formula": "C28H29NO5",
        "Letter": "Ċ",
        "MolWeight": 221.141578848,
        "SMILES": "NC1(CCCC1)C(=O)O",
        "nterm": "[*:1]NC1(CCCC1)C(=O)O",
        "cterm": "NC1(CCCC1)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "Fmoc-N-Me-Ala-OH": {
        "Code": "Nma",
        "Formula": "C28H29NO5",
        "Letter": "Ṃ",
        "MolWeight": 221.141578848,
        "SMILES": "CN([C@@H](C)C(=O)O)",
        "cterm": "N(C)[C@@H](C)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
        "nterm": "[*:1]N(C)[C@@H](C)C(=O)O",
    },
    "Fmoc-N-Me-Leu-OH": {
        "Code": "Nml",
        "Formula": "C28H29NO5",
        "Letter": "Ŀ",
        "MolWeight": 221.141578848,
        # The carboxyl must sit on the alpha carbon (inside the branch), as
        # in the N-Me-Ala entry; the previous parenthesisation attached it
        # to the nitrogen.
        "SMILES": "CN([C@@H](CC(C)C)C(=O)O)",
        # Attachment point belongs on the carbonyl carbon, not on N.
        "cterm": "N(C)[C@@H](CC(C)C)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
        "nterm": "[*:1]N(C)[C@@H](CC(C)C)C(=O)O",
    },
    "Fmoc-Nle-OH": {
        "Code": "Nle",
        "Formula": "C28H29NO5",
        "Letter": "Ł",
        "MolWeight": 221.141578848,
        "SMILES": "N[C@@H](CCCC)C(=O)O",
        "nterm": "[*:1]N[C@@H](CCCC)C(=O)O",
        "cterm": "N[C@@H](CCCC)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    "N-Fmoc-L-homophenylalanine": {
        "Code": "Hph",
        "Formula": "C28H29NO5",
        "Letter": "Ĥ",
        "MolWeight": 221.141578848,
        "SMILES": "N[C@@H](CCC1=CC=CC=C1)C(=O)O",
        "nterm": "[*:1]N[C@@H](CCC1=CC=CC=C1)C(=O)O",
        "cterm": "N[C@@H](CCC1=CC=CC=C1)C(=O)[*:2]",
        "disulphide": False,
        "ester": False,
    },
    # --- Glycine and the L-amino-acids ----------------------------------
    "Glycine": {
        "Code": "Gly",
        "Formula": "C2H5NO2",
        "Letter": "G",
        "MolWeight": "75.07",
        "SMILES": "NCC(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Alanine": {
        "Code": "Ala",
        "Formula": "C3H7NO2",
        "Letter": "A",
        "MolWeight": "89.09",
        "SMILES": "N[C@@H](C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Arginine": {
        "Code": "Arg",
        "Formula": "C6H14N4O2",
        "Letter": "R",
        "MolWeight": "174.20",
        "SMILES": "N[C@@H](CCCNC(=N)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@@H](CCCNC(=N*)N)C(=O)O",
    },
    "L-Asparagine": {
        "Code": "Asn",
        "Formula": "C4H8N2O3",
        "Letter": "N",
        "MolWeight": "132.12",
        "SMILES": "N[C@@H](CC(=O)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@@H](CC(=O)N*)C(=O)O",
    },
    "L-Aspartic_Acid": {
        "Code": "Asp",
        "Formula": "C4H7NO4",
        "Letter": "D",
        "MolWeight": "133.10",
        "SMILES": "N[C@@H](CC(=O)O)C(=O)O",
        "cterm": "N[C@@H](CC*(=O))C(=O)O",
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Cysteine": {
        "Code": "Cys",
        "Formula": "C3H7NO2S",
        "Letter": "C",
        "MolWeight": "121.16",
        "SMILES": "N[C@@H](CS)C(=O)O",
        "cterm": False,
        "disulphide": "N[C@@H](CS*)C(=O)O",
        "ester": False,
        "nterm": False,
    },
    "L-Glutamic_Acid": {
        "Code": "Glu",
        "Formula": "C5H9NO4",
        "Letter": "E",
        "MolWeight": "147.13",
        "SMILES": "N[C@@H](CCC(=O)O)C(=O)O",
        "cterm": "N[C@@H](CCC*(=O))C(=O)O",
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Glutamine": {
        "Code": "Gln",
        "Formula": "C5H10N2O3",
        "Letter": "Q",
        "MolWeight": "146.15",
        "SMILES": "N[C@@H](CCC(=O)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@@H](CCC(=O)N*)C(=O)O",
    },
    "L-Histidine": {
        "Code": "His",
        "Formula": "C6H9N3O2",
        "Letter": "H",
        "MolWeight": "155.16",
        "SMILES": "N[C@@H](CC1=CNC=N1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Isoleucine": {
        "Code": "Ile",
        "Formula": "C6H13NO2",
        "Letter": "I",
        "MolWeight": "131.18",
        "SMILES": "N[C@@H]([C@H](CC)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Leucine": {
        "Code": "Leu",
        "Formula": "C6H13NO2",
        "Letter": "L",
        "MolWeight": "131.18",
        "SMILES": "N[C@@H](CC(C)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Lysine": {
        "Code": "Lys",
        # Was "C6H12N2O2"; lysine is C6H14N2O2 (consistent with 146.19).
        "Formula": "C6H14N2O2",
        "Letter": "K",
        "MolWeight": "146.19",
        "SMILES": "N[C@@H](CCCCN)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@@H](CCCCN*)C(=O)O",
    },
    "L-Methionine": {
        "Code": "Met",
        "Formula": "C5H11NO2S",
        "Letter": "M",
        "MolWeight": "149.21",
        "SMILES": "N[C@@H](CCSC)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Phenylalanine": {
        "Code": "Phe",
        "Formula": "C9H11NO2",
        "Letter": "F",
        "MolWeight": "165.19",
        "SMILES": "N[C@@H](Cc1ccccc1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Proline": {
        "Code": "Pro",
        "Formula": "C5H9NO2",
        "Letter": "P",
        "MolWeight": "115.13",
        "SMILES": "N1[C@@H](CCC1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Serine": {
        "Code": "Ser",
        # Was "C3H7NO2" (alanine's formula); serine is C3H7NO3 (105.09).
        "Formula": "C3H7NO3",
        "Letter": "S",
        "MolWeight": "105.09",
        "SMILES": "N[C@@H](CO)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@@H](CO*)C(=O)O",
        "nterm": False,
    },
    "L-Threonine": {
        "Code": "Thr",
        "Formula": "C4H9NO3",
        "Letter": "T",
        "MolWeight": "119.12",
        "SMILES": "N[C@@H]([C@H](O)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@@H]([C@H](O*)C)C(=O)O",
        "nterm": False,
    },
    "L-Tryptophan": {
        "Code": "Trp",
        "Formula": "C11H12N2O2",
        "Letter": "W",
        "MolWeight": "204.23",
        "SMILES": "N[C@@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "L-Tyrosine": {
        "Code": "Tyr",
        "Formula": "C9H11NO3",
        "Letter": "Y",
        "MolWeight": "181.19",
        "SMILES": "N[C@@H](Cc1ccc(O)cc1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@@H](Cc1ccc(O*)cc1)C(=O)O",
        "nterm": False,
    },
    "L-Valine": {
        "Code": "Val",
        "Formula": "C5H11NO2",
        "Letter": "V",
        "MolWeight": "117.15",
        "SMILES": "N[C@@H](C(C)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    # --- D-amino-acids (lower-case codes, mirrored stereocentres) -------
    "D-Alanine": {
        "Code": "ala",
        "Formula": "C3H7NO2",
        "Letter": "a",
        "MolWeight": "89.09",
        "SMILES": "N[C@H](C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Arginine": {
        "Code": "arg",
        "Formula": "C6H14N4O2",
        "Letter": "r",
        "MolWeight": "174.20",
        "SMILES": "N[C@H](CCCNC(=N)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@H](CCCNC(=N*)N)C(=O)O",
    },
    "D-Asparagine": {
        "Code": "asn",
        "Formula": "C4H8N2O3",
        "Letter": "n",
        "MolWeight": "132.12",
        "SMILES": "N[C@H](CC(=O)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@H](CC(=O)N*)C(=O)O",
    },
    "D-Aspartic_Acid": {
        "Code": "asp",
        "Formula": "C4H7NO4",
        "Letter": "d",
        "MolWeight": "133.10",
        "SMILES": "N[C@H](CC(=O)O)C(=O)O",
        "cterm": "N[C@H](CC*(=O))C(=O)O",
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Cysteine": {
        "Code": "cys",
        "Formula": "C3H7NO2S",
        "Letter": "c",
        "MolWeight": "121.16",
        "SMILES": "N[C@H](CS)C(=O)O",
        "cterm": False,
        "disulphide": "N[C@H](CS*)C(=O)O",
        "ester": False,
        "nterm": False,
    },
    "D-Glutamic_Acid": {
        "Code": "glu",
        "Formula": "C5H9NO4",
        "Letter": "e",
        "MolWeight": "147.13",
        "SMILES": "N[C@H](CCC(=O)O)C(=O)O",
        "cterm": "N[C@H](CCC*(=O))C(=O)O",
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Glutamine": {
        "Code": "gln",
        "Formula": "C5H10N2O3",
        "Letter": "q",
        "MolWeight": "146.15",
        "SMILES": "N[C@H](CCC(=O)N)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@H](CCC(=O)N*)C(=O)O",
    },
    "D-Histidine": {
        "Code": "his",
        "Formula": "C6H9N3O2",
        "Letter": "h",
        "MolWeight": "155.16",
        "SMILES": "N[C@H](CC1=CNC=N1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Isoleucine": {
        "Code": "ile",
        "Formula": "C6H13NO2",
        "Letter": "i",
        "MolWeight": "131.18",
        "SMILES": "N[C@H]([C@@H](CC)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Leucine": {
        "Code": "leu",
        "Formula": "C6H13NO2",
        "Letter": "l",
        "MolWeight": "131.18",
        "SMILES": "N[C@H](CC(C)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Lysine": {
        "Code": "lys",
        # Was "C6H12N2O2"; lysine is C6H14N2O2 (consistent with 146.19).
        "Formula": "C6H14N2O2",
        "Letter": "k",
        "MolWeight": "146.19",
        "SMILES": "N[C@H](CCCCN)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": "N[C@H](CCCCN*)C(=O)O",
    },
    "D-Methionine": {
        "Code": "met",
        "Formula": "C5H11NO2S",
        "Letter": "m",
        "MolWeight": "149.21",
        "SMILES": "N[C@H](CCSC)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Phenylalanine": {
        "Code": "phe",
        "Formula": "C9H11NO2",
        "Letter": "f",
        "MolWeight": "165.19",
        "SMILES": "N[C@H](Cc1ccccc1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Proline": {
        "Code": "pro",
        "Formula": "C5H9NO2",
        "Letter": "p",
        "MolWeight": "115.13",
        "SMILES": "N1[C@H](CCC1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Serine": {
        "Code": "ser",
        # Was "C3H7NO2" (alanine's formula); serine is C3H7NO3 (105.09).
        "Formula": "C3H7NO3",
        "Letter": "s",
        "MolWeight": "105.09",
        "SMILES": "N[C@H](CO)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@H](CO*)C(=O)O",
        "nterm": False,
    },
    "D-Tryptophan": {
        "Code": "trp",
        "Formula": "C11H12N2O2",
        "Letter": "w",
        "MolWeight": "204.23",
        "SMILES": "N[C@H](CC(=CN2)C1=C2C=CC=C1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Tyrosine": {
        "Code": "tyr",
        "Formula": "C9H11NO3",
        "Letter": "y",
        "MolWeight": "181.19",
        "SMILES": "N[C@H](Cc1ccc(O)cc1)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@H](Cc1ccc(O*)cc1)C(=O)O",
        "nterm": False,
    },
    "D-Valine": {
        "Code": "val",
        "Formula": "C5H11NO2",
        "Letter": "v",
        "MolWeight": "117.15",
        "SMILES": "N[C@H](C(C)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": False,
        "nterm": False,
    },
    "D-Threonine": {
        "Code": "thr",
        "Formula": "C4H9NO3",
        "Letter": "t",
        "MolWeight": "119.12",
        "SMILES": "N[C@H]([C@@H](O)C)C(=O)O",
        "cterm": False,
        "disulphide": False,
        "ester": "N[C@H]([C@@H](O*)C)C(=O)O",
        "nterm": False,
    },
}