""" Amino Acid Properties from AAindex Database Auto-generated by AAindexDownloader Total features: 20 """ import numpy as np # Raw values from AAindex AA_PROPERTIES_AAINDEX = { 'A': { 'BIGC670101': 52.600000, 'CHAM820101': 0.046000, 'CHOP780201': 1.420000, 'CHOP780202': 0.830000, 'CHOP780203': 0.740000, 'EISD860101': 0.670000, 'FASG760101': 89.090000, 'FAUJ830101': 0.310000, 'GRAR740102': 8.100000, 'GRAR740103': 31.000000, 'GUYH850101': 0.100000, 'HOPT810101': -0.500000, 'JANJ780101': 27.800000, 'KARP850101': 1.041000, 'KYTJ820101': 1.800000, 'ROSM880101': -0.670000, 'VINM940101': 0.984000, 'WERD780101': 0.520000, 'ZIMJ680101': 0.830000, 'ZIMJ680104': 6.000000, }, 'R': { 'BIGC670101': 109.100000, 'CHAM820101': 0.291000, 'CHOP780201': 0.980000, 'CHOP780202': 0.930000, 'CHOP780203': 1.010000, 'EISD860101': -2.100000, 'FASG760101': 174.200000, 'FAUJ830101': -1.010000, 'GRAR740102': 10.500000, 'GRAR740103': 124.000000, 'GUYH850101': 1.910000, 'HOPT810101': 3.000000, 'JANJ780101': 94.700000, 'KARP850101': 1.038000, 'KYTJ820101': -4.500000, 'ROSM880101': 12.100000, 'VINM940101': 1.008000, 'WERD780101': 0.490000, 'ZIMJ680101': 0.830000, 'ZIMJ680104': 10.760000, }, 'N': { 'BIGC670101': 75.700000, 'CHAM820101': 0.134000, 'CHOP780201': 0.670000, 'CHOP780202': 0.890000, 'CHOP780203': 1.460000, 'EISD860101': -0.600000, 'FASG760101': 132.120000, 'FAUJ830101': -0.600000, 'GRAR740102': 11.600000, 'GRAR740103': 56.000000, 'GUYH850101': 0.480000, 'HOPT810101': 0.200000, 'JANJ780101': 60.100000, 'KARP850101': 1.117000, 'KYTJ820101': -3.500000, 'ROSM880101': 7.230000, 'VINM940101': 1.048000, 'WERD780101': 0.420000, 'ZIMJ680101': 0.090000, 'ZIMJ680104': 5.410000, }, 'D': { 'BIGC670101': 68.400000, 'CHAM820101': 0.105000, 'CHOP780201': 1.010000, 'CHOP780202': 0.540000, 'CHOP780203': 1.520000, 'EISD860101': -1.200000, 'FASG760101': 133.100000, 'FAUJ830101': -0.770000, 'GRAR740102': 13.000000, 'GRAR740103': 54.000000, 'GUYH850101': 0.780000, 'HOPT810101': 3.000000, 'JANJ780101': 60.600000, 'KARP850101': 1.033000, 'KYTJ820101': -3.500000, 'ROSM880101': 8.720000, 'VINM940101': 1.068000, 'WERD780101': 0.370000, 'ZIMJ680101': 0.640000, 'ZIMJ680104': 2.770000, }, 'C': { 'BIGC670101': 68.300000, 'CHAM820101': 0.128000, 'CHOP780201': 0.700000, 'CHOP780202': 1.190000, 'CHOP780203': 0.960000, 'EISD860101': 0.380000, 'FASG760101': 121.150000, 'FAUJ830101': 1.540000, 'GRAR740102': 5.500000, 'GRAR740103': 55.000000, 'GUYH850101': -1.420000, 'HOPT810101': -1.000000, 'JANJ780101': 15.500000, 'KARP850101': 0.960000, 'KYTJ820101': 2.500000, 'ROSM880101': -0.340000, 'VINM940101': 0.906000, 'WERD780101': 0.830000, 'ZIMJ680101': 1.480000, 'ZIMJ680104': 5.050000, }, 'Q': { 'BIGC670101': 89.700000, 'CHAM820101': 0.180000, 'CHOP780201': 1.110000, 'CHOP780202': 1.100000, 'CHOP780203': 0.960000, 'EISD860101': -0.220000, 'FASG760101': 146.150000, 'FAUJ830101': -0.220000, 'GRAR740102': 10.500000, 'GRAR740103': 85.000000, 'GUYH850101': 0.950000, 'HOPT810101': 0.200000, 'JANJ780101': 68.700000, 'KARP850101': 1.165000, 'KYTJ820101': -3.500000, 'ROSM880101': 6.390000, 'VINM940101': 1.037000, 'WERD780101': 0.350000, 'ZIMJ680101': 0.000000, 'ZIMJ680104': 5.650000, }, 'E': { 'BIGC670101': 84.700000, 'CHAM820101': 0.151000, 'CHOP780201': 1.510000, 'CHOP780202': 0.370000, 'CHOP780203': 0.950000, 'EISD860101': -0.760000, 'FASG760101': 147.130000, 'FAUJ830101': -0.640000, 'GRAR740102': 12.300000, 'GRAR740103': 83.000000, 'GUYH850101': 0.830000, 'HOPT810101': 3.000000, 'JANJ780101': 68.200000, 'KARP850101': 1.094000, 'KYTJ820101': -3.500000, 'ROSM880101': 7.350000, 'VINM940101': 1.094000, 'WERD780101': 0.380000, 'ZIMJ680101': 0.650000, 'ZIMJ680104': 3.220000, }, 'G': { 'BIGC670101': 36.300000, 'CHAM820101': 0.000000, 'CHOP780201': 0.570000, 'CHOP780202': 0.750000, 'CHOP780203': 1.560000, 'EISD860101': 0.000000, 'FASG760101': 75.070000, 'FAUJ830101': 0.000000, 'GRAR740102': 9.000000, 'GRAR740103': 3.000000, 'GUYH850101': 0.330000, 'HOPT810101': 0.000000, 'JANJ780101': 24.500000, 'KARP850101': 1.142000, 'KYTJ820101': -0.400000, 'ROSM880101': 0.000000, 'VINM940101': 1.031000, 'WERD780101': 0.410000, 'ZIMJ680101': 0.100000, 'ZIMJ680104': 5.970000, }, 'H': { 'BIGC670101': 91.900000, 'CHAM820101': 0.230000, 'CHOP780201': 1.000000, 'CHOP780202': 0.870000, 'CHOP780203': 0.950000, 'EISD860101': 0.640000, 'FASG760101': 155.160000, 'FAUJ830101': 0.130000, 'GRAR740102': 10.400000, 'GRAR740103': 96.000000, 'GUYH850101': -0.500000, 'HOPT810101': -0.500000, 'JANJ780101': 50.700000, 'KARP850101': 0.982000, 'KYTJ820101': -3.200000, 'ROSM880101': 3.820000, 'VINM940101': 0.950000, 'WERD780101': 0.700000, 'ZIMJ680101': 1.100000, 'ZIMJ680104': 7.590000, }, 'I': { 'BIGC670101': 102.000000, 'CHAM820101': 0.186000, 'CHOP780201': 1.080000, 'CHOP780202': 1.600000, 'CHOP780203': 0.470000, 'EISD860101': 1.900000, 'FASG760101': 131.170000, 'FAUJ830101': 1.800000, 'GRAR740102': 5.200000, 'GRAR740103': 111.000000, 'GUYH850101': -1.130000, 'HOPT810101': -1.800000, 'JANJ780101': 22.800000, 'KARP850101': 1.002000, 'KYTJ820101': 4.500000, 'ROSM880101': -3.020000, 'VINM940101': 0.927000, 'WERD780101': 0.790000, 'ZIMJ680101': 3.070000, 'ZIMJ680104': 6.020000, }, 'L': { 'BIGC670101': 102.000000, 'CHAM820101': 0.186000, 'CHOP780201': 1.210000, 'CHOP780202': 1.300000, 'CHOP780203': 0.500000, 'EISD860101': 1.900000, 'FASG760101': 131.170000, 'FAUJ830101': 1.700000, 'GRAR740102': 4.900000, 'GRAR740103': 111.000000, 'GUYH850101': -1.180000, 'HOPT810101': -1.800000, 'JANJ780101': 27.600000, 'KARP850101': 0.967000, 'KYTJ820101': 3.800000, 'ROSM880101': -3.020000, 'VINM940101': 0.935000, 'WERD780101': 0.770000, 'ZIMJ680101': 2.520000, 'ZIMJ680104': 5.980000, }, 'K': { 'BIGC670101': 105.100000, 'CHAM820101': 0.219000, 'CHOP780201': 1.160000, 'CHOP780202': 0.740000, 'CHOP780203': 1.190000, 'EISD860101': -0.570000, 'FASG760101': 146.190000, 'FAUJ830101': -0.990000, 'GRAR740102': 11.300000, 'GRAR740103': 119.000000, 'GUYH850101': 1.400000, 'HOPT810101': 3.000000, 'JANJ780101': 103.000000, 'KARP850101': 1.093000, 'KYTJ820101': -3.900000, 'ROSM880101': 6.130000, 'VINM940101': 1.102000, 'WERD780101': 0.310000, 'ZIMJ680101': 1.600000, 'ZIMJ680104': 9.740000, }, 'M': { 'BIGC670101': 97.700000, 'CHAM820101': 0.221000, 'CHOP780201': 1.450000, 'CHOP780202': 1.050000, 'CHOP780203': 0.600000, 'EISD860101': 2.400000, 'FASG760101': 149.210000, 'FAUJ830101': 1.230000, 'GRAR740102': 5.700000, 'GRAR740103': 105.000000, 'GUYH850101': -1.590000, 'HOPT810101': -1.300000, 'JANJ780101': 33.500000, 'KARP850101': 0.947000, 'KYTJ820101': 1.900000, 'ROSM880101': -1.300000, 'VINM940101': 0.952000, 'WERD780101': 0.760000, 'ZIMJ680101': 1.400000, 'ZIMJ680104': 5.740000, }, 'F': { 'BIGC670101': 113.900000, 'CHAM820101': 0.290000, 'CHOP780201': 1.130000, 'CHOP780202': 1.380000, 'CHOP780203': 0.660000, 'EISD860101': 2.300000, 'FASG760101': 165.190000, 'FAUJ830101': 1.790000, 'GRAR740102': 5.200000, 'GRAR740103': 132.000000, 'GUYH850101': -2.120000, 'HOPT810101': -2.500000, 'JANJ780101': 25.500000, 'KARP850101': 0.930000, 'KYTJ820101': 2.800000, 'ROSM880101': -3.240000, 'VINM940101': 0.915000, 'WERD780101': 0.870000, 'ZIMJ680101': 2.750000, 'ZIMJ680104': 5.480000, }, 'P': { 'BIGC670101': 73.600000, 'CHAM820101': 0.131000, 'CHOP780201': 0.570000, 'CHOP780202': 0.550000, 'CHOP780203': 1.560000, 'EISD860101': 1.200000, 'FASG760101': 115.130000, 'FAUJ830101': 0.720000, 'GRAR740102': 8.000000, 'GRAR740103': 32.500000, 'GUYH850101': 0.730000, 'HOPT810101': 0.000000, 'JANJ780101': 51.500000, 'KARP850101': 1.055000, 'KYTJ820101': -1.600000, 'ROSM880101': -1.750000, 'VINM940101': 1.049000, 'WERD780101': 0.350000, 'ZIMJ680101': 2.700000, 'ZIMJ680104': 6.300000, }, 'S': { 'BIGC670101': 54.900000, 'CHAM820101': 0.062000, 'CHOP780201': 0.770000, 'CHOP780202': 0.750000, 'CHOP780203': 1.430000, 'EISD860101': 0.010000, 'FASG760101': 105.090000, 'FAUJ830101': -0.040000, 'GRAR740102': 9.200000, 'GRAR740103': 32.000000, 'GUYH850101': 0.520000, 'HOPT810101': 0.300000, 'JANJ780101': 42.000000, 'KARP850101': 1.169000, 'KYTJ820101': -0.800000, 'ROSM880101': 4.350000, 'VINM940101': 1.046000, 'WERD780101': 0.490000, 'ZIMJ680101': 0.140000, 'ZIMJ680104': 5.680000, }, 'T': { 'BIGC670101': 71.200000, 'CHAM820101': 0.108000, 'CHOP780201': 0.830000, 'CHOP780202': 1.190000, 'CHOP780203': 0.980000, 'EISD860101': 0.520000, 'FASG760101': 119.120000, 'FAUJ830101': 0.260000, 'GRAR740102': 8.600000, 'GRAR740103': 61.000000, 'GUYH850101': 0.070000, 'HOPT810101': -0.400000, 'JANJ780101': 45.000000, 'KARP850101': 1.073000, 'KYTJ820101': -0.700000, 'ROSM880101': 3.860000, 'VINM940101': 0.997000, 'WERD780101': 0.380000, 'ZIMJ680101': 0.540000, 'ZIMJ680104': 5.660000, }, 'W': { 'BIGC670101': 135.400000, 'CHAM820101': 0.409000, 'CHOP780201': 1.080000, 'CHOP780202': 1.370000, 'CHOP780203': 0.600000, 'EISD860101': 2.600000, 'FASG760101': 204.240000, 'FAUJ830101': 2.250000, 'GRAR740102': 5.400000, 'GRAR740103': 170.000000, 'GUYH850101': -0.510000, 'HOPT810101': -3.400000, 'JANJ780101': 34.700000, 'KARP850101': 0.925000, 'KYTJ820101': -0.900000, 'ROSM880101': -2.860000, 'VINM940101': 0.904000, 'WERD780101': 0.860000, 'ZIMJ680101': 0.310000, 'ZIMJ680104': 5.890000, }, 'Y': { 'BIGC670101': 116.200000, 'CHAM820101': 0.298000, 'CHOP780201': 0.690000, 'CHOP780202': 1.470000, 'CHOP780203': 1.140000, 'EISD860101': 1.600000, 'FASG760101': 181.190000, 'FAUJ830101': 0.960000, 'GRAR740102': 6.200000, 'GRAR740103': 136.000000, 'GUYH850101': -0.210000, 'HOPT810101': -2.300000, 'JANJ780101': 55.200000, 'KARP850101': 0.961000, 'KYTJ820101': -1.300000, 'ROSM880101': 0.980000, 'VINM940101': 0.929000, 'WERD780101': 0.640000, 'ZIMJ680101': 2.970000, 'ZIMJ680104': 5.660000, }, 'V': { 'BIGC670101': 85.100000, 'CHAM820101': 0.140000, 'CHOP780201': 1.060000, 'CHOP780202': 1.700000, 'CHOP780203': 0.590000, 'EISD860101': 1.500000, 'FASG760101': 117.150000, 'FAUJ830101': 1.220000, 'GRAR740102': 5.900000, 'GRAR740103': 84.000000, 'GUYH850101': -1.270000, 'HOPT810101': -1.500000, 'JANJ780101': 23.700000, 'KARP850101': 0.982000, 'KYTJ820101': 4.200000, 'ROSM880101': -2.180000, 'VINM940101': 0.931000, 'WERD780101': 0.720000, 'ZIMJ680101': 1.790000, 'ZIMJ680104': 5.960000, }, } # Feature descriptions FEATURE_DESCRIPTIONS = { 'BIGC670101': 'Residue volume (Bigelow, 1967)', 'CHAM820101': 'Polarizability parameter (Charton-Charton, 1982)', 'CHOP780201': 'Normalized frequency of alpha-helix (Chou-Fasman, 1978b)', 'CHOP780202': 'Normalized frequency of beta-sheet (Chou-Fasman, 1978b)', 'CHOP780203': 'Normalized frequency of beta-turn (Chou-Fasman, 1978b)', 'EISD860101': 'Solvation free energy (Eisenberg-McLachlan, 1986)', 'FASG760101': 'Molecular weight (Fasman, 1976)', 'FAUJ830101': 'Hydrophobic parameter pi (Fauchere-Pliska, 1983)', 'GRAR740102': 'Polarity (Grantham, 1974)', 'GRAR740103': 'Volume (Grantham, 1974)', 'GUYH850101': 'Partition energy (Guy, 1985)', 'HOPT810101': 'Hydrophilicity value (Hopp-Woods, 1981)', 'JANJ780101': 'Average accessible surface area (Janin et al., 1978)', 'KARP850101': 'Flexibility parameter for no rigid neighbors (Karplus-Schulz, 1985)', 'KYTJ820101': 'Hydropathy index (Kyte-Doolittle, 1982)', 'ROSM880101': 'Side chain hydropathy, uncorrected for solvation (Roseman, 1988)', 'VINM940101': 'Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)', 'WERD780101': 'Propensity to be buried inside (Wertz-Scheraga, 1978)', 'ZIMJ680101': 'Hydrophobicity (Zimmerman et al., 1968)', 'ZIMJ680104': 'Isoelectric point (Zimmerman et al., 1968)', } # Convert to numpy array def get_feature_vector(aa, feature_list=None): """ Get feature vector for an amino acid Args: aa: Amino acid single letter code feature_list: List of feature codes to include (None = all) Returns: numpy array of features """ if aa not in AA_PROPERTIES_AAINDEX: aa = "A" # Default to Alanine props = AA_PROPERTIES_AAINDEX[aa] if feature_list is None: feature_list = sorted(props.keys()) return np.array([props[f] for f in feature_list]) def get_sequence_features(sequence, feature_list=None): """Get feature matrix for a sequence [L, N_features]""" return np.array([get_feature_vector(aa, feature_list) for aa in sequence]) # Test if __name__ == "__main__": print("Loaded 20 features for 20 amino acids") print("\nExample: Alanine (A)") for key, value in list(AA_PROPERTIES_AAINDEX["A"].items())[:5]: print(f" {key}: {value:.4f} - {FEATURE_DESCRIPTIONS[key][:50]}") print("\nTest sequence features:") seq = "ARNDCQEG" features = get_sequence_features(seq) print(f" Sequence: {seq}") print(f" Feature matrix shape: {features.shape}")