File size: 4,790 Bytes
6357b81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

import numpy as np
import scipy.io
import os


def PSTAAP_feature(protein_sequences, test_PSTAAP=False):
    """Build the PSTAAP feature matrix for a list of peptide sequences.

    PSTAAP presumably stands for position-specific tri-amino-acid
    propensity -- confirm against the project documentation.

    Each sequence has the residue at index 24 removed (for the 49-residue
    windows used in this file that is the central residue). Every run of
    three consecutive residues in the trimmed sequence is then looked up
    in the ``Fr`` table loaded from a ``.mat`` file, using row
    ``400 * a + 20 * b + c`` (``a``, ``b``, ``c`` are the residues' indices
    in the 20-letter alphabet) and the column equal to the triplet's
    starting position.

    Args:
        protein_sequences: Sequence of strings over the 20 standard
            amino-acid letters. The number of triplets evaluated per row is
            taken from the first sequence; 49-residue inputs yield exactly
            46 triplets. The list is NOT modified.
        test_PSTAAP: When true, read ``Fr_test.mat``; otherwise read
            ``Fr_train.mat``. Both paths are relative to the current
            working directory.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(protein_sequences), 46)``.
        Columns past the last triplet (shorter inputs) stay zero.

    Raises:
        ValueError: If a residue is not one of the 20 standard letters.
        IndexError: If a later sequence is shorter than the first, or the
            sequences produce more triplets than the output/``Fr`` columns.
    """
    # Trim copies instead of rewriting the caller's list: mutating the
    # argument made a second call on the same list drop yet another residue.
    trimmed = [seq[:24] + seq[25:] for seq in protein_sequences]

    if test_PSTAAP:
        mat_path = "Feature_extraction_algorithms/Fr_test.mat"
    else:
        mat_path = "Feature_extraction_algorithms/Fr_train.mat"
    # NOTE(review): Fr is presumably an (8000, 46) table -- confirm.
    Fr = scipy.io.loadmat(mat_path)['Fr']

    AA = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
    aa_index = {residue: k for k, residue in enumerate(AA)}

    PSTAAP = np.zeros((len(trimmed), 46))
    if not trimmed:
        return PSTAAP

    # The first sequence fixes how many triplets are read from every row.
    n_triplets = len(trimmed[0]) - 2
    for i, seq in enumerate(trimmed):
        try:
            for j in range(n_triplets):
                position1 = aa_index[seq[j]]
                position2 = aa_index[seq[j + 1]]
                position3 = aa_index[seq[j + 2]]
                PSTAAP[i][j] = Fr[400 * position1 + 20 * position2 + position3][j]
        except KeyError as err:
            # Keep the ValueError contract of the former list.index lookup.
            raise ValueError(
                f"{err.args[0]!r} is not a standard amino acid (sequence {i})"
            ) from err

    return PSTAAP


if __name__ == '__main__':
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.interpolate import splrep, BSpline
    from sklearn.preprocessing import MinMaxScaler
    from numpy.polynomial import Polynomial


    def plot_multiple_polynomial_fitted_functions(sample_datas, degree=3):
        """Plot polynomial fits of selected feature rows and mark their extrema.

        Rows 0, 1, 4 and 5 of ``sample_datas`` are skipped. Every other row
        is min-max scaled, fitted with a polynomial of the given ``degree``
        over an evenly spaced grid on [0, 1], and drawn together with red
        crosses at the grid points where the fitted derivative changes sign.
        The figure is shown with ``plt.show()``; nothing is returned.
        """
        marker_styles = ["o", "o", "^", "^", "v", "p"]
        line_colors = ["b", "b", "c", "c", "m", "y"]
        labels = ["sample1(1,0,0,0)", "sample1(1,0,0,0)", "sample1(0,1,0,0)", "sample2(0,1,0,0)", "sample3(0,0,1,0)", "sample6(0,0,0,1)"]
        skipped_rows = (0, 1, 4, 5)
        plt.figure(figsize=(12, 6))

        for row, values in enumerate(sample_datas):
            if row in skipped_rows:
                continue

            # Make the row dimensionless by rescaling it with MinMaxScaler.
            as_column = values.reshape(-1, 1)
            scaled = MinMaxScaler().fit_transform(as_column).flatten()

            # Fit a polynomial over an evenly spaced grid on [0, 1].
            grid = np.linspace(0, 1, len(scaled))
            poly = Polynomial.fit(grid, scaled, degree)
            fitted = poly(grid)

            # Extrema: grid indices where the first derivative changes sign.
            slope = poly.deriv(1)(grid)
            turning = np.where(np.diff(np.sign(slope)))[0]

            plt.plot(grid, fitted, label=labels[row], marker=marker_styles[row], color=line_colors[row])
            # Mark the extrema with red crosses.
            plt.plot(grid[turning], fitted[turning], 'rx', markersize=10)

        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('Fitted Polynomial Functions with Extrema')
        plt.legend()
        plt.show()

    def plot_multiple_fitted_functions(sample_datas, smooth_factor=1):
        """Plot smoothing B-spline fits of selected feature rows with extrema.

        Rows 0, 1, 4 and 5 of ``sample_datas`` are skipped. Every other row
        is min-max scaled and fitted with a cubic smoothing spline
        (``splrep`` with ``s=smooth_factor``) over an evenly spaced grid on
        [0, 1]. Red crosses mark the grid points where the spline's
        derivative changes sign. The figure is shown with ``plt.show()``;
        nothing is returned.
        """
        marker_styles = ["o", "o", "^", "^", "v", "p"]
        line_colors = ["b", "b", "c", "c", "m", "y"]
        labels = ["", "", "sample1(0,1,0,0)", "sample2(0,1,0,0)", "sample3(0,0,1,0)", ""]
        skipped_rows = (0, 1, 4, 5)
        plt.figure(figsize=(12, 6))

        for row, values in enumerate(sample_datas):
            if row in skipped_rows:
                continue

            # Rescale the row with MinMaxScaler before fitting.
            as_column = values.reshape(-1, 1)
            scaled = MinMaxScaler().fit_transform(as_column).flatten()

            # Cubic smoothing spline over an evenly spaced grid on [0, 1].
            grid = np.linspace(0, 1, len(scaled))
            knots, coeffs, order = splrep(grid, scaled, k=3, s=smooth_factor)
            curve = BSpline(knots, coeffs, order)
            fitted = curve(grid)

            # Extrema: grid indices where the derivative changes sign.
            slope = curve.derivative()(grid)
            turning = np.where(np.diff(np.sign(slope)))[0]

            plt.plot(grid, fitted, label=labels[row], marker=marker_styles[row], color=line_colors[row])
            plt.plot(grid[turning], fitted[turning], 'rx', markersize=10)

        plt.xlabel('X')
        plt.ylabel('Y')
        plt.title('Fitted B-Spline Functions with Extrema')
        plt.legend()
        plt.show()

    # Six 49-residue example peptides; the trailing letter on each line is
    # the author's own tag (meaning not shown in this file).
    protein_sequences = [
        "TSPASVASSSSTPSSKTKDLGHNDKSSTPGLKSNTPTPRNDAPTPGTST",  # a
        "LGGNIEQLVARSNILTLMYQCMQDKMPEVRQSSFALLGDLTKACFQHVK",  # a
        "VDFQHASEDARKTINQWVKGQTEGKIPELLASGMVDNMTKLVLVNAIYF",  # c
        "VEGTLKGPEVDLKGPRLDFEGPDAKLSGPSLKMPSLEISAPKVTAPDVD",  # c
        "IDILTSREQFFSDEERKYMAINQKKAYILVTPLKSRKVIEQRCMRYNLS",  # m
        "LAGTDGETTTQGLDGLSERCAQYKKDGADFAKWRCVLKISERTPSALAI",  # s
    ]
    # Encode the peptides with the training-set Fr table.
    data = PSTAAP_feature(protein_sequences, test_PSTAAP=False)

    # Draw both fitted-curve figures from the encoded rows.
    plot_multiple_polynomial_fitted_functions(data)
    plot_multiple_fitted_functions(data)