Xianfish9's picture
Upload 2 files
1eeed0f verified
import pandas as pd
import numpy as np
# Lookup table: 20 single-letter uppercase keys, each mapped to a list of
# 12 numeric descriptors (the same column order is used for every key).
# NOTE(review): the keys look like the one-letter codes of the 20 standard
# amino acids and the columns like (mostly 0..1 scaled) physicochemical
# properties, but the meaning of each column is not stated here -- confirm
# against the data's origin before relying on a particular column.
# This table is not referenced by any code visible in this part of the file.
data_dict = {
"A": [0.108555943, 0.405506884, 0.955223881, 0.896118721, 0, 1, 1, 1, 0, 0, 1, 0],
"R": [0.767557104, 1, 0.686567164, 0.801369863, 1.191, 1, 1, 1, 1, 1, 0, 0],
"N": [0.441656988, 0.330413016, 0.462686567, 0, 0, 1, 1, 1, 0, 1, 0, 0],
"D": [0.449322493, 0, 0.253731343, 0.865296804, 0.374, 1, 1, 1, -1, 1, 1, 0],
"C": [0.356871854, 0.285356696, 0, 1, 0.793, 1, 1, 0, 0, 1, 0, 1],
"Q": [0.55029036, 0.360450563, 0.686567164, 0.811643836, 0, 1, 1, 1, 0, 1, 0, 0],
"E": [0.557955865, 0.056320401, 0.71641791, 0.873287671, 0.403, 1, 1, 1, -1, 1, 1, 0],
"G": [0, 0.400500626, 0.955223881, 0.885844749, 0, 0, 1, 1, 0, 1, 1, 0],
"H": [0.62013163, 0.603254068, 0.164179104, 0.816210046, 0.569, 1, 1, 1, 1, 1, 1, 0.8],
"I": [0.43437863, 0.406758448, 0.910447761, 0.883561644, 0, 1, 0, 1, 0, 0, 0, 0],
"L": [0.43437863, 0.40175219, 0.970149254, 0.865296804, 0, 1, 0, 1, 0, 0, 0, 0],
"K": [0.550677507, 0.872340426, 0.701492537, 0.79109589, 1, 1, 1, 1, 1, 1, 0, 0],
"M": [0.574061169, 0.371714643, 0.850746269, 0.820776256, 0, 1, 0, 1, 0, 0, 0.1, 0],
"F": [0.697793264, 0.339173967, 0.179104478, 0.811643836, 0, 1, 0, 1, 0, 0, 0.1, 0.9],
"P": [0.310181959, 0.441802253, 0.417910448, 0.979452055, 0, 1, 1, 1, 0, 0.5, 1, 0],
"S": [0.232442896, 0.364205257, 0.746268657, 0.813926941, 0, 1, 1, 1, 0, 1, 1, 0],
"T": [0.341076268, 0.42428035, 0.567164179, 0.808219178, 0, 1, 1, 1, 0, 1, 0, 0],
"W": [1, 0.39048811, 1, 0.841324201, 0, 1, 0, 1, 0, 0, 0, 0.8],
"Y": [0.821680217, 0.361702128, 0.731343284, 0.809360731, 0.961, 1, 0, 1, 0, 0.5, 1, 0.8],
"V": [0.325822687, 0.399249061, 0.865671642, 0.878995434, 0, 1, 0, 1, 0, 0, 0, 0]
}
# Lookup table used by PC_feature: 20 single-letter uppercase keys, each
# mapped to a 3-element list. The first two entries are floats; the third
# entry is always either 1 or -1.
# NOTE(review): the keys look like the one-letter codes of the 20 standard
# amino acids and the values like two scaled physicochemical properties plus
# a binary class flag, but the meaning of each column is not stated here --
# confirm against the data's origin.
data_new_dict = {
"G": [0.0, 0.5363372093023252, -1],
"A": [0.07347447047353533, 0.5534883720930233, 1],
"T": [0.2366637964906612, 0.48837209302325573, -1],
"S": [0.15120012868167378, 0.372093023255814, -1],
"P": [0.21957275509630982, 0.7209302325581398, -1],
"V": [0.22993840656583635, 0.5363372093023252, 1],
"L": [0.31981981981981985, 0.5416666666666666, 1],
"I": [0.31981981981981985, 0.5552325581395349, 1],
"M": [0.4250240432219403, 0.4011627906976744, 1],
"F": [0.5205540765087008, 0.2558139534883721, 1],
"Y": [0.6151618372354518, 0.36046511627906974, 1],
"W": [0.740630755511022, 0.4976744186046512, 1],
"D": [0.33248277702242695, 0.0, -1],
"E": [0.409931793152727, 0.06976744186046511, -1],
"N": [0.32651104755672825, 0.2674418604651163, -1],
"Q": [0.40479484311833796, 0.34883720930232553, -1],
"K": [0.40452036659863644, 1.0, -1],
"R": [0.5447355923482843, 1.2325581395348837, -1],
"H": [0.47177217934844545, 0.8488372093023256, 1],
"C": [0.24070355568601118, 0.20348837209302326, 1]
}
def PC_feature(seq):
    """Encode sequences as per-residue vectors looked up in ``data_new_dict``.

    Args:
        seq: Iterable of sequences. Each sequence is itself an iterable of
            keys of ``data_new_dict`` -- typically a string such as ``"AAC"``,
            so a batch looks like ``["AAC", "CRN"]``.

    Returns:
        The ``np.ndarray`` produced by ``np.array`` from the nested list of
        looked-up vectors. When all sequences have the same length ``L`` the
        shape is ``(number_of_sequences, L, 3)``. Sequences of differing
        lengths are handed to ``np.array`` unchanged, so the outcome for
        ragged input is whatever the installed NumPy version does with it.

    Raises:
        KeyError: If an element of a sequence is not a key of
            ``data_new_dict``.
    """
    # Iterate the sequences directly instead of indexing via range(len(...)):
    # this is the idiomatic form and also accepts generators and other
    # iterables that have no len() or integer indexing.
    feature = [[data_new_dict[aa] for aa in sequence] for sequence in seq]
    return np.array(feature)


# Disabled usage example: the block below is a bare string literal, so none
# of it executes. With the two 3-residue inputs shown, the printed shape
# would be (2, 3, 3).
"""
seq = ["AAC", 'CRN']
output = PC_feature(seq)
print(output)
print(output.shape)
"""