Wwwy1031 committed on
Commit
87995a2
·
verified ·
1 Parent(s): 4e010c6

Create DDE.py

Browse files
Files changed (1) hide show
  1. DDE.py +89 -0
DDE.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import math
3
+ from util.seed import set_seed
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
# Import-time side effect: importing this module calls util.seed.set_seed()
# with no arguments.
# NOTE(review): presumably this fixes RNG seeds for reproducibility; nothing in
# DDE() or feature_DDE() below uses randomness, so confirm the call is needed
# at module level rather than in the training entry point.
+ set_seed()
8
+
9
def DDE(fastas, **kw):
    """Encode sequences as Dipeptide Deviation from Expected mean (DDE).

    For each of the 400 ordered amino-acid pairs the encoded value is
    ``(Dc - Tm) / sqrt(Tv)`` where, for a sequence with ``N`` residues:

    * ``Dc`` is the pair's count divided by the ``N - 1`` overlapping
      dipeptides found in the sequence,
    * ``Tm`` is ``(codons[first] / 61) * (codons[second] / 61)``,
    * ``Tv`` is ``Tm * (1 - Tm) / (N - 1)``.

    Args:
        fastas: Iterable of records where item 0 is the sequence name and
            item 1 is the sequence string. ``'-'`` characters are removed
            from the sequence before encoding.
        **kw: Accepted for call compatibility; not used.

    Returns:
        A list of rows. The first row is the header
        ``['#', 'AA', 'AC', ..., 'YY']``; every following row is
        ``[name, v_1, ..., v_400]`` in the same dipeptide order.
        A sequence with fewer than two residues contains no dipeptide, so
        its 400 values are all ``0.0``.

    Raises:
        KeyError: If a sequence of two or more residues contains a character
            that is not one of the 20 upper-case amino-acid letters.
    """
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

    # Codon count per amino acid; the values sum to 61, the divisor used
    # for the expected dipeptide frequency below.
    codon_counts = {
        'A': 4, 'C': 2, 'D': 2, 'E': 2, 'F': 2,
        'G': 4, 'H': 2, 'I': 3, 'K': 2, 'L': 6,
        'M': 1, 'N': 2, 'P': 4, 'Q': 2, 'R': 6,
        'S': 6, 'T': 4, 'V': 4, 'W': 1, 'Y': 2,
    }

    dipeptides = [first + second
                  for first in amino_acids for second in amino_acids]
    encodings = [['#'] + dipeptides]

    # Expected frequency (Tm) of every dipeptide, in header order.
    expected_mean = [
        (codon_counts[pair[0]] / 61) * (codon_counts[pair[1]] / 61)
        for pair in dipeptides
    ]
    aa_index = {residue: pos for pos, residue in enumerate(amino_acids)}

    for record in fastas:
        name = record[0]
        sequence = record[1].replace('-', '')
        n_pairs = len(sequence) - 1

        if n_pairs < 1:
            # No dipeptide exists, so Dc is 0 everywhere and Tv would need a
            # division by zero (or a negative variance for an empty string).
            # (Dc - Tm) / sqrt(Tv) equals (Dc - Tm) * sqrt(N - 1) / sqrt(Tm * (1 - Tm)),
            # which is 0 when N - 1 == 0, so emit a zero vector.
            encodings.append([name] + [0.0] * len(dipeptides))
            continue

        counts = [0] * len(dipeptides)
        for first, second in zip(sequence, sequence[1:]):
            counts[aa_index[first] * 20 + aa_index[second]] += 1

        row = [name]
        for count, mean in zip(counts, expected_mean):
            variance = mean * (1 - mean) / n_pairs
            # The counts always sum to n_pairs, so count / n_pairs is Dc.
            row.append((count / n_pairs - mean) / math.sqrt(variance))
        encodings.append(row)

    return encodings
69
+
70
+
71
def feature_DDE(file_path):
    """Build a DDE feature table from a two-line-per-record FASTA file.

    The file is read as UTF-8 text. After blank lines are dropped, lines are
    consumed in pairs: the first line of each pair is the record name and
    the second is its sequence. All records are encoded with ``DDE`` and the
    name column is discarded.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        pandas.DataFrame with one row per record and columns named
        ``DDE1``, ``DDE2``, ... in the dipeptide order produced by ``DDE``.

    Raises:
        ValueError: If the file has a name line without a matching
            sequence line (odd number of non-blank lines).
    """
    # The context manager closes the handle even if parsing fails.
    with open(file_path, 'r', encoding='utf-8') as handle:
        stripped = [line.strip() for line in handle]

    # Blank lines (typically trailing) would otherwise shift the
    # name/sequence pairing or run past the end of the list.
    lines = [line for line in stripped if line]
    if len(lines) % 2:
        raise ValueError(
            'FASTA file {!r} has {} non-blank lines; expected name/sequence '
            'pairs'.format(file_path, len(lines))
        )

    records = [[lines[pos], lines[pos + 1]]
               for pos in range(0, len(lines), 2)]

    # DDE accepts many records, so its lookup tables are built only once.
    encodings = DDE(records)
    # Skip the header row and drop the leading name from every data row.
    features = [row[1:] for row in encodings[1:]]

    table = pd.DataFrame(features)
    table.columns = ['DDE' + str(pos + 1)
                     for pos in range(len(table.columns))]
    return table
89
+