Stroke-ia commited on
Commit
1d67cae
·
verified ·
1 Parent(s): a694711

Upload helper_code.py

Browse files
Files changed (1) hide show
  1. helper_code.py +262 -0
helper_code.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ # Do *not* edit this script.
4
+ # These are helper functions that you can use with your code.
5
+
6
+ import os, numpy as np, scipy as sp, scipy.io, scipy.io.wavfile
7
+
8
+ # Check if a variable is a number or represents a number.
9
def is_number(x):
    """Return True if ``float(x)`` succeeds, otherwise False.

    Only ``ValueError`` and ``TypeError`` from the conversion are treated as
    "not a number"; any other exception propagates to the caller.
    """
    try:
        float(x)
    except (ValueError, TypeError):
        return False
    return True
15
+
16
+ # Check if a variable is an integer or represents an integer.
17
def is_integer(x):
    """Return True if *x* converts to a float with no fractional part.

    Values that ``float()`` rejects with ``ValueError`` or ``TypeError`` are
    reported as False. Infinities and NaN are also False, because
    ``float.is_integer()`` is False for them.
    """
    try:
        value = float(x)
    except (ValueError, TypeError):
        return False
    return value.is_integer()
22
+
23
+ # Check if a variable is a finite number or represents a finite number.
24
def is_finite_number(x):
    """Return a truthy value if *x* converts to a finite float.

    Values that ``float()`` rejects with ``ValueError`` or ``TypeError``
    yield False. For convertible values the result of ``np.isfinite`` is
    returned as-is (a NumPy boolean).
    """
    try:
        value = float(x)
    except (ValueError, TypeError):
        return False
    return np.isfinite(value)
29
+
30
+ # Compare normalized strings.
31
def compare_strings(x, y):
    """Compare *x* and *y* as strings, ignoring case and surrounding whitespace.

    Both arguments are passed through ``str()`` and stripped. Case folding
    uses ``casefold()`` when it exists and falls back to ``lower()`` otherwise.
    """
    left = str(x).strip()
    right = str(y).strip()
    try:
        return left.casefold() == right.casefold()
    except AttributeError:  # For Python 2.x compatibility
        return left.lower() == right.lower()
36
+
37
+ # Find patient data files.
38
def find_patient_files(data_folder):
    """Return the paths of the ``.txt`` files directly inside *data_folder*.

    Entries whose name starts with ``'.'`` are skipped. Paths are built with
    ``os.path.join(data_folder, name)`` and returned in lexicographic order,
    unless every file stem parses as an integer, in which case they are
    ordered numerically (to help with debugging).

    The numeric check uses ``int()`` itself, so stems such as ``1.0`` or
    ``1e3`` (which look integral as floats but are rejected by ``int()``) no
    longer raise ``ValueError``; they fall back to lexicographic order.

    Raises whatever ``os.listdir`` raises for a missing or unreadable folder.
    """
    filenames = [
        os.path.join(data_folder, name)
        for name in sorted(os.listdir(data_folder))
        if not name.startswith('.') and os.path.splitext(name)[1] == '.txt'
    ]

    # Sort numerically only when *every* stem is a valid int literal.
    try:
        # [:-4] drops the '.txt' suffix that the filter above guarantees.
        numeric_keys = {path: int(os.path.basename(path)[:-4]) for path in filenames}
    except ValueError:
        return filenames

    return sorted(filenames, key=numeric_keys.get)
53
+
54
+ # Load patient data as a string.
55
def load_patient_data(filename):
    """Read *filename* in text mode and return its entire contents as a string."""
    with open(filename, 'r') as handle:
        return handle.read()
59
+
60
+ # Load a WAV file.
61
def load_wav_file(filename):
    """Read a WAV file and return ``(recording, frequency)``.

    ``scipy.io.wavfile.read`` returns the sampling rate first; this helper
    swaps the order so that the sample data comes first.
    """
    wav = sp.io.wavfile.read(filename)
    sample_rate, samples = wav
    return samples, sample_rate
64
+
65
+ # Load recordings.
66
def load_recordings(data_folder, data, get_frequencies=False):
    """Load the WAV recordings listed in the patient data string.

    The number of recordings comes from ``get_num_locations(data)``. For each
    of the lines following the first line of *data*, the third
    space-separated field is taken as a file name relative to *data_folder*
    and loaded with ``load_wav_file``.

    Returns the list of recordings, or ``(recordings, frequencies)`` when
    *get_frequencies* is true.
    """
    num_locations = get_num_locations(data)
    # Skip the first line; the next num_locations lines describe recordings.
    recording_lines = data.split('\n')[1:num_locations + 1]

    recordings = []
    frequencies = []
    for index in range(num_locations):
        wav_name = recording_lines[index].split(' ')[2]
        wav_path = os.path.join(data_folder, wav_name)
        samples, sample_rate = load_wav_file(wav_path)
        recordings.append(samples)
        frequencies.append(sample_rate)

    return (recordings, frequencies) if get_frequencies else recordings
84
+
85
+ # Get patient ID from patient data.
86
def get_patient_id(data):
    """Return the first space-separated field of the first line of *data*.

    The result is a string; it is the empty string when the first line is
    empty or begins with a space.
    """
    first_line = data.split('\n', 1)[0]
    return first_line.split(' ', 1)[0]
97
+
98
+ # Get number of recording locations from patient data.
99
def get_num_locations(data):
    """Return the second space-separated field of the first line as an int.

    Returns None when the first line has no second field or the field is not
    a valid integer literal. Only those two failures (``IndexError`` and
    ``ValueError``) are absorbed; the previous bare ``except`` also hid
    unrelated errors such as ``KeyboardInterrupt``.
    """
    first_line = data.split('\n', 1)[0]
    try:
        return int(first_line.split(' ')[1])
    except (IndexError, ValueError):
        return None
110
+
111
+ # Get frequency from patient data.
112
def get_frequency(data):
    """Return the third space-separated field of the first line as a float.

    Returns None when the first line has fewer than three fields or the field
    cannot be parsed by ``float()``. Only ``IndexError`` and ``ValueError``
    are absorbed, instead of the previous bare ``except``.
    """
    first_line = data.split('\n', 1)[0]
    try:
        return float(first_line.split(' ')[2])
    except (IndexError, ValueError):
        return None
123
+
124
+ # Get recording locations from patient data.
125
def get_locations(data):
    """Return the first field of each recording line in *data*.

    Recording lines are lines 1 through ``get_num_locations(data)`` (the
    first line, index 0, is skipped). Each line contributes its first
    space-separated field. Iteration stops at the first line past that range.
    """
    num_locations = get_num_locations(data)
    locations = []
    for index, line in enumerate(data.split('\n')):
        if index == 0:
            continue
        if not (1 <= index <= num_locations):
            break
        locations.append(line.split(' ')[0])
    return locations
137
+
138
+ # Get age from patient data.
139
def get_age(data):
    """Return the value of the ``#Age:`` line in *data*, stripped, or None.

    The value is the text between the first and second ``': '`` separators.
    If several ``#Age:`` lines exist, the last parsable one wins. A line
    without the ``': '`` separator is ignored; only that ``IndexError`` is
    absorbed, instead of the previous bare ``except``.
    """
    age = None
    for line in data.split('\n'):
        if not line.startswith('#Age:'):
            continue
        try:
            age = line.split(': ')[1].strip()
        except IndexError:
            pass
    return age
148
+
149
+ # Get sex from patient data.
150
def get_sex(data):
    """Return the value of the ``#Sex:`` line in *data*, stripped, or None.

    The value is the text between the first and second ``': '`` separators.
    If several ``#Sex:`` lines exist, the last parsable one wins. A line
    without the ``': '`` separator is ignored; only that ``IndexError`` is
    absorbed, instead of the previous bare ``except``.
    """
    sex = None
    for line in data.split('\n'):
        if not line.startswith('#Sex:'):
            continue
        try:
            sex = line.split(': ')[1].strip()
        except IndexError:
            pass
    return sex
159
+
160
+ # Get height from patient data.
161
def get_height(data):
    """Return the value of the ``#Height:`` line in *data* as a float, or None.

    The value is the text between the first and second ``': '`` separators,
    stripped and parsed with ``float()`` (so ``nan`` parses to NaN). Lines
    that lack the separator or hold unparsable text are ignored; only
    ``IndexError`` and ``ValueError`` are absorbed, instead of the previous
    bare ``except``. If several lines match, the last parsable one wins.
    """
    height = None
    for line in data.split('\n'):
        if not line.startswith('#Height:'):
            continue
        try:
            height = float(line.split(': ')[1].strip())
        except (IndexError, ValueError):
            pass
    return height
170
+
171
+ # Get weight from patient data.
172
def get_weight(data):
    """Return the value of the ``#Weight:`` line in *data* as a float, or None.

    The value is the text between the first and second ``': '`` separators,
    stripped and parsed with ``float()`` (so ``nan`` parses to NaN). Lines
    that lack the separator or hold unparsable text are ignored; only
    ``IndexError`` and ``ValueError`` are absorbed, instead of the previous
    bare ``except``. If several lines match, the last parsable one wins.
    """
    weight = None
    for line in data.split('\n'):
        if not line.startswith('#Weight:'):
            continue
        try:
            weight = float(line.split(': ')[1].strip())
        except (IndexError, ValueError):
            pass
    return weight
181
+
182
+ # Get pregnancy status from patient data.
183
def get_pregnancy_status(data):
    """Return the ``#Pregnancy status:`` value in *data* as a bool, or None.

    The text between the first and second ``': '`` separators is stripped,
    passed through ``sanitize_binary_value`` and converted with ``bool()``.
    A line without the separator is ignored; only that ``IndexError`` is
    absorbed, instead of the previous bare ``except``. If several lines
    match, the last parsable one wins.
    """
    is_pregnant = None
    for line in data.split('\n'):
        if not line.startswith('#Pregnancy status:'):
            continue
        try:
            is_pregnant = bool(sanitize_binary_value(line.split(': ')[1].strip()))
        except IndexError:
            pass
    return is_pregnant
192
+
193
+ # Get murmur from patient data.
194
def get_murmur(data):
    """Return the value of the ``#Murmur:`` line in *data*.

    The value is the text between the first and second ``': '`` separators.
    It is stripped of surrounding whitespace, matching ``get_age`` and
    ``get_sex``, so a trailing ``'\\r'`` from CRLF files does not leak into
    the label. If several lines match, the last parsable one wins.

    Raises:
        ValueError: if no parsable ``#Murmur:`` line is present.
    """
    murmur = None
    for line in data.split('\n'):
        if not line.startswith('#Murmur:'):
            continue
        try:
            murmur = line.split(': ')[1].strip()
        except IndexError:
            # Header present but no ': ' separator; treat as missing.
            pass
    if murmur is None:
        raise ValueError('No murmur available. Is your code trying to load labels from the hidden data?')
    return murmur
205
+
206
+ # Get outcome from patient data.
207
def get_outcome(data):
    """Return the value of the ``#Outcome:`` line in *data*.

    The value is the text between the first and second ``': '`` separators.
    It is stripped of surrounding whitespace, matching ``get_age`` and
    ``get_sex``, so a trailing ``'\\r'`` from CRLF files does not leak into
    the label. If several lines match, the last parsable one wins.

    Raises:
        ValueError: if no parsable ``#Outcome:`` line is present.
    """
    outcome = None
    for line in data.split('\n'):
        if not line.startswith('#Outcome:'):
            continue
        try:
            outcome = line.split(': ')[1].strip()
        except IndexError:
            # Header present but no ': ' separator; treat as missing.
            pass
    if outcome is None:
        raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
    return outcome
218
+
219
+ # Sanitize binary values from Challenge outputs.
220
def sanitize_binary_value(x):
    """Map *x* to 1 or 0.

    *x* is converted with ``str()``, has its single and double quotes removed
    and is stripped. The result is 1 when that text is a finite number equal
    to 1 or is one of ``'True'``, ``'true'``, ``'T'``, ``'t'``; otherwise 0.
    """
    cleaned = str(x).replace('"', '').replace("'", "").strip()  # Remove any quotes or invisible characters.
    truthy_words = ('True', 'true', 'T', 't')
    equals_one = is_finite_number(cleaned) and float(cleaned) == 1
    return 1 if (equals_one or cleaned in truthy_words) else 0
226
+
227
+ # Sanitize scalar values from Challenge outputs.
228
def sanitize_scalar_value(x):
    """Map *x* to a float, using 0.0 for anything unusable.

    *x* is converted with ``str()``, has its single and double quotes removed
    and is stripped. Finite numbers and infinities are returned as
    ``float``; NaN and non-numeric text yield 0.0.
    """
    cleaned = str(x).replace('"', '').replace("'", "").strip()  # Remove any quotes or invisible characters.
    if not is_finite_number(cleaned) and not (is_number(cleaned) and np.isinf(float(cleaned))):
        return 0.0
    return float(cleaned)
234
+
235
+ # Save Challenge outputs.
236
def save_challenge_outputs(filename, patient_id, classes, labels, probabilities):
    """Write the Challenge outputs for one patient to *filename*.

    The file holds four newline-terminated lines: ``#<patient_id>``, then the
    comma-joined ``str()`` of each item in *classes*, *labels* and
    *probabilities* respectively. An existing file is overwritten.
    """
    # Format the Challenge outputs, one entry per output line.
    rows = [
        '#{}'.format(patient_id),
        ','.join(map(str, classes)),
        ','.join(map(str, labels)),
        ','.join(map(str, probabilities)),
    ]
    output_string = '\n'.join(rows) + '\n'

    # Write the Challenge outputs.
    with open(filename, 'w') as handle:
        handle.write(output_string)
247
+
248
+ # Load Challenge outputs.
249
def load_challenge_outputs(filename):
    """Read a Challenge output file and return its first four lines, parsed.

    Returns ``(patient_id, classes, labels, probabilities)``:
      * line 0: patient ID, with every ``'#'`` removed and stripped;
      * line 1: tuple of stripped class names, split on commas;
      * line 2: tuple of labels, each passed through ``sanitize_binary_value``;
      * line 3: tuple of probabilities, each passed through
        ``sanitize_scalar_value``.
    Lines after the fourth are not read.

    NOTE(review): a file with fewer than four lines leaves some of the result
    names unassigned, so the final return raises ``UnboundLocalError``.
    """
    with open(filename, 'r') as handle:
        for index, line in enumerate(handle):
            if index > 3:
                break
            if index == 0:
                patient_id = line.replace('#', '').strip()
            elif index == 1:
                classes = tuple(field.strip() for field in line.split(','))
            elif index == 2:
                labels = tuple(sanitize_binary_value(field) for field in line.split(','))
            else:
                probabilities = tuple(sanitize_scalar_value(field) for field in line.split(','))
    return patient_id, classes, labels, probabilities