Spaces:
Paused
Paused
File size: 7,898 Bytes
1d67cae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 |
#!/usr/bin/env python
# Do *not* edit this script.
# These are helper functions that you can use with your code.
import os, numpy as np, scipy as sp, scipy.io, scipy.io.wavfile
# Check if a variable is a number or represents a number.
def is_number(x):
    """Return True when ``x`` can be converted to a float, otherwise False."""
    try:
        float(x)
    except (ValueError, TypeError):
        # The conversion was rejected, so x neither is nor represents a number.
        return False
    else:
        return True
# Check if a variable is an integer or represents an integer.
def is_integer(x):
    """Return True when ``x`` converts to a float with no fractional part.

    Values that ``is_number`` rejects yield False.
    """
    # Short-circuit: float(x) is only evaluated once is_number(x) has passed.
    return is_number(x) and float(x).is_integer()
# Check if a variable is a finite number or represents a finite number.
def is_finite_number(x):
    """Return whether ``x`` converts to a float that is neither NaN nor infinite.

    Values that ``is_number`` rejects yield False; otherwise the result of
    ``np.isfinite`` on the converted value is returned.
    """
    if not is_number(x):
        return False
    return np.isfinite(float(x))
# Compare normalized strings.
def compare_strings(x, y):
    """Compare ``x`` and ``y`` as strings, ignoring case and surrounding whitespace.

    Both arguments are passed through ``str`` first, so non-string values can
    be compared as well.
    """
    left = str(x).strip()
    right = str(y).strip()
    try:
        return left.casefold() == right.casefold()
    except AttributeError:  # For Python 2.x compatibility
        return left.lower() == right.lower()
# Find patient data files.
def find_patient_files(data_folder):
    """Return the paths of the patient ``.txt`` files found in ``data_folder``.

    Entries whose name starts with ``.`` and entries with any other extension
    are skipped. The paths are returned in lexicographic order, unless every
    file name (without its extension) parses as an integer, in which case they
    are ordered numerically to help with debugging.
    """
    filenames = []
    for f in sorted(os.listdir(data_folder)):
        root, extension = os.path.splitext(f)
        if not root.startswith('.') and extension == '.txt':
            filenames.append(os.path.join(data_folder, f))

    def numeric_key(filename):
        # int() is the conversion used for ordering, so it is also the test for
        # "is an integer": names such as '1.0' or '1e3' raise ValueError here
        # and select the lexicographic fallback below instead of crashing.
        root = os.path.splitext(os.path.basename(filename))[0]
        return int(root)

    try:
        return sorted(filenames, key=numeric_key)
    except ValueError:
        # At least one name is not an integer; keep the lexicographic order.
        return filenames
# Load patient data as a string.
def load_patient_data(filename):
    """Read the patient data file at ``filename`` and return its full text."""
    with open(filename, 'r') as handle:
        return handle.read()
# Load a WAV file.
def load_wav_file(filename):
    """Load the WAV file at ``filename``.

    Returns a ``(recording, frequency)`` pair.
    """
    # scipy yields (rate, samples); this helper hands them back in the
    # opposite order.
    wav = sp.io.wavfile.read(filename)
    return wav[1], wav[0]
# Load recordings.
def load_recordings(data_folder, data, get_frequencies=False):
    """Load every recording listed in the patient data string ``data``.

    The number of recordings comes from ``get_num_locations``. Each of the
    lines that follow the header names a WAV file in its third space-separated
    entry; that name is resolved relative to ``data_folder`` and loaded with
    ``load_wav_file``.

    Returns the list of recordings, or a ``(recordings, frequencies)`` pair
    when ``get_frequencies`` is true.
    """
    num_locations = get_num_locations(data)
    lines = data.split('\n')
    recordings = []
    frequencies = []
    # Recording entries occupy lines 1..num_locations, right after the header.
    for line_number in range(1, num_locations + 1):
        fields = lines[line_number].split(' ')
        wav_path = os.path.join(data_folder, fields[2])
        recording, frequency = load_wav_file(wav_path)
        recordings.append(recording)
        frequencies.append(frequency)
    return (recordings, frequencies) if get_frequencies else recordings
# Get patient ID from patient data.
def get_patient_id(data):
    """Return the first space-separated field of the first line of ``data``.

    The value is returned as a string; it is the empty string when the first
    line is empty or starts with a space.
    """
    header = data.split('\n', 1)[0]
    return header.split(' ', 1)[0]
# Get number of recording locations from patient data.
def get_num_locations(data):
    """Return the number of recording locations declared in ``data``.

    The count is the second space-separated field of the first line, parsed
    as an integer. None is returned when that field is missing or is not an
    integer literal.
    """
    header_fields = data.split('\n', 1)[0].split(' ')
    try:
        return int(header_fields[1])
    except (IndexError, ValueError):
        # Only the two expected parse failures are treated as "not available";
        # anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        return None
# Get frequency from patient data.
def get_frequency(data):
    """Return the frequency declared in ``data``.

    The frequency is the third space-separated field of the first line, parsed
    as a float. None is returned when that field is missing or cannot be
    parsed as a float.
    """
    header_fields = data.split('\n', 1)[0].split(' ')
    try:
        return float(header_fields[2])
    except (IndexError, ValueError):
        # Only the two expected parse failures are treated as "not available";
        # anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        return None
# Get recording locations from patient data.
def get_locations(data):
    """Return the recording location names listed in ``data``.

    The location is the first space-separated entry of each line after the
    header, for at most as many lines as ``get_num_locations`` reports.
    """
    num_locations = get_num_locations(data)
    locations = []
    # Skip the header line and number the remaining lines from 1.
    for index, line in enumerate(data.split('\n')[1:], 1):
        if not index <= num_locations:
            break
        locations.append(line.split(' ')[0])
    return locations
# Get age from patient data.
def get_age(data):
    """Return the value of the ``#Age:`` line in ``data`` as a stripped string.

    None is returned when no such line exists or when it has no ``': '``
    separator. If several lines match, the last well-formed one wins.
    """
    age = None
    for line in data.split('\n'):
        if not line.startswith('#Age:'):
            continue
        try:
            age = line.split(': ')[1].strip()
        except IndexError:
            # No ': ' separator on this line; keep any value found earlier.
            pass
    return age
# Get sex from patient data.
def get_sex(data):
    """Return the value of the ``#Sex:`` line in ``data`` as a stripped string.

    None is returned when no such line exists or when it has no ``': '``
    separator. If several lines match, the last well-formed one wins.
    """
    sex = None
    for line in data.split('\n'):
        if not line.startswith('#Sex:'):
            continue
        try:
            sex = line.split(': ')[1].strip()
        except IndexError:
            # No ': ' separator on this line; keep any value found earlier.
            pass
    return sex
# Get height from patient data.
def get_height(data):
    """Return the value of the ``#Height:`` line in ``data`` as a float.

    None is returned when no such line exists, when it has no ``': '``
    separator, or when the value cannot be parsed as a float. If several lines
    match, the last well-formed one wins.
    """
    height = None
    for line in data.split('\n'):
        if not line.startswith('#Height:'):
            continue
        try:
            height = float(line.split(': ')[1].strip())
        except (IndexError, ValueError):
            # Missing separator or non-numeric value; keep any earlier value.
            pass
    return height
# Get weight from patient data.
def get_weight(data):
    """Return the value of the ``#Weight:`` line in ``data`` as a float.

    None is returned when no such line exists, when it has no ``': '``
    separator, or when the value cannot be parsed as a float. If several lines
    match, the last well-formed one wins.
    """
    weight = None
    for line in data.split('\n'):
        if not line.startswith('#Weight:'):
            continue
        try:
            weight = float(line.split(': ')[1].strip())
        except (IndexError, ValueError):
            # Missing separator or non-numeric value; keep any earlier value.
            pass
    return weight
# Get pregnancy status from patient data.
def get_pregnancy_status(data):
    """Return the ``#Pregnancy status:`` value in ``data`` as a bool.

    The text after ``': '`` is passed through ``sanitize_binary_value`` and
    converted to bool. None is returned when no such line exists or when it
    has no ``': '`` separator. If several lines match, the last well-formed
    one wins.
    """
    is_pregnant = None
    for line in data.split('\n'):
        if not line.startswith('#Pregnancy status:'):
            continue
        try:
            is_pregnant = bool(sanitize_binary_value(line.split(': ')[1].strip()))
        except IndexError:
            # No ': ' separator on this line; keep any value found earlier.
            pass
    return is_pregnant
# Get murmur from patient data.
def get_murmur(data):
    """Return the value of the ``#Murmur:`` line in ``data``.

    The value is stripped of surrounding whitespace, consistent with the other
    string-valued getters, so data with ``\\r\\n`` line endings does not yield
    a label with a trailing carriage return. If several lines match, the last
    well-formed one wins.

    Raises:
        ValueError: if no ``#Murmur:`` line with a ``': '`` separator exists.
    """
    murmur = None
    for line in data.split('\n'):
        if not line.startswith('#Murmur:'):
            continue
        try:
            murmur = line.split(': ')[1].strip()
        except IndexError:
            # No ': ' separator on this line; keep any value found earlier.
            pass
    if murmur is None:
        raise ValueError('No murmur available. Is your code trying to load labels from the hidden data?')
    return murmur
# Get outcome from patient data.
def get_outcome(data):
    """Return the value of the ``#Outcome:`` line in ``data``.

    The value is stripped of surrounding whitespace, consistent with the other
    string-valued getters, so data with ``\\r\\n`` line endings does not yield
    a label with a trailing carriage return. If several lines match, the last
    well-formed one wins.

    Raises:
        ValueError: if no ``#Outcome:`` line with a ``': '`` separator exists.
    """
    outcome = None
    for line in data.split('\n'):
        if not line.startswith('#Outcome:'):
            continue
        try:
            outcome = line.split(': ')[1].strip()
        except IndexError:
            # No ': ' separator on this line; keep any value found earlier.
            pass
    if outcome is None:
        raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
    return outcome
# Sanitize binary values from Challenge outputs.
def sanitize_binary_value(x):
    """Map a Challenge output entry to 1 or 0.

    Quotes and surrounding whitespace are removed from ``str(x)`` first. The
    result is 1 when the cleaned text is one of ``True``, ``true``, ``T``,
    ``t`` or is a finite number equal to 1; every other value gives 0.
    """
    text = str(x).replace('"', '').replace("'", "").strip()  # Remove any quotes or invisible characters.
    if text in ('True', 'true', 'T', 't'):
        return 1
    if is_finite_number(text) and float(text) == 1:
        return 1
    return 0
# Sanitize scalar values from Challenge outputs.
def sanitize_scalar_value(x):
    """Map a Challenge output entry to a float.

    Quotes and surrounding whitespace are removed from ``str(x)`` first.
    Finite and infinite numbers are returned as floats; NaN and anything that
    is not a number give 0.0.
    """
    text = str(x).replace('"', '').replace("'", "").strip()  # Remove any quotes or invisible characters.
    # "Finite or infinite" is the same as "a number that is not NaN".
    if is_number(text) and not np.isnan(float(text)):
        return float(text)
    return 0.0
# Save Challenge outputs.
def save_challenge_outputs(filename, patient_id, classes, labels, probabilities):
    """Write the Challenge outputs for one patient to ``filename``.

    The file has four newline-terminated lines: ``#<patient_id>``, then the
    comma-separated classes, labels and probabilities, each item rendered
    with ``str``.
    """
    # Format Challenge outputs.
    rows = [
        '#{}'.format(patient_id),
        ','.join(map(str, classes)),
        ','.join(map(str, labels)),
        ','.join(map(str, probabilities)),
    ]
    contents = '\n'.join(rows) + '\n'

    # Write the Challenge outputs.
    with open(filename, 'w') as handle:
        handle.write(contents)
# Load Challenge outputs.
def load_challenge_outputs(filename):
    """Read Challenge outputs for one patient from ``filename``.

    Only the first four lines are interpreted: the patient ID (with ``#``
    removed), then comma-separated classes, labels (via
    ``sanitize_binary_value``) and probabilities (via
    ``sanitize_scalar_value``).

    Returns a ``(patient_id, classes, labels, probabilities)`` tuple whose
    last three items are tuples. A file with fewer than four lines leaves
    some of these unassigned, so the return statement fails.
    """
    with open(filename, 'r') as handle:
        for line_number, line in enumerate(handle):
            if line_number > 3:
                break
            if line_number == 0:
                patient_id = line.replace('#', '').strip()
                continue
            fields = line.split(',')
            if line_number == 1:
                classes = tuple(field.strip() for field in fields)
            elif line_number == 2:
                labels = tuple(sanitize_binary_value(field) for field in fields)
            else:
                probabilities = tuple(sanitize_scalar_value(field) for field in fields)
    return patient_id, classes, labels, probabilities
|