Spaces:
Paused
Paused
| #!/usr/bin/env python | |
| # Do *not* edit this script. | |
| # These are helper functions that you can use with your code. | |
| import os, numpy as np, scipy as sp, scipy.io, scipy.io.wavfile | |
# Check if a variable is a number or represents a number.
def is_number(x):
    """Return True if ``float(x)`` succeeds, otherwise False.

    A ValueError or TypeError raised by the conversion is treated as
    "not a number"; any other exception propagates to the caller.
    """
    try:
        float(x)
    except (ValueError, TypeError):
        return False
    else:
        return True
# Check if a variable is an integer or represents an integer.
def is_integer(x):
    """Return True if ``x`` converts to a float with no fractional part.

    Values for which ``float(x)`` raises ValueError or TypeError yield False.
    """
    try:
        value = float(x)
    except (ValueError, TypeError):
        return False
    return value.is_integer()
# Check if a variable is a finite number or represents a finite number.
def is_finite_number(x):
    """Return a truthy value if ``x`` converts to a finite float.

    Values for which ``float(x)`` raises ValueError or TypeError yield False;
    otherwise the result of ``np.isfinite`` on the converted value is returned.
    """
    try:
        value = float(x)
    except (ValueError, TypeError):
        return False
    return np.isfinite(value)
# Compare normalized strings.
def compare_strings(x, y):
    """Return True if ``x`` and ``y`` match after normalization.

    Both arguments are converted with ``str``, stripped of surrounding
    whitespace and compared case-insensitively.
    """
    left = str(x).strip()
    right = str(y).strip()
    try:
        return left.casefold() == right.casefold()
    except AttributeError:  # For Python 2.x compatibility
        return left.lower() == right.lower()
# Find patient data files.
def find_patient_files(data_folder):
    """Return the paths of the patient ``.txt`` files in ``data_folder``.

    Entries whose name starts with ``'.'`` or whose extension is not ``.txt``
    are skipped. Each returned path is ``data_folder`` joined with the entry
    name.

    The paths are sorted by name. To help with debugging, they are sorted
    numerically instead when every file name (without ``.txt``) parses as an
    integer. Names such as ``1.0.txt`` that merely look numeric do not parse
    with ``int`` and therefore keep the by-name order rather than raising.
    """
    filenames = []
    for name in sorted(os.listdir(data_folder)):
        root, extension = os.path.splitext(name)
        if not root.startswith('.') and extension == '.txt':
            filenames.append(os.path.join(data_folder, name))

    def numeric_key(filename):
        # Every collected name ends in '.txt', so drop exactly that suffix.
        return int(os.path.basename(filename)[:-len('.txt')])

    # sorted() evaluates every key before ordering, so one non-integer name
    # makes the whole numeric sort fall back to the by-name order.
    try:
        return sorted(filenames, key=numeric_key)
    except ValueError:
        return filenames
# Load patient data as a string.
def load_patient_data(filename):
    """Open ``filename`` in text mode and return its entire contents."""
    with open(filename, 'r') as handle:
        return handle.read()
# Load a WAV file.
def load_wav_file(filename):
    """Read a WAV file and return ``(recording, frequency)``.

    ``scipy.io.wavfile.read`` yields the rate first and the samples second;
    this helper returns them in the opposite order.
    """
    rate_and_samples = sp.io.wavfile.read(filename)
    return rate_and_samples[1], rate_and_samples[0]
# Load recordings.
def load_recordings(data_folder, data, get_frequencies=False):
    """Load the WAV recordings listed in the patient data.

    The number of locations comes from ``get_num_locations(data)``. For each
    of the lines that follow the first line, the third space-separated field
    is taken as a file name, joined to ``data_folder`` and read with
    ``load_wav_file``.

    Returns the list of recordings, or ``(recordings, frequencies)`` when
    ``get_frequencies`` is true.
    """
    num_locations = get_num_locations(data)
    location_lines = data.split('\n')[1:num_locations+1]

    recordings, frequencies = [], []
    for index in range(num_locations):
        wav_name = location_lines[index].split(' ')[2]
        samples, rate = load_wav_file(os.path.join(data_folder, wav_name))
        recordings.append(samples)
        frequencies.append(rate)

    return (recordings, frequencies) if get_frequencies else recordings
# Get patient ID from patient data.
def get_patient_id(data):
    """Return the first space-separated field of the first line of ``data``.

    Splitting a string always yields at least one element, so this cannot
    fail for string input; an empty string yields ``''``.
    """
    first_line = data.split('\n')[0]
    return first_line.split(' ')[0]
# Get number of recording locations from patient data.
def get_num_locations(data):
    """Return the second space-separated field of the first line as an int.

    Returns None when the first line has no second field or when that field
    does not parse as an integer.
    """
    header_fields = data.split('\n')[0].split(' ')
    try:
        return int(header_fields[1])
    except (IndexError, ValueError):
        # Missing field or non-integer text: report "unknown".
        return None
# Get frequency from patient data.
def get_frequency(data):
    """Return the third space-separated field of the first line as a float.

    Returns None when the first line has fewer than three fields or when the
    third field does not parse as a float.
    """
    header_fields = data.split('\n')[0].split(' ')
    try:
        return float(header_fields[2])
    except (IndexError, ValueError):
        # Missing field or non-numeric text: report "unknown".
        return None
# Get recording locations from patient data.
def get_locations(data):
    """Return the first space-separated field of each location line.

    The location lines are lines 1 through ``get_num_locations(data)`` of
    ``data`` (line 0 is skipped); scanning stops at the first line past that
    range.
    """
    num_locations = get_num_locations(data)
    locations = []
    for index, line in enumerate(data.split('\n')):
        if index == 0:
            continue
        if not (1 <= index <= num_locations):
            break
        locations.append(line.split(' ')[0])
    return locations
# Get age from patient data.
def get_age(data):
    """Return the text of the ``#Age:`` entry in ``data``, or None.

    Every line starting with ``#Age:`` is considered; the value is the text
    between the first and second ``': '`` separators, stripped of surrounding
    whitespace. If several lines qualify, the last one wins. A matching line
    without a ``': '`` separator is ignored.
    """
    age = None
    for line in data.split('\n'):
        if not line.startswith('#Age:'):
            continue
        fields = line.split(': ')
        if len(fields) > 1:  # No separator means there is no value to read.
            age = fields[1].strip()
    return age
# Get sex from patient data.
def get_sex(data):
    """Return the text of the ``#Sex:`` entry in ``data``, or None.

    Every line starting with ``#Sex:`` is considered; the value is the text
    between the first and second ``': '`` separators, stripped of surrounding
    whitespace. If several lines qualify, the last one wins. A matching line
    without a ``': '`` separator is ignored.
    """
    sex = None
    for line in data.split('\n'):
        if not line.startswith('#Sex:'):
            continue
        fields = line.split(': ')
        if len(fields) > 1:  # No separator means there is no value to read.
            sex = fields[1].strip()
    return sex
# Get height from patient data.
def get_height(data):
    """Return the ``#Height:`` entry in ``data`` as a float, or None.

    Every line starting with ``#Height:`` is considered; the value is the
    text after the first ``': '`` separator, converted with ``float``. Lines
    without a separator or with text that does not parse are ignored, and the
    last successfully parsed line wins.
    """
    height = None
    for line in data.split('\n'):
        if not line.startswith('#Height:'):
            continue
        fields = line.split(': ')
        if len(fields) < 2:  # No separator means there is no value to read.
            continue
        try:
            height = float(fields[1].strip())
        except ValueError:
            # Non-numeric text: keep whatever was found so far.
            pass
    return height
# Get weight from patient data.
def get_weight(data):
    """Return the ``#Weight:`` entry in ``data`` as a float, or None.

    Every line starting with ``#Weight:`` is considered; the value is the
    text after the first ``': '`` separator, converted with ``float``. Lines
    without a separator or with text that does not parse are ignored, and the
    last successfully parsed line wins.
    """
    weight = None
    for line in data.split('\n'):
        if not line.startswith('#Weight:'):
            continue
        fields = line.split(': ')
        if len(fields) < 2:  # No separator means there is no value to read.
            continue
        try:
            weight = float(fields[1].strip())
        except ValueError:
            # Non-numeric text: keep whatever was found so far.
            pass
    return weight
# Get pregnancy status from patient data.
def get_pregnancy_status(data):
    """Return the ``#Pregnancy status:`` entry in ``data`` as a bool, or None.

    Every line starting with ``#Pregnancy status:`` is considered; the text
    after the first ``': '`` separator is passed through
    ``sanitize_binary_value`` and converted to bool. A matching line without
    a separator is ignored, and the last qualifying line wins.
    """
    is_pregnant = None
    for line in data.split('\n'):
        if not line.startswith('#Pregnancy status:'):
            continue
        fields = line.split(': ')
        if len(fields) > 1:  # No separator means there is no value to read.
            is_pregnant = bool(sanitize_binary_value(fields[1].strip()))
    return is_pregnant
# Get murmur from patient data.
def get_murmur(data):
    """Return the text of the ``#Murmur:`` entry in ``data``.

    Every line starting with ``#Murmur:`` is considered; the value is the
    text between the first and second ``': '`` separators. It is stripped of
    surrounding whitespace, as the age and sex readers do, so trailing blanks
    or a stray carriage return do not end up in the label. A matching line
    without a separator is ignored, and the last qualifying line wins.

    Raises:
        ValueError: if no murmur value is found.
    """
    murmur = None
    for line in data.split('\n'):
        if not line.startswith('#Murmur:'):
            continue
        fields = line.split(': ')
        if len(fields) > 1:  # No separator means there is no value to read.
            murmur = fields[1].strip()
    if murmur is None:
        raise ValueError('No murmur available. Is your code trying to load labels from the hidden data?')
    return murmur
# Get outcome from patient data.
def get_outcome(data):
    """Return the text of the ``#Outcome:`` entry in ``data``.

    Every line starting with ``#Outcome:`` is considered; the value is the
    text between the first and second ``': '`` separators. It is stripped of
    surrounding whitespace, as the age and sex readers do, so trailing blanks
    or a stray carriage return do not end up in the label. A matching line
    without a separator is ignored, and the last qualifying line wins.

    Raises:
        ValueError: if no outcome value is found.
    """
    outcome = None
    for line in data.split('\n'):
        if not line.startswith('#Outcome:'):
            continue
        fields = line.split(': ')
        if len(fields) > 1:  # No separator means there is no value to read.
            outcome = fields[1].strip()
    if outcome is None:
        raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
    return outcome
# Sanitize binary values from Challenge outputs.
def sanitize_binary_value(x):
    """Map a Challenge output entry to 1 or 0.

    The entry is converted with ``str``, stripped of single and double quotes
    and of surrounding whitespace. It maps to 1 when it is a finite number
    equal to 1 or one of ``'True'``, ``'true'``, ``'T'``, ``'t'``; anything
    else maps to 0.
    """
    cleaned = str(x).replace('"', '').replace("'", "").strip() # Remove any quotes or invisible characters.
    equals_one = is_finite_number(cleaned) and float(cleaned) == 1
    is_true_text = cleaned in ('True', 'true', 'T', 't')
    return 1 if (equals_one or is_true_text) else 0
# Sanitize scalar values from Challenge outputs.
def sanitize_scalar_value(x):
    """Map a Challenge output entry to a float.

    The entry is converted with ``str``, stripped of single and double quotes
    and of surrounding whitespace. Finite and infinite numbers are returned
    as floats; everything else (including NaN) becomes 0.0.
    """
    cleaned = str(x).replace('"', '').replace("'", "").strip() # Remove any quotes or invisible characters.
    if is_finite_number(cleaned):
        return float(cleaned)
    if is_number(cleaned) and np.isinf(float(cleaned)):
        return float(cleaned)
    return 0.0
# Save Challenge outputs.
def save_challenge_outputs(filename, patient_id, classes, labels, probabilities):
    """Write Challenge outputs to ``filename`` as four newline-terminated lines.

    The lines are, in order: ``#`` followed by the patient ID, then the
    classes, the labels and the probabilities, each as comma-separated
    ``str`` values.
    """
    # Format Challenge outputs.
    rows = [
        '#{}'.format(patient_id),
        ','.join(map(str, classes)),
        ','.join(map(str, labels)),
        ','.join(map(str, probabilities)),
    ]
    output_string = '\n'.join(rows) + '\n'

    # Write the Challenge outputs.
    with open(filename, 'w') as handle:
        handle.write(output_string)
# Load Challenge outputs.
def load_challenge_outputs(filename):
    """Read Challenge outputs from the first four lines of ``filename``.

    Returns ``(patient_id, classes, labels, probabilities)``:

    * ``patient_id``: the first line with every ``#`` removed, stripped.
    * ``classes``: the comma-separated entries of the second line, stripped.
    * ``labels``: the entries of the third line passed through
      ``sanitize_binary_value``.
    * ``probabilities``: the entries of the fourth line passed through
      ``sanitize_scalar_value``.

    Lines after the fourth are not read.

    Raises:
        ValueError: if the file has fewer than four lines.
    """
    header = []
    with open(filename, 'r') as f:
        for line in f:
            header.append(line)
            if len(header) == 4:
                break

    # Fail with a clear message rather than returning partial results.
    if len(header) < 4:
        raise ValueError(
            'Expected 4 lines of Challenge outputs in {} but found {}.'.format(filename, len(header)))

    patient_line, class_line, label_line, probability_line = header
    patient_id = patient_line.replace('#', '').strip()
    classes = tuple(entry.strip() for entry in class_line.split(','))
    labels = tuple(sanitize_binary_value(entry) for entry in label_line.split(','))
    probabilities = tuple(sanitize_scalar_value(entry) for entry in probability_line.split(','))
    return patient_id, classes, labels, probabilities