magnumical committed on
Commit
279af50
·
verified ·
1 Parent(s): 0ce5547

Upload 72 files

Browse files
Files changed (39) hide show
  1. .gitattributes +5 -0
  2. Exploration/__pycache__/inference.cpython-312.pyc +0 -0
  3. Exploration/inference.py +30 -6
  4. LegacyTraining/train.py +709 -0
  5. Model_Inference.py +217 -100
  6. README.md +7 -7
  7. TestModels.py +109 -0
  8. Train.py +177 -115
  9. app.py +1 -1
  10. data/Respiratory_Sound_Database/testsample/115_1b1_Ar_sc_Meditron.txt +24 -0
  11. data/Respiratory_Sound_Database/testsample/115_1b1_Ar_sc_Meditron.wav +3 -0
  12. data/Respiratory_Sound_Database/testsample/121_1b1_Tc_sc_Meditron.wav +3 -0
  13. data/Respiratory_Sound_Database/testsample/121_1p1_Tc_sc_Meditron.txt +8 -0
  14. data/Respiratory_Sound_Database/testsample/149_1b1_Al_sc_Meditron.txt +18 -0
  15. data/Respiratory_Sound_Database/testsample/149_1b1_Al_sc_Meditron.wav +3 -0
  16. data/Respiratory_Sound_Database/testsample/157_1b1_Al_sc_Meditron.wav +0 -0
  17. data/Respiratory_Sound_Database/testsample/157_1b1_Ar_sc_Meditron.txt +12 -0
  18. data/Respiratory_Sound_Database/testsample/191_2b2_Tc_mc_LittC2SE.txt +8 -0
  19. data/Respiratory_Sound_Database/testsample/191_2b2_Tc_mc_LittC2SE.wav +3 -0
  20. data/Respiratory_Sound_Database/testsample/215_1b3_Tc_sc_Meditron.txt +6 -0
  21. data/Respiratory_Sound_Database/testsample/215_1b3_Tc_sc_Meditron.wav +3 -0
  22. data/Respiratory_Sound_Database/testsample/patient_diagnosis.csv +7 -0
  23. requirements.txt +3 -1
  24. streamlit_ui/__pycache__/data_exploration.cpython-312.pyc +0 -0
  25. streamlit_ui/__pycache__/model_deployment.cpython-312.pyc +0 -0
  26. streamlit_ui/__pycache__/model_performance.cpython-312.pyc +0 -0
  27. streamlit_ui/__pycache__/readme.cpython-312.pyc +0 -0
  28. streamlit_ui/data_exploration.py +205 -178
  29. streamlit_ui/readme.py +2 -2
  30. utils/__init__.py +0 -0
  31. utils/__pycache__/__init__.cpython-312.pyc +0 -0
  32. utils/__pycache__/audioprocessing.cpython-312.pyc +0 -0
  33. utils/__pycache__/data_loader.cpython-312.pyc +0 -0
  34. utils/__pycache__/evaluation.cpython-312.pyc +0 -0
  35. utils/__pycache__/model_utils.cpython-312.pyc +0 -0
  36. utils/audioprocessing.py +126 -166
  37. utils/data_loader.py +12 -5
  38. utils/evaluation.py +10 -34
  39. utils/model_utils.py +140 -120
.gitattributes CHANGED
@@ -37,3 +37,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
37
  data/Respiratory_Sound_Database/testsample/101_1b1_Pr_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
38
  data/Respiratory_Sound_Database/testsample/102_1b1_Ar_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
39
  data/Respiratory_Sound_Database/testsample/103_2b2_Ar_mc_LittC2SE.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
37
  data/Respiratory_Sound_Database/testsample/101_1b1_Pr_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
38
  data/Respiratory_Sound_Database/testsample/102_1b1_Ar_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
39
  data/Respiratory_Sound_Database/testsample/103_2b2_Ar_mc_LittC2SE.wav filter=lfs diff=lfs merge=lfs -text
40
+ data/Respiratory_Sound_Database/testsample/115_1b1_Ar_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
41
+ data/Respiratory_Sound_Database/testsample/121_1b1_Tc_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
42
+ data/Respiratory_Sound_Database/testsample/149_1b1_Al_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
43
+ data/Respiratory_Sound_Database/testsample/191_2b2_Tc_mc_LittC2SE.wav filter=lfs diff=lfs merge=lfs -text
44
+ data/Respiratory_Sound_Database/testsample/215_1b3_Tc_sc_Meditron.wav filter=lfs diff=lfs merge=lfs -text
Exploration/__pycache__/inference.cpython-312.pyc CHANGED
Binary files a/Exploration/__pycache__/inference.cpython-312.pyc and b/Exploration/__pycache__/inference.cpython-312.pyc differ
 
Exploration/inference.py CHANGED
@@ -7,8 +7,21 @@ import matplotlib.pyplot as plt
7
  import librosa
8
  import librosa.display
9
  import scipy.signal as signal
 
 
10
 
11
  class RespiratorySoundAnalysis:
 
 
 
 
 
 
 
 
 
 
 
12
  def __init__(self, diagnosis_file, audio_path):
13
  self.diagnosis_file = diagnosis_file
14
  self.audio_path = audio_path
@@ -188,11 +201,22 @@ class RespiratorySoundAnalysis:
188
  return y_normalized, target_sr
189
 
190
  # Entry point for standalone execution
 
 
 
 
 
 
 
 
 
191
  if __name__ == "__main__":
192
- diagnosis_file = '../data//Respiratory_Sound_Database//patient_diagnosis.csv'
193
- audio_path = '../data/Respiratory_Sound_Database/testsample'
 
 
194
 
195
- analysis = RespiratorySoundAnalysis(diagnosis_file, audio_path)
196
 
197
  # Load and analyze data
198
  analysis.load_diagnosis_data()
@@ -201,9 +225,9 @@ if __name__ == "__main__":
201
  analysis.analyze_audio_properties()
202
  analysis.plot_audio_duration_distribution()
203
 
204
- # Visualize sample audio
205
  if analysis.audio_files:
206
- analysis.visualize_sample_audio(analysis.audio_files[0])
207
 
208
  # Merge data
209
- analysis.merge_audio_and_diagnosis_data()
 
7
  import librosa
8
  import librosa.display
9
  import scipy.signal as signal
10
+ import argparse
11
+
12
 
13
  class RespiratorySoundAnalysis:
14
+ """
15
+ A class to perform analysis and preprocessing of respiratory sound recordings.
16
+
17
+ Attributes:
18
+ diagnosis_file (str): Path to the CSV file containing patient diagnoses.
19
+ audio_path (str): Path to the directory containing audio files.
20
+ diagnosis_df (DataFrame): DataFrame to hold diagnosis data.
21
+ audio_files (list): List of audio file paths.
22
+ audio_df (DataFrame): DataFrame to hold audio file properties.
23
+ merged_df (DataFrame): DataFrame combining audio properties with diagnosis data.
24
+ """
25
  def __init__(self, diagnosis_file, audio_path):
26
  self.diagnosis_file = diagnosis_file
27
  self.audio_path = audio_path
 
201
  return y_normalized, target_sr
202
 
203
  # Entry point for standalone execution
204
+
205
+ #diagnosis_file = './data//Respiratory_Sound_Database//patient_diagnosis.csv'
206
+ #audio_path = './data/Respiratory_Sound_Database/testsample'
207
+
208
+
209
+
210
+ # Entry point for standalone execution
211
+ # python Exploration/inference.py --diagnosis_file './data//Respiratory_Sound_Database//patient_diagnosis.csv' --audio_path ./data/Respiratory_Sound_Database/testsample
212
+
213
  if __name__ == "__main__":
214
+ parser = argparse.ArgumentParser(description="Run analysis on respiratory sound data.")
215
+ parser.add_argument("--diagnosis_file", type=str, required=True, help="Path to the patient diagnosis CSV file.")
216
+ parser.add_argument("--audio_path", type=str, required=True, help="Path to the directory containing audio files.")
217
+ args = parser.parse_args()
218
 
219
+ analysis = RespiratorySoundAnalysis(args.diagnosis_file, args.audio_path)
220
 
221
  # Load and analyze data
222
  analysis.load_diagnosis_data()
 
225
  analysis.analyze_audio_properties()
226
  analysis.plot_audio_duration_distribution()
227
 
228
+ # Visualize a sample audio file
229
  if analysis.audio_files:
230
+ analysis.visualize_sample_audio(os.path.basename(analysis.audio_files[0]))
231
 
232
  # Merge data
233
+ analysis.merge_audio_and_diagnosis_data()
LegacyTraining/train.py ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import gc
4
+ from joblib import Parallel, delayed
5
+ import joblib
6
+ import mlflow
7
+ import mlflow.keras
8
+ import numpy as np
9
+ import pandas as pd
10
+ import librosa
11
+ import librosa.display
12
+ import optuna
13
+ from tqdm import tqdm
14
+ import matplotlib.pyplot as plt
15
+ from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, classification_report
16
+ from sklearn.model_selection import train_test_split
17
+ from sklearn.preprocessing import LabelEncoder
18
+ from keras.models import Sequential
19
+ from keras.utils import to_categorical, normalize
20
+ from keras.layers import Conv2D, Dense, MaxPooling2D, Dropout, BatchNormalization, GlobalAveragePooling2D
21
+ from keras.layers import Conv1D, MaxPooling1D,GlobalAveragePooling1D
22
+ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
23
+ from tensorflow.keras.preprocessing.image import ImageDataGenerator
24
+ from imblearn.over_sampling import SMOTE
25
+ from scipy.signal import butter, sosfilt
26
+ import argparse
27
+
28
+ # Set up logging
29
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
30
+ data_logger = logging.getLogger("data_loading")
31
+ processing_logger = logging.getLogger("data_processing")
32
+ model_logger = logging.getLogger("model_training")
33
+
34
+
35
+ def load_data(diagnosis_path='/kaggle/input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/patient_diagnosis.csv',
36
+ demographic_path='/kaggle/input/respiratory-sound-database/demographic_info.txt'):
37
+ """Load patient diagnosis and demographic data."""
38
+ data_logger.info("Loading patient diagnosis and demographic data.")
39
+
40
+ # Load diagnosis data
41
+ diagnosis_df = pd.read_csv(diagnosis_path,
42
+ names=['Patient number', 'Diagnosis'])
43
+
44
+ # Load demographic data
45
+ patient_df = pd.read_csv(demographic_path,
46
+ names=['Patient number', 'Age', 'Sex', 'Adult BMI (kg/m2)', 'Child Weight (kg)', 'Child Height (cm)'],
47
+ delimiter=' ')
48
+
49
+ data_logger.info("Data successfully loaded.")
50
+
51
+ # Merge and return
52
+ return pd.merge(left=patient_df, right=diagnosis_df, how='left')
53
+
54
+
55
+ def process_audio_metadata(folder_path):
56
+ """Extract audio metadata from filenames."""
57
+ processing_logger.info("Extracting audio metadata from filenames.")
58
+ data = []
59
+ for filename in os.listdir(folder_path):
60
+ if filename.endswith('.txt'):
61
+ parts = filename.split('_')
62
+ data.append({
63
+ 'Patient number': int(parts[0]),
64
+ 'Recording index': parts[1],
65
+ 'Chest location': parts[2],
66
+ 'Acquisition mode': parts[3],
67
+ 'Recording equipment': parts[4].split('.')[0]
68
+ })
69
+ processing_logger.info("Audio metadata extraction complete.")
70
+ return pd.DataFrame(data)
71
+
72
+
73
+ def merge_datasets(df1, df2):
74
+ """Merge metadata and diagnosis data."""
75
+ processing_logger.info("Merging metadata and diagnosis data.")
76
+ merged_df = pd.merge(left=df1, right=df2, how='left').sort_values('Patient number').reset_index(drop=True)
77
+ merged_df['audio_file_name'] = merged_df.apply(lambda row: f"{row['Patient number']}_{row['Recording index']}_{row['Chest location']}_{row['Acquisition mode']}_{row['Recording equipment']}.wav", axis=1)
78
+ processing_logger.info("Merging complete.")
79
+ return merged_df
80
+
81
+
82
+
83
+ def filter_and_sample_data(df, mode='binary'):
84
+ """
85
+ Filter and sample the dataset for binary or multi-class classification.
86
+
87
+ Returns filtered and processed DataFrame.
88
+ """
89
+ processing_logger.info(f"Filtering and sampling the dataset for {mode} classification.")
90
+
91
+ if mode == 'binary':
92
+ # Binary classification: Normal vs. Abnormal
93
+ df['Diagnosis'] = df['Diagnosis'].apply(lambda x: 'Normal' if x == 'Healthy' else 'Abnormal')
94
+ elif mode == 'multi':
95
+ # Multi-class classification: Group classes
96
+ # Diseases are grouped into broader classes based on their similarities
97
+ processing_logger.info("Grouping classes for multi-class classification.")
98
+ df['Diagnosis'] = df['Diagnosis'].replace({
99
+ 'Healthy': 'Normal',
100
+ 'COPD': 'Chronic Respiratory Diseases',
101
+ 'Asthma': 'Chronic Respiratory Diseases',
102
+ 'URTI': 'Respiratory Infections',
103
+ 'Bronchiolitis': 'Respiratory Infections',
104
+ 'LRTI': 'Respiratory Infections',
105
+ 'Pneumonia': 'Respiratory Infections',
106
+ 'Bronchiectasis': 'Respiratory Infections'
107
+ })
108
+
109
+ # Filter out rare classes with fewer than 5 samples
110
+ class_counts = df['Diagnosis'].value_counts()
111
+ valid_classes = class_counts[class_counts >= 5].index
112
+ df = df[df['Diagnosis'].isin(valid_classes)].reset_index(drop=True)
113
+
114
+ processing_logger.info(f"Filtered classes: {df['Diagnosis'].unique()}")
115
+ processing_logger.info(f"Filtering and sampling complete with mode={mode}.")
116
+ return df
117
+
118
+
119
+ def prepare_dataset_augmented(df_filtered, audio_files_path, classification_mode):
120
+ """Prepare the dataset for augmented features. it will be 1D array"""
121
+ processing_logger.info("Preparing dataset with AUGMENTED pipeline.")
122
+
123
+ # Extract features and labels
124
+ X, y = mfccs_feature_extraction(audio_files_path, df_filtered)
125
+
126
+ # Apply label encoding
127
+ le = LabelEncoder()
128
+ y_encoded = le.fit_transform(np.array(y)) # Encode labels to integers
129
+
130
+ if classification_mode == "binary":
131
+ # Use single column with 0 and 1 for binary classification
132
+ processing_logger.info("Binary classification mode: Using single column labels (0/1).")
133
+ y_processed = y_encoded # No one-hot encoding
134
+ else:
135
+ # One-hot encode labels for multi-class classification
136
+ processing_logger.info("Multi-class classification mode: Applying one-hot encoding.")
137
+ y_processed = to_categorical(y_encoded)
138
+
139
+ # Log the mapping of one-hot encoding to class labels
140
+ print("One-hot encoding mapping:")
141
+ for idx, label in enumerate(le.classes_):
142
+ print(f"{idx} -> {label}")
143
+
144
+ processing_logger.info("Dataset preparation with augmented pipeline complete.")
145
+ return X, y_processed, le
146
+
147
+
148
+ def mfccs_feature_extraction(audio_files_path, df_filtered, n_jobs=-1):
149
+ """
150
+ Speed up MFCC feature extraction by processing the audio files in parallel.
151
+
152
+ Returns an array of features extracted from the audio files and an array of target labels.
153
+ """
154
+ processing_logger.info(f"Processing audio files in: {audio_files_path}")
155
+ files = [file for file in os.listdir(audio_files_path) if file.endswith('.wav') and file[:3] not in ['103', '108', '115']]
156
+
157
+ #files = files[:30] ## DEBUG
158
+
159
+ # Use Parallel and delayed to process files in parallel
160
+ results = Parallel(n_jobs=n_jobs, backend="loky")(delayed(process_audio_file)(file, audio_files_path, df_filtered) for file in tqdm(files, desc="Processing audio files"))
161
+
162
+ # Flatten results
163
+ X_ = []
164
+ y_ = []
165
+ for X_local, y_local in results:
166
+ X_.extend(X_local)
167
+ y_.extend(y_local)
168
+
169
+ X_data = np.array(X_)
170
+ y_data = np.array(y_)
171
+ processing_logger.info("MFCC feature extraction and augmentation complete.")
172
+ return X_data, y_data
173
+
174
+
175
+ def process_audio_file(soundDir, audio_files_path, df_filtered):
176
+ """
177
+ Process a single audio file: extract MFCC features and augment with noise, time shifting, time stretching, and pitch shifting.
178
+
179
+ """
180
+ X_local = []
181
+ y_local = []
182
+ features = 52
183
+
184
+ # Extract patient ID and disease from filename and DataFrame
185
+ patient_id = int(soundDir.split('_')[0])
186
+ disease = df_filtered.loc[df_filtered['Patient number'] == patient_id, 'Diagnosis'].values[0]
187
+
188
+ # Load audio file
189
+ data_x, sampling_rate = librosa.load(os.path.join(audio_files_path, soundDir), sr=None)
190
+ data_x = preprocess_audio(data_x, sampling_rate) # Apply filtering
191
+
192
+
193
+ mfccs = np.mean(librosa.feature.mfcc(y=data_x, sr=sampling_rate, n_mfcc=features).T, axis=0)
194
+ X_local.append(mfccs)
195
+ y_local.append(disease)
196
+
197
+ # Data augmentation
198
+ for augmentation in [add_noise, shift, stretch, pitch_shift]:
199
+ if augmentation == add_noise:
200
+ augmented_data = augmentation(data_x, 0.001)
201
+ elif augmentation == shift:
202
+ augmented_data = augmentation(data_x, 1600)
203
+ elif augmentation == stretch:
204
+ augmented_data = augmentation(data_x, 1.2)
205
+ elif augmentation == pitch_shift:
206
+ augmented_data = augmentation(data_x, sampling_rate, 3)
207
+
208
+ mfccs_augmented = np.mean(librosa.feature.mfcc(y=augmented_data, sr=sampling_rate, n_mfcc=features).T, axis=0)
209
+ X_local.append(mfccs_augmented)
210
+ y_local.append(disease)
211
+
212
+ return X_local, y_local
213
+
214
+
215
+ def add_noise(data,x):
216
+ noise = np.random.randn(len(data))
217
+ data_noise = data + x * noise
218
+ return data_noise
219
+
220
+ def shift(data, x):
221
+ return np.roll(data, int(x))
222
+
223
+ def stretch(data, rate):
224
+ return librosa.effects.time_stretch(data, rate=rate)
225
+
226
+ def pitch_shift (data , sr, rate):
227
+ return librosa.effects.pitch_shift(data, sr=sr, n_steps=rate)
228
+
229
+
230
+
231
+
232
+ def prepare_dataset_parallel(df, audio_files_path, mode, classification_mode):
233
+ """Prepare the dataset by extracting features from audio files in parallel."""
234
+ processing_logger.info(f"Preparing dataset using {mode} features in parallel.")
235
+ results = Parallel(n_jobs=-1)(delayed(preprocess_file)(row, audio_files_path, mode) for _, row in tqdm(df.iterrows(), total=len(df)))
236
+
237
+ X, y = zip(*results)
238
+ X = np.array(X)
239
+ X = np.expand_dims(X, axis=-1) # Add channel dimension
240
+ X = normalize(X, axis=1)
241
+
242
+ le = LabelEncoder()
243
+ y_encoded = le.fit_transform(np.array(y)) # Encode labels
244
+
245
+ if classification_mode == "binary":
246
+ # Use single column with 0 and 1 for binary classification
247
+ processing_logger.info("Binary classification mode: Using single column labels (0/1).")
248
+ y = y_encoded # No one-hot encoding
249
+ else:
250
+ # One-hot encode labels for multi-class classification
251
+ processing_logger.info("Multi-class classification mode: Applying one-hot encoding.")
252
+ y = to_categorical(y_encoded)
253
+
254
+ processing_logger.info(f"Dataset preparation using {mode} complete.")
255
+ return X, y, le
256
+
257
+ def preprocess_file(row, audio_files_path, mode):
258
+ """Preprocess a single audio file."""
259
+ file_path = os.path.join(audio_files_path, row['audio_file_name'])
260
+ feature = preprocessing(file_path, mode)
261
+ label = row['Diagnosis']
262
+ return feature, label
263
+
264
+ def preprocessing(audio_file, mode):
265
+ """Preprocess audio file by resampling, padding/truncating, and extracting features."""
266
+ sr_new = 16000 # Resample audio to 16 kHz
267
+ x, sr = librosa.load(audio_file, sr=sr_new)
268
+ x = preprocess_audio(x, sr)
269
+ # Padding or truncating to 5 seconds (5 * sr_new samples)
270
+ max_len = 5 * sr_new
271
+ if x.shape[0] < max_len:
272
+ x = np.pad(x, (0, max_len - x.shape[0]))
273
+ else:
274
+ x = x[:max_len]
275
+
276
+ # Extract features
277
+ # I understand the common choice for n_mfcc is 13, but here I assumed we need to capture more information, therefore I chose 20.
278
+ if mode == 'mfcc':
279
+ feature = librosa.feature.mfcc(y=x, sr=sr_new, n_mfcc=20) # Ensure consistent shape
280
+ elif mode == 'log_mel':
281
+ feature = librosa.feature.melspectrogram(y=x, sr=sr_new, n_mels=20, fmax=8000) # Match n_mels to 20
282
+ feature = librosa.power_to_db(feature, ref=np.max)
283
+
284
+ return feature
285
+
286
+ def oversample_data(X, y):
287
+ """Apply SMOTE to balance classes."""
288
+ processing_logger.info("Applying SMOTE to balance classes.")
289
+
290
+ # Save the original shape of features
291
+ original_shape = X.shape[1:]
292
+
293
+ # Flatten for SMOTE processing
294
+ X = X.reshape((X.shape[0], -1))
295
+
296
+ # Convert one-hot encoded labels to integers
297
+ y = np.argmax(y, axis=1)
298
+
299
+ # Apply SMOTE
300
+ smote = SMOTE(random_state=42)
301
+ X_resampled, y_resampled = smote.fit_resample(X, y)
302
+
303
+ # Reshape back to the original dimensions
304
+ X_resampled = X_resampled.reshape((-1, *original_shape))
305
+
306
+ # Convert labels back to one-hot encoding
307
+ y_resampled = to_categorical(y_resampled)
308
+
309
+ processing_logger.info("SMOTE oversampling complete.")
310
+ return X_resampled, y_resampled
311
+
312
+
313
+
314
+ def build_model(input_shape, n_filters, dense_units, dropout_rate, num_classes, model_type='1D', classification_mode='binary'):
315
+ """
316
+ Build and compile a CNN model for 1D or 2D data.
317
+
318
+ Returns CNN model.
319
+ """
320
+ print(f"Building the updated {model_type} CNN model with {classification_mode} classification.")
321
+ model = Sequential()
322
+
323
+ # Add convolutional layers based on the model type
324
+ if model_type == '1D':
325
+ # 1D CNN layers
326
+ model.add(Conv1D(n_filters, kernel_size=3, activation='relu', input_shape=input_shape))
327
+ model.add(BatchNormalization())
328
+ model.add(MaxPooling1D(pool_size=2))
329
+ model.add(Dropout(dropout_rate))
330
+
331
+ model.add(Conv1D(n_filters * 2, kernel_size=3, activation='relu'))
332
+ model.add(BatchNormalization())
333
+ model.add(MaxPooling1D(pool_size=2))
334
+ model.add(Dropout(dropout_rate))
335
+
336
+ model.add(Conv1D(n_filters * 4, kernel_size=3, activation='relu'))
337
+ model.add(BatchNormalization())
338
+ model.add(GlobalAveragePooling1D())
339
+ model.add(Dropout(dropout_rate))
340
+
341
+ elif model_type == '2D':
342
+ # 2D CNN layers
343
+ model.add(Conv2D(n_filters, (3, 3), activation='relu', input_shape=input_shape))
344
+ model.add(BatchNormalization())
345
+ if input_shape[0] >= 2:
346
+ model.add(MaxPooling2D((2, 2)))
347
+ model.add(Dropout(dropout_rate))
348
+
349
+ model.add(Conv2D(n_filters * 2, (3, 3), activation='relu'))
350
+ model.add(BatchNormalization())
351
+ if input_shape[0] >= 4:
352
+ model.add(MaxPooling2D((2, 2)))
353
+ model.add(Dropout(dropout_rate))
354
+
355
+ model.add(Conv2D(n_filters * 4, (3, 3), activation='relu'))
356
+ model.add(BatchNormalization())
357
+ model.add(GlobalAveragePooling2D())
358
+ model.add(Dropout(dropout_rate))
359
+
360
+ else:
361
+ raise ValueError("Invalid model_type. Must be '1D' or '2D'.")
362
+
363
+ # Add fully connected layers
364
+ model.add(Dense(dense_units, activation='relu'))
365
+ model.add(BatchNormalization())
366
+ model.add(Dropout(dropout_rate))
367
+
368
+ # Add output layer dynamically based on classification mode
369
+ if classification_mode == 'binary':
370
+ # Binary classification: Single unit with sigmoid activation
371
+ model.add(Dense(1, activation='sigmoid'))
372
+ loss_function = 'binary_crossentropy'
373
+ else:
374
+ # Multi-class classification: num_classes units with softmax activation
375
+ model.add(Dense(num_classes, activation='softmax'))
376
+ loss_function = 'categorical_crossentropy'
377
+
378
+ # Compile the model
379
+ model.compile(optimizer='adam', loss=loss_function, metrics=['accuracy'])
380
+ print(f"{model_type} CNN model built and compiled successfully for {classification_mode} classification.")
381
+ return model
382
+
383
+
384
+ def log_metrics(y_true, y_pred, mode):
385
+ """Log evaluation metrics."""
386
+ precision = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['precision']
387
+ recall = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['recall']
388
+ f1_score = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['f1-score']
389
+
390
+ mlflow.log_metric(f"{mode}_precision", precision)
391
+ mlflow.log_metric(f"{mode}_recall", recall)
392
+ mlflow.log_metric(f"{mode}_f1_score", f1_score)
393
+
394
+
395
+
396
+ def track_experiment_with_mlflow_and_optuna(mode, num_classes, model_type='1D', classification_mode='binary'):
397
+ """
398
+ Optimize hyperparameters using Optuna and track experiments with MLflow.
399
+
400
+ mode: Feature extraction mode (e.g., 'augmented', 'mfcc', 'log_mel').
401
+ num_classes: Number of classes for classification.
402
+ model_type: Type of model ('1D' for Conv1D, '2D' for Conv2D).
403
+ classification_mode: 'binary' for binary classification, 'multi' for multi-class classification.
404
+ """
405
+ def objective(trial):
406
+ with mlflow.start_run(nested=True): # Start a new MLflow run for each trial
407
+ # Hyperparameters to tune
408
+ n_filters = trial.suggest_categorical('n_filters', [16, 32, 64])
409
+ dense_units = trial.suggest_int('dense_units', 64, 256, step=32)
410
+ dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
411
+ learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
412
+
413
+ # Build and compile the model
414
+ model = build_model(
415
+ input_shape=X_train.shape[1:],
416
+ n_filters=n_filters,
417
+ dense_units=dense_units,
418
+ dropout_rate=dropout_rate,
419
+ num_classes=num_classes,
420
+ model_type=model_type,
421
+ classification_mode=classification_mode
422
+ )
423
+
424
+ # Define EarlyStopping callback
425
+ early_stopping = EarlyStopping(
426
+ monitor='val_loss', # Monitor validation loss
427
+ patience=5, # Stop training after 5 epochs with no improvement
428
+ restore_best_weights=True
429
+ )
430
+
431
+ # Train the model
432
+ history = model.fit(
433
+ X_train, y_train,
434
+ validation_data=(X_val, y_val),
435
+ epochs=50, # Allow a larger max epoch since EarlyStopping will handle early termination
436
+ batch_size=32,
437
+ callbacks=[early_stopping],
438
+ verbose=0
439
+ )
440
+
441
+ # Log hyperparameters and metrics to MLflow
442
+ mlflow.log_params({
443
+ 'n_filters': n_filters,
444
+ 'dense_units': dense_units,
445
+ 'dropout_rate': dropout_rate,
446
+ 'learning_rate': learning_rate,
447
+ 'model_type': model_type,
448
+ 'classification_mode': classification_mode
449
+ })
450
+ mlflow.log_metric("best_val_accuracy", max(history.history['val_accuracy']))
451
+
452
+ # Save training and validation loss curves
453
+ plt.figure()
454
+ plt.plot(history.history['loss'], label='Train Loss')
455
+ plt.plot(history.history['val_loss'], label='Validation Loss')
456
+ plt.legend()
457
+ plt.title("Training and Validation Loss")
458
+ loss_curve_path = f"loss_curve_{trial.number}_{model_type}.png"
459
+ plt.savefig(loss_curve_path)
460
+ mlflow.log_artifact(loss_curve_path)
461
+
462
+ return max(history.history['val_accuracy'])
463
+
464
+ # Start Optuna study
465
+ study = optuna.create_study(direction='maximize')
466
+ study.optimize(objective, n_trials=20)
467
+
468
+ # Retrieve best trial and log results
469
+ best_trial = study.best_trial
470
+ model_logger.info(f"Best Trial for {mode} ({model_type}): {best_trial.params}")
471
+
472
+ # Build the best model (already compiled in build_model)
473
+ best_model = build_model(
474
+ input_shape=X_train.shape[1:],
475
+ n_filters=best_trial.params['n_filters'],
476
+ dense_units=best_trial.params['dense_units'],
477
+ dropout_rate=best_trial.params['dropout_rate'],
478
+ num_classes=num_classes,
479
+ model_type=model_type,
480
+ classification_mode=classification_mode
481
+ )
482
+
483
+ # Train the best model with EarlyStopping
484
+ early_stopping = EarlyStopping(
485
+ monitor='val_loss',
486
+ patience=5,
487
+ restore_best_weights=True
488
+ )
489
+
490
+ best_model.fit(
491
+ X_train, y_train,
492
+ validation_data=(X_val, y_val),
493
+ epochs=50, batch_size=32,
494
+ callbacks=[early_stopping],
495
+ verbose=1
496
+ )
497
+
498
+ # Save the best model
499
+ best_model_path = f"best_model_{mode}_{model_type}.h5"
500
+ best_model.save(best_model_path)
501
+ mlflow.log_artifact(best_model_path)
502
+ model_logger.info(f"Best model for {mode} ({model_type}) saved successfully.")
503
+
504
+ return best_model
505
+
506
+ def log_class_distribution(y, message):
507
+ """Log the class distribution."""
508
+ if y.ndim == 1: # Binary classification (1D array of 0s and 1s)
509
+ unique, counts = np.unique(y, return_counts=True)
510
+ else: # Multi-class classification (2D one-hot encoded array)
511
+ unique, counts = np.unique(np.argmax(y, axis=1), return_counts=True)
512
+
513
+ class_distribution = dict(zip(unique, counts))
514
+ processing_logger.info(f"{message} Class Distribution: {class_distribution}")
515
+
516
+
517
+ def preprocess_audio(audio, sr):
518
+ """
519
+ Apply a bandpass filter to audio data.
520
+
521
+ """
522
+ # Define cutoff frequencies
523
+ low_cutoff = 50 # 50 Hz
524
+ high_cutoff = min(5000, sr / 2 - 1) # Ensure it is below Nyquist frequency
525
+
526
+ if low_cutoff >= high_cutoff:
527
+ raise ValueError(
528
+ f"Invalid filter range: low_cutoff={low_cutoff}, high_cutoff={high_cutoff} for sampling rate {sr}"
529
+ )
530
+
531
+ # Design a bandpass filter
532
+ sos = butter(N=10, Wn=[low_cutoff, high_cutoff], btype='band', fs=sr, output='sos')
533
+
534
+ # Apply the filter
535
+ filtered_audio = sosfilt(sos, audio)
536
+ return filtered_audio
537
+
538
+
539
+ def generate_random_audio_data(samples=20000, feature_dim=20):
540
+ """Generate random audio-like data for testing purposes."""
541
+ X = np.random.rand(samples, feature_dim, feature_dim) # Simulate 2D audio features
542
+ y = np.random.randint(0, 2, size=samples) # Binary classification labels
543
+ return X, y
544
+
545
+ def test_model():
546
+ """Test 2D CNN model with simulated audio data for debugging."""
547
+ print("[DEBUG] Generating simulated audio data...")
548
+ global X_train, X_val, X_test, y_train, y_val, y_test
549
+ X, y = generate_random_audio_data()
550
+
551
+ # Simulate preprocessing similar to audio processing pipeline
552
+ print("[DEBUG] Preprocessing simulated audio data...")
553
+ X_preprocessed = np.array([np.log1p(sample) for sample in X]) # Simulate a log transform or feature extraction
554
+
555
+ # Split data into train, validation, and test sets
556
+ X_train, X_temp, y_train, y_temp = train_test_split(X_preprocessed, y, test_size=0.3, stratify=y, random_state=42)
557
+ X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)
558
+
559
+ print(f"[DEBUG] Data split: Training={X_train.shape}, Validation={X_val.shape}, Test={X_test.shape}")
560
+
561
+ # Expand dimensions for 2D CNN input
562
+ X_train = np.expand_dims(X_train, axis=-1)
563
+ X_val = np.expand_dims(X_val, axis=-1)
564
+ X_test = np.expand_dims(X_test, axis=-1)
565
+
566
+ print("[DEBUG] Initializing 2D CNN model...")
567
+ model = track_experiment_with_mlflow_and_optuna(
568
+ mode='mfcc',
569
+ num_classes=1,
570
+ model_type='2D', # Specify 2D CNN for MFCC and Log-Mel
571
+ classification_mode='binary'
572
+ )
573
+
574
+ print("[DEBUG] Training the model...")
575
+ # Train the model with a single epoch for testing
576
+ model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=1, batch_size=32)
577
+
578
+ print("[DEBUG] Evaluating the model...")
579
+ results = model.evaluate(X_test, y_test)
580
+ print(f"[DEBUG] Test evaluation results: {results}")
581
+
582
+
583
def main():
    """Run the training pipeline for every requested mode/feature pair.

    Command-line arguments select the input paths, the MLflow tracking URI,
    the classification modes and the feature types. With ``--debug`` only
    ``test_model()`` is run. Otherwise, for each (classification mode,
    feature type) combination the dataset is prepared, split 70/15/15,
    the test split is saved and logged to MLflow, the training split is
    oversampled when possible, and a model is trained, saved as
    ``final_model_<mode>_<feature>.h5`` and logged as an MLflow artifact.
    """
    # how to run:
    # python legacy/test.py --metadata_path data/Respiratory_Sound_Database/audio_and_txt_files --audio_files_path data/Respiratory_Sound_Database/audio_and_txt_files --demographic_path data/demographic_info.txt --diagnosis_path data/Respiratory_Sound_Database/patient_diagnosis.csv --classification_modes binary --feature_types mfcc

    # NOTE(review): the optimisation helper is called without data arguments,
    # so it presumably reads these module-level globals - confirm.
    global X_train, X_val, X_test, y_train, y_val, y_test

    # Parse arguments
    parser = argparse.ArgumentParser(description="Run the respiratory sound analysis pipeline.")
    # --metadata_path is still accepted for CLI compatibility; the pipeline
    # below reads audio metadata from --audio_files_path.
    parser.add_argument("--metadata_path", type=str, default="/kaggle/input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files", help="Path to the metadata directory.")
    parser.add_argument("--audio_files_path", type=str, default="/kaggle/input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files", help="Path to the directory containing audio files.")
    parser.add_argument("--demographic_path", type=str, default="/kaggle/input/respiratory-sound-database/demographic_info.txt", help="Path to the demographic info file.")
    parser.add_argument("--diagnosis_path", type=str, default="/kaggle/input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/patient_diagnosis.csv", help="Path to the patient diagnosis CSV file.")
    parser.add_argument("--tracking_uri", type=str, default="./mlruns", help="MLflow tracking URI.")
    parser.add_argument("--classification_modes", type=str, nargs='+', default=['multi', 'binary'], help="Classification modes to run (default: all modes). Options: 'binary', 'multi'.")
    parser.add_argument("--feature_types", type=str, nargs='+', default=['mfcc', 'log_mel', 'augmented'], help="Feature types to use (default: all types). Options: 'mfcc', 'log_mel', 'augmented'.")
    parser.add_argument("--debug", action='store_true', help="Run in debug mode with random test data.")
    args = parser.parse_args()

    if args.debug:
        test_model()
        return

    audio_files_path = args.audio_files_path

    # Set MLflow tracking URI
    mlflow.set_tracking_uri(args.tracking_uri)

    data_logger.info("Starting data pipeline.")
    df = load_data(demographic_path=args.demographic_path, diagnosis_path=args.diagnosis_path)
    audio_metadata = process_audio_metadata(audio_files_path)
    df_all = merge_datasets(audio_metadata, df)

    for classification_mode in args.classification_modes:
        # Preprocess dataset for binary or multi-class classification
        df_filtered = filter_and_sample_data(df_all, mode=classification_mode)
        processing_logger.info(f"Dataset shape for {classification_mode} mode: {df_filtered.shape}")

        for feature_type in args.feature_types:
            processing_logger.info(f"Running experiment for {classification_mode} classification with {feature_type} features.")
            is_augmented = feature_type == 'augmented'

            # Prepare the dataset
            if is_augmented:
                X, y, _ = prepare_dataset_augmented(
                    df_filtered,
                    audio_files_path,
                    classification_mode=classification_mode
                )
            else:
                X, y, _ = prepare_dataset_parallel(
                    df_filtered,
                    audio_files_path,
                    mode=feature_type,
                    classification_mode=classification_mode
                )

            # Split data into train/val/test (70/15/15, stratified)
            X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
            X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

            # Save test data for future evaluation
            x_test_file = f"X_test_{classification_mode}_{feature_type}.npy"
            y_test_file = f"y_test_{classification_mode}_{feature_type}.npy"
            np.save(x_test_file, X_test)
            np.save(y_test_file, y_test)
            mlflow.log_artifact(x_test_file)
            mlflow.log_artifact(y_test_file)

            # Log dataset characteristics
            log_class_distribution(y_train, "Before Oversampling")
            processing_logger.info(f"Train size: {X_train.shape}, Validation size: {X_val.shape}, Test size: {X_test.shape}")

            # Oversampling is best-effort: a ValueError leaves the split as-is.
            try:
                X_train, y_train = oversample_data(X_train, y_train)
            except ValueError as e:
                processing_logger.warning(f"SMOTE skipped: {e}")
            log_class_distribution(y_train, "After Oversampling")

            # Binary models use a single output unit; multi-class models use
            # one unit per one-hot column.
            num_classes = 1 if classification_mode == "binary" else y_train.shape[1]

            # Train and save model
            with mlflow.start_run(run_name=f"Experiment_{classification_mode}_{feature_type}", nested=True):
                if is_augmented:
                    # The 1D CNN consumes the augmented vectors with a
                    # trailing channel axis.
                    X_train = np.expand_dims(X_train, axis=-1)
                    X_val = np.expand_dims(X_val, axis=-1)
                    X_test = np.expand_dims(X_test, axis=-1)
                    model_type = '1D'
                else:
                    # MFCC and Log-Mel features go through the 2D CNN.
                    model_type = '2D'

                model = track_experiment_with_mlflow_and_optuna(
                    mode=feature_type,
                    num_classes=num_classes,
                    model_type=model_type,
                    classification_mode=classification_mode
                )

                # Save final model
                final_model_path = f"final_model_{classification_mode}_{feature_type}.h5"
                model.save(final_model_path)
                mlflow.log_artifact(final_model_path)

    processing_logger.info("All experiments completed successfully!")
706
+
707
+
708
+ if __name__ == "__main__":
709
+ main()
Model_Inference.py CHANGED
@@ -1,109 +1,226 @@
1
-
2
-
3
  import os
 
4
  import numpy as np
5
- import pandas as pd
6
- from sklearn.metrics import (
7
- accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve
8
- )
9
  from tensorflow.keras.models import load_model
10
- import matplotlib.pyplot as plt
 
11
 
12
- # Paths
 
 
 
 
13
  MODEL_PATH = "./models"
14
- DATASET_PATH = "./processed_datasets"
15
-
16
- # Model and dataset filenames
17
- MODELS = [
18
- "final_model_binary_augmented.h5",
19
- "final_model_binary_log_mel.h5",
20
- "final_model_binary_mfcc.h5",
21
- "final_model_multi_augmented.h5",
22
- "final_model_multi_log_mel.h5",
23
- "final_model_multi_mfcc.h5"
24
- ]
25
-
26
- DATASETS = {
27
- "binary_augmented": ("X_test_binary_augmented.npy", "y_test_binary_augmented.npy"),
28
- "binary_log_mel": ("X_test_binary_log_mel.npy", "y_test_binary_log_mel.npy"),
29
- "binary_mfcc": ("X_test_binary_mfcc.npy", "y_test_binary_mfcc.npy"),
30
- "multi_augmented": ("X_test_multi_augmented.npy", "y_test_multi_augmented.npy"),
31
- "multi_log_mel": ("X_test_multi_log_mel.npy", "y_test_multi_log_mel.npy"),
32
- "multi_mfcc": ("X_test_multi_mfcc.npy", "y_test_multi_mfcc.npy")
33
  }
34
 
35
- # Metrics dictionary
36
- metrics_dict = []
37
 
38
- # Function to evaluate a model
39
- def evaluate_model(model, X_test, y_test, mode):
40
- y_pred_prob = model.predict(X_test)
41
- y_pred = np.argmax(y_pred_prob, axis=1)
42
- y_true = np.argmax(y_test, axis=1)
43
-
44
- accuracy = accuracy_score(y_true, y_pred)
45
- precision = precision_score(y_true, y_pred, average='weighted')
46
- recall = recall_score(y_true, y_pred, average='weighted')
47
- f1 = f1_score(y_true, y_pred, average='weighted')
48
- auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')
49
- conf_matrix = confusion_matrix(y_true, y_pred)
 
 
 
 
 
 
 
50
 
51
- print(f"--- Evaluation for {mode} ---")
52
- print(f"Accuracy: {accuracy:.4f}")
53
- print(f"Precision: {precision:.4f}")
54
- print(f"Recall: {recall:.4f}")
55
- print(f"F1 Score: {f1:.4f}")
56
- print(f"ROC-AUC: {auc:.4f}")
57
- print("Confusion Matrix:")
58
- print(conf_matrix)
59
- print("\n")
60
-
61
- # Log metrics
62
- metrics_dict.append({
63
- "Model": mode,
64
- "Accuracy": accuracy,
65
- "Precision": precision,
66
- "Recall": recall,
67
- "F1 Score": f1,
68
- "ROC-AUC": auc
69
- })
70
-
71
- # Plot ROC curve
72
- fpr = {}
73
- tpr = {}
74
- for i in range(y_test.shape[1]):
75
- fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred_prob[:, i])
76
- plt.figure(figsize=(10, 6))
77
- for i, label in enumerate(np.unique(y_true)):
78
- plt.plot(fpr[i], tpr[i], label=f"Class {label} ROC")
79
- plt.plot([0, 1], [0, 1], 'k--', label='Chance')
80
- plt.xlabel('False Positive Rate')
81
- plt.ylabel('True Positive Rate')
82
- plt.title(f"ROC Curve - {mode}")
83
- plt.legend()
84
- plt.savefig(f"roc_curve_{mode}.png")
85
- plt.close()
86
-
87
- # Evaluate all models
88
- for model_name in MODELS:
89
- mode_key = model_name.replace("final_model_", "").replace(".h5", "").replace(" ", "_").lower()
90
- dataset = DATASETS.get(mode_key)
91
-
92
- if dataset:
93
- # Load the model and dataset
94
- model_path = os.path.join(MODEL_PATH, model_name)
95
- model = load_model(model_path)
96
-
97
- X_test_path, y_test_path = dataset
98
- X_test = np.load(os.path.join(DATASET_PATH, X_test_path))
99
- y_test = np.load(os.path.join(DATASET_PATH, y_test_path))
100
-
101
- # Evaluate the model
102
- evaluate_model(model, X_test, y_test, mode_key)
103
- else:
104
- print(f"No dataset found for model: {model_name}")
105
-
106
- # Save metrics as a CSV
107
- metrics_df = pd.DataFrame(metrics_dict)
108
- metrics_df.to_csv("model_evaluation_summary.csv", index=False)
109
- print("Evaluation complete. Summary saved as 'model_evaluation_summary.csv'.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import logging
3
  import numpy as np
4
+ import librosa
5
+ from sklearn.preprocessing import normalize
 
 
6
  from tensorflow.keras.models import load_model
7
+ from scipy.signal import butter, sosfilt
8
+ import pandas as pd
9
 
10
+ # Set up logging
11
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
+ logger = logging.getLogger("audio_classifier_test")
13
+
14
+ # Paths and Constants
15
  MODEL_PATH = "./models"
16
+ FILE_PATH = "./data/Respiratory_Sound_Database/testsample/101_1b1_Al_sc_Meditron.wav"
17
+ MODELS = {
18
+ "binary": {
19
+ "augmented": "final_model_binary_augmented.h5",
20
+ "log_mel": "final_model_binary_log_mel.h5",
21
+ "mfcc": "final_model_binary_mfcc.h5",
22
+ },
23
+ "multi": {
24
+ "augmented": "final_model_multi_augmented.h5",
25
+ "log_mel": "final_model_multi_log_mel.h5",
26
+ "mfcc": "final_model_multi_mfcc.h5",
27
+ }
28
+ }
29
+ CLASS_NAMES = {
30
+ "binary": ["Abnormal", "Normal"],
31
+ "multi": ["Chronic Respiratory Diseases", "Normal", "Respiratory Infections"]
 
 
 
32
  }
33
 
 
 
34
 
35
+ # Augmentation Functions
36
def add_noise(data, noise_factor=0.001):
    """Return ``data`` plus standard-normal noise scaled by ``noise_factor``.

    The noise has the same shape as ``data``, so multi-dimensional input is
    supported as well as 1-D signals. For 1-D input the draw from NumPy's
    global random state is the same as sizing the noise by ``len(data)``.

    Args:
        data: Array-like signal.
        noise_factor: Multiplier applied to the noise before adding it.

    Returns:
        A new array; ``data`` itself is not modified.
    """
    noise = np.random.randn(*np.shape(data))
    return data + noise_factor * noise
39
+
40
def shift(data, shift_factor=1600):
    """Roll ``data`` by ``shift_factor`` positions using ``np.roll``.

    Elements pushed past the end re-enter at the start.
    """
    rolled = np.roll(data, shift_factor)
    return rolled
42
+
43
def stretch(data, rate=1.2):
    """Time-stretch ``data`` by ``rate`` via ``librosa.effects.time_stretch``."""
    stretched = librosa.effects.time_stretch(data, rate=rate)
    return stretched
45
+
46
def pitch_shift(data, sr, n_steps=3):
    """Shift the pitch of ``data`` by ``n_steps`` via ``librosa.effects.pitch_shift``.

    ``sr`` is the sampling rate of ``data`` and is forwarded unchanged.
    """
    shifted = librosa.effects.pitch_shift(data, sr=sr, n_steps=n_steps)
    return shifted
48
+
49
+
50
+
51
def filtering(audio, sr, low_cutoff=50, high_cutoff=5000, order=10):
    """
    Apply a Butterworth band-pass filter to audio data.

    Args:
        audio: Signal to filter.
        sr: Sampling rate of ``audio`` in Hz.
        low_cutoff: Lower band edge in Hz (default 50).
        high_cutoff: Upper band edge in Hz (default 5000). It is clamped to
            ``sr / 2 - 1`` so it always stays below the Nyquist frequency.
        order: Butterworth order passed to ``scipy.signal.butter``
            (default 10).

    Returns:
        The filtered audio signal, same length as ``audio``.

    Raises:
        ValueError: If the clamped upper edge is not above ``low_cutoff``.
    """
    # Keep the upper edge strictly below Nyquist for low sampling rates.
    high_cutoff = min(high_cutoff, sr / 2 - 1)

    if low_cutoff >= high_cutoff:
        raise ValueError(
            f"Invalid filter range: low_cutoff={low_cutoff}, high_cutoff={high_cutoff} for sampling rate {sr}"
        )

    # Second-order sections keep a high-order filter numerically stable.
    sos = butter(N=order, Wn=[low_cutoff, high_cutoff], btype='band', fs=sr, output='sos')

    return sosfilt(sos, audio)
72
+
73
+
74
def preprocess_audio(audio_file, mode="augmented", input_shape=None):
    """
    Load one audio file and turn it into a model-ready feature array.

    The signal is loaded at 16 kHz, band-pass filtered and forced to exactly
    five seconds (zero-padded or cut). It is then converted to the feature
    selected by ``mode`` ('mfcc', 'log_mel' or 'augmented'), optionally
    fitted to ``input_shape``, and returned with a trailing channel axis.
    Any failure is logged and re-raised.
    """
    target_sr = 16000  # Resample audio to 16 kHz
    try:
        signal, loaded_sr = librosa.load(audio_file, sr=target_sr)
        signal = filtering(signal, loaded_sr)
        logger.info(f"Loaded audio file '{audio_file}' with shape {signal.shape} and sampling rate {loaded_sr}.")

        # Force a fixed five-second clip.
        clip_len = 5 * target_sr
        missing = clip_len - signal.shape[0]
        if missing > 0:
            signal = np.pad(signal, (0, missing))
            logger.info(f"Audio padded to {clip_len} samples.")
        else:
            signal = signal[:clip_len]
            logger.info(f"Audio truncated to {clip_len} samples.")

        if mode == 'augmented':
            # The untouched clip comes first, followed by four perturbed
            # copies; each contributes one time-averaged 52-coefficient MFCC.
            transforms = [
                lambda d: d,
                lambda d: add_noise(d, 0.001),
                lambda d: shift(d, 1600),
                lambda d: stretch(d, 1.2),
                lambda d: pitch_shift(d, target_sr, 3),
            ]
            mfcc_means = [
                np.mean(librosa.feature.mfcc(y=transform(signal), sr=target_sr, n_mfcc=52).T, axis=0)
                for transform in transforms
            ]
            averaged = np.mean(mfcc_means, axis=0)
            feature = normalize(averaged.reshape(1, -1), axis=1).flatten()  # Normalize

        elif mode == 'log_mel':
            mel_power = librosa.feature.melspectrogram(y=signal, sr=target_sr, n_mels=20, fmax=8000)
            feature = normalize(librosa.power_to_db(mel_power, ref=np.max), axis=1)

        elif mode == 'mfcc':
            feature = normalize(librosa.feature.mfcc(y=signal, sr=target_sr, n_mfcc=20), axis=1)

        else:
            raise ValueError(f"Unknown mode: {mode}")

        # Fit the feature to the model input only when a shape was supplied.
        if input_shape:
            feature = _reshape_feature(feature, input_shape)

        logger.info(f"Feature extracted with shape {feature.shape}.")
        return feature[..., np.newaxis]  # Add channel dimension

    except Exception as e:
        logger.error(f"Error in preprocessing audio: {e}")
        raise
138
+
139
+
140
def _reshape_feature(feature, input_shape):
    """
    Fit ``feature`` to the per-sample dimensions expected by the model.

    ``input_shape`` is a model input shape whose first entry is the batch
    dimension. Axis ``i`` of ``feature`` is matched against
    ``input_shape[1 + i]``: longer axes are cut at the end, shorter axes are
    zero-padded at the end. Each axis is handled on its own, so a 2-D
    feature (for example MFCC coefficients by frames) is fitted along both
    dimensions rather than only the first. Dimensions given as ``None`` and
    trailing model dimensions beyond the feature's rank (such as the channel
    axis) are ignored.

    Returns the reshaped feature as a NumPy array.
    """
    feature = np.asarray(feature)
    targets = input_shape[1:1 + feature.ndim]

    for axis, target in enumerate(targets):
        if target is None:
            # Variable-length model dimension: nothing to enforce.
            continue
        current = feature.shape[axis]
        if current > target:
            keep = [slice(None)] * feature.ndim
            keep[axis] = slice(0, target)
            feature = feature[tuple(keep)]
        elif current < target:
            # Explicit per-axis widths; a bare (before, after) pair would be
            # applied to every axis.
            widths = [(0, 0)] * feature.ndim
            widths[axis] = (0, target - current)
            feature = np.pad(feature, widths)

    return feature
153
+
154
+
155
def classify_audio(model_type, feature_type, file_path):
    """
    Classify an audio file using the specified model and feature type.

    Args:
        model_type: Key into ``MODELS`` ('binary' or 'multi').
        feature_type: Key into ``MODELS[model_type]``; also passed to
            ``preprocess_audio`` as the feature mode.
        file_path: Path of the audio file to classify.

    Returns:
        ``(predicted_class, probabilities)`` where ``predicted_class`` is the
        index of the predicted class and ``probabilities`` is the raw model
        output for the single sample as a list.

    Raises:
        FileNotFoundError: If the model file does not exist.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        model_file = os.path.join(MODEL_PATH, MODELS[model_type][feature_type])
        if not os.path.exists(model_file):
            raise FileNotFoundError(f"Model file '{model_file}' not found.")
        model = load_model(model_file)

        # Get input shape from the model
        input_shape = model.input_shape

        # Preprocess audio
        processed_audio = preprocess_audio(file_path, mode=feature_type, input_shape=input_shape)

        # Add batch dimension
        processed_audio = np.expand_dims(processed_audio, axis=0)

        # Predict
        predictions = model.predict(processed_audio)
        if predictions.shape[-1] == 1:
            # A single output unit carries one score, so argmax would always
            # be 0; threshold it instead.
            # NOTE(review): assumes the score is the probability of class
            # index 1 - confirm against the label encoding used in training.
            predicted_class = int(predictions[0][0] >= 0.5)
        else:
            predicted_class = np.argmax(predictions, axis=1)[0]
        probabilities = predictions[0].tolist()

        logger.info(f"Prediction complete. Predicted class: {predicted_class}, Probabilities: {probabilities}")
        return predicted_class, probabilities

    except Exception as e:
        logger.error(f"Error in classification: {e}")
        raise
185
+
186
+
187
def main():
    """Classify ``FILE_PATH`` with every configured model and print a summary.

    Each (model type, feature type) pair in ``MODELS`` is tried in turn. A
    failure for one pair is logged and recorded as an "Error" row so the
    remaining pairs still run.
    """
    logger.info("Starting audio classification test script.")

    if not os.path.exists(FILE_PATH):
        logger.error(f"Audio file not found: {FILE_PATH}")
        return

    results = []  # One row per model/feature combination for the summary table

    for model_type, feature_models in MODELS.items():
        for feature_type in feature_models:
            row = {"Model Type": model_type, "Feature Type": feature_type}
            try:
                logger.info(f"Testing {model_type} model with {feature_type} features.")
                predicted_class, probabilities = classify_audio(model_type, feature_type, FILE_PATH)
                class_name = CLASS_NAMES[model_type][predicted_class]
                logger.info(f"Predicted Class: {class_name} ({predicted_class}), Probabilities: {probabilities}")
                row["Predicted Class"] = class_name
                row["Probabilities"] = probabilities
            except Exception as e:
                logger.error(f"Failed for {model_type} - {feature_type}: {e}")
                row["Predicted Class"] = "Error"
                row["Probabilities"] = str(e)
            results.append(row)

    # Render the collected rows as a plain-text table.
    summary = pd.DataFrame(results)
    print("\nSummary of Results:")
    print(summary.to_string(index=False))
224
+
225
+ if __name__ == "__main__":
226
+ main()
README.md CHANGED
@@ -1,7 +1,7 @@
1
- ---
2
- title: ICBHI 2017 Challenge - Amplifier Health
3
- sdk: streamlit
4
- emoji: 📊
5
- colorFrom: purple
6
- colorTo: blue
7
- ---
 
1
+ ---
2
+ title: ICBHI 2017 Challenge - Amplifier Health
3
+ sdk: streamlit
4
+ emoji: 📊
5
+ colorFrom: purple
6
+ colorTo: blue
7
+ ---
TestModels.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import os
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.metrics import (
7
+ accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve
8
+ )
9
+ from tensorflow.keras.models import load_model
10
+ import matplotlib.pyplot as plt
11
+
12
+ # Paths
13
+ MODEL_PATH = "./models"
14
+ DATASET_PATH = "./processed_datasets"
15
+
16
+ # Model and dataset filenames
17
+ MODELS = [
18
+ "final_model_binary_augmented.h5",
19
+ "final_model_binary_log_mel.h5",
20
+ "final_model_binary_mfcc.h5",
21
+ "final_model_multi_augmented.h5",
22
+ "final_model_multi_log_mel.h5",
23
+ "final_model_multi_mfcc.h5"
24
+ ]
25
+
26
+ DATASETS = {
27
+ "binary_augmented": ("X_test_binary_augmented.npy", "y_test_binary_augmented.npy"),
28
+ "binary_log_mel": ("X_test_binary_log_mel.npy", "y_test_binary_log_mel.npy"),
29
+ "binary_mfcc": ("X_test_binary_mfcc.npy", "y_test_binary_mfcc.npy"),
30
+ "multi_augmented": ("X_test_multi_augmented.npy", "y_test_multi_augmented.npy"),
31
+ "multi_log_mel": ("X_test_multi_log_mel.npy", "y_test_multi_log_mel.npy"),
32
+ "multi_mfcc": ("X_test_multi_mfcc.npy", "y_test_multi_mfcc.npy")
33
+ }
34
+
35
+ # Metrics dictionary
36
+ metrics_dict = []
37
+
38
+ # Function to evaluate a model
39
def evaluate_model(model, X_test, y_test, mode):
    """Evaluate ``model`` on a test split, print metrics and save a ROC plot.

    Two output layouts are supported:

    * Single-unit output (shape ``(N,)`` or ``(N, 1)``): the score is
      thresholded at 0.5, and ``y_test`` may be a 1-D 0/1 vector, an
      ``(N, 1)`` column or one-hot encoded.
    * Multi-unit output: predictions and one-hot ``y_test`` are reduced with
      ``argmax`` and ROC-AUC is computed one-vs-rest.

    Accuracy, weighted precision/recall/F1, ROC-AUC and the confusion matrix
    are printed. A summary row is appended to the module-level
    ``metrics_dict`` list and the ROC curves are written to
    ``roc_curve_<mode>.png``.

    Args:
        model: Object exposing ``predict(X_test)``.
        X_test: Test features passed straight to ``model.predict``.
        y_test: Test labels.
        mode: Name used in the printed header, the summary row and the
            ROC file name.
    """
    y_pred_prob = np.asarray(model.predict(X_test))
    y_test = np.asarray(y_test)

    single_output = y_pred_prob.ndim == 1 or y_pred_prob.shape[1] == 1
    if single_output:
        # argmax over one column is always 0, so threshold the score instead.
        # NOTE(review): assumes the score is the probability of label 1.
        scores = y_pred_prob.reshape(-1)
        if y_test.ndim > 1 and y_test.shape[1] > 1:
            y_true = np.argmax(y_test, axis=1)
        else:
            y_true = y_test.reshape(-1).astype(int)
        y_pred = (scores >= 0.5).astype(int)
        auc = roc_auc_score(y_true, scores)
        roc_inputs = [(1, y_true, scores)]
    else:
        y_true = np.argmax(y_test, axis=1)
        y_pred = np.argmax(y_pred_prob, axis=1)
        auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')
        # Only classes that occur in the ground truth get a curve, and each
        # curve is labelled with its own column index.
        present = set(np.unique(y_true).tolist())
        roc_inputs = [
            (class_index, y_test[:, class_index], y_pred_prob[:, class_index])
            for class_index in range(y_test.shape[1])
            if class_index in present
        ]

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f"--- Evaluation for {mode} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC-AUC: {auc:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("\n")

    # Log metrics
    metrics_dict.append({
        "Model": mode,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC-AUC": auc
    })

    # Plot ROC curve
    plt.figure(figsize=(10, 6))
    for class_index, truth, score in roc_inputs:
        fpr, tpr, _ = roc_curve(truth, score)
        plt.plot(fpr, tpr, label=f"Class {class_index} ROC")
    plt.plot([0, 1], [0, 1], 'k--', label='Chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f"ROC Curve - {mode}")
    plt.legend()
    plt.savefig(f"roc_curve_{mode}.png")
    plt.close()
86
+
87
+ # Evaluate all models
88
for model_name in MODELS:
    # "final_model_binary_mfcc.h5" -> "binary_mfcc", the key used in DATASETS.
    mode_key = (
        model_name.replace("final_model_", "")
        .replace(".h5", "")
        .replace(" ", "_")
        .lower()
    )
    dataset = DATASETS.get(mode_key)

    if not dataset:
        print(f"No dataset found for model: {model_name}")
        continue

    # Load the model first, then its matching test split.
    model_path = os.path.join(MODEL_PATH, model_name)
    model = load_model(model_path)

    X_test_path, y_test_path = dataset
    X_test = np.load(os.path.join(DATASET_PATH, X_test_path))
    y_test = np.load(os.path.join(DATASET_PATH, y_test_path))

    evaluate_model(model, X_test, y_test, mode_key)

# Persist one row per evaluated model.
metrics_df = pd.DataFrame(metrics_dict)
metrics_df.to_csv("model_evaluation_summary.csv", index=False)
print("Evaluation complete. Summary saved as 'model_evaluation_summary.csv'.")
Train.py CHANGED
@@ -27,6 +27,7 @@ from tensorflow.keras.layers import Conv1D, GRU, Input, add, Dense, Dropout, Bat
27
  from tensorflow.keras.optimizers import Adamax
28
  from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
29
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
 
30
 
31
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress INFO and WARNING logs from TensorFlow
32
  os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' # Disable oneDNN optimizations
@@ -39,18 +40,11 @@ processing_logger = logging.getLogger("data_processing")
39
  model_logger = logging.getLogger("model_training")
40
 
41
  # Dataset and Paths
42
- AUDIO_FILES_PATH = 'D://github//AmpleHealth//data//Respiratory_Sound_Database//audio_and_txt_files'
43
- METADATA_PATH = 'D://github//AmpleHealth//data//Respiratory_Sound_Database//audio_and_txt_files'
44
 
45
- def save_dataset(X, y, mode, output_dir="c"):
46
  """
47
  Save the processed X and y to .npy files.
48
-
49
- Args:
50
- X: Processed features.
51
- y: Processed labels.
52
- mode: Mode for which the dataset is processed.
53
- output_dir: Directory to save the .npy files.
54
  """
55
  import os
56
 
@@ -66,9 +60,8 @@ def save_dataset(X, y, mode, output_dir="c"):
66
  processing_logger.info(f"Saved dataset for mode '{mode}' to {output_dir}")
67
 
68
 
69
- def load_or_process_dataset(df_filtered, audio_files_path, mode, output_dir="processed_datasets"):
70
 
71
- #output_dir = os.path.abspath(output_dir)
72
  # File paths for preprocessed data
73
  X_path = os.path.join(output_dir, f"X_{mode}.npy")
74
  y_path = os.path.join(output_dir, f"y_{mode}.npy")
@@ -82,13 +75,21 @@ def load_or_process_dataset(df_filtered, audio_files_path, mode, output_dir="pro
82
  processing_logger.info(f"Preprocessed files not found for mode '{mode}'. Processing data...")
83
  os.makedirs(output_dir, exist_ok=True)
84
 
85
- if mode == 'augmented':
86
- X, y, le = prepare_dataset_augmented(df_filtered, audio_files_path)
 
 
 
 
 
87
  else:
88
- X, y, le = prepare_dataset_parallel(df_filtered, audio_files_path, mode=mode)
89
-
90
-
91
-
 
 
 
92
  # Save the processed data and LabelEncoder
93
  np.save(X_path, X)
94
  np.save(y_path, y)
@@ -97,119 +98,180 @@ def load_or_process_dataset(df_filtered, audio_files_path, mode, output_dir="pro
97
  le = LabelEncoder()
98
  return X, y, le
99
 
100
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def main():
102
- data_logger.info("Starting data pipeline.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Step 1: Load and preprocess data
 
105
  data_logger.info("Loading and preprocessing data...")
106
- df = load_data()
107
- audio_metadata = process_audio_metadata(METADATA_PATH)
 
 
 
 
 
108
  df_all = merge_datasets(audio_metadata, df)
109
 
110
- # Define classification modes and feature types
111
- classification_modes = [ 'multi', 'binary']#
112
- feature_types = [ 'augmented','mfcc', 'log_mel'] #,
113
 
114
- for classification_mode in classification_modes:
115
- # Preprocess dataset for binary or multi-class classification
116
  df_filtered = filter_and_sample_data(df_all, mode=classification_mode)
 
117
 
118
- for feature_type in feature_types:
119
- processing_logger.info(f"Preparing dataset for {classification_mode} classification with {feature_type} features.")
120
 
121
  # Load or process dataset
122
- X, y, le = load_or_process_dataset(df_filtered, AUDIO_FILES_PATH, feature_type, output_dir=f"processed_datasets/{classification_mode}")
123
-
124
- # Log input dimensions
125
- processing_logger.info(f"Input data dimensions for {feature_type}: {X.shape}")
126
- processing_logger.info(f"Output data dimensions for {feature_type}: {y.shape}")
127
-
128
- # Split dataset
129
- processing_logger.info("Splitting dataset...")
130
- X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(X, y)
131
-
132
- # Check for class balance
133
- unique_classes, class_counts = np.unique(np.argmax(y_train, axis=1), return_counts=True)
134
- processing_logger.info(f"Class distribution before oversampling: {dict(zip(unique_classes, class_counts))}")
 
 
 
 
 
 
 
135
 
136
  try:
137
  X_train, y_train = oversample_data(X_train, y_train)
138
- unique_classes, class_counts = np.unique(np.argmax(y_train, axis=1), return_counts=True)
139
- processing_logger.info(f"Class distribution after oversampling: {dict(zip(unique_classes, class_counts))}")
140
  except ValueError as e:
141
  processing_logger.warning(f"SMOTE skipped: {e}")
142
-
143
-
144
- # Log dimensions after preprocessing
145
- processing_logger.info(f"Training data dimensions for {feature_type}: X_train={X_train.shape}, y_train={y_train.shape}")
146
- processing_logger.info(f"Validation data dimensions for {feature_type}: X_val={X_val.shape}, y_val={y_val.shape}")
147
- processing_logger.info(f"Test data dimensions for {feature_type}: X_test={X_test.shape}, y_test={y_test.shape}")
148
-
149
- # Train and optimize model
150
- model_logger.info(f"Running optimization for {feature_type} mode...")
151
-
152
- if feature_type == 'augmented': # Train 1D CNN for GRU features
153
- X_train = np.expand_dims(X_train, axis=-1)
154
- X_val = np.expand_dims(X_val, axis=-1)
155
- X_test = np.expand_dims(X_test, axis=-1)
156
-
157
- model_logger.info(f"Updated 1D CNN Input dimensions: X_train={X_train.shape}, X_val={X_val.shape}, X_test={X_test.shape}")
158
-
159
- best_params = run_optuna_optimization(
160
- model_type="1D",
161
- input_shape=X_train.shape[1:],
162
- num_classes=y_train.shape[1],
163
- X_train=X_train,
164
- y_train=y_train,
165
- X_val=X_val,
166
- y_val=y_val,
167
- n_trials=20
168
- )
169
- best_model = build_cnn_model(
170
- input_shape=X_train.shape[1:],
171
- n_filters=best_params["n_filters"],
172
- dense_units=best_params["dense_units"],
173
- dropout_rate=best_params["dropout_rate"],
174
- num_classes=y_train.shape[1],
175
- model_type="1D"
176
- )
177
- else: # Train 2D CNN for MFCC and Log-Mel
178
- best_params = run_optuna_optimization(
179
- model_type="2D",
180
- input_shape=X_train.shape[1:],
181
- num_classes=y_train.shape[1],
182
- X_train=X_train,
183
- y_train=y_train,
184
- X_val=X_val,
185
- y_val=y_val,
186
- n_trials=20
187
- )
188
- best_model = build_cnn_model(
189
- input_shape=X_train.shape[1:],
190
- n_filters=best_params["n_filters"],
191
- dense_units=best_params["dense_units"],
192
- dropout_rate=best_params["dropout_rate"],
193
- num_classes=y_train.shape[1],
194
- model_type="2D"
195
- )
196
-
197
- model_logger.info(f"Model input shape: {X_train.shape[1:]}")
198
- model_logger.info(f"Number of output classes: {y_train.shape[1]}")
199
-
200
- # Train and save the model
201
- best_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)
202
- model_path = f".models/best_model_{classification_mode}_{feature_type}.h5"
203
- best_model.save(model_path)
204
- mlflow.log_artifact(model_path)
205
-
206
- # Evaluate model
207
- y_pred = best_model.predict(X_test)
208
- log_metrics(y_test, y_pred, f"{classification_mode}_{feature_type}")
209
-
210
- data_logger.info("Pipeline completed successfully.")
211
-
212
 
213
  if __name__ == "__main__":
214
  main()
215
-
 
27
  from tensorflow.keras.optimizers import Adamax
28
  from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
29
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
30
+ import argparse
31
 
32
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress INFO and WARNING logs from TensorFlow
33
  os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' # Disable oneDNN optimizations
 
40
  model_logger = logging.getLogger("model_training")
41
 
42
  # Dataset and Paths
43
+ AUDIO_FILES_PATH = './/data//Respiratory_Sound_Database//audio_and_txt_files'
 
44
 
45
+ def save_dataset(X, y, mode, output_dir="./processed_datasets/new"):
46
  """
47
  Save the processed X and y to .npy files.
 
 
 
 
 
 
48
  """
49
  import os
50
 
 
60
  processing_logger.info(f"Saved dataset for mode '{mode}' to {output_dir}")
61
 
62
 
63
+ def load_or_process_dataset(df_filtered, audio_files_path, mode, feature_type, output_dir="processed_datasets/new"):
64
 
 
65
  # File paths for preprocessed data
66
  X_path = os.path.join(output_dir, f"X_{mode}.npy")
67
  y_path = os.path.join(output_dir, f"y_{mode}.npy")
 
75
  processing_logger.info(f"Preprocessed files not found for mode '{mode}'. Processing data...")
76
  os.makedirs(output_dir, exist_ok=True)
77
 
78
+ # Prepare the dataset
79
+ if feature_type == 'augmented':
80
+ X, y, le = prepare_dataset_augmented(
81
+ df_filtered,
82
+ audio_files_path,
83
+ classification_mode=mode
84
+ )
85
  else:
86
+ X, y, le = prepare_dataset_parallel(
87
+ df_filtered,
88
+ audio_files_path,
89
+ mode=feature_type,
90
+ classification_mode=mode
91
+ )
92
+
93
  # Save the processed data and LabelEncoder
94
  np.save(X_path, X)
95
  np.save(y_path, y)
 
98
  le = LabelEncoder()
99
  return X, y, le
100
 
101
def log_class_distribution(y, message):
    """Log how many samples fall into each class.

    Args:
        y: Labels. One-hot arrays (two or more columns) are reduced with
            ``argmax``; 1-D vectors and single-column arrays are counted
            as-is, so an ``(N, 1)`` binary label array is not collapsed
            into a single class.
        message: Prefix for the log line.
    """
    labels = np.asarray(y)
    if labels.ndim > 1 and labels.shape[1] > 1:
        # Multi-class classification (one-hot encoded array)
        labels = np.argmax(labels, axis=1)
    else:
        # Binary classification (vector or single column of 0s and 1s)
        labels = labels.reshape(-1)

    unique, counts = np.unique(labels, return_counts=True)
    # Plain Python values keep the logged dict readable regardless of how
    # NumPy prints its scalar types.
    class_distribution = {label.item(): int(count) for label, count in zip(unique, counts)}
    processing_logger.info(f"{message} Class Distribution: {class_distribution}")
110
+
111
+
112
def generate_random_audio_data(samples=200, feature_dim=20, seed=None):
    """Generate random audio-like data for testing purposes.

    Args:
        samples: Number of synthetic samples to create.
        feature_dim: Side length of each square 2-D feature map.
        seed: Optional integer seed. When given, a dedicated
            ``np.random.RandomState`` is used so the output is reproducible
            and NumPy's global random state is left untouched. When ``None``
            (the default) the global NumPy generator is used, exactly as
            before.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(samples, feature_dim, feature_dim)`` with values in ``[0, 1)`` and
        ``y`` has shape ``(samples,)`` with binary labels in ``{0, 1}``.
    """
    # np.random (module) and RandomState expose the same rand/randint API.
    rng = np.random if seed is None else np.random.RandomState(seed)
    X = rng.rand(samples, feature_dim, feature_dim)  # Simulate 2D audio features
    y = rng.randint(0, 2, size=samples)  # Binary classification labels
    return X, y
117
+
118
def test_model():
    """Smoke-test the 2D CNN training path on simulated audio data.

    Generates random features, splits them 70/15/15 into train/validation/test
    sets, runs the Optuna/MLflow experiment helper, then fits the returned
    model for a single epoch and evaluates it on the test split.

    Side effects:
        Rebinds the module-level names ``X_train``, ``X_val``, ``X_test``,
        ``y_train``, ``y_val`` and ``y_test`` to the generated splits, and
        prints progress to stdout.
    """
    print("[DEBUG] Generating simulated audio data...")
    # Kept deliberately: the splits are published as module globals.
    global X_train, X_val, X_test, y_train, y_val, y_test
    X, y = generate_random_audio_data()

    # Simulate preprocessing similar to audio processing pipeline
    print("[DEBUG] Preprocessing simulated audio data...")
    # log1p is element-wise, so one vectorized call replaces the per-sample loop.
    X_preprocessed = np.log1p(X)

    # Split data into train, validation, and test sets
    X_train, X_temp, y_train, y_temp = train_test_split(
        X_preprocessed, y, test_size=0.3, stratify=y, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
    )

    print(f"[DEBUG] Data split: Training={X_train.shape}, Validation={X_val.shape}, Test={X_test.shape}")

    # Expand dimensions for 2D CNN input (adds a trailing channel axis)
    X_train = np.expand_dims(X_train, axis=-1)
    X_val = np.expand_dims(X_val, axis=-1)
    X_test = np.expand_dims(X_test, axis=-1)

    print("[DEBUG] Initializing 2D CNN model...")
    model = track_experiment_with_mlflow_and_optuna(
        mode='mfcc',
        num_classes=1,
        model_type='2D',  # Specify 2D CNN for MFCC and Log-Mel
        classification_mode='binary',
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        n_trials=20,
    )

    print("[DEBUG] Training the model...")
    # Train the model with a single epoch for testing
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=1, batch_size=32)

    print("[DEBUG] Evaluating the model...")
    results = model.evaluate(X_test, y_test)
    print(f"[DEBUG] Test evaluation results: {results}")
159
+
160
+
161
+ # Define main function
162
def main():
    """Run the respiratory-sound training pipeline from the command line.

    Parses CLI arguments, loads and merges diagnosis/demographic data with the
    audio metadata, then for every (classification mode, feature type) pair:
    builds or loads the feature arrays, splits them 70/15/15, saves the test
    split, oversamples the training split, runs the Optuna/MLflow experiment
    and saves the resulting model.

    With ``--debug`` the real data is skipped and ``test_model()`` is run on
    random data instead.

    Note:
        Preprocessed arrays are cached under
        ``processed_datasets/<classification_mode>/<feature_type>``. The
        feature type is part of the path because the cache file names only
        encode the classification mode; sharing one directory made a second
        feature type silently reload the first one's cached features.
    """
    # Example invocation:
    # python Train.py --metadata_path data/Respiratory_Sound_Database/audio_and_txt_files --audio_files_path data/Respiratory_Sound_Database/audio_and_txt_files --demographic_path data/demographic_info.txt --diagnosis_path data/Respiratory_Sound_Database/patient_diagnosis.csv --classification_modes binary --feature_types mfcc

    parser = argparse.ArgumentParser(description="Run the respiratory sound analysis pipeline.")
    # NOTE(review): --metadata_path is parsed but not read anywhere in this function.
    parser.add_argument("--metadata_path", type=str, default="./data/metadata", help="Path to the metadata directory.")
    parser.add_argument("--audio_files_path", type=str, default="./data/audio", help="Path to the directory containing audio files.")
    parser.add_argument("--demographic_path", type=str, default="./data/demographic_info.txt", help="Path to the demographic info file.")
    parser.add_argument("--diagnosis_path", type=str, default="./data/patient_diagnosis.csv", help="Path to the patient diagnosis CSV file.")
    parser.add_argument("--tracking_uri", type=str, default="./mlruns", help="MLflow tracking URI.")
    parser.add_argument("--classification_modes", type=str, nargs='+', default=['multi', 'binary'], help="Classification modes to run. Options: 'binary', 'multi'.")
    parser.add_argument("--feature_types", type=str, nargs='+', default=['mfcc'], help="Feature types to use. Options: 'mfcc', 'log_mel', 'augmented'.")
    parser.add_argument("--debug", action='store_true', help="Run in debug mode with random test data.")
    args = parser.parse_args()

    if args.debug:
        test_model()
        return

    # Set up directories and MLflow tracking.
    # A distinct local name avoids shadowing the module-level AUDIO_FILES_PATH.
    audio_files_path = args.audio_files_path
    mlflow.set_tracking_uri(args.tracking_uri)

    # Logging initial information
    data_logger.info("Starting data pipeline.")
    data_logger.info("Loading and preprocessing data...")

    # Load and preprocess data
    df = load_data(
        diagnosis_path=args.diagnosis_path,
        demographic_path=args.demographic_path
    )
    audio_metadata = process_audio_metadata(audio_files_path)
    df_all = merge_datasets(audio_metadata, df)

    models = []

    for classification_mode in args.classification_modes:
        # Preprocess dataset for classification mode
        df_filtered = filter_and_sample_data(df_all, mode=classification_mode)
        processing_logger.info(f"Dataset shape for {classification_mode} mode: {df_filtered.shape}")

        for feature_type in args.feature_types:
            processing_logger.info(f"Running experiment for {classification_mode} classification with {feature_type} features.")

            # Load or process dataset. The cache directory is keyed by BOTH the
            # classification mode and the feature type (see docstring note).
            X, y, le = load_or_process_dataset(
                df_filtered, audio_files_path,
                feature_type=feature_type,
                mode=classification_mode,
                output_dir=f"processed_datasets/{classification_mode}/{feature_type}"
            )

            # Split data into train/val/test (70/15/15, stratified)
            X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
            X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

            # Save test data for future evaluation
            x_test_file = f"X_test_{classification_mode}_{feature_type}.npy"
            y_test_file = f"y_test_{classification_mode}_{feature_type}.npy"
            np.save(x_test_file, X_test)
            np.save(y_test_file, y_test)
            # NOTE(review): these calls happen before the `with mlflow.start_run`
            # below, so they are not logged inside that run context - confirm
            # this is the intended run for the test-split artifacts.
            mlflow.log_artifact(x_test_file)
            mlflow.log_artifact(y_test_file)

            # Log dataset characteristics
            log_class_distribution(y_train, "Before Oversampling")
            processing_logger.info(f"Train size: {X_train.shape}, Validation size: {X_val.shape}, Test size: {X_test.shape}")

            try:
                X_train, y_train = oversample_data(X_train, y_train)
            except ValueError as e:
                # Best effort: continue with the un-resampled training split.
                processing_logger.warning(f"SMOTE skipped: {e}")
            log_class_distribution(y_train, "After Oversampling")

            # Determine number of classes: a single output unit for binary,
            # otherwise the width of the one-hot label matrix.
            num_classes = 1 if classification_mode == "binary" else y_train.shape[1]

            # 'augmented' features feed the 1D model; every other feature type
            # feeds the 2D model.
            is_augmented = feature_type == 'augmented'
            model_type = '1D' if is_augmented else '2D'

            # Train and save model
            with mlflow.start_run(run_name=f"Experiment_{classification_mode}_{feature_type}", nested=True):
                if is_augmented:
                    # Add a trailing channel axis for the 1D model input.
                    X_train = np.expand_dims(X_train, axis=-1)
                    X_val = np.expand_dims(X_val, axis=-1)
                    X_test = np.expand_dims(X_test, axis=-1)

                model = track_experiment_with_mlflow_and_optuna(
                    mode=feature_type,
                    num_classes=num_classes,
                    model_type=model_type,
                    classification_mode=classification_mode,
                    X_train=X_train,
                    y_train=y_train,
                    X_val=X_val,
                    y_val=y_val,
                    n_trials=20,
                )

                final_model_path = f"final_model_{classification_mode}_{feature_type}.h5"
                model.save(final_model_path)
                mlflow.log_artifact(final_model_path)
                models.append(model)

    processing_logger.info("All experiments completed successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  if __name__ == "__main__":
277
  main()
 
app.py CHANGED
@@ -10,7 +10,7 @@ if "active_page" not in st.session_state:
10
  st.session_state["active_page"] = "Introduction"
11
 
12
  # Streamlit app setup
13
- st.title("ICBHI 2017 Challenge - Amplifier Health")
14
 
15
  # Sidebar Navigation
16
  st.sidebar.markdown('<div class="sidebar-header">Navigate</div>', unsafe_allow_html=True)
 
10
  st.session_state["active_page"] = "Introduction"
11
 
12
  # Streamlit app setup
13
+ st.title("ICBHI 2017 Challenge - Amplifier Health Take-home Assignment")
14
 
15
  # Sidebar Navigation
16
  st.sidebar.markdown('<div class="sidebar-header">Navigate</div>', unsafe_allow_html=True)
data/Respiratory_Sound_Database/testsample/115_1b1_Ar_sc_Meditron.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0.064 0.393 0 0
2
+ 0.393 1.236 0 0
3
+ 1.236 2.193 0 0
4
+ 2.193 2.979 0 0
5
+ 2.979 3.922 0 0
6
+ 3.922 4.736 0 0
7
+ 4.736 5.664 0 0
8
+ 5.664 6.593 0 0
9
+ 6.593 7.393 0 0
10
+ 7.393 8.221 0 0
11
+ 8.221 9.236 0 0
12
+ 9.236 10.164 0 0
13
+ 10.164 10.836 0 1
14
+ 10.836 12.179 0 0
15
+ 12.179 13.007 0 0
16
+ 13.007 13.65 0 0
17
+ 13.65 14.593 0 0
18
+ 14.593 15.479 0 0
19
+ 15.479 16.321 0 0
20
+ 16.321 17.079 0 0
21
+ 17.079 17.879 0 0
22
+ 17.879 18.707 0 0
23
+ 18.707 19.55 0 0
24
+ 19.55 19.893 0 0
data/Respiratory_Sound_Database/testsample/115_1b1_Ar_sc_Meditron.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a169cb80250e104970716bc6785803ad956e9fb3de4dc15540bcf5fac9b8d9b
3
+ size 2646044
data/Respiratory_Sound_Database/testsample/121_1b1_Tc_sc_Meditron.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d65fd9b4aa09be36d9f731acd468621c39c3d20402930dbc79022f4d137a67b9
3
+ size 2646044
data/Respiratory_Sound_Database/testsample/121_1p1_Tc_sc_Meditron.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ 0.036 1.907 0 0
2
+ 1.907 4.521 0 0
3
+ 4.521 7.193 0 0
4
+ 7.193 9.75 0 0
5
+ 9.75 12.407 0 0
6
+ 12.407 15.079 0 0
7
+ 15.079 17.521 0 0
8
+ 17.521 19.95 0 0
data/Respiratory_Sound_Database/testsample/149_1b1_Al_sc_Meditron.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0.007 0.807 0 0
2
+ 0.807 1.393 0 0
3
+ 1.393 2.15 0 1
4
+ 2.15 3.136 0 1
5
+ 3.136 4.193 1 1
6
+ 4.193 5.436 0 1
7
+ 5.436 6.636 0 0
8
+ 6.636 7.936 1 0
9
+ 7.936 9.364 0 1
10
+ 9.364 10.764 0 1
11
+ 10.764 12.121 0 1
12
+ 12.121 13.179 0 1
13
+ 13.179 14.15 0 1
14
+ 14.15 15.236 0 1
15
+ 15.236 16.45 0 1
16
+ 16.45 17.95 0 1
17
+ 17.95 19.179 1 1
18
+ 19.179 19.607 0 1
data/Respiratory_Sound_Database/testsample/149_1b1_Al_sc_Meditron.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ed6fbf99d088fdb9bed679656c53af95b52a31e8e1839aef8fe0fc48d9e897e
3
+ size 2646044
data/Respiratory_Sound_Database/testsample/157_1b1_Al_sc_Meditron.wav ADDED
Binary file (520 kB). View file
 
data/Respiratory_Sound_Database/testsample/157_1b1_Ar_sc_Meditron.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1.9095 6.2217 0 0
2
+ 6.2217 14.007 0 0
3
+ 14.007 19.263 0 0
4
+ 19.263 26.209 0 0
5
+ 26.209 36.124 0 0
6
+ 36.124 47.329 0 0
7
+ 47.329 50.214 0 0
8
+ 50.214 53.299 0 0
9
+ 53.299 56.688 0 0
10
+ 56.688 59.636 0 0
11
+ 59.636 62.721 0 0
12
+ 62.721 65.05 0 0
data/Respiratory_Sound_Database/testsample/191_2b2_Tc_mc_LittC2SE.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ 0.022 2.322 0 0
2
+ 2.322 4.964 0 0
3
+ 4.964 7.664 0 0
4
+ 7.664 10.007 0 0
5
+ 10.007 13.336 0 0
6
+ 13.336 16.164 0 0
7
+ 16.164 18.864 0 0
8
+ 18.864 19.821 0 0
data/Respiratory_Sound_Database/testsample/191_2b2_Tc_mc_LittC2SE.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a240a8f969713173f7ae42ef886c1ae9be49f376c5a5e717319e0f3f58176ca2
3
+ size 2646044
data/Respiratory_Sound_Database/testsample/215_1b3_Tc_sc_Meditron.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 0.022 1.693 0 0
2
+ 1.693 6.022 0 0
3
+ 6.022 10.507 0 0
4
+ 10.507 14.664 0 0
5
+ 14.664 18.907 0 0
6
+ 18.907 19.964 0 0
data/Respiratory_Sound_Database/testsample/215_1b3_Tc_sc_Meditron.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c3a93e837d51db7c65754a9fdd2adf66dfa8d72cb7b5a8a493146452c28b93c
3
+ size 1764046
data/Respiratory_Sound_Database/testsample/patient_diagnosis.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ 101,URTI
2
+ 115,LRTI
3
+ 121,Healthy
4
+ 149,Bronchiolitis
5
+ 157,COPD
6
+ 191,Pneumonia
7
+ 215,Bronchiectasis
requirements.txt CHANGED
@@ -16,4 +16,6 @@ tensorflow
16
  tensorflow_intel
17
  tqdm
18
  fsspec
19
- prometheus_client
 
 
 
16
  tensorflow_intel
17
  tqdm
18
  fsspec
19
+ prometheus_client
20
+ argparse
21
+ pytest
streamlit_ui/__pycache__/data_exploration.cpython-312.pyc CHANGED
Binary files a/streamlit_ui/__pycache__/data_exploration.cpython-312.pyc and b/streamlit_ui/__pycache__/data_exploration.cpython-312.pyc differ
 
streamlit_ui/__pycache__/model_deployment.cpython-312.pyc CHANGED
Binary files a/streamlit_ui/__pycache__/model_deployment.cpython-312.pyc and b/streamlit_ui/__pycache__/model_deployment.cpython-312.pyc differ
 
streamlit_ui/__pycache__/model_performance.cpython-312.pyc CHANGED
Binary files a/streamlit_ui/__pycache__/model_performance.cpython-312.pyc and b/streamlit_ui/__pycache__/model_performance.cpython-312.pyc differ
 
streamlit_ui/__pycache__/readme.cpython-312.pyc CHANGED
Binary files a/streamlit_ui/__pycache__/readme.cpython-312.pyc and b/streamlit_ui/__pycache__/readme.cpython-312.pyc differ
 
streamlit_ui/data_exploration.py CHANGED
@@ -1,178 +1,205 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- import librosa
7
- import librosa.display
8
- import os
9
- from Exploration.inference import RespiratorySoundAnalysis
10
-
11
- # Define base paths
12
- BASE_PATH = './/data//Respiratory_Sound_Database'
13
- DIAGNOSIS_FILE = os.path.join(BASE_PATH, 'patient_diagnosis.csv')
14
- AUDIO_PATH = os.path.join(BASE_PATH, 'testsample')
15
- DEMOGRAPHIC_FILE = os.path.join('.//data', 'demographic_info.txt')
16
-
17
- # Initialize analysis object
18
- analysis = RespiratorySoundAnalysis(DIAGNOSIS_FILE, AUDIO_PATH)
19
-
20
- # Load data
21
- @st.cache_data
22
- def load_data():
23
- analysis.load_diagnosis_data()
24
- analysis.load_audio_files()
25
- analysis.analyze_audio_properties()
26
- return analysis.diagnosis_df, analysis.audio_df
27
-
28
- diagnosis_df, audio_df = load_data()
29
-
30
- # Load patient demographic data
31
- @st.cache_data
32
- def load_patient_demographics():
33
- patient_df = pd.read_csv(
34
- DEMOGRAPHIC_FILE,
35
- names=['Patient number', 'Age', 'Sex', 'Adult BMI (kg/m2)', 'Child Weight (kg)', 'Child Height (cm)'],
36
- delimiter=' '
37
- )
38
- return patient_df
39
-
40
- patient_df = load_patient_demographics()
41
-
42
- # Streamlit App Function
43
- def run():
44
- st.title("Respiratory Sound Data Explorer")
45
-
46
- # Tabs for navigation
47
- tabs = st.tabs(["Overview", "Explore Data", "Patient Demographics", "Preprocessing & Audio Effects"])
48
-
49
- # Overview Tab
50
- with tabs[0]:
51
- st.header("Dataset Overview")
52
-
53
- # Highlight key statistics
54
- total_patients = len(diagnosis_df)
55
- most_common_disease = diagnosis_df['disease'].value_counts().idxmax()
56
- least_common_disease = diagnosis_df['disease'].value_counts().idxmin()
57
-
58
- st.subheader("Key Statistics")
59
- st.markdown(f"""
60
- - **Total Patients:** {total_patients}
61
- - **Most Common Disease:** {most_common_disease} ({diagnosis_df['disease'].value_counts().max()} patients)
62
- - **Least Common Disease:** {least_common_disease} ({diagnosis_df['disease'].value_counts().min()} patients)
63
- """)
64
-
65
- # Diagnosis Distribution
66
- st.subheader("Diagnosis Distribution")
67
- disease_counts = diagnosis_df['disease'].value_counts()
68
- fig, ax = plt.subplots(figsize=(10, 6))
69
- sns.barplot(y=disease_counts.index, x=disease_counts.values, palette="viridis", ax=ax)
70
- ax.set_title("Disease Distribution", fontsize=16, fontweight='bold')
71
- ax.set_xlabel("Number of Patients", fontsize=12)
72
- ax.set_ylabel("Disease", fontsize=12)
73
- st.pyplot(fig)
74
-
75
- # Explore Data Tab
76
- with tabs[1]:
77
- st.header("Explore Data")
78
- shortest_file = os.path.basename(audio_df.loc[audio_df['duration_sec'].idxmin(), 'file_name'])
79
- longest_file = os.path.basename(audio_df.loc[audio_df['duration_sec'].idxmax(), 'file_name'])
80
-
81
- if audio_df is not None and not audio_df.empty:
82
- st.subheader("Key Audio Insights")
83
- st.markdown(f"""
84
- - **Total Audio Files:** {len(audio_df)}
85
- - **Average Duration:** {audio_df['duration_sec'].mean():.2f} seconds
86
- - **Shortest Audio File:** {shortest_file} ({audio_df['duration_sec'].min():.2f} seconds)
87
- - **Longest Audio File:** {longest_file} ({audio_df['duration_sec'].max():.2f} seconds)
88
- """)
89
-
90
- # Duration Distribution
91
- st.subheader("Audio Duration Distribution")
92
- fig, ax = plt.subplots(figsize=(10, 6))
93
- sns.histplot(audio_df['duration_sec'], bins=20, kde=True, color='skyblue', ax=ax)
94
- ax.set_title("Audio Duration Distribution", fontsize=16, fontweight='bold')
95
- st.pyplot(fig)
96
-
97
- else:
98
- st.warning("No audio data available to display.")
99
-
100
- # Patient Demographics Tab
101
- with tabs[2]:
102
- st.header("Patient Demographics")
103
- st.dataframe(patient_df)
104
-
105
- st.subheader("Missing Values Information")
106
- st.write(patient_df.isna().sum())
107
-
108
- st.subheader("Age Distribution")
109
- fig, ax = plt.subplots(figsize=(10, 6))
110
- sns.histplot(patient_df['Age'].dropna(), bins=20, kde=True, color='skyblue', ax=ax)
111
- ax.set_title("Age Distribution", fontsize=16, fontweight='bold')
112
- st.pyplot(fig)
113
-
114
- # Preprocessing & Audio Effects Tab
115
- with tabs[3]:
116
- st.header("Preprocessing & Audio Effects")
117
- wav_files = [f for f in os.listdir(AUDIO_PATH) if f.endswith('.wav')]
118
-
119
- if wav_files:
120
- selected_file_name = st.selectbox("Select an Audio File", wav_files)
121
- file_path = os.path.join(AUDIO_PATH, selected_file_name)
122
-
123
- try:
124
- y_raw, sr = librosa.load(file_path)
125
- st.audio(file_path, format="audio/wav")
126
-
127
- # Raw Waveform
128
- st.subheader("Raw Waveform")
129
- fig, ax = plt.subplots(figsize=(10, 4))
130
- librosa.display.waveshow(y_raw, sr=sr, ax=ax)
131
- ax.set_title("Raw Waveform", fontsize=16, fontweight='bold')
132
- st.pyplot(fig)
133
-
134
- # Noise Filtering
135
- st.subheader("Noise Filtering")
136
- y_filtered = librosa.effects.preemphasis(y_raw)
137
- fig, ax = plt.subplots(figsize=(10, 4))
138
- librosa.display.waveshow(y_filtered, sr=sr, ax=ax)
139
- ax.set_title("Filtered Waveform (Pre-emphasis Applied)", fontsize=16, fontweight='bold')
140
- st.pyplot(fig)
141
-
142
- # Log Mel-Spectrogram
143
- st.subheader("Log Mel-Spectrogram")
144
- mel_spect = librosa.feature.melspectrogram(y=y_filtered, sr=sr, n_mels=128)
145
- mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)
146
- fig, ax = plt.subplots(figsize=(10, 6))
147
- img = librosa.display.specshow(mel_spect_db, sr=sr, x_axis='time', y_axis='mel', ax=ax, cmap='viridis')
148
- fig.colorbar(img, ax=ax, format="%+2.0f dB")
149
- ax.set_title("Log Mel-Spectrogram", fontsize=16, fontweight='bold')
150
- st.pyplot(fig)
151
-
152
- # Fast Fourier Transform (FFT)
153
- st.subheader("FFT (Frequency Domain)")
154
- fft_vals = np.abs(np.fft.fft(y_filtered))
155
- freqs = np.fft.fftfreq(len(fft_vals), 1 / sr)
156
- fig, ax = plt.subplots(figsize=(10, 4))
157
- ax.plot(freqs[:len(freqs) // 2], fft_vals[:len(fft_vals) // 2], color='blue')
158
- ax.set_title("FFT - Frequency Spectrum", fontsize=16, fontweight='bold')
159
- ax.set_xlabel("Frequency (Hz)")
160
- ax.set_ylabel("Amplitude")
161
- st.pyplot(fig)
162
-
163
- # Zero Crossing Rate
164
- st.subheader("Zero Crossing Rate")
165
- zcr = librosa.feature.zero_crossing_rate(y_filtered)[0]
166
- fig, ax = plt.subplots(figsize=(10, 4))
167
- ax.plot(zcr, color='orange')
168
- ax.set_title("Zero Crossing Rate Over Time", fontsize=16, fontweight='bold')
169
- ax.set_xlabel("Frame Index")
170
- ax.set_ylabel("Zero Crossing Rate")
171
- st.pyplot(fig)
172
-
173
- except Exception as e:
174
- st.error(f"Error processing audio file: {e}")
175
-
176
- else:
177
- st.warning("No audio files found in the directory.")
178
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import librosa
7
+ import librosa.display
8
+ import os
9
+ from Exploration.inference import RespiratorySoundAnalysis
10
+
11
+ # Define base paths
12
+ BASE_PATH = './/data//Respiratory_Sound_Database'
13
+ DIAGNOSIS_FILE = os.path.join(BASE_PATH, 'patient_diagnosis.csv')
14
+ AUDIO_PATH = os.path.join(BASE_PATH, 'testsample')
15
+ DEMOGRAPHIC_FILE = os.path.join('.//data', 'demographic_info.txt')
16
+
17
+ # Initialize analysis object
18
+ analysis = RespiratorySoundAnalysis(DIAGNOSIS_FILE, AUDIO_PATH)
19
+
20
+ # Load data
21
@st.cache_data
def load_data():
    """Populate the shared analysis object and return its two data frames.

    Runs the diagnosis loader, the audio-file loader and the audio-property
    analysis on the module-level ``analysis`` object, in that order. The
    result is cached by Streamlit via ``st.cache_data``.

    Returns:
        Tuple ``(diagnosis_df, audio_df)`` read from ``analysis``.
    """
    analysis.load_diagnosis_data()
    analysis.load_audio_files()
    analysis.analyze_audio_properties()

    diagnosis_table = analysis.diagnosis_df
    audio_table = analysis.audio_df
    return diagnosis_table, audio_table
27
+
28
+ diagnosis_df, audio_df = load_data()
29
+
30
+ # Load patient demographic data
31
@st.cache_data
def load_patient_demographics():
    """Read the patient demographic file into a DataFrame.

    The file at ``DEMOGRAPHIC_FILE`` is parsed as space-delimited text and
    the column names are supplied here. The result is cached by Streamlit
    via ``st.cache_data``.

    Returns:
        DataFrame with one column per entry in ``column_names``.
    """
    column_names = [
        'Patient number',
        'Age',
        'Sex',
        'Adult BMI (kg/m2)',
        'Child Weight (kg)',
        'Child Height (cm)',
    ]
    return pd.read_csv(DEMOGRAPHIC_FILE, names=column_names, delimiter=' ')
39
+
40
+ patient_df = load_patient_demographics()
41
+
42
+ # Streamlit App Function
43
def run():
    """Render the "Respiratory Sound Data Explorer" Streamlit page.

    The page has four tabs:

    * Overview - patient count and disease distribution.
    * Explore Data - audio file count and duration statistics.
    * Patient Demographics - raw table, missing-value counts, age histogram.
    * Preprocessing & Audio Effects - for a selected ``.wav`` file, plots of
      the raw and preprocessed waveforms, frequency spectrum, zero-crossing
      rate, RMS energy and Mel spectrogram, followed by an audio player.

    Reads the module-level ``diagnosis_df``, ``audio_df``, ``patient_df``,
    ``analysis`` and ``AUDIO_PATH``. Every Matplotlib figure is closed after
    it has been handed to Streamlit so figures do not accumulate across
    script reruns.
    """
    st.title("Respiratory Sound Data Explorer")

    # Tabs for navigation
    tabs = st.tabs(["Overview", "Explore Data", "Patient Demographics", "Preprocessing & Audio Effects"])

    # Overview Tab
    with tabs[0]:
        st.header("Dataset Overview")

        # Highlight key statistics (value counts computed once and reused)
        disease_counts = diagnosis_df['disease'].value_counts()
        total_patients = len(diagnosis_df)
        most_common_disease = disease_counts.idxmax()
        least_common_disease = disease_counts.idxmin()

        st.subheader("Key Statistics")
        st.markdown(f"""
        - **Total Patients:** {total_patients}
        - **Most Common Disease:** {most_common_disease} ({disease_counts.max()} patients)
        - **Least Common Disease:** {least_common_disease} ({disease_counts.min()} patients)
        """)

        # Diagnosis Distribution
        st.subheader("Diagnosis Distribution")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(y=disease_counts.index, x=disease_counts.values, palette="viridis", ax=ax)
        ax.set_title("Disease Distribution", fontsize=16, fontweight='bold')
        ax.set_xlabel("Number of Patients", fontsize=12)
        ax.set_ylabel("Disease", fontsize=12)
        st.pyplot(fig)
        plt.close(fig)

    # Explore Data Tab
    with tabs[1]:
        st.header("Explore Data")

        if audio_df is not None and not audio_df.empty:
            # These lookups must stay inside the guard: idxmin()/idxmax()
            # fail on a missing or empty frame.
            durations = audio_df['duration_sec']
            shortest_file = os.path.basename(audio_df.loc[durations.idxmin(), 'file_name'])
            longest_file = os.path.basename(audio_df.loc[durations.idxmax(), 'file_name'])

            st.subheader("Key Audio Insights")
            st.markdown(f"""
            - **Total Audio Files:** {len(audio_df)}
            - **Average Duration:** {durations.mean():.2f} seconds
            - **Shortest Audio File:** {shortest_file} ({durations.min():.2f} seconds)
            - **Longest Audio File:** {longest_file} ({durations.max():.2f} seconds)
            """)

            # Duration Distribution
            st.subheader("Audio Duration Distribution")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.histplot(durations, bins=20, kde=True, color='skyblue', ax=ax)
            ax.set_title("Audio Duration Distribution", fontsize=16, fontweight='bold')
            st.pyplot(fig)
            plt.close(fig)

        else:
            st.warning("No audio data available to display.")

    # Patient Demographics Tab
    with tabs[2]:
        st.header("Patient Demographics")
        st.dataframe(patient_df)

        st.subheader("Missing Values Information")
        st.write(patient_df.isna().sum())

        st.subheader("Age Distribution")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(patient_df['Age'].dropna(), bins=20, kde=True, color='skyblue', ax=ax)
        ax.set_title("Age Distribution", fontsize=16, fontweight='bold')
        st.pyplot(fig)
        plt.close(fig)

    # Preprocessing & Audio Effects Tab
    with tabs[3]:
        st.header("Preprocessing & Audio Effects")
        # List all .wav files in the AUDIO_PATH directory
        wav_files = [f for f in os.listdir(AUDIO_PATH) if f.endswith('.wav')]

        if wav_files:
            selected_file_name = st.selectbox("Select an Audio File", wav_files)

            # Construct the full path of the selected file
            file_path = os.path.join(AUDIO_PATH, selected_file_name)

            try:
                # Load raw audio
                y_raw, sr = librosa.load(file_path)
            except Exception as e:
                st.error(f"Error loading audio file: {e}")
                st.stop()

            # Preprocessing and Visualization
            try:
                y_processed, processed_sr = analysis.preprocess_audio(y_raw, sr)

                # Mel spectrogram
                mel = librosa.feature.melspectrogram(
                    y=y_processed, sr=processed_sr, n_fft=2048, hop_length=512, power=2.0
                )
                mel_db = librosa.power_to_db(mel, ref=np.max)

                # Frequency Spectrum (one-sided FFT magnitude)
                fft = np.abs(np.fft.rfft(y_processed))
                freqs = np.fft.rfftfreq(len(y_processed), 1 / processed_sr)

                # Zero-Crossing Rate
                zcr = librosa.feature.zero_crossing_rate(y_processed)[0]

                # RMS Energy
                rms = librosa.feature.rms(y=y_processed)[0]

                # Create subplots for visualizations
                fig, axs = plt.subplots(3, 2, figsize=(15, 12))

                # Raw waveform
                librosa.display.waveshow(y_raw, sr=sr, ax=axs[0, 0])
                axs[0, 0].set_title("Raw Waveform", fontsize=12)

                # Preprocessed waveform
                librosa.display.waveshow(y_processed, sr=processed_sr, ax=axs[0, 1])
                axs[0, 1].set_title("Preprocessed Waveform", fontsize=12)

                # Frequency spectrum
                axs[1, 0].plot(freqs, fft, color='blue')
                axs[1, 0].set_title("Frequency Spectrum", fontsize=12)
                axs[1, 0].set_xlabel("Frequency (Hz)")
                axs[1, 0].set_ylabel("Amplitude")

                # ZCR
                axs[1, 1].plot(zcr, color='green')
                axs[1, 1].set_title("Zero-Crossing Rate", fontsize=12)
                axs[1, 1].set_xlabel("Frames")
                axs[1, 1].set_ylabel("Rate")

                # RMS Energy
                axs[2, 0].plot(rms, color='red')
                axs[2, 0].set_title("RMS Energy", fontsize=12)
                axs[2, 0].set_xlabel("Frames")
                axs[2, 0].set_ylabel("RMS")

                # Mel spectrogram
                img_mel = librosa.display.specshow(
                    mel_db, sr=processed_sr, x_axis='time', y_axis='mel', ax=axs[2, 1], cmap='viridis'
                )
                axs[2, 1].set_title("Mel Spectrogram", fontsize=12)
                fig.colorbar(img_mel, ax=axs[2, 1], format="%+2.0f dB")

                # Adjust layout on this figure explicitly, then render and release it
                fig.tight_layout()
                st.pyplot(fig)
                plt.close(fig)

            except Exception as e:
                st.error(f"Error during audio preprocessing or visualization: {e}")
                st.stop()

            # Play audio
            st.subheader("Listen to Audio")
            st.audio(file_path, format="audio/wav")
        else:
            st.warning("No audio files found in the directory.")
streamlit_ui/readme.py CHANGED
@@ -4,7 +4,7 @@ def run():
4
  st.title("Welcome!")
5
  st.subheader("Introduction")
6
  st.write("""
7
- This project involves developing a machine learning solution to classify respiratory sounds into diagnostic categories using the ICBHI 2017 Challenge Dataset.
8
  The pipeline includes data preprocessing, feature extraction, model training, evaluation, and deployment. """)
9
 
10
  st.image("./streamlit_ui/img/deployment.png", caption="Project Overview")
@@ -15,7 +15,7 @@ The pipeline includes data preprocessing, feature extraction, model training, ev
15
 
16
  st.write(""" ### Repository:
17
  You can access the GitHub repository for this project here:
18
- [GitHub Repository](https://github.com/your-repo-link) """)
19
 
20
  st.write(""" ### Contact:
21
  Developed by Reza Amini | magnumical.ca
 
4
  st.title("Welcome!")
5
  st.subheader("Introduction")
6
  st.write("""
7
+ This project involves developing a ML model to classify respiratory sounds into diagnostic categories using the ICBHI 2017 Challenge Dataset.
8
  The pipeline includes data preprocessing, feature extraction, model training, evaluation, and deployment. """)
9
 
10
  st.image("./streamlit_ui/img/deployment.png", caption="Project Overview")
 
15
 
16
  st.write(""" ### Repository:
17
  You can access the GitHub repository for this project here:
18
+ [GitHub Repository](https://github.com/magnumical/amp1) """)
19
 
20
  st.write(""" ### Contact:
21
  Developed by Reza Amini | magnumical.ca
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (136 Bytes). View file
 
utils/__pycache__/audioprocessing.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/audioprocessing.cpython-312.pyc and b/utils/__pycache__/audioprocessing.cpython-312.pyc differ
 
utils/__pycache__/data_loader.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/data_loader.cpython-312.pyc and b/utils/__pycache__/data_loader.cpython-312.pyc differ
 
utils/__pycache__/evaluation.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/evaluation.cpython-312.pyc and b/utils/__pycache__/evaluation.cpython-312.pyc differ
 
utils/__pycache__/model_utils.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/model_utils.cpython-312.pyc and b/utils/__pycache__/model_utils.cpython-312.pyc differ
 
utils/audioprocessing.py CHANGED
@@ -8,7 +8,8 @@ from sklearn.preprocessing import LabelEncoder
8
  from keras.utils import to_categorical
9
  import logging
10
  from utils.augmentation import add_noise, shift, stretch, pitch_shift # Ensure augmentation functions are imported
11
- from keras.utils import to_categorical, normalize
 
12
 
13
 
14
  from imblearn.over_sampling import RandomOverSampler
@@ -19,78 +20,85 @@ from imblearn.over_sampling import SMOTE
19
  # Initialize logger
20
  processing_logger = logging.getLogger("audio_processing")
21
 
22
-
23
  def process_audio_file(soundDir, audio_files_path, df_filtered):
24
  """
25
  Process a single audio file: extract MFCC features and augment with noise, stretching, and shifting.
26
-
27
- Args:
28
- soundDir: Filename of the audio file.
29
- audio_files_path: Path to the directory containing audio files.
30
- df_filtered: Filtered DataFrame containing patient diagnosis and metadata.
31
-
32
- Returns:
33
- Tuple containing features (X_local) and labels (y_local).
34
  """
35
  X_local = []
36
  y_local = []
37
- features = 52 # Number of MFCC features
38
 
39
- try:
40
- # Extract patient ID and disease from filename and DataFrame
41
- patient_id = int(soundDir.split('_')[0])
42
- disease = df_filtered.loc[df_filtered['Patient number'] == patient_id, 'Diagnosis'].values[0]
43
 
44
- # Load audio file
45
- data_x, sampling_rate = librosa.load(os.path.join(audio_files_path, soundDir), sr=None)
46
- mfccs = np.mean(librosa.feature.mfcc(y=data_x, sr=sampling_rate, n_mfcc=features).T, axis=0)
47
- X_local.append(mfccs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  y_local.append(disease)
49
 
50
- # Apply augmentations
51
- augmentations = [
52
- (add_noise, {"x": 0.001}),
53
- (shift, {"x": 1600}),
54
- (stretch, {"rate": 1.2}),
55
- (pitch_shift, {"rate": 3}),
56
- ]
57
 
58
- for func, kwargs in augmentations:
59
- augmented_data = func(data_x, **kwargs)
60
- mfccs_augmented = np.mean(librosa.feature.mfcc(y=augmented_data, sr=sampling_rate, n_mfcc=features).T, axis=0)
61
- X_local.append(mfccs_augmented)
62
- y_local.append(disease)
63
 
64
- except Exception as e:
65
- processing_logger.error(f"Error processing file {soundDir}: {e}")
 
 
 
 
 
 
66
 
67
- return X_local, y_local
 
 
 
68
 
 
 
69
 
70
- def mfccs_feature_extraction(audio_files_path, df_filtered, n_jobs=-1):
71
- """
72
- Extract MFCC features from audio data and augment with noise, stretching, and shifting in parallel.
73
 
74
- Args:
75
- audio_files_path: Path to the directory containing audio files.
76
- df_filtered: Filtered DataFrame containing patient diagnosis and metadata.
77
- n_jobs: Number of parallel jobs (-1 to use all available cores).
78
 
79
- Returns:
80
- X_data: Array of features extracted from the audio files.
81
- y_data: Array of target labels.
 
 
82
  """
83
  processing_logger.info(f"Processing audio files in: {audio_files_path}")
84
  files = [file for file in os.listdir(audio_files_path) if file.endswith('.wav') and file[:3] not in ['103', '108', '115']]
85
- #files = files[:40] # DEBUG limit, adjust as needed
 
86
 
87
  # Use Parallel and delayed to process files in parallel
88
- results = Parallel(n_jobs=n_jobs, backend="loky")(
89
- delayed(process_audio_file)(file, audio_files_path, df_filtered) for file in tqdm(files, desc="Processing audio files")
90
- )
91
 
92
  # Flatten results
93
- X_, y_ = [], []
 
94
  for X_local, y_local in results:
95
  X_.extend(X_local)
96
  y_.extend(y_local)
@@ -101,9 +109,9 @@ def mfccs_feature_extraction(audio_files_path, df_filtered, n_jobs=-1):
101
  return X_data, y_data
102
 
103
 
104
- def prepare_dataset_augmented(df_filtered, audio_files_path):
105
- """Prepare the dataset using the GRU pipeline."""
106
- processing_logger.info("Preparing dataset with GRU pipeline.")
107
 
108
  # Extract features and labels
109
  X, y = mfccs_feature_extraction(audio_files_path, df_filtered)
@@ -111,46 +119,44 @@ def prepare_dataset_augmented(df_filtered, audio_files_path):
111
  # Apply label encoding
112
  le = LabelEncoder()
113
  y_encoded = le.fit_transform(np.array(y)) # Encode labels to integers
114
- y_one_hot = to_categorical(y_encoded) # Convert to one-hot encoding
115
-
116
- # Log the mapping of one-hot encoding to class labels
117
- print("One-hot encoding mapping:")
118
- for idx, label in enumerate(le.classes_):
119
- print(f"{idx} -> {label}")
120
-
121
- processing_logger.info("Dataset preparation with GRU pipeline complete.")
122
- return X, y_one_hot, le
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
  def process_audio_metadata(folder_path):
127
- """
128
- Extract audio metadata from filenames.
129
-
130
- Args:
131
- folder_path: Path to the folder containing metadata files.
132
-
133
- Returns:
134
- Metadata DataFrame.
135
- """
136
  processing_logger.info("Extracting audio metadata from filenames.")
137
  data = []
138
  for filename in os.listdir(folder_path):
139
  if filename.endswith('.txt'):
140
  parts = filename.split('_')
141
- try:
142
- data.append({
143
- 'Patient number': int(parts[0]),
144
- 'Recording index': parts[1],
145
- 'Chest location': parts[2],
146
- 'Acquisition mode': parts[3],
147
- 'Recording equipment': parts[4].split('.')[0],
148
- })
149
- except (IndexError, ValueError) as e:
150
- processing_logger.warning(f"Skipping file {filename}: {e}")
151
  processing_logger.info("Audio metadata extraction complete.")
152
  return pd.DataFrame(data)
153
 
 
 
154
  def merge_datasets(df1, df2):
155
  """Merge metadata and diagnosis data."""
156
  processing_logger.info("Merging metadata and diagnosis data.")
@@ -159,16 +165,14 @@ def merge_datasets(df1, df2):
159
  processing_logger.info("Merging complete.")
160
  return merged_df
161
 
 
 
 
162
  def filter_and_sample_data(df, mode='binary'):
163
  """
164
  Filter and sample the dataset for binary or multi-class classification.
165
 
166
- Args:
167
- df: Input DataFrame containing diagnosis data.
168
- mode: Specify 'binary' for Normal/Abnormal or 'multi-class' for grouped classes.
169
-
170
- Returns:
171
- Filtered and processed DataFrame.
172
  """
173
  processing_logger.info(f"Filtering and sampling the dataset for {mode} classification.")
174
 
@@ -177,6 +181,7 @@ def filter_and_sample_data(df, mode='binary'):
177
  df['Diagnosis'] = df['Diagnosis'].apply(lambda x: 'Normal' if x == 'Healthy' else 'Abnormal')
178
  elif mode == 'multi':
179
  # Multi-class classification: Group classes
 
180
  processing_logger.info("Grouping classes for multi-class classification.")
181
  df['Diagnosis'] = df['Diagnosis'].replace({
182
  'Healthy': 'Normal',
@@ -198,27 +203,13 @@ def filter_and_sample_data(df, mode='binary'):
198
  processing_logger.info(f"Filtering and sampling complete with mode={mode}.")
199
  return df
200
 
201
- from imblearn.over_sampling import SMOTE
202
- from tensorflow.keras.utils import to_categorical
203
- import numpy as np
204
 
205
- def oversample_data(X, y, random_state=42, k_neighbors=5):
206
- """
207
- Apply SMOTE to balance classes for both binary and multi-class cases.
208
-
209
- Args:
210
- X: Feature data.
211
- y: One-hot encoded labels.
212
- random_state: Random seed for reproducibility.
213
- k_neighbors: Number of nearest neighbors for SMOTE.
214
-
215
- Returns:
216
- Oversampled feature data and labels.
217
- """
218
  processing_logger.info("Applying SMOTE to balance classes.")
219
 
220
  # Save the original shape of features
221
- original_shape = X.shape[1:]
222
 
223
  # Flatten for SMOTE processing
224
  X = X.reshape((X.shape[0], -1))
@@ -226,49 +217,24 @@ def oversample_data(X, y, random_state=42, k_neighbors=5):
226
  # Convert one-hot encoded labels to integers
227
  y = np.argmax(y, axis=1)
228
 
229
- # Log original class distribution
230
- unique_classes, class_counts = np.unique(y, return_counts=True)
231
- processing_logger.info(f"Original class distribution: {dict(zip(unique_classes, class_counts))}")
 
 
 
 
 
 
232
 
233
- try:
234
- # Apply SMOTE
235
- smote = SMOTE(random_state=random_state, k_neighbors=k_neighbors)
236
- X_resampled, y_resampled = smote.fit_resample(X, y)
237
-
238
- # Log new class distribution
239
- unique_classes, class_counts = np.unique(y_resampled, return_counts=True)
240
- processing_logger.info(f"New class distribution after SMOTE: {dict(zip(unique_classes, class_counts))}")
241
-
242
- # Reshape back to the original dimensions
243
- X_resampled = X_resampled.reshape((-1, *original_shape))
244
-
245
- # Convert labels back to one-hot encoding
246
- y_resampled = to_categorical(y_resampled)
247
-
248
- processing_logger.info("SMOTE oversampling complete.")
249
- return X_resampled, y_resampled
250
- except ValueError as e:
251
- processing_logger.warning(f"SMOTE could not be applied: {e}")
252
- return X, to_categorical(y) # Return original data if SMOTE fails
253
-
254
-
255
-
256
-
257
- def augment_data(X, y):
258
- """Apply data augmentation to increase dataset size."""
259
- processing_logger.info("Applying data augmentation.")
260
- datagen = ImageDataGenerator(
261
- rotation_range=10,
262
- width_shift_range=0.1,
263
- height_shift_range=0.1,
264
- horizontal_flip=True
265
- )
266
- datagen.fit(X)
267
- processing_logger.info("Data augmentation setup complete.")
268
- return datagen
269
-
270
-
271
- def prepare_dataset_parallel(df, audio_files_path, mode):
272
  """Prepare the dataset by extracting features from audio files in parallel."""
273
  processing_logger.info(f"Preparing dataset using {mode} features in parallel.")
274
  results = Parallel(n_jobs=-1)(delayed(preprocess_file)(row, audio_files_path, mode) for _, row in tqdm(df.iterrows(), total=len(df)))
@@ -277,12 +243,23 @@ def prepare_dataset_parallel(df, audio_files_path, mode):
277
  X = np.array(X)
278
  X = np.expand_dims(X, axis=-1) # Add channel dimension
279
  X = normalize(X, axis=1)
 
280
  le = LabelEncoder()
281
- y = to_categorical(le.fit_transform(np.array(y)))
 
 
 
 
 
 
 
 
 
282
 
283
  processing_logger.info(f"Dataset preparation using {mode} complete.")
284
  return X, y, le
285
 
 
286
  def preprocess_file(row, audio_files_path, mode):
287
  """Preprocess a single audio file."""
288
  file_path = os.path.join(audio_files_path, row['audio_file_name'])
@@ -294,7 +271,7 @@ def preprocessing(audio_file, mode):
294
  """Preprocess audio file by resampling, padding/truncating, and extracting features."""
295
  sr_new = 16000 # Resample audio to 16 kHz
296
  x, sr = librosa.load(audio_file, sr=sr_new)
297
-
298
  # Padding or truncating to 5 seconds (5 * sr_new samples)
299
  max_len = 5 * sr_new
300
  if x.shape[0] < max_len:
@@ -303,6 +280,7 @@ def preprocessing(audio_file, mode):
303
  x = x[:max_len]
304
 
305
  # Extract features
 
306
  if mode == 'mfcc':
307
  feature = librosa.feature.mfcc(y=x, sr=sr_new, n_mfcc=20) # Ensure consistent shape
308
  elif mode == 'log_mel':
@@ -311,22 +289,4 @@ def preprocessing(audio_file, mode):
311
 
312
  return feature
313
 
314
- def prepare_dataset(df, audio_files_path, mode):
315
- """Prepare the dataset by extracting features from audio files."""
316
- processing_logger.info(f"Preparing dataset using {mode} features.")
317
- X, y = [], []
318
- for _, row in tqdm(df.iterrows(), total=len(df)):
319
- file_path = os.path.join(audio_files_path, row['audio_file_name'])
320
- feature = preprocessing(file_path, mode)
321
- X.append(feature)
322
- y.append(row['Diagnosis'])
323
- del feature # Free memory after processing each file
324
- gc.collect()
325
-
326
- X = np.array(X)
327
- X = np.expand_dims(X, axis=-1) # Add channel dimension
328
- X = normalize(X, axis=1)
329
- le = LabelEncoder()
330
- y = to_categorical(le.fit_transform(np.array(y)))
331
- processing_logger.info(f"Dataset preparation using {mode} complete.")
332
- return X, y, le
 
8
  from keras.utils import to_categorical
9
  import logging
10
  from utils.augmentation import add_noise, shift, stretch, pitch_shift # Ensure augmentation functions are imported
11
+ from keras.utils import normalize
12
+ from scipy.signal import butter, sosfilt
13
 
14
 
15
  from imblearn.over_sampling import RandomOverSampler
 
20
  # Initialize logger
21
  processing_logger = logging.getLogger("audio_processing")
22
 
 
23
  def process_audio_file(soundDir, audio_files_path, df_filtered):
24
  """
25
  Process a single audio file: extract MFCC features and augment with noise, stretching, and shifting.
26
+
 
 
 
 
 
 
 
27
  """
28
  X_local = []
29
  y_local = []
30
+ features = 52
31
 
32
+ # Extract patient ID and disease from filename and DataFrame
33
+ patient_id = int(soundDir.split('_')[0])
34
+ disease = df_filtered.loc[df_filtered['Patient number'] == patient_id, 'Diagnosis'].values[0]
 
35
 
36
+ # Load audio file
37
+ data_x, sampling_rate = librosa.load(os.path.join(audio_files_path, soundDir), sr=None)
38
+ data_x = preprocess_audio(data_x, sampling_rate) # Apply filtering
39
+
40
+
41
+ mfccs = np.mean(librosa.feature.mfcc(y=data_x, sr=sampling_rate, n_mfcc=features).T, axis=0)
42
+ X_local.append(mfccs)
43
+ y_local.append(disease)
44
+
45
+ # Data augmentation
46
+ for augmentation in [add_noise, shift, stretch, pitch_shift]:
47
+ if augmentation == add_noise:
48
+ augmented_data = augmentation(data_x, 0.001)
49
+ elif augmentation == shift:
50
+ augmented_data = augmentation(data_x, 1600)
51
+ elif augmentation == stretch:
52
+ augmented_data = augmentation(data_x, 1.2)
53
+ elif augmentation == pitch_shift:
54
+ augmented_data = augmentation(data_x, sampling_rate, 3)
55
+
56
+ mfccs_augmented = np.mean(librosa.feature.mfcc(y=augmented_data, sr=sampling_rate, n_mfcc=features).T, axis=0)
57
+ X_local.append(mfccs_augmented)
58
  y_local.append(disease)
59
 
60
+ return X_local, y_local
 
 
 
 
 
 
61
 
 
 
 
 
 
62
 
63
+ def preprocess_audio(audio, sr):
64
+ """
65
+ Apply a bandpass filter to audio data.
66
+
67
+ """
68
+ # Define cutoff frequencies
69
+ low_cutoff = 50 # 50 Hz
70
+ high_cutoff = min(5000, sr / 2 - 1) # Ensure it is below Nyquist frequency
71
 
72
+ if low_cutoff >= high_cutoff:
73
+ raise ValueError(
74
+ f"Invalid filter range: low_cutoff={low_cutoff}, high_cutoff={high_cutoff} for sampling rate {sr}"
75
+ )
76
 
77
+ # Design a bandpass filter
78
+ sos = butter(N=10, Wn=[low_cutoff, high_cutoff], btype='band', fs=sr, output='sos')
79
 
80
+ # Apply the filter
81
+ filtered_audio = sosfilt(sos, audio)
82
+ return filtered_audio
83
 
 
 
 
 
84
 
85
+ def mfccs_feature_extraction(audio_files_path, df_filtered, n_jobs=-1):
86
+ """
87
+ Speed up MFCC feature extraction by running jobs in parallel.
88
+
89
+ Returns an array of features extracted from the audio files and an array of target labels.
90
  """
91
  processing_logger.info(f"Processing audio files in: {audio_files_path}")
92
  files = [file for file in os.listdir(audio_files_path) if file.endswith('.wav') and file[:3] not in ['103', '108', '115']]
93
+
94
+ #files = files[:30] ## DEBUG
95
 
96
  # Use Parallel and delayed to process files in parallel
97
+ results = Parallel(n_jobs=n_jobs, backend="loky")(delayed(process_audio_file)(file, audio_files_path, df_filtered) for file in tqdm(files, desc="Processing audio files"))
 
 
98
 
99
  # Flatten results
100
+ X_ = []
101
+ y_ = []
102
  for X_local, y_local in results:
103
  X_.extend(X_local)
104
  y_.extend(y_local)
 
109
  return X_data, y_data
110
 
111
 
112
+ def prepare_dataset_augmented(df_filtered, audio_files_path, classification_mode):
113
+ """Prepare the dataset for augmented features. It will be a 1D array."""
114
+ processing_logger.info("Preparing dataset with AUGMENTED pipeline.")
115
 
116
  # Extract features and labels
117
  X, y = mfccs_feature_extraction(audio_files_path, df_filtered)
 
119
  # Apply label encoding
120
  le = LabelEncoder()
121
  y_encoded = le.fit_transform(np.array(y)) # Encode labels to integers
 
 
 
 
 
 
 
 
 
122
 
123
+ if classification_mode == "binary":
124
+ # Use single column with 0 and 1 for binary classification
125
+ processing_logger.info("Binary classification mode: Using single column labels (0/1).")
126
+ y_processed = y_encoded # No one-hot encoding
127
+ else:
128
+ # One-hot encode labels for multi-class classification
129
+ processing_logger.info("Multi-class classification mode: Applying one-hot encoding.")
130
+ y_processed = to_categorical(y_encoded)
131
+
132
+ # Log the mapping of one-hot encoding to class labels
133
+ print("One-hot encoding mapping:")
134
+ for idx, label in enumerate(le.classes_):
135
+ print(f"{idx} -> {label}")
136
+
137
+ processing_logger.info("Dataset preparation with augmented pipeline complete.")
138
+ return X, y_processed, le
139
 
140
 
141
  def process_audio_metadata(folder_path):
142
+ """Extract audio metadata from filenames."""
 
 
 
 
 
 
 
 
143
  processing_logger.info("Extracting audio metadata from filenames.")
144
  data = []
145
  for filename in os.listdir(folder_path):
146
  if filename.endswith('.txt'):
147
  parts = filename.split('_')
148
+ data.append({
149
+ 'Patient number': int(parts[0]),
150
+ 'Recording index': parts[1],
151
+ 'Chest location': parts[2],
152
+ 'Acquisition mode': parts[3],
153
+ 'Recording equipment': parts[4].split('.')[0]
154
+ })
 
 
 
155
  processing_logger.info("Audio metadata extraction complete.")
156
  return pd.DataFrame(data)
157
 
158
+
159
+
160
  def merge_datasets(df1, df2):
161
  """Merge metadata and diagnosis data."""
162
  processing_logger.info("Merging metadata and diagnosis data.")
 
165
  processing_logger.info("Merging complete.")
166
  return merged_df
167
 
168
+
169
+
170
+
171
  def filter_and_sample_data(df, mode='binary'):
172
  """
173
  Filter and sample the dataset for binary or multi-class classification.
174
 
175
+ Returns filtered and processed DataFrame.
 
 
 
 
 
176
  """
177
  processing_logger.info(f"Filtering and sampling the dataset for {mode} classification.")
178
 
 
181
  df['Diagnosis'] = df['Diagnosis'].apply(lambda x: 'Normal' if x == 'Healthy' else 'Abnormal')
182
  elif mode == 'multi':
183
  # Multi-class classification: Group classes
184
+ # Diseases are grouped based on their similarities.
185
  processing_logger.info("Grouping classes for multi-class classification.")
186
  df['Diagnosis'] = df['Diagnosis'].replace({
187
  'Healthy': 'Normal',
 
203
  processing_logger.info(f"Filtering and sampling complete with mode={mode}.")
204
  return df
205
 
 
 
 
206
 
207
+ def oversample_data(X, y):
208
+ """Apply SMOTE to balance classes."""
 
 
 
 
 
 
 
 
 
 
 
209
  processing_logger.info("Applying SMOTE to balance classes.")
210
 
211
  # Save the original shape of features
212
+ original_shape = X.shape[1:]
213
 
214
  # Flatten for SMOTE processing
215
  X = X.reshape((X.shape[0], -1))
 
217
  # Convert one-hot encoded labels to integers
218
  y = np.argmax(y, axis=1)
219
 
220
+ # Apply SMOTE
221
+ smote = SMOTE(random_state=42)
222
+ X_resampled, y_resampled = smote.fit_resample(X, y)
223
+
224
+ # Reshape back to the original dimensions
225
+ X_resampled = X_resampled.reshape((-1, *original_shape))
226
+
227
+ # Convert labels back to one-hot encoding
228
+ y_resampled = to_categorical(y_resampled)
229
 
230
+ processing_logger.info("SMOTE oversampling complete.")
231
+ return X_resampled, y_resampled
232
+
233
+
234
+
235
+
236
+
237
+ def prepare_dataset_parallel(df, audio_files_path, mode, classification_mode):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  """Prepare the dataset by extracting features from audio files in parallel."""
239
  processing_logger.info(f"Preparing dataset using {mode} features in parallel.")
240
  results = Parallel(n_jobs=-1)(delayed(preprocess_file)(row, audio_files_path, mode) for _, row in tqdm(df.iterrows(), total=len(df)))
 
243
  X = np.array(X)
244
  X = np.expand_dims(X, axis=-1) # Add channel dimension
245
  X = normalize(X, axis=1)
246
+
247
  le = LabelEncoder()
248
+ y_encoded = le.fit_transform(np.array(y)) # Encode labels
249
+
250
+ if classification_mode == "binary":
251
+ # Use single column with 0 and 1 for binary classification
252
+ processing_logger.info("Binary classification mode: Using single column labels (0/1).")
253
+ y = y_encoded # No one-hot encoding
254
+ else:
255
+ # One-hot encode labels for multi-class classification
256
+ processing_logger.info("Multi-class classification mode: Applying one-hot encoding.")
257
+ y = to_categorical(y_encoded)
258
 
259
  processing_logger.info(f"Dataset preparation using {mode} complete.")
260
  return X, y, le
261
 
262
+
263
  def preprocess_file(row, audio_files_path, mode):
264
  """Preprocess a single audio file."""
265
  file_path = os.path.join(audio_files_path, row['audio_file_name'])
 
271
  """Preprocess audio file by resampling, padding/truncating, and extracting features."""
272
  sr_new = 16000 # Resample audio to 16 kHz
273
  x, sr = librosa.load(audio_file, sr=sr_new)
274
+ x = preprocess_audio(x, sr)
275
  # Padding or truncating to 5 seconds (5 * sr_new samples)
276
  max_len = 5 * sr_new
277
  if x.shape[0] < max_len:
 
280
  x = x[:max_len]
281
 
282
  # Extract features
283
+ # I understand the common choice for n_mfcc is 13, but here I assumed we need to capture more information, so I chose 20.
284
  if mode == 'mfcc':
285
  feature = librosa.feature.mfcc(y=x, sr=sr_new, n_mfcc=20) # Ensure consistent shape
286
  elif mode == 'log_mel':
 
289
 
290
  return feature
291
 
292
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/data_loader.py CHANGED
@@ -8,22 +8,29 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
8
  data_logger = logging.getLogger("data_pipeline")
9
 
10
 
11
- def load_data():
 
12
  """Load patient diagnosis and demographic data."""
13
  data_logger.info("Loading patient diagnosis and demographic data.")
14
- diagnosis_df = pd.read_csv('D://github//AmpleHealth//data//Respiratory_Sound_Database//patient_diagnosis.csv',
 
 
15
  names=['Patient number', 'Diagnosis'])
16
 
17
- patient_df = pd.read_csv('D://github//AmpleHealth//data//demographic_info.txt',
 
18
  names=['Patient number', 'Age', 'Sex', 'Adult BMI (kg/m2)', 'Child Weight (kg)', 'Child Height (cm)'],
19
  delimiter=' ')
20
 
21
  data_logger.info("Data successfully loaded.")
 
 
22
  return pd.merge(left=patient_df, right=diagnosis_df, how='left')
23
 
 
24
  def process_audio_metadata(folder_path):
25
  """Extract audio metadata from filenames."""
26
- data_logger.info("Extracting audio metadata from filenames.")
27
  data = []
28
  for filename in os.listdir(folder_path):
29
  if filename.endswith('.txt'):
@@ -35,5 +42,5 @@ def process_audio_metadata(folder_path):
35
  'Acquisition mode': parts[3],
36
  'Recording equipment': parts[4].split('.')[0]
37
  })
38
- data_logger.info("Audio metadata extraction complete.")
39
  return pd.DataFrame(data)
 
8
  data_logger = logging.getLogger("data_pipeline")
9
 
10
 
11
+ def load_data(diagnosis_path='.//data//Respiratory_Sound_Database//patient_diagnosis.csv',
12
+ demographic_path='.//data//demographic_info.txt'):
13
  """Load patient diagnosis and demographic data."""
14
  data_logger.info("Loading patient diagnosis and demographic data.")
15
+
16
+ # Load diagnosis data
17
+ diagnosis_df = pd.read_csv(diagnosis_path,
18
  names=['Patient number', 'Diagnosis'])
19
 
20
+ # Load demographic data
21
+ patient_df = pd.read_csv(demographic_path,
22
  names=['Patient number', 'Age', 'Sex', 'Adult BMI (kg/m2)', 'Child Weight (kg)', 'Child Height (cm)'],
23
  delimiter=' ')
24
 
25
  data_logger.info("Data successfully loaded.")
26
+
27
+ # Merge and return
28
  return pd.merge(left=patient_df, right=diagnosis_df, how='left')
29
 
30
+
31
  def process_audio_metadata(folder_path):
32
  """Extract audio metadata from filenames."""
33
+ processing_logger.info("Extracting audio metadata from filenames.")
34
  data = []
35
  for filename in os.listdir(folder_path):
36
  if filename.endswith('.txt'):
 
42
  'Acquisition mode': parts[3],
43
  'Recording equipment': parts[4].split('.')[0]
44
  })
45
+ processing_logger.info("Audio metadata extraction complete.")
46
  return pd.DataFrame(data)
utils/evaluation.py CHANGED
@@ -1,49 +1,25 @@
1
  from sklearn.metrics import classification_report
2
  import mlflow
3
-
 
 
 
4
 
5
  import numpy as np
6
 
7
- def log_metrics(y_true, y_pred, mode):
8
- """
9
- Log evaluation metrics for binary or multi-class classification.
10
 
11
- Args:
12
- y_true: True labels (array-like, one-hot encoded for multi-class).
13
- y_pred: Predicted probabilities (array-like, continuous values).
14
- mode: Mode of classification ('binary' or 'multi-class').
15
- """
16
- # Convert one-hot encoded `y_true` to class indices
17
- if y_true.ndim > 1: # If one-hot encoded
18
- y_true = np.argmax(y_true, axis=1)
19
-
20
- # Convert predicted probabilities `y_pred` to class indices
21
- if y_pred.ndim > 1: # If predicted as probabilities
22
- y_pred = np.argmax(y_pred, axis=1)
23
-
24
- if mode == 'binary':
25
- class_names = ["Class 0", "Class 1"]
26
- classification = classification_report(y_true, y_pred, output_dict=True, target_names=class_names)
27
- else:
28
- unique_classes = np.unique(y_true)
29
- class_names = [f"Class {i}" for i in unique_classes]
30
- classification = classification_report(y_true, y_pred, output_dict=True, target_names=class_names)
31
-
32
- # Log metrics to MLflow
33
- precision = classification['weighted avg']['precision']
34
- recall = classification['weighted avg']['recall']
35
- f1_score = classification['weighted avg']['f1-score']
36
 
37
  mlflow.log_metric(f"{mode}_precision", precision)
38
  mlflow.log_metric(f"{mode}_recall", recall)
39
  mlflow.log_metric(f"{mode}_f1_score", f1_score)
40
 
41
- print(f"Classification Report ({mode}):\n", classification_report(y_true, y_pred, target_names=class_names))
42
 
43
-
44
- import matplotlib.pyplot as plt
45
- from sklearn.metrics import roc_curve, roc_auc_score
46
- import numpy as np
47
 
48
  def plot_roc_curve(y_true, y_pred_prob, mode, class_names=None):
49
  """
 
1
  from sklearn.metrics import classification_report
2
  import mlflow
3
+
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.metrics import roc_curve, roc_auc_score
6
+ import numpy as np
7
 
8
  import numpy as np
9
 
 
 
 
10
 
11
+ def log_metrics(y_true, y_pred, mode):
12
+ """Log evaluation metrics."""
13
+ precision = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['precision']
14
+ recall = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['recall']
15
+ f1_score = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['f1-score']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  mlflow.log_metric(f"{mode}_precision", precision)
18
  mlflow.log_metric(f"{mode}_recall", recall)
19
  mlflow.log_metric(f"{mode}_f1_score", f1_score)
20
 
 
21
 
22
+
 
 
 
23
 
24
  def plot_roc_curve(y_true, y_pred_prob, mode, class_names=None):
25
  """
utils/model_utils.py CHANGED
@@ -6,10 +6,12 @@ from keras.optimizers import Adamax
6
  from keras.utils import to_categorical
7
  from sklearn.model_selection import train_test_split
8
  import optuna
9
-
 
10
  from imblearn.over_sampling import RandomOverSampler
11
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
12
  from imblearn.over_sampling import SMOTE
 
13
 
14
 
15
 
@@ -30,6 +32,8 @@ from keras.layers import (
30
  GlobalAveragePooling1D, GlobalAveragePooling2D,
31
  Dense, Dropout, BatchNormalization
32
  )
 
 
33
 
34
 
35
  # Initialize logger
@@ -40,24 +44,16 @@ model_logger = logging.getLogger("model_utils")
40
  # MODEL BUILDING UTILITIES
41
  # ==========================
42
 
43
- def build_cnn_model(input_shape, n_filters=32, dense_units=128, dropout_rate=0.3, num_classes=2, model_type='1D'):
44
  """
45
- Build and compile a CNN model.
46
-
47
- Args:
48
- input_shape: Shape of the input data.
49
- n_filters: Number of filters for the convolutional layers.
50
- dense_units: Number of units in the dense layer.
51
- dropout_rate: Dropout rate for regularization.
52
- num_classes: Number of output classes.
53
- model_type: '1D' for 1D CNN, '2D' for 2D CNN.
54
-
55
- Returns:
56
- Compiled CNN model.
57
  """
58
- model_logger.info(f"Building a {model_type} CNN model with input shape {input_shape}.")
59
  model = Sequential()
60
 
 
61
  if model_type == '1D':
62
  # 1D CNN layers
63
  model.add(Conv1D(n_filters, kernel_size=3, activation='relu', input_shape=input_shape))
@@ -97,126 +93,150 @@ def build_cnn_model(input_shape, n_filters=32, dense_units=128, dropout_rate=0.3
97
  else:
98
  raise ValueError("Invalid model_type. Must be '1D' or '2D'.")
99
 
100
- # Fully connected layers
101
  model.add(Dense(dense_units, activation='relu'))
102
  model.add(BatchNormalization())
103
  model.add(Dropout(dropout_rate))
104
- model.add(Dense(num_classes, activation='sigmoid' if num_classes == 1 else 'softmax'))
 
 
 
 
 
 
 
 
 
105
 
106
  # Compile the model
107
- loss = 'binary_crossentropy' if num_classes == 1 else 'categorical_crossentropy'
108
- model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
109
- model_logger.info(f"{model_type} CNN model built and compiled successfully.")
110
  return model
111
 
112
 
113
- # ===============================
114
- # HYPERPARAMETER OPTIMIZATION
115
- # ===============================
116
 
117
- def optimize_cnn_model(trial, input_shape, num_classes, X_train, y_train, X_val, y_val, model_type='1D'):
118
- """
119
- Optimize CNN model using Optuna.
120
-
121
- Args:
122
- trial: Optuna trial object.
123
- input_shape: Shape of the input data.
124
- num_classes: Number of output classes.
125
- X_train: Training data.
126
- y_train: Training labels.
127
- X_val: Validation data.
128
- y_val: Validation labels.
129
- model_type: Type of model ('1D' or '2D').
130
-
131
- Returns:
132
- Best validation accuracy.
133
- """
134
- n_filters = trial.suggest_int("n_filters", 16, 64, step=16)
135
- dense_units = trial.suggest_int("dense_units", 64, 256, step=64)
136
- dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
137
-
138
- model = build_cnn_model(input_shape, n_filters, dense_units, dropout_rate, num_classes, model_type=model_type)
139
- history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32, verbose=0)
140
-
141
- val_accuracy = max(history.history['val_accuracy'])
142
- return val_accuracy
143
 
144
-
145
- def run_optuna_optimization(model_type, input_shape, num_classes, X_train, y_train, X_val, y_val, n_trials=20):
 
 
 
 
 
 
 
 
 
146
  """
147
- Run Optuna optimization for a given model type.
148
-
149
- Args:
150
- model_type: Type of model to optimize ('1D' or '2D').
151
- input_shape: Shape of the input data.
152
- num_classes: Number of output classes.
153
- X_train: Training data.
154
- y_train: Training labels.
155
- X_val: Validation data.
156
- y_val: Validation labels.
157
- n_trials: Number of trials for Optuna optimization.
158
-
159
- Returns:
160
- Best hyperparameters.
161
  """
162
  def objective(trial):
163
- return optimize_cnn_model(trial, input_shape, num_classes, X_train, y_train, X_val, y_val, model_type)
164
-
165
- study = optuna.create_study(direction="maximize")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  study.optimize(objective, n_trials=n_trials)
167
 
168
- model_logger.info(f"Best trial for {model_type} CNN: {study.best_trial.params}")
169
- return study.best_trial.params
170
-
171
-
172
- # ============================
173
- # DATASET PREPARATION UTILS
174
- # ============================
175
- from sklearn.model_selection import train_test_split
176
- import numpy as np
177
-
178
- def split_dataset(X, y, test_size=0.3, validation_size=0.5, random_state=42):
179
- """
180
- Split dataset into training, validation, and test sets.
181
-
182
- Args:
183
- X: Feature data.
184
- y: Labels.
185
- test_size: Proportion of the data to reserve for testing.
186
- validation_size: Proportion of the test set to reserve for validation.
187
- random_state: Random seed.
188
-
189
- Returns:
190
- X_train, X_val, X_test, y_train, y_val, y_test
191
- """
192
- model_logger.info("Splitting dataset into training, validation, and test sets...")
193
-
194
- # Check for minimum class size
195
- class_counts = np.sum(y, axis=0) if len(y.shape) > 1 else np.bincount(y)
196
- if np.any(class_counts < 2):
197
- model_logger.warning("Some classes have fewer than 2 samples. Stratification will be disabled.")
198
- stratify_train = None
199
- stratify_test = None
200
- else:
201
- stratify_train = y
202
- stratify_test = y
203
-
204
- # Split training and test data
205
- X_train, X_temp, y_train, y_temp = train_test_split(
206
- X, y, test_size=test_size, stratify=stratify_train, random_state=random_state
207
  )
208
-
209
- # Split validation and test data
210
- class_counts_temp = np.sum(y_temp, axis=0) if len(y_temp.shape) > 1 else np.bincount(y_temp)
211
- if np.any(class_counts_temp < 2):
212
- model_logger.warning("Some classes in the temporary test set have fewer than 2 samples. Stratification will be disabled for the validation split.")
213
- stratify_temp = None
214
- else:
215
- stratify_temp = y_temp
216
 
217
- X_val, X_test, y_val, y_test = train_test_split(
218
- X_temp, y_temp, test_size=validation_size, stratify=stratify_temp, random_state=random_state
 
 
 
219
  )
220
-
221
- model_logger.info("Dataset split completed.")
222
- return X_train, X_val, X_test, y_train, y_val, y_test
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from keras.utils import to_categorical
7
  from sklearn.model_selection import train_test_split
8
  import optuna
9
+ import mlflow
10
+ import mlflow.keras
11
  from imblearn.over_sampling import RandomOverSampler
12
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
13
  from imblearn.over_sampling import SMOTE
14
+ import matplotlib.pyplot as plt
15
 
16
 
17
 
 
32
  GlobalAveragePooling1D, GlobalAveragePooling2D,
33
  Dense, Dropout, BatchNormalization
34
  )
35
+ from sklearn.model_selection import train_test_split
36
+ import numpy as np
37
 
38
 
39
  # Initialize logger
 
44
  # MODEL BUILDING UTILITIES
45
  # ==========================
46
 
47
+ def build_model(input_shape, n_filters, dense_units, dropout_rate, num_classes, model_type='1D', classification_mode='binary'):
48
  """
49
+ Build and compile a CNN model for 1D or 2D data.
50
+
51
+ Returns CNN model.
 
 
 
 
 
 
 
 
 
52
  """
53
+ print(f"Building the updated {model_type} CNN model with {classification_mode} classification.")
54
  model = Sequential()
55
 
56
+ # Add convolutional layers based on the model type
57
  if model_type == '1D':
58
  # 1D CNN layers
59
  model.add(Conv1D(n_filters, kernel_size=3, activation='relu', input_shape=input_shape))
 
93
  else:
94
  raise ValueError("Invalid model_type. Must be '1D' or '2D'.")
95
 
96
+ # Add fully connected layers
97
  model.add(Dense(dense_units, activation='relu'))
98
  model.add(BatchNormalization())
99
  model.add(Dropout(dropout_rate))
100
+
101
+ # Add output layer dynamically based on classification mode
102
+ if classification_mode == 'binary':
103
+ # Binary classification: Single unit with sigmoid activation
104
+ model.add(Dense(1, activation='sigmoid'))
105
+ loss_function = 'binary_crossentropy'
106
+ else:
107
+ # Multi-class classification: num_classes units with softmax activation
108
+ model.add(Dense(num_classes, activation='softmax'))
109
+ loss_function = 'categorical_crossentropy'
110
 
111
  # Compile the model
112
+ model.compile(optimizer='adam', loss=loss_function, metrics=['accuracy'])
113
+ print(f"{model_type} CNN model built and compiled successfully for {classification_mode} classification.")
 
114
  return model
115
 
116
 
 
 
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
def _set_learning_rate(model, learning_rate):
    """Apply *learning_rate* to the optimizer of an already-compiled model."""
    # build_model() compiles with the string 'adam', i.e. the default learning
    # rate; overriding the attribute afterwards keeps build_model's interface
    # untouched while making the tuned value actually take effect.
    model.optimizer.learning_rate = learning_rate


def _fit_with_early_stopping(model, X_train, y_train, X_val, y_val, verbose):
    """Fit *model* for up to 50 epochs, stopping early on stalled val_loss."""
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    )
    return model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=[early_stopping],
        verbose=verbose,
    )


def track_experiment_with_mlflow_and_optuna(
    mode,
    num_classes,
    model_type,
    classification_mode,
    X_train,
    y_train,
    X_val,
    y_val,
    n_trials=20,
):
    """
    Optimize hyperparameters using Optuna and track experiments with MLflow.

    Each Optuna trial builds a model with build_model(), trains it with early
    stopping, and logs its hyperparameters, best validation accuracy and loss
    curve to a nested MLflow run. The best trial's hyperparameters are then
    used to build and train a final model, which is saved to disk and logged
    as an MLflow artifact.

    Parameters:
    - mode: Feature extraction mode (e.g., 'augmented', 'mfcc', 'log_mel');
      used only in log messages and the saved model's file name.
    - num_classes: Number of classes for classification.
    - model_type: Type of model ('1D' for Conv1D, '2D' for Conv2D).
    - classification_mode: 'binary' for binary classification, 'multi' for
      multi-class classification.
    - X_train, y_train: Training data and labels.
    - X_val, y_val: Validation data and labels.
    - n_trials: Number of Optuna trials.

    Returns the model trained with the best hyperparameters found.

    Side effects: writes "loss_curve_<trial>_<model_type>.png" per trial and
    "best_model_<mode>_<model_type>.h5" into the current working directory.
    """
    input_shape = X_train.shape[1:]

    def objective(trial):
        with mlflow.start_run(nested=True):
            # Hyperparameters to tune
            n_filters = trial.suggest_categorical('n_filters', [16, 32, 64])
            dense_units = trial.suggest_int('dense_units', 64, 256, step=32)
            dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
            # suggest_loguniform is deprecated; log=True samples the same
            # log-uniform range.
            learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

            # Build and compile the model, then apply the sampled learning
            # rate (previously it was logged but never used for training).
            model = build_model(
                input_shape=input_shape,
                n_filters=n_filters,
                dense_units=dense_units,
                dropout_rate=dropout_rate,
                num_classes=num_classes,
                model_type=model_type,
                classification_mode=classification_mode,
            )
            _set_learning_rate(model, learning_rate)

            # Train the model
            history = _fit_with_early_stopping(
                model, X_train, y_train, X_val, y_val, verbose=0
            )
            best_val_accuracy = max(history.history['val_accuracy'])

            # Log hyperparameters and metrics to MLflow
            mlflow.log_params({
                'n_filters': n_filters,
                'dense_units': dense_units,
                'dropout_rate': dropout_rate,
                'learning_rate': learning_rate,
                'model_type': model_type,
                'classification_mode': classification_mode,
            })
            mlflow.log_metric("best_val_accuracy", best_val_accuracy)

            # Save loss curves; always close the figure so that repeated
            # trials do not accumulate open matplotlib figures.
            loss_curve_path = f"loss_curve_{trial.number}_{model_type}.png"
            fig = plt.figure()
            try:
                plt.plot(history.history['loss'], label='Train Loss')
                plt.plot(history.history['val_loss'], label='Validation Loss')
                plt.legend()
                plt.title("Training and Validation Loss")
                plt.savefig(loss_curve_path)
            finally:
                plt.close(fig)
            mlflow.log_artifact(loss_curve_path)

            return best_val_accuracy

    # Start Optuna study
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    # Retrieve the best trial and log results
    best_trial = study.best_trial
    best_params = best_trial.params
    model_logger.info(f"Best Trial for {mode} ({model_type}): {best_params}")

    # Build the best model with the same settings the winning trial used,
    # including its learning rate.
    best_model = build_model(
        input_shape=input_shape,
        n_filters=best_params['n_filters'],
        dense_units=best_params['dense_units'],
        dropout_rate=best_params['dropout_rate'],
        num_classes=num_classes,
        model_type=model_type,
        classification_mode=classification_mode,
    )
    _set_learning_rate(best_model, best_params['learning_rate'])

    # Train the best model
    _fit_with_early_stopping(
        best_model, X_train, y_train, X_val, y_val, verbose=1
    )

    # Save the best model
    # NOTE(review): this artifact is logged outside the per-trial runs, so it
    # lands in whatever MLflow run is active in the caller (or one MLflow
    # creates implicitly) — confirm that is the intended destination.
    best_model_path = f"best_model_{mode}_{model_type}.h5"
    best_model.save(best_model_path)
    mlflow.log_artifact(best_model_path)
    model_logger.info(f"Best model for {mode} ({model_type}) saved successfully.")

    return best_model