Spaces:
Sleeping
Sleeping
Add clean gradio app
Browse files- .gitattributes +1 -0
- .gitignore +0 -0
- README.md +4 -4
- app_gradio.py +191 -0
- dl.py +180 -0
- extractaudio.py +38 -0
- main.py +89 -0
- requirements.txt +8 -0
- saved_models/abnormal_model.h5 +3 -0
- saved_models/normal_model.h5 +3 -0
- saved_models/stage1_model.h5 +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.mat filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
File without changes
|
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.47.2
|
| 8 |
app_file: app.py
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Audio
|
| 3 |
+
emoji: 🔥
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.47.2
|
| 8 |
app_file: app.py
|
app_gradio.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import numpy as np
|
| 5 |
+
import librosa
|
| 6 |
+
import librosa.display
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import tensorflow as tf
|
| 9 |
+
from tensorflow.keras import models
|
| 10 |
+
|
| 11 |
+
# --- 1. Configuration & Global Variables ---
|
| 12 |
+
# Create a temporary directory for spectrograms if it doesn't exist
|
| 13 |
+
TEMP_DIR = "temp_gradio_specs"
|
| 14 |
+
os.makedirs(TEMP_DIR, exist_ok=True)
|
| 15 |
+
|
| 16 |
+
# Define image size for the model
|
| 17 |
+
IMG_SIZE = (224, 224)
|
| 18 |
+
|
| 19 |
+
# --- 2. Load Models and Define Classes (Done once on startup) ---
|
| 20 |
+
print("🚀 Loading machine learning models...")
|
| 21 |
+
try:
    stage1_model = models.load_model("saved_models/stage1_model.h5")
    abnormal_model = models.load_model("saved_models/abnormal_model.h5")
    normal_model = models.load_model("saved_models/normal_model.h5")
    print("✅ Models loaded successfully.")
except Exception as e:
    print(f"❌ Error loading models: {e}")
    # Nothing can be served without the models. Exit with a non-zero status:
    # the bare exit() helper reports success (status 0) and is only available
    # when the `site` module has been loaded.
    raise SystemExit(1) from e


def _list_class_dirs(root):
    """Return the sorted names of the visible sub-directories of *root*.

    Plain files and dot-prefixed entries are skipped so that a stray file
    cannot shift the index -> label mapping.
    NOTE(review): this is meant to mirror how the training script derives
    `class_names` from the same folders -- confirm against the Keras version
    used for training.
    """
    return sorted(
        entry
        for entry in os.listdir(root)
        if not entry.startswith(".") and os.path.isdir(os.path.join(root, entry))
    )


# Define class lists exactly as they were during training
stage1_classes = ["00 - Abnormal", "01 - Normal"]
abnormal_classes = _list_class_dirs("MelSpectrograms/00 - Abnormal")
normal_classes = _list_class_dirs("MelSpectrograms/01 - Normal")

print(f"Stage 1 Classes: {stage1_classes}")
print(f"Abnormal Sub-classes: {abnormal_classes}")
print(f"Normal Sub-classes: {normal_classes}")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# --- 3. Helper Functions and Classes ---
|
| 42 |
+
|
| 43 |
+
def save_mel_spectrogram(file_path, save_dir, sr=22050, n_mels=128, hop_length=512, n_fft=2048):
    """Render the Mel spectrogram of an audio file to a PNG image.

    Args:
        file_path: Path of the audio file to load.
        save_dir: Directory the PNG is written to (created if missing).
        sr: Sampling rate passed to ``librosa.load``.
        n_mels: Number of Mel bands.
        hop_length: Hop length used for the spectrogram and its display.
        n_fft: FFT window size.

    Returns:
        The path of the saved PNG, or ``None`` if anything went wrong (the
        error is printed, not raised).
    """
    try:
        y, sr = librosa.load(file_path, sr=sr, mono=True)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        S_db = librosa.power_to_db(S, ref=np.max)

        # Swap the real extension for ".png". A substring replace of ".wav"
        # would leave e.g. "clip.mp3" untouched and make savefig() fail on an
        # unsupported output format.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, stem + ".png")

        fig = plt.figure(figsize=(4, 4))
        try:
            librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
            plt.axis("off")
            plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
        return save_path
    except Exception as e:
        print(f"Error creating spectrogram: {e}")
        return None
|
| 62 |
+
|
| 63 |
+
class HierarchicalClassifier:
    """A wrapper class for the two-stage prediction logic.

    Stage 1 picks the main class; depending on that result either the
    abnormal or the normal model picks the sub-class.
    """

    def __init__(self, stage1_model, abnormal_model, normal_model,
                 stage1_classes, abnormal_classes, normal_classes):
        self.img_size = IMG_SIZE
        self.stage1_model = stage1_model
        self.abnormal_model = abnormal_model
        self.normal_model = normal_model
        self.stage1_classes = stage1_classes
        self.abnormal_classes = abnormal_classes
        self.normal_classes = normal_classes

    @staticmethod
    def _display_name(label):
        """Drop the leading "<code> - " prefix of a class label, if it has one.

        Labels without the " - " separator are returned unchanged instead of
        raising IndexError.
        """
        _prefix, separator, remainder = label.partition(" - ")
        return remainder if separator else label

    def _preprocess_image(self, image_path):
        """Load an image, resize it to ``self.img_size`` and return a batch of one."""
        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
        img_array = tf.keras.utils.img_to_array(img) / 255.0
        img_array = tf.expand_dims(img_array, 0)
        return img_array

    def predict(self, image_path):
        """Classify a spectrogram image with the two-stage pipeline.

        Returns:
            A dict with the keys ``stage1_class``, ``stage1_confidence``,
            ``stage2_class``, ``stage2_confidence`` and ``final_prediction``.
        """
        img_array = self._preprocess_image(image_path)
        stage1_pred = self.stage1_model.predict(img_array, verbose=0)
        main_class = self.stage1_classes[int(np.argmax(stage1_pred))]

        # Route to the sub-model that matches the stage-1 decision.
        if main_class == "00 - Abnormal":
            sub_model, sub_classes = self.abnormal_model, self.abnormal_classes
        else:
            sub_model, sub_classes = self.normal_model, self.normal_classes
        sub_pred = sub_model.predict(img_array, verbose=0)
        sub_class = sub_classes[int(np.argmax(sub_pred))]

        return {
            "stage1_class": main_class,
            "stage1_confidence": float(np.max(stage1_pred)),
            "stage2_class": sub_class,
            "stage2_confidence": float(np.max(sub_pred)),
            "final_prediction": f"{self._display_name(main_class)} → {self._display_name(sub_class)}"
        }
|
| 103 |
+
|
| 104 |
+
# Instantiate the classifier with loaded models and classes
|
| 105 |
+
classifier = HierarchicalClassifier(
|
| 106 |
+
stage1_model, abnormal_model, normal_model,
|
| 107 |
+
stage1_classes, abnormal_classes, normal_classes
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
# --- 4. The Main Prediction Function for Gradio ---
|
| 111 |
+
def predict_washing_machine_sound(audio_filepath):
    """Classify an uploaded audio file; this is the function Gradio calls.

    Args:
        audio_filepath: Path of the uploaded audio file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(text, image_path)`` tuple: the formatted prediction (or an error
        message) and the path of the generated spectrogram (``None`` on
        failure).
    """
    if audio_filepath is None:
        return "Please upload an audio file first.", None

    print(f"Processing file: {audio_filepath}")

    try:
        # Generate a spectrogram from the input audio file
        spec_path = save_mel_spectrogram(audio_filepath, TEMP_DIR)
        if not spec_path:
            return "Error: Could not generate spectrogram from the audio file.", None

        # Get prediction from the classifier
        result = classifier.predict(spec_path)

        # Format the output for better readability
        output_text = (
            f"🎯 Final Prediction: {result['final_prediction']}\n\n"
            f"Confidence Scores:\n"
            f"--------------------\n"
            f"Stage 1 ({result['stage1_class']}): {result['stage1_confidence']:.4f}\n"
            f"Stage 2 ({result['stage2_class']}): {result['stage2_confidence']:.4f}"
        )

        # The spectrogram file is deliberately NOT deleted here: its path is
        # returned so the image output can display it.
        return output_text, spec_path

    except Exception as e:
        print(f"An error occurred during prediction: {e}")
        return f"An error occurred: {str(e)}", None
|
| 158 |
+
|
| 159 |
+
# --- 5. Build and Launch the Gradio Interface ---
|
| 160 |
+
if __name__ == "__main__":
    # Example audio files from the dataset
    candidate_examples = [
        "Washing machine/00 - Abnormal/00-2 - Dehydration mode noise/04.wav",
        "Washing machine/01 - Normal/01-1 - Washing mode/01.wav",
        "Washing machine/00 - Abnormal/00-1 - Bearing noise/02.wav"
    ]
    # Only offer examples that exist on this machine; the dataset folder is
    # not necessarily deployed together with the app.
    # NOTE(review): confirm how the installed Gradio version reacts to
    # missing example files.
    example_files = [path for path in candidate_examples if os.path.isfile(path)]

    demo = gr.Interface(
        fn=predict_washing_machine_sound,
        inputs=gr.Audio(type="filepath", label="Upload Washing Machine Audio (.wav)"),
        outputs=[
            gr.Textbox(label="Prediction Result"),
            gr.Image(label="Generated Mel Spectrogram")
        ],
        title="Washing Machine Sound Classifier",
        description="Upload a WAV audio file of a washing machine to classify its operation status. The model performs a two-stage classification: first identifying 'Normal' vs 'Abnormal' sound, then determining the specific sub-type.",
        examples=example_files or None,
        # Gradio 5 replaced the deprecated `allow_flagging` with `flagging_mode`.
        flagging_mode="never"
    )

    # Launch the web UI
    demo.launch()

    # Clean up the entire temp directory once launch() returns
    try:
        print("\nCleaning up temporary files...")
        shutil.rmtree(TEMP_DIR)
        print("✅ Cleanup complete.")
    except Exception as e:
        print(f"Could not clean up temp files: {e}")
|
dl.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tensorflow as tf
|
| 2 |
+
from keras._tf_keras.keras.utils import image_dataset_from_directory
|
| 3 |
+
from keras._tf_keras.keras import Sequential, layers, models
|
| 4 |
+
import numpy as np
|
| 5 |
+
import os
|
| 6 |
+
img_size = (224, 224)
|
| 7 |
+
batch_size = 32
|
| 8 |
+
AUTOTUNE = tf.data.AUTOTUNE
|
| 9 |
+
|
| 10 |
+
normalization_layer = layers.Rescaling(1./255)
|
| 11 |
+
|
| 12 |
+
def preprocess_ds(ds, training=True):
    """Rescale the images of a dataset and attach caching and prefetching.

    Args:
        ds: Dataset yielding ``(images, labels)`` pairs.
        training: When true the cached dataset is additionally shuffled with
            a buffer of 1000 elements.

    Returns:
        The transformed dataset.
    """
    # Images go through the module-level Rescaling(1./255) layer; labels pass through.
    ds = ds.map(lambda x, y: (normalization_layer(x), y)).cache()
    if training:
        ds = ds.shuffle(1000)
    return ds.prefetch(buffer_size=AUTOTUNE)
|
| 19 |
+
|
| 20 |
+
def build_model(num_classes, input_shape=None):
    """Build and compile the small CNN used for every classification stage.

    Args:
        num_classes: Size of the softmax output layer.
        input_shape: Shape of one input image; defaults to ``img_size + (3,)``.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    if input_shape is None:
        input_shape = img_size + (3,)
    model = models.Sequential([
        # An explicit Input layer replaces `input_shape=` on the first Conv2D,
        # which Keras 3 warns about for Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation="relu"),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model
|
| 35 |
+
|
| 36 |
+
def _load_split(directory, subset):
    """Load the "training" or "validation" split (80/20, seed 42) of *directory*."""
    return image_dataset_from_directory(
        directory,
        labels="inferred",
        label_mode="int",
        validation_split=0.2,
        subset=subset,
        seed=42,
        image_size=img_size,
        batch_size=batch_size
    )


def _train_stage(directory, classes_label, banner, epochs=10):
    """Train one classifier on the images below *directory*.

    Returns:
        ``(model, class_names)`` -- the fitted model and the class names
        inferred from the training split.
    """
    train_ds = _load_split(directory, "training")
    val_ds = _load_split(directory, "validation")

    # class_names must be read before preprocess_ds() wraps the dataset.
    class_names = train_ds.class_names
    print(classes_label, class_names)

    train_ds = preprocess_ds(train_ds, training=True)
    val_ds = preprocess_ds(val_ds, training=False)

    print(banner)
    model = build_model(len(class_names))
    model.fit(train_ds, validation_data=val_ds, epochs=epochs)
    return model, class_names


stage1_model, stage1_classes = _train_stage(
    "MelSpectrograms",
    "Stage 1 Classes:",
    "Training Stage 1 (Normal vs Abnormal)..."
)

abnormal_model, abnormal_classes = _train_stage(
    "MelSpectrograms/00 - Abnormal",
    "Stage 2 Abnormal Classes:",
    "Training Stage 2 (Abnormal)..."
)

normal_model, normal_classes = _train_stage(
    "MelSpectrograms/01 - Normal",
    "Stage 2 Normal Classes:",
    "Training Stage 2 (Normal)..."
)
|
| 128 |
+
|
| 129 |
+
class HierarchicalClassifier:
    """Two-stage classifier: a main-class model followed by a sub-class model."""

    def __init__(self, stage1_model, abnormal_model, normal_model,
                 stage1_classes, abnormal_classes, normal_classes, img_size=(224, 224)):
        self.img_size = img_size
        self.stage1_model = stage1_model
        self.abnormal_model = abnormal_model
        self.normal_model = normal_model
        self.stage1_classes = stage1_classes
        self.abnormal_classes = abnormal_classes
        self.normal_classes = normal_classes

    def preprocess(self, image_path):
        """Load an image, resize it to ``self.img_size`` and return a batch of one."""
        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
        img_array = tf.keras.utils.img_to_array(img) / 255.0
        img_array = tf.expand_dims(img_array, 0)
        return img_array

    def predict(self, image_path):
        """Classify a spectrogram image with the two-stage pipeline.

        Returns:
            A dict with the keys ``stage1_class``, ``stage1_confidence``,
            ``stage2_class``, ``stage2_confidence`` and ``final_prediction``.
        """
        img_array = self.preprocess(image_path)
        # verbose=0 keeps single-image inference from printing a progress bar.
        stage1_pred = self.stage1_model.predict(img_array, verbose=0)
        main_class = self.stage1_classes[int(np.argmax(stage1_pred))]

        # Route to the sub-model that matches the stage-1 decision.
        if main_class == "00 - Abnormal":
            sub_model, sub_classes = self.abnormal_model, self.abnormal_classes
        else:
            sub_model, sub_classes = self.normal_model, self.normal_classes
        sub_pred = sub_model.predict(img_array, verbose=0)
        sub_class = sub_classes[int(np.argmax(sub_pred))]

        return {
            "stage1_class": main_class,
            "stage1_confidence": float(np.max(stage1_pred)),
            "stage2_class": sub_class,
            "stage2_confidence": float(np.max(sub_pred)),
            "final_prediction": f"{main_class} → {sub_class}"
        }
|
| 166 |
+
|
| 167 |
+
# classifier = HierarchicalClassifier(stage1_model, abnormal_model, normal_model,
|
| 168 |
+
# stage1_classes, abnormal_classes, normal_classes)
|
| 169 |
+
|
| 170 |
+
# result = classifier.predict("MelSpectrograms/00 - Abnormal/00-2 - Dehydration mode noise/01.png")
|
| 171 |
+
# print(result["final_prediction"])
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
# Save models after training
|
| 175 |
+
os.makedirs("saved_models", exist_ok=True)
|
| 176 |
+
stage1_model.save("saved_models/stage1_model.h5")
|
| 177 |
+
abnormal_model.save("saved_models/abnormal_model.h5")
|
| 178 |
+
normal_model.save("saved_models/normal_model.h5")
|
| 179 |
+
|
| 180 |
+
print("✅ Models saved in 'saved_models/' folder")
|
extractaudio.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import librosa
|
| 3 |
+
import librosa.display
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
def save_mel_spectrogram(file_path, save_dir, sr=22050, n_mels=128, hop_length=512, n_fft=2048):
    """Render the Mel spectrogram of an audio file to a PNG image.

    Args:
        file_path: Path of the audio file to load.
        save_dir: Directory the PNG is written to (created if missing).
        sr: Sampling rate passed to ``librosa.load``.
        n_mels: Number of Mel bands.
        hop_length: Hop length used for the spectrogram and its display.
        n_fft: FFT window size.

    Returns:
        The path of the saved PNG.
    """
    y, sr = librosa.load(file_path, sr=sr, mono=True)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
    S_db = librosa.power_to_db(S, ref=np.max)

    os.makedirs(save_dir, exist_ok=True)
    # Replace the real extension only; a substring replace of ".wav" could
    # also rewrite a ".wav" that appears in the middle of the file name.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    save_path = os.path.join(save_dir, stem + ".png")

    fig = plt.figure(figsize=(4, 4))
    try:
        librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return save_path
|
| 23 |
+
|
| 24 |
+
def process_dataset(wav_root, output_root, sr=22050, n_mels=128, hop_length=512, n_fft=2048):
    """Convert every ``.wav`` file below *wav_root* into a spectrogram image.

    The directory layout below *wav_root* is mirrored below *output_root*.

    Args:
        wav_root: Root directory that is walked recursively.
        output_root: Root directory for the generated images.
        sr, n_mels, hop_length, n_fft: Forwarded to ``save_mel_spectrogram``.
    """
    for root, _dirs, files in os.walk(wav_root):
        # All files of one folder share the same mirrored output folder.
        save_dir = os.path.join(output_root, os.path.relpath(root, wav_root))
        for file in files:
            if not file.endswith(".wav"):
                continue
            save_mel_spectrogram(
                os.path.join(root, file), save_dir,
                sr=sr, n_mels=n_mels, hop_length=hop_length, n_fft=n_fft
            )
|
| 32 |
+
|
| 33 |
+
wav_root = "Washing machine"
|
| 34 |
+
output_root = "MelSpectrograms"
|
| 35 |
+
process_dataset(wav_root, output_root)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
main.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import librosa
|
| 3 |
+
import librosa.display
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import numpy as np
|
| 6 |
+
import tensorflow as tf
|
| 7 |
+
from tensorflow.keras import models
|
| 8 |
+
|
| 9 |
+
# ========== Utility: Save mel spectrogram ==========
|
| 10 |
+
def save_mel_spectrogram(file_path, save_dir="temp_specs", sr=22050, n_mels=128, hop_length=512, n_fft=2048):
    """Render the Mel spectrogram of an audio file to a PNG image.

    Args:
        file_path: Path of the audio file to load.
        save_dir: Directory the PNG is written to (created if missing).
        sr: Sampling rate passed to ``librosa.load``.
        n_mels: Number of Mel bands.
        hop_length: Hop length used for the spectrogram and its display.
        n_fft: FFT window size.

    Returns:
        The path of the saved PNG.
    """
    y, sr = librosa.load(file_path, sr=sr, mono=True)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
    S_db = librosa.power_to_db(S, ref=np.max)

    os.makedirs(save_dir, exist_ok=True)
    # Replace the real extension only; with a substring replace of ".wav" a
    # non-WAV input would keep its audio extension and savefig() would fail.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    save_path = os.path.join(save_dir, stem + ".png")

    fig = plt.figure(figsize=(4, 4))
    try:
        librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
        plt.axis("off")
        plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return save_path
|
| 25 |
+
|
| 26 |
+
# ========== Hierarchical Classifier ==========
|
| 27 |
+
class HierarchicalClassifier:
    """Two-stage classifier: a main-class model followed by a sub-class model."""

    def __init__(self, stage1_model, abnormal_model, normal_model,
                 stage1_classes, abnormal_classes, normal_classes, img_size=(224, 224)):
        self.img_size = img_size
        self.stage1_model = stage1_model
        self.abnormal_model = abnormal_model
        self.normal_model = normal_model
        self.stage1_classes = stage1_classes
        self.abnormal_classes = abnormal_classes
        self.normal_classes = normal_classes

    def preprocess(self, image_path):
        """Load an image, resize it to ``self.img_size`` and return a batch of one."""
        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
        img_array = tf.keras.utils.img_to_array(img) / 255.0
        img_array = tf.expand_dims(img_array, 0)
        return img_array

    def predict(self, image_path):
        """Classify a spectrogram image with the two-stage pipeline.

        Returns:
            A dict with the keys ``stage1_class``, ``stage1_confidence``,
            ``stage2_class``, ``stage2_confidence`` and ``final_prediction``.
        """
        img_array = self.preprocess(image_path)
        stage1_pred = self.stage1_model.predict(img_array, verbose=0)
        main_class = self.stage1_classes[int(np.argmax(stage1_pred))]

        # Route to the sub-model that matches the stage-1 decision.
        if main_class == "00 - Abnormal":
            sub_model, sub_classes = self.abnormal_model, self.abnormal_classes
        else:
            sub_model, sub_classes = self.normal_model, self.normal_classes
        sub_pred = sub_model.predict(img_array, verbose=0)
        sub_class = sub_classes[int(np.argmax(sub_pred))]

        return {
            "stage1_class": main_class,
            "stage1_confidence": float(np.max(stage1_pred)),
            "stage2_class": sub_class,
            "stage2_confidence": float(np.max(sub_pred)),
            "final_prediction": f"{main_class} → {sub_class}"
        }
|
| 66 |
+
|
| 67 |
+
# ========== Load Models ==========
|
| 68 |
+
stage1_model = models.load_model("saved_models/stage1_model.h5")
abnormal_model = models.load_model("saved_models/abnormal_model.h5")
normal_model = models.load_model("saved_models/normal_model.h5")


def _list_class_dirs(root):
    """Return the sorted names of the visible sub-directories of *root*.

    os.listdir() returns entries in arbitrary order and includes plain files;
    sorting and filtering keeps the index -> label mapping stable.
    NOTE(review): this is meant to mirror how the training script derives
    `class_names` from the same folders -- confirm against the Keras version
    used for training.
    """
    return sorted(
        entry
        for entry in os.listdir(root)
        if not entry.startswith(".") and os.path.isdir(os.path.join(root, entry))
    )


# Define class lists (same order as training!)
stage1_classes = ["00 - Abnormal", "01 - Normal"]
abnormal_classes = _list_class_dirs("MelSpectrograms/00 - Abnormal")
normal_classes = _list_class_dirs("MelSpectrograms/01 - Normal")

classifier = HierarchicalClassifier(stage1_model, abnormal_model, normal_model,
                                    stage1_classes, abnormal_classes, normal_classes)

# ========== Example Inference ==========
# Forward slashes throughout: the former "Brain\Audio" contained the invalid
# escape sequence "\A".
audio_file = "C:/Users/dell/3D Objects/Samsung Prism/Brain/Audio/audio-washing-machine/Washing machine/00 - Abnormal/00-2 - Dehydration mode noise/04.wav"  # 🔹 Replace with your audio file
spec_path = save_mel_spectrogram(audio_file)

result = classifier.predict(spec_path)
print("🎯 Final Prediction:", result["final_prediction"])
print("Stage 1:", result["stage1_class"], "| Confidence:", result["stage1_confidence"])
print("Stage 2:", result["stage2_class"], "| Confidence:", result["stage2_confidence"])
|
| 88 |
+
|
| 89 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tensorflow
|
| 2 |
+
librosa
|
| 3 |
+
matplotlib
|
| 4 |
+
numpy
|
| 5 |
+
fastapi
|
| 6 |
+
uvicorn[standard]
|
| 7 |
+
python-multipart
|
| 8 |
+
gradio
|
saved_models/abnormal_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4f715c19462dfdbc5a6d9942d9512eca75a0825bfecc7931329cd8fe71414ef
|
| 3 |
+
size 134079792
|
saved_models/normal_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27545d01fffc5d97dacce571d62889a62fe72a4dcc3ff999f7c376fee160152b
|
| 3 |
+
size 134079792
|
saved_models/stage1_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eaff752b6defb58b63ec2fceeb267c343ad41e7b9e114de3f4b749ca5c7be1f
|
| 3 |
+
size 134077984
|