Spaces:

Anvit25
/

new_audio

Sleeping

App Files Files Community

Anvit25 committed on Sep 27, 2025

Commit

32b6dba

1 Parent(s): f58f540

Add clean gradio app

Browse files

Files changed (11) hide show

.gitattributes +1 -0
.gitignore +0 -0
README.md +4 -4
app_gradio.py +191 -0
dl.py +180 -0
extractaudio.py +38 -0
main.py +89 -0
requirements.txt +8 -0
saved_models/abnormal_model.h5 +3 -0
saved_models/normal_model.h5 +3 -0
saved_models/stage1_model.h5 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mat filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

File without changes

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: New Audio
-emoji: 🐠
-colorFrom: blue
-colorTo: purple
 sdk: gradio
 sdk_version: 5.47.2
 app_file: app.py

 ---
+title: Audio
+emoji: 🔥
+colorFrom: red
+colorTo: indigo
 sdk: gradio
 sdk_version: 5.47.2
 app_file: app.py

app_gradio.py ADDED Viewed

	@@ -0,0 +1,191 @@

+import os
+import shutil
+import gradio as gr
+import numpy as np
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import tensorflow as tf
+from tensorflow.keras import models
+# --- 1. Configuration & Global Variables ---
+# Create a temporary directory for spectrograms if it doesn't exist
+TEMP_DIR = "temp_gradio_specs"
+os.makedirs(TEMP_DIR, exist_ok=True)
+# Define image size for the model
+IMG_SIZE = (224, 224)
+# --- 2. Load Models and Define Classes (Done once on startup) ---
+print("🚀 Loading machine learning models...")
+try:
+    stage1_model = models.load_model("saved_models/stage1_model.h5")
+    abnormal_model = models.load_model("saved_models/abnormal_model.h5")
+    normal_model = models.load_model("saved_models/normal_model.h5")
+    print("✅ Models loaded successfully.")
+except Exception as e:
+    print(f"❌ Error loading models: {e}")
+    # Exit if models can't be loaded
+    exit()
+# Define class lists exactly as they were during training
+stage1_classes = ["00 - Abnormal", "01 - Normal"]
+abnormal_classes = sorted(os.listdir("MelSpectrograms/00 - Abnormal"))
+normal_classes = sorted(os.listdir("MelSpectrograms/01 - Normal"))
+print(f"Stage 1 Classes: {stage1_classes}")
+print(f"Abnormal Sub-classes: {abnormal_classes}")
+print(f"Normal Sub-classes: {normal_classes}")
+# --- 3. Helper Functions and Classes ---
+def save_mel_spectrogram(file_path, save_dir, sr=22050, n_mels=128, hop_length=512, n_fft=2048):
+    """Generates and saves a Mel Spectrogram from an audio file."""
+    try:
+        y, sr = librosa.load(file_path, sr=sr, mono=True)
+        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
+        S_db = librosa.power_to_db(S, ref=np.max)
+        filename = os.path.basename(file_path).replace(".wav", ".png")
+        save_path = os.path.join(save_dir, filename)
+        plt.figure(figsize=(4, 4))
+        librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
+        plt.axis("off")
+        plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
+        plt.close()
+        return save_path
+    except Exception as e:
+        print(f"Error creating spectrogram: {e}")
+        return None
+class HierarchicalClassifier:
+    """A wrapper class for the two-stage prediction logic."""
+    def __init__(self, stage1_model, abnormal_model, normal_model,
+                 stage1_classes, abnormal_classes, normal_classes):
+        self.img_size = IMG_SIZE
+        self.stage1_model = stage1_model
+        self.abnormal_model = abnormal_model
+        self.normal_model = normal_model
+        self.stage1_classes = stage1_classes
+        self.abnormal_classes = abnormal_classes
+        self.normal_classes = normal_classes
+    def _preprocess_image(self, image_path):
+        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
+        img_array = tf.keras.utils.img_to_array(img) / 255.0
+        img_array = tf.expand_dims(img_array, 0)
+        return img_array
+    def predict(self, image_path):
+        img_array = self._preprocess_image(image_path)
+        stage1_pred = self.stage1_model.predict(img_array, verbose=0)
+        stage1_idx = np.argmax(stage1_pred)
+        main_class = self.stage1_classes[stage1_idx]
+        if main_class == "00 - Abnormal":
+            sub_pred = self.abnormal_model.predict(img_array, verbose=0)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.abnormal_classes[sub_idx]
+        else:
+            sub_pred = self.normal_model.predict(img_array, verbose=0)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.normal_classes[sub_idx]
+        return {
+            "stage1_class": main_class,
+            "stage1_confidence": float(np.max(stage1_pred)),
+            "stage2_class": sub_class,
+            "stage2_confidence": float(np.max(sub_pred)),
+            "final_prediction": f"{main_class.split(' - ')[1]} → {sub_class.split(' - ')[1]}"
+        }
+# Instantiate the classifier with loaded models and classes
+classifier = HierarchicalClassifier(
+    stage1_model, abnormal_model, normal_model,
+    stage1_classes, abnormal_classes, normal_classes
+)
+# --- 4. The Main Prediction Function for Gradio ---
+def predict_washing_machine_sound(audio_filepath):
+    """
+    This is the core function that Gradio will call.
+    It takes an audio file path, processes it, and returns the formatted result.
+    """
+    if audio_filepath is None:
+        return "Please upload an audio file first.", None
+    print(f"Processing file: {audio_filepath}")
+    # The spectrogram path needs to be cleaned up after prediction
+    spec_path = None
+    try:
+        # Generate a spectrogram from the input audio file
+        spec_path = save_mel_spectrogram(audio_filepath, TEMP_DIR)
+        if not spec_path:
+            return "Error: Could not generate spectrogram from the audio file.", None
+        # Get prediction from the classifier
+        result = classifier.predict(spec_path)
+        # Format the output for better readability
+        output_text = (
+            f"🎯 Final Prediction: {result['final_prediction']}\n\n"
+            f"Confidence Scores:\n"
+            f"--------------------\n"
+            f"Stage 1 ({result['stage1_class']}): {result['stage1_confidence']:.4f}\n"
+            f"Stage 2 ({result['stage2_class']}): {result['stage2_confidence']:.4f}"
+        )
+        # Return the formatted text and the path to the spectrogram image to display it
+        return output_text, spec_path
+    except Exception as e:
+        print(f"An error occurred during prediction: {e}")
+        return f"An error occurred: {str(e)}", None
+    finally:
+        # Clean up the generated spectrogram image file after it's been used
+        # Gradio handles the temp audio file, but we must handle the temp spectrogram
+        if spec_path and os.path.exists(spec_path):
+            # Note: Gradio might need the file to display it, so cleaning up here
+            # might be too early if the image component relies on the path.
+            # For simplicity, we can let them accumulate in the temp folder or
+            # implement more complex cleanup later. Let's comment out the immediate delete.
+            # os.remove(spec_path)
+            pass
+# --- 5. Build and Launch the Gradio Interface ---
+if __name__ == "__main__":
+    # Define some example audio files from your dataset
+    example_files = [
+        "Washing machine/00 - Abnormal/00-2 - Dehydration mode noise/04.wav",
+        "Washing machine/01 - Normal/01-1 - Washing mode/01.wav",
+        "Washing machine/00 - Abnormal/00-1 - Bearing noise/02.wav"
+    ]
+    demo = gr.Interface(
+        fn=predict_washing_machine_sound,
+        inputs=gr.Audio(type="filepath", label="Upload Washing Machine Audio (.wav)"),
+        outputs=[
+            gr.Textbox(label="Prediction Result"),
+            gr.Image(label="Generated Mel Spectrogram")
+        ],
+        title="Washing Machine Sound Classifier",
+        description="Upload a WAV audio file of a washing machine to classify its operation status. The model performs a two-stage classification: first identifying 'Normal' vs 'Abnormal' sound, then determining the specific sub-type.",
+        examples=example_files,
+        allow_flagging="never"
+    )
+    # Launch the web UI
+    demo.launch()
+    # Clean up the entire temp directory on exit
+    # This is a simple way to manage temp files
+    try:
+        print("\nCleaning up temporary files...")
+        shutil.rmtree(TEMP_DIR)
+        print("✅ Cleanup complete.")
+    except Exception as e:
+        print(f"Could not clean up temp files: {e}")

dl.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import tensorflow as tf
+from keras._tf_keras.keras.utils import image_dataset_from_directory
+from keras._tf_keras.keras import Sequential, layers, models
+import numpy as np
+import os
+# Target (height, width) every spectrogram image is resized to when loaded
+img_size = (224, 224)
+# Number of images per batch produced by the dataset loaders
+batch_size = 32
+# Lets tf.data choose the prefetch buffer size at runtime
+AUTOTUNE = tf.data.AUTOTUNE
+# Shared layer that multiplies pixel values by 1/255
+normalization_layer = layers.Rescaling(1./255)
+def preprocess_ds(ds, training=True):
+    ds = ds.map(lambda x, y: (normalization_layer(x), y))
+    if training:
+        ds = ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
+    else:
+        ds = ds.cache().prefetch(buffer_size=AUTOTUNE)
+    return ds
+def build_model(num_classes):
+    model = models.Sequential([
+        layers.Conv2D(32, (3, 3), activation="relu", input_shape=img_size + (3,)),
+        layers.MaxPooling2D((2, 2)),
+        layers.Conv2D(64, (3, 3), activation="relu"),
+        layers.MaxPooling2D((2, 2)),
+        layers.Conv2D(128, (3, 3), activation="relu"),
+        layers.MaxPooling2D((2, 2)),
+        layers.Flatten(),
+        layers.Dense(128, activation="relu"),
+        layers.Dropout(0.3),
+        layers.Dense(num_classes, activation="softmax")
+    ])
+    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
+    return model
+stage1_train = image_dataset_from_directory(
+    "MelSpectrograms",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="training",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+stage1_val = image_dataset_from_directory(
+    "MelSpectrograms",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="validation",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+stage1_classes = stage1_train.class_names
+print("Stage 1 Classes:", stage1_classes)
+stage1_train = preprocess_ds(stage1_train, training=True)
+stage1_val = preprocess_ds(stage1_val, training=False)
+print("Training Stage 1 (Normal vs Abnormal)...")
+stage1_model = build_model(len(stage1_classes))
+stage1_model.fit(stage1_train, validation_data=stage1_val, epochs=10)
+abnormal_train = image_dataset_from_directory(
+    "MelSpectrograms/00 - Abnormal",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="training",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+abnormal_val = image_dataset_from_directory(
+    "MelSpectrograms/00 - Abnormal",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="validation",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+abnormal_classes = abnormal_train.class_names
+print("Stage 2 Abnormal Classes:", abnormal_classes)
+abnormal_train = preprocess_ds(abnormal_train, training=True)
+abnormal_val = preprocess_ds(abnormal_val, training=False)
+print("Training Stage 2 (Abnormal)...")
+abnormal_model = build_model(len(abnormal_classes))
+abnormal_model.fit(abnormal_train, validation_data=abnormal_val, epochs=10)
+normal_train = image_dataset_from_directory(
+    "MelSpectrograms/01 - Normal",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="training",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+normal_val = image_dataset_from_directory(
+    "MelSpectrograms/01 - Normal",
+    labels="inferred",
+    label_mode="int",
+    validation_split=0.2,
+    subset="validation",
+    seed=42,
+    image_size=img_size,
+    batch_size=batch_size
+)
+normal_classes = normal_train.class_names
+print("Stage 2 Normal Classes:", normal_classes)
+normal_train = preprocess_ds(normal_train, training=True)
+normal_val = preprocess_ds(normal_val, training=False)
+print("Training Stage 2 (Normal)...")
+normal_model = build_model(len(normal_classes))
+normal_model.fit(normal_train, validation_data=normal_val, epochs=10)
+class HierarchicalClassifier:
+    def __init__(self, stage1_model, abnormal_model, normal_model,
+                 stage1_classes, abnormal_classes, normal_classes, img_size=(224, 224)):
+        self.img_size = img_size
+        self.stage1_model = stage1_model
+        self.abnormal_model = abnormal_model
+        self.normal_model = normal_model
+        self.stage1_classes = stage1_classes
+        self.abnormal_classes = abnormal_classes
+        self.normal_classes = normal_classes
+    def preprocess(self, image_path):
+        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
+        img_array = tf.keras.utils.img_to_array(img) / 255.0
+        img_array = tf.expand_dims(img_array, 0)
+        return img_array
+    def predict(self, image_path):
+        img_array = self.preprocess(image_path)
+        stage1_pred = self.stage1_model.predict(img_array)
+        stage1_idx = np.argmax(stage1_pred)
+        main_class = self.stage1_classes[stage1_idx]
+        if main_class == "00 - Abnormal":
+            sub_pred = self.abnormal_model.predict(img_array)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.abnormal_classes[sub_idx]
+        else:
+            sub_pred = self.normal_model.predict(img_array)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.normal_classes[sub_idx]
+        return {
+            "stage1_class": main_class,
+            "stage1_confidence": float(np.max(stage1_pred)),
+            "stage2_class": sub_class,
+            "stage2_confidence": float(np.max(sub_pred)),
+            "final_prediction": f"{main_class} → {sub_class}"
+        }
+# classifier = HierarchicalClassifier(stage1_model, abnormal_model, normal_model,
+#                                     stage1_classes, abnormal_classes, normal_classes)
+# result = classifier.predict("MelSpectrograms/00 - Abnormal/00-2 - Dehydration mode noise/01.png")
+# print(result["final_prediction"])
+# Save models after training
+os.makedirs("saved_models", exist_ok=True)
+stage1_model.save("saved_models/stage1_model.h5")
+abnormal_model.save("saved_models/abnormal_model.h5")
+normal_model.save("saved_models/normal_model.h5")
+print("✅ Models saved in 'saved_models/' folder")

extractaudio.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+def save_mel_spectrogram(file_path, save_dir, sr=22050, n_mels=128, hop_length=512, n_fft=2048):
+    y, sr = librosa.load(file_path, sr=sr, mono=True)
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
+    S_db = librosa.power_to_db(S, ref=np.max)
+    os.makedirs(save_dir, exist_ok=True)
+    save_path = os.path.join(save_dir, os.path.basename(file_path).replace(".wav", ".png"))
+    plt.figure(figsize=(4, 4))
+    librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
+    plt.axis('off')
+    plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
+    plt.close()
+    return save_path
+def process_dataset(wav_root, output_root, sr=22050, n_mels=128):
+    for root, dirs, files in os.walk(wav_root):
+        for file in files:
+            if file.endswith(".wav"):
+                file_path = os.path.join(root, file)
+                rel_path = os.path.relpath(root, wav_root)
+                save_dir = os.path.join(output_root, rel_path)
+                save_mel_spectrogram(file_path, save_dir, sr=sr, n_mels=n_mels)
+# Root folder of the source recordings that is walked for ".wav" files
+wav_root = "Washing machine"
+# Root folder the spectrogram images are written to, mirroring wav_root's layout
+output_root = "MelSpectrograms"
+# Runs at module level, so the conversion starts as soon as this file is executed or imported
+process_dataset(wav_root, output_root)

main.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import os
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import models
+# ========== Utility: Save mel spectrogram ==========
+def save_mel_spectrogram(file_path, save_dir="temp_specs", sr=22050, n_mels=128, hop_length=512, n_fft=2048):
+    y, sr = librosa.load(file_path, sr=sr, mono=True)
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
+    S_db = librosa.power_to_db(S, ref=np.max)
+    os.makedirs(save_dir, exist_ok=True)
+    save_path = os.path.join(save_dir, os.path.basename(file_path).replace(".wav", ".png"))
+    plt.figure(figsize=(4, 4))
+    librosa.display.specshow(S_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', cmap='magma')
+    plt.axis("off")
+    plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
+    plt.close()
+    return save_path
+# ========== Hierarchical Classifier ==========
+class HierarchicalClassifier:
+    def __init__(self, stage1_model, abnormal_model, normal_model,
+                 stage1_classes, abnormal_classes, normal_classes, img_size=(224, 224)):
+        self.img_size = img_size
+        self.stage1_model = stage1_model
+        self.abnormal_model = abnormal_model
+        self.normal_model = normal_model
+        self.stage1_classes = stage1_classes
+        self.abnormal_classes = abnormal_classes
+        self.normal_classes = normal_classes
+    def preprocess(self, image_path):
+        img = tf.keras.utils.load_img(image_path, target_size=self.img_size)
+        img_array = tf.keras.utils.img_to_array(img) / 255.0
+        img_array = tf.expand_dims(img_array, 0)
+        return img_array
+    def predict(self, image_path):
+        img_array = self.preprocess(image_path)
+        stage1_pred = self.stage1_model.predict(img_array, verbose=0)
+        stage1_idx = np.argmax(stage1_pred)
+        main_class = self.stage1_classes[stage1_idx]
+        if main_class == "00 - Abnormal":
+            sub_pred = self.abnormal_model.predict(img_array, verbose=0)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.abnormal_classes[sub_idx]
+        else:
+            sub_pred = self.normal_model.predict(img_array, verbose=0)
+            sub_idx = np.argmax(sub_pred)
+            sub_class = self.normal_classes[sub_idx]
+        return {
+            "stage1_class": main_class,
+            "stage1_confidence": float(np.max(stage1_pred)),
+            "stage2_class": sub_class,
+            "stage2_confidence": float(np.max(sub_pred)),
+            "final_prediction": f"{main_class} → {sub_class}"
+        }
+# ========== Load Models ==========
+stage1_model = models.load_model("saved_models/stage1_model.h5")
+abnormal_model = models.load_model("saved_models/abnormal_model.h5")
+normal_model = models.load_model("saved_models/normal_model.h5")
+# Define class lists (same order as training!)
+stage1_classes = ["00 - Abnormal", "01 - Normal"]
+abnormal_classes = os.listdir("MelSpectrograms/00 - Abnormal")
+normal_classes = os.listdir("MelSpectrograms/01 - Normal")
+classifier = HierarchicalClassifier(stage1_model, abnormal_model, normal_model,
+                                    stage1_classes, abnormal_classes, normal_classes)
+# ========== Example Inference ==========
+audio_file = "C:/Users/dell/3D Objects/Samsung Prism/Brain\Audio/audio-washing-machine/Washing machine/00 - Abnormal/00-2 - Dehydration mode noise/04.wav"   # 🔹 Replace with your audio file
+spec_path = save_mel_spectrogram(audio_file)
+result = classifier.predict(spec_path)
+print("🎯 Final Prediction:", result["final_prediction"])
+print("Stage 1:", result["stage1_class"], "| Confidence:", result["stage1_confidence"])
+print("Stage 2:", result["stage2_class"], "| Confidence:", result["stage2_confidence"])

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+tensorflow
+librosa
+matplotlib
+numpy
+fastapi
+uvicorn[standard]
+python-multipart
+gradio

saved_models/abnormal_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4f715c19462dfdbc5a6d9942d9512eca75a0825bfecc7931329cd8fe71414ef
+size 134079792

saved_models/normal_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27545d01fffc5d97dacce571d62889a62fe72a4dcc3ff999f7c376fee160152b
+size 134079792

saved_models/stage1_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eaff752b6defb58b63ec2fceeb267c343ad41e7b9e114de3f4b749ca5c7be1f
+size 134077984