Spaces:

h3rsh
/

resp

Sleeping

App Files Files Community

h3rsh committed on Jul 27, 2025

Commit

e912d0a

verified ·

1 Parent(s): 4a70560

Create app.py

Browse files

Files changed (1) hide show

app.py +282 -0

app.py ADDED Viewed

	@@ -0,0 +1,282 @@

+import os
+import numpy as np
+import librosa
+import pickle
+import tensorflow as tf
+import gradio as gr
+from scipy import signal
+import warnings
+import tempfile
# Suppress the "empty frequency set" tuning warning (presumably emitted by
# librosa during chroma extraction on silent or padded clips — confirm).
warnings.filterwarnings(
    "ignore",
    message="Trying to estimate tuning from empty frequency set.",
)

# Shared audio settings. They must stay identical to the values used when the
# model was trained, otherwise the extracted features will not line up with
# what the network expects.
target_sr, target_duration = 22050, 4  # load sample rate; clip length in seconds
n_fft = hop_length = 512  # FFT window and hop, both in samples
class RespiratoryPredictor:
    """Inference wrapper around a trained multi-input respiratory-sound model.

    The predictor loads a Keras model plus three pickled artefacts (scalers,
    normalization statistics and class names) and exposes `predict_audio`,
    which turns an audio file path into the three values shown by the UI.

    NOTE(review): `self.scalers` is loaded but not referenced by any method of
    this class; it is kept so the attribute stays available to callers.
    """

    def __init__(self, model_path='respiratory_model.keras', scalers_path='scalers.pkl',
                 norm_params_path='norm_params.pkl', class_names_path='class_names.pkl'):
        """Initialize the predictor with trained model and scalers.

        Args:
            model_path: Path of the saved Keras model.
            scalers_path: Path of the pickled scalers.
            norm_params_path: Path of the pickled normalization statistics
                (read by `normalize_features` via the `*_mean` / `*_std` keys).
            class_names_path: Path of the pickled class-name sequence.

        Raises:
            Exception: Whatever the underlying loader raised; the error is
                printed first and then re-raised unchanged.
        """
        # Copy the module-level settings so every method reads them from self.
        self.target_sr = target_sr
        self.target_duration = target_duration
        self.n_fft = n_fft
        self.hop_length = hop_length
        # Load model
        try:
            self.model = tf.keras.models.load_model(model_path)
            print(f"✓ Model loaded from {model_path}")
        except Exception as e:
            print(f"✗ Error loading model: {e}")
            raise
        # Load the pickled artefacts (same messages as before, one shared helper).
        self.scalers = self._load_pickle(scalers_path, "Scalers")
        self.norm_params = self._load_pickle(norm_params_path, "Normalization parameters")
        self.class_names = self._load_pickle(class_names_path, "Class names")

    @staticmethod
    def _load_pickle(path, label):
        """Unpickle `path`, printing a success or failure line tagged with `label`.

        SECURITY: `pickle.load` can execute arbitrary code embedded in the
        file. Only point this at artefacts shipped with the application, never
        at user-supplied uploads.
        """
        try:
            with open(path, 'rb') as f:
                loaded = pickle.load(f)
            print(f"✓ {label} loaded from {path}")
        except Exception as e:
            print(f"✗ Error loading {label.lower()}: {e}")
            raise
        return loaded

    def denoise_audio(self, audio, sr, methods=('adaptive_median', 'bandpass')):
        """Denoise audio signal.

        Args:
            audio: 1-D sample array; it is copied, never modified in place.
            sr: Sample rate of `audio` in Hz.
            methods: Filter names applied in order. `'adaptive_median'` runs a
                median filter over a ~10 ms window and `'bandpass'` a 4th-order
                Butterworth 50-2000 Hz zero-phase filter. Names that are not
                recognised are skipped.

        Returns:
            The filtered signal as a new array.
        """
        # The default is a tuple rather than a list so it cannot be mutated
        # and leak state between calls.
        denoised_audio = audio.copy()
        for method in methods:
            if method == 'adaptive_median':
                window_size = int(sr * 0.01)  # 10 ms window
                if window_size % 2 == 0:
                    window_size += 1  # medfilt requires an odd kernel size
                denoised_audio = signal.medfilt(denoised_audio, kernel_size=window_size)
            elif method == 'bandpass':
                low_freq = 50
                high_freq = 2000
                nyquist = sr / 2
                # butter() expects the band edges as fractions of Nyquist.
                low = low_freq / nyquist
                high = high_freq / nyquist
                b, a = signal.butter(4, [low, high], btype='band')
                denoised_audio = signal.filtfilt(b, a, denoised_audio)
        return denoised_audio

    def extract_features(self, audio_data, sr):
        """Extract features from audio in the same format as during training.

        Args:
            audio_data: 1-D sample array.
            sr: Sample rate of `audio_data` in Hz.

        Returns:
            dict with keys `'mel_spec'` (128-band mel spectrogram in dB relative
            to its own maximum), `'mfcc'` (20 coefficients) and `'chroma'`.
        """
        # Mel spectrogram
        mel_spec = librosa.feature.melspectrogram(
            y=audio_data, sr=sr, n_mels=128, n_fft=self.n_fft, hop_length=self.hop_length)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
        # MFCC and chroma: n_fft is not passed here, so librosa's own default
        # applies. NOTE(review): left as-is because it must match training.
        mfcc = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=20, hop_length=self.hop_length)
        chroma = librosa.feature.chroma_stft(y=audio_data, sr=sr, hop_length=self.hop_length)
        return {
            'mel_spec': mel_spec_db,
            'mfcc': mfcc,
            'chroma': chroma,
        }

    def pad_or_crop(self, arr, shape):
        """Pad or crop array to target shape.

        Args:
            arr: 2-D array.
            shape: Target `(rows, cols)`.

        Returns:
            A new array of `shape` and `arr.dtype`; the overlapping top-left
            region is copied from `arr` and any remainder is zero-filled.
        """
        out = np.zeros(shape, dtype=arr.dtype)
        n_feat, n_fr = arr.shape
        rows = min(n_feat, shape[0])
        cols = min(n_fr, shape[1])
        out[:rows, :cols] = arr[:rows, :cols]
        return out

    def prepare_input_data(self, features, n_frames=259):
        """Prepare input data for the multi-input model.

        Args:
            features: dict as returned by `extract_features`.
            n_frames: Number of time frames every feature map is padded or
                cropped to.

        Returns:
            tuple `(X_mfcc, X_chroma, X_mspec)` with shapes
            `(20, n_frames, 1)`, `(12, n_frames, 1)` and `(128, n_frames, 1)`.
        """
        mfcc = self.pad_or_crop(features['mfcc'], (20, n_frames))
        chroma = self.pad_or_crop(features['chroma'], (12, n_frames))
        mspec = self.pad_or_crop(features['mel_spec'], (128, n_frames))
        # Add channel dimension
        return mfcc[..., np.newaxis], chroma[..., np.newaxis], mspec[..., np.newaxis]

    def normalize_features(self, X_mfcc, X_chroma, X_mspec):
        """Normalize features using the same parameters as training.

        Each input is standardised as `(X - mean) / (std + 1e-8)` using the
        `<name>_mean` / `<name>_std` entries of `self.norm_params`.

        Returns:
            tuple of the three normalized arrays, each with its input's shape.
        """
        def norm(X, mean, std):
            mean = np.asarray(mean)
            std = np.asarray(std)
            # This method is called on a single sample (no batch axis yet), so
            # statistics stored per flattened element, e.g. shape (1, D) or
            # (D,), cannot broadcast against X.reshape(X.shape[0], -1).
            # Flatten the whole sample in that case.
            if mean.size > 1 and mean.size == X.size:
                flat = X.reshape(1, -1)
                scaled = (flat - mean.reshape(1, -1)) / (std.reshape(1, -1) + 1e-8)
                return scaled.reshape(X.shape)
            # Scalar (or otherwise broadcastable) statistics: original path.
            Xf = X.reshape(X.shape[0], -1)
            Xn = (Xf - mean) / (std + 1e-8)
            return Xn.reshape(X.shape)

        params = self.norm_params
        X_mfcc_norm = norm(X_mfcc, params['mfcc_mean'], params['mfcc_std'])
        X_chroma_norm = norm(X_chroma, params['chroma_mean'], params['chroma_std'])
        X_mspec_norm = norm(X_mspec, params['mspec_mean'], params['mspec_std'])
        return X_mfcc_norm, X_chroma_norm, X_mspec_norm

    def predict_audio(self, audio_file_path):
        """
        Predict the class of an audio file for Gradio interface.

        Any exception raised while processing is caught and reported through
        the first element of the returned tuple instead of propagating.

        Args:
            audio_file_path: Path to the uploaded audio file
        Returns:
            tuple: (prediction_text, confidence_text, probabilities_dict)
        """
        try:
            # Load and process audio
            audio, _ = librosa.load(audio_file_path, sr=self.target_sr, duration=self.target_duration)
            # Ensure audio is the right length
            target_samples = self.target_sr * self.target_duration
            if len(audio) < target_samples:
                audio = np.pad(audio, (0, target_samples - len(audio)), mode='constant')
            elif len(audio) > target_samples:
                audio = audio[:target_samples]
            # Denoise audio
            denoised_audio = self.denoise_audio(audio, self.target_sr)
            # Extract features
            features = self.extract_features(denoised_audio, self.target_sr)
            # Prepare input data
            X_mfcc, X_chroma, X_mspec = self.prepare_input_data(features)
            # Normalize features
            X_mfcc_norm, X_chroma_norm, X_mspec_norm = self.normalize_features(X_mfcc, X_chroma, X_mspec)
            # Add batch dimension
            X_mfcc_batch = np.expand_dims(X_mfcc_norm, axis=0)
            X_chroma_batch = np.expand_dims(X_chroma_norm, axis=0)
            X_mspec_batch = np.expand_dims(X_mspec_norm, axis=0)
            # Make prediction
            prediction_prob = self.model.predict([X_mfcc_batch, X_chroma_batch, X_mspec_batch], verbose=0)
            probs = prediction_prob[0]
            prediction = int(np.argmax(probs))
            confidence = float(np.max(probs))
            # Get class name (fall back to a generic label if the model emits
            # more outputs than there are stored names)
            class_name = self.class_names[prediction] if prediction < len(self.class_names) else f"Class {prediction}"
            # Format results for Gradio
            prediction_text = f"🎯 **Prediction**: {class_name}"
            confidence_text = f"📊 **Confidence**: {confidence:.2%}"
            # Plain str/float so the mapping is serialisable whatever type the
            # pickled class names have.
            probabilities_dict = {
                str(name): float(prob) for name, prob in zip(self.class_names, probs)
            }
            return prediction_text, confidence_text, probabilities_dict
        except Exception as e:
            # UI boundary: surface the failure as text rather than crashing.
            error_msg = f"❌ Error processing audio: {str(e)}"
            return error_msg, "", {}
# Build the single shared predictor at import time. A failure is reported on
# stdout and then re-raised so the app does not start without a model.
print("Loading model and components...")
try:
    predictor = RespiratoryPredictor()
except Exception as init_error:
    print(f"❌ Failed to initialize predictor: {init_error}")
    raise
else:
    print("✅ All components loaded successfully!")
def predict_respiratory_sound(audio_file):
    """
    Gradio interface function for respiratory sound prediction.

    Args:
        audio_file: Path of the uploaded audio file, or None / empty when no
            file is currently selected.
    Returns:
        tuple: (prediction, confidence, probabilities)
    """
    # Treat an empty path the same as None: without this guard "" would be
    # passed to the audio loader and come back as a file-not-found error
    # instead of the upload prompt.
    if not audio_file:
        return "⚠️ Please upload an audio file", "", {}
    return predictor.predict_audio(audio_file)
# Assemble the Gradio UI: an upload column on the left, results on the right.
with gr.Blocks(title="Respiratory Sound Classifier", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🫁 Respiratory Sound Classification
        Upload an audio file containing respiratory sounds to classify the type of breathing pattern.
        **Supported formats**: WAV, MP3, M4A, FLAC
        **Duration**: Audio will be processed as 4-second segments
        """
    )

    with gr.Row():
        # Input side: file upload plus an explicit "analyze" button.
        with gr.Column():
            audio_input = gr.Audio(
                label="📤 Upload Respiratory Sound",
                type="filepath",
                sources=["upload"],
            )
            predict_btn = gr.Button("🔍 Analyze Sound", variant="primary")
        # Output side: headline prediction, confidence, and one bar per class.
        with gr.Column():
            prediction_output = gr.Markdown(label="🎯 Prediction")
            confidence_output = gr.Markdown(label="📊 Confidence")
            probabilities_output = gr.Label(
                label="📈 Class Probabilities",
                num_top_classes=len(predictor.class_names),
            )

    # The button click and a change of the uploaded file both run the same
    # handler with the same wiring, so register them in one pass.
    result_widgets = (prediction_output, confidence_output, probabilities_output)
    for register_event in (predict_btn.click, audio_input.change):
        register_event(
            fn=predict_respiratory_sound,
            inputs=[audio_input],
            outputs=list(result_widgets),
        )

    gr.Markdown(
        """
        ---
        ### ℹ️ About
        This model classifies respiratory sounds into different categories.
        Upload clear audio recordings of breathing sounds for best results.
        **Note**: This is for research/educational purposes only and should not be used for medical diagnosis.
        """
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()