h3rsh
/

respiratory

Keras

Model card Files Files and versions

xet

Community

h3rsh committed on Jul 27, 2025

Commit

6a1466b

verified ·

1 Parent(s): 0a63270

Update inference.py

Browse files

Files changed (1) hide show

inference.py +145 -113

inference.py CHANGED Viewed

@@ -1,12 +1,16 @@
 import os
 import numpy as np
 import librosa
 import pickle
 import tensorflow as tf
-import gradio as gr
 from scipy import signal
 import warnings
 import tempfile
 warnings.filterwarnings("ignore", message="Trying to estimate tuning from empty frequency set.")
@@ -17,47 +21,64 @@ n_fft = 512
 hop_length = 512
 class RespiratoryPredictor:
-    def __init__(self, model_path='respiratory_model.keras', scalers_path='scalers.pkl',
-                 norm_params_path='norm_params.pkl', class_names_path='class_names.pkl'):
         """Initialize the predictor with trained model and scalers."""
         self.target_sr = target_sr
         self.target_duration = target_duration
         self.n_fft = n_fft
         self.hop_length = hop_length
-        # Load model
-        try:
-            self.model = tf.keras.models.load_model(model_path)
-            print(f"✓ Model loaded from {model_path}")
-        except Exception as e:
-            print(f"✗ Error loading model: {e}")
-            raise
         # Load scalers
         try:
-            with open(scalers_path, 'rb') as f:
                 self.scalers = pickle.load(f)
-            print(f"✓ Scalers loaded from {scalers_path}")
         except Exception as e:
-            print(f"✗ Error loading scalers: {e}")
             raise
         # Load normalization parameters
         try:
-            with open(norm_params_path, 'rb') as f:
                 self.norm_params = pickle.load(f)
-            print(f"✓ Normalization parameters loaded from {norm_params_path}")
         except Exception as e:
-            print(f"✗ Error loading normalization parameters: {e}")
             raise
         # Load class names
         try:
-            with open(class_names_path, 'rb') as f:
                 self.class_names = pickle.load(f)
-            print(f"✓ Class names loaded from {class_names_path}")
         except Exception as e:
-            print(f"✗ Error loading class names: {e}")
             raise
     def denoise_audio(self, audio, sr, methods=['adaptive_median', 'bandpass']):
@@ -134,19 +155,59 @@ class RespiratoryPredictor:
         return X_mfcc_norm, X_chroma_norm, X_mspec_norm
-    def predict_audio(self, audio_file_path):
-        """
-        Predict the class of an audio file for Gradio interface.
-        Args:
-            audio_file_path: Path to the uploaded audio file
-        Returns:
-            tuple: (prediction_text, confidence_text, probabilities_dict)
-        """
         try:
-            # Load and process audio
-            audio, sr = librosa.load(audio_file_path, sr=self.target_sr, duration=self.target_duration)
             # Ensure audio is the right length
             target_samples = self.target_sr * self.target_duration
@@ -180,103 +241,74 @@ class RespiratoryPredictor:
             # Get class name
             class_name = self.class_names[prediction] if prediction < len(self.class_names) else f"Class {prediction}"
-            # Format results for Gradio
-            prediction_text = f"**Prediction**: {class_name}"
-            confidence_text = f"**Confidence**: {confidence:.2%}"
-            # Create probabilities dictionary for all classes
-            probabilities_dict = {}
-            for i, (class_name_item, prob) in enumerate(zip(self.class_names, prediction_prob[0])):
-                probabilities_dict[class_name_item] = float(prob)
-            return prediction_text, confidence_text, probabilities_dict
         except Exception as e:
-            error_msg = f"Error processing audio: {str(e)}"
-            return error_msg, "", {}
-# Initialize the predictor
-print("Loading model and components...")
-try:
-    predictor = RespiratoryPredictor()
-    print("All components loaded successfully!")
-except Exception as e:
-    print(f"Failed to initialize predictor: {e}")
-    raise
-def predict_respiratory_sound(audio_file):
     """
-    Gradio interface function for respiratory sound prediction.
     Args:
-        audio_file: Uploaded audio file from Gradio
     Returns:
-        tuple: (prediction, confidence, probabilities)
     """
-    if audio_file is None:
-        return "Please upload an audio file", "", {}
-    return predictor.predict_audio(audio_file)
-# Create Gradio interface
-with gr.Blocks(title="Respiratory Sound Classifier", theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        #  Respiratory Sound Classification
-        Upload an audio file containing respiratory sounds to classify the type of breathing pattern.
-        **Supported formats**: WAV, MP3, M4A, FLAC
-        **Duration**: Audio will be processed as 4-second segments
-        """
-    )
-    with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(
-                label="Upload Respiratory Sound",
-                type="filepath",
-                sources=["upload"]
-            )
-            predict_btn = gr.Button("🔍 Analyze Sound", variant="primary")
-        with gr.Column():
-            prediction_output = gr.Markdown(label="Prediction")
-            confidence_output = gr.Markdown(label="Confidence")
-            probabilities_output = gr.Label(
-                label="Class Probabilities",
-                num_top_classes=len(predictor.class_names)
-            )
-    # Event handlers
-    predict_btn.click(
-        fn=predict_respiratory_sound,
-        inputs=[audio_input],
-        outputs=[prediction_output, confidence_output, probabilities_output]
-    )
-    # Auto-predict when file is uploaded
-    audio_input.change(
-        fn=predict_respiratory_sound,
-        inputs=[audio_input],
-        outputs=[prediction_output, confidence_output, probabilities_output]
-    )
-    gr.Markdown(
-        """
-        ---
-        ### About
-        This model classifies respiratory sounds into different categories.
-        Upload clear audio recordings of breathing sounds for best results.
-        **Note**: This is for research/educational purposes only and should not be used for medical diagnosis.
-        """
-    )
-# Launch the app
 if __name__ == "__main__":
-    demo.launch()

 import os
+import json
 import numpy as np
 import librosa
 import pickle
 import tensorflow as tf
 from scipy import signal
 import warnings
 import tempfile
+import base64
+from typing import Dict, List, Any, Union
+from io import BytesIO
+import soundfile as sf
 warnings.filterwarnings("ignore", message="Trying to estimate tuning from empty frequency set.")
 hop_length = 512
 class RespiratoryPredictor:
+    def __init__(self):
         """Initialize the predictor with trained model and scalers."""
         self.target_sr = target_sr
         self.target_duration = target_duration
         self.n_fft = n_fft
         self.hop_length = hop_length
+        # Load model with multiple fallback methods
+        model_loaded = False
+        model_path = 'respiratory_model.keras'
+        # Method 1: Try .keras format
+        if os.path.exists(model_path) and not model_loaded:
+            try:
+                self.model = tf.keras.models.load_model(model_path, compile=False)
+                print(f"Model loaded from .keras format: {model_path}")
+                model_loaded = True
+            except Exception as e:
+                print(f"Failed to load .keras format: {e}")
+        # Method 2: Try TensorFlow SavedModel format
+        tf_model_path = model_path.replace('.keras', '_tf')
+        if os.path.exists(tf_model_path) and not model_loaded:
+            try:
+                self.model = tf.keras.models.load_model(tf_model_path)
+                print(f"Model loaded from TF SavedModel format: {tf_model_path}")
+                model_loaded = True
+            except Exception as e:
+                print(f"Failed to load TF SavedModel format: {e}")
+        if not model_loaded:
+            raise RuntimeError("Failed to load model with any available method")
         # Load scalers
         try:
+            with open('scalers.pkl', 'rb') as f:
                 self.scalers = pickle.load(f)
+            print("Scalers loaded successfully")
         except Exception as e:
+            print(f"Error loading scalers: {e}")
             raise
         # Load normalization parameters
         try:
+            with open('norm_params.pkl', 'rb') as f:
                 self.norm_params = pickle.load(f)
+            print("Normalization parameters loaded successfully")
         except Exception as e:
+            print(f"Error loading normalization parameters: {e}")
             raise
         # Load class names
         try:
+            with open('class_names.pkl', 'rb') as f:
                 self.class_names = pickle.load(f)
+            print(f"Class names loaded: {self.class_names}")
         except Exception as e:
+            print(f"Error loading class names: {e}")
             raise
     def denoise_audio(self, audio, sr, methods=['adaptive_median', 'bandpass']):
         return X_mfcc_norm, X_chroma_norm, X_mspec_norm
+    def process_audio_from_bytes(self, audio_bytes: bytes) -> np.ndarray:
+        """Process audio from raw bytes data."""
+        try:
+            # Create a temporary file to write the audio bytes
+            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+                temp_file.write(audio_bytes)
+                temp_file_path = temp_file.name
+            # Load audio using librosa
+            audio, sr = librosa.load(temp_file_path, sr=self.target_sr, duration=self.target_duration)
+            # Clean up temporary file
+            os.unlink(temp_file_path)
+            return audio
+        except Exception as e:
+            # Fallback: try to read directly with soundfile
+            try:
+                audio_io = BytesIO(audio_bytes)
+                audio, sr = sf.read(audio_io)
+                # Resample if necessary
+                if sr != self.target_sr:
+                    audio = librosa.resample(audio, orig_sr=sr, target_sr=self.target_sr)
+                # Ensure mono
+                if len(audio.shape) > 1:
+                    audio = np.mean(audio, axis=1)
+                # Crop to target duration
+                target_samples = int(self.target_sr * self.target_duration)
+                if len(audio) > target_samples:
+                    audio = audio[:target_samples]
+                return audio
+            except Exception as e2:
+                raise Exception(f"Failed to process audio: {str(e)}, {str(e2)}")
+    def predict(self, audio_input: Union[str, bytes, np.ndarray]) -> Dict[str, Any]:
+        """Make prediction on audio input."""
         try:
+            # Handle different input types
+            if isinstance(audio_input, str):
+                # Assume it's base64 encoded
+                audio_bytes = base64.b64decode(audio_input)
+                audio = self.process_audio_from_bytes(audio_bytes)
+            elif isinstance(audio_input, bytes):
+                audio = self.process_audio_from_bytes(audio_input)
+            elif isinstance(audio_input, np.ndarray):
+                audio = audio_input
+            else:
+                raise ValueError(f"Unsupported audio input type: {type(audio_input)}")
             # Ensure audio is the right length
             target_samples = self.target_sr * self.target_duration
             # Get class name
             class_name = self.class_names[prediction] if prediction < len(self.class_names) else f"Class {prediction}"
+            # Create probabilities dictionary
+            probabilities = {}
+            for i, (cls_name, prob) in enumerate(zip(self.class_names, prediction_prob[0])):
+                probabilities[cls_name] = float(prob)
+            return {
+                "label": class_name,
+                "score": confidence,
+                "probabilities": probabilities
+            }
         except Exception as e:
+            return {
+                "error": str(e),
+                "label": None,
+                "score": 0.0
+            }
# Global predictor instance, created lazily on the first pipeline() call.
_predictor = None


def pipeline(inputs: Union[str, bytes, Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Hugging Face pipeline function for respiratory sound classification.

    Args:
        inputs: Can be:
            - Base64 encoded audio string
            - Raw audio bytes
            - Dictionary with an 'inputs' (preferred) or 'audio' key
              containing the audio data

    Returns:
        A one-element list holding the prediction dictionary. On failure the
        dictionary has an "error" message, "label" None and "score" 0.0.

    Raises:
        Exception: Only from the first-call predictor construction, which
            happens outside the error-to-dict handling below.
    """
    global _predictor

    # Initialize predictor if not already done
    if _predictor is None:
        print("Initializing respiratory sound predictor...")
        _predictor = RespiratoryPredictor()
        print("Predictor initialized successfully!")

    try:
        # Handle different input formats
        if isinstance(inputs, dict):
            audio_data = inputs.get('inputs', inputs.get('audio', ''))
        else:
            audio_data = inputs

        # A multi-element NumPy array has no truth value (`not arr` raises),
        # so test arrays by size and everything else by ordinary truthiness.
        if isinstance(audio_data, np.ndarray):
            is_empty = audio_data.size == 0
        else:
            is_empty = not audio_data
        if is_empty:
            return [{"error": "No audio data provided", "label": None, "score": 0.0}]

        # Return as list (Hugging Face expects list format)
        return [_predictor.predict(audio_data)]
    except Exception as e:
        return [{"error": str(e), "label": None, "score": 0.0}]
# Local smoke test; in deployment pipeline() is invoked by the hosting infrastructure.
if __name__ == "__main__":
    print("Testing pipeline function...")
    # No real audio is bundled, so send an empty payload: this still builds
    # the predictor and exercises the "no audio" response path.
    smoke_result = pipeline("")
    print(f"Pipeline ready! Test result: {smoke_result}")