"""Gradio app that classifies the context of a recorded cat meow.

A Keras model is downloaded from the Hugging Face Hub at import time. Each
recording is converted to a fixed-size log-mel spectrogram and passed to the
model, and the predicted context is shown as text.
"""

import gradio as gr
import tensorflow as tf
import numpy as np
import librosa
from huggingface_hub import hf_hub_download

# Spectrogram geometry fed to the model: (1, N_MELS, MAX_FRAMES, 1).
N_MELS = 128
MAX_FRAMES = 174
FMAX_HZ = 8000

# Output index -> human-readable label, in the order used by predict_cat_sound.
CLASS_NAMES = ['brushing', 'waiting for food', 'isolation']

# Load model with error handling. A failure is logged and leaves ``model`` as
# None so the UI can still start and report the problem to the user.
try:
    # Download model file from Hugging Face with specific path
    model_path = hf_hub_download(
        repo_id="IsolaHGVIS/Cat-Meow-Classification",
        filename="best_model_fold_2.h5",
        repo_type="model",
    )

    # Load the model
    model = tf.keras.models.load_model(model_path)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {str(e)}")
    model = None


def _to_float_mono(audio):
    """Return *audio* as a 1-D float32 array, scaling integer PCM to [-1, 1]."""
    samples = np.asarray(audio)

    if np.issubdtype(samples.dtype, np.integer):
        # Gradio hands over integer PCM, but librosa only accepts
        # floating-point audio; scale by the dtype's full range.
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # Multi-channel input is laid out (samples, channels): average channels.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    return samples


def process_audio(audio, sr=22050):
    """Convert raw audio samples into the model's input tensor.

    Args:
        audio: Array of samples, shape ``(samples,)`` or
            ``(samples, channels)``; integer or floating-point.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        An array of shape ``(1, N_MELS, MAX_FRAMES, 1)`` holding the log-mel
        spectrogram, or ``None`` if the audio is empty or processing fails
        (the error is printed).
    """
    try:
        samples = _to_float_mono(audio)
        if samples.size == 0:
            print("Error processing audio: empty recording")
            return None

        # NOTE(review): the recording is analysed at its native sample rate.
        # If the model was trained on 22050 Hz audio, resample here -- confirm
        # against the training pipeline.
        mel_spec = librosa.feature.melspectrogram(
            y=samples,
            sr=sr,
            n_mels=N_MELS,
            fmax=FMAX_HZ,
        )

        # Convert to log scale (dB relative to the loudest bin).
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Force exactly MAX_FRAMES time frames: truncate long clips, pad short.
        mel_spec_db = mel_spec_db[:, :MAX_FRAMES]
        missing = MAX_FRAMES - mel_spec_db.shape[1]
        if missing > 0:
            # NOTE(review): pads with 0, which is the maximum on this dB
            # scale; kept as-is, presumably matching training -- confirm.
            mel_spec_db = np.pad(mel_spec_db, ((0, 0), (0, missing)))

        return mel_spec_db.reshape(1, N_MELS, MAX_FRAMES, 1)
    except Exception as e:
        print(f"Error processing audio: {str(e)}")
        return None


def predict_cat_sound(audio):
    """Analyze recorded cat sound.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.

    Returns:
        A human-readable result string, or an error/help message.
    """
    if audio is None:
        return "Please record a cat sound"

    if model is None:
        return "Model not loaded properly. Please check the logs."

    try:
        sr, audio_data = audio
        if len(audio_data) == 0:
            return "Please record a cat sound"

        features = process_audio(audio_data, sr)
        if features is None:
            return "Error processing audio"

        # Make prediction
        prediction = model.predict(features, verbose=0)
        scores = prediction[0]
        class_idx = int(np.argmax(scores))
        confidence = float(np.max(scores))

        return f"""
        🐱 Cat Sound Analysis Results:

        Detected Context: {CLASS_NAMES[class_idx]}
        Confidence Score: {confidence*100:.1f}%

        Recording length: {len(audio_data)/sr:.1f} seconds
        """
    except Exception as e:
        return f"Error during analysis: {str(e)}"


# Create Gradio interface
interface = gr.Interface(
    fn=predict_cat_sound,
    inputs=gr.Audio(
        type="numpy",
        sources=["microphone"],
        label="Record Cat Sound"
    ),
    outputs=gr.Textbox(
        label="Analysis Results",
        placeholder="Results will appear here..."
    ),
    title="🐱 Cat Meow Sound Analyzer",
    description="""
    Record your cat's meow to analyze its context. The model will classify the sound as:
    - Brushing
    - Waiting for food
    - Isolation
    """,
    theme="default"
)

# Launch the interface
interface.launch()