# Cat-Meow / app.py
# IsolaHGVIS: "Update app.py" (commit d831c46, verified)
import gradio as gr
import tensorflow as tf
import numpy as np
import librosa
from huggingface_hub import hf_hub_download
# Fetch the trained classifier from the Hugging Face Hub and load it.
# `model` stays None when either step fails, so the prediction handler can
# report the problem to the user instead of the app crashing at start-up.
model = None
try:
    model_path = hf_hub_download(
        repo_id="IsolaHGVIS/Cat-Meow-Classification",
        filename="best_model_fold_2.h5",
        repo_type="model",
    )
    model = tf.keras.models.load_model(model_path)
except Exception as err:
    print(f"Error loading model: {err!s}")
else:
    print("Model loaded successfully!")
def process_audio(audio, sr=22050):
    """Convert a raw recording into the mel-spectrogram tensor fed to the model.

    Args:
        audio: Waveform samples, either 1-D ``(samples,)`` or 2-D
            ``(samples, channels)``. Integer PCM and floating-point input
            are both accepted.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        An array of shape ``(1, 128, 174, 1)`` holding the log-scaled mel
        spectrogram, truncated or zero-padded along the time axis to exactly
        174 frames. Returns ``None`` if the recording is empty or if any
        processing step raises.
    """
    n_mels = 128
    n_frames = 174
    try:
        samples = np.asarray(audio)

        # librosa only accepts floating-point waveforms, while recordings
        # coming from the UI are integer PCM; without this conversion every
        # mono recording fails validation. Scale integers by the dtype's
        # maximum so the signal lands roughly in [-1, 1].
        if np.issubdtype(samples.dtype, np.integer):
            full_scale = float(np.iinfo(samples.dtype).max)
            samples = samples.astype(np.float32) / full_scale
        elif not np.issubdtype(samples.dtype, np.floating):
            samples = samples.astype(np.float32)

        # Ensure audio is mono by averaging the channel axis.
        if samples.ndim > 1:
            samples = samples.mean(axis=1)

        # Nothing to analyse; bail out before handing an empty signal on.
        if samples.size == 0:
            print("Error processing audio: empty recording")
            return None

        # NOTE(review): the spectrogram is computed at the recording's own
        # sample rate; if the model was trained at a fixed rate (the 22050
        # default suggests so), resampling may be needed — confirm.
        mel_spec = librosa.feature.melspectrogram(
            y=samples,
            sr=sr,
            n_mels=n_mels,
            fmax=8000,
        )

        # Convert power to decibels relative to the loudest bin (peak = 0 dB).
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Force a fixed number of time frames: cut long clips, pad short ones.
        frame_count = mel_spec_db.shape[1]
        if frame_count > n_frames:
            mel_spec_db = mel_spec_db[:, :n_frames]
        else:
            # NOTE(review): padding value is 0, which equals the peak level
            # on this dB scale — presumably matches training; verify.
            pad_width = ((0, 0), (0, n_frames - frame_count))
            mel_spec_db = np.pad(mel_spec_db, pad_width)

        # Add batch and channel axes.
        return mel_spec_db.reshape(1, n_mels, n_frames, 1)
    except Exception as e:
        # Best-effort boundary: callers treat None as "could not process".
        print(f"Error processing audio: {str(e)}")
        return None
def predict_cat_sound(audio):
    """Classify a recorded cat sound and return a text report.

    Args:
        audio: ``None`` when nothing was recorded, otherwise a
            ``(sample_rate, samples)`` pair.

    Returns:
        A multi-line summary with the predicted context, its confidence and
        the recording length in seconds, or a short message explaining why
        no prediction could be made.
    """
    # Guard clauses: nothing recorded, or the model never loaded.
    if audio is None:
        return "Please record a cat sound"
    if model is None:
        return "Model not loaded properly. Please check the logs."

    try:
        sr, audio_data = audio

        # Turn the waveform into the spectrogram tensor the model consumes.
        features = process_audio(audio_data, sr)
        if features is None:
            return "Error processing audio"

        # Single-item batch: keep only the first row of scores.
        scores = model.predict(features, verbose=0)[0]
        class_idx = np.argmax(scores)
        confidence = np.max(scores)

        # Output index -> label.
        classes = ['brushing', 'waiting for food', 'isolation']
        report = f"""
🐱 Cat Sound Analysis Results:
Detected Context: {classes[class_idx]}
Confidence Score: {confidence*100:.1f}%
Recording length: {len(audio_data)/sr:.1f} seconds
"""
        return report
    except Exception as err:
        return f"Error during analysis: {str(err)}"
# Build the Gradio UI: a microphone recording goes in, a text report comes out.
audio_input = gr.Audio(
    type="numpy",
    sources=["microphone"],
    label="Record Cat Sound",
)
results_box = gr.Textbox(
    label="Analysis Results",
    placeholder="Results will appear here...",
)
interface = gr.Interface(
    fn=predict_cat_sound,
    inputs=audio_input,
    outputs=results_box,
    title="🐱 Cat Meow Sound Analyzer",
    description="""
Record your cat's meow to analyze its context.
The model will classify the sound as:
- Brushing
- Waiting for food
- Isolation
""",
    theme="default",
)

# Start serving the app.
interface.launch()