Spaces:

amritn8
/

AnimalSoundClassifier

Sleeping

App Files Files Community

AnimalSoundClassifier / app.py

amritn8

Update app.py

2843631 verified 6 months ago

raw

history blame contribute delete

2.03 kB

	import tensorflow as tf
	import joblib
	import numpy as np
	import gradio as gr
	from scipy.io import wavfile

	# Load the saved Keras model and the joblib-serialized label encoder.
	# Both paths are relative, so they resolve against the working directory.
	MODEL_PATH = "animal_sound_cnn.keras"
	LABEL_ENCODER_PATH = "label_encoder.joblib"
	model = tf.keras.models.load_model(MODEL_PATH)
	label_encoder = joblib.load(LABEL_ENCODER_PATH)

	def preprocess_audio(audio_path, target_length=384):
	    """Turn a WAV file into the fixed-length feature row fed to the model.

	    The audio is read with ``scipy.io.wavfile``, down-mixed to mono,
	    peak-normalized, converted to an STFT magnitude spectrogram, flattened,
	    and finally zero-padded or truncated to ``target_length`` values.

	    Args:
	        audio_path: Path to a WAV file.
	        target_length: Number of feature values to return. Defaults to 384,
	            the input width this app has always used for the model.

	    Returns:
	        A NumPy array of shape ``(1, target_length)``, or ``None`` when the
	        file cannot be read or processed (the error is printed).
	    """
	    try:
	        # 1. Load audio file (convert to mono if stereo)
	        _sample_rate, samples = wavfile.read(audio_path)
	        if samples.size == 0:
	            raise ValueError("audio file contains no samples")
	        if samples.ndim > 1:
	            samples = np.mean(samples, axis=1)

	        # Convert to float BEFORE abs(): on integer PCM, abs() of the most
	        # negative value overflows and would corrupt the peak estimate.
	        samples = samples.astype(np.float32)

	        # Peak-normalize. A silent clip has peak 0; dividing would fill the
	        # signal with NaN, so leave it as zeros instead.
	        peak = np.max(np.abs(samples))
	        if peak > 0:
	            samples = samples / peak

	        # 2. Create spectrogram (adjust these parameters to match your training)
	        spectrogram = tf.signal.stft(
	            samples, frame_length=256, frame_step=128, fft_length=256
	        )
	        spectrogram = tf.abs(spectrogram)  # Magnitude

	        # 3. Flatten to a single row, then pad with zeros or trim so the row
	        #    is exactly target_length wide.
	        flattened = tf.reshape(spectrogram, (1, -1))
	        n_features = flattened.shape[1]
	        if n_features < target_length:
	            flattened = tf.pad(
	                flattened, [[0, 0], [0, target_length - n_features]]
	            )
	        else:
	            flattened = flattened[:, :target_length]

	        return flattened.numpy()

	    except Exception as e:
	        # Best-effort boundary: report the problem and let the caller show
	        # a friendly error instead of crashing the UI.
	        print(f"Audio processing error: {str(e)}")
	        return None

	def predict(audio_path):
	    """Classify one audio file and return the predicted label.

	    Returns the decoded class label on success, or a human-readable error
	    string when preprocessing or prediction fails.
	    """
	    try:
	        features = preprocess_audio(audio_path)
	        if features is not None:
	            # Debug output
	            print(f"Model input shape: {features.shape}")

	            # Highest-scoring class index -> original label
	            scores = model.predict(features)
	            best_index = np.argmax(scores)
	            decoded = label_encoder.inverse_transform([best_index])
	            return decoded[0]

	        return "Error: Couldn't process audio"

	    except Exception as err:
	        return f"Prediction error: {str(err)}"

	# Build the Gradio UI around predict() and start serving it.
	demo = gr.Interface(
	    fn=predict,
	    inputs=gr.Audio(type="filepath"),
	    outputs="label",
	    title="Animal Sound Classifier",
	    description="Upload a short animal sound (2-5 seconds)",
	)
	demo.launch()