Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Sleeping

Upload 7 files

05b56c7 verified 24 days ago

1.65 kB

	import tensorflow as tf
	import numpy as np

	# Load trained model
	model = tf.keras.models.load_model("saved_model/Audio_Model_Classification.h5")

	# IMPORTANT: Keep same order as training (alphabetical default in TF)
	CLASS_NAMES = ["Baby Cry", "Chainsaw", "Clock Tick", "Cow", "Dog", "Fire Crackling", "Frog", "Helicopter", "Person Sneeze", "Pig", "Rain", "Rooster", "Sea Waves"]

	def predict(img):
	# Convert PIL image → numpy array
	img = np.array(img).astype("float32") / 255.0 # normalize [0,1]

	# Resize to match training target (231x232)
	img = tf.image.resize(img, (231, 232)) # (231, 232, 4)

	# Add batch dimension
	img = np.expand_dims(img, axis=0) # (1, 231, 232, 4)

	# Predict
	preds = model.predict(img)
	probs = preds[0]

	class_idx = int(np.argmax(probs))
	confidence = float(np.max(probs))
	prob_dict = {CLASS_NAMES[i]: float(probs[i]) for i in range(len(CLASS_NAMES))}

	return CLASS_NAMES[class_idx], confidence, prob_dict

	# THIS OUR PAST VERSION OF... IT WAS OUT RESIZING BUT EVEN THOUGH THE MODEL WAS WORKING FINE, SO, MAYBE TF COULD ACCEPT DYNAMIC SIZES OF INPUT IMAGES
	# def predict(img):
	# # Convert to numpy array (RGBA)
	# img = np.array(img) / 255.0 # shape (H, W, 4)
	# img = np.expand_dims(img, axis=0) # (1, H, W, 4)
	#
	# # Predict
	# preds = model.predict(img)
	# probs = preds[0]
	#
	# class_idx = int(np.argmax(probs))
	# confidence = float(np.max(probs))
	# prob_dict = {CLASS_NAMES[i]: float(probs[i]) for i in range(len(CLASS_NAMES))}
	#
	# return CLASS_NAMES[class_idx], confidence, prob_dict