Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Sleeping

File size: 1,648 Bytes

05b56c7

import tensorflow as tf
import numpy as np

# Restore the trained classifier from its saved .h5 file (runs once at import).
model = tf.keras.models.load_model(
    "saved_model/Audio_Model_Classification.h5"
)

# Label for each model output index. The order must stay the same as the one
# used during training (alphabetical, the TF default per the original note).
CLASS_NAMES = [
    "Baby Cry",
    "Chainsaw",
    "Clock Tick",
    "Cow",
    "Dog",
    "Fire Crackling",
    "Frog",
    "Helicopter",
    "Person Sneeze",
    "Pig",
    "Rain",
    "Rooster",
    "Sea Waves",
]

def predict(img):
    """Classify a single image with the loaded model.

    The input is converted to a float32 array scaled by 1/255, resized to
    231x232, given a leading batch dimension, and passed to ``model.predict``.

    Args:
        img: The image to classify. PIL-style images (objects with a string
            ``mode`` attribute and a ``convert`` method) that are not already
            RGBA are converted to RGBA first, because the shape notes in this
            file describe the model input as 4-channel. Any other input is
            handed to ``np.array`` unchanged.

    Returns:
        A ``(label, confidence, prob_dict)`` tuple: the ``CLASS_NAMES`` entry
        with the highest score, that score as a float, and a dict mapping
        every class name to its score as a float.
    """
    # RGB, grayscale or palette images would otherwise reach the model with
    # the wrong number of channels.
    mode = getattr(img, "mode", None)
    if isinstance(mode, str) and mode != "RGBA" and hasattr(img, "convert"):
        img = img.convert("RGBA")

    # Normalize to [0, 1] (assumes 8-bit pixel values — TODO confirm).
    pixels = np.array(img).astype("float32") / 255.0

    # Resize to match the training target (231x232).
    pixels = tf.image.resize(pixels, (231, 232))  # (231, 232, 4)

    # Add the batch dimension.
    batch = np.expand_dims(pixels, axis=0)  # (1, 231, 232, 4)

    # Single-item batch, so only the first row of the output is relevant.
    probs = model.predict(batch)[0]

    class_idx = int(np.argmax(probs))
    # The arg-max entry is the maximum, so a second pass over probs is not needed.
    confidence = float(probs[class_idx])
    prob_dict = {name: float(probs[i]) for i, name in enumerate(CLASS_NAMES)}

    return CLASS_NAMES[class_idx], confidence, prob_dict

# NOTE: Previous version of predict(), kept for reference. It did not resize the input, yet the model still worked fine, so the model may accept dynamically sized input images.
# def predict(img):
#     # Convert to numpy array (RGBA)
#     img = np.array(img) / 255.0  # shape (H, W, 4)
#     img = np.expand_dims(img, axis=0)  # (1, H, W, 4)
#
#     # Predict
#     preds = model.predict(img)
#     probs = preds[0]
#
#     class_idx = int(np.argmax(probs))
#     confidence = float(np.max(probs))
#     prob_dict = {CLASS_NAMES[i]: float(probs[i]) for i in range(len(CLASS_NAMES))}
#
#     return CLASS_NAMES[class_idx], confidence, prob_dict