atharvaballa's picture
Update audio processing logic to support multiple formats
7c86e01
import logging

import numpy as np
import tensorflow as tf

from audio_utils import audio_to_spectrogram
# ======================================================
# MODEL LOAD
# ======================================================
# Location of the exported TensorFlow SavedModel, relative to the working
# directory the process is started from.
MODEL_PATH = "models/audio_vit_savedmodel"
# Loaded once at import time, so importing this module performs disk I/O.
# NOTE(review): `model` is presumably kept at module level so the loaded
# object stays referenced while `infer` is in use - confirm before removing.
model = tf.saved_model.load(MODEL_PATH)
# Callable for the "serving_default" signature; predict_audio() calls it with
# a single tensor and reads the returned mapping of outputs.
infer = model.signatures["serving_default"]
# ======================================================
# PREDICTION FUNCTION (UI-SAFE)
# ======================================================
def predict_audio(audio_file_path):
    """Classify an audio file as "real" or "fake" without raising to the UI.

    The file is converted to a spectrogram image by ``audio_to_spectrogram``,
    cast to float32, divided by 255, given a leading batch axis, and passed
    to the module-level ``infer`` signature. The ``[0][0]`` element of the
    first output is used as the "fake" score.

    Args:
        audio_file_path: Path of the audio file; forwarded unchanged to
            ``audio_to_spectrogram``.

    Returns:
        A 4-tuple ``(label, confidence, spectrogram, error)``:

        - label: ``"fake"`` when the score is >= 0.5, otherwise ``"real"``.
        - confidence: Confidence in the *returned label*, as a percentage
          rounded to 2 decimals (plain Python ``float``). For ``"fake"`` this
          is ``score * 100``; for ``"real"`` it is ``(1 - score) * 100``.
          NOTE(review): assumes the model emits a probability in [0, 1] -
          confirm against the exported model.
        - spectrogram: The image returned by ``audio_to_spectrogram``.
        - error: ``None`` on success.

        On failure the first three items are ``None`` and ``error`` holds a
        user-facing message: the ``ValueError`` text for expected problems,
        or a generic message for anything else.
    """
    try:
        # Convert audio → spectrogram
        spec_img = audio_to_spectrogram(audio_file_path)

        # Scale by 1/255 and add the batch axis before calling the signature.
        batch = np.expand_dims(spec_img.astype("float32") / 255.0, axis=0)
        outputs = infer(tf.constant(batch))

        # The signature returns a mapping of outputs; only the first is used.
        # float() turns the NumPy scalar into a plain Python float so that
        # rounding and later serialization behave predictably.
        score = float(next(iter(outputs.values())).numpy()[0][0])
    except ValueError as ve:
        # Expected errors (short audio, invalid input)
        return None, None, None, str(ve)
    except Exception:
        # Unexpected errors (decoding/model issues): keep the traceback in
        # the logs, since the caller only ever sees the generic message.
        logging.getLogger(__name__).exception(
            "Unexpected failure while processing audio file %r",
            audio_file_path,
        )
        return None, None, None, (
            "Unable to process the audio file. "
            "Please upload a clear audio clip in WAV, MP3, FLAC, or M4A format."
        )

    is_fake = score >= 0.5
    label = "fake" if is_fake else "real"
    # Report how confident the model is in the label actually returned,
    # not always the "fake" score.
    label_score = score if is_fake else 1.0 - score
    confidence = round(label_score * 100, 2)
    return label, confidence, spec_img, None