# baby / app.py — Gradio Space (uploaded by SwitchAlpha, commit e4c7073)
# NOTE: the Hugging Face page header lines were turned into this comment;
# as plain text they were not valid Python.
import gradio as gr
import numpy as np
import joblib
import librosa
import traceback
import os
# ==== Feature/processing parameters (should match those used at training) ====
SR = 16000          # target sample rate (Hz); all audio is resampled to this
N_FFT = 1024        # FFT size for STFT-based features
HOP_LENGTH = 256    # hop between successive analysis frames, in samples
WIN_LENGTH = 1024   # analysis window length, in samples
N_MELS = 64         # mel bands (mel-spectrogram mean contributes 64 dims)
N_BANDS = 6         # spectral-contrast bands (librosa yields n_bands+1 = 7 rows)
FMIN = 20.0         # lowest frequency (Hz) for spectral contrast
WINDOW = "hann"     # window function for STFT/MFCC
N_MFCC = 40         # number of MFCC coefficients
# ============================================================================
# Lazily loaded artifacts (see load_artifacts): the classifier, the label
# encoder, and the last load-error message (None while nothing has failed).
_model = None
_label = None
_model_err = None
def load_artifacts():
    """Lazily load ``model.joblib`` and ``label.joblib`` from the working dir.

    On success sets the module-level ``_model`` and ``_label``; on failure
    records a human-readable message (including the traceback) in
    ``_model_err``. Safe to call on every request: returns immediately once
    both artifacts are loaded, and retries a previously failed load.
    """
    global _model, _label, _model_err
    # Require BOTH artifacts before skipping the load (the original checked
    # only _model, which could leave _label as None after a partial failure).
    if _model is not None and _label is not None:
        return
    # Clear any stale error so a successful retry is not masked by the
    # message left behind by an earlier failed attempt.
    _model_err = None
    try:
        if not os.path.exists("model.joblib"):
            raise FileNotFoundError("model.joblib not found in working dir")
        if not os.path.exists("label.joblib"):
            raise FileNotFoundError("label.joblib not found in working dir")
        _model = joblib.load("model.joblib")
        _label = joblib.load("label.joblib")
    except Exception as e:
        _model_err = f"Model load failed: {e}\n{traceback.format_exc()}"
def _mean_std(feat_2d):
    """Collapse the time axis of a (time, dim) array into (mean, std) vectors."""
    return np.mean(feat_2d, axis=0), np.std(feat_2d, axis=0)
def extract_features_from_array(y, sr):
    """
    Build the 194-dimensional feature vector consumed by the classifier:
        MFCC mean+std              = 40*2 = 80
        Chroma mean+std            = 12*2 = 24
        Mel-spectrogram mean       =        64
        Spectral contrast mean+std =  7*2 = 14
        Tonnetz mean+std           =  6*2 = 12
        Total                      =       194
    """
    signal = np.asarray(y, dtype=np.float32)

    # Down-mix to mono, then resample to the training rate if necessary.
    if signal.ndim > 1:
        signal = np.mean(signal, axis=1)
    if sr != SR:
        signal = librosa.resample(y=signal, orig_sr=sr, target_sr=SR)
        sr = SR

    # Zero-pad clips shorter than one second.
    if len(signal) < SR:
        signal = np.pad(signal, (0, SR - len(signal)))

    # MFCC (mean + std) -> 80 dims
    mfcc_m, mfcc_s = _mean_std(librosa.feature.mfcc(
        y=signal, sr=sr, n_mfcc=N_MFCC,
        n_fft=N_FFT, hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH, window=WINDOW,
    ).T)

    # Mel-spectrogram (mean only) -> 64 dims
    mel_m = np.mean(librosa.feature.melspectrogram(
        y=signal, sr=sr, n_fft=N_FFT,
        hop_length=HOP_LENGTH, win_length=WIN_LENGTH,
        n_mels=N_MELS,
    ).T, axis=0)

    # Magnitude STFT shared by chroma and spectral contrast.
    spec = np.abs(librosa.stft(
        signal, n_fft=N_FFT, hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH, window=WINDOW,
    ))

    # Chroma (mean + std) -> 24 dims
    chroma_m, chroma_s = _mean_std(librosa.feature.chroma_stft(S=spec, sr=sr).T)

    # Spectral contrast (mean + std) -> 14 dims
    contrast_m, contrast_s = _mean_std(librosa.feature.spectral_contrast(
        S=spec, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH, n_bands=N_BANDS, fmin=FMIN,
    ).T)

    # Tonnetz on the harmonic component (mean + std) -> 12 dims
    tonnetz_m, tonnetz_s = _mean_std(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=sr).T
    )

    # Concatenation order must match the training pipeline exactly.
    return np.concatenate([
        mfcc_m, mfcc_s,          # 80
        chroma_m, chroma_s,      # 24
        mel_m,                   # 64
        contrast_m, contrast_s,  # 14
        tonnetz_m, tonnetz_s,    # 12
    ]).astype(np.float32)
def _as_float_signal(raw):
    """Convert raw samples to float32; scale integer PCM into [-1, 1].

    Gradio's ``type="numpy"`` audio is usually integer PCM (e.g. int16);
    feeding it to librosa unscaled makes every feature off by orders of
    magnitude compared with a model trained on normalized audio.
    """
    arr = np.asarray(raw)
    if np.issubdtype(arr.dtype, np.integer):
        return arr.astype(np.float32) / float(np.iinfo(arr.dtype).max)
    return arr.astype(np.float32)


def predict_from_audio(audio):
    """Gradio handler: map an audio input to a class-label message.

    Accepts every input variant Gradio may deliver:
      * dict with "sampling_rate" and "array" keys
      * (sample_rate, numpy_array) tuple — inputs=gr.Audio(type="numpy")
      * a file-path string              — inputs=gr.Audio(type="filepath")

    Always returns a human-readable string (prediction or error); any
    exception is caught and rendered together with its traceback.
    """
    try:
        load_artifacts()
        if _model_err:
            return f"⚠️ {_model_err}"
        if audio is None:
            return "Lütfen bir ses dosyası yükleyin veya kaydedin."
        # Normalize the Gradio input variants to (y, sr).
        if isinstance(audio, dict) and "sampling_rate" in audio and "array" in audio:
            sr = int(audio["sampling_rate"])
            y = _as_float_signal(audio["array"])
        elif isinstance(audio, tuple) and len(audio) == 2:
            sr, raw = audio
            sr = int(sr)
            y = _as_float_signal(raw)
        elif isinstance(audio, str):
            # Used when inputs=gr.Audio(type="filepath")
            y, sr = librosa.load(audio, sr=SR)
        else:
            return "Beklenmedik ses girdisi formatı."
        feats = extract_features_from_array(y, sr)
        X = feats.reshape(1, -1)  # (1, 194)
        pred = _model.predict(X)
        label = _label.inverse_transform(pred)[0]
        return f"Tahmin: {str(label)}"
    except Exception as e:
        tb = traceback.format_exc()
        return f"❌ Hata oluştu:\n{e}\n\nTraceback:\n{tb}"
TITLE = "Baby Cry Classification (foduucom)"
DESC = "Bebek ağlaması sesini yükleyin veya mikrofondan kaydedin; model sınıf tahmini yapsın."

# Single-function UI: audio in (upload or microphone), text prediction out.
demo = gr.Interface(
    predict_from_audio,
    gr.Audio(sources=["upload", "microphone"], type="numpy"),
    gr.Textbox(lines=6),
    title=TITLE,
    description=DESC,
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()