Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| import joblib | |
| import librosa | |
| import traceback | |
| import os | |
# ---- Feature-extraction / signal-processing parameters ----------------------
# Keeping these identical to the values used at training time is recommended.

# Sample rate every clip is resampled to before feature extraction.
SR = 16000

# Short-time framing shared by the STFT-based features.
WINDOW = "hann"
WIN_LENGTH = 1024
N_FFT = 1024
HOP_LENGTH = 256

# Sizes of the individual feature families.
N_MFCC = 40
N_MELS = 64

# Spectral-contrast settings (number of bands and lowest band edge).
N_BANDS = 6
FMIN = 20.0
# -----------------------------------------------------------------------------
# Module-level cache filled in by load_artifacts():
#   _model     - object loaded from model.joblib (None until loaded)
#   _label     - object loaded from label.joblib (None until loaded)
#   _model_err - text describing the most recent failed load attempt, or None
_model = None
_label = None
_model_err = None


def load_artifacts():
    """Load ``model.joblib`` and ``label.joblib`` on first use (lazy load).

    The result is cached in the module globals ``_model`` and ``_label``.
    Nothing is raised: when loading fails, ``_model_err`` is set to a message
    containing the exception text and traceback, and both artifacts stay
    ``None`` so that the next call tries again.  When loading succeeds,
    ``_model_err`` is reset to ``None`` so that an earlier, transient failure
    does not keep being reported.
    """
    global _model, _label, _model_err
    if _model is not None:
        return
    try:
        for required in ("model.joblib", "label.joblib"):
            if not os.path.exists(required):
                raise FileNotFoundError(f"{required} not found in working dir")
        # NOTE: joblib files are pickle-based; only load artifacts you trust.
        # Load into locals first so a failure on the second file cannot leave
        # the module with a model but no label encoder.
        model = joblib.load("model.joblib")
        label = joblib.load("label.joblib")
    except Exception as e:
        # Broad on purpose: the message is surfaced to the user by the caller.
        _model_err = f"Model load failed: {e}\n{traceback.format_exc()}"
        return
    _model, _label = model, label
    _model_err = None
def _mean_std(feat_2d):
    """Return ``(mean, std)`` of a (time, dim) array, reduced over axis 0."""
    return np.mean(feat_2d, axis=0), np.std(feat_2d, axis=0)
def extract_features_from_array(y, sr):
    """Build the 194-dimensional feature vector for one audio clip.

    Layout of the returned float32 vector, in order:
        MFCC mean + std               40 * 2 = 80
        Chroma mean + std             12 * 2 = 24
        Mel-spectrogram mean                   64
        Spectral contrast mean + std   7 * 2 = 14
        Tonnetz mean + std             6 * 2 = 12
                                       total = 194

    Args:
        y: audio samples; arrays with more than one dimension are averaged
            over axis 1 to obtain a single channel.
        sr: sample rate of ``y``; the clip is resampled when it differs
            from ``SR``.

    Returns:
        1-D ``np.float32`` array holding the concatenated statistics.
    """
    signal = np.asarray(y, dtype=np.float32)

    # Down-mix to one channel, then bring the clip to the SR sample rate.
    if signal.ndim > 1:
        signal = np.mean(signal, axis=1)
    if sr != SR:
        signal = librosa.resample(y=signal, orig_sr=sr, target_sr=SR)
        sr = SR

    # Zero-pad at the end so the clip holds at least SR samples (one second).
    shortfall = SR - len(signal)
    if shortfall > 0:
        signal = np.pad(signal, (0, shortfall))

    # Framing arguments shared by every STFT-based call below.
    framing = {
        "n_fft": N_FFT,
        "hop_length": HOP_LENGTH,
        "win_length": WIN_LENGTH,
    }

    def stats(frames):
        # frames is (time, dim); summarise each dimension over time.
        return [np.mean(frames, axis=0), np.std(frames, axis=0)]

    # Magnitude spectrogram reused by chroma and spectral contrast.
    magnitude = np.abs(librosa.stft(signal, window=WINDOW, **framing))

    mfcc = librosa.feature.mfcc(
        y=signal, sr=sr, n_mfcc=N_MFCC, window=WINDOW, **framing
    ).T
    chroma = librosa.feature.chroma_stft(S=magnitude, sr=sr).T
    mel = librosa.feature.melspectrogram(
        y=signal, sr=sr, n_mels=N_MELS, **framing
    ).T
    contrast = librosa.feature.spectral_contrast(
        S=magnitude, sr=sr, n_bands=N_BANDS, fmin=FMIN, **framing
    ).T
    # Tonnetz is computed on the harmonic component of the signal.
    harmonic = librosa.effects.harmonic(signal)
    tonnetz = librosa.feature.tonnetz(y=harmonic, sr=sr).T

    pieces = [
        *stats(mfcc),          # 80
        *stats(chroma),        # 24
        np.mean(mel, axis=0),  # 64 (mean only)
        *stats(contrast),      # 14
        *stats(tonnetz),       # 12
    ]
    return np.concatenate(pieces).astype(np.float32)
def _pcm_to_float(samples):
    """Return *samples* as float32, scaling signed-integer PCM into [-1, 1)."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.signedinteger):
        # e.g. int16 -> divide by 32768 so full scale maps to +/-1.0
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        return (samples / full_scale).astype(np.float32)
    return samples.astype(np.float32)


def predict_from_audio(audio):
    """Classify one audio input and return a human-readable result string.

    Accepted forms of ``audio``:
      * ``(sr, array)`` tuple, as produced by ``gr.Audio(type="numpy")``;
      * dict with ``"sampling_rate"`` and ``"array"`` keys;
      * ``str`` file path (``gr.Audio(type="filepath")``), read with librosa.

    Integer PCM arrays are rescaled to floats in [-1, 1) so that the numpy
    branch feeds the feature extractor the same amplitude scale that
    ``librosa.load`` yields in the file-path branch.

    Returns:
        The prediction text, a prompt/diagnostic message, or, if anything
        raises, an error message that includes the traceback.  This function
        never raises.
    """
    try:
        load_artifacts()
        if _model_err:
            return f"⚠️ {_model_err}"
        if audio is None:
            return "Lütfen bir ses dosyası yükleyin veya kaydedin."

        # Cover the input variants Gradio (or a caller) may hand over.
        if isinstance(audio, dict) and "sampling_rate" in audio and "array" in audio:
            sr = int(audio["sampling_rate"])
            y = _pcm_to_float(audio["array"])
        elif isinstance(audio, tuple) and len(audio) == 2:
            sr, y = audio
            sr = int(sr)
            y = _pcm_to_float(y)
        elif isinstance(audio, str):
            # Used when the input component is gr.Audio(type="filepath").
            y, sr = librosa.load(audio, sr=SR)
        else:
            return "Beklenmedik ses girdisi formatı."

        feats = extract_features_from_array(y, sr)
        X = feats.reshape(1, -1)  # single-row batch for the estimator
        pred = _model.predict(X)
        label = _label.inverse_transform(pred)[0]
        return f"Tahmin: {str(label)}"
    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        tb = traceback.format_exc()
        return f"❌ Hata oluştu:\n{e}\n\nTraceback:\n{tb}"
# Text shown at the top of the web page.
TITLE = "Baby Cry Classification (foduucom)"
DESC = "Bebek ağlaması sesini yükleyin veya mikrofondan kaydedin; model sınıf tahmini yapsın."

# Input: an uploaded file or a microphone recording, passed to the callback
# in the "numpy" form. Output: a six-line text box holding the result string.
_audio_input = gr.Audio(sources=["upload", "microphone"], type="numpy")
_result_box = gr.Textbox(lines=6)

# The Interface wires predict_from_audio between the two components.
demo = gr.Interface(
    fn=predict_from_audio,
    inputs=_audio_input,
    outputs=_result_box,
    title=TITLE,
    description=DESC,
    allow_flagging="never",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()