# pcnn's picture
# Update app.py
# 43a97b9 verified
import gradio as gr
import librosa
import numpy as np
import joblib
# Relative path of the serialized classifier handed to joblib below.
MODEL_PATH = "gender_recognition_svm.joblib"

# Raw labels the model may emit -> text displayed to the user.
VALID_LABELS = dict(
    male="Male",
    male_masculine="Male",
    female="Female",
    female_feminine="Female",
)

# The model is loaded once, at import time, and reused by every prediction.
print("Loading model...")
model = joblib.load(MODEL_PATH)
print("Model loaded successfully.")
def _to_float_waveform(y):
    """Return *y* as a floating-point waveform.

    Floating-point input is returned unchanged. Integer PCM is divided by the
    full-scale magnitude of its dtype so samples land in [-1.0, 1.0], which is
    the range librosa's own loaders produce. Any other dtype is cast to
    float32 as-is.
    """
    if np.issubdtype(y.dtype, np.floating):
        return y
    if np.issubdtype(y.dtype, np.integer):
        limits = np.iinfo(y.dtype)
        full_scale = float(max(abs(limits.min), limits.max))
        # NOTE(review): assumes the model was trained on normalized audio
        # (e.g. loaded through librosa) -- confirm against the training code.
        return y.astype(np.float32) / full_scale
    return y.astype(np.float32)


def predict_gender(audio):
    """Classify the speaker's gender from a Gradio audio input.

    Args:
        audio: ``None`` when nothing was provided, otherwise a
            ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
            array, either 1-D or shaped ``(samples, channels)``.

    Returns:
        The display text mapped from the model's label in ``VALID_LABELS``
        ("Male" or "Female"), or a human-readable message when there is no
        audio, the input is malformed, a processing step fails, or the model
        emits a label that is not in ``VALID_LABELS``.
    """
    print("Received input to predict_gender:", type(audio))

    # Nothing was recorded or uploaded.
    if audio is None:
        print("No audio input received.")
        return "No voice detected. Please record or upload audio."

    # Gradio (type='numpy') hands over a (sample_rate, samples) pair.
    try:
        sr, y = audio
    except (TypeError, ValueError) as e:
        print("Unexpected audio format when unpacking:", e, "value:", audio)
        return "Unknown (invalid audio format)."

    if not isinstance(y, np.ndarray):
        print("Audio data is not a numpy array:", type(y))
        return "Unknown (invalid audio data)."

    print(f"Original sample rate: {sr}, audio shape: {y.shape}")

    # An empty clip has nothing to analyse; report it as missing audio
    # instead of letting feature extraction fail with a misleading message.
    if y.size == 0:
        print("Audio array is empty.")
        return "No voice detected. Please record or upload audio."

    # Convert to float BEFORE any librosa call: librosa validates its input
    # and rejects integer buffers, which previously broke stereo clips.
    y = _to_float_waveform(y)

    # Down-mix multi-channel audio to mono if needed.
    if y.ndim == 2:
        print("Converting stereo to mono.")
        try:
            # y is (samples, channels); to_mono expects channels first.
            y = librosa.to_mono(y.T)
        except Exception as e:
            print("Error converting to mono:", e)
            return "Unknown (error converting audio to mono)."

    # Normalize the sample rate to 16 kHz.
    target_sr = 16000
    if sr != target_sr:
        print(f"Resampling from {sr} Hz to {target_sr} Hz")
        try:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
            print(f"Resampled audio shape: {y.shape}")
        except Exception as e:
            print("Error during resampling:", e)
            return "Unknown (error during resampling)."

    print(f"Preprocessed audio shape: {y.shape}, sample rate: {sr}")

    # Extract MFCCs and average them over time into one feature row.
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features = np.mean(mfcc.T, axis=0).reshape(1, -1)
        print("MFCC features extracted:", features.shape)
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return "Unknown (error processing audio features)."

    # Run the classifier.
    try:
        pred = model.predict(features)
        print("Raw prediction:", pred)
        label = str(pred[0])
    except Exception as e:
        print("Error during model prediction:", e)
        return "Unknown (error during prediction)."

    # Reject labels the display mapping does not know about.
    if label not in VALID_LABELS:
        print("Warning: unexpected label from model:", label)
        return f"Unknown (unexpected model label: {label})"

    # Return the normalized display text.
    return VALID_LABELS[label]
# Build the input and output widgets first, then wire them to the classifier.
_voice_input = gr.Audio(type="numpy", label="Record or upload voice")
_gender_output = gr.Textbox(label="Predicted gender")

iface = gr.Interface(
    fn=predict_gender,
    inputs=_voice_input,
    outputs=_gender_output,
    title="Gender Recognition For Thai Voices",
    description="Upload or record a short voice clip to classify gender.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio interface...")
    iface.launch(share=True)