"""Gradio demo that classifies the gender of a speaker from a short voice clip.

A model is loaded from ``MODEL_PATH`` with joblib at import time; each clip is
converted to mono, resampled to 16 kHz, summarised as the time-average of 13
MFCC coefficients and passed to ``model.predict``.
"""

import gradio as gr
import joblib
import librosa
import numpy as np

MODEL_PATH = "gender_recognition_svm.joblib"

# Sample rate (Hz) every clip is resampled to before feature extraction.
TARGET_SAMPLE_RATE = 16000

# Number of MFCC coefficients averaged over time to form the feature vector.
N_MFCC = 13

# Maps the raw labels emitted by the model to the text shown in the UI.
VALID_LABELS = {
    "male": "Male",
    "male_masculine": "Male",
    "female": "Female",
    "female_feminine": "Female",
}

print("Loading model...")
model = joblib.load(MODEL_PATH)
print("Model loaded successfully.")


def predict_gender(audio):
    """Classify the speaker gender of one audio clip.

    Args:
        audio: Value delivered by ``gr.Audio(type="numpy")``: either ``None``
            (nothing recorded/uploaded) or a ``(sample_rate, samples)`` pair
            where ``samples`` is a NumPy array, 1-D for mono or 2-D with
            shape ``(samples, channels)`` for multi-channel audio.

    Returns:
        ``"Male"`` or ``"Female"`` on success. Otherwise a human-readable
        message: ``"No voice detected. ..."`` for missing or empty audio, or
        ``"Unknown (...)"`` describing the stage that failed. The function
        never raises; every failure is reported through the returned string
        so the UI always has something to display.
    """
    print("Received input to predict_gender:", type(audio))

    # Nothing was recorded or uploaded.
    if audio is None:
        print("No audio input received.")
        return "No voice detected. Please record or upload audio."

    # Gradio (type='numpy') delivers a (sample_rate, samples) pair.
    try:
        sr, y = audio
    except (TypeError, ValueError) as e:
        print("Unexpected audio format when unpacking:", e, "value:", audio)
        return "Unknown (invalid audio format)."

    if not isinstance(y, np.ndarray):
        print("Audio data is not a numpy array:", type(y))
        return "Unknown (invalid audio data)."

    print(f"Original sample rate: {sr}, audio shape: {y.shape}")

    # A zero-length clip carries no signal; report it the same way as a
    # missing recording instead of failing later inside librosa.
    if y.size == 0:
        print("Empty audio buffer received.")
        return "No voice detected. Please record or upload audio."

    # Cast to float BEFORE any librosa call: librosa validates its input and
    # rejects non-floating-point buffers, and Gradio hands over integer PCM.
    # Doing the cast after to_mono (as before) made every stereo clip fail.
    # NOTE(review): integer PCM is cast without rescaling to [-1, 1]. If the
    # model was trained on librosa.load() output (already in [-1, 1]) the
    # features will be offset -- confirm against the training pipeline.
    if not np.issubdtype(y.dtype, np.floating):
        y = y.astype(np.float32)

    # Down-mix multi-channel audio to mono when needed.
    if y.ndim == 2:
        print("Converting stereo to mono.")
        try:
            # y is (samples, channels); to_mono expects channels first.
            y = librosa.to_mono(y.T)
        except Exception as e:
            print("Error converting to mono:", e)
            return "Unknown (error converting audio to mono)."

    # Normalise the sample rate to the target rate.
    target_sr = TARGET_SAMPLE_RATE
    if sr != target_sr:
        print(f"Resampling from {sr} Hz to {target_sr} Hz")
        try:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
            print(f"Resampled audio shape: {y.shape}")
        except Exception as e:
            print("Error during resampling:", e)
            return "Unknown (error during resampling)."

    print(f"Preprocessed audio shape: {y.shape}, sample rate: {sr}")

    # Extract MFCCs and average them over time into a single (1, N_MFCC) row.
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
        features = np.mean(mfcc.T, axis=0).reshape(1, -1)
        print("MFCC features extracted:", features.shape)
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return "Unknown (error processing audio features)."

    # Run the classifier.
    try:
        pred = model.predict(features)
        print("Raw prediction:", pred)
        label = str(pred[0])
    except Exception as e:
        print("Error during model prediction:", e)
        return "Unknown (error during prediction)."

    # Guard against labels the UI does not know how to display.
    if label not in VALID_LABELS:
        print("Warning: unexpected label from model:", label)
        return f"Unknown (unexpected model label: {label})"

    # Return the normalised display text.
    return VALID_LABELS[label]


iface = gr.Interface(
    fn=predict_gender,
    inputs=gr.Audio(type="numpy", label="Record or upload voice"),
    outputs=gr.Textbox(label="Predicted gender"),
    title="Gender Recognition For Thai Voices",
    description="Upload or record a short voice clip to classify gender.",
)

if __name__ == "__main__":
    print("Launching Gradio interface...")
    # share=True asks Gradio for a public share link in addition to localhost.
    iface.launch(share=True)