Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import librosa | |
| import numpy as np | |
| import joblib | |
# Path to the serialized scikit-learn SVM classifier.
MODEL_PATH = "gender_recognition_svm.joblib"

# Maps raw model labels to the human-readable text shown in the UI.
# Several training corpora use different label spellings for the same class.
VALID_LABELS = {
    "male": "Male",
    "male_masculine": "Male",
    "female": "Female",
    "female_feminine": "Female",
}

print("Loading model...")
model = joblib.load(MODEL_PATH)
print("Model loaded successfully.")
def predict_gender(audio):
    """Classify the speaker gender of a recorded/uploaded audio clip.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        Gradio ``type="numpy"`` payload: ``(sample_rate, samples)``.
        Samples may be int PCM (e.g. int16) or float, mono or stereo.

    Returns
    -------
    str
        "Male"/"Female" on success, otherwise a human-readable error message.
    """
    print("Received input to predict_gender:", type(audio))

    # No audio provided at all.
    if audio is None:
        print("No audio input received.")
        return "No voice detected. Please record or upload audio."

    # Gradio (type='numpy') -> (sr, y)
    try:
        sr, y = audio
    except Exception as e:
        print("Unexpected audio format when unpacking:", e, "value:", audio)
        return "Unknown (invalid audio format)."

    if not isinstance(y, np.ndarray):
        print("Audio data is not a numpy array:", type(y))
        return "Unknown (invalid audio data)."

    # Zero-length clip: resampling/MFCC would fail with an opaque error,
    # so report it explicitly up front.
    if y.size == 0:
        print("Empty audio buffer received.")
        return "No voice detected. Please record or upload audio."

    print(f"Original sample rate: {sr}, audio shape: {y.shape}")

    # Convert stereo -> mono if needed.
    if y.ndim == 2:
        print("Converting stereo to mono.")
        try:
            # y shape: (samples, channels) -> transpose so to_mono can handle it
            y = librosa.to_mono(y.T)
        except Exception as e:
            print("Error converting to mono:", e)
            return "Unknown (error converting audio to mono)."

    # Ensure float dtype. BUGFIX: Gradio delivers integer PCM (typically
    # int16); librosa expects float audio in [-1, 1], so integer samples
    # must be rescaled, not just cast — otherwise MFCCs are computed on
    # values in the +/-32768 range and won't match training features.
    if np.issubdtype(y.dtype, np.integer):
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    elif not np.issubdtype(y.dtype, np.floating):
        y = y.astype(np.float32)

    # Normalize the sample rate to 16 kHz (the rate the model expects).
    target_sr = 16000
    if sr != target_sr:
        print(f"Resampling from {sr} Hz to {target_sr} Hz")
        try:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
            print(f"Resampled audio shape: {y.shape}")
        except Exception as e:
            print("Error during resampling:", e)
            return "Unknown (error during resampling)."

    print(f"Preprocessed audio shape: {y.shape}, sample rate: {sr}")

    # Extract MFCC features: mean over time of 13 coefficients -> (1, 13).
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features = np.mean(mfcc.T, axis=0).reshape(1, -1)
        print("MFCC features extracted:", features.shape)
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return "Unknown (error processing audio features)."

    # Predict with the SVM.
    try:
        pred = model.predict(features)
        print("Raw prediction:", pred)
        label = str(pred[0])
    except Exception as e:
        print("Error during model prediction:", e)
        return "Unknown (error during prediction)."

    # Validate the label against the known mapping.
    if label not in VALID_LABELS:
        print("Warning: unexpected label from model:", label)
        return f"Unknown (unexpected model label: {label})"

    # Return the normalized display text.
    return VALID_LABELS[label]
# Build the Gradio UI: one audio input (mic or file upload), one text output.
iface = gr.Interface(
    fn=predict_gender,
    inputs=gr.Audio(type="numpy", label="Record or upload voice"),
    outputs=gr.Textbox(label="Predicted gender"),
    title="Gender Recognition For Thai Voices",
    description="Upload or record a short voice clip to classify gender.",
)
# Script entry point: start the web UI with a public share link.
if __name__ == "__main__":
    print("Launching Gradio interface...")
    iface.launch(share=True)