import gradio as gr
import librosa
import numpy as np
import joblib

# Location of the serialized classifier, relative to the working directory.
MODEL_PATH = "gender_recognition_svm.joblib"

# Raw label emitted by the model -> text shown to the user.
# Several raw labels collapse onto the same display value.
VALID_LABELS = dict(
    male="Male",
    male_masculine="Male",
    female="Female",
    female_feminine="Female",
)

# Load the classifier once at import time so every prediction call reuses
# the same in-memory object instead of re-reading MODEL_PATH per request.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code — only point MODEL_PATH at a file from a trusted source.
print("Loading model...")
model = joblib.load(MODEL_PATH)
print("Model loaded successfully.")


def predict_gender(audio) -> str:
    """Classify the speaker's gender from a Gradio audio input.

    Pipeline: validate input -> convert samples to float -> downmix to mono
    -> resample to 16 kHz -> mean of 13 MFCCs over time -> ``model.predict``
    -> map the raw label through ``VALID_LABELS``.

    Args:
        audio: ``None`` when nothing was recorded/uploaded, otherwise a
            ``(sample_rate, samples)`` pair as produced by
            ``gr.Audio(type="numpy")``. ``samples`` is a NumPy array that is
            either 1-D (mono) or 2-D ``(samples, channels)``.

    Returns:
        ``"Male"`` or ``"Female"`` on success; otherwise a human-readable
        message (``"No voice detected..."`` or ``"Unknown (...)"``). The
        function never raises: every failure is reported as a string so the
        UI always has something to display.
    """
    print("Received input to predict_gender:", type(audio))

    # Nothing was recorded or uploaded.
    if audio is None:
        print("No audio input received.")
        return "No voice detected. Please record or upload audio."

    # gr.Audio(type="numpy") delivers a (sample_rate, samples) pair.
    try:
        sr, y = audio
    except (TypeError, ValueError) as e:
        print("Unexpected audio format when unpacking:", e, "value:", audio)
        return "Unknown (invalid audio format)."

    if not isinstance(y, np.ndarray):
        print("Audio data is not a numpy array:", type(y))
        return "Unknown (invalid audio data)."

    print(f"Original sample rate: {sr}, audio shape: {y.shape}")

    # A zero-length clip carries no voice; report it as such instead of
    # letting feature extraction fail with a generic error.
    if y.size == 0:
        print("Audio array is empty.")
        return "No voice detected. Please record or upload audio."

    # Convert to floating point BEFORE any librosa call: librosa validates
    # its audio input and rejects non-float arrays, so integer PCM must be
    # converted first (otherwise stereo clips always fail in to_mono).
    if not np.issubdtype(y.dtype, np.floating):
        if np.issubdtype(y.dtype, np.signedinteger):
            # Scale signed PCM to [-1, 1), the range librosa works with.
            # NOTE(review): assumes the model was trained on librosa-style
            # float audio in [-1, 1] — confirm against the training pipeline.
            full_scale = float(np.iinfo(y.dtype).max) + 1.0
            y = y.astype(np.float32) / full_scale
        else:
            y = y.astype(np.float32)

    # Downmix multi-channel audio to mono if needed.
    if y.ndim == 2:
        print("Converting stereo to mono.")
        try:
            # y is (samples, channels); to_mono expects (channels, samples).
            y = librosa.to_mono(y.T)
        except Exception as e:
            print("Error converting to mono:", e)
            return "Unknown (error converting audio to mono)."

    # Normalize the sample rate to 16 kHz.
    target_sr = 16000
    if sr != target_sr:
        print(f"Resampling from {sr} Hz to {target_sr} Hz")
        try:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
            print(f"Resampled audio shape: {y.shape}")
        except Exception as e:
            print("Error during resampling:", e)
            return "Unknown (error during resampling)."

    print(f"Preprocessed audio shape: {y.shape}, sample rate: {sr}")

    # Extract MFCCs and average over time to get one fixed-size row vector.
    try:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features = np.mean(mfcc.T, axis=0).reshape(1, -1)
        print("MFCC features extracted:", features.shape)
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return "Unknown (error processing audio features)."

    # Run the classifier on the single feature row.
    try:
        pred = model.predict(features)
        print("Raw prediction:", pred)
        label = str(pred[0])
    except Exception as e:
        print("Error during model prediction:", e)
        return "Unknown (error during prediction)."

    # Reject labels the display mapping does not know about.
    if label not in VALID_LABELS:
        print("Warning: unexpected label from model:", label)
        return f"Unknown (unexpected model label: {label})"

    # Return the normalized display text.
    return VALID_LABELS[label]


# Widgets for the demo page: one audio input (delivered to the callback as a
# NumPy tuple) and one text box for the prediction.
_voice_input = gr.Audio(type="numpy", label="Record or upload voice")
_gender_output = gr.Textbox(label="Predicted gender")

# Wire the prediction function to the widgets.
iface = gr.Interface(
    fn=predict_gender,
    inputs=_voice_input,
    outputs=_gender_output,
    title="Gender Recognition For Thai Voices",
    description="Upload or record a short voice clip to classify gender.",
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    print("Launching Gradio interface...")
    iface.launch(share=True)