import gradio as gr
import tensorflow as tf
import numpy as np
import librosa
from huggingface_hub import hf_hub_download

# Fetch the trained classifier from the Hugging Face Hub and load it once at
# import time. `model` stays None when anything goes wrong, so the rest of the
# module can check for that instead of crashing on start-up.
model = None
try:
    # Resolve (and download if needed) the weights file for the chosen fold.
    model_path = hf_hub_download(
        repo_type="model",
        repo_id="IsolaHGVIS/Cat-Meow-Classification",
        filename="best_model_fold_2.h5",
    )
    model = tf.keras.models.load_model(model_path)
except Exception as exc:
    print(f"Error loading model: {exc}")
else:
    print("Model loaded successfully!")

def process_audio(audio, sr=22050):
    """Convert a raw waveform into the log-mel tensor fed to the classifier.

    Args:
        audio: Waveform samples as a NumPy array (or array-like), either 1-D
            mono or 2-D ``(samples, channels)``. Integer PCM (e.g. the int16
            arrays Gradio's ``type="numpy"`` audio input provides) is
            rescaled to floating point before feature extraction.
        sr: Sample rate of ``audio`` in Hz.

    Returns:
        An array of shape ``(1, 128, 174, 1)`` holding the log-scaled mel
        spectrogram, truncated or padded to 174 frames, or ``None`` when the
        audio could not be processed (the error is printed).
    """
    try:
        audio = np.asarray(audio)
        if audio.size == 0:
            raise ValueError("audio contains no samples")

        # librosa only accepts floating-point audio; integer PCM would make
        # melspectrogram raise. Scale integers by the dtype's full-scale value
        # so samples land roughly in [-1, 1].
        if np.issubdtype(audio.dtype, np.integer):
            info = np.iinfo(audio.dtype)
            full_scale = float(max(abs(info.min), info.max))
            audio = audio.astype(np.float32) / full_scale
        elif not np.issubdtype(audio.dtype, np.floating):
            audio = audio.astype(np.float32)

        # Ensure audio is mono by averaging the channel axis
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Generate mel spectrogram
        # NOTE(review): features are computed at the caller's sample rate with
        # no resampling — confirm this matches the rate used in training.
        mel_spec = librosa.feature.melspectrogram(
            y=audio,
            sr=sr,
            n_mels=128,
            fmax=8000
        )

        # Convert to log scale; with ref=np.max the loudest bin becomes 0 dB
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Force exactly 174 time frames: crop long clips, pad short ones.
        # NOTE(review): padding uses 0, which on this dB scale equals the
        # loudest bin — confirm this is how the training data was padded.
        if mel_spec_db.shape[1] > 174:
            mel_spec_db = mel_spec_db[:, :174]
        else:
            pad_width = ((0, 0), (0, 174 - mel_spec_db.shape[1]))
            mel_spec_db = np.pad(mel_spec_db, pad_width)

        # Add batch and channel axes
        return mel_spec_db.reshape(1, 128, 174, 1)
    except Exception as e:
        print(f"Error processing audio: {str(e)}")
        return None

def predict_cat_sound(audio):
    """Classify a recorded cat sound and describe the outcome as text.

    Args:
        audio: ``(sample_rate, samples)`` pair from the audio input, or
            ``None`` when nothing was recorded.

    Returns:
        A text report with the predicted context, its confidence and the
        recording length, or a short message explaining why no analysis
        could be produced.
    """
    # Guard: nothing to analyse.
    if audio is None:
        return "Please record a cat sound"

    # Guard: the module-level model is None when loading failed.
    if model is None:
        return "Model not loaded properly. Please check the logs."

    # Labels are looked up by the argmax position of the model output.
    classes = ['brushing', 'waiting for food', 'isolation']

    try:
        sr, audio_data = audio

        features = process_audio(audio_data, sr)
        if features is None:
            return "Error processing audio"

        # Single-item batch: keep only the first row of scores.
        scores = model.predict(features, verbose=0)[0]
        class_idx = np.argmax(scores)
        confidence = np.max(scores)

        report = f"""
        🐱 Cat Sound Analysis Results:
        
        Detected Context: {classes[class_idx]}
        Confidence Score: {confidence*100:.1f}%
        
        Recording length: {len(audio_data)/sr:.1f} seconds
        """
        return report
    except Exception as exc:
        return f"Error during analysis: {exc}"

# Build the Gradio UI: a microphone recorder whose output is passed to
# predict_cat_sound, with the resulting report shown in a textbox.
audio_input = gr.Audio(
    label="Record Cat Sound",
    sources=["microphone"],
    type="numpy",
)
results_box = gr.Textbox(
    placeholder="Results will appear here...",
    label="Analysis Results",
)
interface = gr.Interface(
    fn=predict_cat_sound,
    inputs=audio_input,
    outputs=results_box,
    title="🐱 Cat Meow Sound Analyzer",
    description="""
    Record your cat's meow to analyze its context.
    The model will classify the sound as:
    - Brushing
    - Waiting for food
    - Isolation
    """,
    theme="default",
)

# Start serving the app.
interface.launch()