import os

import librosa
import numpy as np
import speech_recognition as sr
from groq import Groq
from inference_sdk import InferenceHTTPClient
from transformers import pipeline

# Initialize the voice emotion pipeline once at import time (global).
# This prevents reloading the model on every call to get_voice_emotion().
try:
    voice_pipe = pipeline(
        "audio-classification",
        model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
    )
except Exception as e:
    print(f"Warning: Could not load voice emotion model: {e}")
    voice_pipe = None


def get_facial_emotion(image_path):
    """
    Analyze facial emotion from an image using the Roboflow hosted API.

    Args:
        image_path: Path to the image file.

    Returns:
        str: Detected emotion label (e.g., "happy", "sad"); falls back to
        "neutral" on missing API key, no detections, or any error.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("ROBOFLOW_API_KEY")
        if not api_key:
            print("Error: ROBOFLOW_API_KEY not found in environment variables")
            return "neutral"

        # Initialize Roboflow client
        client = InferenceHTTPClient(
            api_url="https://detect.roboflow.com",
            api_key=api_key,
        )

        # Run inference on the image
        result = client.infer(image_path, model_id="human-face-emotions/28")

        # Parse response and pick the top prediction.
        if result and "predictions" in result and len(result["predictions"]) > 0:
            # Fix: select the highest-confidence prediction explicitly rather
            # than assuming the API returns predictions sorted by confidence.
            top_prediction = max(
                result["predictions"],
                key=lambda p: p.get("confidence", 0),
            )
            emotion = top_prediction.get("class", "neutral")
            confidence = top_prediction.get("confidence", 0)
            print(f"Facial emotion detected: {emotion} (confidence: {confidence:.2f})")
            return emotion

        print("No face detected in image")
        return "neutral"
    except Exception as e:
        # Best-effort design: callers always receive a usable label.
        print(f"Error in facial emotion detection: {e}")
        return "neutral"


def get_voice_emotion(audio_path):
    """
    Analyze vocal emotion from an audio file using Hugging Face transformers.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Detected emotion label (e.g., "calm", "angry", "happy");
        falls back to "neutral" if the model failed to load, no result is
        produced, or any error occurs.
    """
    try:
        if voice_pipe is None:
            # Module-level model load failed; degrade gracefully.
            print("Voice emotion model not loaded")
            return "neutral"

        # Load audio and resample to 16 kHz (required by the wav2vec2 model).
        audio_array, sample_rate = librosa.load(audio_path, sr=16000)

        # Run inference; the pipeline returns results sorted by score.
        result = voice_pipe(audio_array)

        # Get the highest-scoring emotion.
        if result and len(result) > 0:
            top_emotion = result[0]
            emotion_label = top_emotion.get("label", "neutral")
            score = top_emotion.get("score", 0)
            print(f"Voice emotion detected: {emotion_label} (score: {score:.2f})")
            return emotion_label
        return "neutral"
    except Exception as e:
        print(f"Error in voice emotion detection: {e}")
        return "neutral"


def get_transcript(audio_path):
    """
    Transcribe speech from an audio file using Google Speech Recognition.

    Args:
        audio_path: Path to the audio file.

    Returns:
        str: Transcribed text, or an empty string if transcription fails.
    """
    try:
        # Initialize recognizer
        r = sr.Recognizer()

        # Load audio file
        with sr.AudioFile(audio_path) as source:
            audio_data = r.record(source)

        # Transcribe using Google Speech Recognition (network call).
        text = r.recognize_google(audio_data)
        print(f"Transcription: {text}")
        return text
    except sr.UnknownValueError:
        # Speech was unintelligible — not an error condition for callers.
        print("Could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service: {e}")
        return ""
    except Exception as e:
        print(f"Error in transcription: {e}")
        return ""


def get_llm_response(user_query, face, voice, text):
    """
    Generate an empathetic response using the Groq LLM based on emotional context.

    Args:
        user_query: The user's typed query.
        face: Detected facial emotion.
        voice: Detected vocal emotion.
        text: Transcribed speech text.

    Returns:
        str: AI-generated empathetic response, or an error message string
        if the API key is missing or the request fails.
    """
    try:
        # Get API key from environment variable
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            return "Error: GROQ_API_KEY not found in environment variables"

        # Initialize Groq client
        client = Groq(api_key=api_key)

        # System prompt carries the emotional context and behavioral rules;
        # the user's query goes in its own user-role message (fix: the
        # original sent only a system message, which is incorrect chat usage).
        system_prompt = f"""You are an empathetic AI assistant that provides thoughtful, caring responses based on the user's emotional state.

**Emotional Context Analysis:**
- Facial Expression: {face}
- Vocal Tone: {voice}
- Spoken Words: {text if text else "No speech detected"}

**Instructions:**
1. First, acknowledge and validate the user's emotional state based on the above indicators
2. Show empathy and understanding
3. Provide a helpful, supportive answer to their query
4. Keep your response warm, genuine, and human-like
5. If there are discrepancies between emotional signals, address them sensitively

Respond in a natural, conversational manner that demonstrates emotional intelligence."""

        # Call Groq API
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ],
            model="llama-3.1-8b-instant",
            temperature=0.7,
            max_tokens=1024,
        )

        # Extract and return response
        response = chat_completion.choices[0].message.content
        return response
    except Exception as e:
        return f"Error generating response: {e}"


# The record_audio function has been removed as it is no longer needed.
# st.audio_recorder in app.py handles audio capture in the browser.