"""Audio format utilities: validation, conversion, and test-signal generation.

The ML pipelines in this project expect mono 16-bit PCM WAV audio; these
helpers convert arbitrary input (WebM, OGG, MP3, ...) toward that shape,
preferring ffmpeg when available and falling back to pure-Python paths.
"""

import io
import logging
import os
import subprocess
import tempfile
import wave
from pathlib import Path
from typing import Optional, Tuple

import numpy as np

logger = logging.getLogger(__name__)


def check_ffmpeg_available() -> bool:
    """Check if ffmpeg is available on the system."""
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            text=True,
            timeout=5,
        )
        return result.returncode == 0
    except (subprocess.SubprocessError, FileNotFoundError):
        # TimeoutExpired is a subclass of SubprocessError, so it is covered.
        return False


def convert_with_ffmpeg(audio_data: bytes, target_sr: int = 8000,
                        target_format: str = 'wav') -> Optional[bytes]:
    """
    Convert audio using ffmpeg for high-quality format conversion.

    Args:
        audio_data: Input audio bytes in any format
        target_sr: Target sampling rate (default: 8000 Hz for ML models)
        target_format: Target audio format (default: wav)

    Returns:
        Converted audio bytes or None if conversion fails
    """
    if not check_ffmpeg_available():
        logger.warning("ffmpeg not available for audio conversion")
        return None

    # Keep the paths (not the closed file objects) so cleanup in `finally`
    # works even if an exception fires between the two temp-file creations.
    input_path: Optional[str] = None
    output_path: Optional[str] = None
    try:
        # Write the source bytes to a temp file ffmpeg can read.
        with tempfile.NamedTemporaryFile(suffix='.input', delete=False) as temp_input:
            temp_input.write(audio_data)
            temp_input.flush()
            input_path = temp_input.name

        # Reserve a destination filename; ffmpeg overwrites it via -y.
        with tempfile.NamedTemporaryFile(suffix=f'.{target_format}', delete=False) as temp_output:
            output_path = temp_output.name

        # Build ffmpeg command for high-quality conversion.
        ffmpeg_cmd = [
            'ffmpeg',
            '-i', input_path,
            '-ar', str(target_sr),    # Resample to target sample rate
            '-ac', '1',               # Convert to mono
            '-acodec', 'pcm_s16le',   # 16-bit PCM (standard for ML)
            '-f', target_format,      # Output format
            '-loglevel', 'error',     # Reduce ffmpeg output
            '-y',                     # Overwrite output
            output_path,
        ]
        logger.debug(f"Running ffmpeg conversion: {' '.join(ffmpeg_cmd)}")

        result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True, timeout=30)

        if result.returncode == 0:
            # Read converted audio back from the output temp file.
            with open(output_path, 'rb') as f:
                converted_audio = f.read()
            logger.debug(f"ffmpeg conversion successful: "
                         f"{len(audio_data)} -> {len(converted_audio)} bytes "
                         f"({target_sr}Hz, mono, {target_format})")
            return converted_audio

        logger.error(f"ffmpeg conversion failed: {result.stderr}")
        return None
    except Exception as e:
        logger.error(f"ffmpeg conversion error: {str(e)}")
        return None
    finally:
        # Best-effort cleanup of temporary files.
        try:
            if input_path and os.path.exists(input_path):
                os.unlink(input_path)
            if output_path and os.path.exists(output_path):
                os.unlink(output_path)
        except Exception as cleanup_error:
            logger.warning(f"Failed to cleanup temp files: {cleanup_error}")


def convert_for_ml_models(audio_data: bytes, pipeline_type: str = 'mfcc') -> bytes:
    """
    Convert audio specifically for ML model requirements.

    Args:
        audio_data: Input audio bytes
        pipeline_type: ML pipeline type ('mfcc', 'mel_cnn', 'raw_cnn')

    Returns:
        Audio bytes optimized for the specific ML model
    """
    # All our ML models expect 8kHz, mono, 16-bit PCM.
    target_sr = 8000

    # Try ffmpeg first for best quality.
    converted = convert_with_ffmpeg(audio_data, target_sr=target_sr)
    if converted:
        logger.debug(f"Used ffmpeg for {pipeline_type} model audio conversion")
        return converted

    # Fallback to existing conversion methods.
    logger.debug(f"Using fallback audio conversion for {pipeline_type} model")
    return convert_audio_format(audio_data)


def validate_audio_format(audio_data: bytes) -> bool:
    """
    Validate that audio data is in a supported format.

    Args:
        audio_data: Raw audio bytes

    Returns:
        True if format is supported, False otherwise
    """
    # Check minimum size: a canonical WAV header is 44 bytes.
    if len(audio_data) < 44:
        logger.debug(f"Audio data too small: {len(audio_data)} bytes (minimum 44 for WAV header)")
        return False

    # Check for null/empty data.
    if audio_data[:20] == b'\x00' * 20:
        logger.error("Audio data appears to be empty/null bytes")
        return False

    # Check if it starts with RIFF header.
    if not audio_data.startswith(b'RIFF'):
        logger.error(f"Audio data does not start with RIFF header. First 8 bytes: {audio_data[:8]}")
        # Try to provide more diagnostic info.
        if len(audio_data) > 20:
            logger.error(f"First 20 bytes as hex: {audio_data[:20].hex()}")
        return False

    try:
        with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
            # Check basic WAV properties.
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            frame_rate = wav_file.getframerate()
            frames = wav_file.getnframes()

            logger.debug(f"Audio format: {channels} channels, {sample_width} bytes/sample, "
                         f"{frame_rate} Hz, {frames} frames")

            # Be more lenient with streaming chunks.
            if channels not in [1, 2]:
                logger.warning(f"Unusual channel count: {channels}")
                return False
            if sample_width not in [1, 2, 4]:  # 8-bit, 16-bit, 32-bit
                logger.warning(f"Unusual sample width: {sample_width}")
                return False
            if frame_rate < 8000 or frame_rate > 48000:  # Wider range
                logger.warning(f"Unusual frame rate: {frame_rate}")
                return False
            if frames == 0:
                logger.warning("No audio frames found")
                return False

            return True
    except wave.Error as e:
        logger.error(f"WAV format error: {str(e)}")
        logger.error(f"Audio data size: {len(audio_data)} bytes")
        if len(audio_data) > 44:
            logger.error(f"WAV header bytes: {audio_data[:44].hex()}")
        return False
    except Exception as e:
        logger.error(f"Audio validation failed: {str(e)}")
        logger.error(f"Audio data size: {len(audio_data)} bytes")
        return False


def convert_audio_format(audio_data: bytes) -> bytes:
    """
    Convert various audio formats (WebM, OGG, MP3, etc.) to WAV format.

    Args:
        audio_data: Input audio bytes in any supported format

    Returns:
        Converted audio bytes in WAV format

    Raises:
        Exception: If conversion fails
    """
    try:
        # First detect the audio format.
        from .webm_converter import detect_audio_format, convert_webm_to_wav

        audio_format = detect_audio_format(audio_data)
        logger.debug(f"Detected audio format: {audio_format}")

        # Handle WebM specifically (common from MediaRecorder).
        if audio_format == 'webm':
            logger.info("Converting WebM audio to WAV (fallback method)")
            converted = convert_webm_to_wav(audio_data)
            if converted:
                return converted
            else:
                raise Exception("WebM conversion failed")

        # Try using pydub for format conversion (handles WebM, OGG, MP3, etc.).
        try:
            from pydub import AudioSegment

            # Load audio from bytes.
            audio = AudioSegment.from_file(io.BytesIO(audio_data))

            # Convert to mono, 16kHz, 16-bit.
            audio = audio.set_channels(1)
            audio = audio.set_frame_rate(16000)
            audio = audio.set_sample_width(2)

            # Export as WAV.
            output_buffer = io.BytesIO()
            audio.export(output_buffer, format="wav")
            return output_buffer.getvalue()
        except ImportError:
            logger.warning("pydub not installed, falling back to basic WAV conversion")
            # Fall back to basic WAV processing.
            return convert_to_mono_16khz(audio_data)
        except Exception as e:
            logger.warning(f"pydub conversion failed: {str(e)}, trying fallback methods")
            # Try WebM converter as fallback.
            if audio_format in ['webm', 'unknown']:
                logger.info("Trying WebM fallback converter")
                converted = convert_webm_to_wav(audio_data)
                if converted:
                    return converted
            # Last resort: basic WAV processing.
            return convert_to_mono_16khz(audio_data)
    except Exception as e:
        logger.error(f"All audio conversion methods failed: {str(e)}")
        raise Exception(f"Failed to convert audio format: {str(e)}")


def convert_to_mono_16khz(audio_data: bytes) -> bytes:
    """
    Convert audio to mono, 16kHz format suitable for speech recognition.

    Args:
        audio_data: Input audio bytes (WAV format)

    Returns:
        Converted audio bytes in mono 16kHz WAV format

    Raises:
        Exception: If conversion fails
    """
    try:
        with wave.open(io.BytesIO(audio_data), 'rb') as input_wav:
            frames = input_wav.readframes(input_wav.getnframes())
            channels = input_wav.getnchannels()
            sample_width = input_wav.getsampwidth()
            frame_rate = input_wav.getframerate()

        # Convert to numpy array (only 16-bit PCM is supported here).
        if sample_width == 2:
            audio_array = np.frombuffer(frames, dtype=np.int16)
        else:
            raise Exception(f"Unsupported sample width: {sample_width}")

        # Convert stereo to mono if needed.
        if channels == 2:
            audio_array = audio_array.reshape(-1, 2)
            audio_array = np.mean(audio_array, axis=1).astype(np.int16)

        # Resample to 16kHz if needed. Nearest-index mapping handles both
        # up- and downsampling, including non-integer ratios (the previous
        # np.repeat-based upsampling silently did nothing for ratios whose
        # reciprocal truncated to 1, e.g. 11025 Hz -> 16 kHz).
        # For production, use a proper polyphase/FFT resampler.
        if frame_rate != 16000 and len(audio_array) > 0:
            target_len = max(1, int(round(len(audio_array) * 16000 / frame_rate)))
            indices = np.minimum(
                (np.arange(target_len) * frame_rate / 16000).astype(int),
                len(audio_array) - 1,
            )
            audio_array = audio_array[indices]

        # Create output WAV.
        output = io.BytesIO()
        with wave.open(output, 'wb') as output_wav:
            output_wav.setnchannels(1)       # Mono
            output_wav.setsampwidth(2)       # 16-bit
            output_wav.setframerate(16000)   # 16kHz
            output_wav.writeframes(audio_array.tobytes())

        return output.getvalue()
    except Exception as e:
        logger.error(f"Audio conversion failed: {str(e)}")
        raise Exception(f"Failed to convert audio: {str(e)}")


def get_audio_duration(audio_data: bytes) -> float:
    """
    Get duration of audio in seconds.

    Args:
        audio_data: WAV audio bytes

    Returns:
        Duration in seconds (0.0 if the audio cannot be parsed)
    """
    try:
        with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
            frames = wav_file.getnframes()
            frame_rate = wav_file.getframerate()
            duration = frames / frame_rate
            return duration
    except Exception as e:
        logger.error(f"Failed to get audio duration: {str(e)}")
        return 0.0


def audio_to_numpy(audio_data: bytes) -> Tuple[np.ndarray, int]:
    """
    Convert WAV audio bytes to numpy array.

    Args:
        audio_data: WAV audio bytes

    Returns:
        Tuple of (audio_array, sample_rate); the array is float32 in [-1, 1]

    Raises:
        Exception: If conversion fails
    """
    try:
        with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
            frames = wav_file.readframes(wav_file.getnframes())
            sample_rate = wav_file.getframerate()
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()

        if sample_width == 2:
            audio_array = np.frombuffer(frames, dtype=np.int16)
        else:
            raise Exception(f"Unsupported sample width: {sample_width}")

        # Convert to float32 and normalize to [-1, 1].
        audio_array = audio_array.astype(np.float32) / 32767.0

        # Mix stereo down to mono by averaging channels.
        if channels == 2:
            audio_array = audio_array.reshape(-1, 2)
            audio_array = np.mean(audio_array, axis=1)

        return audio_array, sample_rate
    except Exception as e:
        logger.error(f"Failed to convert audio to numpy: {str(e)}")
        raise Exception(f"Audio conversion failed: {str(e)}")


def create_test_audio(digit: str, duration: float = 1.0, sample_rate: int = 16000) -> bytes:
    """
    Create test audio data for development purposes.

    Args:
        digit: Digit to simulate ('0'-'9')
        duration: Audio duration in seconds
        sample_rate: Sample rate in Hz

    Returns:
        WAV audio bytes

    Raises:
        Exception: If synthesis fails
    """
    try:
        # Create simple tone pattern based on digit.
        t = np.linspace(0, duration, int(sample_rate * duration), False)

        # Different frequency pairs for each digit.
        freq_map = {
            '0': [400, 600],    # Low frequencies
            '1': [800, 1000],   # Higher frequencies
            '2': [600, 800],
            '3': [700, 900],
            '4': [500, 700],
            '5': [900, 1100],
            '6': [450, 650],
            '7': [750, 950],
            '8': [550, 750],
            '9': [850, 1050],
        }
        freqs = freq_map.get(digit, [440, 880])

        # Generate a two-tone signal.
        signal = (np.sin(freqs[0] * 2.0 * np.pi * t) * 0.3
                  + np.sin(freqs[1] * 2.0 * np.pi * t) * 0.3)

        # Apply a decaying envelope so the tone sounds like a short burst.
        envelope = np.exp(-3 * t)
        signal = signal * envelope

        # Convert to int16 PCM.
        signal = (signal * 32767).astype(np.int16)

        # Create WAV.
        output = io.BytesIO()
        with wave.open(output, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(signal.tobytes())

        return output.getvalue()
    except Exception as e:
        logger.error(f"Failed to create test audio: {str(e)}")
        raise Exception(f"Test audio creation failed: {str(e)}")