"""
Utility functions for preprocessing tabular and audio data
"""

import os
import warnings

import librosa
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
# Install a global "ignore" filter: from this point on every warning raised
# anywhere in the process is suppressed, not only those from this module.
warnings.filterwarnings('ignore')


class TabularPreprocessor:
    """Preprocessor for tabular medical data.

    Selects a fixed, ordered set of feature columns, fills missing values
    with a ``SimpleImputer(strategy='most_frequent')`` and returns the row
    as a ``float32`` array.
    """

    def __init__(self):
        # The order of this list defines the column order of the output.
        self.feature_cols = [
            'age', 'gender', 'tbContactHistory', 'wheezingHistory',
            'phlegmCough', 'familyAsthmaHistory', 'feverHistory',
            'coldPresent', 'packYears'
        ]
        self.imputer = SimpleImputer(strategy='most_frequent')
        self.is_fitted = False

    def fit(self, X_df):
        """Fit the imputer on training data.

        Args:
            X_df: DataFrame containing at least every column listed in
                  ``feature_cols``; other columns are ignored.

        Returns:
            self, so calls can be chained.
        """
        self.imputer.fit(X_df[self.feature_cols].copy())
        self.is_fitted = True
        return self

    def _build_frame(self, data_dict):
        """Return a one-row DataFrame holding exactly ``feature_cols``, in order."""
        df = pd.DataFrame([data_dict])

        # Features absent from the input become NaN so the imputer fills them.
        for col in self.feature_cols:
            if col not in df.columns:
                df[col] = np.nan

        # Selecting by list both drops unknown keys and fixes column order.
        return df[self.feature_cols].copy()

    def transform(self, data_dict):
        """
        Transform a single input dictionary to model-ready format

        Args:
            data_dict: Dictionary with keys matching feature_cols
                      e.g., {'age': 43, 'gender': 1, ...}
                      Missing keys are treated as missing values.

        Returns:
            numpy array of shape (1, n_features), dtype float32
        """
        X = self._build_frame(data_dict)

        if not self.is_fitted:
            # No training statistics are available (e.g. only a pre-trained
            # model was loaded), so fall back to an imputer fitted on this
            # single sample.
            #
            # The fit uses a zero-filled copy: SimpleImputer discards columns
            # that are entirely missing at fit time, which would silently
            # shrink the output below n_features. Zero therefore becomes the
            # fill value for features missing from this first sample.
            #
            # NOTE(review): the fallback imputer is kept for later calls, so
            # subsequent samples are imputed with this sample's values.
            # Calling fit() on real training data avoids that.
            self.imputer = SimpleImputer(strategy='most_frequent')
            self.imputer.fit(X.fillna(0.0))
            self.is_fitted = True

        # Impute missing values, then cast to the dtype the model consumes.
        X_imputed = self.imputer.transform(X)
        return X_imputed.astype(np.float32)


class AudioPreprocessor:
    """Preprocessor for audio data (cough and vowel sounds)"""

    def __init__(self, sample_rate=16000, duration=1.0, n_mfcc=20,
                 n_fft=2048, hop_length=512, n_mels=64):
        """
        Initialize audio preprocessor with same parameters as training

        Args:
            sample_rate: Target sample rate (Hz)
            duration: Target duration in seconds
            n_mfcc: Number of MFCC coefficients
            n_fft: FFT window size
            hop_length: Hop length for STFT
            n_mels: Number of mel bands
        """
        self.sample_rate = sample_rate
        self.duration = duration
        self.n_mfcc = n_mfcc
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mels = n_mels
        self.input_size = n_mfcc * 3  # MFCC + Delta + Delta-Delta

    def _prepare_waveform(self, audio):
        """Return `audio` as a mono 1-D array of exactly the target length."""
        audio = np.asarray(audio)

        # Downmix to mono. librosa lays multi-channel audio out as
        # (channels, samples) while many readers return (samples, channels);
        # the channel axis is taken to be the shorter one so both layouts
        # work. Ties resolve to axis 0, the librosa layout.
        while audio.ndim > 1:
            audio = np.mean(audio, axis=int(np.argmin(audio.shape)))

        # Truncate or zero-pad at the end to the configured duration.
        target_samples = int(self.sample_rate * self.duration)
        if len(audio) > target_samples:
            return audio[:target_samples]
        return np.pad(audio, (0, target_samples - len(audio)), mode='constant')

    def load_and_extract_features(self, audio_path_or_array):
        """
        Load audio file and extract MFCC features

        Args:
            audio_path_or_array: Path to audio file (str or os.PathLike) or
                                 an array-like of samples. Arrays are not
                                 resampled; they are taken to already be at
                                 `sample_rate`.

        Returns:
            numpy array of shape (n_frames, n_mfcc*3). If anything fails,
            the error is printed and an all-zero array of the expected
            shape is returned instead of raising.
        """
        try:
            # Load audio. Path objects are accepted as well as strings;
            # anything else is treated as raw samples.
            if isinstance(audio_path_or_array, (str, os.PathLike)):
                audio, _ = librosa.load(
                    os.fspath(audio_path_or_array), sr=self.sample_rate
                )
            else:
                audio = audio_path_or_array

            audio = self._prepare_waveform(audio)

            # Extract MFCC
            mfcc = librosa.feature.mfcc(
                y=audio,
                sr=self.sample_rate,
                n_mfcc=self.n_mfcc,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                n_mels=self.n_mels
            )

            # Extract delta and delta-delta
            delta = librosa.feature.delta(mfcc)
            delta_delta = librosa.feature.delta(mfcc, order=2)

            # Stack to (n_mfcc*3, n_frames), then transpose to
            # (n_frames, n_mfcc*3)
            features = np.vstack([mfcc, delta, delta_delta]).T

            # Handle NaN and Inf values
            features = np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)

            # Clip extreme values
            features = np.clip(features, -1e6, 1e6)

            return features

        except Exception as e:
            # Deliberate best-effort: report the problem and hand back zero
            # features with the frame count a full-length clip would give.
            print(f"Error processing audio: {str(e)}")
            expected_frames = int((self.sample_rate * self.duration) / self.hop_length) + 1
            return np.zeros((expected_frames, self.input_size))

    def extract_from_both_audios(self, cough_audio, vowel_audio, combine_mode="concat"):
        """
        Extract features from both cough and vowel audio

        Args:
            cough_audio: Path to cough audio or numpy array
            vowel_audio: Path to vowel audio or numpy array
            combine_mode: How to combine features ("concat" or "average")

        Returns:
            Combined features as numpy array: (n_frames, 2*n_mfcc*3) for
            "concat", (n_frames, n_mfcc*3) for "average"

        Raises:
            ValueError: If combine_mode is not "concat" or "average"
        """
        # Reject a bad mode up front, before doing any feature extraction.
        if combine_mode not in ("concat", "average"):
            raise ValueError(f"Unknown combine_mode: {combine_mode}")

        cough_features = self.load_and_extract_features(cough_audio)
        vowel_features = self.load_and_extract_features(vowel_audio)

        if combine_mode == "concat":
            # Concatenate along feature dimension
            return np.concatenate([cough_features, vowel_features], axis=1)

        # Average the features
        return (cough_features + vowel_features) / 2.0


def get_disease_name(prediction):
    """Return the human-readable label for a numeric class prediction.

    Args:
        prediction: Class index; anything ``int()`` accepts.

    Returns:
        The label for classes 0, 1 and 2, or "Unknown" for any other value.
    """
    # Position in the tuple is the class index.
    labels = (
        "Healthy",
        "COPD (Chronic Obstructive Pulmonary Disease)",
        "Asthma",
    )
    class_index = int(prediction)
    if 0 <= class_index < len(labels):
        return labels[class_index]
    return "Unknown"


def get_disease_info(prediction):
    """Get detailed information about the predicted disease.

    Args:
        prediction: Class index; anything ``int()`` accepts.

    Returns:
        Dictionary with keys "name" (str), "description" (str) and
        "recommendations" (list of str). A class index outside 0-2 yields
        an explicit "Unknown" entry rather than the "Healthy" one, so an
        unrecognised prediction is never reported as a clean result.
    """
    info_map = {
        0: {
            "name": "Healthy",
            "description": "No respiratory disease detected. Lung function appears normal.",
            "recommendations": [
                "Maintain regular exercise and healthy lifestyle",
                "Avoid smoking and secondhand smoke",
                "Get regular health check-ups"
            ]
        },
        1: {
            "name": "COPD (Chronic Obstructive Pulmonary Disease)",
            "description": "A chronic inflammatory lung disease that causes obstructed airflow from the lungs.",
            "recommendations": [
                "Consult with a pulmonologist for proper diagnosis",
                "Consider pulmonary rehabilitation program",
                "Quit smoking if applicable",
                "Use prescribed medications as directed",
                "Get vaccinated against flu and pneumonia"
            ]
        },
        2: {
            "name": "Asthma",
            "description": "A condition in which airways narrow and swell, producing extra mucus.",
            "recommendations": [
                "Consult with an allergist or pulmonologist",
                "Identify and avoid asthma triggers",
                "Use prescribed inhalers as directed",
                "Monitor breathing with a peak flow meter",
                "Have an asthma action plan"
            ]
        }
    }
    # Same shape as the known entries so callers can render it unchanged.
    unknown_info = {
        "name": "Unknown",
        "description": "The prediction did not match any known class, so no assessment is available.",
        "recommendations": [
            "Repeat the screening",
            "Consult a healthcare professional if symptoms persist"
        ]
    }
    return info_map.get(int(prediction), unknown_info)