import numpy as np
import pandas as pd
import torch
from peft import PeftModel
from torch.amp import autocast
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
)


class EmotionAnalyzer:
    """
    Wrapper for an emotion-analysis model.

    Long texts are tokenized once, split into overlapping token windows,
    and each window is classified independently; results are returned as
    one DataFrame row per window.
    """

    def __init__(
        self,
        checkpoint: str = 'bhadresh-savani/roberta-base-emotion',
        window_size: int = 512,
        stride: int = 256,
        batch_size: int = 16,
        use_amp: bool = True,
        adapter_path: str | None = None,
        num_emotions: int | None = None,
        problem_type: str = 'multi_label_classification'
    ):
        """
        Args:
            checkpoint (str, optional): Model's name (hf repo id).
                Defaults to 'bhadresh-savani/roberta-base-emotion'.
            window_size (int, optional): Number of tokens per window.
                Defaults to 512.
            stride (int, optional): Step between consecutive window starts
                (overlap = window_size - stride). Defaults to 256.
            batch_size (int, optional): Number of windows per forward pass.
                Defaults to 16.
            use_amp (bool, optional): Flag to use AMP (effective only on
                CUDA). Defaults to True.
            adapter_path (str | None, optional): Path to PEFT adapter
                weights; when given, the base model is loaded 4-bit
                quantized and the adapter is applied. Defaults to None.
            num_emotions (int | None, optional): Number of emotions in
                model's output head (needed for the PEFT path).
                Defaults to None.
            problem_type (str, optional): Model's task. Defaults to
                'multi_label_classification'.
        """
        self.checkpoint = checkpoint
        if adapter_path is not None:
            # If the model was trained with PEFT and saved in this format
            # we need to explicitly tell HuggingFace API about it:
            # reproduce the 4-bit quantization setup used at train time.
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True
            )
            # Base model with quantized weights and the fine-tuned head size.
            base_model = AutoModelForSequenceClassification.from_pretrained(
                checkpoint,
                quantization_config=bnb_config,
                num_labels=num_emotions,
                problem_type=problem_type
            )
            self.model = PeftModel.from_pretrained(base_model, adapter_path)
            # Multi-label fine-tune -> independent per-class probabilities.
            self.use_sigmoid = True
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
            self.use_sigmoid = False

        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.window_size = window_size
        self.stride = stride
        self.batch_size = batch_size
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # BUG FIX: the original compared a torch.device object against the
        # string 'cuda', which silently disabled AMP on some PyTorch
        # versions -- compare the device *type* instead.
        self.use_amp = use_amp and self.device.type == 'cuda'
        # BUG FIX: `.to()` is not supported on bitsandbytes 4-bit models
        # (they are placed on the GPU at load time), so only move the
        # plain (non-PEFT) model explicitly.
        if adapter_path is None:
            self.model.to(self.device)
        self.model.eval()

        # Default labels of 'bhadresh-savani/roberta-base-emotion'; fall
        # back to generic names when a custom head size is requested so
        # DataFrame construction cannot fail on a column-count mismatch.
        if num_emotions is not None and num_emotions != 6:
            self.EMOTION_LABELS = [f'emotion_{i}' for i in range(num_emotions)]
        else:
            self.EMOTION_LABELS = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

    def _create_windows(self, inputs_ids: torch.Tensor) -> list[torch.Tensor]:
        """
        Creates overlapping windows from tokenized text.

        Args:
            inputs_ids (torch.Tensor): 1-D tensor with token ids.

        Returns:
            list[torch.Tensor]: windows of at most `window_size` tokens,
            one starting every `stride` tokens.
        """
        windows = []
        for start in range(0, len(inputs_ids), self.stride):
            windows.append(inputs_ids[start : start + self.window_size])
            # BUG FIX: stop once a window reaches the end of the text;
            # continuing produces extra windows that are strict suffixes
            # of the previous one and double-count the final tokens.
            if start + self.window_size >= len(inputs_ids):
                break
        return windows

    def _process_batch(self, batch_windows: list[torch.Tensor]) -> np.ndarray:
        """
        Runs the model on one batch of token windows.

        Args:
            batch_windows (list[torch.Tensor]): batch with (variable-length)
                windows of token ids.

        Returns:
            np.ndarray: (batch, num_labels) array of probabilities.
        """
        # Right-pad all windows to the batch maximum length.
        batch = torch.nn.utils.rnn.pad_sequence(
            batch_windows,
            batch_first=True,
            padding_value=self.tokenizer.pad_token_id
        ).to(self.device)
        # NOTE(review): a genuine pad token inside the text would also be
        # masked here; assumed not to occur in tokenized plain text.
        attention_mask = (batch != self.tokenizer.pad_token_id).long()

        with torch.no_grad():
            if self.use_amp:
                # BUG FIX: torch.amp.autocast requires the device_type
                # argument -- the original bare autocast() raises TypeError.
                with autocast(device_type=self.device.type):
                    outputs = self.model(batch, attention_mask=attention_mask)
            else:
                outputs = self.model(batch, attention_mask=attention_mask)

        # Depending on the task select appropriate function:
        # multi-label classification needs a per-class sigmoid, while
        # single-label extraction needs a softmax over classes.
        if self.use_sigmoid:
            probs = torch.sigmoid(outputs.logits)
        else:
            probs = torch.softmax(outputs.logits, dim=-1)
        return probs.cpu().numpy()

    def analyze_text(self, text: str) -> pd.DataFrame:
        """
        Tokenizes text, splits it into overlapping windows and analyzes them.

        Args:
            text (str): subtitles (or any raw text).

        Returns:
            pd.DataFrame: one row per window with emotion probabilities plus
            `window_id`, `window_start`, `window_end` token offsets.
        """
        # Tokenize the whole text once; truncation is off because windowing
        # handles texts longer than the model's limit.
        tokens = self.tokenizer(text, return_tensors='pt', truncation=False)
        input_ids = tokens['input_ids'][0]  # drop the batch dimension

        windows = self._create_windows(input_ids)
        if not windows:
            return pd.DataFrame(columns=self.EMOTION_LABELS)

        # Process the windows in batches.
        all_emotions = []
        for i in range(0, len(windows), self.batch_size):
            all_emotions.extend(self._process_batch(windows[i:i + self.batch_size]))

        # Assemble the result with token-offset bookkeeping per window.
        df = pd.DataFrame(all_emotions, columns=self.EMOTION_LABELS)
        df['window_id'] = range(len(df))
        df['window_start'] = [i * self.stride for i in range(len(df))]
        df['window_end'] = [min(i * self.stride + self.window_size, len(input_ids))
                            for i in range(len(df))]
        return df