import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import transformers
from transformers import PretrainedConfig
# Required by score() (via InferenceHandler.inference_per_example) and by all
# chunking helpers; without it every scoring path raises NameError.
from nltk.tokenize import sent_tokenize
from tqdm import tqdm


class AlignScoreCS(transformers.XLMRobertaModel):
    """AlignScoreCS: multi-head XLM-RoBERTa model for factual-consistency scoring.

    The model was trained per the AlignScore paper for 3 days on 4 GPUs
    (3 epochs, lr 1e-5, AdamW eps 1e-6, batch size 32, warmup ratio 0.06,
    weight decay 0.1). It is an XLM-RoBERTa encoder shared by three
    classification heads: regression ("re"), binary ("bin") and 3-way ("3way").

    Usage::

        alignScoreCS = AlignScoreCS.from_pretrained("krotima1/AlignScoreCS")
        alignScoreCS.score(context, claim)

    ``score`` returns the probability of the ALIGNED class using the 3-way
    head, as in the paper. Other methods accept ``task_name``:

    - ``task_name="re"``   : regression head
    - ``task_name="bin"``  : binary classification head
    - ``task_name="3way"`` : 3-way classification head
    """

    # Sub-directory names holding the per-task model weights/configs.
    _regression_model = "re_model"
    _binary_class_model = "bin_model"
    _3way_class_model = "3way_model"

    def __init__(self, encoder, taskmodels_dict, model_name="xlm-roberta-large", **kwargs):
        """Wrap a shared encoder and a dict of task-specific heads.

        :param encoder: the shared XLM-RoBERTa encoder module
        :param taskmodels_dict: mapping task name -> sequence-classification model
        :param model_name: HF hub name used to (re)load the tokenizer
        """
        super().__init__(transformers.XLMRobertaConfig(), **kwargs)
        self.encoder = encoder
        self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)
        self.tokenizer = None  # loaded lazily on first use
        self.model_name = model_name
        self.inferencer = None  # created lazily by init_inferencer()

    def init_inferencer(self, device="cuda"):
        """Lazily build the tokenizer and the paper-style inference handler."""
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
        self.inferencer = self.InferenceHandler(self, self.tokenizer, device)

    def score(self, context, claim, **kwargs):
        """Score context/claim with the ALIGNED probability of the 3-way head.

        Uses the inference code ported from the original AlignScore repository.
        Accepts a single string pair or two equal-length lists of strings.
        Returns a 1-D tensor of scores.
        """
        if self.inferencer is None:
            self.init_inferencer()
        scores = self.inferencer.nlg_eval(context, claim)
        return scores

    def score_sentences(self, context: str, claim: str, task_name="3way", batch_size=2, return_all_outputs=False, **kwargs):
        """Score with the ALIGNED probability of the head selected by ``task_name``.

        The context is chunked into 350-token blocks and the claim is split
        into sentences; the final score is mean-over-claims of max-over-context
        chunks, as in the paper.

        :param task_name: "re" | "bin" | "3way"
        :param batch_size: mini-batch size for the forward passes
        :param return_all_outputs: if True return a dict with raw outputs too
        """
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
        chunked_inputs = self.chunk_sent_input(context, claim, chunk_size=350, chunk_claim_size=150)
        nclaims, ncontexts = (chunked_inputs["n_claims"], chunked_inputs["n_contexts"])
        with torch.no_grad():
            # Drop the bookkeeping "n_*" entries; tensorize the rest.
            chunked_inputs = {key: torch.tensor(item).to(self.device)
                              for key, item in chunked_inputs.items() if not key.startswith("n_")}
            chunked_outputs = {}
            for i in range(0, len(chunked_inputs["input_ids"]), batch_size):
                tmp = self.forward(task_name=task_name,
                                   **{"input_ids": chunked_inputs["input_ids"][i:i + batch_size],
                                      "attention_mask": chunked_inputs["attention_mask"][i:i + batch_size]},
                                   **kwargs)
                for k, item in tmp.items():
                    chunked_outputs[k] = chunked_outputs.get(k, []) + [item]
            logits = torch.vstack(chunked_outputs["logits"]).cpu()
        outputs = {"score": self.alignscore_input(logits, nclaims=nclaims, ncontexts=ncontexts, task_name=task_name)}
        outputs["outputs"] = chunked_outputs
        return torch.tensor([outputs["score"]]) if not return_all_outputs else outputs

    def score_chunks(self, context: str, claim: str, task_name="3way", batch_size=2, return_all_outputs=False, **kwargs):
        """Score with the ALIGNED probability, chunking context into 350 tokens.

        Deprecated-style aggregation: max ALIGNED probability over chunks.
        """
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
        chunked_inputs = self.chunk_inputs(context, claim, chunk_size=350)
        chunked_inputs = {key: torch.tensor(item).to(self.device) for key, item in chunked_inputs.items()}
        chunked_outputs = self.forward(task_name=task_name, **chunked_inputs, **kwargs)
        outputs = {"score": self.alignscore_input_deprecated(chunked_outputs.logits.cpu(), task_name=task_name)}
        outputs["outputs"] = chunked_outputs
        return outputs["score"] if not return_all_outputs else outputs

    def classify(self, context: str, claim: str, task_name="3way", return_all_outputs=False, **kwargs):
        """Classify context/claim into {neutral, contradict, aligned}.

        Majority vote over chunk predictions; ties fall back to the argmax of
        the averaged probabilities.
        """
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
        chunked_inputs = self.chunk_inputs(context, claim, chunk_size=350)
        chunked_inputs = {key: torch.tensor(item).to(self.device) for key, item in chunked_inputs.items()}
        chunked_outputs = self.forward(task_name=task_name, **chunked_inputs, **kwargs)
        outputs = {"class": self.get_system_label(chunked_outputs.logits.cpu(), task_name=task_name)}
        outputs["outputs"] = chunked_outputs
        return outputs["class"] if not return_all_outputs else outputs

    def score_truncated(self, context: str, claim: str, task_name="3way", return_all_outputs=False, **kwargs):
        """Score a single pair truncated to 512 tokens (no chunking)."""
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name) if not self.tokenizer else self.tokenizer
        tokenized_inputs = self.tokenizer(list(zip([context], [claim])), padding="max_length",
                                          truncation=True, max_length=512, return_tensors="pt")
        # return_tensors="pt" already yields tensors; just move them.
        tokenized_inputs = {key: item.to(self.device) for key, item in tokenized_inputs.items()}
        with torch.no_grad():
            model_outputs = self.forward(task_name=task_name, **tokenized_inputs, **kwargs)
        outputs = {"score": self.alignscore_input(model_outputs["logits"].cpu(), nclaims=1, ncontexts=1, task_name=task_name)}
        outputs["outputs"] = model_outputs
        return torch.tensor([outputs["score"]]) if not return_all_outputs else outputs

    def forward(self, task_name="3way", **kwargs):
        """Dispatch the forward pass to the head selected by ``task_name``."""
        return self.taskmodels_dict[task_name](**kwargs)

    def __call__(self, task_name, **kwargs):
        return self.taskmodels_dict[task_name](**kwargs)

    def alignscore_input(self, chunked_logits, nclaims, ncontexts, task_name="3way"):
        """Aggregate chunk logits into one ALIGNED score.

        Reshapes the (nclaims*ncontexts, ...) logits to (nclaims, ncontexts),
        takes the max over context chunks and the mean over claim chunks.
        """
        if task_name == "re":
            outputs = chunked_logits.detach()
            reshaped_tensor = outputs.view(nclaims, ncontexts)
            max_values, _ = reshaped_tensor.max(dim=1)
            mean_of_maxes = torch.mean(max_values, dim=0)
            return mean_of_maxes.tolist()
        else:
            # NOTE: key must be "bin" — task names come from "<task>_model"
            # directory names ("re", "bin", "3way").
            nlabels = {"3way": 3, "re": 1, "bin": 2}[task_name]
            outputs = chunked_logits.softmax(1).detach()
            reshaped_tensor = outputs.view(nclaims, ncontexts, nlabels)
            # Index 1 is the ALIGNED class for both classification heads.
            max_values, _ = torch.max(reshaped_tensor[:, :, 1], dim=1)
            mean_of_maxes = torch.mean(max_values, dim=0)
            return mean_of_maxes.tolist()

    def alignscore_input_deprecated(self, chunked_logits, task_name="3way"):
        """Return the max ALIGNED probability (or max regression score) over chunks."""
        if task_name == "re":
            return chunked_logits.detach().amax(0).tolist()
        else:
            return chunked_logits.softmax(1).detach()[:, 1].amax(0).tolist()

    def get_system_label(self, chunked_logits, task_name):
        """Return the predicted label: majority vote over chunks, ties -> avg argmax."""
        if task_name == "re":
            return (chunked_logits.sum(0) / chunked_logits.size()[0]).detach().tolist()
        else:
            avg_probs = chunked_logits.softmax(1).sum(0) / chunked_logits.size()[0]
            predictions = chunked_logits.softmax(1).argmax(1).detach().numpy()
            unique_values, counts = np.unique(predictions, return_counts=True)
            max_count = np.max(counts)
            most_frequent_values = unique_values[counts == max_count]
            return most_frequent_values[0] if most_frequent_values.size == 1 else avg_probs.detach().argmax().tolist()

    def chunk_sent_input(self, context, claim, max_length=512, chunk_size=350, chunk_claim_size=150):
        """Chunk the context into token blocks and the claim into sentences.

        Produces one (context_chunk, claim_chunk) pair per combination, padded
        to ``max_length``, plus "n_claims"/"n_contexts" bookkeeping counts.
        """
        assert chunk_size <= max_length, \
            "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)
        chunk_claim_size = max_length - chunk_size if chunk_claim_size is None else chunk_claim_size
        assert chunk_size + chunk_claim_size <= max_length, \
            "Chunk size {} and Chunk claim size {} cannot be together greater than max size {}".format(
                chunk_size, chunk_claim_size, max_length)
        return_chunked_inputs = {}
        # XLM-R special ids: 0 = <s>, 2 = </s> (claim chunks start with the separator).
        context_chunks = self.chunk_text(context, chunk_size=chunk_size,
                                         overflowing_tokens_stride=25, first_special_token=[0])
        claim_chunks = self.chunk_sentences(claim, chunk_size=chunk_claim_size,
                                            overflowing_tokens_stride=int(chunk_claim_size / 3),
                                            first_special_token=[2])
        for claim_chunk in claim_chunks:
            for context_chunk in context_chunks:
                inputs, attention = self.fill_with_pad_tokens(context_chunk, claim_chunk)
                return_chunked_inputs["input_ids"] = return_chunked_inputs.get("input_ids", []) + [inputs]
                return_chunked_inputs["attention_mask"] = return_chunked_inputs.get("attention_mask", []) + [attention]
        return_chunked_inputs["n_claims"] = len(claim_chunks)
        return_chunked_inputs["n_contexts"] = len(context_chunks)
        return return_chunked_inputs

    def chunk_inputs(self, context, claim, max_length=512, chunk_size=512, first_fit_within_max_length=True):
        """Paper-style chunking: split the context by sentences into blocks that,
        together with the claim, fit within ``chunk_size``/``max_length``.

        Returns {"input_ids": [...], "attention_mask": [...]}.
        """
        assert chunk_size <= max_length, \
            "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)
        tokenized_claim = self.tokenizer(claim, return_length=True)
        # Replace the claim's BOS with </s> (id 2), matching pair tokenization
        # where the claim follows the context separator.
        tokenized_claim["input_ids"][0] = 2
        tokenized_context = self.tokenizer(context, return_length=True)
        assert tokenized_claim["length"][0] < max_length * 4 / 5, \
            "Create chunks of claim sentences. Claim is too long {} which is more than 4/5 from {}.".format(
                tokenized_claim["length"][0], max_length)
        # Enlarge the chunk budget to account for the claim tokens.
        chunk_size = min(max_length, chunk_size + tokenized_claim["length"][0])
        first_check_max_size = max_length if first_fit_within_max_length else chunk_size
        if tokenized_claim["length"][0] + tokenized_context["length"][0] <= first_check_max_size:
            # Everything fits into a single input — no chunking needed.
            input_ids, attention_mask = self.fill_with_pad_tokens(tokenized_context["input_ids"],
                                                                  tokenized_claim["input_ids"])
            return {"input_ids": [input_ids], "attention_mask": [attention_mask]}
        else:
            return_chunked_inputs = {}
            current_chunk = {}
            for sentence in sent_tokenize(context, language="czech"):
                tok_sent = self.tokenizer(sentence, return_length=True)
                if len(current_chunk.get("input_ids", [0])) + tok_sent["length"][0] - 1 + tokenized_claim["length"][0] <= chunk_size:
                    # Sentence still fits — append its tokens (without special tokens).
                    current_chunk["input_ids"] = current_chunk.get("input_ids", [0]) + tok_sent["input_ids"][1:-1]
                else:
                    # Flush the current chunk and start a new one with this sentence.
                    return_chunked_inputs = self._update_chunked_inputs(tokenized_claim, current_chunk,
                                                                        return_chunked_inputs, max_length, tok_sent)
                    current_chunk["input_ids"] = [0] + tok_sent["input_ids"][1:-1]
            if current_chunk != {}:
                # Flush the trailing chunk.
                return_chunked_inputs = self._update_chunked_inputs(tokenized_claim, current_chunk,
                                                                    return_chunked_inputs, max_length)
                current_chunk = {}
            return return_chunked_inputs

    def chunk_input_deprecated(self, context, claim, max_length=512, chunk_size=350, chunk_claim_size=150):
        """Chunk context into 350-token and claim into 150-token blocks (deprecated)."""
        assert chunk_size <= max_length, \
            "Chunk size {} cannot be greater than max size {}".format(chunk_size, max_length)
        chunk_claim_size = max_length - chunk_size if chunk_claim_size is None else chunk_claim_size
        assert chunk_size + chunk_claim_size <= max_length, \
            "Chunk size {} and Chunk claim size {} cannot be together greater than max size {}".format(
                chunk_size, chunk_claim_size, max_length)
        return_chunked_inputs = {}
        context_chunks = self.chunk_text(context, chunk_size=chunk_size,
                                         overflowing_tokens_stride=25, first_special_token=[0])
        claim_chunks = self.chunk_text(claim, chunk_size=chunk_claim_size,
                                       overflowing_tokens_stride=int(chunk_claim_size / 3),
                                       first_special_token=[2])
        for claim_chunk in claim_chunks:
            for context_chunk in context_chunks:
                inputs, attention = self.fill_with_pad_tokens(context_chunk, claim_chunk)
                return_chunked_inputs["input_ids"] = return_chunked_inputs.get("input_ids", []) + [inputs]
                return_chunked_inputs["attention_mask"] = return_chunked_inputs.get("attention_mask", []) + [attention]
        return_chunked_inputs["n_claims"] = len(claim_chunks)
        return_chunked_inputs["n_contexts"] = len(context_chunks)
        return return_chunked_inputs

    def chunk_text(self, text, chunk_size=350, overflowing_tokens_stride=25, language="czech", first_special_token=[0]):
        """Chunk text into blocks of at most ``chunk_size`` tokens.

        Sentences are packed greedily; a sentence longer than ``chunk_size`` is
        itself split with a stride of ``overflowing_tokens_stride`` overlapping
        tokens. Each chunk starts with ``first_special_token`` and ends with </s> (2).
        """
        sentences = sent_tokenize(text, language=language)
        tokenized = self.tokenizer(sentences if sentences != [] else [""], return_length=True)
        chunks = []
        chunk, current_chunk_size = ([], 0)
        for i, length in enumerate(tokenized["length"]):
            if length > chunk_size:
                # Single sentence longer than the chunk — split it with overlap.
                splits = [first_special_token
                          + tokenized["input_ids"][i][max(1, cs):min(cs + chunk_size - 2, length - 1)]
                          + [2]
                          for cs in range(0, length, chunk_size - (2 + overflowing_tokens_stride))]
            else:
                # Sentence fits — keep it whole (drop its BOS).
                splits = [first_special_token + tokenized["input_ids"][i][1:]]
            for subsentence in splits:
                up_length = len(subsentence) - 2  # payload without the two special tokens
                if current_chunk_size == 0:
                    # Start a fresh chunk (keep leading special token, defer </s>).
                    current_chunk_size = up_length + 2
                    chunk = subsentence[:-1]
                elif current_chunk_size + up_length <= chunk_size:
                    # Append payload tokens only.
                    current_chunk_size += up_length
                    chunk += subsentence[1:-1]
                else:
                    # Close the current chunk and start a new one.
                    chunks += [chunk + [2]]
                    current_chunk_size = up_length + 2
                    chunk = subsentence[:-1]
        if chunk != []:
            # Flush the last, still-open chunk.
            chunks += [chunk + [2]]
        return chunks

    def chunk_sentences(self, text, chunk_size, overflowing_tokens_stride=0, language="czech", sentence_window=2, first_special_token=[2]):
        """Split text into one chunk per sentence (via nltk.sent_tokenize).

        Sentences longer than ``chunk_size`` are split with overlapping tokens.
        ``sentence_window`` is kept for interface compatibility but unused.
        """
        sentences = sent_tokenize(text, language=language)
        tokenized = self.tokenizer(sentences if sentences != [] else [""], return_length=True)
        chunks = []
        for i, length in enumerate(tokenized["length"]):
            if length > chunk_size:
                splits = [first_special_token
                          + tokenized["input_ids"][i][max(1, cs):min(cs + chunk_size - 2, length - 1)]
                          + [2]
                          for cs in range(0, length, chunk_size - (2 + overflowing_tokens_stride))]
            else:
                splits = [first_special_token + tokenized["input_ids"][i][1:]]
            for split in splits:
                chunks += [split]
        return chunks

    def fill_with_pad_tokens(self, first, second, max_length=512, pad_token=1):
        """Concatenate two token lists, pad to ``max_length``; return (input_ids, attention_mask)."""
        pad_len = max(max_length - len(first) - len(second), 0)
        return (first + second + [pad_token] * pad_len,
                [1] * (len(first) + len(second)) + [0] * pad_len)

    def _update_chunked_inputs(self, tokenized_claim, current_chunk, return_chunked_inputs, max_length, tok_sent=None):
        """Append the finished context chunk + claim to the accumulated inputs.

        Truncates rare over-long chunks so chunk + claim fits in ``max_length``.
        """
        if tok_sent is None:  # avoid a shared mutable default argument
            tok_sent = {"input_ids": []}
        if len(current_chunk.get("input_ids", [0])) + tokenized_claim["length"][0] >= max_length:
            # Truncate when a single long sentence overflows (rare).
            chunk = current_chunk["input_ids"].copy()[:max_length - tokenized_claim["length"][0] - 1] + [2]
        elif not current_chunk.get("input_ids", False):
            chunk = tok_sent["input_ids"][:max_length - tokenized_claim["length"][0] - 1] + [2]
        else:
            chunk = current_chunk["input_ids"].copy() + [2]  # add end-of-sentence
        claim_ids = tokenized_claim["input_ids"].copy()
        inputs, attention = self.fill_with_pad_tokens(chunk, claim_ids)
        return_chunked_inputs["input_ids"] = return_chunked_inputs.get("input_ids", []) + [inputs]
        return_chunked_inputs["attention_mask"] = return_chunked_inputs.get("attention_mask", []) + [attention]
        return return_chunked_inputs

    @classmethod
    def get_encoder_attr_name(cls, model):
        """Return the attribute name of the encoder inside ``model``.

        The encoder transformer is named differently in each architecture.
        """
        model_class_name = model.__class__.__name__
        if model_class_name.startswith("XLMRoberta"):
            return "roberta"
        else:
            raise KeyError(f"Add support for new model {model_class_name}")

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
        model_name: str = "xlm-roberta-large",
        *model_args,
        config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None,
        cache_dir: Optional[Union[str, os.PathLike]] = None,
        ignore_mismatched_sizes: bool = False,
        force_download: bool = False,
        local_files_only: bool = False,
        token: Optional[Union[str, bool]] = None,
        revision: str = "main",
        use_safetensors: Optional[bool] = None,
        **kwargs,
    ):
        """Load the three task heads (sharing one encoder) from a local directory
        or from the Hugging Face hub, and return an assembled AlignScoreCS.
        """
        # Local path: all three task sub-directories must exist.
        if all(os.path.exists(os.path.join(pretrained_model_name_or_path, model_dir))
               for model_dir in [cls._3way_class_model, cls._regression_model, cls._binary_class_model]):
            # Silence the warning about newly initialized weights.
            transformers.logging.set_verbosity_error()
            shared_encoder = None
            taskmodels_dict = {}
            for path_name in [cls._regression_model, cls._binary_class_model, cls._3way_class_model]:
                task_name = path_name.split("_")[0]
                # Task-specific config and model, then load the fine-tuned weights.
                task_config = transformers.XLMRobertaConfig.from_json_file(
                    "{}/{}/config.json".format(pretrained_model_name_or_path, path_name))
                model = transformers.XLMRobertaForSequenceClassification.from_pretrained(
                    model_name, config=task_config, *model_args, **kwargs)
                model.load_state_dict(torch.load(
                    "{}/{}/pytorch_model.bin".format(pretrained_model_name_or_path, path_name),
                    map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
                # First head donates its encoder; the others share it.
                if shared_encoder is None:
                    shared_encoder = getattr(model, cls.get_encoder_attr_name(model))
                else:
                    setattr(model, cls.get_encoder_attr_name(model), shared_encoder)
                taskmodels_dict[task_name] = model
            alignScoreCS = cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
        else:
            # Otherwise try the Hugging Face hub with per-task subfolders.
            shared_encoder = None
            taskmodels_dict = {}
            for model_dir in [cls._regression_model, cls._binary_class_model, cls._3way_class_model]:
                task_name = model_dir.split("_")[0]
                task_config = transformers.XLMRobertaConfig.from_pretrained(
                    f"{pretrained_model_name_or_path}", subfolder=model_dir)
                model = transformers.XLMRobertaForSequenceClassification.from_pretrained(
                    f"{pretrained_model_name_or_path}", config=task_config, subfolder=model_dir)
                if shared_encoder is None:
                    shared_encoder = getattr(model, cls.get_encoder_attr_name(model))
                else:
                    setattr(model, cls.get_encoder_attr_name(model), shared_encoder)
                taskmodels_dict[task_name] = model
            alignScoreCS = cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict, model_name=model_name)
        return alignScoreCS

    def save_pretrained(
        self,
        save_directory: Union[str, os.PathLike],
        is_main_process: bool = True,
        state_dict: Optional[dict] = None,
        save_function: Callable = torch.save,
        push_to_hub: bool = False,
        max_shard_size: Union[int, str] = "10GB",
        safe_serialization: bool = False,
        variant: Optional[str] = None,
        token: Optional[Union[str, bool]] = None,
        save_peft_format: bool = True,
        **kwargs,
    ):
        """Save each task head into ``<save_directory>/<task>_model``."""
        for task_name, model_type in self.taskmodels_dict.items():
            model_type.save_pretrained(
                save_directory=Path(save_directory, task_name + "_model"),
                is_main_process=is_main_process,
                state_dict=state_dict,
                save_function=save_function,
                push_to_hub=push_to_hub,
                max_shard_size=max_shard_size,
                safe_serialization=safe_serialization,
                variant=variant,
                token=token,
                save_peft_format=save_peft_format,
                **kwargs)

    # This class is ported from the AlignScore github repository.
    # If you want to use a different nlg_eval_mode you have to fix errors on your own.
    class InferenceHandler:
        def __init__(self, model, tokenizer, device="cuda"):
            self.model = model
            self.device = device
            self.tokenizer = tokenizer
            self.model.to(self.device)
            self.model.eval()
            self.batch_size = 32
            self.nlg_eval_mode = "nli_sp"
            self.verbose = False
            self.task_name = "3way"
            self.softmax = nn.Softmax(dim=-1)
            # Must exist before inference() is called directly.
            self.disable_progress_bar_in_inference = False

        def nlg_eval(self, premise, hypo):
            """Evaluate a pair or equal-length lists of premises/hypotheses."""
            if isinstance(premise, str) and isinstance(hypo, str):
                premise = [premise]
                hypo = [hypo]
            return self.inference_example_batch(premise, hypo)

        def inference_example_batch(self, premise: list, hypo: list):
            """Inference over example lists; SummaC-style aggregation per example."""
            self.disable_progress_bar_in_inference = True
            assert len(premise) == len(hypo), "Premise must has the same length with Hypothesis!"
            out_score = []
            for one_pre, one_hypo in tqdm(zip(premise, hypo), desc="Evaluating",
                                          total=len(premise), disable=(not self.verbose)):
                out_score.append(self.inference_per_example(one_pre, one_hypo))
            return torch.tensor(out_score)

        def inference_per_example(self, premise: str, hypo: str):
            """Inference for one example: chunk premise, sentence-split hypothesis,
            score all combinations, aggregate max-over-chunks / mean-over-sentences.
            """
            def chunks(lst, n):
                """Yield successive n-sized chunks from lst, joined back to text."""
                for i in range(0, len(lst), n):
                    yield ' '.join(lst[i:i + n])

            premise_sents = sent_tokenize(premise)
            premise_sents = premise_sents or ['']
            # Roughly 350 words per premise chunk.
            n_chunk = len(premise.strip().split()) // 350 + 1
            n_chunk = max(len(premise_sents) // n_chunk, 1)
            premise_sents = [each for each in chunks(premise_sents, n_chunk)]
            hypo_sents = sent_tokenize(hypo)
            # Cross product of premise chunks x hypothesis sentences.
            premise_sent_mat = []
            hypo_sents_mat = []
            for i in range(len(premise_sents)):
                for j in range(len(hypo_sents)):
                    premise_sent_mat.append(premise_sents[i])
                    hypo_sents_mat.append(hypo_sents[j])
            if self.nlg_eval_mode is not None:
                if self.nlg_eval_mode == 'nli_sp':
                    output_score = self.inference(premise_sent_mat, hypo_sents_mat)[:, 1]  # ALIGNED column
                    output_score = output_score.view(len(premise_sents), len(hypo_sents)).max(dim=0).values.mean().item()
                    return output_score
            output_score = self.inference(premise_sent_mat, hypo_sents_mat)
            output_score = output_score.view(len(premise_sents), len(hypo_sents)).max(dim=0).values.mean().item()
            return output_score

        def inference(self, premise, hypo, task_name=None):
            """Batched inference over premise/hypo lists; standard aggregation."""
            task_name = self.task_name if task_name is None else task_name
            if isinstance(premise, str) and isinstance(hypo, str):
                premise = [premise]
                hypo = [hypo]
            batch = self.batch_tokenize(premise, hypo)
            output_score = []
            for mini_batch in tqdm(batch, desc="Evaluating",
                                   disable=not self.verbose or self.disable_progress_bar_in_inference):
                mini_batch = mini_batch.to(self.device)
                with torch.no_grad():
                    model_output = self.model.forward(task_name=task_name, **mini_batch)
                    model_output = model_output.logits
                    # Regression head outputs raw scores; classification heads get softmax.
                    if task_name == "re":
                        model_output = model_output.cpu()
                    else:
                        model_output = self.softmax(model_output).cpu()
                output_score.append(model_output[:, :])
            output_score = torch.cat(output_score)
            if self.nlg_eval_mode is not None:
                if self.nlg_eval_mode == 'nli':
                    output_score_nli = output_score[:, 1]
                    return output_score_nli
                elif self.nlg_eval_mode == 'bin':
                    return output_score
                elif self.nlg_eval_mode == 'reg':
                    return output_score
                else:
                    raise ValueError("unrecognized nlg eval mode")
            return output_score

        def batch_tokenize(self, premise, hypo):
            """Tokenize premise/hypo lists into mini-batches of self.batch_size."""
            assert isinstance(premise, list) and isinstance(hypo, list)
            assert len(premise) == len(hypo), "premise and hypo should be in the same length."
            batch = []
            for mini_batch_pre, mini_batch_hypo in zip(self.chunks(premise, self.batch_size),
                                                       self.chunks(hypo, self.batch_size)):
                try:
                    mini_batch = self.tokenizer(mini_batch_pre, mini_batch_hypo, truncation='only_first',
                                                padding='max_length',
                                                max_length=self.tokenizer.model_max_length,
                                                return_tensors='pt')
                except Exception:
                    # 'only_first' fails when the second segment alone exceeds the limit.
                    print('text_b too long...')
                    mini_batch = self.tokenizer(mini_batch_pre, mini_batch_hypo, truncation=True,
                                                padding='max_length',
                                                max_length=self.tokenizer.model_max_length,
                                                return_tensors='pt')
                batch.append(mini_batch)
            return batch

        def chunks(self, lst, n):
            """Yield successive n-sized chunks from lst."""
            for i in range(0, len(lst), n):
                yield lst[i:i + n]


if __name__ == "__main__":
    # Smoke-test demo on Czech examples.
    alignScore = AlignScoreCS.from_pretrained("krotima1/AlignScoreCS")
    alignScore.to("cuda" if torch.cuda.is_available() else "cpu")
    print("Tomáš miluje Zuzku!", "|", "Tomáš miluje Petru!", alignScore.score("Tomáš miluje Zuzku!", "Tomáš miluje Petru."))
    print("Tomáš miluje Zuzku!", "|", "Tomáš miluje Zuzku!", alignScore.score("Tomáš miluje Zuzku!", "Tomáš miluje Zuzku!"))
    print("Tomáš miluje Zuzku.", "|", "Zuzka miluje Tomáše.", alignScore.score("Tomáš miluje Zuzku!", "Zuzka miluje Tomáše."))
    print("Tomáš miluje Zuzku.", "|", "Zuzka nemiluje Tomáše.", alignScore.score("Tomáš miluje Zuzku!", "Zuzka nemiluje Tomáše."))
    print("Tomáš miluje Zuzku.", "|", "Tomáš nemiluje Zuzku.", alignScore.score("Tomáš miluje Zuzku!", "Tomáš nemiluje Zuzku."))
    print("Dva chlapi se perou.", "|", "Je tu bitka.", alignScore.score("Dva chlapi se perou.", "Je tu bitka."))
    print("Dva chlapi se perou.", "|", "Je tu láska.", alignScore.score("Dva chlapi se perou.", "Je tu láska."))
    print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byl to Karel.", alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byl to Karel."))
    print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byl to Vít.", alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byl to Vít."))
    print("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta. \n Kdo nechal vystavět katedrálu?", "|", "Byla to katedrála.", alignScore.score("Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.\nKdo nechal vystavět katedrálu?", "Byla to katedrála."))
    print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je Otec.", alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je Otec."))
    print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je Otec vlasti.", alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je Otec vlasti."))
    print("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "|", "Je katedrála svatého Víta.", alignScore.score("Kdo je Karel IV.? Karel IV. je Otec vlasti. Nechal postavit katedrálu svatého Víta.", "Je katedrála svatého Víta."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka utekla vklovi.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka utekla vklovi."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka neutekla vklovi.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka neutekla vklovi."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Vlk snědl Karkulku.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Vlk snědl karkulku."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Vlk nesnědl Karkulku.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Vlk nesnědl karkulku."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka snědla vlka.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka snědla vlka."))
    print("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "|", "Karkulka dala vlkovi jablko.", alignScore.score("Karkulka šla do lesa. V lese potkala vlka. Vlk ji zkoušel sníst, ale Karkulka se nedala a Vlkovi utekla!", "Karkulka dala vlkovi jablko."))