import argparse
import os
import re
import time

import openai
import requests
from nltk.tokenize import sent_tokenize

# Matches the leading markdown heading ("jinhao", 井号) prefix of a line,
# e.g. "##  title" -> "## ". The pattern always matches (possibly empty),
# so .match() never returns None. Compiled once; used by three hot helpers.
_JINHAO_RE = re.compile(r'^( *#*)*')


def replace_jinhao(line, replacement=None):
    """Replace the leading '#' heading prefix of *line* with *replacement*.

    No-op when *replacement* is None or when the line carries no '#' prefix.
    """
    if replacement is not None and _JINHAO_RE.match(line)[0].strip() != '':
        # Replace the whole prefix with *replacement*.
        return _JINHAO_RE.sub(replacement, line, count=1)
    return line


def count_jinhao(line):
    """Return the number of '#' characters in the line's heading prefix."""
    return _JINHAO_RE.match(line)[0].count('#')


def is_english(strs):
    """Heuristic: True when *strs* contains no CJK character (U+4E00..U+9FA5)."""
    return not any('\u4e00' <= _char <= '\u9fa5' for _char in strs)


def sentence_split_en(line):
    """Split English text into sentences via NLTK.

    Sentences shorter than 10 characters are merged into the following
    sentence so that no fragment stands alone.
    """
    res = [s.strip() for s in sent_tokenize(line) if s.strip() != '']
    idx = 0
    while idx < len(res) - 1:
        if len(res[idx]) < 10:
            # Too short to stand alone: merge forward.
            res[idx + 1] = res[idx] + ' ' + res[idx + 1]
            res.pop(idx)
        else:
            idx += 1
    return res


def sentence_split_zh(line):
    """Split Chinese text on the full stop '。'.

    A candidate split point is skipped when the full stop is adjacent to a
    digit (likely an OCR artifact) or when the resulting fragment would be
    5 characters or fewer.
    """
    res = []
    pre_idx = 0
    for i in range(1, len(line) - 1):
        if line[i] != '。':  # split on the Chinese full stop only
            continue
        if line[i - 1] in '0123456789':  # digit before: likely OCR error
            continue
        if line[i + 1] in '0123456789':  # digit after: likely OCR error
            continue
        if len(line[pre_idx: i + 1].strip()) <= 5:
            continue
        res.append(line[pre_idx: i + 1].strip())
        pre_idx = i + 1
    if pre_idx < len(line):
        res.append(line[pre_idx:])
    return res


def sentence_split(line):
    """Dispatch to the English or Chinese sentence splitter."""
    if is_english(line):
        return sentence_split_en(line)
    return sentence_split_zh(line)


def sentence_truncation(line, head_limit=15, tail_limit=15):
    """Keep only the head and tail of an over-long line.

    Limits are scaled by a per-language factor (x10 for English, x1 for
    Chinese). Non-positive total limits disable truncation entirely.
    """
    total_limit = head_limit + tail_limit
    len_factor = 10 if is_english(line) else 1
    if 0 < total_limit * len_factor < len(line):
        _head_limit = head_limit * len_factor
        _tail_limit = len(line) - tail_limit * len_factor
        line = line[:_head_limit] + line[_tail_limit:]
    return line


def text2sentence(lines, replacement=None, head_limit=15, tail_limit=15):
    """Split raw text lines into sentences, then normalize each sentence.

    :param lines: iterable of raw text lines.
    :param replacement: in [None, '# ', '']. Final replacement for the
        heading prefix of each sentence; None means no replacement.
    :param head_limit: head truncation limit (see sentence_truncation).
    :param tail_limit: tail truncation limit.
    :return: list of normalized sentences, each terminated with a newline.
    """
    res = []
    for line in lines:
        res.extend(sentence_split(line))
    for idx, temp in enumerate(res):
        # Collapse the prefix to '# ' so truncation cannot eat heading marks,
        # truncate, then restore the original heading depth.
        _temp = replace_jinhao(temp, '# ')
        _temp = sentence_truncation(_temp, head_limit, tail_limit)
        _temp = replace_jinhao(_temp, f"{'#' * count_jinhao(temp)} ")
        _temp = replace_jinhao(_temp, replacement)
        res[idx] = _temp + '\n'
    return res


PROMPT = ('You are an assistant good at reading and formatting documents, and you are also skilled at distinguishing '
          'the semantic and logical relationships of sentences between document context. The following is a text that '
          'has already been divided into sentences. Each line is formatted as: "{line number} @ {sentence content}". '
          'You need to segment this text based on semantics and format. There are multiple levels of granularity for '
          'segmentation, the higher level number means the finer granularity of the segmentation. Please ensure that '
          'each Level One segment is semantically complete after segmentation. A Level One segment may contain '
          'multiple Level Two segments, and so on. Please incrementally output the starting line numbers of each level '
          'of segments, and determine the level of the segment, as well as whether the content of the sentence at the '
          'starting line number can be used as the title of the segment. Finally, output a list format result, '
          'where each element is in the format of: "{line number}, {segment level}, {be a title?}".'
          '\n\n>>> Input text:\n')


def index_format(idx, line):
    """Format one instruction line as '{line number} @ {sentence content}'."""
    return f'{idx} @ {line}'


def points2clip(points, start_idx, end_idx):
    """Turn split points into consecutive [start, end) clips.

    :param points: [a, b, c, d]
    :param start_idx: x
    :param end_idx: y
    assert: x <= a < b < c < d < y
    return [[x, a], [a, b], [b, c], [c, d], [d, y]]
    """
    clips = []
    pre_p = start_idx
    for p in points:
        # Points at the start or at/past the end contribute no boundary.
        if p == start_idx or p >= end_idx:
            continue
        clips.append([pre_p, p])
        pre_p = p
    clips.append([pre_p, end_idx])
    return clips


# Segment-level names as they appear in the model's answer lines.
level_dict_en = {
    0: 'Level One',
    1: 'Level Two',
    2: 'Level Three',
    3: 'Level Four',
    4: 'Level Five',
    5: 'Level Six',
    6: 'Level Seven',
    7: 'Level Eight',
    8: 'Level Nine',
    9: 'Level Ten',
}


def parse_answer_chunking_point(answer_string, max_level):
    """Parse the model answer into per-level lists of chunking points.

    Each answer line is expected as '{point}, {level name}, {is title}'.
    Lines whose level name is unknown are ignored; malformed lines raise
    ValueError (the caller treats that as a parse error).
    Each level's points are filtered to be strictly increasing.
    """
    local_chunk_points = {level_dict_en[i]: [] for i in range(max_level)}
    for line in answer_string.split('\n'):
        point, level, _ = line.split(', ')
        if level in local_chunk_points:
            local_chunk_points[level].append(int(point))
    res = list(local_chunk_points.values())
    for idx, points in enumerate(res):
        if len(points) == 0:
            continue
        # Keep a point only when it is greater than its original predecessor.
        res[idx] = [points[0]] + [points[i] for i in range(1, len(points)) if points[i] > points[i - 1]]
    return res


def check_answer_point(first_level_points, start_idx, end_idx):
    """Validate that level-one points lie in [start_idx, end_idx] and strictly increase."""
    print('parsed_answer:', first_level_points, start_idx, end_idx)
    if len(first_level_points) > 0 and first_level_points[0] < start_idx:
        return False
    for idx in range(1, len(first_level_points)):
        p = first_level_points[idx]
        if p <= first_level_points[idx - 1] or p > end_idx:
            return False
    return True


def build_residual_lines(lines, global_chunk_points, start_idx, window_size, recurrent_type):
    """Build carry-over context lines for the next inference iteration.

    Only recurrent_type 2 carries residual context; types 0 and 1 carry none.
    """
    if recurrent_type in [0, 1]:
        return []
    assert recurrent_type == 2, f'Not implemented for recurrent_type: {recurrent_type}'
    last_first_point = 0
    if len(global_chunk_points[0]) > 0:
        last_first_point = global_chunk_points[0][-1]
    current_second_points = filter(lambda p: p >= last_first_point, global_chunk_points[1])
    temp_second_clips = points2clip(current_second_points, last_first_point, start_idx)
    # Within one level-one segment keep at most 5 level-two clips
    # (first 2 + last 3), and at most 20 lines per level-two clip.
    pre_seg_num, post_seg_num, line_num = 2, 3, 20
    while True:
        residual_second_clips = temp_second_clips
        if len(temp_second_clips) > (pre_seg_num + post_seg_num):
            residual_second_clips = (
                temp_second_clips[:pre_seg_num]
                + temp_second_clips[len(temp_second_clips) - post_seg_num:]
            )
        residual_lines = []
        for rsc in residual_second_clips:
            # Keep at most `line_num` lines per level-two clip.
            pre_sent_idx, post_sent_idx = rsc[0], min(rsc[1], rsc[0] + line_num)
            residual_lines.extend(lines[pre_sent_idx: post_sent_idx])
        if len('\n'.join(residual_lines)) < window_size / 2:
            print(residual_lines)
            return residual_lines
        # Residual exceeds half the inference window: shrink the budget
        # (one fewer head clip, one fewer tail clip, 5 fewer lines per clip).
        pre_seg_num, post_seg_num, line_num = pre_seg_num - 1, post_seg_num - 1, line_num - 5
        # Still too large at the minimum setting: carry no residual input.
        if pre_seg_num * post_seg_num * line_num <= 0:
            return []


def union_chunk_points(local_chunk_points, global_chunk_points, max_idx):
    """Merge local points (already in global coordinates) below *max_idx* into the global lists."""
    for idx, _ in enumerate(global_chunk_points):
        global_chunk_points[idx].extend(filter(lambda p: p < max_idx, local_chunk_points[idx]))
    return global_chunk_points


class HiChunkInferenceEngine:
    """Hierarchically chunks a document by iteratively prompting an LLM.

    The engine slides a token-budgeted window over the document sentences,
    asks the model for hierarchical segment boundaries, and stitches the
    per-window answers into global chunk points.
    """

    def __init__(self, window_size, line_max_len, max_level, prompt):
        # Token budget for one inference call (prompt + sentences).
        self.window_size = window_size
        # Per-line truncation limit applied to model-input sentences.
        self.line_max_len = line_max_len
        # Number of segmentation levels tracked.
        self.max_level = max_level
        self.prompt = prompt
        self.base_url = os.environ.get("OPENAI_BASE_URL", "http://localhost:8000")
        self.llm = openai.Client(base_url=f"{self.base_url}/v1", api_key="[empty]")

    def init_chunk_points(self):
        """Return a fresh empty per-level list of chunk points."""
        return [[] for _ in range(self.max_level)]

    def build_input_instruction(self, prompt, global_start_idx, sentences, window_size, residual_lines=None):
        """Build the input instruction for one inference call.

        :param prompt: instruction prefix.
        :param global_start_idx: global start index into *sentences*.
        :param sentences: all (model-input) document sentences.
        :param window_size: token budget for the whole instruction.
        :param residual_lines: optional carry-over lines, indexed locally
            from 0 before the new sentences.
        :return: (instruction text, reached-end flag, number of new sentences consumed)
        """
        q = prompt
        # Prepend residual (carry-over) lines, if any.
        residual_index = 0
        while residual_lines is not None and residual_index < len(residual_lines):
            q = q + index_format(residual_index, residual_lines[residual_index])
            residual_index += 1
        assert self.count_length(q) <= window_size, 'residual lines exceeds window size'
        local_start_idx = 0
        cur_token_num = self.count_length(q)
        end = False
        # Append sentences until the token budget would be exceeded.
        while global_start_idx < len(sentences):
            line_text = index_format(local_start_idx + residual_index, sentences[global_start_idx])
            line_token_num = self.count_length(line_text)
            if cur_token_num + line_token_num > window_size:
                break
            cur_token_num += line_token_num
            q = q + line_text
            local_start_idx += 1
            global_start_idx += 1
        if global_start_idx == len(sentences):
            end = True
        return q, end, local_start_idx

    def call_llm(self, input_text):
        """Send one chat completion request and return the answer text."""
        response = self.llm.chat.completions.create(
            model='HiChunk',
            messages=[{'role': 'user', 'content': input_text}],
            temperature=0.0,
            max_tokens=4096,
            extra_body={
                "chat_template_kwargs": {"add_generation_prompt": True, "enable_thinking": False}
            }
        )
        return response.choices[0].message.content

    def count_length(self, text):
        """Return the token count of *text* via the server's /tokenize endpoint."""
        response = requests.post(
            url=f'{self.base_url}/tokenize',
            json={'model': 'HiChunk', 'prompt': text}
        ).json()
        return response['count']

    def pre_process(self, document):
        """Split *document* into sentence lists for inference and reassembly.

        :return: (input_lines truncated to self.line_max_len for the model,
                  origin_lines untruncated for final reassembly)
        """
        lines = map(lambda l: l.strip(), document.split('\n'))
        lines = list(filter(lambda l: len(l) != 0, lines))
        origin_lines = text2sentence(lines, None, -1, 0)  # original lines, no truncation
        input_lines = text2sentence(lines, '', self.line_max_len, 0)  # truncated to self.line_max_len
        return input_lines, origin_lines

    @staticmethod
    def post_process(origin_lines, global_chunk_points):
        """Assemble [chunk text, level] pairs from the global chunk points."""
        origin_lines_remove_jinhao = [replace_jinhao(l, '') for l in origin_lines]
        # Flatten to [point, 1-based level], sorted by point.
        total_points = sorted(
            [[p, lvl + 1] for lvl, pts in enumerate(global_chunk_points) for p in pts],
            key=lambda item: item[0]
        )
        splits = []
        pre_level, pre_point = 1, 0
        for p, level in total_points:
            if p == 0:
                continue
            splits.append([''.join(origin_lines_remove_jinhao[pre_point: p]), pre_level])
            pre_level = level
            pre_point = p
        splits.append([''.join(origin_lines_remove_jinhao[pre_point:]), pre_level])
        return splits

    def iterative_inf(self, lines, recurrent_type=1):
        """Iteratively run the model over *lines* and collect global chunk points.

        :param lines: model-input sentences (possibly truncated).
        :param recurrent_type: 0/1 = no residual context between iterations,
            2 = carry residual lines (see build_residual_lines).
        :return: dict with 'global_chunk_points', 'raw_qa', 'error_count'.
        """
        error_count, start_idx = 0, 0
        raw_qa, residual_lines = [], []
        global_chunk_points = self.init_chunk_points()
        while start_idx < len(lines):
            residual_sent_num = len(residual_lines)
            question, is_end, question_sent_num = self.build_input_instruction(
                self.prompt, start_idx, lines, self.window_size, residual_lines
            )
            question_token_num = self.count_length(question)
            print('question len:', len(question), question_token_num)
            start_time = time.time()
            answer = self.call_llm(question)
            inf_time = time.time() - start_time
            answer_token_num = self.count_length(answer)
            print('answer:', answer)
            print('answer len:', answer_token_num)
            tmp = {
                'question': question,
                'answer': answer,
                'start_idx': start_idx,
                'end_idx': start_idx + question_sent_num,
                'residual_sent_num': residual_sent_num,
                'time': inf_time,
                'question_token_num': question_token_num,
                'answer_token_num': answer_token_num,
            }
            # Parse the answer; convert local sentence indices to global ones.
            try:
                local_chunk_points = parse_answer_chunking_point(answer, self.max_level)
                if not check_answer_point(local_chunk_points[0], 0, question_sent_num + residual_sent_num - 1):
                    print('###########check error##############')
                    tmp['status'] = 'check error'
                    local_chunk_points = self.init_chunk_points()
                    local_chunk_points[0].append(start_idx)
                    error_count += 1
                else:
                    tmp['status'] = 'check ok'
                    print('#############check ok################')
                    for idx, points in enumerate(local_chunk_points):
                        filter_points = filter(lambda p: p >= residual_sent_num, points)
                        # p - residual_sent_num + start_idx maps a local
                        # sentence index back to the global document index.
                        local_chunk_points[idx] = [p - residual_sent_num + start_idx for p in filter_points]
            except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
                print('##########parsed error################')
                tmp['status'] = 'parse error'
                local_chunk_points = self.init_chunk_points()
                local_chunk_points[0].append(start_idx)
                error_count += 1
            raw_qa.append(tmp)
            print('local_chunk_points:', local_chunk_points)
            if is_end:
                # Whole document consumed: merge the last result and stop.
                start_idx += question_sent_num
                global_chunk_points = union_chunk_points(local_chunk_points, global_chunk_points, start_idx)
                break
            if len(local_chunk_points[0]) > 1 and recurrent_type in [1, 2]:
                # Several level-one segments: drop this round's last level-one
                # segment and restart the next iteration from its beginning.
                start_idx = local_chunk_points[0][-1]
                global_chunk_points = union_chunk_points(local_chunk_points, global_chunk_points, start_idx)
                residual_lines = []
            else:
                # Only one level-one segment: continue from the last consumed
                # line, carrying residual context into the next iteration.
                start_idx += question_sent_num
                global_chunk_points = union_chunk_points(local_chunk_points, global_chunk_points, start_idx)
                residual_lines = build_residual_lines(
                    lines, global_chunk_points, start_idx, self.window_size, recurrent_type
                )
        print('global_chunk_points:', global_chunk_points)
        result = {
            'global_chunk_points': global_chunk_points,
            'raw_qa': raw_qa,
            'error_count': error_count,
        }
        return result

    def inference(self, document, recurrent_type=1):
        """Chunk *document*; return (markdown-style chunked text, chunk list)."""
        input_lines, origin_lines = self.pre_process(document)
        chunked_result = self.iterative_inf(input_lines, recurrent_type=recurrent_type)
        chunks = self.post_process(origin_lines, chunked_result['global_chunk_points'])
        chunked_document = '\n'.join(['#' * c[1] + ' ' + c[0] for c in chunks])
        return chunked_document, chunks