import unittest

from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_community.chat_models.llamacpp import ChatLlamaCpp
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

from config import paths

# System prompt (Chinese): instructs the model to open every answer with a short
# (<= 5 character) filler phrase before the real reply; "/no_think" disables
# Qwen3's thinking mode.
CHINESE_SYSTEM_PROMPT = (
    "你是善于模拟真实的思考过程的AI助手。"
    "回答时,必须首先生成一个不超过5个字的简短句子,"
    "比如:\"让我想一下\"、\"在我看来\"、\"稍等我理一理\"、\"不错的问题\"、\"稍等片刻\"等,然后再进行正式回答,"
    "保持中文口语化表达,禁用emoji和系统相关描述,确保衔接词与内容存在合理逻辑关联。"
    "/no_think"
)

# English counterpart of the system prompt above (not used by setUp below, but
# kept as part of the module's public constants).
ENGLISH_SYSTEM_PROMPT = (
    "You are an AI assistant skilled at simulating authentic thinking processes. "
    "When responding, you must first generate a brief phrase of no more than 5 words, "
    "such as: 'Let me think', 'I see', 'Let me process this', 'Good question', 'One moment', etc., then proceed with your formal response. "
    "Maintain natural conversational English expression, avoid emojis and system-related descriptions, and ensure logical coherence between transitional phrases and content."
    "/no_think"
)


class TestLLMDialogue(unittest.TestCase):
    """Multi-turn dialogue smoke test against a local llama.cpp Qwen3 model.

    Builds a prompt | model chain wrapped with per-session message history,
    warms the model up once, then streams answers to a fixed five-round
    Chinese conversation, printing each exchange.
    """

    def setUp(self):
        """Load the GGUF model and assemble the history-aware chain."""
        model_params = {
            'n_ctx': 32768,
            'temperature': 0.7,
            'top_p': 0.9,
            'top_k': 20,
            'model_kwargs': {
                # FIX: was 'mini_p' — llama-cpp-python's sampler parameter is
                # spelled 'min_p'; the misspelled key was silently ignored.
                'min_p': 0,
                'presence_penalty': 1.5,
            },
            'verbose': False,
        }
        # Maps session_id -> InMemoryChatMessageHistory.
        self.history_store = {}
        model_path = paths.LLM_MODELS_PATH / 'qwen' / 'Qwen3-8B-Q6_K.gguf'
        langchain_instance = ChatLlamaCpp(model_path=model_path.as_posix(), **model_params)
        system_message = SystemMessage(content=CHINESE_SYSTEM_PROMPT)
        human_message = HumanMessagePromptTemplate.from_template("{input}")
        prompt = ChatPromptTemplate(messages=[
            system_message,
            MessagesPlaceholder(variable_name="history"),
            human_message,
        ])
        chain = prompt | langchain_instance
        self.chain_with_history = RunnableWithMessageHistory(
            chain,
            self.get_session_history,
            history_messages_key='history',
        )
        self.warmup()
        # Question set for the multi-turn dialogue test.
        self.user_questions = [
            # Round 1: open-ended topic introduction
            "最近人工智能技术发展很快,你觉得AI对我们日常生活带来了哪些改变?",
            # Round 2: follow-up probing based on the previous answer
            "你刚才提到的这些改变中,哪一个你认为是最重要的?为什么?",
            # Round 3: shift to a concrete scenario and personal opinion
            "如果让你选择一个AI应用来帮助解决教育领域的问题,你会选择什么?具体怎么实现?",
            # Round 4: challenging question to test logical reasoning
            "但是也有人担心AI在教育中会让学生过度依赖技术,失去独立思考能力。你怎么看待这个担忧?",
            # Round 5: summary question to test synthesis ability
            "综合我们刚才讨论的内容,你认为在AI快速发展的时代,普通人应该如何适应和准备?",
        ]

    def get_session_history(self, session_id: str) -> InMemoryChatMessageHistory:
        """Return the session's chat history, windowed to the last k=3 exchanges.

        First access for a session creates an empty history. On subsequent
        accesses the stored history is re-materialized through
        ConversationBufferWindowMemory so only the most recent k turns survive,
        bounding the prompt size across rounds.
        """
        if session_id not in self.history_store:
            message_history = InMemoryChatMessageHistory()
            self.history_store[session_id] = message_history
            return self.history_store[session_id]
        # NOTE(review): ConversationBufferWindowMemory is deprecated in newer
        # langchain releases — consider trim_messages; kept as-is here.
        memory = ConversationBufferWindowMemory(
            chat_memory=self.history_store[session_id],
            k=3,
            return_messages=True,
        )
        assert len(memory.memory_variables) == 1
        key = memory.memory_variables[0]
        messages = memory.load_memory_variables({})[key]
        # Replace the stored history with the windowed copy.
        self.history_store[session_id] = InMemoryChatMessageHistory(messages=messages)
        return self.history_store[session_id]

    def warmup(self):
        """Run one throwaway generation so model start-up cost is paid up front."""
        session_id = 'warmup'
        config = {"configurable": {"session_id": session_id}}
        for chunk in self.chain_with_history.stream(input={'input': 'This is a warmup step.'}, config=config):
            pass

    def test_dialogue(self):
        """Stream answers to the five-round question set in a single session."""
        session_id = 'test_dialogue'
        for user_question in self.user_questions:
            print('User question:', user_question)
            config = {"configurable": {"session_id": session_id}}
            # FIX: dropped the pointless f-string prefix (no placeholders).
            print('LLM answer: ', end='')
            for chunk in self.chain_with_history.stream(input={'input': user_question}, config=config):
                print(chunk.content, end='')
            print()
            print('-' * 80)
            print()


if __name__ == '__main__':
    unittest.main()