from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr

inst_llm = ChatNVIDIA(model="mixtral_8x7b")  ## Feel free to change the model

prompt1 = ChatPromptTemplate.from_messages([
    ("system", "Only respond in rhymes"),
    ("user", "{input}"),
])

prompt2 = ChatPromptTemplate.from_messages([
    ("system", (
        "Only responding in rhyme, change the topic of the input poem to be about {topic}!"
        " Make it happy! Try to keep the same sentence structure, but make sure it's easy to recite!"
        " Try not to rhyme a word with itself."
    )),
    ("user", "{input}"),
])

## These are the main chains, constructed here as modules of functionality.
chain1 = prompt1 | inst_llm | StrOutputParser()  ## only expects input
chain2 = prompt2 | inst_llm | StrOutputParser()  ## expects both input and topic

################################################################################

def rhyme_chat2_stream(message, history, return_buffer=True):
    '''This is a generator function, where each call will yield the next entry'''

    first_poem = None
    for entry in history:
        if entry[0] and entry[1]:
            ## If a generation occurred as a direct result of a user input,
            ## keep that response (the first poem generated) and break out
            first_poem = entry[1]
            break

    if first_poem is None:
        ## First Case: There is no initial poem generated. Better make one up!

        buffer = "Oh! I can make a wonderful poem about that! Let me think!\n\n"
        yield buffer

        ## iterate over stream generator for first generation
        inst_out = ""
        chat_gen = chain1.stream({"input": message})
        for token in chat_gen:
            inst_out += token
            buffer += token
            yield buffer if return_buffer else token

        passage = "\n\nNow let me rewrite it with a different focus! What should the new focus be?"
        buffer += passage
        yield buffer if return_buffer else passage

    else:
        ## Subsequent Cases: There is a poem to start with. Generate a similar one with a new topic!

        buffer = "Sure! Here you go!\n\n"
        yield buffer

        ## An early `return` here would terminate the generator before the rewrite
        ## ever streams, so it stays commented out.
        # return

        ## iterate over stream generator for second generation
        chat_gen = chain2.stream({"input": first_poem, "topic": message})
        for token in chat_gen:
            buffer += token
            yield buffer if return_buffer else token

        passage = "\n\nThis is fun! Give me another topic!"
        buffer += passage
        yield buffer if return_buffer else passage

################################################################################
## Below: This is a small-scale simulation of the gradio routine.

def queue_fake_streaming_gradio(chat_stream, history=None, max_questions=5):
    ## Use None instead of a mutable default argument so state doesn't leak between calls
    history = history if history is not None else []

    ## Mimic of the gradio initialization routine, where a set of starter messages can be printed off
    for human_msg, agent_msg in history:
        if human_msg: print("\n[ Human ]:", human_msg)
        if agent_msg: print("\n[ Agent ]:", agent_msg)

    ## Mimic of the gradio loop with an initial message from the agent.
    for _ in range(max_questions):
        message = input("\n[ Human ]: ")
        print("\n[ Agent ]: ")
        history_entry = [message, ""]
        for token in chat_stream(message, history, return_buffer=False):
            print(token, end='')
            history_entry[1] += token
        history += [history_entry]
        print("\n")

## history is of format [[User response 0, Bot response 0], ...]
history = [[None, "Let me help you make a poem! What would you like for me to write?"]]

## Uncomment to run the in-notebook simulation instead of the Gradio interface
# queue_fake_streaming_gradio(rhyme_chat2_stream, history=history)

## Simple way to initialize history for the ChatInterface
chatbot = gr.Chatbot(value=[[None, "Let me help you make a poem! What would you like for me to write?"]])

## IF USING COLAB: share=False is faster
gr.ChatInterface(rhyme_chat2_stream, chatbot=chatbot).queue().launch(debug=True, share=True)
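
################################################################################
## Optional smoke test (not part of the original cell; a minimal sketch that
## assumes an NVIDIA API key is already configured for ChatNVIDIA). It shows
## that the two chains can be exercised directly via the standard LCEL
## `.invoke()` / `.stream()` methods before any UI is involved; the example
## inputs are illustrative only. Uncomment to try it in place of the launch above.

# poem = chain1.invoke({"input": "Tell me about data science!"})
# print(poem)
# for token in chain2.stream({"input": poem, "topic": "the ocean"}):
#     print(token, end="")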