"""Streamlit test harness for financial LLM prompts (LangChain + llama.cpp).

Loads a local GGUF model once at import time, then lets the user choose
between a plain LLMChain and a RetrievalQA chain with conversation memory.
"""

import streamlit as st
from langchain.chains import LLMChain, RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import TFIDFRetriever
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Stream generated tokens to stdout as they are produced.
callbacks = [StreamingStdOutCallbackHandler()]

print("creating llm started")
llm = LlamaCpp(
    model_path='unsloth.Q5_K_M.gguf',
    temperature=0.75,
    # NOTE(review): 30 tokens is very short for a QA answer — confirm intended.
    max_tokens=30,
    # BUGFIX: was top_p=4 — top_p is a nucleus-sampling probability in (0, 1].
    top_p=0.95,
    # BUGFIX: was callback_manager=callbacks — a plain list of handlers must be
    # passed via `callbacks`; `callback_manager` expects a CallbackManager object.
    callbacks=callbacks,
    verbose=True,  # Verbose is required to pass to the callback manager
)
print("creating llm ended")


def with_memory(llm):
    """Return a RetrievalQA chain with conversation memory.

    Uses a tiny one-document TF-IDF retriever as the knowledge source and a
    ConversationBufferMemory keyed on the question input.

    Args:
        llm: The language model to drive the chain.

    Returns:
        A configured RetrievalQA chain.
    """
    # Typo fixed in the seed document text ("Finatial" -> "Financial").
    retriever = TFIDFRetriever.from_texts(["Financial AI"])
    # Typo fixed in the system prompt ("Finiantial" -> "Financial").
    template = """You are the Financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
    prompt1 = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=template,
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        verbose=False,
        chain_type_kwargs={
            "verbose": False,
            "prompt": prompt1,
            # Memory tracks prior turns under "history"; "question" is the
            # user-supplied input key.
            "memory": ConversationBufferMemory(
                memory_key="history",
                input_key="question"),
        }
    )
    return qa


def without_memory(llm):
    """Return a stateless LLMChain with a simple instruction prompt.

    Args:
        llm: The language model to drive the chain.

    Returns:
        A configured LLMChain.
    """
    # Typo fixed in the system prompt ("Finiantial" -> "Financial").
    template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    print("creating model created")
    return llm_chain_model


def main():
    """Build a streamlit layout"""
    # Wide mode
    st.set_page_config(layout="wide")

    # Candidate model files. NOTE(review): currently unused — model selection
    # below only switches chain type, not the underlying GGUF file (see TODO).
    llm_models = {
        "Base": "unsloth.Q5_K_M.gguf",
        "Cerebras": "cerebras_Llama3-DocChat-1.0-8B_Base_adapt_basic_model_16bit.gguf",
        "Bavest": "bavest_fin_llama_33b_adapt_basic_model_16bit.gguf",
        "Aliyasir": "aliyasir_Llama-3-8B-Instruct-Finance-RAG_adapt_basic_model_16bit.gguf",
        "Basic Adapt": "adapt-unsloth.Q5_K_M.gguf",
        "adapt llm": "AdaptLLM_finance-LLM-13B_adapt_basic_model_16bit.gguf",
        "Fibro": "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf",
    }

    # Designing the interface
    st.title("Financial LLM test")
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*Hint: you can select the LLM model and write your prompt")

    # Set the columns
    col1, col2 = st.columns(2)
    col1.subheader("Prompt Section")
    col2.subheader("Model Output")

    # Default chain so a prompt can be submitted before any sidebar choice.
    llm_qa = without_memory(llm)

    # Model selection
    st.sidebar.title("Model selection")
    det_arch = st.sidebar.selectbox("LLM model", ['With Memory', 'Without Memory'])

    # For newline
    st.sidebar.write("\n")

    if st.sidebar.button("Select LLM"):
        with st.spinner("Loading model..."):
            if det_arch == 'Without Memory':
                llm_qa = without_memory(llm)
            else:
                llm_qa = with_memory(llm)
            # llm_qa = get_model(llm_models.get(det_arch))  # load the model TODO

    text_input = ''
    with col1:
        text_input_temp = st.text_input(
            "Please, type your question and submit.",
            "Write Your Prompt",
            key="placeholder",
        )
        if st.button("Submit"):
            text_input = text_input_temp

    with col2:
        if text_input != '':
            with st.spinner("Analyzing..."):
                # BUGFIX: was llm_qa.run(question) — `question` was undefined
                # (NameError); the submitted text lives in `text_input`.
                out_gen = llm_qa.run(text_input)
                st.write("LLM Response: ", out_gen)
                text_input = ''


if __name__ == "__main__":
    main()