"""Streamlit test harness for financial LLM prompts (LangChain + llama.cpp).

Loads a local GGUF model once at import time, then lets the user choose
between a plain LLMChain and a RetrievalQA chain with conversation memory.
"""

import streamlit as st
from langchain.chains import LLMChain, RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import TFIDFRetriever
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Stream generated tokens to stdout as they are produced.
callbacks = [StreamingStdOutCallbackHandler()]

print("creating llm started")
llm = LlamaCpp(
    model_path='unsloth.Q5_K_M.gguf',
    temperature=0.75,
    # NOTE(review): 30 tokens is very short for a QA answer — confirm intended.
    max_tokens=30,
    # BUGFIX: was top_p=4 — top_p is a nucleus-sampling probability in (0, 1].
    top_p=0.95,
    # BUGFIX: was callback_manager=callbacks — a plain list of handlers must be
    # passed via `callbacks`; `callback_manager` expects a CallbackManager object.
    callbacks=callbacks,
    verbose=True,  # Verbose is required to pass to the callback manager
)
print("creating llm ended")


def with_memory(llm):
    """Return a RetrievalQA chain with conversation memory.

    Uses a tiny one-document TF-IDF retriever as the knowledge source and a
    ConversationBufferMemory keyed on the question input.

    Args:
        llm: The language model to drive the chain.

    Returns:
        A configured RetrievalQA chain.
    """
    # Typo fixed in the seed document text ("Finatial" -> "Financial").
    retriever = TFIDFRetriever.from_texts(["Financial AI"])
    # Typo fixed in the system prompt ("Finiantial" -> "Financial").
    template = """You are the Financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
    prompt1 = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=template,
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        verbose=False,
        chain_type_kwargs={
            "verbose": False,
            "prompt": prompt1,
            # Memory tracks prior turns under "history"; "question" is the
            # user-supplied input key.
            "memory": ConversationBufferMemory(
                memory_key="history",
                input_key="question"),
        }
    )
    return qa


def without_memory(llm):
    """Return a stateless LLMChain with a simple instruction prompt.

    Args:
        llm: The language model to drive the chain.

    Returns:
        A configured LLMChain.
    """
    # Typo fixed in the system prompt ("Finiantial" -> "Financial").
    template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    print("creating model created")
    return llm_chain_model


def main():
    """Build a streamlit layout"""
    # Wide mode
    st.set_page_config(layout="wide")

    # Candidate model files. NOTE(review): currently unused — model selection
    # below only switches chain type, not the underlying GGUF file (see TODO).
    llm_models = {
        "Base": "unsloth.Q5_K_M.gguf",
        "Cerebras": "cerebras_Llama3-DocChat-1.0-8B_Base_adapt_basic_model_16bit.gguf",
        "Bavest": "bavest_fin_llama_33b_adapt_basic_model_16bit.gguf",
        "Aliyasir": "aliyasir_Llama-3-8B-Instruct-Finance-RAG_adapt_basic_model_16bit.gguf",
        "Basic Adapt": "adapt-unsloth.Q5_K_M.gguf",
        "adapt llm": "AdaptLLM_finance-LLM-13B_adapt_basic_model_16bit.gguf",
        "Fibro": "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf",
    }

    # Designing the interface
    st.title("Financial LLM test")
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*Hint: you can select the LLM model and write your prompt")

    # Set the columns
    col1, col2 = st.columns(2)
    col1.subheader("Prompt Section")
    col2.subheader("Model Output")

    # Default chain so a prompt can be submitted before any sidebar choice.
    llm_qa = without_memory(llm)

    # Model selection
    st.sidebar.title("Model selection")
    det_arch = st.sidebar.selectbox("LLM model", ['With Memory', 'Without Memory'])

    # For newline
    st.sidebar.write("\n")

    if st.sidebar.button("Select LLM"):
        with st.spinner("Loading model..."):
            if det_arch == 'Without Memory':
                llm_qa = without_memory(llm)
            else:
                llm_qa = with_memory(llm)
            # llm_qa = get_model(llm_models.get(det_arch))  # load the model TODO

    text_input = ''
    with col1:
        text_input_temp = st.text_input(
            "Please, type your question and submit.",
            "Write Your Prompt",
            key="placeholder",
        )
        if st.button("Submit"):
            text_input = text_input_temp

    with col2:
        if text_input != '':
            with st.spinner("Analyzing..."):
                # BUGFIX: was llm_qa.run(question) — `question` was undefined
                # (NameError); the submitted text lives in `text_input`.
                out_gen = llm_qa.run(text_input)
                st.write("LLM Response: ", out_gen)
                text_input = ''


if __name__ == "__main__":
    main()