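# Hugging Face Space: a Gradio Q&A app that serves a local GGUF finance model
# (Finbro / Dolphin-2.9 Llama-3-8B) through LangChain's llama.cpp bindings.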
import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.memory import ConversationBufferMemory

# Stream generated tokens to stdout as they are produced.
callbacks = [StreamingStdOutCallbackHandler()]
| print("creating ll started") | |
| llm = LlamaCpp( | |
| model_path="taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf", | |
| temperature=0.75, | |
| max_tokens=100, | |
| top_p=4, | |
| callback_manager=callbacks, | |
| verbose=True, # Verbose is required to pass to the callback manager | |
| ) | |
| # print("creating ll ended") | |
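# Loading the 16-bit GGUF weights happens here and can take a while; the app
# is unresponsive until llama.cpp has finished loading the model.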

def greet(question, model_type):
    print(f"question is {question}")
    if model_type == "With memory":
        retriever = TFIDFRetriever.from_texts(["Financial AI"])
        template = """You are the financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
        prompt1 = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
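        # {history} is filled by ConversationBufferMemory, {context} by the
        # retrieved documents, and {question} by the user's input.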
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt1,
                "memory": ConversationBufferMemory(
                    memory_key="history",
                    input_key="question",
                ),
            },
        )
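        # chain_type="stuff" concatenates all retrieved documents into the
        # prompt's {context} slot, so each question costs a single LLM call.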
| print("creating model created") | |
    else:
        template = """You are the financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(template=template, input_variables=["question"])
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen
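# Local smoke test (hypothetical question string; assumes the GGUF file above
# is present next to this script):
# print(greet("What is dollar-cost averaging?", "Without memory"))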

demo = gr.Interface(
    fn=greet,
    inputs=[
        "text",
        gr.Dropdown(
            ["With memory", "Without memory"],
            label="Memory status",
            info="With memory, responses are slower but more context-aware",
        ),
    ],
    outputs="text",
)
demo.launch(debug=True, share=True)
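# debug=True keeps the process blocking and prints tracebacks to the console;
# share=True additionally exposes the app via a temporary public gradio.live link.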

# Earlier variant without the memory branch, kept for reference (note the
# explicit n_batch/n_ctx/top_k sampling settings):
#
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
#
# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating llm started")
# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
# llm = LlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callbacks=callbacks,
#     n_ctx=2048,
#     verbose=True,  # verbose is required for the callback output
# )
#
# def greet(question, model_type):
#     print("prompt started")
#     print(f"question is {question}")
#     template = """You are the financial expert:
# ### Instruction:
# {question}
# ### Input:
# ### Response:
# """
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     out_gen = llm_chain_model.run(question)
#     print(f"out is: {out_gen}")
#     return out_gen
#
# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
#     ["Without memory", "With memory"], label="Memory status", info="With memory, responses are slower but more context-aware"
# ),], outputs="text")
# demo.launch(debug=True, share=True)