LLM updated #1
by abhivsh - opened

app.py CHANGED
```diff
@@ -48,23 +48,82 @@ llm_name = "gpt-3.5-turbo"
 
 vectordb = initialize.initialize()
 
-def chat_query(question, history):
+
+#-------------------------------------------
+
+from langchain.llms import HuggingFacePipeline
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
+import torch
+
+# 4-bit bitsandbytes settings; from_pretrained expects a BitsAndBytesConfig
+# object, not a plain dict.
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+)
+
+model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+model_4bit = AutoModelForCausalLM.from_pretrained(
+    model_id, device_map="auto", quantization_config=quantization_config
+)
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# No device= argument here: accelerate has already placed the 4-bit model.
+pipe = pipeline(
+    "text-generation",
+    model=model_4bit,
+    tokenizer=tokenizer,
+    use_cache=True,
+    max_length=500,
+    do_sample=True,
+    top_k=5,
+    num_return_sequences=1,
+    eos_token_id=tokenizer.eos_token_id,
+    pad_token_id=tokenizer.eos_token_id,
+)
+
+# Wrap the transformers pipeline for LangChain (after pipe exists, not before).
+llm = HuggingFacePipeline(pipeline=pipe)
+
+template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in a few words from the context.
+Answer the question below from the context below:
+{context}
+{question} [/INST]
+"""
+prompt = PromptTemplate(template=template, input_variables=["context", "question"])
+
+
+def chat_query(text_query, history=None):
+    # "retrieval" is not a valid chain_type; "stuff" feeds the retrieved
+    # documents straight into the prompt above.
+    retrieverQA = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=vectordb.as_retriever(),
+        chain_type_kwargs={"prompt": prompt},
+        verbose=True,
+    )
+    result = retrieverQA.run(text_query)
+    return result
+
+
+#-------------------------------------------
+
+# def chat_query(question, history):
 
-    llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
+#     llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
 
-    # Conversation Retrieval Chain with Memory
-    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    retriever = vectordb.as_retriever()
-    qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
+#     # Conversation Retrieval Chain with Memory
+#     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+#     retriever = vectordb.as_retriever()
+#     qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
 
-    # Replace input() with question variable for Gradio
-    result = qa({"question": question})
-    return result['answer']
+#     # Replace input() with question variable for Gradio
+#     result = qa({"question": question})
+#     return result['answer']
 
-    # Chatbot only answers based on Documents
-    # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key=OPENAI_API_KEY, ), chain_type="stuff", vectorstore=vectordb)
-    # result = qa.run(question)
-    # return result
+#     # Chatbot only answers based on Documents
+#     # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key=OPENAI_API_KEY, ), chain_type="stuff", vectorstore=vectordb)
+#     # result = qa.run(question)
+#     # return result
 
 
 
```
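Given the Space's runtime error, it can help to smoke-test the quantized pipeline on its own before debugging the chain end to end. A minimal sketch using the `pipe` object defined in the diff above; the prompt string here is illustrative, not from the Space:

```python
# Standalone smoke test of the 4-bit Mistral pipeline, bypassing LangChain.
# `pipe` is the text-generation pipeline built in app.py above.
out = pipe("[INST] Reply with the single word: ready. [/INST]")
print(out[0]["generated_text"])  # text-generation pipelines return a list of dicts
```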
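Rebuilding the RetrievalQA chain inside `chat_query` works, but it repeats the full chain setup on every message. A sketch of constructing the chain once at import time and reusing it, under the assumption that `llm`, `prompt`, and `vectordb` are the objects defined above:

```python
# Build the chain once; the handler then only runs it per question.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

def chat_query(text_query, history=None):
    return qa_chain.run(text_query)
```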
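The commented-out signature `chat_query(question, history)` suggests the Space front-end is a Gradio chat UI, though the diff does not show that part of the app. A minimal, hypothetical wiring sketch, assuming `gradio` is installed; the title string is invented:

```python
import gradio as gr

# ChatInterface calls fn(message, history), which chat_query's
# (text_query, history=None) signature already matches.
demo = gr.ChatInterface(fn=chat_query, title="Docs Q&A (Mistral-7B, 4-bit)")

if __name__ == "__main__":
    demo.launch()
```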