Files changed (1) hide show
  1. app.py +72 -13
app.py CHANGED
@@ -48,23 +48,82 @@ llm_name = "gpt-3.5-turbo"
48
 
49
  vectordb = initialize.initialize()
50
 
51
- def chat_query(question, history):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key = OPENAI_API_KEY)
54
 
55
- # Conversation Retrival Chain with Memory
56
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
57
- retriever=vectordb.as_retriever()
58
- qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
59
 
60
- # Replace input() with question variable for Gradio
61
- result = qa({"question": question})
62
- return result['answer']
63
 
64
- # Chatbot only answers based on Documents
65
- # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key = OPENAI_API_KEY, ), chain_type="stuff", vectorstore=vectordb)
66
- # result = qa.run(question)
67
- # return result
68
 
69
 
70
 
 
48
 
49
  vectordb = initialize.initialize()
50
 
51
+
52
+ #-------------------------------------------
53
+
54
+
55
+
56
+ from langchain import HuggingFacePipeline, PromptTemplate, LLMChain, RetrievalQA
57
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
58
+ import torch
59
+
60
+ quantization_config = {
61
+ "load_in_4bit": True,
62
+ "bnb_4bit_compute_dtype": torch.float16,
63
+ "bnb_4bit_quant_type": "nf4",
64
+ "bnb_4bit_use_double_quant": True,
65
+ }
66
+
67
+ llm = HuggingFacePipeline(pipeline=pipeline)
68
+ model_id = "mistralai/Mistral-7B-Instruct-v0.1"
69
+ model_4bit = AutoModelForCausalLM.from_pretrained(
70
+ model_id, device="cuda", quantization_config=quantization_config
71
+ )
72
+
73
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
74
+
75
+ pipeline = pipeline(
76
+ "text-generation",
77
+ model=model_4bit,
78
+ tokenizer=tokenizer,
79
+ use_cache=True,
80
+ device=0, # '0' is for GPU, 'cpu' for CPU
81
+ max_length=500,
82
+ do_sample=True,
83
+ top_k=5,
84
+ num_return_sequences=1,
85
+ eos_token_id=tokenizer.eos_token_id,
86
+ pad_token_id=tokenizer.eos_token_id,
87
+ )
88
+
89
+ template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
90
+ Answer the question below from the context below:
91
+ {context}
92
+ {question} [/INST]
93
+ """
94
+
95
+
96
+
97
+
98
def chat_query(retrieverQA, text_query):
    """Answer ``text_query`` with retrieval-augmented QA over ``vectordb``.

    Parameters
    ----------
    retrieverQA : RetrievalQA or None
        An existing QA chain to reuse. When ``None`` (or falsy) a new
        chain is built from the module-level ``llm`` and ``vectordb``.
    text_query : str
        The user's question.

    Returns
    -------
    str
        The model's answer text.
    """
    # BUG FIX: the original ignored the retrieverQA argument and rebuilt
    # the chain on every single call; reuse the chain when one is passed.
    if not retrieverQA:
        # BUG FIX: "retrieval" is not a valid chain_type — the valid
        # document-combination types are "stuff", "map_reduce",
        # "refine", and "map_rerank". "stuff" matches the intent here.
        retrieverQA = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectordb.as_retriever(),
            verbose=True,
        )
    result = retrieverQA.run(text_query)
    return result
103
+
104
+
105
+
106
+
107
+ #-------------------------------------------
108
+
109
+
110
+ # def chat_query(question, history):
111
 
112
+ # llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key = OPENAI_API_KEY)
113
 
114
+ # # Conversation Retrieval Chain with Memory
115
+ # memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
116
+ # retriever=vectordb.as_retriever()
117
+ # qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
118
 
119
+ # # Replace input() with question variable for Gradio
120
+ # result = qa({"question": question})
121
+ # return result['answer']
122
 
123
+ # # Chatbot only answers based on Documents
124
+ # # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key = OPENAI_API_KEY, ), chain_type="stuff", vectorstore=vectordb)
125
+ # # result = qa.run(question)
126
+ # # return result
127
 
128
 
129