Sadiaa committed on
Commit
35a5f1c
·
verified ·
1 Parent(s): 61cd3e5

Update chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +155 -26
chatbot.py CHANGED
@@ -1,75 +1,108 @@
1
  import os
2
  import time
3
- import pandas as pd
4
- from huggingface_hub import hf_hub_download
5
  from groq import Groq
6
  from langchain.memory import ConversationBufferMemory
 
 
7
  from langchain_community.vectorstores import FAISS
8
  from deep_translator import GoogleTranslator
9
 
 
 
 
10
 
11
  class Comsatsbot:
12
- def __init__(self, hf_space_repo, llm, api_keys, chats_collection,index_path: str = "faiss_kb" ):
 
13
  self.llm = llm
14
  self.api_keys = api_keys
15
  self.client = None
16
  self.models = [
 
17
  "llama-3.3-70b-versatile",
18
  "llama3-70b-8192"
19
  ]
20
  self.memory = ConversationBufferMemory(llm=self.llm, max_token_limit=3000)
21
  self.chats_collection = chats_collection
22
  self.index_path = index_path
23
- self.hf_space_repo = hf_space_repo
24
  self.faiss_index = None
25
  self.faiss_retriever = None
 
26
  self.initialize_faiss_index()
27
 
28
- def load_data_from_hf_space(self):
29
- files = ["english_data.csv", "urdu_data.csv", "FYP Supervisor Feedback.csv"]
30
  documents = []
31
-
32
- for file in files:
33
- local_path = hf_hub_download(repo_id=self.hf_space_repo, filename=file)
34
- df = pd.read_csv(local_path)
35
- docs = df.astype(str).to_dict(orient="records")
36
- documents.extend(docs)
37
-
38
  return documents
39
 
40
  def initialize_faiss_index(self):
 
41
  if os.path.exists(self.index_path):
42
- self.faiss_index = FAISS.load_local(self.index_path)
 
43
  else:
44
- documents = self.load_data_from_hf_space()
45
- self.faiss_index = FAISS.from_documents(documents)
 
46
  self.faiss_index.save_local(self.index_path)
47
-
48
  self.faiss_retriever = self.faiss_index.as_retriever(search_kwargs={"k": 5})
 
49
 
50
  def retrieve_answer(self, query):
 
51
  if self.faiss_retriever:
52
- return self.faiss_retriever.invoke(query)
 
 
 
53
  return None
54
 
55
  def create_chat_record(self, chat_id):
 
56
  self.chats_collection.insert_one({
57
  "_id": chat_id,
58
  "history": []
59
  })
60
 
61
  def update_chat(self, chat_id, question, answer):
 
62
  self.chats_collection.update_one(
63
  {"_id": chat_id},
64
  {"$push": {"history": {"question": question, "answer": answer}}}
65
  )
66
 
67
  def load_chat(self, chat_id):
 
68
  chat_record = self.chats_collection.find_one({"_id": chat_id})
69
  if not chat_record:
 
70
  raise KeyError(f"Chat ID {chat_id} does not exist.")
71
  return chat_record.get('history', [])
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def get_system_prompt(self):
74
  return """
75
  You are a comsats assistant to help the user with comsats university-related queries. Your response should be concise, direct, and to the point. Avoid any unnecessary explanations. Always consider the provided context and chat history to generate the answer.
@@ -78,11 +111,14 @@ Use emojis only when required based on the user's tone and emotions. Do not over
78
  - **Sad emotions**: Use 😔 when the user is asking about something disappointing or negative.
79
  - **Surprise**: Use 😯 when the user expresses surprise.
80
  - **Anger or frustration**: Use 😡 when the user expresses frustration or dissatisfaction.
 
 
81
  If the user writes question in urdu, give answer in urdu.
82
- If the user writes question in English, give answer in English.
83
  please provide the personalized answer and provide answer quickly
84
  please answer from the dataset i provided to you in csv files. And donot write in every answer that i donot know the exact answer.and refer website only where it is necessary.
85
- Do not include the phrase "According to the provided context" or "Based on the chat history". Simply generate the answer like a human would, without referencing where the information comes from.
 
86
  If the question requires a URL, format it like this:
87
  [Click here to visit COMSATS](https://comsats.edu.pk).
88
  Your task is to help students at COMSATS University, Attock campus, with their university-related queries. The following are key details about the university:
@@ -101,25 +137,118 @@ Context ends here. Now, answer the following question:
101
  """
102
 
103
  def generate_response(self, question, history, context):
 
104
  prompt = self.get_system_prompt().format(question=question, history=history, context=context)
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  for api_key in self.api_keys:
107
  self.client = Groq(api_key=api_key)
108
  for model in self.models:
109
  try:
110
  chat_completion = self.client.chat.completions.create(
111
  messages=[
112
- {"role": "system", "content": prompt},
113
- {"role": "user", "content": f"Answer the following question: {question}"}
 
 
 
 
 
 
 
 
 
114
  ],
115
  model=model,
116
- max_tokens=1024,
 
117
  )
118
- return chat_completion.choices[0].message.content
119
- except Exception:
 
 
 
 
120
  time.sleep(2)
121
  continue
122
- return "Sorry, unable to provide an answer at this time."
 
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
 
125
 
 
 
 
1
  import os
2
  import time
3
+ import json
4
+ import logging
5
  from groq import Groq
6
  from langchain.memory import ConversationBufferMemory
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain_community.document_loaders import CSVLoader
9
  from langchain_community.vectorstores import FAISS
10
  from deep_translator import GoogleTranslator
11
 
12
# Set up logging for the whole module.
# NOTE(review): calling basicConfig at import time, at DEBUG level, is very
# verbose and overrides the host application's logging setup — confirm this
# module is only used as a standalone app, not as an importable library.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
15
 
16
  class Comsatsbot:
17
+ def __init__(self, hf, llm, api_keys, chats_collection, paths, index_path='faiss_kb'):
18
+ logger.info("Initializing Comsatsbot...")
19
  self.llm = llm
20
  self.api_keys = api_keys
21
  self.client = None
22
  self.models = [
23
+ # "llama3-groq-70b-8192-tool-use-preview",
24
  "llama-3.3-70b-versatile",
25
  "llama3-70b-8192"
26
  ]
27
  self.memory = ConversationBufferMemory(llm=self.llm, max_token_limit=3000)
28
  self.chats_collection = chats_collection
29
  self.index_path = index_path
30
+ self.hf = hf
31
  self.faiss_index = None
32
  self.faiss_retriever = None
33
+ self.paths = paths
34
  self.initialize_faiss_index()
35
 
36
+ def load_data(self, paths):
37
+ logger.info(f"Loading data from paths: {paths}")
38
  documents = []
39
+ for path in paths:
40
+ loader = CSVLoader(file_path=path)
41
+ data = loader.load()
42
+ documents.extend(data)
43
+ logger.debug(f"Loaded {len(documents)} documents.")
 
 
44
  return documents
45
 
46
  def initialize_faiss_index(self):
47
+ logger.info("Initializing FAISS index...")
48
  if os.path.exists(self.index_path):
49
+ logger.info(f"FAISS index found at {self.index_path}. Loading...")
50
+ self.faiss_index = FAISS.load_local(self.index_path, self.hf, allow_dangerous_deserialization=True)
51
  else:
52
+ logger.info(f"FAISS index not found. Creating a new one...")
53
+ documents = self.load_data(self.paths)
54
+ self.faiss_index = FAISS.from_documents(documents, self.hf)
55
  self.faiss_index.save_local(self.index_path)
 
56
  self.faiss_retriever = self.faiss_index.as_retriever(search_kwargs={"k": 5})
57
+ logger.info("FAISS index initialized successfully.")
58
 
59
  def retrieve_answer(self, query):
60
+ logger.info(f"Retrieving answer for query: {query}")
61
  if self.faiss_retriever:
62
+ result = self.faiss_retriever.invoke(query)
63
+ logger.debug(f"Retrieved answer: {result}")
64
+ return result
65
+ logger.warning("FAISS retriever is not initialized.")
66
  return None
67
 
68
  def create_chat_record(self, chat_id):
69
+ logger.info(f"Creating new chat record for chat_id: {chat_id}")
70
  self.chats_collection.insert_one({
71
  "_id": chat_id,
72
  "history": []
73
  })
74
 
75
  def update_chat(self, chat_id, question, answer):
76
+ logger.info(f"Updating chat history for chat_id: {chat_id}")
77
  self.chats_collection.update_one(
78
  {"_id": chat_id},
79
  {"$push": {"history": {"question": question, "answer": answer}}}
80
  )
81
 
82
  def load_chat(self, chat_id):
83
+ logger.info(f"Loading chat history for chat_id: {chat_id}")
84
  chat_record = self.chats_collection.find_one({"_id": chat_id})
85
  if not chat_record:
86
+ logger.error(f"Chat ID {chat_id} does not exist.")
87
  raise KeyError(f"Chat ID {chat_id} does not exist.")
88
  return chat_record.get('history', [])
89
 
90
+ def new_chat(self, chat_id):
91
+ logger.info(f"Creating new chat with ID: {chat_id}")
92
+ if self.chats_collection.find_one({"_id": chat_id}):
93
+ logger.error(f"Chat ID {chat_id} already exists.")
94
+ raise KeyError(f"Chat ID {chat_id} exists already.")
95
+ self.create_chat_record(chat_id)
96
+ return "success"
97
+
98
+ def delete_chat(self, chat_id):
99
+ logger.info(f"Deleting chat record for chat_id: {chat_id}")
100
+ if not self.chats_collection.find_one({"_id": chat_id}):
101
+ logger.error(f"Chat ID {chat_id} does not exist.")
102
+ raise KeyError(f"Chat ID {chat_id} does not exist.")
103
+ self.chats_collection.delete_one({"_id": chat_id})
104
+ return "success"
105
+
106
  def get_system_prompt(self):
107
  return """
108
  You are a comsats assistant to help the user with comsats university-related queries. Your response should be concise, direct, and to the point. Avoid any unnecessary explanations. Always consider the provided context and chat history to generate the answer.
 
111
  - **Sad emotions**: Use 😔 when the user is asking about something disappointing or negative.
112
  - **Surprise**: Use 😯 when the user expresses surprise.
113
  - **Anger or frustration**: Use 😡 when the user expresses frustration or dissatisfaction.
114
+ If the user asks the same question repeatedly or asks an illogical question, feel free to use emojis to subtly convey frustration, confusion, or amusement.
115
+
116
  If the user writes question in urdu, give answer in urdu.
117
+ If the user writes question in English, give answer in English .
118
  please provide the personalized answer and provide answer quickly
119
  please answer from the dataset i provided to you in csv files. And donot write in every answer that i donot know the exact answer.and refer website only where it is necessary.
120
+
121
+ Do not include the phrase "According to the provided context" or "Based on the chat history". Simply generate the answer like a human would, without referencing where the information comes from.
122
  If the question requires a URL, format it like this:
123
  [Click here to visit COMSATS](https://comsats.edu.pk).
124
  Your task is to help students at COMSATS University, Attock campus, with their university-related queries. The following are key details about the university:
 
137
  """
138
 
139
  def generate_response(self, question, history, context):
140
+ logger.info(f"Generating response for question: {question}")
141
  prompt = self.get_system_prompt().format(question=question, history=history, context=context)
142
 
143
+ while True:
144
+ for api_key in self.api_keys:
145
+ self.client = Groq(api_key=api_key)
146
+ for model in self.models:
147
+ try:
148
+ logger.info(f"Calling model {model} for response...")
149
+ chat_completion = self.client.chat.completions.create(
150
+ messages=[
151
+ {"role": "system", "content": prompt},
152
+ {"role": "user", "content": f"Answer the following question: {question}"}
153
+ ],
154
+ model=model,
155
+ max_tokens=1024,
156
+ )
157
+ response = chat_completion.choices[0].message.content
158
+ logger.debug(f"Received response: {response}")
159
+ return response
160
+ except Exception as e:
161
+ logger.error(f"Error with model {model}: {e}")
162
+ time.sleep(2)
163
+ continue
164
+ logger.warning("Unable to generate a response.")
165
+ return "Sorry, unable to provide an answer at this time."
166
+
167
+ def detect_language(self, question):
168
+ logger.info(f"Detecting language for question: {question}")
169
  for api_key in self.api_keys:
170
  self.client = Groq(api_key=api_key)
171
  for model in self.models:
172
  try:
173
  chat_completion = self.client.chat.completions.create(
174
  messages=[
175
+ {
176
+ "role": "system",
177
+ "content": """
178
+ You are an expert agent, and your task is to detect the language.
179
+ Return a JSON: {'detected_language': 'urdu' or 'english'}
180
+ """
181
+ },
182
+ {
183
+ "role": "user",
184
+ "content": f"Detect the language for: {question}"
185
+ }
186
  ],
187
  model=model,
188
+ max_tokens=256,
189
+ response_format={"type": "json_object"},
190
  )
191
+ response = json.loads(chat_completion.choices[0].message.content)
192
+ detected_language = response['detected_language'].lower()
193
+ logger.debug(f"Detected language: {detected_language}")
194
+ return detected_language
195
+ except Exception as e:
196
+ logger.error(f"Error detecting language: {e}")
197
  time.sleep(2)
198
  continue
199
+ logger.warning("Unable to detect language.")
200
+ return "english"
201
 
202
+ def translate_urdu(self, text):
203
+ logger.info(f"Translating text to Urdu: {text}")
204
+ for api_key in self.api_keys:
205
+ self.client = Groq(api_key=api_key)
206
+ for model in self.models:
207
+ try:
208
+ chat_completion = self.client.chat.completions.create(
209
+ messages=[
210
+ {
211
+ "role": "system",
212
+ "content": """
213
+ Translate the following text into proper Urdu. Return a JSON:
214
+ {'text': 'translated urdu text'}
215
+ """
216
+ },
217
+ {
218
+ "role": "user",
219
+ "content": f"Translate this: {text}"
220
+ }
221
+ ],
222
+ model=model,
223
+ max_tokens=512,
224
+ response_format={"type": "json_object"},
225
+ )
226
+ response = json.loads(chat_completion.choices[0].message.content)
227
+ translated_text = response['text']
228
+ logger.debug(f"Translated text: {translated_text}")
229
+ return translated_text
230
+ except Exception as e:
231
+ logger.error(f"Error translating text: {e}")
232
+ time.sleep(2)
233
+ continue
234
+ return text
235
+
236
+ def response(self, question, chat_id):
237
+ logger.info(f"Processing response for question: {question} (chat_id: {chat_id})")
238
+ chat_history = self.load_chat(chat_id)
239
+
240
+ for entry in chat_history:
241
+ self.memory.save_context({"input": entry["question"]}, {"output": entry["answer"]})
242
+
243
+ language = self.detect_language(question)
244
+
245
+ if language == 'urdu':
246
+ question_translation = GoogleTranslator(source='ur', target='en').translate(question)
247
+ context = self.faiss_retriever.invoke(question_translation)
248
+ else:
249
+ context = self.faiss_retriever.invoke(question)
250
 
251
+ answer = self.generate_response(question, chat_history, context)
252
 
253
+ self.update_chat(chat_id, question, answer)
254
+ return answer