K00B404 committed on
Commit
875003b
·
verified ·
1 Parent(s): bd16efc

Upload hugging_chat_wrapper.py

Browse files
Files changed (1) hide show
  1. hugging_chat_wrapper.py +802 -0
hugging_chat_wrapper.py ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os,re
2
+ import faiss
3
+ import numpy as np
4
+ import pickle
5
+ import logging
6
+ from uuid import uuid4
7
+ from rich import print as rp
8
+ from dotenv import load_dotenv,find_dotenv
9
+
10
+ load_dotenv(find_dotenv())
11
+ from hugchat import hugchat
12
+ from hugchat.login import Login
13
+ from hugchat.types.tool import Tool
14
+ from hugchat.types.assistant import Assistant
15
+ from hugchat.types.message import MessageNode as Message
16
+ from hugchat.types.file import File
17
+ from hugchat.hugchat import Conversation, Model, ChatBot
18
+ from typing import List, Dict, Any,Tuple,Optional
19
+ from datetime import datetime
20
+ from langchain.schema import Document
21
+ from langchain_community.vectorstores import FAISS
22
+ from langchain_community.docstore.in_memory import InMemoryDocstore
23
+ from langchain_huggingface import HuggingFaceEmbeddings
24
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
25
+ from langchain.schema import Document
26
+ from generated_tools.faiss_vector_store_plot import VectorStorePlotter
27
+ import warnings
28
+ #logging.basicConfig(filename='chatbots.log', level=logging.INFO)
29
# Warnings to silence, as (category, message) pairs. ``message`` is a regular
# expression matched against the beginning of the warning text. The filters
# are registered in the listed order.
_SUPPRESSED_WARNINGS = (
    (FutureWarning, "clean_up_tokenization_spaces"),
    (DeprecationWarning, "sipPyTypeDict"),
    (DeprecationWarning, "langchain"),
    (
        Warning,
        "clean_up_tokenization_spaces was not set. It will be set to True by default. This behavior will be deprecated in transformers v4.45, and will be then set to False by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884",
    ),
)
for _category, _message in _SUPPRESSED_WARNINGS:
    warnings.filterwarnings("ignore", category=_category, message=_message)
del _category, _message
33
+
34
class VectorStorage:
    """FAISS-backed vector store that loads, splits, embeds and persists documents.

    Embeddings come from ``sentence-transformers/all-MiniLM-L6-v2`` on CPU.
    If ``persistence_path`` exists on disk the store is loaded from it,
    otherwise a new empty ``IndexFlatL2`` store is created.
    """

    # File extensions that fetch_documents() is willing to read.
    LOADABLE_EXTENSIONS = ('.py', '.mmd', '.html', '.yaml', '.txt')

    # Extension -> language name for RecursiveCharacterTextSplitter.from_language.
    # Any other extension falls back to a plain CharacterTextSplitter.
    SPLITTER_LANGUAGES = {
        '.py': 'python',
        '.md': 'markdown',
        '.markdown': 'markdown',
        '.html': 'html',
        '.htm': 'html',
    }

    def __init__(self, dim: int = None, persistence_path: Optional[str] = None):
        """Create the embeddings model and load or create the vector store.

        :param dim: Embedding dimension; recomputed when a new store is created.
        :param persistence_path: Folder used by save/load; ``None`` disables saving.
        """
        self.dim = dim
        self.vector_store = None
        self.index = None
        self.docstore = None
        self.persistence_path = persistence_path
        self.compressed_retriever = None
        # Formatted log lines collected by log_handler().
        self.log = []
        self.embeddings = self.get_embeddings()
        self.setup_vector_store()

    def setup_logging(self):
        """Attach a file handler ('chatbots.log') and a collecting handler to the module logger."""
        if getattr(self, "log", None) is None:
            self.log = []

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        file_handler = logging.FileHandler('chatbots.log')
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

        # Second handler routes every record through log_handler().
        custom_handler = logging.Handler()
        custom_handler.emit = self.log_handler
        custom_handler.setFormatter(formatter)
        self.logger.addHandler(custom_handler)

    def log_handler(self, record):
        """Format ``record`` with the first handler's formatter and collect it in ``self.log``.

        If the instance also exposes a ``log_updated`` object with an ``emit``
        method (not defined by this class), the entry is forwarded to it.
        """
        handlers = self.logger.handlers
        formatter = handlers[0].formatter if handlers else None
        log_entry = formatter.format(record) if formatter else record.getMessage()

        if getattr(self, "log", None) is None:
            self.log = []
        self.log.append(log_entry)

        # The original code emitted unconditionally on an attribute this class
        # never defines; only emit when a host class actually provides it.
        signal = getattr(self, "log_updated", None)
        if signal is not None:
            signal.emit(log_entry)

    def get_embeddings(self):
        """Return the HuggingFace embeddings model used for all vectors."""
        return HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            cache_folder="./embeddings_cache",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

    def setup_vector_store(self):
        """Load the store from ``persistence_path`` when it exists, else create a new one."""
        if self.persistence_path and os.path.exists(self.persistence_path):
            self.vector_store = self.load_vector_store()
        else:
            self.create_new_vector_store()

    def create_new_vector_store(self):
        """Create an empty FAISS store sized to the embedding dimension."""
        # The dimension is probed by embedding a short sample query.
        self.dim = len(self.embeddings.embed_query("hello world"))
        self.index = faiss.IndexFlatL2(self.dim)
        self.docstore = InMemoryDocstore()
        self.vector_store = FAISS(
            self.embeddings,
            index=self.index,
            docstore=self.docstore,
            index_to_docstore_id={}
        )

    def load_vector_store(self):
        """Load and return the FAISS store saved at ``persistence_path``."""
        # SECURITY: allow_dangerous_deserialization unpickles the stored
        # docstore; only point persistence_path at data you trust.
        return FAISS.load_local(self.persistence_path, self.embeddings, allow_dangerous_deserialization=True)

    def save_vector_store(self):
        """Persist the store to ``persistence_path`` (prints a notice either way)."""
        if self.persistence_path:
            self.vector_store.save_local(self.persistence_path)
            print(f"Vector store saved to {self.persistence_path}")
        else:
            print("No persistence path specified. Vector store not saved.")

    def add_and_persist(self, file_paths: List[str]):
        """Read the given files, add them to the store and return the paths that were read.

        :param file_paths: Candidate file paths; unsupported extensions are skipped.
        :return: List of file paths that were successfully loaded.
        """
        docs, added_files = self.fetch_documents(file_paths)
        # add_vectors() splits the documents itself; splitting here as well
        # would split twice and reset the 'start_index' metadata.
        self.add_vectors(docs)
        return added_files

    def fetch_documents(self, file_paths: List[str]):
        """Read supported text files into Documents.

        Files with an extension outside LOADABLE_EXTENSIONS are skipped. Files
        that cannot be opened or decoded as UTF-8 are reported and skipped.

        :param file_paths: Paths to read.
        :return: Tuple ``(documents, added_files)``.
        """
        documents = []
        added_files = []
        for file_path in file_paths:
            ext = os.path.splitext(file_path)[1].lower()
            if ext not in self.LOADABLE_EXTENSIONS:
                continue

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (UnicodeDecodeError, OSError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            documents.append(Document(page_content=content, metadata={'source': file_path}))
            added_files.append(file_path)

        return documents, added_files

    def _store_split_documents(self, split_docs, ids):
        """Add already-split documents (optionally with explicit ids) and persist the store."""
        if ids is None:
            self.vector_store.add_documents(split_docs)
        else:
            if len(ids) != len(split_docs):
                raise ValueError("The number of ids must match the number of documents after splitting.")
            self.vector_store.add_documents(documents=split_docs, ids=ids)
        self.save_vector_store()

    def add_vectors(self, documents: List["Document"], ids: Optional[List[str]] = None):
        """Split, timestamp, embed and store documents, then persist the store.

        :param documents: Documents to add.
        :param ids: Optional ids, one per chunk produced by splitting.
        :raises ValueError: If ``ids`` is given and its length differs from the chunk count.
        """
        split_docs = self.split_documents(documents)
        if not split_docs and not ids:
            # Nothing to embed; avoid handing an empty batch to the index.
            return

        rp(f"length split_docs:{len(split_docs)}")
        current_time = datetime.now().isoformat()
        for doc in split_docs:
            doc.metadata['storage_datetime'] = current_time

        self._store_split_documents(split_docs, ids)

    def add_vectors_old(self, documents: List["Document"], ids: Optional[List[str]] = None):
        """Legacy variant of add_vectors() that does not stamp 'storage_datetime'."""
        split_docs = self.split_documents(documents)
        if not split_docs and not ids:
            return
        self._store_split_documents(split_docs, ids)

    def search_vectors(self, query: str, k: int):
        """Return the ``k`` most similar documents to ``query`` together with their scores."""
        return self.vector_store.similarity_search_with_score(query, k)

    def split_documents(self, documents: List["Document"], chunk_s=1024, chunk_o=0):
        """Split documents into chunks using a splitter chosen by file extension.

        The extension is taken from the ``source`` (or ``filename``) metadata.

        :param documents: Documents to split.
        :param chunk_s: Chunk size passed to the splitter.
        :param chunk_o: Chunk overlap passed to the splitter.
        :return: Flat list of chunk Documents.
        """
        splitters = {}  # one splitter per language, built on first use
        split_docs = []
        for doc in documents:
            metadata = getattr(doc, 'metadata', None) or {}
            name = metadata.get('source', '') or metadata.get('filename', '')
            ext = os.path.splitext(str(name or ''))[1].lower()
            language = self.SPLITTER_LANGUAGES.get(ext)

            splitter = splitters.get(language)
            if splitter is None:
                if language is not None:
                    splitter = RecursiveCharacterTextSplitter.from_language(
                        language=language, chunk_size=chunk_s, chunk_overlap=chunk_o)
                else:
                    splitter = CharacterTextSplitter(
                        chunk_size=chunk_s, chunk_overlap=chunk_o, add_start_index=True)
                splitters[language] = splitter

            split_docs.extend(splitter.split_documents([doc]))
        return split_docs

    def delete_vectors(self, ids: List[str]):
        """Delete the given ids from the store and persist the change."""
        self.vector_store.delete(ids)
        self.save_vector_store()

    def get_document(self, id: str) -> Optional["Document"]:
        """Return the stored Document for ``id``, or ``None`` when it is not found."""
        found = self.vector_store.docstore.search(id)
        # The docstore reports a miss as a message string; map that to None so
        # the result matches the declared return type.
        return found if isinstance(found, Document) else None

    def save_vectorstore_local(self, folder_path: str = "vectorstore", index_name: str = "faiss_index"):
        """Save the FAISS store plus standalone docstore/index-mapping pickles.

        :param folder_path: Target folder.
        :param index_name: Base name of the written files.
        :return: ``folder_path``.
        """
        # Reuse the store's own index -> id mapping so the pickled docstore
        # stays consistent with the vectors in the index.
        index_to_docstore_id = dict(self.vector_store.index_to_docstore_id)
        docstore = {
            doc_id: self.vector_store.docstore.search(doc_id)
            for doc_id in index_to_docstore_id.values()
        }

        self.vector_store.save_local(folder_path, index_name)

        with open(os.path.join(folder_path, f"{index_name}_docstore.pkl"), "wb") as f:
            pickle.dump(docstore, f)

        with open(os.path.join(folder_path, f"{index_name}_index_to_docstore_id.pkl"), "wb") as f:
            pickle.dump(index_to_docstore_id, f)

        print(f"Vectorstore saved successfully to {folder_path}")
        return folder_path

    @classmethod
    def load_vectorstore_local(cls, folder_path: str, index_name: str = "faiss_index", embeddings=None):
        """Build a VectorStorage from files written by save_vectorstore_local().

        :param folder_path: Folder containing the saved files.
        :param index_name: Base name used when saving.
        :param embeddings: Embeddings model; defaults to all-MiniLM-L6-v2.
        :return: A new instance whose ``vector_store`` is the loaded store.
        """
        # SECURITY: pickle.load executes arbitrary code from the file. Only
        # load folders produced by a trusted save_vectorstore_local() call.
        allow_dangerous_deserialization = True

        with open(os.path.join(folder_path, f"{index_name}_docstore.pkl"), "rb") as f:
            docstore = pickle.load(f)

        with open(os.path.join(folder_path, f"{index_name}_index_to_docstore_id.pkl"), "rb") as f:
            index_to_docstore_id = pickle.load(f)

        vectorstore = FAISS.load_local(
            folder_path,
            embeddings or HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
            index_name,
            allow_dangerous_deserialization=allow_dangerous_deserialization
        )
        # The pickle holds a plain dict; wrap it so docstore.search() keeps working.
        vectorstore.docstore = InMemoryDocstore(docstore)
        vectorstore.index_to_docstore_id = index_to_docstore_id

        instance = cls()
        instance.vector_store = vectorstore
        return instance
241
+
242
class Artifact:
    """A piece of generated content tagged with a type and optional metadata."""

    def __init__(self, content: Any, type: str, metadata: Dict[str, Any] = None):
        """Store the content, its type label and metadata (an empty dict when none is given)."""
        self.content, self.type = content, type
        self.metadata = metadata if metadata else {}
247
+
248
class ArtifactCollector:
    """In-memory collection of Artifact objects grouped by their type label."""

    def __init__(self, vector_storage: "VectorStorage"):
        """Start with an empty collection; ``vector_storage`` is accepted but not used here."""
        self.artifacts: Dict[str, List["Artifact"]] = {}

    def add_artifact(self, content: Any, type: str, metadata: Dict[str, Any] = None):
        """Wrap the content in an Artifact and append it to the list for ``type``."""
        bucket = self.artifacts.setdefault(type, [])
        bucket.append(Artifact(content, type, metadata))

    def get_artifacts(self, type: str) -> List["Artifact"]:
        """Return the artifacts stored under ``type``, or an empty list when there are none."""
        if type in self.artifacts:
            return self.artifacts[type]
        return []
259
+
260
class KnowledgeRetriever:
    """Thin query front-end over a VectorStorage."""

    def __init__(self, vector_storage: "VectorStorage"):
        """Remember the storage that queries are delegated to."""
        self.vector_storage = vector_storage

    def retrieve(self, query: str, k: int) -> List[Tuple["Document", float]]:
        """Return whatever the storage's ``search_vectors(query, k)`` yields."""
        storage = self.vector_storage
        hits = storage.search_vectors(query, k)
        return hits
266
+
267
class ArtifactDetector:
    """Find fenced, type-labelled blocks ("artifacts") in chatbot output and store them."""

    def __init__(self, vectorstorage: "VectorStorage"):
        """
        :param vectorstorage: Storage that receives every detected artifact as a Document.
        """
        self.vectorstorage = vectorstorage
        self.artifact_types = ['python', 'yaml', 'image_description', 'text', 'mermaid','chat']
        self.artifacts = []

    @staticmethod
    def _as_text(text: Any) -> str:
        """Return plain text for a string or for a response object exposing ``.text``."""
        if isinstance(text, str):
            return text
        inner = getattr(text, "text", None)
        return inner if isinstance(inner, str) else str(text)

    def detect_artifacts(self, text: Any, user_input: str = "") -> List[Dict[str, Any]]:
        """
        Detect artifacts within the provided text and return a list of dictionaries containing artifact details.

        Detected artifacts are also converted to Documents and added to the
        vector storage. Results are grouped in the order of ``artifact_types``.

        :param text: The text containing potential artifacts; either a string or
                     an object whose ``text`` attribute holds the string.
        :param user_input: The original user input associated with the text.
        :return: A list of dictionaries containing artifact type, source, user_input, and content.
        """
        raw_text = self._as_text(text)
        artifacts = []

        for artifact_type in self.artifact_types:
            # Blocks look like ```<type> ... ```; the type is escaped so a
            # label containing regex metacharacters cannot alter the pattern.
            pattern = rf"```{re.escape(artifact_type)}\s*(.*?)```"
            for match in re.findall(pattern, raw_text, re.DOTALL):
                content = match.strip()
                if artifact_type == 'chat':
                    source = "ChatHistoryText"
                else:
                    source = self._extract_filename(content)

                artifacts.append({
                    "type": artifact_type,
                    "source": source,
                    "user_input": user_input,
                    "content": content
                })

        if artifacts:
            documented_artifacts = self._to_documents(artifacts)
            # add_vectors() persists the store itself, so no separate save is needed.
            self.vectorstorage.add_vectors(documented_artifacts)
            rp(f"Number of documented artifacts:{len(documented_artifacts)}")
        return artifacts

    def _extract_filename(self, content: str) -> str:
        """
        Extract the filename from the content based on a ``filename: <name>`` marker.

        :param content: The content of the artifact where the filename might be specified.
        :return: The extracted filename or "No_filename_found.txt" if no marker is present.
        """
        match = re.search(r'filename:\s*(\S+)', content)
        if match:
            return match.group(1)
        return "No_filename_found.txt"

    def _to_documents(self, data: List[Dict[str, Any]]) -> List["Document"]:
        """
        Convert detected artifact dictionaries into Document objects.

        Entries that are not dictionaries are ignored.

        :param data: Artifact dictionaries with 'content', 'source', 'type' and 'user_input' keys.
        :return: List of Document objects, one per dictionary.
        """
        return [
            Document(
                page_content=d['content'],
                metadata={
                    'source': d['source'],
                    'type': d['type'],
                    'user_input': d['user_input']
                }
            )
            for d in data
            if isinstance(d, dict)
        ]
344
+
345
class ConversationManager:
    """
    Manages the adding, deleting, and persistence of conversations and sessions.

    Every session creates an online conversation and hugchat does no garbage
    collection. Per the original author's note, HuggingChat stops accepting
    new conversations after 25, and new sessions then time out with an empty
    response. This class therefore evicts the least recently used conversation
    once the limit is reached.

    ``self.conversations`` maps conversation id -> metadata dict with the keys
    'id', 'created_at', 'last_used' and, when known, 'conversation' (the
    object returned by hugchat).
    """

    # Conversation limit described above.
    MAX_CONVERSATIONS = 25
    # File (inside cookie_folder) that stores the timestamps between runs.
    METADATA_FILENAME = "conversations.json"
    _PERSISTED_KEYS = ('id', 'created_at', 'last_used')

    def __init__(self, email, passwd, cookie_folder, system_prompt, modelIndex=1):
        """
        :param email: HuggingChat account e-mail.
        :param passwd: HuggingChat account password.
        :param cookie_folder: Folder passed to the hugchat login; also holds the metadata file.
        :param system_prompt: System prompt used for the chatbot and new conversations.
        :param modelIndex: Model index used when creating new conversations.
        """
        self.email = email
        self.passwd = passwd
        # The original had a trailing comma here, which stored a 1-tuple.
        self.cookie_folder = cookie_folder
        self.system_prompt = system_prompt  # this is the prompt we will use to interact with the chatbot.
        self.conversations: Dict[str, Dict] = {}
        self.default_model_id = modelIndex  # llama3.1 70B :1 = command-r Cohere
        self.chatbot = self._login_and_create_chatbot()
        self.load_conversations()

    def _login_and_create_chatbot(self) -> "hugchat.ChatBot":
        """Log in with the stored credentials and return a ChatBot using the session cookies."""
        sign = Login(self.email, self.passwd)
        cookies = sign.login(self.cookie_folder)
        return ChatBot(cookies=cookies.get_dict(), system_prompt=self.system_prompt)

    def _metadata_path(self):
        """Return the path of the metadata file, or ``None`` when no cookie folder is set."""
        if not self.cookie_folder:
            return None
        return os.path.join(self.cookie_folder, self.METADATA_FILENAME)

    def _read_saved_metadata(self) -> Dict[str, Dict]:
        """Return the timestamps written by save_conversations(), or ``{}`` if unavailable."""
        import json

        path = self._metadata_path()
        if path is None or not os.path.exists(path):
            return {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                saved = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Could not read conversation metadata from {path}: {e}")
            return {}
        return saved if isinstance(saved, dict) else {}

    def save_conversations(self):
        """Write id/created_at/last_used of every tracked conversation to the metadata file."""
        import json

        path = self._metadata_path()
        if path is None:
            return
        snapshot = {
            conv_id: {key: entry.get(key) for key in self._PERSISTED_KEYS}
            for conv_id, entry in self.conversations.items()
        }
        try:
            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
            with open(path, "w", encoding="utf-8") as f:
                json.dump(snapshot, f, indent=2)
        except OSError as e:
            # Bookkeeping is best effort; a failed write must not break a chat.
            print(f"Could not save conversation metadata to {path}: {e}")

    def load_conversations(self):
        """Rebuild ``self.conversations`` from the chatbot's conversation list.

        Timestamps already known (in memory or in the metadata file) are kept;
        unknown conversations are stamped with the current time.
        """
        known = dict(self._read_saved_metadata())
        if isinstance(self.conversations, dict):
            known.update(self.conversations)

        refreshed: Dict[str, Dict] = {}
        # NOTE(review): assumes each list item exposes an ``id`` attribute;
        # items without one are keyed by their string form. Confirm against hugchat.
        for conversation in self.chatbot.get_conversation_list():
            conv_id = str(getattr(conversation, "id", conversation))
            previous = known.get(conv_id) or {}
            now = datetime.now().isoformat()
            refreshed[conv_id] = {
                'id': conv_id,
                'created_at': previous.get('created_at') or now,
                'last_used': previous.get('last_used') or now,
                'conversation': conversation,
            }
        self.conversations = refreshed

    def _remote_conversation(self, conversation_id: str):
        """Return the hugchat object for ``conversation_id`` when one can be found.

        Falls back to the id itself when neither the tracked entry nor the
        chatbot can provide an object.
        """
        entry = self.conversations.get(conversation_id) or {}
        conversation = entry.get('conversation')
        if conversation is not None:
            return conversation
        # NOTE(review): get_conversation_from_id is presumed to exist on the
        # hugchat ChatBot; it is looked up defensively. Confirm.
        lookup = getattr(self.chatbot, "get_conversation_from_id", None)
        return lookup(conversation_id) if callable(lookup) else conversation_id

    def add_conversation(self, conversation_id: str = None) -> str:
        """
        Add a new conversation. If no ID is provided, create a new one.
        If the limit is reached, remove the least recently used conversation.

        :param conversation_id: Id of an existing conversation to track, or ``None`` to create one.
        :return: The id of the tracked conversation.
        """
        if len(self.conversations) >= self.MAX_CONVERSATIONS:
            oldest_id = min(self.conversations, key=lambda k: self.conversations[k]['last_used'])
            self.delete_conversation(oldest_id)

        conversation = None
        if conversation_id is None:
            # The original referenced undefined names here; use the values
            # configured on this manager.
            conversation = self.chatbot.new_conversation(
                self.default_model_id, self.system_prompt, switch_to=True)
            conversation_id = str(getattr(conversation, "id", conversation))

        now = datetime.now().isoformat()
        entry = {
            'id': conversation_id,
            'created_at': now,
            'last_used': now
        }
        if conversation is not None:
            entry['conversation'] = conversation
        self.conversations[conversation_id] = entry
        self.save_conversations()
        return conversation_id

    def delete_conversation(self, conversation_id: str):
        """Delete a conversation both locally and on HuggingChat."""
        if conversation_id in self.conversations:
            # Resolve the remote object before the local entry disappears.
            remote = self._remote_conversation(conversation_id)
            del self.conversations[conversation_id]
            self.chatbot.delete_conversation(remote)
            self.save_conversations()

    def get_conversation(self, conversation_id: str) -> Dict:
        """Retrieve a conversation's metadata by its ID (``None`` when unknown)."""
        return self.conversations.get(conversation_id)

    def list_conversations(self) -> List[Dict]:
        """Refresh from the chatbot and list the metadata of all conversations."""
        self.load_conversations()
        return list(self.conversations.values())

    def use_conversation(self, conversation_id: str):
        """Mark a conversation as used, updating its last_used timestamp."""
        if conversation_id in self.conversations:
            self.conversations[conversation_id]['last_used'] = datetime.now().isoformat()
            self.save_conversations()

    def clean_old_conversations(self, days: int = 30):
        """Remove conversations older than the specified number of days."""
        now = datetime.now()
        # Collect first: delete_conversation() mutates the dict being inspected.
        to_delete = [
            conv_id
            for conv_id, conv in self.conversations.items()
            if (now - datetime.fromisoformat(conv['created_at'])).days > days
        ]
        for conv_id in to_delete:
            self.delete_conversation(conv_id)

    def get_or_create_conversation(self) -> str:
        """Get the first tracked conversation id, or create a conversation if none exist."""
        if not self.conversations:
            return self.add_conversation()
        return next(iter(self.conversations))

    def chat(self, message: str, conversation_id: str = None, web_search: bool=False) -> str:
        """Send a message to a specific conversation or create a new one.

        :param message: Text to send.
        :param conversation_id: Target conversation; unknown or missing ids fall back
                                to get_or_create_conversation().
        :param web_search: Forwarded to the chatbot.
        :return: Whatever the chatbot's ``chat`` call returns.
        """
        if conversation_id is None or conversation_id not in self.conversations:
            conversation_id = self.get_or_create_conversation()

        self.chatbot.change_conversation(self._remote_conversation(conversation_id))
        response = self.chatbot.chat(text=message, web_search=web_search)
        self.use_conversation(conversation_id)
        return response
469
+
470
class PromptFactory:
    """Builds the prompt text from a fixed template plus role, task, rules, history and context."""

    def __init__(self,
                 language="english",
                 extention="txt",
                 task="Provide weather information",
                 rules="Be concise and accurate",
                 role="AI Assistant"
                 ):
        """
        Store the prompt ingredients and the template they are rendered into.

        :param language: Value substituted for the language placeholders in role and rules
        :param extention: Value substituted for ###EXT### in the rules
        :param task: Text rendered after "Your TASK is to assist"
        :param rules: Text rendered under "Here's how you should respond:"
        :param role: Text rendered after "You ACT in the ROLE of"
        """
        template_lines = (
            "",
            "You ACT in the ROLE of {role}",
            "Your TASK is to assist {task}",
            "",
            "Your chat with the user will be automatically augmented so you can respond even better:",
            "- recent chat 'HISTORY:'",
            "- retrieved 'CONTEXT:' from external sources.",
            "- 'RULES:' to follow",
            "",
            "Here's how you should respond:",
            "{rules}",
            "",
            "HISTORY:",
            "{history}",
            "**Final Notes:**",
            "Remember 'You Rock!' think step by step and don't break ACT nor ROLE nor TASK.",
            "CONTEXT:",
            "{context}",
            "User Question:",
            "{input}",
            "",
        )
        self.template = "\n".join(template_lines)
        self.language = language
        self.extention = extention
        self.task = task
        self.rules = rules
        self.role = role
        self.history = "The start of a new chat."
        self.context = "No context provided."

    def create_prompt(self, user_input):
        """
        Render the template with the factory's current state.

        :param user_input: The user's input or question
        :return: The formatted prompt string
        """
        rendered_role = self.role.replace("{language}", self.language)
        rendered_rules = self.rules.replace("###EXT###", self.extention)
        rendered_rules = rendered_rules.replace("###LANGUAGE###", self.language)

        return self.template.format(
            role=rendered_role,
            task=self.task,
            rules=rendered_rules,
            history=self.history,
            context=self.context,
            input=user_input,
        )

    def update_chat_state(self, user_input, new_history=None, new_context=None):
        """
        Fold new history and context into the state, then render a prompt.

        :param user_input: The new user input
        :param new_history: Text appended (on a new line) to the stored history, if truthy
        :param new_context: Replacement for the stored context, if truthy
        :return: The updated prompt string
        """
        if new_history:
            self.history = f"{self.history}\n{new_history}"

        if new_context:
            self.context = new_context

        prompt = self.create_prompt(user_input)
        return prompt
547
+
548
class HuggingChatWrapper:
    """RAG-augmented HuggingChat session for one project folder.

    Wires together the conversation manager, the vector storage, the knowledge
    retriever and the artifact detector/collector. Each turn retrieves context
    for the user input, sends the rendered prompt to the chatbot and stores the
    artifacts found in the response.
    """

    # Maximum number of characters of chat history kept between turns.
    MAX_HISTORY_SIZE = 500

    _LANGUAGE = """python"""
    # File extension paired with _LANGUAGE in the rules (the ###EXT### placeholder).
    _EXTENSION = "py"
    _ROLE = """highly intelligent RAG augmented ###LANGUAGE### Coder"""
    _TASK = """Assist users by generating code that is:
OOP
fully implemented
procedural
generic
complete
conform autopep8 format
contains NO placeholders
"""
    _RULES = """
ALL response must be in encapsulated 'artifacts',
defined by the following file types:
<type> : <encapsulation>
"###EXT###" : "```###LANGUAGE### <content>```"
"yaml" : "```yaml <content>```"
"txt" : "```text <content>```"
"yaml" : "```image_description <content>```"
"jpg" : "```image <content>```"
"txt" : "```chat <content>```"
"mmd" : "```mermaid <content>```"
While answering think step-by-step and justify your answer.
Always start the content of the artifact with # filename: <filename>.<type>

Example interaction:

Question:
make a lemonade tycoon game in pyqt6 with yaml usage guide and image_description of front image of the app and a mermaid flowchart of its process
Answer:
```chat
Here is a simple implementation of a Lemonade Tycoon game using PyQt6.
```
```python
# filename: lemonade_tycoon.py
import sys
from PyQt6.QtWidgets import QApplication, QWidget, QGridLayout, QPushButton, QLabel, QSpinBox, QLCDNumber
from PyQt6.QtCore import Qt

class LemonadeTycoon(QWidget):
    def __init__(self):
        super().__init__()

        self.initUI()

    def initUI(self):
        self.setGeometry(300, 300, 300, 200)
        self.setWindowTitle('Lemonade Tycoon')

        layout = QGridLayout()

        self.lemonade_price_label = QLabel('Lemonade Price:')
        self.lemonade_price_spinbox = QSpinBox()
        self.lemonade_price_spinbox.setRange(1, 100)
        self.lemonade_price_spinbox.setValue(50)

        self.lemonade_price_label.setAlignment(Qt.AlignmentFlag.AlignRight)
        layout.addWidget(self.lemonade_price_label, 0, 0)
        layout.addWidget(self.lemonade_price_spinbox, 0, 1)

        self.lemonade_quantity_label = QLabel('Lemonade Quantity:')
        self.lemonade_quantity_spinbox = QSpinBox()
        self.lemonade_quantity_spinbox.setRange(1, 100)
        self.lemonade_quantity_spinbox.setValue(10)

        self.lemonade_quantity_label.setAlignment(Qt.AlignmentFlag.AlignRight)
        layout.addWidget(self.lemonade_quantity_label, 1, 0)
        layout.addWidget(self.lemonade_quantity_spinbox, 1, 1)

        self.total_cost_label = QLabel('Total Cost:')
        self.total_cost_lcd = QLCDNumber()
        self.total_cost_lcd.setDigitCount(10)

        self.total_cost_label.setAlignment(Qt.AlignmentFlag.AlignRight)
        layout.addWidget(self.total_cost_label, 2, 0)
        layout.addWidget(self.total_cost_lcd, 2, 1)

        self.sell_button = QPushButton('Sell Lemonade')
        self.sell_button.clicked.connect(self.sell_lemonade)

        layout.addWidget(self.sell_button, 3, 0, 1, 2)

        self.setLayout(layout)
        self.show()

    def sell_lemonade(self):
        price = self.lemonade_price_spinbox.value()
        quantity = self.lemonade_quantity_spinbox.value()
        total_cost = price * quantity
        self.total_cost_lcd.display(total_cost)

if __name__ == '__main__':
    app = QApplication(sys.argv)
    ex = LemonadeTycoon()
    sys.exit(app.exec())
```
```yaml
# filename: lemonade_tycoon_usage_guide.yaml
lemonade_tycoon:
  description: A simple lemonade tycoon game in PyQt6
  usage:
    - Run the game by executing the `lemonade_tycoon.py` file
    - Set the lemonade price and quantity using the spin boxes
    - Click the "Sell Lemonade" button to calculate the total cost
  dependencies:
    - PyQt6
    - Python 3.x
  notes:
    - This is a simple implementation of a lemonade tycoon game
    - The game calculates the total cost based on the lemonade price and quantity

```
```image_description
A simple window with two spin boxes for lemonade price and quantity, a button to sell lemonade, and a LCD display to show the total cost.
```
```mermaid
# filename: lemonade_tycoon_flowchart.mmd
graph TD
A[Lemonade Tycoon Game] -->|Run|> B[Set Lemonade Price and Quantity]
B -->|Click Sell Button|> C[Calculate Total Cost]
C -->|Display Total Cost|> D[LCD Display]
```
```chat
In this simple implementation of a lemonade tycoon game, we use PyQt6 to create a window with two spin boxes for lemonade price and quantity, a button to sell lemonade, and a LCD display to show the total cost. The game calculates the total cost based on the lemonade price and quantity.
```

"""

    def __init__(self, project_name: str, cookie_folder: str = "cookies", gallery_folder: str = "gallery", storage_folder: str = "storage", datasets_folder: str = "datasets"):
        """Create the project folders, log in and assemble all components.

        Credentials are read from the EMAIL and PASSWD environment variables.

        :param project_name: Root folder of the project; all other folders live inside it.
        :param cookie_folder: Sub-folder for login cookies.
        :param gallery_folder: Sub-folder for gallery files.
        :param storage_folder: Sub-folder used as the vector store's persistence path.
        :param datasets_folder: Sub-folder for datasets.
        :raises ValueError: If EMAIL or PASSWD is not set.
        """
        self.email = os.getenv("EMAIL")
        self.password = os.getenv("PASSWD")
        if not self.email or not self.password:
            raise ValueError("EMAIL and PASSWD environment variables must be set to log in to HuggingChat.")
        self.project_name = project_name

        self.cookie_folder = project_name+"/"+cookie_folder+'/'
        self.storage_folder = os.path.join(project_name, storage_folder)
        self.datasets_folder = os.path.join(project_name, datasets_folder)
        self.gallery_folder = os.path.join(project_name, gallery_folder)
        # storage_folder is deliberately not created here: VectorStorage
        # treats an existing persistence path as a saved store to load.
        os.makedirs(self.project_name, exist_ok=True)
        os.makedirs(self.cookie_folder, exist_ok=True)
        os.makedirs(self.datasets_folder, exist_ok=True)
        os.makedirs(self.gallery_folder, exist_ok=True)

        self.history = ""
        self.artifacts = []

        language = self._LANGUAGE
        self.PromptFactory = PromptFactory(
            language=language,
            # Without this the ###EXT### rule rendered the default "txt"
            # next to the python fence.
            extention=self._EXTENSION,
            task=self._TASK,
            rules=self._RULES,
            role=self._ROLE.replace("###LANGUAGE###", language)
        )
        self.conversation_manager = ConversationManager(self.email, self.password, self.cookie_folder, self.PromptFactory.create_prompt(user_input=""))
        self.chatbot = self.conversation_manager.chatbot
        self.vector_storage = VectorStorage(persistence_path=self.storage_folder)
        self.vectorstore_plotter = VectorStorePlotter(self.vector_storage.vector_store)
        self.knowledge_retriever = KnowledgeRetriever(self.vector_storage)
        self.artifact_detector = ArtifactDetector(self.vector_storage)
        self.artifact_collector = ArtifactCollector(self.vector_storage)

    def chat(self):
        """Interactive loop: read user input, run one RAG turn, print the history.

        The loop ends on end-of-input or Ctrl-C.
        """
        while True:
            try:
                user_input = input("User:")
            except (EOFError, KeyboardInterrupt):
                break
            self.test_system(user_input)
            rp(self.history)

    def _remember_interaction(self, user_input, raw_response):
        """Append the latest exchange to the history and keep only the newest MAX_HISTORY_SIZE characters."""
        new_interaction = f"User: {user_input}\nAssistant: {raw_response}\n"
        # Trimming after appending keeps the cap even when a single
        # interaction is longer than the whole limit.
        self.history = (self.history + new_interaction)[-self.MAX_HISTORY_SIZE:]

    def _run_rag_turn(self, user_input):
        """Retrieve context, prompt the chatbot, detect artifacts and update the history."""
        knowledge_retrieved = self.knowledge_retriever.retrieve(query=user_input, k=1)

        # The wrapper owns the (already trimmed) history. Handing it over as a
        # replacement, instead of appending it every turn, stops the prompt
        # from accumulating duplicated history.
        if self.history:
            self.PromptFactory.history = self.history
        # Without hits, drop the previous turn's context instead of reusing it.
        if not knowledge_retrieved:
            self.PromptFactory.context = "No context provided."

        updated_prompt = self.PromptFactory.update_chat_state(user_input=user_input, new_context=knowledge_retrieved)
        raw_response = self.chatbot.chat(text=updated_prompt)
        self.artifacts = self.artifact_detector.detect_artifacts(text=raw_response, user_input=user_input)
        self._remember_interaction(user_input, raw_response)
        return self.artifacts

    def test_system(self, user_input):
        """Run one RAG turn and print every detected artifact.

        :param user_input: The user's message.
        :return: List of detected artifact dictionaries.
        """
        artifacts = self._run_rag_turn(user_input)
        for art in artifacts:
            rp(art)
        return artifacts

    def RAG_Augmented_Bot(self, user_input):
        """Run one RAG turn without printing.

        :param user_input: The user's message.
        :return: List of detected artifact dictionaries.
        """
        return self._run_rag_turn(user_input)

    def _chat(self, message: str) -> "Message":
        """Send ``message`` prefixed with formatted retrieval context and collect artifacts.

        :param message: The user's message.
        :return: The chatbot's response object.
        """
        relevant_artifacts = self.knowledge_retriever.retrieve(message, k=3)
        context = self._format_context(relevant_artifacts)

        full_message = f"{context}\n\nUser: {message}"

        response = self.chatbot.chat(full_message)
        self._collect_artifacts(response, message)
        return response

    def _collect_artifacts(self, response: "Message", user_input: str = ""):
        """Detect artifacts in ``response`` and add them, plus the full text, to the collector.

        :param response: Chatbot response object providing ``get_final_text()``.
        :param user_input: The user message that produced the response.
        """
        text = response.get_final_text()
        # The response object itself is handed over because the detector reads
        # its ``text`` attribute.
        detected_artifacts = self.artifact_detector.detect_artifacts(text=response, user_input=user_input)

        for artifact in detected_artifacts:
            self.artifact_collector.add_artifact(artifact["content"], artifact["type"])

        self.artifact_collector.add_artifact(text, "text")

    def _format_context(self, relevant_artifacts: List[Tuple["Document", float]]) -> str:
        """Render retrieved documents as a bullet list with a 100-character preview and score."""
        lines = [
            f"- {doc.metadata.get('type', 'text')}: {doc.page_content[:100]}... (relevance: {score:.2f})\n"
            for doc, score in relevant_artifacts
        ]
        return "Relevant information:\n" + "".join(lines)

    def retrieve_knowledge(self, query: str, k: int):
        """Return the ``k`` best matches for ``query`` from the knowledge retriever."""
        return self.knowledge_retriever.retrieve(query, k)
802
+