File size: 35,244 Bytes
875003b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
import os,re
import faiss
import numpy as np
import pickle
import logging
from uuid import uuid4
from rich import print as rp

from hugchat import hugchat
from hugchat.login import Login
from hugchat.types.tool import Tool
from hugchat.types.assistant import Assistant
from hugchat.types.message import MessageNode as Message
from hugchat.types.file import File
from hugchat.hugchat import Conversation, Model, ChatBot
from typing import List, Dict, Any,Tuple,Optional
from datetime import datetime
from langchain.schema import Document
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.schema import Document
from generated_tools.faiss_vector_store_plot  import VectorStorePlotter
import warnings
#logging.basicConfig(filename='chatbots.log', level=logging.INFO)
# Suppress specific, known warning messages (tokenizer clean-up notice,
# sipPyTypeDict and langchain deprecations) so they do not clutter the output.
# The last filter has no category and matches the full tokenizer notice text.
warnings.filterwarnings("ignore", category=FutureWarning, message="clean_up_tokenization_spaces")
warnings.filterwarnings("ignore", category=DeprecationWarning, message="sipPyTypeDict")
warnings.filterwarnings("ignore", category=DeprecationWarning, message="langchain")
warnings.filterwarnings("ignore", message="clean_up_tokenization_spaces was not set. It will be set to True by default. This behavior will be deprecated in transformers v4.45, and will be then set to False by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884")
	
class VectorStorage:
    """
    FAISS-backed document store with optional on-disk persistence.

    Documents are split into chunks, embedded with a sentence-transformers
    model and kept in a LangChain ``FAISS`` vector store. When
    ``persistence_path`` is given, the store is loaded from that folder on
    start-up (if an index is present) and saved back after every mutation.
    """

    # Format shared by the file handler and the in-memory log capture.
    LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    # File extensions that fetch_documents() is willing to read.
    LOADABLE_EXTENSIONS = ('.py', '.mmd', '.html', '.yaml', '.txt')
    # Extension -> language name passed to RecursiveCharacterTextSplitter.from_language.
    SPLITTER_LANGUAGES = {
        '.py': 'python',
        '.md': 'markdown',
        '.markdown': 'markdown',
        '.html': 'html',
        '.htm': 'html',
    }

    def __init__(self, dim: int = None, persistence_path: Optional[str] = None):
        """
        :param dim: Embedding dimension; recomputed from the embedding model
            whenever a new store is created.
        :param persistence_path: Folder used to load/save the store, or None
            for a purely in-memory store.
        """
        self.dim = dim
        self.vector_store = None
        self.index = None
        self.docstore = None
        self.persistence_path = persistence_path
        self.compressed_retriever = None
        self.log: List[str] = []  # formatted records captured by log_handler()
        self.embeddings = self.get_embeddings()
        self.setup_vector_store()

    def setup_logging(self):
        """
        Attach a file handler ('chatbots.log') and an in-memory capture
        handler to the module logger. Safe to call more than once.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        self._log_formatter = logging.Formatter(self.LOG_FORMAT)

        # The logger is shared module-wide: add the file handler only once,
        # otherwise every instance would duplicate each line in the file.
        if not any(isinstance(h, logging.FileHandler) for h in self.logger.handlers):
            file_handler = logging.FileHandler('chatbots.log')
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(self._log_formatter)
            self.logger.addHandler(file_handler)

        # One capture handler per instance, routed to log_handler().
        if getattr(self, '_capture_handler', None) is None:
            capture_handler = logging.Handler()
            capture_handler.emit = self.log_handler
            capture_handler.setFormatter(self._log_formatter)
            self.logger.addHandler(capture_handler)
            self._capture_handler = capture_handler

    def log_handler(self, record):
        """
        Format ``record``, append it to ``self.log`` and, if the instance has
        a ``log_updated`` signal-like attribute, emit the entry through it.
        """
        formatter = getattr(self, '_log_formatter', None) or logging.Formatter(self.LOG_FORMAT)
        log_entry = formatter.format(record)
        if not hasattr(self, 'log'):
            # Instances created without __init__ still get a log buffer.
            self.log = []
        self.log.append(log_entry)
        # ``log_updated`` is never defined by this class; only use it when a
        # subclass or owner has provided one.
        signal = getattr(self, 'log_updated', None)
        if signal is not None:
            signal.emit(log_entry)

    def get_embeddings(self):
        """Build the CPU sentence-transformers embedding model (normalized vectors)."""
        return HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            cache_folder="./embeddings_cache",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

    def setup_vector_store(self):
        """Load the persisted store when an index file exists, else create a new one."""
        # Checking for the index file (not just the folder) avoids a load
        # failure when the folder exists but nothing has been saved yet.
        # "index.faiss" assumes save_local()'s default index name - TODO confirm.
        index_file = (os.path.join(self.persistence_path, "index.faiss")
                      if self.persistence_path else None)
        if index_file and os.path.exists(index_file):
            self.vector_store = self.load_vector_store()
            # Keep the convenience attributes in sync with the loaded store.
            self.index = self.vector_store.index
            self.docstore = self.vector_store.docstore
            self.dim = self.index.d
        else:
            self.create_new_vector_store()

    def create_new_vector_store(self):
        """Create an empty L2 FAISS index sized to the embedding model's output."""
        self.dim = len(self.embeddings.embed_query("hello world"))
        self.index = faiss.IndexFlatL2(self.dim)
        self.docstore = InMemoryDocstore()
        self.vector_store = FAISS(
            self.embeddings,
            index=self.index,
            docstore=self.docstore,
            index_to_docstore_id={}
        )

    def load_vector_store(self):
        """Load the FAISS store saved under ``persistence_path``."""
        # SECURITY NOTE: allow_dangerous_deserialization enables pickle
        # loading; only point persistence_path at folders you trust.
        return FAISS.load_local(self.persistence_path, self.embeddings, allow_dangerous_deserialization=True)

    def save_vector_store(self):
        """Save the store to ``persistence_path``; report when no path is configured."""
        if self.persistence_path:
            self.vector_store.save_local(self.persistence_path)
            print(f"Vector store saved to {self.persistence_path}")
        else:
            print("No persistence path specified. Vector store not saved.")

    def add_and_persist(self, file_paths: List[str]):
        """
        Read the given files, index their contents and persist the store.

        :param file_paths: Paths of candidate files.
        :return: The subset of ``file_paths`` that was actually read.
        """
        docs, added_files = self.fetch_documents(file_paths)
        # add_vectors() splits and saves; splitting here as well would run the
        # splitter twice and reset each chunk's start_index metadata.
        self.add_vectors(docs)
        return added_files

    def fetch_documents(self, file_paths: List[str]):
        """
        Read UTF-8 text files with a supported extension into Documents.

        Files that cannot be read or decoded are reported and skipped.

        :param file_paths: Paths of candidate files.
        :return: Tuple ``(documents, added_files)``.
        """
        documents = []
        added_files = []
        for file_path in file_paths:
            ext = os.path.splitext(file_path)[1].lower()
            if ext not in self.LOADABLE_EXTENSIONS:
                continue

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (UnicodeDecodeError, OSError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            documents.append(Document(page_content=content, metadata={'source': file_path}))
            added_files.append(file_path)

        return documents, added_files

    def _store_chunks(self, chunks: List[Document], ids: Optional[List[str]]):
        """Validate ids, add non-empty ``chunks`` to the store and save it."""
        if ids is not None and len(ids) != len(chunks):
            raise ValueError("The number of ids must match the number of documents after splitting.")
        if not chunks:
            # Nothing to embed; skip the store entirely rather than handing
            # FAISS an empty batch.
            return
        if ids is None:
            self.vector_store.add_documents(chunks)
        else:
            self.vector_store.add_documents(documents=chunks, ids=ids)
        self.save_vector_store()

    def add_vectors(self, documents: List[Document], ids: Optional[List[str]] = None):
        """
        Split ``documents``, stamp each chunk with the storage time, add the
        chunks to the store and save it.

        :param documents: Documents to index.
        :param ids: Optional ids, one per chunk after splitting.
        :raises ValueError: If ``ids`` does not match the number of chunks.
        """
        split_docs = self.split_documents(documents)
        if split_docs:
            rp(f"length split_docs:{len(split_docs)}")
            current_time = datetime.now().isoformat()
            for doc in split_docs:
                doc.metadata['storage_datetime'] = current_time
        self._store_chunks(split_docs, ids)

    def add_vectors_old(self, documents: List[Document], ids: Optional[List[str]] = None):
        """
        Legacy variant of add_vectors() that does not add the
        ``storage_datetime`` metadata.

        :raises ValueError: If ``ids`` does not match the number of chunks.
        """
        self._store_chunks(self.split_documents(documents), ids)

    def search_vectors(self, query: str, k: int):
        """Return the ``k`` most similar chunks to ``query`` with their scores."""
        return self.vector_store.similarity_search_with_score(query, k)

    def split_documents(self, documents: List[Document], chunk_s=1024, chunk_o=0):
        """
        Split documents into chunks, choosing the splitter from the file
        extension found in the ``source`` (or ``filename``) metadata.

        :param documents: Documents to split.
        :param chunk_s: Chunk size passed to the splitter.
        :param chunk_o: Chunk overlap passed to the splitter.
        :return: Flat list of chunk Documents.
        """
        split_docs = []
        for doc in documents:
            metadata = getattr(doc, 'metadata', None) or {}
            origin = metadata.get('source', '') or metadata.get('filename', '')
            ext = os.path.splitext(origin)[1].lower()
            language = self.SPLITTER_LANGUAGES.get(ext)
            if language is not None:
                splitter = RecursiveCharacterTextSplitter.from_language(
                    language=language, chunk_size=chunk_s, chunk_overlap=chunk_o)
            else:
                splitter = CharacterTextSplitter(
                    chunk_size=chunk_s, chunk_overlap=chunk_o, add_start_index=True)

            split_docs.extend(splitter.split_documents([doc]))
        return split_docs

    def delete_vectors(self, ids: List[str]):
        """Delete the chunks with the given ids and save the store."""
        self.vector_store.delete(ids)
        self.save_vector_store()

    def get_document(self, id: str) -> Optional[Document]:
        """Return the stored Document for ``id``, or None when it is unknown."""
        found = self.vector_store.docstore.search(id)
        # The in-memory docstore answers unknown ids with a message string
        # (presumably "ID ... not found." - verify); normalise that to None.
        return found if isinstance(found, Document) else None

    def save_vectorstore_local(self, folder_path: str="vectorstore", index_name: str = "faiss_index"):
        """
        Save the store to ``folder_path`` and also write the docstore and the
        index-to-id mapping as two side-car pickle files.

        :return: ``folder_path``.
        """
        self.vector_store.save_local(folder_path, index_name)
        os.makedirs(folder_path, exist_ok=True)

        # Export the store's real ids and mapping so the side-car files agree
        # with the FAISS index. ``_dict`` is the in-memory docstore's backing
        # dict (private attribute - TODO confirm against the installed version).
        docstore: Dict[str, Document] = dict(getattr(self.vector_store.docstore, "_dict", {}))
        index_to_docstore_id: Dict[int, str] = dict(self.vector_store.index_to_docstore_id)

        with open(os.path.join(folder_path, f"{index_name}_docstore.pkl"), "wb") as f:
            pickle.dump(docstore, f)

        with open(os.path.join(folder_path, f"{index_name}_index_to_docstore_id.pkl"), "wb") as f:
            pickle.dump(index_to_docstore_id, f)

        print(f"Vectorstore saved successfully to {folder_path}")
        return folder_path

    @classmethod
    def load_vectorstore_local(cls, folder_path: str, index_name: str = "faiss_index", embeddings=None):
        """
        Build a VectorStorage around a store previously written with
        save_vectorstore_local().

        :param embeddings: Embedding model to use; defaults to the
            all-MiniLM-L6-v2 sentence-transformers model.
        :return: A VectorStorage instance without a persistence path.
        """
        embeddings = embeddings or HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # SECURITY NOTE: load_local unpickles data from folder_path; only load
        # folders you trust. The FAISS files are assumed to already contain the
        # docstore and id mapping (verify), so the side-car pickles are not
        # read back and the docstore keeps its proper type.
        vectorstore = FAISS.load_local(
            folder_path,
            embeddings,
            index_name,
            allow_dangerous_deserialization=True
        )

        # Bypass __init__ so no second embedding model and throwaway index are
        # created just to be replaced.
        instance = cls.__new__(cls)
        instance.persistence_path = None
        instance.compressed_retriever = None
        instance.log = []
        instance.embeddings = embeddings
        instance.vector_store = vectorstore
        instance.index = vectorstore.index
        instance.docstore = vectorstore.docstore
        instance.dim = vectorstore.index.d
        return instance

class Artifact:
    """A piece of content together with its type label and optional metadata."""

    def __init__(self, content: Any, type: str, metadata: Dict[str, Any] = None):
        """
        :param content: The artifact payload.
        :param type: Label describing the kind of artifact.
        :param metadata: Extra information; a falsy value becomes a new dict.
        """
        if not metadata:
            metadata = {}
        self.metadata = metadata
        self.type = type
        self.content = content

class ArtifactCollector:
    """Keeps artifacts in memory, grouped by their type label."""

    def __init__(self, vector_storage: VectorStorage):
        # ``vector_storage`` is accepted but not used by this class.
        self.artifacts: Dict[str, List[Artifact]] = {}

    def add_artifact(self, content: Any, type: str, metadata: Dict[str, Any] = None):
        """Wrap the arguments in an Artifact and file it under ``type``."""
        bucket = self.artifacts.setdefault(type, [])
        bucket.append(Artifact(content, type, metadata))

    def get_artifacts(self, type: str) -> List[Artifact]:
        """Return the artifacts stored under ``type`` (empty list if none)."""
        try:
            return self.artifacts[type]
        except KeyError:
            return []

class KnowledgeRetriever:
    """Thin query front-end over a VectorStorage."""

    def __init__(self, vector_storage: VectorStorage):
        self.vector_storage = vector_storage

    def retrieve(self, query: str, k: int) -> List[Tuple[Document, float]]:
        """
        Look up ``query`` in the vector storage.

        :param query: Search text.
        :param k: Number of results requested.
        :return: Whatever ``search_vectors`` of the storage returns.
        """
        storage = self.vector_storage
        hits = storage.search_vectors(query, k)
        return hits

class ArtifactDetector:
    """
    Finds fenced artifacts (```<type> ... ```) in chatbot output and stores
    them in a VectorStorage.
    """

    def __init__(self, vectorstorage: VectorStorage):
        self.vectorstorage = vectorstorage
        self.artifact_types = ['python', 'yaml', 'image_description', 'text', 'mermaid','chat']
        self.artifacts = []

    def detect_artifacts(self, text: str, user_input: str) -> List[Dict[str, Any]]:
        """
        Detect artifacts within the provided text, store them in the vector
        storage and return a list of dictionaries containing artifact details.

        :param text: The text containing potential artifacts. Either a plain
            string or an object exposing the string as a ``.text`` attribute.
        :param user_input: The original user input associated with the text.
        :return: A list of dictionaries containing artifact type, source, user_input, and content.
        """
        artifacts = self._find_artifacts(text, user_input)
        if not artifacts:
            # Nothing to index: do not send an empty batch to the vector store.
            return artifacts

        documented_artifacts = self._to_documents(artifacts)
        # add_vectors() persists the store itself, so no extra save is needed.
        self.vectorstorage.add_vectors(documented_artifacts)
        rp(f"Number of documented artifacts:{len(documented_artifacts)}")
        return artifacts

    def _find_artifacts(self, text: Any, user_input: str) -> List[Dict[str, Any]]:
        """Parse ``text`` and return one dict per fenced block of a known type."""
        # Accept both a response object carrying ``.text`` and a plain string.
        raw = getattr(text, "text", text)
        raw = "" if raw is None else str(raw)

        artifacts = []
        for artifact_type in self.artifact_types:
            # Blocks enclosed in triple backticks and labelled with the type.
            pattern = rf"```{re.escape(artifact_type)}\s*(.*?)```"
            for match in re.findall(pattern, raw, re.DOTALL):
                content = match.strip()
                if artifact_type == 'chat':
                    source = "ChatHistoryText"
                else:
                    source = self._extract_filename(content)
                artifacts.append({
                    "type": artifact_type,
                    "source": source,
                    "user_input": user_input,
                    "content": content
                })
        return artifacts

    def _extract_filename(self, content: str) -> str:
        """
        Extract the filename from the content based on a ``filename: <name>`` marker.

        :param content: The content of the artifact where the filename might be specified.
        :return: The extracted filename or "No_filename_found.txt" if not found.
        """
        match = re.search(r'filename:\s*(\S+)', content)
        if match:
            return match.group(1)
        return "No_filename_found.txt"

    def _to_documents(self, data: List[Dict[str, Any]]) -> List[Document]:
        """
        Convert detected artifact dictionaries into Document objects.
        Entries that are not dictionaries are skipped.

        :param data: List of artifact dictionaries with 'content', 'source',
            'type' and 'user_input' keys.
        :return: List of Document objects.
        """
        return [
            Document(
                page_content=d['content'],
                metadata={
                    'source': d['source'],
                    'type': d['type'],
                    'user_input': d['user_input']
                }
            )
            for d in data
            if isinstance(d, dict)
        ]
          
class ConversationManager:
    """
    Manages the adding, deleting, and persistence of conversations and sessions.

    Every session creates an online conversation and hugchat does no garbage
    collection. After the limit of 25 conversations no more are added and new
    sessions time out with an EMPTY response, so the least recently used
    conversation is evicted before a new one is created.

    ``self.conversations`` maps a conversation id to a metadata dict with the
    keys 'id', 'created_at' and 'last_used' (ISO timestamps).
    """

    MAX_CONVERSATIONS = 25
    # Metadata file written inside ``cookie_folder`` by save_conversations().
    STATE_FILENAME = "conversations.json"

    def __init__(self,email,passwd,cookie_folder, system_prompt, modelIndex=1):
        self.email = email
        self.passwd = passwd
        self.cookie_folder = cookie_folder  # a plain path, not a 1-tuple
        self.system_prompt = system_prompt  # this is the prompt we will use to interact with the chatbot.
        self.conversations: Dict[str, Dict] = {}
        # id -> object returned by hugchat, reused for calls back into hugchat.
        self._remote_handles: Dict[str, Any] = {}
        self.default_model_id = modelIndex  # model index used for new conversations
        self.chatbot = self._login_and_create_chatbot()
        self.load_conversations()

    def _login_and_create_chatbot(self) -> hugchat.ChatBot:
        """Log in with the stored credentials and return a ChatBot using the resulting cookies."""
        sign = Login(self.email, self.passwd)
        cookies = sign.login(self.cookie_folder)
        return ChatBot(cookies=cookies.get_dict(),  system_prompt=self.system_prompt)

    @staticmethod
    def _conversation_id(conversation) -> str:
        """Return the id of a hugchat conversation object (or of a raw id)."""
        return str(getattr(conversation, "id", conversation))

    def _handle_for(self, conversation_id: str):
        """Return the hugchat object known for ``conversation_id``, else the id itself."""
        return getattr(self, "_remote_handles", {}).get(conversation_id, conversation_id)

    def _state_path(self) -> Optional[str]:
        """Path of the metadata file, or None when no usable folder is configured."""
        folder = self.cookie_folder
        if not folder or not isinstance(folder, (str, os.PathLike)):
            return None
        return os.path.join(folder, self.STATE_FILENAME)

    def _read_saved_metadata(self) -> Dict[str, Dict]:
        """Read previously saved metadata; missing or invalid files give {}."""
        import json

        path = self._state_path()
        if path is None or not os.path.exists(path):
            return {}
        try:
            with open(path, 'r', encoding='utf-8') as f:
                saved = json.load(f)
        except (OSError, ValueError):
            return {}
        return saved if isinstance(saved, dict) else {}

    def save_conversations(self):
        """Write the conversation metadata to the JSON file in ``cookie_folder``."""
        import json

        path = self._state_path()
        if path is None:
            return
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.conversations, f, indent=2)

    def load_conversations(self):
        """
        Rebuild ``self.conversations`` from the chatbot's conversation list.

        Timestamps already known (in memory or in the saved metadata file) are
        kept; a conversation seen for the first time is stamped with the
        current time because its real creation time is not available here.
        """
        saved = self._read_saved_metadata()
        known = self.conversations if isinstance(self.conversations, dict) else {}
        now = datetime.now().isoformat()

        refreshed: Dict[str, Dict] = {}
        handles: Dict[str, Any] = {}
        # NOTE(review): assumes get_conversation_list() yields objects with an
        # ``id`` attribute - confirm against the installed hugchat version.
        for remote in self.chatbot.get_conversation_list():
            conv_id = self._conversation_id(remote)
            previous = known.get(conv_id) or saved.get(conv_id) or {}
            refreshed[conv_id] = {
                'id': conv_id,
                'created_at': previous.get('created_at', now),
                'last_used': previous.get('last_used', now),
            }
            handles[conv_id] = remote

        self.conversations = refreshed
        self._remote_handles = handles

    def add_conversation(self, conversation_id: str = None) -> str:
        """
        Add a new conversation. If no ID is provided, create a new one with
        the default model and the system prompt.
        If the limit is reached, remove the least recently used conversation.

        :return: The id of the added conversation.
        """
        is_new = conversation_id is None or conversation_id not in self.conversations
        if is_new and len(self.conversations) >= self.MAX_CONVERSATIONS:
            oldest_id = min(self.conversations, key=lambda k: self.conversations[k]['last_used'])
            self.delete_conversation(oldest_id)

        if conversation_id is None:
            created = self.chatbot.new_conversation(
                self.default_model_id, self.system_prompt, switch_to=True)
            conversation_id = self._conversation_id(created)
            self._remote_handles = getattr(self, "_remote_handles", {})
            self._remote_handles[conversation_id] = created

        now = datetime.now().isoformat()
        previous = self.conversations.get(conversation_id, {})
        self.conversations[conversation_id] = {
            'id': conversation_id,
            'created_at': previous.get('created_at', now),
            'last_used': now
        }
        self.save_conversations()
        return conversation_id

    def delete_conversation(self, conversation_id: str):
        """Delete a conversation both locally and on HuggingChat."""
        if conversation_id in self.conversations:
            # Remote first: if it fails, the local bookkeeping stays intact.
            self.chatbot.delete_conversation(self._handle_for(conversation_id))
            del self.conversations[conversation_id]
            getattr(self, "_remote_handles", {}).pop(conversation_id, None)
            self.save_conversations()

    def get_conversation(self, conversation_id: str) -> Dict:
        """Retrieve a conversation's metadata by its ID (None if unknown)."""
        return self.conversations.get(conversation_id)

    def list_conversations(self) -> List[Dict]:
        """Refresh from the chatbot and list the metadata of all conversations."""
        self.load_conversations()
        return list(self.conversations.values())

    def use_conversation(self, conversation_id: str):
        """Mark a conversation as used, updating its last_used timestamp."""
        if conversation_id in self.conversations:
            self.conversations[conversation_id]['last_used'] = datetime.now().isoformat()
            self.save_conversations()

    def clean_old_conversations(self, days: int = 30):
        """Remove conversations older than the specified number of days."""
        now = datetime.now()
        # Collect first: deleting while iterating the dict is not allowed.
        to_delete = [
            conv_id
            for conv_id, conv in self.conversations.items()
            if (now - datetime.fromisoformat(conv['created_at'])).days > days
        ]
        for conv_id in to_delete:
            self.delete_conversation(conv_id)

    def get_or_create_conversation(self) -> str:
        """Get an existing conversation or create a new one if none exist."""
        if not self.conversations:
            return self.add_conversation()
        return next(iter(self.conversations))

    def chat(self, message: str, conversation_id: str = None, web_search: bool=False) -> str:
        """
        Send a message to a specific conversation or create a new one.

        :return: The chatbot's response object as returned by ``ChatBot.chat``.
        """
        if conversation_id is None or conversation_id not in self.conversations:
            conversation_id = self.get_or_create_conversation()

        self.chatbot.change_conversation(self._handle_for(conversation_id))
        response = self.chatbot.chat(text=message, web_search=web_search)
        self.use_conversation(conversation_id)
        return response

class PromptFactory:
    """
    Builds the prompt sent to the chatbot from a fixed template plus the
    current role, task, rules, chat history and retrieved context.
    """

    def __init__(self, 
                 language="english",
                 extention="txt",
                 task="Provide weather information", 
                 rules="Be concise and accurate", 
                 role="AI Assistant"
                 ):
        """
        :param language: Value substituted for the language placeholders.
        :param extention: Value substituted for ``###EXT###`` in the rules.
        :param task: Text inserted after "Your TASK is to assist".
        :param rules: Response rules; may contain ``###EXT###`` and ``###LANGUAGE###``.
        :param role: Role text; may contain ``{language}`` or ``###LANGUAGE###``.
        """
        self.template = """
            You ACT in the ROLE of {role}
            Your TASK is to assist {task}

            Your chat with the user will be automatically augmented so you can respond even better:
                - recent chat 'HISTORY:'
                - retrieved 'CONTEXT:' from external sources.
                - 'RULES:' to follow

            Here's how you should respond:
            {rules}

            HISTORY:
            {history}
            **Final Notes:**
            Remember 'You Rock!' think step by step and don't break ACT nor ROLE nor TASK.
            CONTEXT:
            {context}
            User Question:
            {input}
            """
        self.language = language
        self.extention = extention
        self.task = task
        self.rules = rules
        self.role = role
        self.history = "The start of a new chat."
        self.context = "No context provided."

    def create_prompt(self, user_input):
        """
        Create a prompt using the current state of the PromptFactory.
        
        :param user_input: The user's input or question
        :return: The formatted prompt string
        """
        # The role accepts both placeholder spellings: the legacy "{language}"
        # and the "###LANGUAGE###" marker that the rules already use.
        role = (self.role
                .replace("{language}", self.language)
                .replace("###LANGUAGE###", self.language))
        rules = (self.rules
                 .replace("###EXT###", self.extention)
                 .replace("###LANGUAGE###", self.language))

        # str.format only interprets braces in the template itself, so braces
        # inside the substituted values (e.g. code in user_input) are safe.
        return self.template.format(
            role=role,
            task=self.task,
            rules=rules,
            history=self.history,
            context=self.context,
            input=user_input,
        )

    def update_chat_state(self, user_input, new_history=None, new_context=None):
        """
        Update the chat state with new history and context, then build the prompt.
        
        :param user_input: The new user input
        :param new_history: Text appended to the chat history (if truthy)
        :param new_context: Replacement for the current context (if truthy)
        :return: The updated prompt string
        """
        if new_history:
            self.history += f"\n{new_history}"

        if new_context:
            self.context = new_context

        # The user input is not added to the history here; it only appears in
        # the "User Question" section of the prompt.
        return self.create_prompt(user_input)

class HuggingChatWrapper:
    def __init__(self, project_name: str,cookie_folder: str = "cookies", gallery_folder: str="gallery", storage_folder: str ="storage", datasets_folder: str = "datasets"):        
        """
        Set up the project folders, the prompt factory and the chat/RAG components.

        Credentials are read from the ``EMAIL`` and ``PASSWD`` environment
        variables. The project, cookie, datasets and gallery directories are
        created if they do not exist yet.

        :param project_name: Root directory of the project; every other folder is placed beneath it
        :param cookie_folder: Sub-folder name whose path is passed on to ConversationManager
        :param gallery_folder: Sub-folder name stored as ``self.gallery_folder`` and created on disk
        :param storage_folder: Sub-folder name used as the ``persistence_path`` of VectorStorage
        :param datasets_folder: Sub-folder name stored as ``self.datasets_folder`` and created on disk
        """

        # os.getenv returns None when the variable is not set; no validation happens here.
        self.email = os.getenv("EMAIL")
        self.password = os.getenv("PASSWD")
        self.project_name = project_name
        
        # Built by plain concatenation so the path keeps a trailing '/',
        # unlike the os.path.join based paths below.
        self.cookie_folder = project_name+"/"+cookie_folder+'/'
        self.storage_folder = os.path.join(project_name,storage_folder)
        self.datasets_folder = os.path.join(project_name,datasets_folder)
        self.gallery_folder = os.path.join(project_name,gallery_folder)
        # NOTE(review): storage_folder is the only path not created here —
        # presumably VectorStorage takes care of it; confirm.
        os.makedirs(self.project_name,exist_ok=True)
        os.makedirs(self.cookie_folder,exist_ok=True)
        os.makedirs(self.datasets_folder,exist_ok=True)
        os.makedirs(self.gallery_folder,exist_ok=True)

        # Running chat transcript and the artifacts detected in the latest response.
        self.history = ""
        self.artifacts = []
        # Prompt building blocks handed to PromptFactory below. The ###EXT### and
        # ###LANGUAGE### markers inside `rules` are filled in later by
        # PromptFactory.create_prompt; the marker inside `role` is resolved in this
        # method before the factory is constructed.
        language="""python"""
        role="""highly intelligent RAG augmented ###LANGUAGE### Coder"""
        task="""Assist users by generating code that is:
            OOP
            fully implemented
            procedural
            generic
            complete
            conform autopep8 format
            contains NO placeholders
        """
        rules="""
        ALL response must be in encapsulated 'artifacts', 
        defined by the following file types:
            <type>          :       <encapsulation>
            "###EXT###"           :    "```###LANGUAGE### <content>```"
            "yaml"          :    "```yaml <content>```"
            "txt"           :    "```text <content>```"
            "yaml"          :    "```image_description <content>```"
            "jpg"           :    "```image <content>```"
            "txt"           :    "```chat <content>```" 
            "mmd"           :    "```mermaid <content>```" 
        While answering think step-by-step and justify your answer.
        Always start the content of the artifact with # filename: <filename>.<type>

        Example interaction:
            
            Question: 
                make a lemonade tycoon game in pyqt6 with yaml usage guide and image_description of front image of the app and a mermaid flowchart  of its process
            Answer:
                ```chat
                Here is a simple implementation of a Lemonade Tycoon game using PyQt6.
                ```
                ```python
                # filename: lemonade_tycoon.py
                import sys
                from PyQt6.QtWidgets import QApplication, QWidget, QGridLayout, QPushButton, QLabel, QSpinBox, QLCDNumber
                from PyQt6.QtCore import Qt

                class LemonadeTycoon(QWidget):
                    def __init__(self):
                        super().__init__()

                        self.initUI()

                    def initUI(self):
                        self.setGeometry(300, 300, 300, 200)
                        self.setWindowTitle('Lemonade Tycoon')

                        layout = QGridLayout()

                        self.lemonade_price_label = QLabel('Lemonade Price:')
                        self.lemonade_price_spinbox = QSpinBox()
                        self.lemonade_price_spinbox.setRange(1, 100)
                        self.lemonade_price_spinbox.setValue(50)

                        self.lemonade_price_label.setAlignment(Qt.AlignmentFlag.AlignRight)
                        layout.addWidget(self.lemonade_price_label, 0, 0)
                        layout.addWidget(self.lemonade_price_spinbox, 0, 1)

                        self.lemonade_quantity_label = QLabel('Lemonade Quantity:')
                        self.lemonade_quantity_spinbox = QSpinBox()
                        self.lemonade_quantity_spinbox.setRange(1, 100)
                        self.lemonade_quantity_spinbox.setValue(10)

                        self.lemonade_quantity_label.setAlignment(Qt.AlignmentFlag.AlignRight)
                        layout.addWidget(self.lemonade_quantity_label, 1, 0)
                        layout.addWidget(self.lemonade_quantity_spinbox, 1, 1)

                        self.total_cost_label = QLabel('Total Cost:')
                        self.total_cost_lcd = QLCDNumber()
                        self.total_cost_lcd.setDigitCount(10)

                        self.total_cost_label.setAlignment(Qt.AlignmentFlag.AlignRight)
                        layout.addWidget(self.total_cost_label, 2, 0)
                        layout.addWidget(self.total_cost_lcd, 2, 1)

                        self.sell_button = QPushButton('Sell Lemonade')
                        self.sell_button.clicked.connect(self.sell_lemonade)

                        layout.addWidget(self.sell_button, 3, 0, 1, 2)

                        self.setLayout(layout)
                        self.show()

                    def sell_lemonade(self):
                        price = self.lemonade_price_spinbox.value()
                        quantity = self.lemonade_quantity_spinbox.value()
                        total_cost = price * quantity
                        self.total_cost_lcd.display(total_cost)

                if __name__ == '__main__':
                    app = QApplication(sys.argv)
                    ex = LemonadeTycoon()
                    sys.exit(app.exec())
                ```
                ```yaml
                # filename: lemonade_tycoon_usage_guide.yaml
                lemonade_tycoon:
                    description: A simple lemonade tycoon game in PyQt6
                    usage:
                        - Run the game by executing the `lemonade_tycoon.py` file
                        - Set the lemonade price and quantity using the spin boxes
                        - Click the "Sell Lemonade" button to calculate the total cost
                    dependencies:
                        - PyQt6
                        - Python 3.x
                    notes:
                        - This is a simple implementation of a lemonade tycoon game
                        - The game calculates the total cost based on the lemonade price and quantity
                        
                ```
                ```image_description
                A simple window with two spin boxes for lemonade price and quantity, a button to sell lemonade, and a LCD display to show the total cost.
                ```
                ```mermaid
                # filename: lemonade_tycoon_flowchart.mmd
                graph TD
                    A[Lemonade Tycoon Game] -->|Run|> B[Set Lemonade Price and Quantity]
                    B -->|Click Sell Button|> C[Calculate Total Cost]
                    C -->|Display Total Cost|> D[LCD Display]
                ```
                ```chat
                In this simple implementation of a lemonade tycoon game, we use PyQt6 to create a window with two spin boxes for lemonade price and quantity, a button to sell lemonade, and a LCD display to show the total cost. The game calculates the total cost based on the lemonade price and quantity.
                ```
        
        """
        # No `extention` argument is passed, so the factory uses whatever default
        # its constructor defines for the ###EXT### marker.
        self.PromptFactory=PromptFactory( 
                            language=language,
                            task=task,
                            rules=rules,  
                            role=role.replace("###LANGUAGE###",language)
                        )
        #rp(self.email, self.password,self.cookie_folder,self.PromptFactory.create_prompt(user_input=""))
        # The conversation manager receives an initial prompt rendered with an
        # empty user input; its chatbot is then reused for every later turn.
        self.conversation_manager   = ConversationManager(self.email, self.password,self.cookie_folder,self.PromptFactory.create_prompt(user_input=""))
        self.chatbot                = self.conversation_manager.chatbot
        # The remaining components all share the same VectorStorage instance.
        self.vector_storage         = VectorStorage(persistence_path=self.storage_folder)
        self.vectorstore_plotter    = VectorStorePlotter(self.vector_storage.vector_store)
        #self.visualizer             = Visualizer(self) # ERROR! qapp before qwidget ERROR!
        self.knowledge_retriever    = KnowledgeRetriever(self.vector_storage)
        self.artifact_detector      = ArtifactDetector(self.vector_storage)
        self.artifact_collector     = ArtifactCollector(self.vector_storage)
        #self.chat()
   
    def chat(self):
        """
        Run an endless interactive console session.

        Every pass reads one line from stdin (prompted with ``User:``), hands
        it to ``test_system`` and then prints the accumulated history through
        ``rp``. The loop has no exit condition of its own; it only ends when
        something raises (per the original note: when the growing history no
        longer fits the context window).
        """
        while True:
            self.test_system(input("User:"))
            rp(self.history)
    
    def test_system(self, user_input):
        knowledge_retrieved = self.knowledge_retriever.retrieve(query=user_input,k=1)
        updated_prompt      = self.PromptFactory.update_chat_state(user_input=user_input,new_history=self.history,new_context=knowledge_retrieved)
        raw_response        = self.chatbot.chat(text=updated_prompt)
        self.artifacts      = self.artifact_detector.detect_artifacts(text=raw_response, user_input=user_input)
        concat_content = ""
        for art in self.artifacts:
            rp(art)
            concat_content += str(art) + "\n"
        
        # TODO Implementation: Manage chat history size
        MAX_HISTORY_SIZE = 500  # Define the maximum allowed size for the chat history in characters

        # Combine the new interaction (user input + chatbot response) with the existing history
        new_interaction = f"User: {user_input}\nAssistant: {raw_response}\n"
        new_history_size = len(self.history) + len(new_interaction)

        # Check if the new history size exceeds the maximum allowed size
        if new_history_size > MAX_HISTORY_SIZE:
            # Determine how many characters need to be removed
            excess_characters = new_history_size - MAX_HISTORY_SIZE
            
            # Trim the oldest part of the history by removing excess characters
            self.history = self.history[excess_characters:]

        # Step 5: Update the chat history with the new interaction
        self.history += new_interaction

        return self.artifacts
    
    def RAG_Augmented_Bot(self, user_input):
        knowledge_retrieved = self.knowledge_retriever.retrieve(query=user_input,k=1)
        updated_prompt      = self.PromptFactory.update_chat_state(user_input=user_input,new_history=self.history,new_context=knowledge_retrieved)
        raw_response        = self.chatbot.chat(text=updated_prompt)
        self.artifacts      = self.artifact_detector.detect_artifacts(text=raw_response, user_input=user_input)
        
        
        concat_content = ""
        
        concat_content = '\n'.join([str(artifact) for artifact in self.artifacts])
        
        rp(dir(self.artifacts))
        # TODO Implementation: Manage chat history size
        MAX_HISTORY_SIZE = 500  # Define the maximum allowed size for the chat history in characters
        # Combine the new interaction (user input + chatbot response) with the existing history
        new_interaction = f"User: {user_input}\nAssistant: {raw_response}\n"
        new_history_size = len(self.history) + len(new_interaction)
        # Check if the new history size exceeds the maximum allowed size
        if new_history_size > MAX_HISTORY_SIZE:
            # Determine how many characters need to be removed
            excess_characters = new_history_size - MAX_HISTORY_SIZE
            # Trim the oldest part of the history by removing excess characters
            self.history = self.history[excess_characters:]
        # Step 5: Update the chat history with the new interaction
        self.history += new_interaction
        return self.artifacts
            

    def _chat(self, message: str) -> Message:
        relevant_artifacts = self.knowledge_retriever.retrieve(message, k=3)
        context = self._format_context(relevant_artifacts)
        
        full_message = f"{context}\n\nUser: {message}"
        
        response = self.chatbot.chat(full_message)
        self._collect_artifacts(response)
        return response

    def _collect_artifacts(self, response: Message):
        text = response.get_final_text()
        detected_artifacts = self.artifact_detector.detect_artifacts(text)
        
        for artifact in detected_artifacts:
            self.artifact_collector.add_artifact(artifact["content"], artifact["type"])
        
        self.artifact_collector.add_artifact(text, "text")

    def _format_context(self, relevant_artifacts: List[Tuple[Document, float]]) -> str:
        context = "Relevant information:\n"
        for doc, score in relevant_artifacts:
            context += f"- {doc.metadata.get('type', 'text')}: {doc.page_content[:100]}... (relevance: {score:.2f})\n"
        return context

    def retrieve_knowledge(self, query: str, k: int):
        return self.knowledge_retriever.retrieve(query, k)