File size: 3,152 Bytes
a9bbea2
 
a61eda1
 
 
a9bbea2
a61eda1
 
 
 
 
 
 
 
 
 
 
 
 
 
a9bbea2
a61eda1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9bbea2
a61eda1
 
 
 
 
 
a9bbea2
a61eda1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9bbea2
a61eda1
85a658e
a61eda1
85a658e
 
a61eda1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import pinecone
import openai
from components.configs import querydocnumber, num_chatmsg_history, embedderengine
from components.configs import PINECONE_API_ENV, PINECONE_INDEX_NAME,PINECONE_NAMESPACE

#Short term memory
## This updates the chat history. It keeps relevantcontents together with the user message so both can be accessed at a later stage.
def autochatmemory(chatmemory, #original state of chatmemory
                   relevantcontents, #contents that were used to produce the response
                   user_message, #the original user message
                   response, #response generated
                   num_chatmsg_history=num_chatmsg_history #limiting the history as per config
                   ):
    """Append the latest exchange to the chat history and trim the oldest entries.

    The user entry stores ``relevantcontents`` together with ``user_message``
    so the information used to produce ``response`` stays in the history.

    Args:
        chatmemory: List of ``{"role": ..., "content": ...}`` dicts. It is
            modified in place.
        relevantcontents: Contents that were used to produce the response.
        user_message: The original user message.
        response: The generated response, stored under the "assistant" role.
        num_chatmsg_history: Maximum number of exchanges to keep. Each
            exchange is two entries (user + assistant). A value of 0 or less
            empties the history.

    Returns:
        The same ``chatmemory`` list object, after appending and trimming.
    """
    formatted_usermessage = f"Provided Info:\n{relevantcontents}\nUser:{user_message}"
    chatmemory.append({"role": "user", "content": formatted_usermessage})
    chatmemory.append({"role": "assistant", "content": response})
    # Count how many of the oldest entries exceed the allowed window.
    excess = len(chatmemory) - num_chatmsg_history * 2
    if excess > 0:
        # Delete an explicit count from the front. A negative-stop slice such
        # as chatmemory[:-0] is empty, so it would trim nothing when the limit is 0.
        del chatmemory[:excess]
    return chatmemory

#Long Term Memory
## gather documents from pinecone using an embedding of the user query
def documentretrival(
        user_message, 
        docnumber = querydocnumber,
        PINECONE_API_ENV = PINECONE_API_ENV, 
        PINECONE_INDEX_NAME = PINECONE_INDEX_NAME, 
        PINECONE_NAMESPACE =PINECONE_NAMESPACE,
        embedderengine=embedderengine, 
        ):
    """Query the pinecone index with an embedding of the user message.

    Args:
        user_message: Text to embed and use as the query.
        docnumber: Passed to the index query as ``top_k``.
        PINECONE_API_ENV: Pinecone environment passed to ``pinecone.init``.
        PINECONE_INDEX_NAME: Name of the pinecone index to query.
        PINECONE_NAMESPACE: Namespace passed to the index query.
        embedderengine: Engine passed to ``openai.Embedding.create``.

    Returns:
        The response of ``index.query``, requested with metadata and
        without vector values.

    Raises:
        KeyError: If the ``PINECONE_API_KEY`` environment variable is not set.
    """
    # Embed the user message with the openai embedder.
    embedding_response = openai.Embedding.create(
        input=user_message,
        engine=embedderengine,
    )
    query_vector = embedding_response['data'][0]['embedding']

    # Initialize pinecone; the API key is read from the environment.
    pinecone.init(
        api_key=os.environ['PINECONE_API_KEY'],
        environment=PINECONE_API_ENV,
    )

    # Search the index with the embedded query.
    return pinecone.Index(PINECONE_INDEX_NAME).query(
        vector=query_vector,
        top_k=docnumber,
        namespace=PINECONE_NAMESPACE,
        include_metadata=True,
        include_values=False,
    )

## format the dictionary obtained from pinecone in a way that I think suits the LLM better
def pinecone_documentformatter(relevantdocument):
    """Format the matches of a pinecone query response as plain text.

    Each match is written as a numbered section (``"1-"``, ``"2-"``, ...)
    followed by one ``key: value`` line per metadata entry. The ``text``
    entry always comes first (empty if the match has no ``text``); the
    remaining entries follow in the metadata's own iteration order.

    Args:
        relevantdocument: Mapping with a ``'matches'`` sequence; every match
            must provide a ``'metadata'`` mapping.

    Returns:
        The formatted string, or ``""`` when there are no matches.
    """
    parts = []
    for position, match in enumerate(relevantdocument['matches'], 1):
        metadata = match['metadata']
        parts.append(f"{position}-\n")
        # Put the text in front.
        parts.append(f"text: {metadata.get('text', '')}\n")
        # Put the other metadata after it.
        parts.extend(
            f"{key}: {metadata[key]}\n" for key in metadata if key != 'text'
        )
    # Join once instead of growing a string with repeated +=.
    return "".join(parts)

## A single function that retrieves the pinecone documents and returns their content formatted
def formatted_documentretrival(user_message,docnumber=querydocnumber):
    """Retrieve documents for the user message and return them formatted.

    Calls ``documentretrival(user_message, docnumber)`` and passes its
    result through ``pinecone_documentformatter``.

    Args:
        user_message: Text used as the retrieval query.
        docnumber: Forwarded to ``documentretrival`` as its second argument.

    Returns:
        The string produced by ``pinecone_documentformatter``.
    """
    return pinecone_documentformatter(documentretrival(user_message, docnumber))