File size: 2,685 Bytes
c7abf19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ae3fb9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from pinecone import Pinecone
from typing import Union

class PineconeDB:
    """Small wrapper around a single Pinecone index with integrated embedding.

    On construction the index named ``index_name`` is created if the client
    reports that it does not exist, then opened. Records are stored in the
    namespace ``"<index_name>-namespace"``; per-question data lives in
    ``"<index_name>-namespace_question<question>"``.
    """

    def __init__(self, api_key, index_name, embedding_model, *,
                 cloud="aws", region="us-east-1"):
        """Connect to Pinecone and open (creating if needed) the index.

        Args:
            api_key: Pinecone API key passed to the client.
            index_name: Name of the index to open or create.
            embedding_model: Model name used for the index's integrated
                embedding when the index has to be created.
            cloud: Cloud provider used only when creating the index.
            region: Cloud region used only when creating the index.
        """
        self.api_key = api_key
        self.index_name = index_name
        self.embedding_model = embedding_model
        self.pc = Pinecone(api_key=self.api_key)

        if not self.pc.has_index(self.index_name):
            self.pc.create_index_for_model(
                name=self.index_name,
                cloud=cloud,
                region=region,
                embed={
                    "model": self.embedding_model,
                    # The record field named "text" is the one that is embedded.
                    "field_map": {"text": "text"},
                },
            )

        self.index = self.pc.Index(self.index_name)
        self.namespace = self.index_name + "-namespace"

        print(f"This PC contains the following Indexes: {self.pc.list_indexes()}")

    def _question_namespace(self, question):
        """Return the namespace name used for records tied to *question*."""
        return f"{self.namespace}_question{question}"

    def _clear_namespace(self, namespace):
        """Delete every record ID that the index lists for *namespace*."""
        # Index.list() yields *pages* (lists) of IDs rather than single IDs,
        # so each page is handed to delete() as-is. Appending the pages to
        # one list would produce a list of lists, which is not a valid
        # ``ids`` argument.
        for id_page in self.index.list(namespace=namespace):
            if id_page:
                self.index.delete(ids=id_page, namespace=namespace)

    def _replace_records(self, namespace, data):
        """Remove the existing records in *namespace*, then upsert *data*."""
        self._clear_namespace(namespace)
        self.index.upsert_records(namespace, data)

    def _search(self, namespace, query, top_k):
        """Run a text search for *query* in *namespace* and return the result."""
        return self.index.search_records(
            namespace=namespace,
            query={
                "inputs": {"text": query},
                "top_k": top_k,
            },
        )

    def delete_index(self):
        """Delete the whole index named ``self.index_name``."""
        self.pc.delete_index(name=self.index_name)
        print("Index Deleted")

    def add_data(self, data):
        """Replace the contents of the default namespace with *data*.

        Args:
            data: Records passed unchanged to ``Index.upsert_records``.
        """
        # TODO: should generally create two namespaces, one for semantic
        # chunks and one for non-semantic chunks with source metadata.
        self._replace_records(self.namespace, data)
        print("Data Added")

    def add_data_question(self, data, question):
        """Replace the contents of the namespace for *question* with *data*.

        Args:
            data: Records passed unchanged to ``Index.upsert_records``.
            question: Value formatted into the namespace name.
        """
        self._replace_records(self._question_namespace(question), data)
        print("Data Added Question")

    def get_content_ids(self):
        """Return the result of ``Index.list`` for the default namespace.

        Note that the Pinecone client yields pages (lists) of IDs from this
        call, not individual IDs.
        """
        return self.index.list(namespace=self.namespace)

    def query_db(self, query, top_k=2):
        """Search the default namespace for text similar to *query*.

        Args:
            query: Query text.
            top_k: Number of results requested (defaults to 2).

        Returns:
            The response object from ``Index.search_records``.
        """
        return self._search(self.namespace, query, top_k)

    def query_question_namesp(self, query, question, top_k=2):
        """Search the namespace for *question* for text similar to *query*.

        Args:
            query: Query text.
            question: Value formatted into the namespace name.
            top_k: Number of results requested (defaults to 2).

        Returns:
            The response object from ``Index.search_records``.
        """
        return self._search(self._question_namespace(question), query, top_k)