GRC-AIGOV-Final / vectorDB /pinecone.py
ShehryarAppsWork's picture
Upload pinecone.py
c7abf19 verified
from pinecone import Pinecone
from typing import Union
class PineconeDB:
    """Small wrapper around one Pinecone index that uses integrated embedding.

    The wrapper owns a single index (created on demand) and works with a
    default namespace named ``"<index_name>-namespace"`` plus optional
    per-question namespaces named ``"<index_name>-namespace_question<question>"``.

    Attributes:
        api_key: API key handed to the Pinecone client.
        index_name: Name of the index this wrapper operates on.
        embedding_model: Model name used when the index has to be created.
        pc: The ``Pinecone`` client instance.
        index: Handle returned by ``pc.Index(index_name)``.
        namespace: Default namespace, ``index_name + "-namespace"``.
    """

    def __init__(self, api_key, index_name, embedding_model):
        """Connect to Pinecone and make sure the index exists.

        If no index called ``index_name`` exists yet, one is created with
        ``create_index_for_model`` (cloud ``aws``, region ``us-east-1``) so
        that the ``text`` field of each record is embedded with
        ``embedding_model``.

        Args:
            api_key: Pinecone API key.
            index_name: Name of the index to open (or create).
            embedding_model: Embedding model name used only on creation.
        """
        self.api_key = api_key
        self.index_name = index_name
        self.embedding_model = embedding_model
        self.pc = Pinecone(api_key=self.api_key)
        self.index = None
        if not self.pc.has_index(self.index_name):
            self.pc.create_index_for_model(
                name=self.index_name,
                cloud="aws",
                region="us-east-1",
                embed={
                    "model": self.embedding_model,
                    "field_map": {"text": "text"},
                },
            )
        # Open the handle whether the index pre-existed or was just created.
        self.index = self.pc.Index(self.index_name)
        self.namespace = self.index_name + "-namespace"
        print(f"This PC contains the following Indexes: {self.pc.list_indexes()}")

    def _question_namespace(self, question):
        """Return the namespace name used for records tied to ``question``."""
        return f"{self.namespace}_question{question}"

    def _clear_namespace(self, namespace):
        """Delete every record currently listed in ``namespace``.

        ``index.list`` yields one *page* (a list of record IDs) per
        iteration, so each page is passed to ``delete`` on its own; this
        keeps ``ids`` a flat list of strings.
        """
        # Materialise the pages first so records are not deleted while the
        # listing is still being paginated.
        pages = [list(page) for page in self.index.list(namespace=namespace)]
        for page_ids in pages:
            if page_ids:
                self.index.delete(ids=page_ids, namespace=namespace)

    def _replace_records(self, namespace, data):
        """Remove the existing records of ``namespace`` and upsert ``data``."""
        self._clear_namespace(namespace)
        self.index.upsert_records(namespace, data)

    def _search(self, namespace, query, top_k):
        """Run a text search for ``query`` in ``namespace``."""
        return self.index.search_records(
            namespace=namespace,
            query={
                "inputs": {"text": query},
                "top_k": top_k,
            },
        )

    def delete_index(self):
        """Delete the whole index named ``index_name`` from the project."""
        self.pc.delete_index(name=self.index_name)
        print("Index Deleted")

    def add_data(self, data):
        """Replace the contents of the default namespace with ``data``.

        All records currently listed in the default namespace are deleted
        before ``data`` is upserted.

        Args:
            data: Records passed unchanged to ``index.upsert_records``.
        """
        # TODO: should generally create two namespaces, one for semantic
        # chunks and one for non-semantic chunks with source metadata.
        self._replace_records(self.namespace, data)
        print("Data Added")

    def add_data_question(self, data, question):
        """Replace the contents of the namespace belonging to ``question``.

        Args:
            data: Records passed unchanged to ``index.upsert_records``.
            question: Value interpolated into the namespace name.
        """
        self._replace_records(self._question_namespace(question), data)
        print("Data Added Question")

    def get_content_ids(self):
        """Return ``index.list`` for the default namespace.

        The result is returned exactly as the SDK produces it; per the
        Pinecone SDK documentation it is an iterator yielding lists of
        record IDs, one list per page.
        """
        return self.index.list(namespace=self.namespace)

    def query_db(self, query, top_k=2):
        """Search the default namespace for records matching ``query``.

        Args:
            query: Query text.
            top_k: Number of matches requested (defaults to 2).

        Returns:
            Whatever ``index.search_records`` returns.
        """
        return self._search(self.namespace, query, top_k)

    def query_question_namesp(self, query, question, top_k=2):
        """Search the namespace belonging to ``question`` for ``query``.

        Args:
            query: Query text.
            question: Value interpolated into the namespace name.
            top_k: Number of matches requested (defaults to 2).

        Returns:
            Whatever ``index.search_records`` returns.
        """
        return self._search(self._question_namespace(question), query, top_k)