from pinecone import Pinecone
from typing import Union


class PineconeDB:
    """Thin wrapper around a single Pinecone index with integrated embedding.

    On construction the index named ``index_name`` is created (on AWS
    ``us-east-1``, embedding the ``text`` field with ``embedding_model``) if
    it does not already exist, and a handle to it is stored in ``self.index``.

    Records live in a default namespace (``"<index_name>-namespace"``) and in
    per-question namespaces (``"<default namespace>_question<question>"``).
    """

    # Maximum number of IDs sent in one delete request.
    # NOTE(review): 1000 is the per-request delete limit in the Pinecone docs
    # as I recall them -- confirm against the current limits page.
    _DELETE_BATCH_SIZE = 1000

    def __init__(self, api_key: str, index_name: str, embedding_model: str):
        """Connect to Pinecone and make sure the target index exists.

        Args:
            api_key: Pinecone API key.
            index_name: Name of the index to open (created if missing).
            embedding_model: Name of the hosted embedding model the index
                uses to embed the ``text`` field of each record.
        """
        self.api_key = api_key
        self.index_name = index_name
        self.embedding_model = embedding_model

        self.pc = Pinecone(api_key=self.api_key)
        self.index = None

        if not self.pc.has_index(self.index_name):
            self.pc.create_index_for_model(
                name=self.index_name,
                cloud="aws",
                region="us-east-1",
                embed={
                    "model": self.embedding_model,
                    "field_map": {"text": "text"},
                },
            )

        self.index = self.pc.Index(self.index_name)
        self.namespace = self.index_name + "-namespace"
        print(f"This PC contains the following Indexes: {self.pc.list_indexes()}")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _question_namespace(self, question: Union[str, int]) -> str:
        """Return the namespace name used for records of ``question``."""
        return f"{self.namespace}_question{question}"

    def _list_ids(self, namespace: str) -> list:
        """Return a flat list of every record ID stored in ``namespace``."""
        record_ids = []
        for page in self.index.list(namespace=namespace):
            # ``Index.list`` yields one *list* of IDs per result page, so the
            # pages have to be flattened; a bare string is tolerated in case
            # a client version yields individual IDs instead.
            if isinstance(page, str):
                record_ids.append(page)
            else:
                record_ids.extend(page)
        return record_ids

    def _clear_namespace(self, namespace: str) -> None:
        """Delete every record currently stored in ``namespace``."""
        record_ids = self._list_ids(namespace)
        # IDs are collected first and deleted afterwards so the listing is
        # not paginating over a namespace that is being modified.
        for start in range(0, len(record_ids), self._DELETE_BATCH_SIZE):
            batch = record_ids[start:start + self._DELETE_BATCH_SIZE]
            self.index.delete(ids=batch, namespace=namespace)

    def _search(self, namespace: str, query: str, top_k: int):
        """Run a text search for ``query`` in ``namespace``."""
        return self.index.search_records(
            namespace=namespace,
            query={
                "inputs": {"text": query},
                "top_k": top_k,
            },
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def delete_index(self):
        """Delete the whole index this wrapper was created for."""
        self.pc.delete_index(name=self.index_name)
        print("Index Deleted")

    def add_data(self, data):
        """Replace the contents of the default namespace with ``data``.

        All existing records in the namespace are deleted before ``data`` is
        upserted.

        Args:
            data: Records to upsert, passed straight to
                ``Index.upsert_records``.
        """
        # TODO: should generally create two namespaces, one for semantic
        # chunks and one for non-semantic chunks with source metadata.
        self._clear_namespace(self.namespace)
        self.index.upsert_records(self.namespace, data)
        print("Data Added")

    def add_data_question(self, data, question):
        """Replace the contents of the namespace for ``question`` with ``data``.

        Args:
            data: Records to upsert, passed straight to
                ``Index.upsert_records``.
            question: Question identifier; it is interpolated into the
                namespace name.
        """
        namesp = self._question_namespace(question)
        self._clear_namespace(namesp)
        self.index.upsert_records(namesp, data)
        print("Data Added Question")

    def get_content_ids(self):
        """Return the result of listing record IDs in the default namespace.

        The value is whatever ``Index.list`` returns, unmodified.
        NOTE(review): this appears to be a generator yielding one list of IDs
        per page rather than a flat list -- confirm against the SDK docs.
        """
        return self.index.list(namespace=self.namespace)

    def query_db(self, query, top_k: int = 2):
        """Search the default namespace for records matching ``query``.

        Args:
            query: Query text; embedded server-side by the index's model.
            top_k: Number of matches to request (defaults to 2).

        Returns:
            The response object from ``Index.search_records``.
        """
        return self._search(self.namespace, query, top_k)

    def query_question_namesp(self, query, question, top_k: int = 2):
        """Search the namespace for ``question`` for records matching ``query``.

        Args:
            query: Query text; embedded server-side by the index's model.
            question: Question identifier selecting the namespace.
            top_k: Number of matches to request (defaults to 2).

        Returns:
            The response object from ``Index.search_records``.
        """
        return self._search(self._question_namespace(question), query, top_k)