Spaces:
Sleeping
Sleeping
| from pinecone import Pinecone | |
| from typing import Union | |
class PineconeDB:
    """Thin wrapper around one Pinecone index that uses integrated embedding.

    Records live in a default namespace named ``"<index_name>-namespace"``
    and in per-question namespaces named
    ``"<index_name>-namespace_question<question>"``.
    """

    def __init__(self, api_key: str, index_name: str, embedding_model: str,
                 *, cloud: str = "aws", region: str = "us-east-1") -> None:
        """Connect to Pinecone and make sure the index exists.

        Args:
            api_key: Pinecone API key.
            index_name: Name of the index to open (created when missing).
            embedding_model: Model name handed to ``create_index_for_model``;
                the record field ``"text"`` is mapped to the embedded text.
            cloud: Cloud provider used only when the index must be created.
            region: Cloud region used only when the index must be created.
        """
        self.api_key = api_key
        self.index_name = index_name
        self.embedding_model = embedding_model
        self.pc = Pinecone(api_key=self.api_key)

        if not self.pc.has_index(self.index_name):
            self.pc.create_index_for_model(
                name=self.index_name,
                cloud=cloud,
                region=region,
                embed={
                    "model": self.embedding_model,
                    "field_map": {"text": "text"},
                },
            )

        self.index = self.pc.Index(self.index_name)
        self.namespace = self.index_name + "-namespace"
        print(f"This PC contains the following Indexes: {self.pc.list_indexes()}")

    def _question_namespace(self, question) -> str:
        """Return the namespace name that holds records for ``question``."""
        return f"{self.namespace}_question{question}"

    def _clear_namespace(self, namespace: str) -> None:
        """Delete every record currently listed in ``namespace``.

        ``index.list`` yields one *page* (a list of IDs) per iteration, not a
        single ID, so each page is passed to ``delete`` as a flat list of IDs.
        All pages are collected before deleting so that pagination is not
        disturbed by removing records mid-listing, and deleting page by page
        keeps each delete request bounded by the listing page size.
        """
        pages = [list(page) for page in self.index.list(namespace=namespace)]
        for page in pages:
            if page:
                self.index.delete(ids=page, namespace=namespace)

    def _replace_records(self, namespace: str, data) -> None:
        """Remove the existing records of ``namespace`` and upsert ``data``."""
        self._clear_namespace(namespace)
        self.index.upsert_records(namespace, data)

    def _search(self, namespace: str, query: str, top_k: int):
        """Run a text search for ``query`` in ``namespace``."""
        return self.index.search_records(
            namespace=namespace,
            query={
                "inputs": {"text": query},
                "top_k": top_k,
            },
        )

    def delete_index(self) -> None:
        """Delete the whole index named ``self.index_name``."""
        self.pc.delete_index(name=self.index_name)
        print("Index Deleted")

    def add_data(self, data) -> None:
        """Replace the contents of the default namespace with ``data``.

        Args:
            data: Records passed unchanged to ``upsert_records``
                (presumably dicts carrying an id and a ``"text"`` field,
                matching the index field map -- verify against the caller).
        """
        # TODO: should generally create two namespaces, one for semantic
        # chunks and one for non-semantic chunks with source metadata.
        self._replace_records(self.namespace, data)
        print("Data Added")

    def add_data_question(self, data, question) -> None:
        """Replace the contents of the namespace for ``question`` with ``data``.

        Args:
            data: Records passed unchanged to ``upsert_records``.
            question: Identifier appended to the namespace name.
        """
        self._replace_records(self._question_namespace(question), data)
        print("Data Added Question")

    def get_content_ids(self):
        """Return the ID listing of the default namespace.

        The value is whatever ``index.list`` returns: an iterable that yields
        pages (lists) of record IDs rather than individual IDs.
        """
        return self.index.list(namespace=self.namespace)

    def query_db(self, query: str, top_k: int = 2):
        """Search the default namespace for the ``top_k`` best matches.

        Args:
            query: Free-text query, embedded by the index's model.
            top_k: Number of matches to request (defaults to 2).

        Returns:
            The response object of ``index.search_records``.
        """
        return self._search(self.namespace, query, top_k)

    def query_question_namesp(self, query: str, question, top_k: int = 2):
        """Search the namespace of ``question`` for the ``top_k`` best matches.

        Args:
            query: Free-text query, embedded by the index's model.
            question: Identifier selecting the per-question namespace.
            top_k: Number of matches to request (defaults to 2).

        Returns:
            The response object of ``index.search_records``.
        """
        return self._search(self._question_namespace(question), query, top_k)