Spaces:
No application file
No application file
| import os | |
| from pinecone import Pinecone, ServerlessSpec | |
| from dotenv import load_dotenv | |
| import numpy as np | |
# Runs at import time, before VectorStore.__init__ reads PINECONE_API_KEY and
# PINECONE_INDEX_NAME via os.getenv (presumably populating them from a local
# .env file — confirm against the deployment setup).
load_dotenv()
class VectorStore:
    """Wrapper around a Pinecone index that stores and searches text chunks.

    The API key is read from the ``PINECONE_API_KEY`` environment variable
    and the index name from ``PINECONE_INDEX_NAME``.
    """

    def __init__(self, *, dimension=1024, metric="cosine", cloud="aws", region="us-east-1"):
        """Connect to Pinecone and open the configured index, creating it if absent.

        Args:
            dimension: Vector dimension used when a new index is created.
            metric: Similarity metric used when a new index is created.
            cloud: Cloud provider passed to ``ServerlessSpec`` on creation.
            region: Region passed to ``ServerlessSpec`` on creation.

        Raises:
            ValueError: If ``PINECONE_INDEX_NAME`` is not set or is empty.
        """
        api_key = os.getenv("PINECONE_API_KEY")
        index_name = os.getenv("PINECONE_INDEX_NAME")
        if not index_name:
            # Fail early with a clear message instead of passing None on to
            # the index lookup / creation calls below.
            raise ValueError("PINECONE_INDEX_NAME environment variable is not set")

        # Connect to Pinecone; validation of the key itself is left to the client.
        self.pc = Pinecone(api_key=api_key)

        # The creation settings only apply to a new index; an existing index
        # is reused as-is.
        if index_name not in self.pc.list_indexes().names():
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud=cloud, region=region),
            )
            print(f" Created new Pinecone index: {index_name}")
        else:
            print(f"Reusing existing Pinecone index: {index_name}")

        self.index = self.pc.Index(index_name)

    @staticmethod
    def _as_list(vector):
        """Return *vector* as a plain list when it exposes ``tolist()``; otherwise unchanged."""
        return vector.tolist() if hasattr(vector, "tolist") else vector

    def add(self, embeddings, chunks, batch_size=100):
        """Upsert one vector per embedding, with its chunk's text as metadata.

        Args:
            embeddings: Iterable of embedding vectors; ``embeddings[i]`` is
                paired with ``chunks[i]``.
            chunks: Sequence of dicts providing the ``"content"``,
                ``"source"`` and ``"chunk_index"`` keys.
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            IndexError: If there are more embeddings than chunks.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        # NOTE(review): IDs are purely positional ("chunk-0", "chunk-1", ...),
        # so a later add() call reuses the IDs of an earlier one — presumably
        # replacing those vectors in the index. Confirm this is intended.
        vectors = [
            (
                f"chunk-{idx}",
                self._as_list(emb),
                {
                    "text": chunks[idx]["content"],
                    "source": chunks[idx]["source"],
                    "position": chunks[idx]["chunk_index"],
                },
            )
            for idx, emb in enumerate(embeddings)
        ]

        # Send bounded batches rather than one request of arbitrary size;
        # with no vectors the loop body never runs and no request is made.
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors[start:start + batch_size])

    def search(self, query_embedding, top_k=5):
        """Query the index and return the closest stored chunks.

        Args:
            query_embedding: Query vector; array-like objects exposing
                ``tolist()`` are converted to a plain list first.
            top_k: Number of matches requested from the index.

        Returns:
            A list of dicts with ``"text"``, ``"source"``, ``"position"`` and
            ``"score"`` keys, one per match, in the order the index returned
            them.
        """
        results = self.index.query(
            vector=self._as_list(query_embedding),
            top_k=top_k,
            include_metadata=True,
        )
        return [
            {
                "text": item["metadata"]["text"],
                "source": item["metadata"]["source"],
                "position": item["metadata"]["position"],
                "score": item["score"],
            }
            for item in results["matches"]
        ]