# yiqing111's picture | Upload 7 files | 8255e91 verified
import os
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import numpy as np
# Populate the process environment from a .env file at import time, so the
# os.getenv() lookups in VectorStore.__init__ can find the Pinecone settings.
load_dotenv()
class VectorStore:
    """Store text-chunk embeddings in a Pinecone index and search them.

    Every vector is written with metadata holding the chunk text, its source
    and its position, so search results can be returned directly from the
    query response.
    """

    def __init__(
        self,
        api_key=None,
        index_name=None,
        *,
        dimension=1024,
        metric="cosine",
        cloud="aws",
        region="us-east-1",
    ):
        """Connect to Pinecone and open the index, creating it if it is missing.

        Args:
            api_key: Pinecone API key. Falls back to the ``PINECONE_API_KEY``
                environment variable when not given.
            index_name: Name of the index to use. Falls back to the
                ``PINECONE_INDEX_NAME`` environment variable when not given.
            dimension: Vector dimension used only when the index is created.
            metric: Similarity metric used only when the index is created.
            cloud: Serverless cloud provider used only when the index is created.
            region: Serverless region used only when the index is created.

        Raises:
            ValueError: If the API key or the index name cannot be resolved.
        """
        api_key = api_key or os.getenv("PINECONE_API_KEY")
        index_name = index_name or os.getenv("PINECONE_INDEX_NAME")

        # Fail early with a clear message instead of sending None to the API.
        if not api_key:
            raise ValueError(
                "Pinecone API key is missing: set PINECONE_API_KEY or pass api_key."
            )
        if not index_name:
            raise ValueError(
                "Pinecone index name is missing: set PINECONE_INDEX_NAME "
                "or pass index_name."
            )

        # connect to Pinecone
        self.pc = Pinecone(api_key=api_key)

        if index_name not in self.pc.list_indexes().names():
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud=cloud, region=region),
            )
            print(f" Created new Pinecone index: {index_name}")
        else:
            print(f"Reusing existing Pinecone index: {index_name}")

        self.index = self.pc.Index(index_name)

    @staticmethod
    def _as_list(vector):
        """Return *vector* as a plain list when it exposes ``tolist()``."""
        # Array types (e.g. numpy) are converted so the client receives plain
        # Python floats; lists and tuples are passed through unchanged.
        return vector.tolist() if hasattr(vector, "tolist") else vector

    def add(self, embeddings, chunks, id_prefix="chunk", batch_size=100):
        """Upsert one vector per embedding, with the matching chunk as metadata.

        Args:
            embeddings: Iterable of embedding vectors (lists or array-likes).
            chunks: Sequence of dicts aligned with ``embeddings``; each must
                provide the keys ``"content"``, ``"source"`` and
                ``"chunk_index"``.
            id_prefix: Prefix of the generated vector IDs
                (``"<id_prefix>-<n>"``). NOTE: IDs restart at 0 on every call,
                so with the default prefix a later call overwrites the vectors
                of an earlier one; pass a distinct prefix per document to keep
                both.
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        vectors = [
            (
                f"{id_prefix}-{idx}",
                self._as_list(emb),
                {
                    "text": chunks[idx]["content"],
                    "source": chunks[idx]["source"],
                    "position": chunks[idx]["chunk_index"],
                },
            )
            for idx, emb in enumerate(embeddings)
        ]

        # Send in bounded batches: one huge request can exceed the service's
        # upsert limits. With no vectors the loop does not run, so no request
        # is made for empty input.
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

    def search(self, query_embedding, top_k=5):
        """Return the ``top_k`` stored chunks most similar to the query vector.

        Args:
            query_embedding: Query vector (list or array-like).
            top_k: Number of matches to request.

        Returns:
            A list of dicts with the keys ``"text"``, ``"source"``,
            ``"position"`` and ``"score"``, in the order the index returned
            the matches.
        """
        results = self.index.query(
            vector=self._as_list(query_embedding),
            top_k=top_k,
            include_metadata=True,
        )
        return [
            {
                "text": item["metadata"]["text"],
                "source": item["metadata"]["source"],
                "position": item["metadata"]["position"],
                "score": item["score"],
            }
            for item in results["matches"]
        ]