Spaces:
No application file
No application file
File size: 1,728 Bytes
8255e91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import os
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import numpy as np
load_dotenv()
class VectorStore:
    """Wrapper around a single Pinecone index that stores chunk embeddings.

    Configuration is read from the environment (``load_dotenv()`` is called
    at module import): ``PINECONE_API_KEY`` and ``PINECONE_INDEX_NAME``.
    The index is created as a serverless index on first use and reused
    afterwards.
    """

    def __init__(self, dimension=1024, metric="cosine", cloud="aws",
                 region="us-east-1"):
        """Connect to Pinecone and open the configured index, creating it if absent.

        Args:
            dimension: Vector dimension used only when the index is created.
            metric: Similarity metric used only when the index is created.
            cloud: Serverless cloud provider used only when the index is created.
            region: Serverless region used only when the index is created.

        Raises:
            ValueError: If ``PINECONE_INDEX_NAME`` is unset or empty.
        """
        api_key = os.getenv("PINECONE_API_KEY")
        index_name = os.getenv("PINECONE_INDEX_NAME")
        # Fail before any network call; otherwise a missing name only
        # surfaces later as create_index(name=None) failing.
        if not index_name:
            raise ValueError(
                "PINECONE_INDEX_NAME environment variable is not set"
            )

        # connect to Pinecone
        self.pc = Pinecone(api_key=api_key)

        if index_name not in self.pc.list_indexes().names():
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud=cloud, region=region),
            )
            print(f" Created new Pinecone index: {index_name}")
        else:
            print(f"Reusing existing Pinecone index: {index_name}")

        self.index = self.pc.Index(index_name)

    @staticmethod
    def _as_list(vector):
        """Return ``vector.tolist()`` when available, else ``vector`` unchanged."""
        to_list = getattr(vector, "tolist", None)
        return to_list() if callable(to_list) else vector

    def add(self, embeddings, chunks, id_prefix="chunk", batch_size=100):
        """Upsert one vector per chunk into the index.

        Vector IDs are ``f"{id_prefix}-{i}"`` where ``i`` is the position in
        ``embeddings``. With the default prefix, a later call overwrites the
        vectors of an earlier call that used the same positions; pass a
        distinct ``id_prefix`` per document to keep them apart.

        Args:
            embeddings: Iterable of vectors, one per chunk. Items exposing
                ``tolist()`` (e.g. numpy arrays) are converted to lists.
            chunks: Sequence of dicts with ``"content"``, ``"source"`` and
                ``"chunk_index"`` keys; these are stored as the ``text``,
                ``source`` and ``position`` metadata fields.
            id_prefix: Prefix for generated vector IDs.
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            ValueError: If ``batch_size`` is less than 1 or the number of
                embeddings differs from the number of chunks.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        embeddings = list(embeddings)
        if len(embeddings) != len(chunks):
            raise ValueError(
                f"Got {len(embeddings)} embeddings for {len(chunks)} chunks"
            )

        vectors = [
            (
                f"{id_prefix}-{idx}",
                self._as_list(emb),
                {
                    "text": chunk["content"],
                    "source": chunk["source"],
                    "position": chunk["chunk_index"],
                },
            )
            for idx, (emb, chunk) in enumerate(zip(embeddings, chunks))
        ]

        # Send in bounded batches so large documents do not go out as one
        # oversized request; an empty input issues no request at all.
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

    def search(self, query_embedding, top_k=5):
        """Return the ``top_k`` closest stored chunks for a query vector.

        Args:
            query_embedding: Query vector. An object exposing ``tolist()``
                (e.g. a numpy array) is converted to a plain list first.
            top_k: Number of matches to request.

        Returns:
            A list of dicts with ``text``, ``source``, ``position`` and
            ``score`` keys, in the order the index returned the matches.
        """
        results = self.index.query(
            vector=self._as_list(query_embedding),
            top_k=top_k,
            include_metadata=True,
        )
        return [
            {
                "text": item["metadata"]["text"],
                "source": item["metadata"]["source"],
                "position": item["metadata"]["position"],
                "score": item["score"],
            }
            for item in results["matches"]
        ]
|