File size: 1,728 Bytes
8255e91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import numpy as np

load_dotenv()

class VectorStore:
    """Store and query chunk embeddings in a Pinecone index.

    Connection settings are read from the environment variables
    ``PINECONE_API_KEY`` and ``PINECONE_INDEX_NAME``. If the named index
    does not exist yet it is created as a serverless index.
    """

    def __init__(self, dimension=1024, metric="cosine", cloud="aws", region="us-east-1"):
        """Connect to Pinecone and open (creating if necessary) the index.

        Args:
            dimension: Vector dimension used only when the index is created.
            metric: Similarity metric used only when the index is created.
            cloud: Serverless cloud provider used only when the index is created.
            region: Serverless region used only when the index is created.

        Raises:
            ValueError: If ``PINECONE_API_KEY`` or ``PINECONE_INDEX_NAME``
                is unset or empty.
        """
        api_key = os.getenv("PINECONE_API_KEY")
        index_name = os.getenv("PINECONE_INDEX_NAME")

        # Fail early with a clear message instead of an opaque client error
        # (e.g. create_index(name=None)) further down.
        if not api_key:
            raise ValueError("PINECONE_API_KEY environment variable is not set")
        if not index_name:
            raise ValueError("PINECONE_INDEX_NAME environment variable is not set")

        # connect to Pinecone
        self.pc = Pinecone(api_key=api_key)
        if index_name not in self.pc.list_indexes().names():
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud=cloud, region=region),
            )
            print(f" Created new Pinecone index: {index_name}")
        else:
            print(f"Reusing existing Pinecone index: {index_name}")

        self.index = self.pc.Index(index_name)

    @staticmethod
    def _to_list(vector):
        """Return *vector* as a plain list when it exposes ``tolist()``."""
        # Array types (NumPy arrays, array.array, tensors) are not plain
        # lists; anything without ``tolist`` is passed through unchanged.
        return vector.tolist() if hasattr(vector, "tolist") else vector

    def add(self, embeddings, chunks, id_prefix="chunk", batch_size=100):
        """Upsert one vector per chunk into the index.

        Args:
            embeddings: Iterable of embedding vectors, one per chunk.
            chunks: Sequence of dicts with the keys ``"content"``,
                ``"source"`` and ``"chunk_index"``; stored as the metadata
                fields ``text``, ``source`` and ``position``.
            id_prefix: Prefix of the generated vector IDs
                (``f"{id_prefix}-{idx}"``). IDs restart at 0 on every call,
                so calls sharing a prefix overwrite each other's vectors;
                pass a distinct prefix per document to keep them apart.
            batch_size: Maximum number of vectors sent per upsert request.

        Raises:
            ValueError: If ``embeddings`` and ``chunks`` differ in length or
                ``batch_size`` is smaller than 1.
        """
        embeddings = list(embeddings)
        if len(embeddings) != len(chunks):
            raise ValueError(
                f"embeddings ({len(embeddings)}) and chunks ({len(chunks)}) "
                "must have the same length"
            )
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        vectors = [
            (
                f"{id_prefix}-{idx}",
                self._to_list(emb),
                {
                    "text": chunk["content"],
                    "source": chunk["source"],
                    "position": chunk["chunk_index"],
                },
            )
            for idx, (emb, chunk) in enumerate(zip(embeddings, chunks))
        ]

        # Send bounded batches so a large ingest does not become one
        # oversized request; empty input results in no request at all.
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

    def search(self, query_embedding, top_k=5):
        """Return the ``top_k`` stored chunks most similar to the query.

        Args:
            query_embedding: Query vector; array-like objects exposing
                ``tolist()`` are converted to a plain list first.
            top_k: Number of matches to request.

        Returns:
            A list of dicts with the keys ``text``, ``source``, ``position``
            (taken from each match's metadata) and ``score``.
        """
        results = self.index.query(
            vector=self._to_list(query_embedding),
            top_k=top_k,
            include_metadata=True,
        )
        return [
            {
                "text": item["metadata"]["text"],
                "source": item["metadata"]["source"],
                "position": item["metadata"]["position"],
                "score": item["score"],
            }
            for item in results["matches"]
        ]