NavyDevilDoc committed on
Commit
f642092
·
verified ·
1 Parent(s): 5b91370

Create PineconeManager.py

Browse files
Files changed (1) hide show
  1. src/core/PineconeManager.py +104 -0
src/core/PineconeManager.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import logging
3
+ from pinecone import Pinecone, ServerlessSpec, PodSpec
4
+ from langchain_pinecone import PineconeVectorStore
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class PineconeManager:
    """Thin convenience wrapper around a Pinecone client.

    Groups the index-level operations used by the application: listing
    indexes, reading stats, checking embedding dimensions, creating a
    serverless index, building a LangChain vector store, and deleting the
    vectors that belong to one source file.

    Most methods deliberately swallow exceptions, log them, and return a
    sentinel (``[]``, ``None``, ``False`` or a ``(False, message)`` tuple) so
    that callers can surface the problem without crashing.
    """

    def __init__(self, api_key: str):
        """Create the underlying Pinecone client.

        Args:
            api_key: Pinecone API key. Must be a non-empty string.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("Pinecone API Key is missing.")

        # Kept so get_vectorstore() can authenticate with the same key the
        # client uses instead of silently depending on an environment variable.
        self._api_key = api_key

        # Initialize the client
        self.pc = Pinecone(api_key=api_key)

    def list_indexes(self) -> list:
        """Return the names of all indexes visible to this client.

        Returns:
            A list of index names, or an empty list if the request fails
            (the error is logged).
        """
        try:
            return [i.name for i in self.pc.list_indexes()]
        except Exception as e:
            logger.error("Error listing indexes: %s", e)
            return []

    def get_index_stats(self, index_name: str):
        """Return the stats reported by ``describe_index_stats`` for an index.

        Args:
            index_name: Name of the index to inspect.

        Returns:
            Whatever ``describe_index_stats()`` returns, or ``None`` if the
            request fails (the error is logged).
        """
        try:
            idx = self.pc.Index(index_name)
            return idx.describe_index_stats()
        except Exception as e:
            logger.error("Error fetching stats for %s: %s", index_name, e)
            return None

    def check_dimension_compatibility(self, index_name: str, target_dim: int = 384) -> bool:
        """Check that the index dimension matches the embedding model dimension.

        SAFETY MECHANISM: the default of 384 corresponds to the output size
        of all-MiniLM-L6-v2.

        Args:
            index_name: Name of the index to check.
            target_dim: Dimension produced by the embedding model.

        Returns:
            ``True`` when the dimensions match; ``False`` on mismatch or when
            the index description cannot be fetched (both cases are logged).
        """
        try:
            # The dimension comes from the control-plane description,
            # not from the index (data-plane) object.
            idx_info = self.pc.describe_index(index_name)
            idx_dim = int(idx_info.dimension)
        except Exception as e:
            logger.error("Error checking dimension: %s", e)
            return False

        if idx_dim != target_dim:
            logger.warning("Dimension Mismatch! Index: %s, Model: %s", idx_dim, target_dim)
            return False
        return True

    def _wait_until_ready(self, index_name: str, timeout: float = 300.0,
                          poll_interval: float = 1.0) -> bool:
        """Poll ``describe_index`` until the index is ready or time runs out.

        The status is always checked at least once, so ``timeout=0`` means
        "check once, do not wait".

        Args:
            index_name: Name of the index to poll.
            timeout: Maximum number of seconds to wait.
            poll_interval: Seconds to sleep between polls.

        Returns:
            ``True`` if the index reported ready, ``False`` on timeout.
        """
        # monotonic() is immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while True:
            if self.pc.describe_index(index_name).status["ready"]:
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(poll_interval)

    def create_index(self, index_name: str, dimension: int = 384, metric: str = "cosine",
                     *, cloud: str = "aws", region: str = "us-east-1",
                     timeout: float = 300.0) -> tuple:
        """Create a serverless index and wait (bounded) until it is ready.

        If an index with this name already exists, nothing is created.

        Args:
            index_name: Name of the index to create.
            dimension: Vector dimension of the index.
            metric: Similarity metric passed to Pinecone.
            cloud: Cloud provider for the serverless spec.
            region: Cloud region for the serverless spec.
            timeout: Maximum number of seconds to wait for the index to
                become ready before giving up.

        Returns:
            A ``(success, message)`` tuple. ``success`` is ``False`` when
            creation fails or the index is not ready within ``timeout``.
        """
        if index_name in self.list_indexes():
            logger.info("Index %s already exists.", index_name)
            return True, "Index already exists."

        try:
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud=cloud, region=region),
                # -1 asks the SDK to return immediately; the bounded wait
                # below replaces the SDK's default indefinite wait.
                timeout=-1,
            )

            logger.info("Waiting for index to initialize...")
            if not self._wait_until_ready(index_name, timeout=timeout):
                message = (
                    f"Index {index_name} was created but was not ready "
                    f"after {timeout} seconds."
                )
                logger.error(message)
                return False, message

            return True, f"Index {index_name} created successfully."
        except Exception as e:
            logger.error("Failed to create index: %s", e)
            return False, str(e)

    def get_vectorstore(self, index_name: str, embedding_function, namespace: str):
        """Return the LangChain vector store used for RAG operations.

        Args:
            index_name: Name of the Pinecone index to bind to.
            embedding_function: Embeddings object handed to the vector store.
            namespace: Pinecone namespace the store reads from and writes to.

        Returns:
            A ``PineconeVectorStore`` authenticated with this manager's key.
        """
        return PineconeVectorStore(
            index_name=index_name,
            embedding=embedding_function,
            namespace=namespace,
            # Use the manager's key rather than relying on PINECONE_API_KEY
            # being set in the environment.
            pinecone_api_key=self._api_key,
        )

    def delete_file_from_index(self, index_name: str, filename: str, namespace: str) -> tuple:
        """Delete all vectors whose ``source`` metadata equals ``filename``.

        Args:
            index_name: Name of the index to delete from.
            filename: Value of the ``source`` metadata field to match.
            namespace: Namespace to delete from.

        Returns:
            A ``(success, message)`` tuple; failures are logged.
        """
        try:
            index = self.pc.Index(index_name)
            # NOTE(review): delete-by-metadata-filter has historically been
            # restricted on serverless indexes; confirm it is supported for
            # the index type that create_index() provisions.
            index.delete(
                filter={"source": filename},
                namespace=namespace,
            )
            return True, f"Deleted vectors for {filename}"
        except Exception as e:
            logger.error("Delete failed: %s", e)
            return False, str(e)