NavyDevilDoc committed on
Commit
f076cab
·
verified ·
1 Parent(s): ff3310f

Update src/core/PineconeManager.py

Browse files
Files changed (1) hide show
  1. src/core/PineconeManager.py +7 -38
src/core/PineconeManager.py CHANGED
@@ -1,6 +1,6 @@
1
  import time
2
  import logging
3
- from pinecone import Pinecone, ServerlessSpec, PodSpec
4
  from langchain_pinecone import PineconeVectorStore
5
 
6
  logger = logging.getLogger(__name__)
@@ -9,8 +9,6 @@ class PineconeManager:
9
  def __init__(self, api_key: str):
10
  if not api_key:
11
  raise ValueError("Pinecone API Key is missing.")
12
-
13
- # Initialize the client
14
  self.pc = Pinecone(api_key=api_key)
15
 
16
  def list_indexes(self):
@@ -21,25 +19,14 @@ class PineconeManager:
21
  logger.error(f"Error listing indexes: {e}")
22
  return []
23
 
24
- def get_index_stats(self, index_name: str):
25
- """Returns stats like total vector count and dimension."""
26
- try:
27
- idx = self.pc.Index(index_name)
28
- return idx.describe_index_stats()
29
- except Exception as e:
30
- logger.error(f"Error fetching stats for {index_name}: {e}")
31
- return None
32
-
33
  def check_dimension_compatibility(self, index_name: str, target_dim: int = 384) -> bool:
34
  """
35
- SAFETY MECHANISM: Ensures the Index dimension matches the Model dimension.
36
  all-MiniLM-L6-v2 output is 384.
37
  """
38
  try:
39
- # We have to get the description from the list API, not the index object
40
  idx_info = self.pc.describe_index(index_name)
41
  idx_dim = int(idx_info.dimension)
42
-
43
  if idx_dim != target_dim:
44
  logger.warning(f"Dimension Mismatch! Index: {idx_dim}, Model: {target_dim}")
45
  return False
@@ -49,56 +36,38 @@ class PineconeManager:
49
  return False
50
 
51
  def create_index(self, index_name: str, dimension: int = 384, metric: str = "cosine"):
52
- """
53
- Creates a new Serverless Index (cheapest/easiest option).
54
- Includes a wait loop to ensure it's ready.
55
- """
56
  existing = self.list_indexes()
57
  if index_name in existing:
58
- logger.info(f"Index {index_name} already exists.")
59
  return True, "Index already exists."
60
 
61
  try:
62
- # Create Serverless Index (AWS/US-EAST-1 is usually the default free region)
63
  self.pc.create_index(
64
  name=index_name,
65
  dimension=dimension,
66
  metric=metric,
67
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
68
  )
69
-
70
  # Wait for initialization
71
- logger.info("Waiting for index to initialize...")
72
  while not self.pc.describe_index(index_name).status['ready']:
73
  time.sleep(1)
74
-
75
  return True, f"Index {index_name} created successfully."
76
  except Exception as e:
77
- logger.error(f"Failed to create index: {e}")
78
  return False, str(e)
79
 
80
  def get_vectorstore(self, index_name: str, embedding_function, namespace: str):
81
- """
82
- Returns the LangChain VectorStore object for RAG operations.
83
- """
84
  return PineconeVectorStore(
85
  index_name=index_name,
86
  embedding=embedding_function,
87
  namespace=namespace
88
  )
89
 
90
- def delete_file_from_index(self, index_name: str, filename: str, namespace: str):
91
- """
92
- Deletes all vectors associated with a specific file source.
93
- """
94
  try:
95
  index = self.pc.Index(index_name)
96
- # Pinecone delete by metadata filter
97
- index.delete(
98
- filter={"source": filename},
99
- namespace=namespace
100
- )
101
  return True, f"Deleted vectors for {filename}"
102
  except Exception as e:
103
- logger.error(f"Delete failed: {e}")
104
  return False, str(e)
 
1
  import time
2
  import logging
3
+ from pinecone import Pinecone, ServerlessSpec
4
  from langchain_pinecone import PineconeVectorStore
5
 
6
  logger = logging.getLogger(__name__)
 
9
  def __init__(self, api_key: str):
10
  if not api_key:
11
  raise ValueError("Pinecone API Key is missing.")
 
 
12
  self.pc = Pinecone(api_key=api_key)
13
 
14
  def list_indexes(self):
 
19
  logger.error(f"Error listing indexes: {e}")
20
  return []
21
 
 
 
 
 
 
 
 
 
 
22
  def check_dimension_compatibility(self, index_name: str, target_dim: int = 384) -> bool:
23
  """
24
+ SAFETY MECHANISM: Ensures the Index dimension matches the Model.
25
  all-MiniLM-L6-v2 output is 384.
26
  """
27
  try:
 
28
  idx_info = self.pc.describe_index(index_name)
29
  idx_dim = int(idx_info.dimension)
 
30
  if idx_dim != target_dim:
31
  logger.warning(f"Dimension Mismatch! Index: {idx_dim}, Model: {target_dim}")
32
  return False
 
36
  return False
37
 
38
  def create_index(self, index_name: str, dimension: int = 384, metric: str = "cosine"):
39
+ """Creates a new Serverless Index with a wait loop."""
 
 
 
40
  existing = self.list_indexes()
41
  if index_name in existing:
 
42
  return True, "Index already exists."
43
 
44
  try:
 
45
  self.pc.create_index(
46
  name=index_name,
47
  dimension=dimension,
48
  metric=metric,
49
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
50
  )
 
51
  # Wait for initialization
 
52
  while not self.pc.describe_index(index_name).status['ready']:
53
  time.sleep(1)
 
54
  return True, f"Index {index_name} created successfully."
55
  except Exception as e:
 
56
  return False, str(e)
57
 
58
  def get_vectorstore(self, index_name: str, embedding_function, namespace: str):
59
+ """Returns the LangChain VectorStore object."""
 
 
60
  return PineconeVectorStore(
61
  index_name=index_name,
62
  embedding=embedding_function,
63
  namespace=namespace
64
  )
65
 
66
+ def delete_file(self, index_name: str, filename: str, namespace: str):
67
+ """Deletes vectors for a specific file."""
 
 
68
  try:
69
  index = self.pc.Index(index_name)
70
+ index.delete(filter={"source": filename}, namespace=namespace)
 
 
 
 
71
  return True, f"Deleted vectors for {filename}"
72
  except Exception as e:
 
73
  return False, str(e)