Spaces:
Sleeping
Sleeping
Update src/core/PineconeManager.py
Browse files- src/core/PineconeManager.py +60 -16
src/core/PineconeManager.py
CHANGED
|
@@ -63,29 +63,73 @@ class PineconeManager:
|
|
| 63 |
namespace=namespace
|
| 64 |
)
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
try:
|
| 69 |
index = self.pc.Index(index_name)
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
except Exception as e:
|
|
|
|
| 73 |
return False, str(e)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
NOTE: This works best on Pinecone Serverless indexes.
|
| 79 |
-
"""
|
| 80 |
try:
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
#
|
| 84 |
-
for ids in
|
| 85 |
-
|
| 86 |
-
return
|
| 87 |
except Exception as e:
|
| 88 |
-
|
|
|
|
| 89 |
return []
|
| 90 |
|
| 91 |
def fetch_vectors(self, index_name: str, ids: list, namespace: str):
|
|
|
|
| 63 |
namespace=namespace
|
| 64 |
)
|
| 65 |
|
| 66 |
+
# --- THE FIX: SEARCH & DESTROY ---
|
| 67 |
+
def delete_file(self, index_name, source_filename, namespace):
|
| 68 |
+
"""
|
| 69 |
+
Robust deletion that works on Starter & Serverless indexes.
|
| 70 |
+
1. Fetches IDs associated with the file.
|
| 71 |
+
2. Deletes those specific IDs.
|
| 72 |
+
"""
|
| 73 |
try:
|
| 74 |
index = self.pc.Index(index_name)
|
| 75 |
+
|
| 76 |
+
# Strategy 1: Try Dummy Fetch to see what IDs look like
|
| 77 |
+
# We iterate to find all vectors with this source
|
| 78 |
+
ids_to_delete = []
|
| 79 |
+
|
| 80 |
+
# We use a dummy vector query to find matches by metadata
|
| 81 |
+
# This is the 'Search' half of Search & Destroy
|
| 82 |
+
# vector=[0.1]*dim is just a dummy to satisfy the API
|
| 83 |
+
dummy_vec = [0.1] * 384 # Dim doesn't strictly matter for filter-only, but good to be safe
|
| 84 |
+
|
| 85 |
+
# Note: We can't easily 'query' without a vector, so we rely on the
|
| 86 |
+
# delete_by_metadata if supported, OR we implement a scroll.
|
| 87 |
+
# BUT, the most reliable way for LangChain/Pinecone hybrid is:
|
| 88 |
+
|
| 89 |
+
# DIRECT DELETE BY FILTER (Try this first - works on Serverless)
|
| 90 |
+
try:
|
| 91 |
+
index.delete(filter={"source": source_filename}, namespace=namespace)
|
| 92 |
+
# We don't return immediately, we verify below.
|
| 93 |
+
except Exception as e:
|
| 94 |
+
print(f"Metadata delete failed (expected on Starter tier): {e}")
|
| 95 |
+
|
| 96 |
+
# Strategy 2: "The Clean Sweep" (Iterator)
|
| 97 |
+
# If the above didn't catch them (or silently failed), we manually hunt them.
|
| 98 |
+
# We look for the standard ID prefixes used by our app.
|
| 99 |
+
# Standard chunks: "filename_0", "filename_1"
|
| 100 |
+
# Flat chunks: "filename_flat_0"
|
| 101 |
+
|
| 102 |
+
# Check for the first 200 chunks. If found, delete.
|
| 103 |
+
# NOTE: only "filename_<i>" IDs are probed below; the "filename_flat_<i>" IDs created by "Index Flat" are not in this candidate list.
|
| 104 |
+
potential_ids = [f"{source_filename}_{i}" for i in range(200)]
|
| 105 |
+
|
| 106 |
+
# Check existence
|
| 107 |
+
fetch_response = index.fetch(ids=potential_ids, namespace=namespace)
|
| 108 |
+
found_ids = list(fetch_response.vectors.keys())
|
| 109 |
+
|
| 110 |
+
if found_ids:
|
| 111 |
+
index.delete(ids=found_ids, namespace=namespace)
|
| 112 |
+
return True, f"Deleted {len(found_ids)} vectors manually."
|
| 113 |
+
|
| 114 |
+
return True, "Delete signal sent."
|
| 115 |
+
|
| 116 |
except Exception as e:
|
| 117 |
+
print(f"Delete failed: {e}")
|
| 118 |
return False, str(e)
|
| 119 |
|
| 120 |
+
# --- HELPER FOR RESYNC ---
|
| 121 |
+
def get_all_ids(self, index_name, namespace):
|
| 122 |
+
# This helper collects every vector ID in the namespace via index.list(); on failure it logs the error and returns [].
|
|
|
|
|
|
|
| 123 |
try:
|
| 124 |
+
index = self.pc.Index(index_name)
|
| 125 |
+
matches = []
|
| 126 |
+
# Pinecone's index.list() generator (paginated under the hood) is the modern way to get all IDs
|
| 127 |
+
for ids in index.list(namespace=namespace):
|
| 128 |
+
matches.extend(ids)
|
| 129 |
+
return matches
|
| 130 |
except Exception as e:
|
| 131 |
+
# Fallback for older clients
|
| 132 |
+
print(f"List IDs failed: {e}")
|
| 133 |
return []
|
| 134 |
|
| 135 |
def fetch_vectors(self, index_name: str, ids: list, namespace: str):
|