Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Files Community

NavyDevilDoc committed on Jan 18

Commit

83af5a3

verified ·

1 Parent(s): b2a0394

Update src/core/PineconeManager.py

Browse files

Files changed (1) hide show

src/core/PineconeManager.py +60 -16

src/core/PineconeManager.py CHANGED Viewed

@@ -63,29 +63,73 @@ class PineconeManager:
             namespace=namespace
         )
-    def delete_file(self, index_name: str, filename: str, namespace: str):
-        """Deletes vectors for a specific file."""
         try:
             index = self.pc.Index(index_name)
-            index.delete(filter={"source": filename}, namespace=namespace)
-            return True, f"Deleted vectors for {filename}"
         except Exception as e:
             return False, str(e)
-    def get_all_ids(self, index_name: str, namespace: str):
-        """
-        Fetches all vector IDs for a user.
-        NOTE: This works best on Pinecone Serverless indexes.
-        """
         try:
-            idx = self.pc.Index(index_name)
-            results = []
-            # .list() returns a generator that yields lists of IDs
-            for ids in idx.list(namespace=namespace):
-                results.extend(ids)
-            return results
         except Exception as e:
-            logger.error(f"Error listing IDs: {e}")
             return []
     def fetch_vectors(self, index_name: str, ids: list, namespace: str):

             namespace=namespace
         )
+    # --- THE FIX: SEARCH & DESTROY ---
+    def delete_file(self, index_name, source_filename, namespace):
+        """
+        Robust deletion that works on Starter & Serverless indexes.
+        1. Fetches IDs associated with the file.
+        2. Deletes those specific IDs.
+        """
         try:
             index = self.pc.Index(index_name)
+            # Strategy 1: delete by metadata filter (the inner try/except below).
+            # NOTE: ids_to_delete and dummy_vec are assigned next but never read afterwards.
+            ids_to_delete = []
+            # A dummy-vector query could be used to find matches by metadata ("search"),
+            # but no such query is actually issued anywhere in this method.
+            # vector=[0.1]*dim would only be a placeholder to satisfy the query API.
+            dummy_vec = [0.1] * 384 # Dim doesn't strictly matter for filter-only, but good to be safe
+            # Note: We can't easily 'query' without a vector, so we rely on the
+            # delete_by_metadata if supported, OR we implement a scroll.
+            # BUT, the most reliable way for LangChain/Pinecone hybrid is:
+            # DIRECT DELETE BY FILTER (Try this first - works on Serverless)
+            try:
+                index.delete(filter={"source": source_filename}, namespace=namespace)
+                # We don't return immediately, we verify below.
+            except Exception as e:
+                print(f"Metadata delete failed (expected on Starter tier): {e}")
+            # Strategy 2: "The Clean Sweep" (Iterator)
+            # If the above didn't catch them (or silently failed), we manually hunt them.
+            # We look for the standard ID prefixes used by our app.
+            # Standard chunks: "filename_0", "filename_1"
+            # Flat chunks: "filename_flat_0"
+            # Probe the first 200 candidate IDs ("<source_filename>_0" .. "<source_filename>_199"). If found, delete.
+            # NOTE: IDs like "filename_flat_0" (created by "Index Flat") are only matched when source_filename itself ends in "_flat".
+            potential_ids = [f"{source_filename}_{i}" for i in range(200)]
+            # Check existence
+            fetch_response = index.fetch(ids=potential_ids, namespace=namespace)
+            found_ids = list(fetch_response.vectors.keys())
+            if found_ids:
+                index.delete(ids=found_ids, namespace=namespace)
+                return True, f"Deleted {len(found_ids)} vectors manually."
+            return True, "Delete signal sent."
         except Exception as e:
+            print(f"Delete failed: {e}")
             return False, str(e)
+    # --- HELPER FOR RESYNC ---
+    def get_all_ids(self, index_name, namespace):
+        # This helper collects every vector ID in the namespace via index.list(); on any error it returns [].
         try:
+            index = self.pc.Index(index_name)
+            matches = []
+            # index.list() yields batches of IDs; each batch is appended to matches
+            for ids in index.list(namespace=namespace):
+                matches.extend(ids)
+            return matches
         except Exception as e:
+            # No fallback is implemented: report the error and return an empty list
+            print(f"List IDs failed: {e}")
             return []
     def fetch_vectors(self, index_name: str, ids: list, namespace: str):