# Spaces: Sleeping  (page-status text captured along with the source; not part of the module)
# Standard library.
import time
import logging

# Third-party: the Pinecone client and its LangChain vector-store adapter.
from pinecone import Pinecone, ServerlessSpec, PodSpec
from langchain_pinecone import PineconeVectorStore

# Module-level logger named after this module; PineconeManager reports
# errors, warnings and progress messages through it.
logger = logging.getLogger(__name__)
class PineconeManager:
    """Convenience wrapper around a Pinecone client for index management.

    The management helpers catch exceptions raised by the client, log them
    through the module-level ``logger`` and signal failure through their
    return value (``[]``, ``None``, ``False`` or a ``(False, message)``
    tuple) instead of raising. ``get_vectorstore`` is the exception: errors
    raised while building the store propagate to the caller.
    """

    def __init__(self, api_key: str):
        """Create the underlying Pinecone client.

        Args:
            api_key: Pinecone API key.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("Pinecone API Key is missing.")
        # Single client instance shared by every method of this manager.
        self.pc = Pinecone(api_key=api_key)

    def list_indexes(self) -> list:
        """Return the names of all indexes visible to the client.

        Returns:
            A list of index names, or an empty list if the lookup fails
            (the error is logged).
        """
        try:
            return [index.name for index in self.pc.list_indexes()]
        except Exception as e:
            logger.error("Error listing indexes: %s", e)
            return []

    def get_index_stats(self, index_name: str):
        """Return the statistics reported by ``describe_index_stats``.

        Args:
            index_name: Name of the index to inspect.

        Returns:
            Whatever the client's ``describe_index_stats()`` returns, or
            ``None`` if the call fails (the error is logged).
        """
        try:
            return self.pc.Index(index_name).describe_index_stats()
        except Exception as e:
            logger.error("Error fetching stats for %s: %s", index_name, e)
            return None

    def check_dimension_compatibility(self, index_name: str, target_dim: int = 384) -> bool:
        """Check that an index's dimension matches the embedding model's.

        The default of 384 is the output size of all-MiniLM-L6-v2.

        Args:
            index_name: Name of the index to check.
            target_dim: Dimension produced by the embedding model.

        Returns:
            ``True`` if the dimensions match. ``False`` on a mismatch
            (logged as a warning) or if the index description cannot be
            fetched (logged as an error).
        """
        try:
            # The dimension comes from the index description, not from the
            # index handle returned by ``pc.Index``.
            idx_dim = int(self.pc.describe_index(index_name).dimension)
        except Exception as e:
            logger.error("Error checking dimension: %s", e)
            return False

        if idx_dim != target_dim:
            logger.warning(
                "Dimension Mismatch! Index: %s, Model: %s", idx_dim, target_dim
            )
            return False
        return True

    def create_index(
        self,
        index_name: str,
        dimension: int = 384,
        metric: str = "cosine",
        *,
        timeout: float = 300.0,
    ):
        """Create a serverless index (AWS, us-east-1) and wait until it is ready.

        Args:
            index_name: Name of the index to create.
            dimension: Vector dimension of the new index.
            metric: Similarity metric of the new index.
            timeout: Maximum number of seconds to wait for the index to
                report ready after the create call.

        Returns:
            A ``(success, message)`` tuple. ``success`` is ``True`` when the
            index already exists or was created and became ready; ``False``
            when creation failed or the index did not become ready within
            ``timeout`` seconds.
        """
        if index_name in self.list_indexes():
            logger.info("Index %s already exists.", index_name)
            return True, "Index already exists."

        try:
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )

            logger.info("Waiting for index to initialize...")
            # Bound the wait so an index that never reports ready cannot hang
            # the caller; monotonic time is immune to wall-clock adjustments.
            deadline = time.monotonic() + timeout
            while not self.pc.describe_index(index_name).status['ready']:
                if time.monotonic() >= deadline:
                    message = (
                        f"Index {index_name} was created but did not become "
                        f"ready within {timeout} seconds."
                    )
                    logger.error(message)
                    return False, message
                time.sleep(1)

            return True, f"Index {index_name} created successfully."
        except Exception as e:
            logger.error("Failed to create index: %s", e)
            return False, str(e)

    def get_vectorstore(self, index_name: str, embedding_function, namespace: str):
        """Return a LangChain ``PineconeVectorStore`` for RAG operations.

        Args:
            index_name: Name of the index backing the store.
            embedding_function: Embedding object passed to the store as
                ``embedding``.
            namespace: Namespace the store reads from and writes to.

        Returns:
            The constructed ``PineconeVectorStore``. Errors raised while
            resolving the index or building the store are not caught here.
        """
        # Pass an index handle built from this manager's client so the store
        # uses the API key given to the constructor. With only ``index_name``
        # the store has to find credentials on its own.
        return PineconeVectorStore(
            index=self.pc.Index(index_name),
            embedding=embedding_function,
            namespace=namespace,
        )

    def delete_file_from_index(self, index_name: str, filename: str, namespace: str):
        """Delete every vector whose ``source`` metadata equals ``filename``.

        Args:
            index_name: Name of the index to delete from.
            filename: Value matched against the ``source`` metadata field.
            namespace: Namespace to delete from.

        Returns:
            A ``(success, message)`` tuple; on failure the message is the
            stringified exception (also logged).
        """
        try:
            # NOTE(review): delete-by-metadata-filter support may depend on
            # the index type / Pinecone release -- confirm it is available
            # for the serverless indexes created by ``create_index``.
            self.pc.Index(index_name).delete(
                filter={"source": filename},
                namespace=namespace,
            )
            return True, f"Deleted vectors for {filename}"
        except Exception as e:
            logger.error("Delete failed: %s", e)
            return False, str(e)