Update vector.py

vector.py CHANGED
@@ -11,6 +11,25 @@ import ast
 import re
 from filelock import FileLock
 import atexit

 # Configure Logging
 logging.basicConfig(
@@ -25,23 +44,1108 @@ class VectorDatabase:
         self.metadata_path = metadata_path
         self.lock_path = index_path + ".lock"

-        # File lock for multi-process safety
-        self.file_lock = FileLock(self.lock_path, timeout=60)
-        self.memory_lock = threading.
-        [further removed lines of the old __init__ body are not recoverable in this view]

     def _load_or_create_index(self):
         """Thread-safe and process-safe index loading/creation"""
@@ -50,8 +1154,18 @@ class VectorDatabase:
         try:
             logger.info("📂 Loading existing vector index...")
             self.index = faiss.read_index(self.index_path)
             with open(self.metadata_path, "rb") as f:
                 self.metadata = pickle.load(f)
             logger.info(f"✅ Loaded index with {self.index.ntotal} vectors, {len(self.metadata)} metadata entries")
         except Exception as e:
             logger.error(f"⚠️ Failed to load index: {e}. Creating new one.")
@@ -61,298 +1175,148 @@ class VectorDatabase:
         self._create_new_index()

     def _create_new_index(self):
-        """Create fresh
         dimension = 384
-
-        # Or IndexFlatL2 for Euclidean distance
-        self.index = faiss.IndexFlatIP(dimension)  # Cosine similarity
         self.metadata = []
         logger.info(f"🆕 Created new IndexFlatIP with dimension {dimension}")

     def _save_index(self):
         """Thread-safe and process-safe index saving with atomic writes"""
         with self.file_lock:
-            # Create temporary files
             temp_index = f"{self.index_path}.tmp"
             temp_meta = f"{self.metadata_path}.tmp"

             try:
-                # Save to temporary files
                 faiss.write_index(self.index, temp_index)
                 with open(temp_meta, "wb") as f:
                     pickle.dump(self.metadata, f)

-                # Atomic rename (POSIX operation)
                 os.replace(temp_index, self.index_path)
                 os.replace(temp_meta, self.metadata_path)

-                logger.info(f"💾 Saved index: {self.index.ntotal} vectors, {len(self.metadata)} metadata")
             except Exception as e:
                 logger.error(f"❌ Failed to save index: {e}")
-                # Clean up temp files on failure
                 for f in [temp_index, temp_meta]:
                     if os.path.exists(f):
                         try:
                             os.remove(f)
-                        except:
-                            pass

-    def
-        """
-        Structure-aware chunker for JS, HTML, CSS, etc.
-        Splits by logical boundaries (tags, functions) instead of random characters.
-        """
-        ext = os.path.splitext(filename)[1].lower()
-        chunks = []
-
-        # Define split patterns for different languages
-        patterns = {
-            # HTML/XML: Split before opening tags, effectively keeping tags grouped
-            '.html': r'(?=\n\s*<[^/])',
-            '.htm': r'(?=\n\s*<[^/])',
-            '.xml': r'(?=\n\s*<[^/])',
-            '.vue': r'(?=\n\s*<[^/])',
-            # JS/TS: Split before major keywords
-            '.js': r'(?=\n\s*(?:function|class|const|let|var|export|import|async))',
-            '.jsx': r'(?=\n\s*(?:function|class|const|let|var|export|import|async))',
-            '.ts': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|interface|type))',
-            '.tsx': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|interface|type))',
-            # CSS: Split before selectors
-            '.css': r'(?=\n\s*[.#@a-zA-Z])',
-            '.scss': r'(?=\n\s*[.#@a-zA-Z])',
-        }
-
-        pattern = patterns.get(ext)
-
-        # Fallback to standard if no pattern matches or regex fails
-        if not pattern:
-            return self._chunk_text_standard(text)
-
         try:
-            [several removed lines are not recoverable in this view]
-            # If adding this segment exceeds target, save current and start new
-            if len(current_chunk) + len(seg) > TARGET_SIZE and len(current_chunk) > 100:
-                chunks.append({
-                    "text": current_chunk.strip(),
-                    "type": "code_block",
-                    "name": f"block_{len(chunks)}"
-                })
-                current_chunk = seg
-            else:
-                current_chunk += seg

-            [removed lines not recoverable]
-                "text": current_chunk.strip(),
-                "type": "code_block",
-                "name": f"block_{len(chunks)}"
-            })

-            [removed lines not recoverable]
-            return self._chunk_text_standard(text)
-
-    def _chunk_python_code(self, text, filename):
-        """Improved AST chunker that captures EVERYTHING (not just functions)"""
-        chunks = []
-        try:
-            tree = ast.parse(text)
-            lines = text.splitlines()

-            #
-            [removed lines not recoverable]
-            for node in tree.body:
-                if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)):
-                    # Extract the block
-                    start = node.lineno - 1
-                    end = node.end_lineno
-                    block_text = "\n".join(lines[start:end])
-
-                    chunks.append({
-                        "text": block_text,
-                        "type": "code_function",
-                        "name": node.name
-                    })
-                elif isinstance(node, (ast.Import, ast.ImportFrom, ast.Assign, ast.Expr)):
-                    # Group top-level scripts/imports together
-                    # We approximate by grabbing the line
-                    if hasattr(node, 'end_lineno'):
-                        start = node.lineno - 1
-                        end = node.end_lineno
-                        global_context.append("\n".join(lines[start:end]))

-            # Add the collected global context as the first chunk
-            if global_context:
-                # Group globals into chunks of 1000 chars
-                full_global = "\n".join(global_context)
-                if len(full_global) > 100:
-                    chunks.insert(0, {
-                        "text": full_global[:1500],  # Cap context size
-                        "type": "code_context",
-                        "name": "imports_and_globals"
-                    })
-
         except Exception as e:
-            logger.
-            [removed lines not recoverable]
-            # Fallback: if AST yielded nothing (e.g. empty file), use standards
-            if not chunks:
-                return self._chunk_text_standard(text)
-
-            return chunks

-    def
-        """
-        [removed lines not recoverable]
-        # Handle very short text
-        if len(text) <= chunk_size:
-            return [{
-                "text": text,
-                "type": "text_block",
-                "name": "full_content"
-            }]

-        for i in range(0, len(text), chunk_size - overlap):
-            chunk = text[i:i + chunk_size]
-            if len(chunk) > 100:  # Minimum chunk size
-                chunks.append({
-                    "text": chunk,
-                    "type": "text_block",
-                    "name": f"chunk_{i//chunk_size}"
-                })

-        [about two dozen removed lines of the old store_session_document are not recoverable in this view]
-        # Ensure we have chunks
-        if not chunks_data and text:
-            chunks_data = [{
-                "text": text[:2000],
-                "type": "fallback",
-                "name": "full_document"
-            }]
-
-        if not chunks_data:
-            logger.error(f"No chunks generated for {filename}")
-            return False

-            [removed lines not recoverable]
-                "subtype": chunk.get("type", "general"),
-                "name": chunk.get("name", "unknown"),
-                "user_id": user_id,
-                "chat_id": chat_id,
-                "timestamp": time.time(),
-                "chunk_index": len(final_texts)
             })
-
-        # 2. Add "Whole File" Entry (FIXED FOR INTENT SEPARATION)
-        marker_text = f"Entire full content of file {filename} code"
-        final_texts.append(marker_text)
-        final_meta.append({
-            "text": marker_text,
-            "actual_content": text,  # The full content
-            "source": filename,
-
-            # --- THE FIX ---
-            "type": "file",  # Visible to 'file' searches
-            "subtype": "whole_file",  # Identified by Ranking Logic
-            # ----------------
-
-            "user_id": user_id,
-            "chat_id": chat_id,
-            "timestamp": time.time(),
-            "chunk_index": -1
-        })

-            [removed lines not recoverable]
-            faiss.normalize_L2(embeddings)
-
-            with self.memory_lock:
-                self.index.add(np.array(embeddings).astype('float32'))
-                self.metadata.extend(final_meta)
-                self._save_index()

-            [removed lines not recoverable]
-            return True

-        [removed lines not recoverable]
-        user_vectors = sum(1 for m in self.metadata if m.get("user_id") == user_id)
-
-        logger.info(f"🔍 Storage verification: User {user_id[:8]} has {user_vectors} vectors (expected: {expected_count})")

-            logger.warning(f"⚠️ Storage mismatch for user {user_id[:8]}")

-    def store_chat_context(self, messages: list, user_id: str, chat_id: str):
         """Store chat history as session memory"""
         if not messages or not user_id:
             return False

-        # Format conversation
         conversation = ""
-        for msg in messages[-10:]:
             role = msg.get("role", "unknown")
             content = msg.get("content", "")
             if content:
@@ -361,13 +1325,11 @@ class VectorDatabase:
         if len(conversation) < 50:
             return False

-
-        chunks = self._chunk_text_standard(conversation, chunk_size=800, overlap=100)

         if not chunks:
             return False

-        # Prepare for indexing
         texts = [c["text"] for c in chunks]
         metadata_list = []
@@ -382,9 +1344,8 @@ class VectorDatabase:
                 "chunk_index": i
             })

-        # Store in index
         try:
-            embeddings = self.embedder.encode(texts)
             faiss.normalize_L2(embeddings)

             with self.memory_lock:
@@ -392,165 +1353,94 @@ class VectorDatabase:
                 self.metadata.extend(metadata_list)
                 self._save_index()

             logger.info(f"💭 Stored {len(texts)} chat history chunks for user {user_id[:8]}")
             return True

         except Exception as e:
-            logger.error(f"Failed to store chat history: {e}")
             return False

-    def
-        """
-        Retrieve context with 'Whole File' capability.
-        MAINTAINS SEPARATION: Files vs. History
-        """
-        if self.index.ntotal == 0 or not user_id:
-            return []
-
-        # Debug info
-        with self.memory_lock:
-            total_vectors = self.index.ntotal
-            user_vectors = sum(1 for m in self.metadata if m.get("user_id") == user_id)
-
-        logger.info(f"🔍 Searching for user {user_id[:8]} (User vectors: {user_vectors}/{total_vectors})")
-
-        # Encode query
-        query_vec = self.embedder.encode([query])
-        faiss.normalize_L2(query_vec)
-
-        # Search
-        search_k = min(top_k * 3, self.index.ntotal) if self.index.ntotal > 0 else 1
-        with self.memory_lock:
-            D, I = self.index.search(np.array(query_vec).astype('float32'), search_k)
-
-        candidates = []
-        valid_count = 0
-        query_lower = query.lower()
-
-        for i, idx in enumerate(I[0]):
-            if idx == -1 or idx >= len(self.metadata): continue
-
-            item = self.metadata[idx]
-
-            # 1. STRICT ISOLATION (User & Session)
-            if item.get("user_id") != user_id: continue
-            if item.get("chat_id") != chat_id: continue
-
-            # 2. INTENT SEPARATION (File vs. History)
-            # If front-end asks for 'file', we return 'file' (which now includes whole_files).
-            # If front-end asks for 'history', we return 'history'.
-            if filter_type and item.get("type") != filter_type:
-                continue
-
-            score = D[0][i]
-
-            # 3. WHOLE FILE RANKING LOGIC
-            # We now check 'subtype' instead of 'type'
-            filename = item.get("source", "").lower()
-            is_whole_file = item.get("subtype") == "whole_file"  # <--- UPDATED CHECK
-
-            if is_whole_file:
-                if filename in query_lower:
-                    score = 2.0  # Force to top
-
-                if item.get("actual_content"):
-                    item = item.copy()
-                    item["text"] = item["actual_content"]
-
-            # 4. GATEKEEPER
-            if score < min_score: continue
-
-            candidates.append({
-                "id": int(idx),
-                "text": item.get("text", ""),
-                "meta": item,
-                "score": score
-            })
-            valid_count += 1
-
-        if not candidates: return []
-
-        candidates.sort(key=lambda x: x["score"], reverse=True)
-
-        if candidates[0]["score"] >= 2.0:
-            logger.info(f"🎯 Returning Whole File: {candidates[0]['meta'].get('source')}")
-            return candidates[:1]
-
-        try:
-            rerank_request = RerankRequest(query=query, passages=candidates)
-            results = self.ranker.rerank(rerank_request)
-            final_results = [r for r in results[:final_k] if r['score'] > min_score]
-            return final_results
-        except Exception as e:
-            logger.error(f"Reranking failed: {e}")
-            return candidates[:final_k]
-
-    def delete_session(self, user_id: str, chat_id: str):
         """Surgical Strike: Permanently remove ONLY one specific session"""
         with self.memory_lock:
-            # 1. Filter: Keep everything that is NOT this specific chat
             new_metadata = []
             removed_count = 0

             for meta in self.metadata:
-                # Check strict ownership and ID match
                 if meta.get("user_id") == user_id and meta.get("chat_id") == chat_id:
                     removed_count += 1
                 else:
                     new_metadata.append(meta)

             if removed_count == 0:
-

             logger.info(f"🧹 Surgically removing {removed_count} vectors for session {chat_id}...")

-            # 2. Rebuild Index (Required for FAISS IndexFlatIP)
             if not new_metadata:
-                self.index = faiss.IndexFlatIP(384)
             else:
-                # Re-embed surviving text to rebuild index
-                # (Optimization: In a huge DB, use IndexIDMap, but for now this is safe)
                 surviving_texts = [m["text"] for m in new_metadata]
                 try:
-                    embeddings = self.embedder.encode(surviving_texts)
                     faiss.normalize_L2(embeddings)

                     new_index = faiss.IndexFlatIP(384)
                     new_index.add(np.array(embeddings).astype('float32'))
                     self.index = new_index
                 except Exception as e:
-                    logger.error(f"Rebuild failed: {e}")
                     return False

             self.metadata = new_metadata
             self._save_index()
             return True
-
-    def get_user_stats(self, user_id: str):
         """Get statistics for a user's session"""
         with self.memory_lock:
             user_vectors = []
-            for meta in self.metadata:
-                if meta.get("user_id") == user_id:
                     user_vectors.append(meta)

             stats = {
                 "user_id": user_id,
                 "total_vectors": len(user_vectors),
                 "by_type": {},
-                "by_source": {}
             }

-            for vec in user_vectors:
                 vec_type = vec.get("type", "unknown")
                 source = vec.get("source", "unknown")

                 stats["by_type"][vec_type] = stats["by_type"].get(vec_type, 0) + 1
                 stats["by_source"][source] = stats["by_source"].get(source, 0) + 1

             return stats

-    def cleanup_old_sessions(self, max_age_hours=24):
         """Clean up old session data"""
         current_time = time.time()
         cutoff = current_time - (max_age_hours * 3600)
@@ -558,35 +1448,46 @@ class VectorDatabase:
         with self.memory_lock:
             old_metadata = []
             new_metadata = []

-            for meta in self.metadata:
                 if meta.get("timestamp", 0) < cutoff:
-                    old_metadata.append(meta)
                 else:
                     new_metadata.append(meta)

             if not old_metadata:
                 return 0

-            # Rebuild index with only recent vectors
             logger.info(f"🧹 Cleaning up {len(old_metadata)} old vectors...")

-            # Extract recent texts
             recent_texts = [m["text"] for m in new_metadata]

             if recent_texts:
-                [the old re-embed-and-rebuild block is not recoverable in this view]
             else:
                 self.index = faiss.IndexFlatIP(384)

             self.metadata = new_metadata
             self._save_index()

             return len(old_metadata)

     def _cleanup(self):
@@ -594,17 +1495,21 @@ class VectorDatabase:
         try:
             if hasattr(self, 'file_lock'):
                 self.file_lock.release()
-
-

 # Global instance (singleton pattern)
 _vdb_instance = None

-def get_vector_db():
-    """Singleton factory for VectorDatabase"""
     global _vdb_instance
     if _vdb_instance is None:
-
     return _vdb_instance

 # For backward compatibility
@@ -11,6 +11,25 @@ import ast
 import re
 from filelock import FileLock
 import atexit
+import gc
+from typing import List, Dict, Any, Optional, Tuple, Union
+from collections import defaultdict, OrderedDict  # <-- FIX 1: Add OrderedDict
+
+# === NEW IMPORTS FOR HYBRID SEARCH ===
+try:
+    from rank_bm25 import BM25Okapi
+    BM25_AVAILABLE = True
+except ImportError:
+    BM25_AVAILABLE = False
+    logging.warning("BM25 not available. Install: pip install rank-bm25")
+
+try:
+    import nltk
+    from nltk.tokenize import word_tokenize, sent_tokenize
+    NLTK_AVAILABLE = True
+except ImportError:
+    NLTK_AVAILABLE = False
+    logging.warning("NLTK not available. Install: pip install nltk")

 # Configure Logging
 logging.basicConfig(

@@ -25,23 +44,1108 @@ class VectorDatabase:
         self.metadata_path = metadata_path
         self.lock_path = index_path + ".lock"

+        # File lock for multi-process safety
+        self.file_lock = FileLock(self.lock_path, timeout=60)
+        self.memory_lock = threading.RLock()
+
+        logger.info("🧠 Initializing Production Vector Engine with Hybrid Search...")
+
+        # Load models with error handling
+        try:
+            self.embedder = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+            self.ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="./flashrank_cache")
+        except Exception as e:
+            logger.error(f"❌ Failed to load models: {e}")
+            raise RuntimeError(f"Model initialization failed: {e}")
+
+        # Load or create index with file locking
+        self._load_or_create_index()
+
+        # === FIX 1: LAZY LOADING & LRU CACHE (Memory Safe) ===
+        # REMOVED: self._initialize_bm25_from_metadata() - No OOM on startup!
+        # Instead, use LRU Cache to load sessions only when searched
+        self.bm25_cache_size = 50  # Limit concurrent BM25 indices in memory
+        self.bm25_indices = OrderedDict()  # {(user_id, chat_id): BM25Okapi} with LRU
+        self.bm25_docs = {}  # {(user_id, chat_id): [tokenized_documents]}
+        self.bm25_doc_to_vector = {}  # {(user_id, chat_id): [vector_ids]}
+        self.bm25_lock = threading.RLock()
+
+        # Performance tracking
+        self.query_history = []
+        self.performance_stats = {
+            "exact_matches": 0,
+            "semantic_matches": 0,
+            "bm25_matches": 0,
+            "hybrid_matches": 0,
+            "fallback_matches": 0,
+            "avg_retrieval_time": 0
+        }
+
+        # Query type classification stats
+        self.query_types = defaultdict(int)
+
+        # Register cleanup
+        atexit.register(self._cleanup)
+
+        logger.info(f"✅ Vector Engine Ready. Index: {self.index.ntotal} vectors, {len(self.metadata)} metadata entries")
+        logger.info(f"✅ BM25 LRU Cache: {self.bm25_cache_size} sessions max, BM25 Available: {BM25_AVAILABLE}")
+
+    # ==================== FIX 2: LAZY BM25 LOADING ====================
+
+    def _get_or_build_bm25(self, user_id: str, chat_id: str) -> Optional[BM25Okapi]:
+        """
+        Retrieve BM25 index from cache or build it on-demand (Lazy Load).
+        Uses LRU eviction to prevent memory explosion.
+        """
+        if not BM25_AVAILABLE:
+            return None
+
+        key = (user_id, chat_id)
+
+        with self.bm25_lock:
+            # 1. CACHE HIT: Move to end (mark as recently used)
+            if key in self.bm25_indices:
+                self.bm25_indices.move_to_end(key)
+                return self.bm25_indices[key]
+
+            # 2. CACHE MISS: Build index on the fly
+            logger.debug(f"🔄 Building BM25 index on-demand for session {key}")
+
+            tokenized_corpus = []
+            vector_ids = []
+
+            # Filter documents for this user only (session isolation)
+            with self.memory_lock:
+                for idx, meta in enumerate(self.metadata):
+                    if meta.get("user_id") == user_id and meta.get("chat_id") == chat_id:
+                        text = meta.get("text", "")
+                        tokens = self._tokenize_for_bm25(text)
+                        if tokens:  # Only add non-empty tokenized docs
+                            tokenized_corpus.append(tokens)
+                            vector_ids.append(idx)
+
+            if not tokenized_corpus:
+                logger.debug(f"⚠️ No documents found for BM25 index {key}")
+                return None
+
+            # Build BM25 index
+            try:
+                bm25 = BM25Okapi(tokenized_corpus)
+
+                # Store additional metadata for scoring
+                self.bm25_docs[key] = tokenized_corpus
+                self.bm25_doc_to_vector[key] = vector_ids
+
+                # 3. STORE IN CACHE with LRU EVICTION POLICY
+                if len(self.bm25_indices) >= self.bm25_cache_size:
+                    # Remove oldest entry
+                    oldest_key, _ = self.bm25_indices.popitem(last=False)
+                    # Clean up associated data
+                    if oldest_key in self.bm25_docs:
+                        del self.bm25_docs[oldest_key]
+                    if oldest_key in self.bm25_doc_to_vector:
+                        del self.bm25_doc_to_vector[oldest_key]
+                    logger.debug(f"🧹 Evicted BM25 cache for session {oldest_key}")
+
+                self.bm25_indices[key] = bm25
+                logger.debug(f"✅ Built BM25 index for session {key}: {len(tokenized_corpus)} docs")
+
+                return bm25
+
+            except Exception as e:
+                logger.error(f"❌ Failed to build BM25 index for {key}: {e}")
+                return None
+
+    def _invalidate_bm25_cache(self, user_id: str, chat_id: str):
+        """
+        Invalidate BM25 cache for a session (fast, no rebuild).
+        Called when new documents are added.
+        """
+        key = (user_id, chat_id)
+        with self.bm25_lock:
+            if key in self.bm25_indices:
+                del self.bm25_indices[key]
+            if key in self.bm25_docs:
+                del self.bm25_docs[key]
+            if key in self.bm25_doc_to_vector:
+                del self.bm25_doc_to_vector[key]
+            logger.debug(f"🧹 Invalidated BM25 cache for session {key}")
+
+    def _tokenize_for_bm25(self, text: str) -> List[str]:
+        """Tokenize text for BM25 with proper handling"""
+        if not text or not isinstance(text, str):
+            return []
+
+        # Simple tokenization if NLTK not available
+        if not NLTK_AVAILABLE:
+            # Basic regex tokenization (fallback)
+            tokens = re.findall(r'\b\w{2,}\b', text.lower())
+            return tokens
+
+        try:
+            # Use NLTK for better tokenization
+            tokens = word_tokenize(text.lower())
+            # Filter out very short tokens and keep alphanumeric
+            tokens = [t for t in tokens if len(t) >= 2 and re.match(r'^[a-z0-9]+$', t)]
+            return tokens
+        except Exception as e:
+            logger.warning(f"Tokenization failed: {e}, using fallback")
+            tokens = re.findall(r'\b\w{2,}\b', text.lower())
+            return tokens
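
A quick illustration of the two paths (my example string, not from the commit). Note that the regex fallback keeps underscored identifiers whole, while the NLTK path's ^[a-z0-9]+$ filter drops tokens containing underscores, so the two paths can tokenize code differently:

import re

text = "def get_user_stats(user_id): return stats"
print(re.findall(r'\b\w{2,}\b', text.lower()))
# ['def', 'get_user_stats', 'user_id', 'return', 'stats']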
+
+    # ==================== ENHANCED STORAGE WITH CACHE INVALIDATION ====================
+
+    def store_session_document(self, text: str, filename: str, user_id: str, chat_id: str) -> bool:
+        """Store extracted file content with enhanced chunking and cache invalidation"""
+        if not text or len(text) < 10 or not user_id:
+            logger.warning(f"Invalid input for {filename}")
+            return False
+
+        logger.info(f"📥 Storing {filename} ({len(text)} chars) for user {user_id[:8]}...")
+
+        chunks_data = []
+        ext = os.path.splitext(filename)[1].lower()
+
+        try:
+            # ===== FIX 4: CORRECT METHOD NAMES =====
+            if ext == '.py':
+                chunks_data = self._chunk_python_ast(text, filename)  # <-- Fixed name
+            elif ext in ['.js', '.html', '.css', '.java', '.cpp', '.ts', '.tsx', '.jsx', '.vue', '.xml', '.scss']:
+                chunks_data = self._chunk_smart_code(text, filename)
+            else:
+                chunks_data = self._chunk_text_enhanced(text, chunk_size=600, overlap=100)
+        except Exception as e:
+            logger.error(f"Chunking failed for {filename}: {e}")
+            chunks_data = self._chunk_text_enhanced(text, chunk_size=600, overlap=100)
+
+        if not chunks_data and text:
+            chunks_data = [{
+                "text": text[:2000],
+                "type": "fallback",
+                "name": "full_document"
+            }]
+
+        if not chunks_data:
+            logger.error(f"No chunks generated for {filename}")
+            return False
+
+        final_texts = []
+        final_meta = []
+
+        for chunk in chunks_data:
+            final_texts.append(chunk["text"])
+            final_meta.append({
+                "text": chunk["text"],
+                "source": filename,
+                "type": "file",
+                "subtype": chunk.get("type", "general"),
+                "name": chunk.get("name", "unknown"),
+                "user_id": user_id,
+                "chat_id": chat_id,
+                "timestamp": time.time(),
+                "chunk_index": len(final_texts)
+            })
+
+        # Whole file embedding for comprehensive answers
+        whole_file_text = text[:4000] if len(text) > 4000 else text
+        final_texts.append(f"Complete File: {filename} | Full Content: {whole_file_text}")
+        final_meta.append({
+            "text": whole_file_text,
+            "actual_content": text,
+            "source": filename,
+            "type": "file",
+            "subtype": "whole_file",
+            "is_whole_file": True,
+            "user_id": user_id,
+            "chat_id": chat_id,
+            "timestamp": time.time(),
+            "chunk_index": -1
+        })
+
+        try:
+            # Optimized embedding
+            embeddings = self.embedder.encode(
+                final_texts,
+                show_progress_bar=False,
+                batch_size=32,
+                convert_to_numpy=True,
+                normalize_embeddings=True
+            )
+
+            faiss.normalize_L2(embeddings)
+
+            with self.memory_lock:
+                self.index.add(np.array(embeddings).astype('float32'))
+                self.metadata.extend(final_meta)
+                self._save_index()
+
+            logger.info(f"✅ Stored {len(final_texts)} chunks from {filename} for user {user_id[:8]}")
+
+            # ===== FIX 4: CACHE INVALIDATION instead of Immediate Rebuild =====
+            # When new files arrive, just invalidate the old cache.
+            # It will auto-rebuild (including the new file) on next search.
+            self._invalidate_bm25_cache(user_id, chat_id)
+
+            self._verify_storage(user_id, chat_id, len(final_texts))
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ Failed to store vectors for {filename}: {e}")
+            # Clean up partial storage
+            with self.memory_lock:
+                if self.index.ntotal >= len(final_texts):
+                    logger.warning("Rolling back partial storage...")
+                    self._rollback_partial_storage(user_id, chat_id)
+            return False
+
+    # ==================== UPDATED BM25 SEARCH WITH LAZY LOADING ====================
+
+    def bm25_search(self, query: str, user_id: str, chat_id: str,
+                    top_k: int = 50, min_score: float = 0.0) -> List[Dict[str, Any]]:
+        """
+        Pure BM25 search within a session with lazy loading.
+        Returns ranked results with BM25 scores.
+        """
+        if not BM25_AVAILABLE:
+            logger.warning("BM25 not available. Falling back to semantic search.")
+            return []
+
+        start_time = time.time()
+
+        # ===== FIX 3: USE LAZY LOADER =====
+        bm25_index = self._get_or_build_bm25(user_id, chat_id)
+
+        if not bm25_index:
+            logger.warning(f"No BM25 index for session {(user_id[:8], chat_id[:8])}")
+            return []
+
+        # Tokenize query
+        query_tokens = self._tokenize_for_bm25(query)
+        if not query_tokens:
+            return []
+
+        try:
+            # Get BM25 scores from the lazy-loaded index
+            key = (user_id, chat_id)
+            bm25_scores = bm25_index.get_scores(query_tokens)
+
+            # Get top-k indices
+            top_indices = np.argsort(bm25_scores)[::-1][:top_k * 2]
+
+            results = []
+            for idx in top_indices:
+                score = float(bm25_scores[idx])
+
+                # Apply minimum score threshold
+                if score < min_score:
+                    continue
+
+                # Map BM25 doc index to vector index
+                if (key in self.bm25_doc_to_vector and
+                        idx < len(self.bm25_doc_to_vector[key])):
+
+                    vector_idx = self.bm25_doc_to_vector[key][idx]
+                    if vector_idx < len(self.metadata):
+                        meta = self.metadata[vector_idx]
+
+                        # Calculate normalized score (0-1 range)
+                        normalized_score = min(score / 10.0, 1.0) if score > 0 else 0.0
+
+                        results.append({
+                            "id": int(vector_idx),
+                            "text": meta.get("text", ""),
+                            "meta": meta,
+                            "score": normalized_score,
+                            "match_type": "bm25",
+                            "bm25_raw_score": score,
+                            "is_whole_file": meta.get("is_whole_file", False)
+                        })
+
+            # Sort by BM25 score
+            results.sort(key=lambda x: x["score"], reverse=True)
+
+            elapsed = time.time() - start_time
+            logger.debug(f"BM25 search completed in {elapsed:.3f}s: {len(results)} results")
+
+            return results[:top_k]
+
+        except Exception as e:
+            logger.error(f"BM25 search failed: {e}")
+            return []
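
A standalone sketch of the per-session scoring this method performs, using rank_bm25 directly (toy corpus of mine; assumes rank-bm25 is installed):

from rank_bm25 import BM25Okapi

corpus = [
    ["vector", "index", "faiss"],
    ["bm25", "keyword", "search"],
    ["chat", "history", "session"],
]
bm25 = BM25Okapi(corpus)
scores = bm25.get_scores(["keyword", "search"])
best = max(range(len(corpus)), key=lambda i: scores[i])
print(best)  # 1 -- the keyword/search document scores highest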
+
+    # ==================== HYBRID RETRIEVAL ENGINE (UPDATED) ====================
+
+    def hybrid_retrieve(self, query: str, user_id: str, chat_id: str,
+                        filter_type: str = None, top_k: int = 100,
+                        final_k: int = 5, strategy: str = "smart") -> List[Dict[str, Any]]:
+        """
+        HYBRID RETRIEVAL: BM25 + Semantic + Exact Fusion
+        Now with lazy-loaded BM25 indices for memory safety.
+        """
+        logger.info(f"🤖 HYBRID SEARCH: '{query[:80]}...' | Strategy: {strategy}")
+
+        # Classify query type
+        query_category = self._classify_query(query)
+        self.query_types[query_category] += 1
+
+        # Choose strategy based on query type if "smart"
+        if strategy == "smart":
+            if query_category == "code":
+                strategy = "bm25_first"
+            elif query_category == "natural":
+                strategy = "semantic_first"
+            else:
+                strategy = "fusion"
+
+        start_time = time.time()
+
+        # === PHASE 1: GET RESULTS FROM BOTH METHODS ===
+        bm25_results = []
+        semantic_results = []
+
+        if strategy in ["bm25_first", "fusion", "weighted", "smart"]:
+            bm25_results = self.bm25_search(
+                query=query,
+                user_id=user_id,
+                chat_id=chat_id,
+                top_k=top_k * 2,
+                min_score=0.1
+            )
+
+        if strategy in ["semantic_first", "fusion", "weighted", "smart"]:
+            semantic_results = self._semantic_search(
+                query=query,
+                user_id=user_id,
+                chat_id=chat_id,
+                filter_type=filter_type,
+                top_k=top_k * 2,
+                min_score=0.1,
+                final_k=top_k
+            )
+
+        # === PHASE 2: APPLY STRATEGY ===
+        if strategy == "bm25_first":
+            results = self._bm25_first_fusion(bm25_results, semantic_results, final_k)
+        elif strategy == "semantic_first":
+            results = self._semantic_first_fusion(semantic_results, bm25_results, final_k)
+        elif strategy == "fusion":
+            results = self._reciprocal_rank_fusion(bm25_results, semantic_results, final_k)
+        elif strategy == "weighted":
+            results = self._weighted_fusion(bm25_results, semantic_results, final_k)
+        else:
+            # Default to fusion
+            results = self._reciprocal_rank_fusion(bm25_results, semantic_results, final_k)
+
+        # === PHASE 3: EXACT FALLBACK IF NO RESULTS ===
+        if not results:
+            logger.info("🔄 No hybrid results, trying exact fallback...")
+            results = self.retrieve_exact(
+                query=query,
+                user_id=user_id,
+                chat_id=chat_id,
+                filter_type=filter_type,
+                aggressive=True
+            )
+            if results:
+                self.performance_stats["fallback_matches"] += 1
+                return results[:final_k]
+
+        # === PHASE 4: SMART RERANKING ===
+        if results and len(results) > 1:
+            try:
+                results = self._smart_rerank(query, results, final_k)
+            except Exception as e:
+                logger.warning(f"Reranking failed: {e}")
+
+        # === PHASE 5: FINAL PROCESSING ===
+        elapsed = time.time() - start_time
+
+        # Boost whole files for complete answers
+        for result in results:
+            if result.get("is_whole_file"):
+                result["score"] = min(result["score"] * 1.2, 1.0)
+
+        # Ensure scores are in 0-1 range
+        for result in results:
+            result["score"] = min(max(result["score"], 0.0), 1.0)
+
+        # Sort by final score
+        results.sort(key=lambda x: x["score"], reverse=True)
+
+        # Update performance stats
+        if results:
+            self.performance_stats["hybrid_matches"] += 1
+            logger.info(f"✅ Hybrid search found {len(results)} results in {elapsed:.3f}s")
+            logger.info(f"🏆 Top score: {results[0]['score']:.3f}, Type: {results[0].get('match_type', 'unknown')}")
+        else:
+            logger.warning(f"❌ Hybrid search found no results")
+
+        return results[:final_k]
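
An illustrative call (the IDs and query are hypothetical; get_vector_db is the module-level singleton factory shown earlier):

db = get_vector_db()
results = db.hybrid_retrieve(
    query="how does delete_session rebuild the index?",
    user_id="user-123",
    chat_id="chat-456",
    filter_type="file",  # restrict to stored file chunks
    final_k=5,
    strategy="smart",    # routes to bm25_first / semantic_first / fusion
)
for r in results:
    print(f"{r['score']:.3f}", r.get("match_type"), r["meta"].get("source"))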
+
+    # ==================== CORE METHODS (PRESERVED WITH FIXES) ====================
+
+    def _chunk_python_ast(self, text: str, filename: str) -> List[Dict[str, Any]]:
+        """Enhanced AST chunker with better context preservation"""
+        chunks = []
+        try:
+            tree = ast.parse(text)
+            lines = text.splitlines()
+
+            global_context = []
+
+            for node in tree.body:
+                if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)):
+                    start = max(0, node.lineno - 4)
+                    end = node.end_lineno + 2
+                    block_text = "\n".join(lines[start:end])
+
+                    chunks.append({
+                        "text": f"File: {filename} | Type: {type(node).__name__} | Name: {node.name} | Content: {block_text}",
+                        "type": "code_function",
+                        "name": node.name,
+                        "line_start": start,
+                        "line_end": end
+                    })
+                elif isinstance(node, (ast.Import, ast.ImportFrom, ast.Assign, ast.Expr)):
+                    if hasattr(node, 'end_lineno'):
+                        start = node.lineno - 1
+                        end = node.end_lineno
+                        global_context.append("\n".join(lines[start:end]))
+
+            # Add global context as a separate chunk
+            if global_context:
+                full_global = "\n".join(global_context)
+                if len(full_global) > 50:
+                    chunks.insert(0, {
+                        "text": f"File: {filename} | Type: imports_and_globals | Content: {full_global[:2000]}",
+                        "type": "code_context",
+                        "name": "imports_and_globals"
+                    })
+
+        except Exception as e:
+            logger.warning(f"AST parsing failed for {filename}: {e}")
+            return self._chunk_text_enhanced(text)
+
+        if not chunks:
+            return self._chunk_text_enhanced(text)
+
+        return chunks
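
A minimal demonstration of the ast facts this chunker relies on: each top-level node carries lineno/end_lineno (Python 3.8+), so a def can be cut out of the source by line span (toy source of mine, not from the commit):

import ast

src = "import os\n\ndef greet(name):\n    return f'hi {name}'\n"
lines = src.splitlines()
for node in ast.parse(src).body:
    if isinstance(node, ast.FunctionDef):
        block = "\n".join(lines[node.lineno - 1:node.end_lineno])
        print(node.name, "->", repr(block))
# greet -> "def greet(name):\n    return f'hi {name}'"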
+
+    def _chunk_smart_code(self, text: str, filename: str) -> List[Dict[str, Any]]:
+        """ENHANCED Structure-aware chunker with context preservation"""
+        ext = os.path.splitext(filename)[1].lower()
+        chunks = []
+
+        # Define split patterns for different languages
+        patterns = {
+            '.html': r'(?=\n\s*<[^/])',
+            '.htm': r'(?=\n\s*<[^/])',
+            '.xml': r'(?=\n\s*<[^/])',
+            '.vue': r'(?=\n\s*<[^/])',
+            '.js': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|def|if|for|while|switch))',
+            '.jsx': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|def|if|for|while|switch))',
+            '.ts': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|interface|type|def|if|for|while))',
+            '.tsx': r'(?=\n\s*(?:function|class|const|let|var|export|import|async|interface|type|def|if|for|while))',
+            '.css': r'(?=\n\s*[.#@a-zA-Z])',
+            '.scss': r'(?=\n\s*[.#@a-zA-Z])',
+            '.java': r'(?=\n\s*(?:public|private|protected|class|interface|enum|@))',
+            '.cpp': r'(?=\n\s*(?:#include|using|namespace|class|struct|enum|template))',
+        }
+
+        pattern = patterns.get(ext)
+
+        # Fallback to standard if no pattern matches or regex fails
+        if not pattern:
+            return self._chunk_text_enhanced(text)
+
+        try:
+            segments = re.split(pattern, text)
+
+            # Process with CONTEXT OVERLAP for better retrieval
+            current_chunk = ""
+            TARGET_SIZE = 800
+            OVERLAP_SIZE = 100
+
+            for seg_idx, seg in enumerate(segments):
+                if not seg.strip():
+                    continue
+
+                # Check if adding this segment would exceed target
+                if len(current_chunk) + len(seg) > TARGET_SIZE and len(current_chunk) > 50:
+                    # Save current chunk
+                    chunk_text = current_chunk.strip()
+                    if chunk_text:
+                        chunks.append({
+                            "text": f"File: {filename} | Content: {chunk_text}",
+                            "type": "code_block",
+                            "name": f"block_{len(chunks)}",
+                            "context_id": seg_idx
+                        })
+
+                    # Start new chunk with overlap from previous
+                    current_chunk = current_chunk[-OVERLAP_SIZE:] + "\n" + seg if OVERLAP_SIZE > 0 else seg
+                else:
+                    current_chunk += seg
+
+            # Add final chunk
+            if current_chunk:
+                chunks.append({
+                    "text": f"File: {filename} | Content: {current_chunk.strip()}",
+                    "type": "code_block",
+                    "name": f"block_{len(chunks)}",
+                    "context_id": len(segments)
+                })
+
+            return chunks
+        except Exception as e:
+            logger.warning(f"Smart chunking failed for {filename}: {e}. Falling back.")
+            return self._chunk_text_enhanced(text)
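
The split patterns above are zero-width lookaheads, so re.split cuts immediately before each keyword without consuming any text; a small demonstration (toy input of mine):

import re

src = "const a = 1;\nfunction f() {}\nclass C {}\n"
parts = re.split(r'(?=\n\s*(?:function|class|const|let|var))', src)
print(parts)
# ['const a = 1;', '\nfunction f() {}', '\nclass C {}\n']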
+
+    def _chunk_text_enhanced(self, text: str, chunk_size: int = 600, overlap: int = 100) -> List[Dict[str, Any]]:
+        """Enhanced text chunking that preserves natural boundaries"""
+        chunks = []
+
+        # Try to split by paragraphs first
+        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
+
+        if not paragraphs:
+            # Fallback to standard chunking
+            return self._chunk_text_standard(text, chunk_size, overlap)
+
+        current_chunk = ""
+        for para in paragraphs:
+            if len(current_chunk) + len(para) > chunk_size and current_chunk:
+                chunks.append({
+                    "text": current_chunk.strip(),
+                    "type": "text_paragraph",
+                    "name": f"para_{len(chunks)}"
+                })
+                # Keep last overlap portion
+                current_chunk = current_chunk[-overlap:] + "\n\n" + para if overlap > 0 else para
+            else:
+                current_chunk += "\n\n" + para if current_chunk else para
+
+        if current_chunk:
+            chunks.append({
+                "text": current_chunk.strip(),
+                "type": "text_paragraph",
+                "name": f"para_{len(chunks)}"
+            })
+
+        return chunks
+
+    def _chunk_text_standard(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[Dict[str, Any]]:
+        """Standard text chunking with sliding window"""
+        chunks = []
+
+        if len(text) <= chunk_size:
+            return [{
+                "text": text,
+                "type": "text_block",
+                "name": "full_content"
+            }]
+
+        for i in range(0, len(text), chunk_size - overlap):
+            chunk = text[i:i + chunk_size]
+            if len(chunk) > 100:
+                chunks.append({
+                    "text": chunk,
+                    "type": "text_block",
+                    "name": f"chunk_{i//chunk_size}"
+                })
+
+        return chunks
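
The sliding window advances by chunk_size - overlap, so consecutive chunks share `overlap` characters; a worked instance with the default values from the signature:

text = "x" * 1200
chunk_size, overlap = 500, 50
starts = list(range(0, len(text), chunk_size - overlap))
print(starts)  # [0, 450, 900]
print([len(text[i:i + chunk_size]) for i in starts])  # [500, 500, 300]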
+
+    # ==================== HELPER METHODS FOR HYBRID SEARCH ====================
+
+    def _classify_query(self, query: str) -> str:
+        """Classify query type to determine best search strategy"""
+        query_lower = query.lower()
+
+        # Code/technical query indicators
+        code_indicators = [
+            r'def\s+\w+\(', r'class\s+\w+', r'function\s+\w+',
+            r'import\s+', r'from\s+', r'\.py$', r'\.js$', r'\.java$',
+            r'\w+\(.*\)', r'\{.*\}', r'\[.*\]', r'=\s*\w+',
+            r'const\s+', r'let\s+', r'var\s+', r'type\s+',
+            r'interface\s+', r'export\s+', r'async\s+', r'await\s+',
+            r'SELECT\s+', r'FROM\s+', r'WHERE\s+', r'JOIN\s+',
+            r'#include', r'using\s+', r'namespace\s+', r'template\s+'
+        ]
+
+        for pattern in code_indicators:
+            if re.search(pattern, query_lower):
+                return "code"
+
+        # Natural language query indicators
+        natural_indicators = [
+            r'^how\s+', r'^what\s+', r'^why\s+', r'^explain\s+',
+            r'^describe\s+', r'^summarize\s+', r'^tell\s+me\s+about',
+            r'\?$', r'please', r'could you', r'would you',
+            r'understand', r'meaning', r'concept', r'idea'
+        ]
+
+        for pattern in natural_indicators:
+            if re.search(pattern, query_lower):
+                return "natural"
+
+        # Short keyword query (good for BM25)
+        words = query.split()
+        if len(words) <= 4 and len(query) < 30:
+            return "keyword"
+
+        # Mixed query
+        return "mixed"
+
+    def _bm25_first_fusion(self, bm25_results: List[Dict], semantic_results: List[Dict],
+                           final_k: int) -> List[Dict]:
+        """BM25 first, supplement with semantic if needed"""
+        results = bm25_results.copy()
+
+        # If BM25 results are weak, add semantic results
+        if not results or (results[0]["score"] < 0.3):
+            seen_ids = set(r["id"] for r in results)
+            for sem in semantic_results:
+                if sem["id"] not in seen_ids and len(results) < final_k * 2:
+                    seen_ids.add(sem["id"])
+                    sem["match_type"] = "semantic_supplement"
+                    results.append(sem)
+
+        return results[:final_k]
+
+    def _semantic_first_fusion(self, semantic_results: List[Dict], bm25_results: List[Dict],
+                               final_k: int) -> List[Dict]:
+        """Semantic first, supplement with BM25 if needed"""
+        results = semantic_results.copy()
+
+        # If semantic results are weak, add BM25 results
+        if not results or (results[0]["score"] < 0.3):
+            seen_ids = set(r["id"] for r in results)
+            for bm in bm25_results:
+                if bm["id"] not in seen_ids and len(results) < final_k * 2:
+                    seen_ids.add(bm["id"])
+                    bm["match_type"] = "bm25_supplement"
+                    results.append(bm)
+
+        return results[:final_k]
+
+    def _reciprocal_rank_fusion(self, results1: List[Dict], results2: List[Dict],
+                                final_k: int, k: int = 60) -> List[Dict]:
+        """Combine results using Reciprocal Rank Fusion (RRF)"""
+        # Create rank dictionaries
+        rank_map1 = {r["id"]: rank + 1 for rank, r in enumerate(results1)}
+        rank_map2 = {r["id"]: rank + 1 for rank, r in enumerate(results2)}
+
+        # Get all unique IDs
+        all_ids = set(rank_map1.keys()) | set(rank_map2.keys())
+
+        # Calculate RRF scores
+        rrf_scores = []
+        for doc_id in all_ids:
+            score = 0.0
+            if doc_id in rank_map1:
+                score += 1.0 / (rank_map1[doc_id] + k)
+            if doc_id in rank_map2:
+                score += 1.0 / (rank_map2[doc_id] + k)
+            rrf_scores.append((doc_id, score))
+
+        # Sort by RRF score
+        rrf_scores.sort(key=lambda x: x[1], reverse=True)
+
+        # Create result mapping for quick lookup
+        results_map = {}
+        for r in results1 + results2:
+            if r["id"] not in results_map:
+                results_map[r["id"]] = r
+
+        # Build final results
+        combined_results = []
+        for doc_id, rrf_score in rrf_scores:
+            if doc_id in results_map:
+                result = results_map[doc_id].copy()
+                result["score"] = rrf_score
+                result["match_type"] = "rrf_fusion"
+                combined_results.append(result)
+
+        return combined_results[:final_k]
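
A worked instance of the RRF formula with k = 60: a document ranked 1st by BM25 and 3rd by semantic search scores 1/(1+60) + 1/(3+60) ≈ 0.0164 + 0.0159 = 0.0323, while a document found only by BM25 at rank 2 scores 1/(2+60) ≈ 0.0161 -- agreement between the two retrievers outweighs one slightly better rank.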
+
+    def _weighted_fusion(self, bm25_results: List[Dict], semantic_results: List[Dict],
+                         final_k: int, bm25_weight: float = 0.4,
+                         semantic_weight: float = 0.6) -> List[Dict]:
+        """Weighted combination of BM25 and semantic scores"""
+        # Normalize scores within each result set
+        def normalize_scores(results):
+            if not results:
+                return {}
+            max_score = max(r["score"] for r in results) if results else 1.0
+            if max_score == 0:
+                max_score = 1.0
+            return {r["id"]: r["score"] / max_score for r in results}
+
+        bm25_scores = normalize_scores(bm25_results)
+        semantic_scores = normalize_scores(semantic_results)
+
+        # Get all unique IDs
+        all_ids = set(bm25_scores.keys()) | set(semantic_scores.keys())
+
+        # Calculate weighted scores
+        weighted_scores = []
+        for doc_id in all_ids:
+            bm25_score = bm25_scores.get(doc_id, 0.0)
+            semantic_score = semantic_scores.get(doc_id, 0.0)
+            weighted = (bm25_score * bm25_weight) + (semantic_score * semantic_weight)
+            weighted_scores.append((doc_id, weighted))
+
+        # Sort by weighted score
+        weighted_scores.sort(key=lambda x: x[1], reverse=True)
+
+        # Create result mapping
+        results_map = {}
+        for r in bm25_results + semantic_results:
+            if r["id"] not in results_map:
+                results_map[r["id"]] = r
+
+        # Build final results
+        combined_results = []
+        for doc_id, weighted_score in weighted_scores:
+            if doc_id in results_map:
+                result = results_map[doc_id].copy()
+                result["score"] = weighted_score
+                result["match_type"] = "weighted_fusion"
+                combined_results.append(result)
+
+        return combined_results[:final_k]
|
| 819 |
+
|
| 820 |
+
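    # Worked example (illustrative numbers): with the default weights, a doc at
    # normalized BM25 1.0 but semantic 0.0 scores 0.4 * 1.0 = 0.40, while a doc
    # at BM25 0.5 and semantic 0.8 scores 0.4 * 0.5 + 0.6 * 0.8 = 0.68, so the
    # semantic side dominates unless keyword evidence is overwhelming.
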
    def _smart_rerank(self, query: str, candidates: List[Dict], final_k: int) -> List[Dict]:
        """Smart reranking using cross-encoder"""
        if len(candidates) <= 1:
            return candidates

        try:
            # Prepare passages for reranking (cap at 30 candidates, 1000 chars each)
            passages = []
            for cand in candidates[:30]:
                text = cand.get("text", "")
                if len(text) > 1000:
                    text = text[:1000] + "..."

                source = cand.get("meta", {}).get("source", "unknown")
                subtype = cand.get("meta", {}).get("subtype", "general")

                passages.append({
                    "id": cand["id"],
                    "text": f"File: {source} | Type: {subtype} | Content: {text}"
                })

            if not passages:
                return candidates

            # Rerank with FlashRank
            rerank_request = RerankRequest(query=query, passages=passages)
            reranked = self.ranker.rerank(rerank_request)

            # Update scores based on reranking (30% original, 70% reranker)
            rerank_map = {r["id"]: r["score"] for r in reranked}

            for cand in candidates:
                if cand["id"] in rerank_map:
                    cand["score"] = (cand["score"] * 0.3) + (rerank_map[cand["id"]] * 0.7)
                    cand["match_type"] = cand.get("match_type", "unknown") + "_reranked"

            candidates.sort(key=lambda x: x["score"], reverse=True)

            logger.debug(f"Smart reranking applied to {len(candidates)} candidates")

        except Exception as e:
            logger.warning(f"Reranking error: {e}")

        return candidates[:final_k]

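    # Illustration of the 0.3/0.7 blend: a candidate with retrieval score 0.80
    # that the cross-encoder rates 0.95 ends up at 0.8 * 0.3 + 0.95 * 0.7 =
    # 0.905, so the reranker's relevance judgment outweighs the original score
    # without discarding it entirely.
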
    # ==================== COMPATIBILITY METHODS (UPDATED) ====================

    def retrieve_session_context(self, query: str, user_id: str, chat_id: str,
                                 filter_type: str = None, top_k: int = 100,
                                 final_k: int = 5, min_score: float = 0.25,
                                 use_hybrid: bool = True) -> List[Dict[str, Any]]:
        """
        Enhanced retrieval with hybrid capabilities.

        use_hybrid: whether to use hybrid search (BM25 + semantic)
        """
        # Use hybrid search by default if available
        if use_hybrid and BM25_AVAILABLE:
            return self.hybrid_retrieve(
                query=query,
                user_id=user_id,
                chat_id=chat_id,
                filter_type=filter_type,
                top_k=top_k,
                final_k=final_k,
                strategy="smart"
            )

        # Fall back to original semantic search
        return self._semantic_search(
            query=query,
            user_id=user_id,
            chat_id=chat_id,
            filter_type=filter_type,
            top_k=top_k,
            min_score=min_score,
            final_k=final_k
        )

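    # Usage sketch (hypothetical identifiers): existing callers keep this
    # signature; hybrid search kicks in automatically whenever BM25_AVAILABLE
    # is True.
    #
    #     hits = vdb.retrieve_session_context(
    #         "summarize utils.py", user_id="user-123", chat_id="chat-456"
    #     )
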
    def _semantic_search(self, query: str, user_id: str, chat_id: str,
                         filter_type: str = None, top_k: int = 100,
                         min_score: float = 0.25, final_k: int = 10) -> List[Dict[str, Any]]:
        """Core semantic search engine"""
        with self.memory_lock:
            total_vectors = self.index.ntotal
            user_vectors = sum(1 for m in self.metadata if m.get("user_id") == user_id and m.get("chat_id") == chat_id)

        if total_vectors == 0 or user_vectors == 0:
            return []

        try:
            query_vec = self.embedder.encode([query], show_progress_bar=False)
            faiss.normalize_L2(query_vec)
        except Exception as e:
            logger.error(f"❌ Failed to encode query: {e}")
            return []

        search_k = min(top_k * 2, total_vectors)
        if search_k == 0:
            search_k = min(10, total_vectors)

        try:
            with self.memory_lock:
                if self.index.ntotal == 0:
                    return []
                D, I = self.index.search(np.array(query_vec).astype('float32'), search_k)
        except Exception as e:
            logger.error(f"❌ Search failed: {e}")
            return []

        candidates = []
        query_lower = query.lower()

        for i, idx in enumerate(I[0]):
            if idx == -1 or idx >= len(self.metadata):
                continue

            item = self.metadata[idx]

            # Filter by user and chat
            if item.get("user_id") != user_id or item.get("chat_id") != chat_id:
                continue

            # Filter by type if specified
            if filter_type and item.get("type") != filter_type:
                continue

            score = float(D[0][i])

            if np.isnan(score) or np.isinf(score):
                continue

            # Whole-file boosting: force a high score when the query names the file
            is_whole_file = item.get("is_whole_file", False) or item.get("subtype") == "whole_file"
            if is_whole_file:
                filename = item.get("source", "").lower()
                if filename in query_lower or any(word in filename for word in query_lower.split()):
                    score = 2.5

            if item.get("actual_content"):
                item = item.copy()
                item["text"] = item["actual_content"]

            if score < min_score:
                continue

            candidates.append({
                "id": int(idx),
                "text": item.get("text", ""),
                "meta": item,
                "score": score
            })

        return candidates

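    # Design note: the search over-fetches (search_k = 2 * top_k, capped at the
    # index size) because FAISS scans the shared index for all users; hits from
    # other user/chat pairs are filtered out afterwards, so the headroom keeps
    # enough same-session candidates alive.
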
    def retrieve_exact(self, query: str, user_id: str, chat_id: str,
                       filter_type: str = None, aggressive: bool = True) -> List[Dict[str, Any]]:
        """PRIMARY EXACT MATCH RETRIEVAL - Accuracy First!"""
        start_time = time.time()
        query_lower = query.lower().strip()

        if self.index.ntotal == 0 or not user_id:
            logger.warning("❌ Empty index or invalid user_id")
            return []

        logger.info(f"🎯 EXACT MODE: Searching for '{query[:80]}...'")

        all_candidates = []
        exact_matches = []

        # TACTIC 1: BRUTE FORCE SUBSTRING SEARCH
        logger.debug("🔍 Tactic 1: Brute force substring search...")
        with self.memory_lock:
            for idx, meta in enumerate(self.metadata):
                if meta.get("user_id") != user_id or meta.get("chat_id") != chat_id:
                    continue

                if filter_type and meta.get("type") != filter_type:
                    continue

                text = meta.get("text", "").lower()
                actual_content = meta.get("actual_content", "").lower()

                if query_lower in text or query_lower in actual_content:
                    score = 3.0
                    match_type = "exact_substring"

                    display_text = meta.get("actual_content", meta.get("text", ""))

                    exact_matches.append({
                        "id": idx,
                        "text": display_text,
                        "meta": meta,
                        "score": score,
                        "match_type": match_type,
                        "confidence": "perfect"
                    })

        if exact_matches:
            logger.info(f"✨ Found {len(exact_matches)} PERFECT exact matches!")
            self.performance_stats["exact_matches"] += 1

            # Prefer whole-file chunks, then higher scores
            exact_matches.sort(key=lambda x: (
                1 if x["meta"].get("is_whole_file") else 0,
                x["score"]
            ), reverse=True)

            elapsed = time.time() - start_time
            logger.info(f"⚡ Exact match retrieval took {elapsed:.3f}s")
            return exact_matches[:3]

        # TACTIC 2: KEYWORD MATCHING
        if aggressive:
            logger.debug("🔍 Tactic 2: Aggressive keyword matching...")
            keywords = re.findall(r'\b\w{3,}\b', query_lower)

            if keywords:
                with self.memory_lock:
                    for idx, meta in enumerate(self.metadata):
                        if meta.get("user_id") != user_id or meta.get("chat_id") != chat_id:
                            continue
                        if filter_type and meta.get("type") != filter_type:
                            continue

                        text = meta.get("text", "").lower()
                        keyword_matches = sum(1 for kw in keywords if kw in text)

                        if keyword_matches >= max(1, len(keywords) * 0.6):
                            score = 2.0 + (keyword_matches / len(keywords)) * 0.5
                            all_candidates.append({
                                "id": idx,
                                "text": meta.get("actual_content", meta.get("text", "")),
                                "meta": meta,
                                "score": score,
                                "match_type": "keyword_explosion",
                                "keyword_match_ratio": keyword_matches / len(keywords)
                            })

        # TACTIC 3: SEMANTIC SEARCH WITH LOW THRESHOLD
        logger.debug("🔍 Tactic 3: Semantic search with low threshold...")
        semantic_results = self._semantic_search(
            query=query,
            user_id=user_id,
            chat_id=chat_id,
            filter_type=filter_type,
            top_k=200,
            min_score=0.1,
            final_k=30
        )

        for res in semantic_results:
            res["match_type"] = "semantic_low_threshold"
            all_candidates.append(res)

        # DEDUPLICATE AND RANK
        seen_ids = set()
        unique_candidates = []

        for candidate in all_candidates:
            if candidate["id"] not in seen_ids:
                seen_ids.add(candidate["id"])
                unique_candidates.append(candidate)

        unique_candidates.sort(key=lambda x: x["score"], reverse=True)

        # Apply reranking if available
        if unique_candidates:
            try:
                passages = []
                for cand in unique_candidates[:50]:
                    text_for_rerank = cand["text"]
                    if len(text_for_rerank) > 1000:
                        text_for_rerank = text_for_rerank[:1000] + "..."

                    passages.append({
                        "id": cand["id"],
                        "text": text_for_rerank
                    })

                if passages:
                    rerank_request = RerankRequest(query=query, passages=passages)
                    reranked = self.ranker.rerank(rerank_request)

                    rerank_map = {r["id"]: r["score"] for r in reranked}
                    for cand in unique_candidates:
                        if cand["id"] in rerank_map:
                            cand["score"] = cand["score"] * 0.3 + rerank_map[cand["id"]] * 0.7

                    unique_candidates.sort(key=lambda x: x["score"], reverse=True)

            except Exception as e:
                logger.warning(f"⚠️ Reranking failed: {e}")

        # FINAL SELECTION
        final_results = []
        confidence_threshold = 0.4 if aggressive else 0.5

        for cand in unique_candidates[:10]:
            if cand["score"] >= confidence_threshold:
                final_results.append(cand)

        if final_results:
            self.performance_stats["semantic_matches"] += 1
            logger.info(f"✅ Found {len(final_results)} relevant results")

            top_match = final_results[0]
            logger.info(f"🏆 Top match: Score={top_match['score']:.3f}, Type={top_match.get('match_type', 'unknown')}")

            if top_match["meta"].get("is_whole_file"):
                logger.info(f"📄 Returning whole file: {top_match['meta'].get('source', 'unknown')}")

        elapsed = time.time() - start_time
        logger.info(f"⏱️ Exact retrieval completed in {elapsed:.3f}s")

        # Store in query history
        self.query_history.append({
            "query": query[:100],
            "timestamp": time.time(),
            "results_count": len(final_results),
            "top_score": final_results[0]["score"] if final_results else 0,
            "elapsed_time": elapsed,
            "method": "exact"
        })

        if len(self.query_history) > 1000:
            self.query_history = self.query_history[-500:]

        return final_results[:5]

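    # Usage sketch (hypothetical identifiers): exact mode short-circuits on a
    # literal substring hit and only then falls back to keyword and semantic
    # tactics.
    #
    #     hits = vdb.retrieve_exact("def _save_index", user_id="u1", chat_id="c1")
    #     # hits[0]["match_type"] == "exact_substring" when a verbatim match exists
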
    # ==================== INFRASTRUCTURE METHODS ====================

    def _load_or_create_index(self):
        """Thread-safe and process-safe index loading/creation"""
        # … (unchanged lines collapsed in the diff view)
        try:
            logger.info("📂 Loading existing vector index...")
            self.index = faiss.read_index(self.index_path)

            if self.index.ntotal < 0:
                raise ValueError("Corrupt index: negative vector count")

            with open(self.metadata_path, "rb") as f:
                self.metadata = pickle.load(f)

            # A count mismatch means index and metadata files are out of sync,
            # so rebuild rather than serve misaligned results
            if len(self.metadata) != self.index.ntotal:
                logger.error(f"⚠️ Metadata mismatch: {len(self.metadata)} entries vs {self.index.ntotal} vectors. Rebuilding...")
                self._create_new_index()
                return

            logger.info(f"✅ Loaded index with {self.index.ntotal} vectors, {len(self.metadata)} metadata entries")
        except Exception as e:
            logger.error(f"⚠️ Failed to load index: {e}. Creating new one.")
            # … (unchanged lines collapsed in the diff view)
            self._create_new_index()

    def _create_new_index(self):
        """Create fresh IndexFlatIP for cosine similarity"""
        dimension = 384
        self.index = faiss.IndexFlatIP(dimension)
        self.metadata = []
        logger.info(f"🆕 Created new IndexFlatIP with dimension {dimension}")

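    # Note: IndexFlatIP scores by raw inner product; because both stored vectors
    # and query vectors are first run through faiss.normalize_L2, that inner
    # product equals cosine similarity (identical embeddings score 1.0,
    # orthogonal ones 0.0).
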
    def _save_index(self):
        """Thread-safe and process-safe index saving with atomic writes"""
        with self.file_lock:
            temp_index = f"{self.index_path}.tmp"
            temp_meta = f"{self.metadata_path}.tmp"

            try:
                faiss.write_index(self.index, temp_index)
                with open(temp_meta, "wb") as f:
                    pickle.dump(self.metadata, f)

                os.replace(temp_index, self.index_path)
                os.replace(temp_meta, self.metadata_path)

                logger.info(f"💾 Saved index: {self.index.ntotal} vectors, {len(self.metadata)} metadata entries")
            except Exception as e:
                logger.error(f"❌ Failed to save index: {e}")
                # Clean up any leftover temp files
                for f in [temp_index, temp_meta]:
                    if os.path.exists(f):
                        try:
                            os.remove(f)
                        except Exception:
                            logger.warning(f"Failed to remove temp file: {f}")
            finally:
                gc.collect()

    def _rollback_partial_storage(self, user_id: str, chat_id: str):
        """Remove partially stored vectors for a session"""
        try:
            new_metadata = []
            surviving_texts = []

            for meta in self.metadata:
                if meta.get("user_id") != user_id or meta.get("chat_id") != chat_id:
                    new_metadata.append(meta)
                    surviving_texts.append(meta["text"])

            if len(new_metadata) == len(self.metadata):
                return

            if surviving_texts:
                embeddings = self.embedder.encode(surviving_texts, show_progress_bar=False)
                faiss.normalize_L2(embeddings)

                new_index = faiss.IndexFlatIP(384)
                new_index.add(np.array(embeddings).astype('float32'))
                self.index = new_index
            else:
                self.index = faiss.IndexFlatIP(384)

            self.metadata = new_metadata
            self._save_index()

            # Invalidate BM25 cache
            self._invalidate_bm25_cache(user_id, chat_id)

            logger.info(f"🔄 Rolled back partial storage for user {user_id[:8]}")

        except Exception as e:
            logger.error(f"❌ Rollback failed: {e}")
            self._create_new_index()

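    # Design note: rollback rebuilds the whole index from the surviving texts
    # instead of deleting vectors in place; re-encoding costs time, but it keeps
    # FAISS row order and self.metadata aligned by construction.
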
    def _verify_storage(self, user_id: str, chat_id: str, expected_count: int):
        """Verify vectors were stored correctly"""
        with self.memory_lock:
            user_vectors = sum(1 for m in self.metadata if m.get("user_id") == user_id and m.get("chat_id") == chat_id)

        logger.info(f"🔍 Storage verification: User {user_id[:8]} has {user_vectors} vectors (expected: {expected_count})")

        if user_vectors < expected_count:
            logger.warning(f"⚠️ Storage mismatch for user {user_id[:8]}")

    # ==================== ANALYTICS & ADMIN METHODS ====================

    def get_retrieval_analytics(self, query: str = None) -> Dict[str, Any]:
        """Get detailed analytics about retrieval performance"""
        analytics = {
            "performance_stats": self.performance_stats.copy(),
            "query_types": dict(self.query_types),
            "query_history_count": len(self.query_history),
            "index_stats": {
                "total_vectors": self.index.ntotal,
                "metadata_count": len(self.metadata),
                "avg_metadata_size": 0,
                "bm25_cache_size": len(self.bm25_indices),
                "bm25_cache_capacity": self.bm25_cache_size,
                "bm25_available": BM25_AVAILABLE,
                "nltk_available": NLTK_AVAILABLE
            },
            "recent_queries": [],
            "cache_stats": {
                "bm25_cache_hits": 0,  # could be tracked with more instrumentation
                "bm25_cache_misses": 0
            }
        }

        if self.metadata:
            total_text_size = sum(len(m.get("text", "")) for m in self.metadata)
            analytics["index_stats"]["avg_metadata_size"] = total_text_size / len(self.metadata)

        for qh in self.query_history[-10:]:
            analytics["recent_queries"].append({
                "query_preview": qh.get("query", "")[:50],
                "results": qh.get("results_count", 0),
                "top_score": qh.get("top_score", 0),
                "elapsed": qh.get("elapsed_time", 0),
                "method": qh.get("method", "unknown")
            })

        if query:
            query_lower = query.lower()
            keyword_matches = defaultdict(int)

            for meta in self.metadata:
                text = meta.get("text", "").lower()
                for word in re.findall(r'\b\w{3,}\b', query_lower):
                    if word in text:
                        keyword_matches[word] += 1

            analytics["query_analysis"] = {
                "query_length": len(query),
                "word_count": len(query.split()),
                "keyword_frequency": dict(keyword_matches),
                "has_file_reference": bool(re.search(r'\.(?:py|js|html|css|ts|java|cpp)', query, re.I)),
                "classified_as": self._classify_query(query)
            }

        return analytics

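    # Usage sketch: passing a query adds a per-query breakdown on top of the
    # global stats.
    #
    #     stats = vdb.get_retrieval_analytics("vector.py error")
    #     print(stats["index_stats"]["total_vectors"],
    #           stats["query_analysis"]["classified_as"])
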
    def store_chat_context(self, messages: list, user_id: str, chat_id: str) -> bool:
        """Store chat history as session memory"""
        if not messages or not user_id:
            return False

        conversation = ""
        for msg in messages[-10:]:
            role = msg.get("role", "unknown")
            content = msg.get("content", "")
            if content:
                conversation += f"{role}: {content}\n"  # reconstructed; exact format collapsed in diff view

        if len(conversation) < 50:
            return False

        chunks = self._chunk_text_enhanced(conversation, chunk_size=800, overlap=100)

        if not chunks:
            return False

        texts = [c["text"] for c in chunks]
        metadata_list = []
        for i, chunk in enumerate(chunks):  # loop head reconstructed; other fields collapsed in diff view
            metadata_list.append({
                # … (remaining metadata fields collapsed in the diff view)
                "chunk_index": i
            })

        try:
            embeddings = self.embedder.encode(texts, show_progress_bar=False)
            faiss.normalize_L2(embeddings)

            with self.memory_lock:
                # assumed from the other storage paths; original line collapsed in diff view
                self.index.add(np.array(embeddings).astype('float32'))
                self.metadata.extend(metadata_list)
                self._save_index()

            # Invalidate BM25 cache for this session
            self._invalidate_bm25_cache(user_id, chat_id)

            logger.info(f"💭 Stored {len(texts)} chat history chunks for user {user_id[:8]}")
            return True

        except Exception as e:
            logger.error(f"❌ Failed to store chat history: {e}")
            return False

    def delete_session(self, user_id: str, chat_id: str) -> bool:
        """Surgical Strike: Permanently remove ONLY one specific session"""
        with self.memory_lock:
            new_metadata = []
            removed_count = 0

            for meta in self.metadata:
                if meta.get("user_id") == user_id and meta.get("chat_id") == chat_id:
                    removed_count += 1
                else:
                    new_metadata.append(meta)

            if removed_count == 0:
                logger.info(f"ℹ️ No vectors to delete for session {chat_id}")
                return False

            logger.info(f"🧹 Surgically removing {removed_count} vectors for session {chat_id}...")

            if not new_metadata:
                self.index = faiss.IndexFlatIP(384)
            else:
                surviving_texts = [m["text"] for m in new_metadata]
                try:
                    embeddings = self.embedder.encode(surviving_texts, show_progress_bar=False)
                    faiss.normalize_L2(embeddings)

                    new_index = faiss.IndexFlatIP(384)
                    new_index.add(np.array(embeddings).astype('float32'))
                    self.index = new_index
                except Exception as e:
                    logger.error(f"❌ Rebuild failed: {e}")
                    return False

            self.metadata = new_metadata
            self._save_index()

            # Invalidate BM25 cache for this session
            self._invalidate_bm25_cache(user_id, chat_id)

            logger.info(f"✅ Successfully deleted session {chat_id}")
            return True

    def get_user_stats(self, user_id: str) -> Dict[str, Any]:
        """Get statistics for a user's sessions"""
        with self.memory_lock:
            user_vectors = []
            for vec_id, meta in enumerate(self.metadata):
                if meta.get("user_id") == user_id:
                    user_vectors.append((vec_id, meta))

            stats = {
                "user_id": user_id,
                "total_vectors": len(user_vectors),
                "by_type": {},
                "by_source": {},
                "sessions": {},
                "bm25_cached": False
            }

            for vec_id, vec in user_vectors:
                vec_type = vec.get("type", "unknown")
                source = vec.get("source", "unknown")
                chat_id = vec.get("chat_id", "unknown")

                stats["by_type"][vec_type] = stats["by_type"].get(vec_type, 0) + 1
                stats["by_source"][source] = stats["by_source"].get(source, 0) + 1
                stats["sessions"][chat_id] = stats["sessions"].get(chat_id, 0) + 1

            # Check if any of the user's sessions has a BM25 index in cache
            for chat_id in stats["sessions"]:
                key = (user_id, chat_id)
                if key in self.bm25_indices:
                    stats["bm25_cached"] = True
                    break

            return stats

    def cleanup_old_sessions(self, max_age_hours: int = 24) -> int:
        """Clean up old session data"""
        current_time = time.time()
        cutoff = current_time - (max_age_hours * 3600)

        with self.memory_lock:
            old_metadata = []
            new_metadata = []
            affected_sessions = set()

            for meta in self.metadata:
                if meta.get("timestamp", 0) < cutoff:
                    old_metadata.append(meta)
                    user_id = meta.get("user_id")
                    chat_id = meta.get("chat_id")
                    if user_id and chat_id:
                        affected_sessions.add((user_id, chat_id))
                else:
                    new_metadata.append(meta)

            if not old_metadata:
                return 0

            logger.info(f"🧹 Cleaning up {len(old_metadata)} old vectors...")

            recent_texts = [m["text"] for m in new_metadata]

            if recent_texts:
                try:
                    embeddings = self.embedder.encode(recent_texts, show_progress_bar=False)
                    faiss.normalize_L2(embeddings)

                    self.index = faiss.IndexFlatIP(384)
                    self.index.add(np.array(embeddings).astype('float32'))
                except Exception as e:
                    logger.error(f"❌ Failed to rebuild index: {e}")
                    return 0
            else:
                self.index = faiss.IndexFlatIP(384)

            self.metadata = new_metadata
            self._save_index()

            # Remove affected sessions from the BM25 cache
            for key in affected_sessions:
                self._invalidate_bm25_cache(*key)

            logger.info(f"✅ Cleanup complete. Removed {len(old_metadata)} vectors.")
            return len(old_metadata)

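    # Usage sketch: a periodic maintenance job might call
    # cleanup_old_sessions(max_age_hours=24); the return value is the number of
    # vectors dropped, and affected sessions also lose their cached BM25 index.
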
    def _cleanup(self):
        # … (unchanged line collapsed in the diff view)
        try:
            if hasattr(self, 'file_lock'):
                self.file_lock.release()
            gc.collect()
        except Exception as e:
            logger.warning(f"Cleanup warning: {e}")


# Global instance (singleton pattern)
_vdb_instance = None
_vdb_lock = threading.Lock()


def get_vector_db(index_path: str = "faiss_session_index.bin", metadata_path: str = "session_metadata.pkl") -> VectorDatabase:
    """Singleton factory for VectorDatabase with thread-safe initialization"""
    global _vdb_instance
    if _vdb_instance is None:
        with _vdb_lock:
            if _vdb_instance is None:
                _vdb_instance = VectorDatabase(index_path, metadata_path)
    return _vdb_instance
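
# Design note: get_vector_db() uses double-checked locking; the unlocked fast
# path keeps repeat calls cheap, and the second check inside _vdb_lock ensures
# only one VectorDatabase is ever constructed across threads.
#
# Usage sketch (hypothetical identifiers):
#
#     vdb = get_vector_db()
#     hits = vdb.retrieve_session_context(
#         "summarize utils.py", user_id="user-123", chat_id="chat-456", final_k=5
#     )
#     for h in hits:
#         print(round(h["score"], 3), h["meta"].get("source"))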

# For backward compatibility