Spaces:

notebooklm-group2
/

NotebookLM-Clone

Sleeping

App Files Files Community

rahulrb99 committed on Mar 6

Commit

5ccf778

unverified ·

2 Parent(s): 07f5aca a524c65

Merge pull request #15 from tAnboyy/feature/citations-retrieval-isolation

Browse files

Files changed (8) hide show

app.py +44 -17
backend/chat_service.py +26 -4
backend/chunking.py +81 -0
backend/embedding_service.py +15 -3
backend/ingestion_service.py +5 -16
backend/ingestion_txt.py +8 -18
backend/rag_service.py +7 -5
backend/retrieval_service.py +56 -5

app.py CHANGED Viewed

@@ -256,6 +256,7 @@ def _initial_load(profile: gr.OAuthProfile | None = None):
         gr.update(visible=bool(user_id)),
         gr.update(visible=not bool(user_id)),
         source_status,
     )
@@ -650,29 +651,51 @@ def _chat_history_to_pairs(messages: list[dict]) -> list[tuple[str, str]]:
     return pairs
-def _load_chat_history(notebook_id) -> tuple[list[tuple[str, str]], list[tuple[str, str]]]:
-    """Load chat for notebook. Returns (history_pairs, history_pairs) for State and Chatbot."""
     if not notebook_id:
         return [], []
-    messages = load_chat(notebook_id)
     pairs = _chat_history_to_pairs(messages)
     return pairs, pairs
 def _on_chat_submit(query, notebook_id, chat_history, profile: gr.OAuthProfile | None):
-    """Handle chat submit: call RAG, return updated history."""
     if not notebook_id:
-        return "", chat_history, "Select a notebook first."
     if not query or not query.strip():
-        return "", chat_history, "Enter a message."
     user_id = _user_id(profile)
     if not user_id:
-        return "", chat_history, "Please sign in first."
     try:
-        answer, updated = rag_chat(notebook_id, query.strip(), chat_history)
-        return "", updated, ""
     except Exception as e:
-        return "", chat_history, f"Error: {e}"
 def _get_quiz_pdfs(source_type, notebook_id):
     if source_type != "PDF":
@@ -755,6 +778,7 @@ with gr.Blocks(
         selected_notebook_id = gr.State(None)
         chat_history_state = gr.State([])
         quiz_state = gr.State([])
         with gr.Group(elem_classes=["section-card", "manager-card"]):
             gr.Markdown("**Notebook Manager**", elem_classes=["section-title"])
@@ -891,6 +915,9 @@ with gr.Blocks(
             )
             chat_submit_btn = gr.Button("Send", variant="primary")
             chat_status = gr.Markdown("", elem_classes=["status"])
         with gr.Group(elem_classes=["section-card", "artifacts-card"]):
             gr.Markdown("**Artifacts**", elem_classes=["section-title"])
@@ -945,21 +972,21 @@ with gr.Blocks(
     demo.load(
         _initial_load,
         inputs=None,
-        outputs=[nb_state, selected_notebook_id, notebook_status, auth_text, auth_info_row, app_content, login_container, source_status],
         api_name=False,
     )
     demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
     demo.load(_load_sources, inputs=[selected_notebook_id], outputs=[sources_display], api_name=False)
     demo.load(_selected_notebook_text, inputs=[selected_notebook_id, nb_state], outputs=[selected_notebook_md], api_name=False)
-    def _on_notebook_select_for_chat(notebook_id):
-        hist, _ = _load_chat_history(notebook_id)
-        return hist, hist
     selected_notebook_id.change(
         _on_notebook_select_for_chat,
-        inputs=[selected_notebook_id],
-        outputs=[chat_history_state, chatbot],
         api_name=False,
     )
     selected_notebook_id.change(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
@@ -1077,7 +1104,7 @@ with gr.Blocks(
     chat_submit_btn.click(
         _on_chat_submit,
         inputs=[chat_input, selected_notebook_id, chat_history_state],
-        outputs=[chat_input, chat_history_state, chat_status],
         api_name=False,
     ).then(
         lambda h: (h, h),

         gr.update(visible=bool(user_id)),
         gr.update(visible=not bool(user_id)),
         source_status,
+        user_id,
     )
     return pairs
def _load_chat_history(notebook_id, user_id: str | None) -> tuple[list[tuple[str, str]], list[tuple[str, str]]]:
    """Fetch a notebook's stored chat as ``(state_value, chatbot_value)``.

    Both slots of the returned tuple hold the same pair list, one for the
    State component and one for the Chatbot. ``user_id`` is forwarded to
    ``load_chat`` so the ownership check happens there. When no notebook is
    selected, two empty lists are returned without any lookup.
    """
    if notebook_id:
        history = _chat_history_to_pairs(load_chat(notebook_id, user_id))
        return history, history
    return [], []
+def _format_citations(chunks: list[dict]) -> str:
+    """Format retrieved chunks for citation display."""
+    if not chunks:
+        return ""
+    lines = ["**Sources cited:**", ""]
+    for i, c in enumerate(chunks, 1):
+        meta = c.get("metadata") or {}
+        source_label = meta.get("url") or meta.get("file_name") or meta.get("file_path") or "Source"
+        content = (c.get("content") or "")[:300]
+        if len(c.get("content") or "") > 300:
+            content += "..."
+        content = content.replace("\n", " ")
+        lines.append(f"**[{i}]** *{source_label}*")
+        lines.append(f"> {content}")
+        lines.append("")
+    return "\n".join(lines)
 def _on_chat_submit(query, notebook_id, chat_history, profile: gr.OAuthProfile | None):
     """Run one chat turn through RAG and refresh the citations panel.

     Returns a 5-tuple: cleared input text, chat history, status message,
     citations Markdown, and a visibility update for the citations accordion.
     On any validation failure or exception the incoming history is returned
     unchanged and the citations panel is hidden.
     """

     def _rejected(status):
         # Shared failure shape: keep history, show the status, hide citations.
         return "", chat_history, status, "", gr.update(visible=False)

     if not notebook_id:
         return _rejected("Select a notebook first.")
     if not (query and query.strip()):
         return _rejected("Enter a message.")
     user_id = _user_id(profile)
     if not user_id:
         return _rejected("Please sign in first.")
     try:
         _answer, new_history, chunks = rag_chat(notebook_id, query.strip(), chat_history, user_id=user_id)
         citations_text = _format_citations(chunks)
         return "", new_history, "", citations_text, gr.update(visible=bool(chunks))
     except Exception as e:
         return _rejected(f"Error: {e}")
 def _get_quiz_pdfs(source_type, notebook_id):
     if source_type != "PDF":
         selected_notebook_id = gr.State(None)
         chat_history_state = gr.State([])
         quiz_state = gr.State([])
+        user_id_state = gr.State(None)
         with gr.Group(elem_classes=["section-card", "manager-card"]):
             gr.Markdown("**Notebook Manager**", elem_classes=["section-title"])
             )
             chat_submit_btn = gr.Button("Send", variant="primary")
             chat_status = gr.Markdown("", elem_classes=["status"])
+            citations_display = gr.Accordion("📎 Sources cited (from last response)", open=True, visible=False)
+            with citations_display:
+                citations_md = gr.Markdown("", elem_classes=["status"])
         with gr.Group(elem_classes=["section-card", "artifacts-card"]):
             gr.Markdown("**Artifacts**", elem_classes=["section-title"])
     demo.load(
         _initial_load,
         inputs=None,
+        outputs=[nb_state, selected_notebook_id, notebook_status, auth_text, auth_info_row, app_content, login_container, source_status, user_id_state],
         api_name=False,
     )
     demo.load(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
     demo.load(_load_sources, inputs=[selected_notebook_id], outputs=[sources_display], api_name=False)
     demo.load(_selected_notebook_text, inputs=[selected_notebook_id, nb_state], outputs=[selected_notebook_md], api_name=False)
+    def _on_notebook_select_for_chat(notebook_id, user_id):
+        hist, _ = _load_chat_history(notebook_id, user_id)
+        return hist, hist, "", gr.update(visible=False)
     selected_notebook_id.change(
         _on_notebook_select_for_chat,
+        inputs=[selected_notebook_id, user_id_state],
+        outputs=[chat_history_state, chatbot, citations_md, citations_display],
         api_name=False,
     )
     selected_notebook_id.change(_list_uploaded_pdfs, inputs=[selected_notebook_id], outputs=[uploaded_pdf_dd], api_name=False)
     chat_submit_btn.click(
         _on_chat_submit,
         inputs=[chat_input, selected_notebook_id, chat_history_state],
+        outputs=[chat_input, chat_history_state, chat_status, citations_md, citations_display],
         api_name=False,
     ).then(
         lambda h: (h, h),

backend/chat_service.py CHANGED Viewed

@@ -3,8 +3,28 @@
 from backend.db import supabase
-def save_message(notebook_id: str, role: str, content: str) -> None:
-    """Append a message to the messages table."""
     supabase.table("messages").insert({
         "notebook_id": notebook_id,
         "role": role,
@@ -12,8 +32,10 @@ def save_message(notebook_id: str, role: str, content: str) -> None:
     }).execute()
-def load_chat(notebook_id: str) -> list[dict]:
-    """Load chat history. Returns [{role, content, created_at}, ...]."""
     result = (
         supabase.table("messages")
         .select("role, content, created_at")

 from backend.db import supabase
+def _notebook_belongs_to_user(notebook_id: str, user_id: str | None) -> bool:
+    """Verify the notebook is owned by the user. Returns False if user_id is None."""
+    if not user_id or not notebook_id:
+        return False
+    try:
+        result = (
+            supabase.table("notebooks")
+            .select("id")
+            .eq("id", notebook_id)
+            .eq("user_id", user_id)
+            .limit(1)
+            .execute()
+        )
+        return len(result.data or []) > 0
+    except Exception:
+        return False
+def save_message(notebook_id: str, user_id: str | None, role: str, content: str) -> None:
+    """Append a message only if the notebook belongs to the user."""
+    if not _notebook_belongs_to_user(notebook_id, user_id):
+        return
     supabase.table("messages").insert({
         "notebook_id": notebook_id,
         "role": role,
     }).execute()
+def load_chat(notebook_id: str, user_id: str | None) -> list[dict]:
+    """Load chat history only if the notebook belongs to the user. Returns [] if not owned."""
+    if not _notebook_belongs_to_user(notebook_id, user_id):
+        return []
     result = (
         supabase.table("messages")
         .select("role, content, created_at")

backend/chunking.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""Shared chunking utilities for RAG ingestion."""
import re

DEFAULT_CHUNK_SIZE = 512
DEFAULT_CHUNK_OVERLAP = 80
# Kept so existing importers keep working. Short chunks are no longer
# discarded: dropping them silently lost source text.
MIN_CHUNK_SIZE = 100

_PARAGRAPH_BREAK = re.compile(r"\n\s*\n")
_SENTENCE_BREAK = re.compile(r"(?<=[.!?])\s+")


def _split_into_sentences(text: str) -> list[str]:
    """Split text on sentence boundaries (rough heuristic).

    A boundary is any whitespace run that follows ``.``, ``!`` or ``?``.
    Returns the non-empty, stripped pieces in order; ``[]`` for blank input.
    """
    text = re.sub(r"\n+", "\n", text.strip())
    if not text:
        return []
    return [piece.strip() for piece in _SENTENCE_BREAK.split(text) if piece.strip()]


def _split_into_units(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Break text into ordered units, none longer than ``chunk_size``.

    Paragraphs (separated by blank lines) are split before whitespace is
    collapsed, so a sentence never spans a paragraph boundary. A sentence
    longer than ``chunk_size`` is cut by ``chunk_text_fallback``.
    """
    units: list[str] = []
    for raw_paragraph in _PARAGRAPH_BREAK.split(text):
        paragraph = " ".join(raw_paragraph.split())
        if not paragraph:
            continue
        for sentence in _split_into_sentences(paragraph):
            if len(sentence) <= chunk_size:
                units.append(sentence)
            else:
                units.extend(chunk_text_fallback(sentence, chunk_size, overlap))
    return units


def _overlap_tail(units: list[str], budget: int) -> list[str]:
    """Return the longest suffix of ``units`` whose cost fits in ``budget``.

    Each unit costs its length plus one joining space.
    """
    tail: list[str] = []
    used = 0
    for unit in reversed(units):
        cost = len(unit) + 1
        if used + cost > budget:
            break
        tail.append(unit)
        used += cost
    tail.reverse()
    return tail


def chunk_text_semantic(
    text: str,
    chunk_size: int = DEFAULT_CHUNK_SIZE,
    overlap: int = DEFAULT_CHUNK_OVERLAP,
) -> list[str]:
    """
    Semantic chunking: split on paragraphs first, then sentences.
    Preserves context better than blind character splits.

    Sentences are packed greedily into chunks of at most ``chunk_size``
    characters. Each new chunk starts with trailing sentences of the previous
    one, up to ``overlap`` characters, as long as the chunk still fits. No
    input text is dropped.

    Args:
        text: Raw text; blank lines mark paragraph boundaries.
        chunk_size: Maximum characters per chunk (must be >= 1).
        overlap: Target characters repeated between neighbouring chunks;
            negative values are treated as 0.

    Returns:
        Chunks in document order; ``[]`` for blank input.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    overlap = max(0, overlap)

    chunks: list[str] = []
    current: list[str] = []
    used = 0  # sum(len(u) + 1 for u in current); the joined length is used - 1
    for unit in _split_into_units(text, chunk_size, overlap):
        # used + len(unit) is the joined length if this unit were appended.
        if current and used + len(unit) > chunk_size:
            chunks.append(" ".join(current))
            # Carry overlap forward only if it still leaves room for this unit.
            current = _overlap_tail(current, min(overlap, chunk_size - len(unit)))
            used = sum(len(kept) + 1 for kept in current)
        current.append(unit)
        used += len(unit) + 1
    if current:
        chunks.append(" ".join(current))
    return chunks


def chunk_text_fallback(text: str, chunk_size: int, overlap: int) -> list[str]:
    """Character-based chunking when semantic splitting fails.

    Whitespace is collapsed to single spaces, then the text is cut into
    windows of ``chunk_size`` characters that advance by
    ``chunk_size - overlap`` (at least 1). The loop stops once a window
    reaches the end of the text, so no trailing chunk is made only of
    characters already in the previous chunk.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    clean = " ".join(text.split())
    if not clean:
        return []
    # A negative overlap would make the step exceed the window and skip text.
    step = max(1, chunk_size - max(0, overlap))
    chunks: list[str] = []
    start = 0
    while start < len(clean):
        end = min(len(clean), start + chunk_size)
        chunks.append(clean[start:end])
        if end == len(clean):
            break
        start += step
    return chunks

backend/embedding_service.py CHANGED Viewed

@@ -1,10 +1,16 @@
-"""Shared embedding service - 384-dim vectors for RAG (ingestion + retrieval). Uses MiniLM for low memory."""
 from sentence_transformers import SentenceTransformer
-_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 _model = None
 def _get_model() -> SentenceTransformer:
     """Lazy-load the embedding model."""
@@ -14,17 +20,23 @@ def _get_model() -> SentenceTransformer:
     return _model
 def encode(texts: list[str], task: str = "search_document") -> list[list[float]]:
     """
     Embed texts. Returns list of 384-dim vectors.
     Args:
         texts: List of strings to embed.
-        task: Unused (MiniLM doesn't need prefix); kept for API compatibility.
     """
     if not texts:
         return []
     model = _get_model()
     embeddings = model.encode(texts, show_progress_bar=False)
     return [e.tolist() for e in embeddings]

+"""Shared embedding service - 384-dim vectors for RAG (ingestion + retrieval)."""
+import os
 from sentence_transformers import SentenceTransformer
+# all-MiniLM-L6-v2 (default) or BAAI/bge-small-en-v1.5 for better quality (both 384 dims)
+_MODEL_NAME = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 _model = None
+# BGE models: add prefix only to queries, not to documents
+_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
 def _get_model() -> SentenceTransformer:
     """Lazy-load the embedding model."""
     return _model
def _is_bge_model() -> bool:
    """Report whether the configured model name contains "bge" (any case)."""
    lowered_name = _MODEL_NAME.lower()
    return "bge" in lowered_name
 def encode(texts: list[str], task: str = "search_document") -> list[list[float]]:
     """
     Embed texts with the shared model and return one vector per input.

     Args:
         texts: List of strings to embed.
         task: "search_query" for queries, "search_document" for documents.
             Only a BGE model given "search_query" gets the query prefix
             prepended; every other combination embeds the text as is.

     Returns:
         One list of floats per input text; ``[]`` when ``texts`` is empty
         (the model is not loaded in that case).
     """
     if not texts:
         return []
     embedder = _get_model()
     needs_query_prefix = _is_bge_model() and task == "search_query"
     inputs = [_BGE_QUERY_PREFIX + text for text in texts] if needs_query_prefix else texts
     vectors = embedder.encode(inputs, show_progress_bar=False)
     return [vector.tolist() for vector in vectors]

backend/ingestion_service.py CHANGED Viewed

@@ -4,14 +4,15 @@ from pathlib import Path
 from pypdf import PdfReader
 from backend.db import supabase
 from backend.embedding_service import encode as embed_texts
 import requests
 from bs4 import BeautifulSoup
-DEFAULT_CHUNK_SIZE = 1200
-DEFAULT_CHUNK_OVERLAP = 200
 def _extract_pdf_text(pdf_path: Path) -> str:
@@ -23,20 +24,8 @@ def _extract_pdf_text(pdf_path: Path) -> str:
 def _chunk_text(text: str, chunk_size: int = DEFAULT_CHUNK_SIZE, overlap: int = DEFAULT_CHUNK_OVERLAP) -> list[str]:
-    clean = " ".join(text.split())
-    if not clean:
-        return []
-    chunks: list[str] = []
-    start = 0
-    step = max(1, chunk_size - overlap)
-    while start < len(clean):
-        end = min(len(clean), start + chunk_size)
-        chunks.append(clean[start:end])
-        start += step
-    return chunks
 def ingest_pdf_chunks(notebook_id: str, source_id: str, pdf_path: Path) -> int:

 from pypdf import PdfReader
+from backend.chunking import chunk_text_semantic, chunk_text_fallback
 from backend.db import supabase
 from backend.embedding_service import encode as embed_texts
 import requests
 from bs4 import BeautifulSoup
+DEFAULT_CHUNK_SIZE = 512
+DEFAULT_CHUNK_OVERLAP = 80
 def _extract_pdf_text(pdf_path: Path) -> str:
 def _chunk_text(text: str, chunk_size: int = DEFAULT_CHUNK_SIZE, overlap: int = DEFAULT_CHUNK_OVERLAP) -> list[str]:
     """Chunk ``text`` by delegating to the shared ``chunk_text_semantic`` utility."""
     return chunk_text_semantic(text, chunk_size=chunk_size, overlap=overlap)
 def ingest_pdf_chunks(notebook_id: str, source_id: str, pdf_path: Path) -> int:

backend/ingestion_txt.py CHANGED Viewed

@@ -8,6 +8,7 @@ import re
 from datetime import datetime
 from uuid import uuid4
 from backend.db import supabase
 from backend.storage import save_file, get_sources_path
@@ -82,35 +83,24 @@ def _create_source_record(
         "storage_path": storage_path,
     }).execute()
-# Chunking
 def chunk_text(text: str, source_id: str, notebook_id: str, filename: str = "") -> list[dict]:
-    words = text.split()
-    chunk_size = 400
-    overlap = 40
     chunks = []
-    i = 0
-    # Calculate total chunks upfront
-    total_chunks = max(1, (len(words) + chunk_size - overlap - 1) // (chunk_size - overlap))
-    while i < len(words):
-        chunk_words = words[i:i + chunk_size]
-        content = " ".join(chunk_words)
         chunks.append({
             "id": str(uuid4()),
             "source_id": source_id,
             "notebook_id": notebook_id,
             "content": content,
-            "chunk_index": len(chunks),
             "metadata": {
-                "word_count": len(chunk_words),
                 "file_name": filename,
-                "chunk_index": len(chunks),
-                "total_chunks": total_chunks,
             }
         })
-        i += chunk_size - overlap
     return chunks

 from datetime import datetime
 from uuid import uuid4
+from backend.chunking import chunk_text_semantic
 from backend.db import supabase
 from backend.storage import save_file, get_sources_path
         "storage_path": storage_path,
     }).execute()
+# Chunking - use semantic chunking for better retrieval (aligned with PDF/URL)
 def chunk_text(text: str, source_id: str, notebook_id: str, filename: str = "") -> list[dict]:
     """Split ``text`` into chunk records for one source.

     The text is chunked semantically (512-character chunks, 80-character
     overlap). Each record carries a fresh UUID, the owning source and
     notebook ids, the chunk content, its position, and metadata holding the
     word count, file name, position and total number of chunks.
     """
     pieces = chunk_text_semantic(text, chunk_size=512, overlap=80)
     total = len(pieces)
     return [
         {
             "id": str(uuid4()),
             "source_id": source_id,
             "notebook_id": notebook_id,
             "content": piece,
             "chunk_index": position,
             "metadata": {
                 "word_count": len(piece.split()),
                 "file_name": filename,
                 "chunk_index": position,
                 "total_chunks": total,
             },
         }
         for position, piece in enumerate(pieces)
     ]

backend/rag_service.py CHANGED Viewed

@@ -25,14 +25,16 @@ def _validate_citations(text: str, num_chunks: int) -> str:
     return re.sub(r"\[(\d+)\]", replace_citation, text)
-def rag_chat(notebook_id: str, query: str, chat_history: list) -> tuple[str, list]:
     """
     RAG chat: retrieve chunks, build prompt, call LLM, persist, return answer and updated history.
     chat_history: list of [user_msg, assistant_msg] pairs (Gradio Chatbot format).
-    Returns: (assistant_reply, updated_history).
     """
-    save_message(notebook_id, "user", query)
     chunks = retrieve_chunks(notebook_id, query, top_k=TOP_K)
@@ -72,7 +74,7 @@ def rag_chat(notebook_id: str, query: str, chat_history: list) -> tuple[str, lis
     except Exception as e:
         answer = f"Error calling model: {e}"
-    save_message(notebook_id, "assistant", answer)
     updated_history = chat_history + [[query, answer]]
-    return answer, updated_history

     return re.sub(r"\[(\d+)\]", replace_citation, text)
+def rag_chat(notebook_id: str, query: str, chat_history: list, user_id: str | None = None) -> tuple[str, list, list[dict]]:
     """
     RAG chat: retrieve chunks, build prompt, call LLM, persist, return answer and updated history.
     chat_history: list of [user_msg, assistant_msg] pairs (Gradio Chatbot format).
+    user_id: for ownership validation; messages are only saved if notebook belongs to user.
+    Returns: (assistant_reply, updated_history, chunks).
+    chunks: list of dicts with id, content, metadata, similarity for citation display.
     """
+    save_message(notebook_id, user_id, "user", query)
     chunks = retrieve_chunks(notebook_id, query, top_k=TOP_K)
     except Exception as e:
         answer = f"Error calling model: {e}"
+    save_message(notebook_id, user_id, "assistant", answer)
     updated_history = chat_history + [[query, answer]]
+    return answer, updated_history, chunks

backend/retrieval_service.py CHANGED Viewed

@@ -1,32 +1,72 @@
-"""Retrieval service - vector similarity search for RAG."""
 from backend.db import supabase
 from backend.embedding_service import encode
-def retrieve_chunks(notebook_id: str, query: str, top_k: int = 5) -> list[dict]:
     """
     Retrieve top-k chunks for a query, filtered by notebook_id.
     Returns list of dicts with keys: id, content, metadata, similarity.
     """
     if not query or not query.strip():
         return []
-    query_embedding = encode([query.strip()], task="search_query")[0]
     try:
         result = supabase.rpc(
             "match_chunks",
             {
                 "query_embedding": query_embedding,
-                "match_count": top_k,
                 "p_notebook_id": notebook_id,
             },
         ).execute()
         rows = result.data or []
-        return [
             {
                 "id": str(r["id"]),
                 "content": r["content"],
@@ -35,5 +75,16 @@ def retrieve_chunks(notebook_id: str, query: str, top_k: int = 5) -> list[dict]:
             }
             for r in rows
         ]
     except Exception:
         return []

+"""Retrieval service - vector similarity search for RAG with optional reranking."""
+import os
 from backend.db import supabase
 from backend.embedding_service import encode
+# Retrieve more candidates for reranking; final count after rerank/filter
+RETRIEVE_TOP_K = int(os.getenv("RETRIEVE_TOP_K", "12"))
+FINAL_TOP_K = int(os.getenv("FINAL_TOP_K", "5"))
+SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.2"))
+USE_RERANKER = os.getenv("USE_RERANKER", "true").lower() in ("true", "1", "yes")
# None = not loaded yet, False = loading failed (do not retry), else the model.
_reranker = None


def _get_reranker():
    """Lazy-load the cross-encoder reranker.

    Returns the cached ``CrossEncoder`` instance, or None when it could not
    be imported or constructed. A failure is logged once and remembered, so
    later calls return None without retrying the load.
    """
    global _reranker
    if _reranker is None:
        try:
            from sentence_transformers import CrossEncoder

            _reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")
        except Exception:
            import logging

            # Reranking is optional, so record the cause and carry on without it.
            logging.getLogger(__name__).warning(
                "Cross-encoder reranker unavailable; reranking disabled",
                exc_info=True,
            )
            _reranker = False  # Disabled on failure
    return _reranker or None
def _rerank_chunks(query: str, chunks: list[dict], top_k: int) -> list[dict]:
    """Order chunks by cross-encoder score and keep the best ``top_k``.

    When no reranker is available, or there are no chunks, the first
    ``top_k`` chunks are returned in their incoming order.
    """
    reranker = _get_reranker()
    if not (reranker and chunks):
        return chunks[:top_k]
    relevance = reranker.predict([(query, chunk["content"]) for chunk in chunks])
    # Stable sort, highest score first; ties keep their incoming order.
    ranked = sorted(zip(chunks, relevance), key=lambda pair: pair[1], reverse=True)
    return [chunk for chunk, _score in ranked[:top_k]]
+def retrieve_chunks(notebook_id: str, query: str, top_k: int = None) -> list[dict]:
     """
     Retrieve top-k chunks for a query, filtered by notebook_id.
+    Uses two-stage retrieval: vector search -> optional rerank -> similarity filter.
     Returns list of dicts with keys: id, content, metadata, similarity.
     """
     if not query or not query.strip():
         return []
+    top_k = top_k or FINAL_TOP_K
+    query_clean = query.strip()
+    query_embedding = encode([query_clean], task="search_query")[0]
     try:
         result = supabase.rpc(
             "match_chunks",
             {
                 "query_embedding": query_embedding,
+                "match_count": RETRIEVE_TOP_K,
                 "p_notebook_id": notebook_id,
             },
         ).execute()
         rows = result.data or []
+        chunks = [
             {
                 "id": str(r["id"]),
                 "content": r["content"],
             }
             for r in rows
         ]
+        # Filter by similarity threshold
+        chunks = [c for c in chunks if c["similarity"] >= SIMILARITY_THRESHOLD]
+        # Rerank for better precision
+        if USE_RERANKER and len(chunks) > top_k:
+            chunks = _rerank_chunks(query_clean, chunks, top_k)
+        else:
+            chunks = chunks[:top_k]
+        return chunks
     except Exception:
         return []