Hemanth-05 committed on
Commit
47056dc
·
1 Parent(s): 3fb7184

RAG: wire chat + prompt + services module

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. artifacts/prompt.poml +14 -0
  3. pages/chat.py +3 -3
  4. services/rag_engine.py +218 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
 
 
1
  __pycache__/
2
  *.pyc
3
  .DS_Store
 
1
+ .venv/
2
+ venv/
3
  __pycache__/
4
  *.pyc
5
  .DS_Store
artifacts/prompt.poml CHANGED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a grounded assistant for a NotebookLM-style app.
2
+ Rules:
3
+ 1) Answer ONLY from the provided context.
4
+ 2) If the answer is not in context, say you could not find it in the uploaded sources.
5
+ 3) Cite supporting sources inline using [S1], [S2], etc.
6
+ 4) Keep the answer concise and factual.
7
+
8
+ Question:
9
+ {{question}}
10
+
11
+ Context:
12
+ {{context}}
13
+
14
+ Answer:
pages/chat.py CHANGED
@@ -3,7 +3,7 @@
3
  import uuid
4
  from datetime import datetime
5
  from state import UserData, Message, get_active_notebook
6
- from mock_data import get_mock_response
7
 
8
 
9
  FILE_TYPE_ICONS = {
@@ -67,8 +67,8 @@ def handle_chat_submit(message: str, state: UserData) -> tuple[UserData, list[di
67
  )
68
  nb.messages.append(user_msg)
69
 
70
- # Get mock response
71
- response = get_mock_response(message)
72
 
73
  # Add assistant message
74
  assistant_msg = Message(
 
3
  import uuid
4
  from datetime import datetime
5
  from state import UserData, Message, get_active_notebook
6
+ from services.rag_engine import rag_answer
7
 
8
 
9
  FILE_TYPE_ICONS = {
 
67
  )
68
  nb.messages.append(user_msg)
69
 
70
+ # Get actual response
71
+ response = rag_answer(message.strip(), nb.id)
72
 
73
  # Add assistant message
74
  assistant_msg = Message(
services/rag_engine.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Retrieval-only RAG engine for chat responses."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+
10
+ from ingestion_engine.embedding_generator import generate_query
11
+ from persistence.vector_store import VectorStore
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ K_RETRIEVE = 40
16
+ K_FINAL = 8
17
+ ALPHA = 0.05
18
+ MAX_SNIPPET_CHARS = 280
19
+ GEN_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
20
+ MAX_NEW_TOKENS = 400
21
+ TEMPERATURE = 0.2
22
+ TIMEOUT_SEC = 45
23
+
24
+ PROMPT_FILE = Path(__file__).resolve().parent.parent / "artifacts" / "prompt.poml"
25
+ DEFAULT_PROMPT_TEMPLATE = (
26
+ "You are a grounded assistant for a NotebookLM-style app.\n"
27
+ "Rules:\n"
28
+ "1) Answer ONLY from the provided context.\n"
29
+ "2) If the answer is not in context, say you could not find it in the uploaded sources.\n"
30
+ "3) Cite supporting sources inline using [S1], [S2], etc.\n"
31
+ "4) Keep the answer concise and factual.\n\n"
32
+ "Question:\n{{question}}\n\n"
33
+ "Context:\n{{context}}\n\n"
34
+ "Answer:"
35
+ )
36
+
37
+
38
+ def _clean_text(text: str) -> str:
39
+ return " ".join((text or "").split())
40
+
41
+
42
+ def _tokenize_keywords(text: str) -> set[str]:
43
+ tokens = re.split(r"[^a-z0-9]+", (text or "").lower())
44
+ return {t for t in tokens if len(t) >= 3}
45
+
46
+
47
+ def _keyword_hit_count(query_keywords: set[str], chunk_text: str) -> int:
48
+ if not query_keywords:
49
+ return 0
50
+ chunk_tokens = _tokenize_keywords(chunk_text)
51
+ return len(query_keywords.intersection(chunk_tokens))
52
+
53
+
54
def _rerank_matches(query: str, matches: list[dict]) -> list[dict]:
    """Stage 2 rerank: pinecone score + ALPHA * lexical keyword hits."""
    query_keywords = _tokenize_keywords(query)

    def _score(match: dict) -> dict:
        # Base similarity from the vector store, defaulting missing/None to 0.
        base = float(match.get("score", 0.0) or 0.0)
        hits = _keyword_hit_count(query_keywords, match.get("text", ""))
        return {
            **match,
            "keyword_hit_count": hits,
            "combined_score": base + ALPHA * hits,
        }

    rescored = [_score(m) for m in matches]
    # Stable sort: ties keep the vector store's original ordering.
    ordered = sorted(rescored, key=lambda r: r.get("combined_score", 0.0), reverse=True)
    return ordered[:K_FINAL]
71
+
72
+
73
def _build_citations(matches: list[dict]) -> list[dict]:
    """Convert vector matches into the citation format used by pages/chat.py."""
    seen: set[tuple[str, int]] = set()
    citations: list[dict] = []

    for m in matches:
        src = m.get("source_filename", "Unknown source")
        chunk = int(m.get("chunk_index", 0) or 0)
        # Deduplicate on (source, chunk) so each citation appears once.
        if (src, chunk) in seen:
            continue
        seen.add((src, chunk))

        snippet = _clean_text(m.get("text", ""))
        if len(snippet) > MAX_SNIPPET_CHARS:
            snippet = snippet[:MAX_SNIPPET_CHARS].rstrip() + "..."

        citations.append({"source": src, "page": chunk, "text": snippet})

    return citations
99
+
100
+
101
def _build_content(matches: list[dict]) -> str:
    """Render reranked matches as a readable retrieval-only answer string."""
    if not matches:
        return (
            "I couldn't find relevant information in your uploaded sources for that question. "
            "Try rephrasing the question or adding more sources."
        )

    parts = ["Based on your uploaded sources, here are the most relevant passages:", ""]
    for rank, m in enumerate(matches, start=1):
        src = m.get("source_filename", "Unknown source")
        chunk = int(m.get("chunk_index", 0) or 0)
        base = float(m.get("score", 0.0) or 0.0)
        # combined_score falls back to the raw score for unreranked matches.
        merged = float(m.get("combined_score", base) or base)
        hit_n = int(m.get("keyword_hit_count", 0) or 0)

        body = _clean_text(m.get("text", ""))
        if len(body) > MAX_SNIPPET_CHARS:
            body = body[:MAX_SNIPPET_CHARS].rstrip() + "..."

        parts.append(
            f"{rank}. **{src}** (chunk {chunk}, pinecone: {base:.3f}, hits: {hit_n}, combined: {merged:.3f})"
        )
        parts.append(f" {body}")
        parts.append("")

    parts.append("This is a two-stage retrieval-only response (no LLM synthesis yet).")
    return "\n".join(parts)
127
+
128
+
129
def _build_prompt(question: str, reranked_matches: list[dict]) -> str:
    """Build a grounded prompt from top reranked chunks."""

    def _block(tag: int, m: dict) -> str:
        # One [S#]-labelled context block per chunk, matching the citation
        # markers the prompt template asks the model to use.
        src = m.get("source_filename", "Unknown source")
        chunk = int(m.get("chunk_index", 0) or 0)
        body = _clean_text(m.get("text", ""))
        return f"[S{tag}] source={src} chunk={chunk}\n{body}"

    context_text = "\n\n".join(
        _block(i, m) for i, m in enumerate(reranked_matches, start=1)
    )
    filled = _load_prompt_template().replace("{{question}}", question)
    return filled.replace("{{context}}", context_text)
144
+
145
+
146
def _load_prompt_template() -> str:
    """Load prompt template from artifacts/prompt.poml; fallback to default.

    Returns the file's text when it exists and is non-blank, otherwise
    DEFAULT_PROMPT_TEMPLATE. Only expected I/O/decoding failures are treated
    as "file unavailable" — a bare ``except Exception`` here would also hide
    real programming errors.
    """
    try:
        text = PROMPT_FILE.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing/unreadable template is normal in fresh checkouts.
        return DEFAULT_PROMPT_TEMPLATE
    return text if text.strip() else DEFAULT_PROMPT_TEMPLATE
155
+
156
+
157
def _generate_answer(question: str, context_chunks: list[dict]) -> str:
    """Generate a grounded response using Hugging Face Inference API."""
    # Imported lazily so the module stays importable until generation
    # is actually attempted.
    from huggingface_hub import InferenceClient

    hf_token = os.environ.get("HF_TOKEN")
    prompt_text = _build_prompt(question, context_chunks)
    client = InferenceClient(token=hf_token, timeout=TIMEOUT_SEC)

    raw = client.text_generation(
        prompt=prompt_text,
        model=GEN_MODEL,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        do_sample=True,
        return_full_text=False,
    )
    return (raw or "").strip()
174
+
175
+
176
def rag_answer(question: str, notebook_id: str) -> dict:
    """Return a retrieval-only answer object: {"content": str, "citations": list}."""
    query = (question or "").strip()
    if not query:
        return {"content": "Please enter a question.", "citations": []}

    try:
        vector = generate_query(query)

        # Stage 1: retrieve candidate pool
        raw_matches = VectorStore().query(
            query_vector=vector, namespace=notebook_id, top_k=K_RETRIEVE
        )
        candidates = [m for m in raw_matches if m.get("text")]
        if not candidates:
            return {
                "content": (
                    "I couldn't find relevant information in your uploaded sources for that question. "
                    "Try rephrasing the question or adding more sources."
                ),
                "citations": [],
            }

        # Stage 2: rerank and keep top K_FINAL
        top_matches = _rerank_matches(query, candidates)
        cites = _build_citations(top_matches)
        fallback = _build_content(top_matches)

        # LLM synthesis is best-effort: any failure (or an empty completion)
        # degrades to the retrieval-only summary instead of erroring out.
        try:
            llm_text = _generate_answer(query, top_matches)
        except Exception as e:
            logger.warning("Generation failed, falling back to retrieval-only content: %s", e)
            llm_text = ""

        return {
            "content": llm_text or fallback,
            "citations": cites,
        }

    except Exception as e:
        logger.error("RAG retrieval failed: %s", e)
        return {
            "content": f"I ran into an error while retrieving from sources: {e}",
            "citations": [],
        }