Bhaskar Ram committed
Commit · 9edd318
Parent(s): 2623b17

fix: model singleton cache, dedup guard, Gradio type=messages
- embedder.py: introduce a _get_model() lazy singleton — SentenceTransformer is
  now loaded exactly once per process, and subsequent uploads reuse it (saves
  5-15s per incremental index call). Also remove the duplicate
  'import numpy as np' that sat inside add_to_index() even though numpy is
  already imported at module level.
- app.py: add gr.State(set()) indexed_sources to track indexed filenames and
  skip re-indexing a document that is already in the knowledge base (prevents
  silent chunk doubling). Reset clears the tracker as well.
- app.py: add type='messages' to gr.Chatbot to silence the Gradio >= 5
  deprecation warning.

Files changed:
app.py          +44 -15
rag/embedder.py +20 -4
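For context on the type='messages' change: Gradio >= 5 deprecates the old
list-of-[user, bot] pairs in favor of OpenAI-style role/content dicts, so the
chat handler has to return history in the new shape. A minimal sketch of that
contract; chat_stub below is a hypothetical stand-in, since the Space's actual
chat() body is not part of this diff:

import gradio as gr

def chat_stub(user_message, history):
    # With type="messages", history is a list of {"role", "content"} dicts,
    # not the deprecated [[user, bot], ...] pairs.
    return history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": "(model answer goes here)"},
    ]

chatbot = gr.Chatbot(type="messages")  # expects the dict format above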
app.py
CHANGED

@@ -29,16 +29,41 @@ def get_hf_token(user_token: str) -> str:
 # Gradio handlers
 # ─────────────────────────────────────────────
 
-def process_files(files, current_index, status_box):
-    """Parse uploaded files and build / extend the FAISS index."""
+def process_files(files, current_index, indexed_sources):
+    """Parse uploaded files and build / extend the FAISS index.
+
+    Args:
+        files: Uploaded file objects from gr.File.
+        current_index: Existing VectorIndex state (None on first upload).
+        indexed_sources: Set of already-indexed filenames (duplicate guard).
+    """
     if not files:
-        return current_index, "⚠️ No files uploaded."
+        return current_index, indexed_sources, "⚠️ No files uploaded."
 
     file_paths = [f.name for f in files] if hasattr(files[0], "name") else files
-    docs = load_documents(file_paths)
+
+    # ── Duplicate guard ────────────────────────────────────────────────────
+    # Filter out files whose name is already in the knowledge base so that
+    # re-uploading the same document doesn't silently double the chunk count.
+    new_paths, skipped = [], []
+    for p in file_paths:
+        from pathlib import Path
+        name = Path(p).name
+        if name in indexed_sources:
+            skipped.append(name)
+        else:
+            new_paths.append(p)
+
+    if skipped and not new_paths:
+        return current_index, indexed_sources, (
+            f"⚠️ Already indexed: {', '.join(skipped)}. No new documents added."
+        )
+    # ──────────────────────────────────────────────────────────────────────
+
+    docs = load_documents(new_paths)
 
     if not docs:
-        return current_index, "❌ Could not extract text from the uploaded files. Please upload PDF, DOCX, or TXT files."
+        return current_index, indexed_sources, "❌ Could not extract text from the uploaded files. Please upload PDF, DOCX, or TXT files."
 
     try:
         if current_index is None:
@@ -46,15 +71,18 @@ def process_files(files, current_index, status_box):
     else:
         idx = add_to_index(current_index, docs)
     except Exception as e:
-        return current_index, f"❌ Failed to build index: {e}"
+        return current_index, indexed_sources, f"❌ Failed to build index: {e}"
 
-    …
+    new_sources = {d["source"] for d in docs}
+    updated_sources = indexed_sources | new_sources
     total_chunks = idx.index.ntotal
+
+    skip_note = f" (skipped duplicates: {', '.join(skipped)})" if skipped else ""
     msg = (
-        f"✅ Indexed {len(…
+        f"✅ Indexed {len(new_sources)} new file(s): {', '.join(new_sources)}{skip_note}\n"
        f"📦 Total chunks in knowledge base: {total_chunks}"
     )
-    return idx, msg
+    return idx, updated_sources, msg
 
 
 def chat(user_message, history, vector_index, hf_token_input, top_k):
@@ -96,8 +124,8 @@ def chat(user_message, history, vector_index, hf_token_input, top_k):
 
 
 def reset_all():
-    """Clear index and chat."""
-    return None, [], "🗑️ Knowledge base and chat cleared.", ""
+    """Clear index, chat, and the indexed-sources tracker."""
+    return None, set(), [], "🗑️ Knowledge base and chat cleared.", ""
 
 
 # ─────────────────────────────────────────────
@@ -223,6 +251,7 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as demo:
 
     # ── Shared state ─────────────────────────
     vector_index = gr.State(None)
+    indexed_sources = gr.State(set())  # tracks filenames already in the index
 
     with gr.Row():
         # ── Left panel: Upload + config ──────
@@ -258,7 +287,7 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as demo:
         # ── Right panel: Chat ─────────────────
         with gr.Column(scale=2):
             gr.Markdown("### 💬 Ask Questions")
-            chatbot = gr.Chatbot(height=460, show_label=False)
+            chatbot = gr.Chatbot(height=460, show_label=False, type="messages")
             with gr.Row():
                 user_input = gr.Textbox(
                     placeholder="Ask a question about your documents...",
@@ -282,8 +311,8 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as demo:
     # ── Event wiring ──────────────────────────
     index_btn.click(
         fn=process_files,
-        inputs=[file_upload, vector_index, status_box],
-        outputs=[vector_index, status_box],
+        inputs=[file_upload, vector_index, indexed_sources],
+        outputs=[vector_index, indexed_sources, status_box],
     )
 
     send_btn.click(
@@ -301,7 +330,7 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as demo:
     reset_btn.click(
         fn=reset_all,
         inputs=[],
-        outputs=[vector_index, chatbot, status_box, user_input],
+        outputs=[vector_index, indexed_sources, chatbot, status_box, user_input],
    )
 
     # ── Kerdos Footer ─────────────────────────
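The duplicate guard round-trips through gr.State: the handler receives the
current set, filters against it, and returns the updated set as an output. A
minimal sketch of that loop with the indexing itself stubbed out; only the
filtering logic is taken from the diff, and the file paths are illustrative:

from pathlib import Path

def dedup(file_paths, indexed_sources):
    # Same filtering as process_files: compare by bare filename.
    new_paths, skipped = [], []
    for p in file_paths:
        name = Path(p).name
        if name in indexed_sources:
            skipped.append(name)
        else:
            new_paths.append(p)
    return new_paths, skipped

sources = set()                                   # gr.State(set()) initial value
new, skipped = dedup(["/tmp/abc/report.pdf"], sources)
assert new and not skipped                        # first upload gets indexed
sources |= {Path(p).name for p in new}            # handler returns the updated set
new, skipped = dedup(["/tmp/xyz/report.pdf"], sources)
assert skipped == ["report.pdf"] and not new      # same name, new temp dir: skipped

Because the comparison keys on Path(p).name, a re-upload is caught even though
Gradio stages each upload under a fresh temp directory. This does assume
load_documents records the bare filename in each doc's "source" field; if it
stored full paths, the tracker and the guard would never agree.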
rag/embedder.py
CHANGED

@@ -4,8 +4,10 @@ Chunks raw text documents and builds an in-memory FAISS vector index.
 """
 
 from __future__ import annotations
+import re as _re
 import numpy as np
 from dataclasses import dataclass, field
+from typing import Optional
 
 CHUNK_SIZE = 512     # characters — max chars per chunk
 CHUNK_OVERLAP = 64   # characters — approx overlap between consecutive chunks
@@ -13,9 +15,23 @@ EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"  # State-of-the-art small retrieval model
 
 # Regex: split on sentence-ending punctuation followed by whitespace + capital letter,
 # or on paragraph / line breaks.
-import re as _re
 _SENTENCE_SPLIT = _re.compile(r'(?<=[.!?])\s+(?=[A-Z])|(?<=\n)\s*\n+')
 
+# ── Model singleton ───────────────────────────────────────────────────────────
+# SentenceTransformer takes 5–15s to load from disk. We load it exactly once
+# per process and reuse across all build_index / add_to_index calls.
+_MODEL: Optional[object] = None
+
+
+def _get_model():
+    """Return the cached SentenceTransformer, loading it on first call only."""
+    global _MODEL
+    if _MODEL is None:
+        from sentence_transformers import SentenceTransformer
+        _MODEL = SentenceTransformer(EMBEDDING_MODEL)
+    return _MODEL
+# ─────────────────────────────────────────────────────────────────────────────
+
 
 @dataclass
 class VectorIndex:
@@ -90,7 +106,8 @@ def build_index(docs: list[dict]) -> VectorIndex:
     Returns a VectorIndex with embeddings stored in FAISS.
     """
     import faiss
-    from sentence_transformers import SentenceTransformer
+
+    model = _get_model()  # reuse cached singleton — no reload cost
 
     # Chunk all documents
     all_chunks = []
@@ -101,7 +118,6 @@ def build_index(docs: list[dict]) -> VectorIndex:
         raise ValueError("No text chunks could be extracted from the uploaded files.")
 
     print(f"[Embedder] Embedding {len(all_chunks)} chunks...")
-    model = SentenceTransformer(EMBEDDING_MODEL)
     texts = [c["text"] for c in all_chunks]
     embeddings = model.encode(texts, show_progress_bar=False, batch_size=32)
     embeddings = np.array(embeddings, dtype="float32")
@@ -119,7 +135,7 @@ def build_index(docs: list[dict]) -> VectorIndex:
 def add_to_index(vector_index: VectorIndex, docs: list[dict]) -> VectorIndex:
     """Incrementally add new docs to an existing index."""
     import faiss
-    import numpy as np
+    # numpy already imported at module level — no duplicate import needed
 
     new_chunks = []
     for doc in docs:
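One caveat worth noting on the singleton: Gradio runs synchronous event
handlers on a thread pool, so two first uploads arriving together could both
observe _MODEL is None and load the model twice. The commit's version
tolerates that (worst case is one redundant load, and the last assignment
wins); a lock-guarded variant would close the race. A sketch, not what this
commit implements:

import threading
from typing import Optional

EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"

_MODEL: Optional[object] = None
_MODEL_LOCK = threading.Lock()

def _get_model():
    """Return the cached SentenceTransformer, loading it at most once."""
    global _MODEL
    if _MODEL is None:              # fast path: no locking once loaded
        with _MODEL_LOCK:
            if _MODEL is None:      # double-checked: the race loser reuses
                from sentence_transformers import SentenceTransformer
                _MODEL = SentenceTransformer(EMBEDDING_MODEL)
    return _MODEL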