Spaces:

cmd0160
/

abalone_chat_application

Sleeping

App Files Files Community

cmd0160 committed on Dec 13, 2025

Commit

70de36c

1 Parent(s): 5b73300

Adding kg updates

Browse files

Files changed (16) hide show

app.py +46 -6
requirements.txt +1 -0
src/ingest.py +126 -68
src/utils/rag_runtime.py +60 -38
vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/data_level0.bin +0 -3
vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/header.bin +0 -3
vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/index_metadata.pickle +0 -3
vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/length.bin +0 -3
vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/link_lists.bin +0 -3
vectorstore/chroma-embeddings.parquet +0 -3
vectorstore/chroma.sqlite3 +0 -3
vectorstore/chunks_index.json +0 -0
vectorstore/index/id_to_uuid_6855eddb-ade5-445b-9e7f-a8293769c768.pkl +0 -3
vectorstore/index/index_6855eddb-ade5-445b-9e7f-a8293769c768.bin +0 -3
vectorstore/index/index_metadata_6855eddb-ade5-445b-9e7f-a8293769c768.pkl +0 -3
vectorstore/index/uuid_to_id_6855eddb-ade5-445b-9e7f-a8293769c768.pkl +0 -3

app.py CHANGED Viewed

@@ -1,13 +1,14 @@
 import os
 from typing import List, Dict, Tuple, Optional, Any
 # Disable telemetry for LangChain and Chroma by default
 os.environ.setdefault("LANGCHAIN_TELEMETRY_ENABLED", "false")
 os.environ.setdefault("LANGCHAIN_DISABLE_TELEMETRY", "true")
 os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "false")
-import streamlit as st
 from src.utils.rag_runtime import (
     run_ingest_cli,
     build_or_load_retriever_cached,
@@ -229,11 +230,46 @@ class AbaloneRAGApp:
             unsafe_allow_html=True,
         )
         if confirm:
             with st.spinner("Rebuilding vectorstore..."):
-                run_ingest_cli(data_dir=self.data_dir, persist_dir=self.persist_dir)
-                build_or_load_retriever_cached.clear()
-                get_chain_cached.clear()
                 self.chain = get_chain_cached(
                     model_name=self.model_name,
@@ -444,10 +480,14 @@ def main() -> None:
     """Main entry point for running the Abalone RAG Chatbot app."""
     app = AbaloneRAGApp()
     if not ensure_openai_key():
         st.stop()
-    app.handle_rebuild()
     app.ensure_chain_ready()
     app.render_chat_history()
     app.handle_user_input()

 import os
 from typing import List, Dict, Tuple, Optional, Any
+import streamlit as st
+import logging
+from datetime import datetime
 # Disable telemetry for LangChain and Chroma by default
 os.environ.setdefault("LANGCHAIN_TELEMETRY_ENABLED", "false")
 os.environ.setdefault("LANGCHAIN_DISABLE_TELEMETRY", "true")
 os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "false")
 from src.utils.rag_runtime import (
     run_ingest_cli,
     build_or_load_retriever_cached,
             unsafe_allow_html=True,
         )
+        # add a small UI log for rebuild actions
+        def _ui_log(msg: str):
+            try:
+                os.makedirs(self.persist_dir, exist_ok=True)
+                with open(os.path.join(self.persist_dir, "ui_rebuild.log"), "a", encoding="utf-8") as fh:
+                    fh.write(f"{msg}\n")
+            except Exception:
+                pass
         if confirm:
+            _ui_log(f"{datetime.utcnow().isoformat()} - Confirm rebuild clicked by user")
             with st.spinner("Rebuilding vectorstore..."):
+                try:
+                    out = run_ingest_cli(data_dir=self.data_dir, persist_dir=self.persist_dir)
+                    _ui_log(f"{datetime.utcnow().isoformat()} - Rebuild succeeded")
+                except Exception as e:
+                    import subprocess as _sp
+                    _ui_log(f"{datetime.utcnow().isoformat()} - Rebuild failed: {e}")
+                    if isinstance(e, _sp.CalledProcessError):
+                        stderr = getattr(e, 'stderr', None)
+                        stdout = getattr(e, 'output', None) or getattr(e, 'stdout', None)
+                        st.error("Rebuild failed. See logs below.")
+                        if stdout:
+                            st.markdown("**ingest stdout:**")
+                            st.code(stdout)
+                        if stderr:
+                            st.markdown("**ingest stderr:**")
+                            st.code(stderr)
+                    else:
+                        st.error(f"Rebuild failed: {e}")
+                    st.session_state["rebuild_pending"] = False
+                    return
+                # On success, clear cached retriever/chain and reload
+                try:
+                    build_or_load_retriever_cached.clear()
+                    get_chain_cached.clear()
+                except Exception:
+                    # if clearing cache fails, just log it in UI log
+                    _ui_log(f"{datetime.utcnow().isoformat()} - Warning: failed to clear cached functions")
                 self.chain = get_chain_cached(
                     model_name=self.model_name,
     """Main entry point for running the Abalone RAG Chatbot app."""
     app = AbaloneRAGApp()
+    # Allow rebuild actions before enforcing OPENAI key so users can inspect logs
+    # and trigger rebuild operations even when the key isn't set. Chain init
+    # requires the key, so enforce it after handling rebuild requests.
+    app.handle_rebuild()
     if not ensure_openai_key():
         st.stop()
     app.ensure_chain_ready()
     app.render_chat_history()
     app.handle_user_input()

requirements.txt CHANGED Viewed

@@ -9,3 +9,4 @@ numpy==1.24.4
 streamlit>=1.25.0
 python-dotenv==1.0.0
 pytest==7.2.0

 streamlit>=1.25.0
 python-dotenv==1.0.0
 pytest==7.2.0
+rdflib

src/ingest.py CHANGED Viewed

@@ -1,26 +1,25 @@
 import argparse
 import os
-from langchain_community.document_loaders import DirectoryLoader, TextLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import OpenAIEmbeddings
-# New: KG integration imports
 import uuid
 import json
-try:
-    from src.kg.extract import extract_triples_with_llm
-    from src.kg.store import KGStore
-    from src.kg.retriever import KGRetriever
-    _HAS_KG = True
-except Exception:
-    _HAS_KG = False
 def load_documents(data_dir: str):
     from pathlib import Path
@@ -40,22 +39,22 @@ def load_documents(data_dir: str):
         loaded = loader.load()
         docs.extend(loaded)
-    print(f"Loaded {len(docs)} documents from {data_dir}")
-    print("Documents ingested:")
-    for d in docs:
-        meta = d.metadata or {}
-        path = meta.get("source") or meta.get("file_path") or meta.get("path")
-        print(f" - {os.path.abspath(path) if path else 'Unknown file'}")
     return docs
-def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int):
     if not os.path.exists(data_dir):
         raise ValueError(f"Data directory does not exist: {data_dir}")
     docs = load_documents(data_dir)
     if not docs:
         raise ValueError(f"No .txt documents found in {data_dir}")
     splitter = RecursiveCharacterTextSplitter(
@@ -63,25 +62,53 @@ def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int)
         chunk_overlap=chunk_overlap,
     )
     split_docs = splitter.split_documents(docs)
-    print(f"Split into {len(split_docs)} chunks")
-    embeddings = OpenAIEmbeddings()
     os.makedirs(persist_dir, exist_ok=True)
     # Prepare KG store and local chunk index
     chunks_index = {}
     kg_path = os.path.join(persist_dir, "kg_store.ttl")
-    if _HAS_KG:
-        try:
-            kg = KGStore(path=kg_path)
-        except Exception:
-            kg = None
-    else:
-        kg = None
     # Annotate chunks with stable chunk_id and optionally extract/link KG triples
-    for d in split_docs:
         meta = d.metadata or {}
         chunk_id = meta.get("chunk_id") or str(uuid.uuid4())
         if not meta:
@@ -94,54 +121,83 @@ def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int)
             "metadata": d.metadata,
         }
-        # If KG is available, attempt to extract triples and link the chunk
-        if kg is not None:
             try:
-                triples = extract_triples_with_llm(chunks_index[chunk_id]["text"], max_triples=4)
-                for t in triples:
-                    try:
-                        kg.add_triple(
-                            t.get("subject"),
-                            t.get("predicate"),
-                            t.get("object"),
-                            provenance={"sentence": t.get("sentence"), "confidence": t.get("confidence")},
-                        )
-                        kg.link_chunk_to_entity(
-                            chunk_id,
-                            t.get("subject"),
-                            sentence=t.get("sentence"),
-                            confidence=t.get("confidence"),
-                        )
-                    except Exception:
-                        # non-fatal: continue
-                        continue
             except Exception:
-                # LLM extraction failed or not configured; skip KG extraction
                 pass
-    # Persist Chroma vectorstore
-    Chroma.from_documents(
-        split_docs,
-        embedding=embeddings,
-        persist_directory=persist_dir,
-    )
-    print(f"Vectorstore built and persisted to {persist_dir}")
     # Persist chunks index for runtime (simple json mapping)
     try:
         idx_path = os.path.join(persist_dir, "chunks_index.json")
         with open(idx_path, "w", encoding="utf-8") as fh:
             json.dump(chunks_index, fh)
     except Exception:
-        pass
-    # Persist KG if available
-    if kg is not None:
-        try:
-            kg.save()
-            print(f"KG persisted to {kg_path}")
-        except Exception:
-            pass
 def main():
@@ -150,6 +206,7 @@ def main():
     parser.add_argument("--persist-dir", type=str, default="./vectorstore")
     parser.add_argument("--chunk-size", type=int, default=200)
     parser.add_argument("--chunk-overlap", type=int, default=50)
     args = parser.parse_args()
     ingest(
@@ -157,6 +214,7 @@ def main():
         persist_dir=args.persist_dir,
         chunk_size=args.chunk_size,
         chunk_overlap=args.chunk_overlap,
     )

 import argparse
 import os
+import logging
+from datetime import datetime, timezone
+# Disable Chroma telemetry to avoid opentelemetry compatibility errors during ingestion
+os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "false")
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import OpenAIEmbeddings
+# KG integration: import unconditionally so errors propagate if dependencies missing
+from src.kg.extract import extract_triples_with_llm
+from src.kg.store import KGStore
 import uuid
 import json
+# Module logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 def load_documents(data_dir: str):
     from pathlib import Path
         loaded = loader.load()
         docs.extend(loaded)
+    logger.info(f"Loaded {len(docs)} documents from {data_dir}")
+    logger.debug("Documents ingested: %s", [ (d.metadata or {}).get('source') for d in docs ])
     return docs
+def ingest(data_dir: str, persist_dir: str, chunk_size: int, chunk_overlap: int, openai_api_key: str = None):
+    logger.info("Starting ingest: data_dir=%s persist_dir=%s chunk_size=%s chunk_overlap=%s", data_dir, persist_dir, chunk_size, chunk_overlap)
     if not os.path.exists(data_dir):
+        logger.error("Data directory does not exist: %s", data_dir)
         raise ValueError(f"Data directory does not exist: {data_dir}")
     docs = load_documents(data_dir)
     if not docs:
+        logger.error("No documents found in %s", data_dir)
         raise ValueError(f"No .txt documents found in {data_dir}")
     splitter = RecursiveCharacterTextSplitter(
         chunk_overlap=chunk_overlap,
     )
     split_docs = splitter.split_documents(docs)
+    logger.info("Split into %d chunks", len(split_docs))
+    # Ensure persist dir exists and add file handler to logger
     os.makedirs(persist_dir, exist_ok=True)
+    # Add file handler for detailed logs in persist_dir/ingest.log
+    try:
+        fh = logging.FileHandler(os.path.join(persist_dir, 'ingest.log'))
+        fh.setLevel(logging.DEBUG)
+        fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+        # Avoid adding multiple file handlers on repeated calls
+        if not any(isinstance(h, logging.FileHandler) and getattr(h, 'baseFilename', None) == fh.baseFilename for h in logger.handlers):
+            logger.addHandler(fh)
+    except Exception:
+        logger.exception('Failed to add file handler for ingest log')
     # Prepare KG store and local chunk index
     chunks_index = {}
     kg_path = os.path.join(persist_dir, "kg_store.ttl")
+    # Initialize embeddings; provide a clear error if OpenAI API key is missing
+    try:
+        logger.info('Initializing embeddings')
+        # If an API key was provided on the CLI, inject it into the environment
+        if openai_api_key:
+            os.environ['OPENAI_API_KEY'] = openai_api_key
+            logger.debug('Set OPENAI_API_KEY from CLI flag')
+        embeddings = OpenAIEmbeddings()
+        logger.info('Embeddings initialized')
+    except Exception as e:
+        logger.exception("Failed to initialize OpenAI embeddings. Ensure OPENAI_API_KEY is set in the environment or pass --openai-api-key.")
+        raise
+    # Initialize KG store unconditionally so errors are visible
+    try:
+        logger.info('Initializing KG store at %s', kg_path)
+        kg = KGStore(path=kg_path)
+        logger.info('KG store initialized')
+    except Exception:
+        logger.exception('Failed to initialize KGStore')
+        # re-raise so caller sees the failure
+        raise
     # Annotate chunks with stable chunk_id and optionally extract/link KG triples
+    start_time = datetime.now(timezone.utc)
+    logger.info('Beginning per-chunk processing at %s UTC', start_time.isoformat())
+    for i, d in enumerate(split_docs, start=1):
+        print(i, d)
         meta = d.metadata or {}
         chunk_id = meta.get("chunk_id") or str(uuid.uuid4())
         if not meta:
             "metadata": d.metadata,
         }
+        # Log progress at intervals
+        if i % 50 == 0 or i <= 5:
+            logger.debug('Processing chunk %d/%d (id=%s)', i, len(split_docs), chunk_id)
+        # Attempt to extract triples and link the chunk (errors during extraction are non-fatal)
+        try:
+            triples = extract_triples_with_llm(chunks_index[chunk_id]["text"], max_triples=4)
+            if triples:
+                logger.debug('Extracted %d triples for chunk %s', len(triples), chunk_id)
+            for t in triples:
+                try:
+                    kg.add_triple(
+                        t.get("subject"),
+                        t.get("predicate"),
+                        t.get("object"),
+                        provenance={"sentence": t.get("sentence"), "confidence": t.get("confidence")},
+                    )
+                    kg.link_chunk_to_entity(
+                        chunk_id,
+                        t.get("subject"),
+                        sentence=t.get("sentence"),
+                        confidence=t.get("confidence"),
+                    )
+                except Exception:
+                    logger.exception('Non-fatal error while adding triple or linking chunk %s', chunk_id)
+                    continue
+        except Exception:
+            # LLM extraction failed or not configured; skip KG extraction for this chunk
+            logger.exception('KG extraction failed for chunk %s (continuing)', chunk_id)
+            pass
+    end_time = datetime.now(timezone.utc)
+    logger.info('Finished per-chunk processing at %s UTC (duration %s)', end_time.isoformat(), end_time - start_time)
+    # Persist Chroma vectorstore
+    try:
+        logger.info('Persisting Chroma vectorstore to %s', persist_dir)
+        Chroma.from_documents(
+            split_docs,
+            embedding=embeddings,
+            persist_directory=persist_dir,
+        )
+        logger.info('Vectorstore built and persisted to %s', persist_dir)
+    except Exception as e:
+        import traceback, sys
+        logger.exception('Chroma.from_documents failed to write the vectorstore:')
+        # ensure the log is flushed to file
+        for h in logger.handlers:
             try:
+                h.flush()
             except Exception:
                 pass
+        sys.exit(1)
     # Persist chunks index for runtime (simple json mapping)
     try:
         idx_path = os.path.join(persist_dir, "chunks_index.json")
         with open(idx_path, "w", encoding="utf-8") as fh:
             json.dump(chunks_index, fh)
+        logger.info('Wrote chunks_index.json (%d entries)', len(chunks_index))
     except Exception:
+        logger.exception('Failed to write chunks_index.json')
+    # Persist KG
+    try:
+        kg.save()
+        logger.info('KG persisted to %s', kg_path)
+    except Exception:
+        import traceback, sys
+        logger.exception('Failed to persist KG to disk:')
+        # ensure the log is flushed to file
+        for h in logger.handlers:
+            try:
+                h.flush()
+            except Exception:
+                pass
+        sys.exit(1)
 def main():
     parser.add_argument("--persist-dir", type=str, default="./vectorstore")
     parser.add_argument("--chunk-size", type=int, default=200)
     parser.add_argument("--chunk-overlap", type=int, default=50)
+    parser.add_argument("--openai-api-key", type=str, default=None, help="Optional OpenAI API key to use for embeddings (overrides env var)")
     args = parser.parse_args()
     ingest(
         persist_dir=args.persist_dir,
         chunk_size=args.chunk_size,
         chunk_overlap=args.chunk_overlap,
+        openai_api_key=args.openai_api_key,
     )

src/utils/rag_runtime.py CHANGED Viewed

@@ -8,27 +8,20 @@ from src.vectorstore import get_retriever
 from src.qa_chain import make_conversational_chain
 import os
 import json
-from typing import Dict, List, Tuple
-try:
-    from src.kg.store import KGStore
-    from src.kg.retriever import KGRetriever
-    _HAS_KG = True
-except Exception:
-    _HAS_KG = False
-def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
     """Run the ingestion module to rebuild the vectorstore.
-    Args:
-        data_dir: Directory containing the raw text files.
-        persist_dir: Directory where embeddings and Chroma DB should be stored.
-    Raises:
-        CalledProcessError: If the underlying subprocess fails.
     """
     cmd = [
         sys.executable,
         "-m",
@@ -38,7 +31,29 @@ def run_ingest_cli(data_dir: str, persist_dir: str) -> None:
         "--persist-dir",
         persist_dir,
     ]
-    subprocess.run(cmd, check=True)
 def _load_chunks_index(persist_dir: str) -> Dict[str, Dict]:
     idx_path = os.path.join(persist_dir, "chunks_index.json")
@@ -69,25 +84,25 @@ def answer_with_kg(
     # Load chunks index mapping
     chunks_index = _load_chunks_index(persist_dir)
-    if _HAS_KG:
-        kg_path = os.path.join(persist_dir, "kg_store.ttl")
-        try:
-            kg = KGStore(path=kg_path)
-            retr = KGRetriever(kg)
-            chunk_ids, summaries = retr.get_context_for_question(question, hops=kg_hops)
-            if summaries:
-                kg_text_parts.append("KG entities: " + ", ".join(summaries))
-            # add chunk snippets
-            for cid in chunk_ids:
-                info = chunks_index.get(cid)
-                if info:
-                    txt = info.get("text", "")
-                    if txt:
-                        snippet = txt.strip().replace("\n", " ")[:min(len(txt), kg_context_max_chars)]
-                        kg_text_parts.append(f"[KG chunk {cid}]: {snippet}")
-        except Exception:
-            # If KG load fails, skip KG augmentation
-            kg_text_parts = []
     kg_context = "\n\n".join(kg_text_parts) if kg_text_parts else ""
     if kg_context:
@@ -113,24 +128,31 @@ def build_or_load_retriever_cached(
     Args:
         data_dir: Directory containing input documents.
         persist_dir: Directory where the Chroma vectorstore is stored.
-        top_k: Number of chunks to retrieve for queries.
         retrieval_mode: Retrieval strategy (mmr, similarity, hybrid).
     Returns:
         An initialized retriever instance.
     """
     try:
         return get_retriever(
             persist_dir=persist_dir,
             top_k=top_k,
-            retrieval_mode=retrieval_mode,
         )
     except Exception:
         run_ingest_cli(data_dir=data_dir, persist_dir=persist_dir)
         return get_retriever(
             persist_dir=persist_dir,
             top_k=top_k,
-            retrieval_mode=retrieval_mode,
         )

 from src.qa_chain import make_conversational_chain
 import os
 import json
+from typing import Dict, List, Tuple, cast
+# Unconditionally import KG modules; let import errors propagate so failures are visible
+from src.kg.store import KGStore
+from src.kg.retriever import KGRetriever
+def run_ingest_cli(data_dir: str, persist_dir: str) -> str:
     """Run the ingestion module to rebuild the vectorstore.
+    Runs the ingest CLI as a subprocess and returns stdout on success.
+    On failure raises subprocess.CalledProcessError with captured stdout/stderr so callers
+    (for example the Streamlit UI) can display a helpful error message.
     """
     cmd = [
         sys.executable,
         "-m",
         "--persist-dir",
         persist_dir,
     ]
+    try:
+        # Add a timeout to avoid indefinite hanging; 600s (10 minutes) is generous for large ingests
+        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+    except subprocess.TimeoutExpired as te:
+        # Provide helpful error including partial output
+        raise subprocess.CalledProcessError(
+            returncode=124,
+            cmd=cmd,
+            output=getattr(te, 'output', '') or '',
+            stderr=f"Ingest process timed out after {te.timeout} seconds",
+        )
+    # Check return code and raise with captured output on failure
+    if completed.returncode != 0:
+        # Raise with captured output to make it easy to present to the user
+        raise subprocess.CalledProcessError(
+            returncode=completed.returncode,
+            cmd=cmd,
+            output=completed.stdout,
+            stderr=completed.stderr,
+        )
+    return completed.stdout
 def _load_chunks_index(persist_dir: str) -> Dict[str, Dict]:
     idx_path = os.path.join(persist_dir, "chunks_index.json")
     # Load chunks index mapping
     chunks_index = _load_chunks_index(persist_dir)
+    # Load KG unconditionally. Import errors propagate at module import time; KG load or query errors below are caught and KG augmentation is skipped.
+    kg_path = os.path.join(persist_dir, "kg_store.ttl")
+    try:
+        kg = KGStore(path=kg_path)
+        retr = KGRetriever(kg)
+        chunk_ids, summaries = retr.get_context_for_question(question, hops=kg_hops)
+        if summaries:
+            kg_text_parts.append("KG entities: " + ", ".join(summaries))
+        # add chunk snippets
+        for cid in chunk_ids:
+            info = chunks_index.get(cid)
+            if info:
+                txt = info.get("text", "")
+                if txt:
+                    snippet = txt.strip().replace("\n", " ")[:min(len(txt), kg_context_max_chars)]
+                    kg_text_parts.append(f"[KG chunk {cid}]: {snippet}")
+    except Exception:
+        # If KG load or query fails, skip KG augmentation (note: the exception is swallowed here and is not logged)
+        kg_text_parts = []
     kg_context = "\n\n".join(kg_text_parts) if kg_text_parts else ""
     if kg_context:
     Args:
         data_dir: Directory containing input documents.
         persist_dir: Directory where the Chroma vectorstore is stored.
+        top_k: Number of chunks to retrieve.
         retrieval_mode: Retrieval strategy (mmr, similarity, hybrid).
     Returns:
         An initialized retriever instance.
     """
     try:
+        # Cast retrieval_mode to the expected literal type to satisfy type checkers
+        from typing import Literal
+        RetrievalMode = Literal["mmr", "similarity", "hybrid"]
+        mode = cast(RetrievalMode, retrieval_mode)
         return get_retriever(
             persist_dir=persist_dir,
             top_k=top_k,
+            retrieval_mode=mode,
         )
     except Exception:
         run_ingest_cli(data_dir=data_dir, persist_dir=persist_dir)
+        from typing import Literal
+        RetrievalMode = Literal["mmr", "similarity", "hybrid"]
+        mode = cast(RetrievalMode, retrieval_mode)
         return get_retriever(
             persist_dir=persist_dir,
             top_k=top_k,
+            retrieval_mode=mode,
         )

vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/data_level0.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f4f2354cfac4766af62b3206edeadc037befeda51170642759cfde636719b6d0
-size 6284000

vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/header.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d683b2d00a21eda899c7d7b067fa8afd0eadec8a6a2f4bbcf835c3a028bf30ed
-size 100

vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/index_metadata.pickle DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e246e950cff89753a1014bf89d814426f6d22f1c0f0fa673dc8d3b11986afb4d
-size 55974

vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/length.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c10f6f8590ed8a62b4b5da7c3c431202130fc835d35f8318945b0c1fcfd1bb56
-size 4000

vectorstore/1f5695a5-7499-40c9-8804-b3c330d70966/link_lists.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5f4382f14c7a7b600756f34e3493f67f810192c9a1d4afef2cd3d3d335fb092c
-size 8148

vectorstore/chroma-embeddings.parquet DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1e572394a2cfa7976f286fa157be4e5eaf287d0721581969eed4c4df7874f04a
-size 3380376

vectorstore/chroma.sqlite3 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6f9fe3c3f3cddec7b92e068aa41dab33d8c0c831fab65768f7567a457be81fad
-size 12951552

vectorstore/chunks_index.json DELETED Viewed

The diff for this file is too large to render. See raw diff

vectorstore/index/id_to_uuid_6855eddb-ade5-445b-9e7f-a8293769c768.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5fee2dec9920bfc53613b6de9edf335643f12002c9aa363756e403115419c097
-size 8714

vectorstore/index/index_6855eddb-ade5-445b-9e7f-a8293769c768.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba0cc06f9ba2330e6768388c4467d1b6a5cb39e7421ef240df400fb89baf2b92
-size 1730044

vectorstore/index/index_metadata_6855eddb-ade5-445b-9e7f-a8293769c768.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c0bc6676e35c07eb122909628dcc80dd749794bdfb45c099a06ef1bcb59be763
-size 105

vectorstore/index/uuid_to_id_6855eddb-ade5-445b-9e7f-a8293769c768.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c41d12045aa07c6d3cfa93a06b0b0e9414c7f9adab57e9f22f1bb3b7328dee1
-size 10211