Spaces:

oluinioluwa814
/

RAG

Sleeping

App Files Files Community

oluinioluwa814 committed on Dec 11, 2025

Commit

c697c36

verified ·

1 Parent(s): b434dd2

Update rag_pipeline.py

Browse files

Files changed (1) hide show

rag_pipeline.py +31 -32

rag_pipeline.py CHANGED Viewed

@@ -1,57 +1,56 @@
 from sentence_transformers import SentenceTransformer
-import chromadb
-from chromadb.config import Settings
 import uuid
 class RAGPipeline:
     """
-    Retrieval-Augmented Generation (RAG) pipeline:
-    - Stores documents in a vector database (ChromaDB)
-    - Generates embeddings using SentenceTransformer
-    - Retrieves relevant documents for queries
     """
     def __init__(self, db_dir: str = "./chroma_store"):
-        # db_dir is handed to ChromaDB as its persist_directory.
-        # Initialize the embedding model
         self.embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-        # Initialize ChromaDB client
-        self.client = chromadb.Client(
-            Settings(chroma_db_impl="duckdb+parquet", persist_directory=db_dir)
-        )
-        # Create or get a collection for storing documents
-        self.collection = self.client.get_or_create_collection(name="rag_collection")
     def add_document(self, text: str, doc_id: str = None):
-        """
-        Adds a single document to the vector database.
-
-        When doc_id is empty or None, a random UUID string is used as the id.
-        """
         if not doc_id:
             doc_id = str(uuid.uuid4())
-        # Embed the text with self.embedder and store the vector with the raw text.
-        embedding = self.embedder.encode(text).tolist()
-        self.collection.add(documents=[text], ids=[doc_id], embeddings=[embedding])
     def retrieve(self, query: str, top_k: int = 3):
-        """
-        Retrieves the top_k most relevant documents for a query.
-
-        Returns the first entry of the result's "documents" list, or an
-        empty list when that key is missing.
-        """
-        q_embedding = self.embedder.encode(query).tolist()
         results = self.collection.query(
-            query_embeddings=[q_embedding],
             n_results=top_k
         )
-        # Return list of documents
         return results.get("documents", [[]])[0]
     def reset_vector_store(self):
-        """
-        Clears all documents from the collection.
-        """
         try:
-            self.client.delete_collection("rag_collection")
         except Exception:
-            # Deletion errors are ignored; the collection is (re)created below.
             pass
-        self.collection = self.client.get_or_create_collection(name="rag_collection")

 from sentence_transformers import SentenceTransformer
+from chromadb import Client
+from chromadb.utils import embedding_functions
 import uuid
 class RAGPipeline:
     """
+    Retrieval pipeline backed by a ChromaDB collection.
+
+    Text is embedded by the collection's SentenceTransformer embedding
+    function, so add_document() and retrieve() take plain strings.
     """
     def __init__(self, db_dir: str = "./chroma_store"):
+        """Load the embedding model and open the collection stored in db_dir."""
+        # Not used by the methods below (the collection embeds through its own
+        # embedding function); retained as a public attribute.
         self.embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+        # Function-scope import: the module only imports Client from chromadb.
+        from chromadb import PersistentClient
+        # Client() takes no path, so db_dir was silently ignored; a persistent
+        # client keeps the collection under db_dir as the parameter implies.
+        self.client = PersistentClient(path=db_dir)
+        self.collection_name = "rag_collection"
+        self.collection = self._open_collection()
+    def _open_collection(self):
+        """Return the named collection, creating it if it does not exist."""
+        # get_or_create_collection replaces the list_collections() scan, which
+        # relied on each entry having a .name attribute. Passing the embedding
+        # function on both the "get" and "create" paths keeps a re-opened
+        # collection on the same model as a freshly created one.
+        return self.client.get_or_create_collection(
+            name=self.collection_name,
+            embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
+                model_name="sentence-transformers/all-MiniLM-L6-v2"
+            ),
+        )
     def add_document(self, text: str, doc_id: str = None):
+        """Store text under doc_id; a random UUID is used when doc_id is empty."""
         if not doc_id:
             doc_id = str(uuid.uuid4())
+        # No embeddings are passed: the collection's embedding function runs.
+        self.collection.add(
+            documents=[text],
+            ids=[doc_id],
+        )
     def retrieve(self, query: str, top_k: int = 3):
+        """Return the documents matched to query, requesting at most top_k.
+
+        The result is the first entry of the query result's "documents"
+        list, or an empty list when that key is missing.
+        """
         results = self.collection.query(
+            query_texts=[query],
             n_results=top_k
         )
         return results.get("documents", [[]])[0]
     def reset_vector_store(self):
+        """Delete the collection and open a new one under the same name."""
         try:
+            self.client.delete_collection(self.collection_name)
         except Exception:
+            # Best effort: a failed delete is ignored and the collection is
+            # (re)opened below.
             pass
+        self.collection = self._open_collection()