Spaces:

Navya-Sree
/

multi-agent-code-assistant

Build error

App Files Files Community

Navya-Sree committed on Dec 22, 2025

Commit

ed60c1b

verified ·

1 Parent(s): 166441f

Create utils/rag_system.py

Browse files

Files changed (1) hide show

utils/rag_system.py +124 -0

utils/rag_system.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import os
+from typing import List, Dict
+import chromadb
+from chromadb.utils import embedding_functions
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import TextLoader
+class RAGSystem:
+    """Retrieval-Augmented Generation helper backed by a persistent Chroma collection.
+
+    Short documentation snippets are stored in a Chroma collection, embedded
+    with a sentence-transformers model, and retrieved by similarity so they
+    can be prepended to a prompt as context.
+    """
+
+    # Snippets used to seed a collection that is empty at construction time.
+    _DEFAULT_DOCS = (
+        {
+            "id": "1",
+            "text": "Python functions are defined using the def keyword. Example: def hello(): return 'Hello'",
+            "metadata": {"source": "python_basics"},
+        },
+        {
+            "id": "2",
+            "text": "Use type hints for better code documentation. Example: def add(a: int, b: int) -> int:",
+            "metadata": {"source": "best_practices"},
+        },
+        {
+            "id": "3",
+            "text": "Always handle exceptions with try-except blocks to prevent crashes.",
+            "metadata": {"source": "error_handling"},
+        },
+        {
+            "id": "4",
+            "text": "Use list comprehensions for concise list creation: [x*2 for x in range(10)]",
+            "metadata": {"source": "python_tips"},
+        },
+        {
+            "id": "5",
+            "text": "Document your code with docstrings. Use triple quotes for multi-line documentation.",
+            "metadata": {"source": "documentation"},
+        },
+    )
+
+    def __init__(
+        self,
+        collection_name: str = "python_docs",
+        persist_directory: str = "./chroma_db",
+        model_name: str = "all-MiniLM-L6-v2",
+    ) -> None:
+        """Open (or create) the collection and seed it when it is empty.
+
+        Args:
+            collection_name: Name of the Chroma collection to use.
+            persist_directory: Directory handed to ``chromadb.PersistentClient``.
+            model_name: Model name handed to the sentence-transformers
+                embedding function.
+        """
+        self.client = chromadb.PersistentClient(path=persist_directory)
+        # Use sentence transformers for embeddings
+        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
+            model_name=model_name
+        )
+        # Get or create collection
+        self.collection = self.client.get_or_create_collection(
+            name=collection_name,
+            embedding_function=self.embedding_function,
+        )
+        # Load default documents if collection is empty
+        if self.collection.count() == 0:
+            self._load_default_documents()
+
+    def _load_default_documents(self) -> None:
+        """Add the built-in Python documentation snippets to the collection."""
+        self.collection.add(
+            documents=[doc["text"] for doc in self._DEFAULT_DOCS],
+            # Copy each metadata dict so the class-level defaults are never shared.
+            metadatas=[dict(doc["metadata"]) for doc in self._DEFAULT_DOCS],
+            ids=[doc["id"] for doc in self._DEFAULT_DOCS],
+        )
+
+    def add_document(self, text: str, source: str = "user") -> str:
+        """Add a new document to the knowledge base.
+
+        Args:
+            text: Document text to store and embed.
+            source: Value stored under the ``"source"`` metadata key.
+
+        Returns:
+            The ID assigned to the stored document.
+        """
+        import uuid
+
+        # A count-based ID ("doc_<count + 1>") repeats an existing ID as soon
+        # as any document has been deleted; a random UUID does not.
+        doc_id = f"doc_{uuid.uuid4().hex}"
+        self.collection.add(
+            documents=[text],
+            metadatas=[{"source": source}],
+            ids=[doc_id],
+        )
+        return doc_id
+
+    def search(self, query: str, n_results: int = 3) -> List[Dict]:
+        """Search for relevant documents.
+
+        Args:
+            query: Search query.
+            n_results: Maximum number of results to return.
+
+        Returns:
+            A list of dicts with the keys ``"text"``, ``"metadata"`` and
+            ``"distance"``, in the order returned by the collection. The list
+            is empty for a blank query, a non-positive ``n_results`` or an
+            empty collection.
+        """
+        if not query or not query.strip():
+            return []
+        # Never request more results than the collection holds.
+        limit = min(n_results, self.collection.count())
+        if limit <= 0:
+            return []
+        results = self.collection.query(
+            query_texts=[query],
+            n_results=limit,
+        )
+
+        def first_row(key: str) -> list:
+            # Results are nested per query; only one query text was sent.
+            rows = results.get(key)
+            return list(rows[0]) if rows else []
+
+        texts = first_row("documents")
+        metadatas = first_row("metadatas")
+        distances = first_row("distances")
+        return [
+            {
+                "text": text,
+                "metadata": metadatas[i] if i < len(metadatas) else None,
+                "distance": distances[i] if i < len(distances) else None,
+            }
+            for i, text in enumerate(texts)
+        ]
+
+    def get_context(self, query: str, n_results: int = 2) -> str:
+        """Get relevant context for a coding query.
+
+        Args:
+            query: Coding task or question.
+            n_results: Number of top documents to include in the context.
+
+        Returns:
+            A newline-joined string starting with ``"Relevant documentation:"``
+            followed by the numbered document texts, or ``""`` when nothing
+            relevant is found.
+        """
+        relevant_docs = self.search(query, n_results=n_results)
+        if not relevant_docs:
+            return ""
+        # Combine top documents into context
+        context_parts = ["Relevant documentation:"]
+        context_parts.extend(
+            f"{rank}. {doc['text']}" for rank, doc in enumerate(relevant_docs, start=1)
+        )
+        return "\n".join(context_parts)