Ryanfafa commited on
Commit
36db703
·
verified ·
1 Parent(s): 778eb7b

Upload rag_engine.py

Browse files
Files changed (1) hide show
  1. rag_engine.py +186 -0
rag_engine.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG Engine
3
+ ──────────
4
+ - Embeddings : sentence-transformers/all-MiniLM-L6-v2 (HuggingFace, free)
5
+ - Vector DB : ChromaDB (local, in-memory / persistent)
6
+ - LLM : HuggingFace Router API (Mistral-7B-Instruct-v0.3, free tier)
7
+ - Chunking : Recursive character splitter with overlap
8
+ """
9
+
10
+ import os
11
+ import re
12
+ import requests
13
+ import tempfile
14
+ from typing import Tuple, List
15
+
16
+ import chromadb
17
+ from chromadb.config import Settings
18
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
19
+ from langchain_community.embeddings import HuggingFaceEmbeddings
20
+ from langchain_community.vectorstores import Chroma
21
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
22
+ from langchain.schema import Document
23
+
24
# ─── Configuration ─────────────────────────────────────────────────────────────
# Local CPU sentence-embedding model used to vectorize document chunks.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Chat model served through the HuggingFace Router (OpenAI-compatible endpoint).
HF_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
HF_API_URL = f"https://router.huggingface.co/hf-inference/models/{HF_MODEL_ID}/v1/chat/completions"
# Chunking: ~800 characters per chunk, 150-character overlap so sentences that
# straddle a chunk boundary are not lost.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 150
# Number of chunks retrieved per query (fetch_k for MMR is derived from this).
TOP_K = 4
COLLECTION_NAME = "docmind_collection"
# On-disk location of the persistent Chroma index.
CHROMA_DIR = "./chroma_db"
33
+
34
+
35
class RAGEngine:
    """Full RAG pipeline: ingest → embed → store → retrieve → generate."""

    def __init__(self):
        # Embeddings and vectorstore are created lazily: the model download
        # and DB creation happen on first use, not at construction time.
        self._embeddings = None
        self._vectorstore = None
        self._splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
            separators=["\n\n", "\n", ". ", " ", ""],
        )

    # ── Lazy-load embeddings ───────────────────────────────────────────────────
    @property
    def embeddings(self):
        """HuggingFace sentence-embedding model, instantiated on first access."""
        if self._embeddings is None:
            self._embeddings = HuggingFaceEmbeddings(
                model_name=EMBED_MODEL,
                model_kwargs={"device": "cpu"},
                # Normalized vectors make cosine similarity ≡ dot product.
                encode_kwargs={"normalize_embeddings": True},
            )
        return self._embeddings

    # ── Ingest an uploaded Streamlit file object ───────────────────────────────
    def ingest_file(self, uploaded_file) -> int:
        """Write an uploaded file to a temp path, ingest it, return chunk count.

        Parameters:
            uploaded_file: Streamlit ``UploadedFile``-like object exposing
                ``.name`` and ``.read()``.
        """
        suffix = Path_suffix(uploaded_file.name)
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name
        try:
            return self.ingest_path(tmp_path, uploaded_file.name)
        finally:
            # BUGFIX: with delete=False nothing else removes the temp file;
            # the previous version leaked one file per upload.
            os.unlink(tmp_path)

    # ── Ingest from a file path ────────────────────────────────────────────────
    def ingest_path(self, path: str, name: str = "") -> int:
        """Load, chunk, embed, and index the document at *path*.

        Returns the number of chunks written to the vectorstore.
        """
        suffix = Path_suffix(name or path)

        if suffix == ".pdf":
            loader = PyPDFLoader(path)
        else:
            # Anything non-PDF is treated as UTF-8 text.
            loader = TextLoader(path, encoding="utf-8")

        raw_docs = loader.load()

        # Tag every page/doc with a display name for source attribution.
        for doc in raw_docs:
            doc.metadata["source"] = name or os.path.basename(path)

        chunks = self._splitter.split_documents(raw_docs)

        # BUGFIX: Chroma.from_documents appends to an existing collection, so
        # the old "reset & recreate" comment was not backed by code — drop the
        # previous collection first so answers come only from the new document.
        # NOTE(review): a collection persisted in CHROMA_DIR by an *earlier
        # process* is still appended to on the first ingest — confirm whether
        # CHROMA_DIR should be wiped at startup as well.
        if self._vectorstore is not None:
            self._vectorstore.delete_collection()

        self._vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=self.embeddings,
            collection_name=COLLECTION_NAME,
            persist_directory=CHROMA_DIR,
            client_settings=Settings(anonymized_telemetry=False),
        )

        return len(chunks)

    # ── Query: retrieve + generate ─────────────────────────────────────────────
    def query(self, question: str) -> Tuple[str, List[str]]:
        """Answer *question* from the indexed document.

        Returns ``(answer, sources)`` where *sources* is the list of unique
        source names of the retrieved chunks (order is arbitrary — it comes
        from a set).
        """
        if self._vectorstore is None:
            return "⚠️ Please upload a document first.", []

        # 1. Retrieve top-k relevant chunks (MMR trades a little relevance
        #    for diversity among the returned chunks).
        retriever = self._vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": TOP_K, "fetch_k": TOP_K * 3},
        )
        docs = retriever.invoke(question)

        # 2. Build the context string fed to the LLM.
        context = "\n\n---\n\n".join(
            f"[Chunk {i+1}]\n{d.page_content}" for i, d in enumerate(docs)
        )

        # 3. Unique source names for display.
        sources = list({d.metadata.get("source", "Document") for d in docs})

        # 4. Generate the answer.
        answer = self._generate(question, context)

        return answer, sources

    # ── LLM call via NEW HuggingFace Router API ────────────────────────────────
    def _generate(self, question: str, context: str) -> str:
        """Call the HF Router chat-completions endpoint; fall back to an
        extractive answer (via ``_fallback_answer``) on any failure.
        """
        try:
            hf_token = os.environ.get("HF_TOKEN", "")

            headers = {"Content-Type": "application/json"}
            # Anonymous calls are allowed on the free tier; the token only
            # raises rate limits.
            if hf_token:
                headers["Authorization"] = f"Bearer {hf_token}"

            payload = {
                "model": HF_MODEL_ID,
                "messages": [
                    {
                        "role": "system",
                        "content": (
                            "You are DocMind, an expert document analyst. "
                            "Answer the user's question using ONLY the provided document context. "
                            "Be concise, accurate, and cite specific details from the context. "
                            "If the answer is not in the context, say so clearly."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            f"Document context:\n{context}\n\n"
                            f"Question: {question}"
                        ),
                    },
                ],
                "max_tokens": 512,
                # Low temperature: grounded, repeatable answers.
                "temperature": 0.2,
            }

            resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
            resp.raise_for_status()

            answer = resp.json()["choices"][0]["message"]["content"].strip()
            return answer or "I could not generate a response. Please try rephrasing."

        # Broad on purpose: network, HTTP, and schema errors all degrade to
        # the extractive fallback rather than crashing the UI.
        except Exception as e:
            return _fallback_answer(question, context, str(e))
160
+
161
+
162
+ # ─── Fallback (no LLM) ─────────────────────────────────────────────────────────
163
+ def _fallback_answer(question: str, context: str, error: str) -> str:
164
+ """Simple extractive answer when LLM is unavailable."""
165
+ keywords = set(re.findall(r'\b\w{4,}\b', question.lower()))
166
+ best_chunk, best_score = "", 0
167
+
168
+ for chunk in context.split("---"):
169
+ words = set(re.findall(r'\b\w{4,}\b', chunk.lower()))
170
+ score = len(keywords & words)
171
+ if score > best_score:
172
+ best_score = score
173
+ best_chunk = chunk.strip()
174
+
175
+ if best_chunk:
176
+ excerpt = best_chunk[:600] + ("..." if len(best_chunk) > 600 else "")
177
+ return (
178
+ f"*(LLM unavailable – showing most relevant excerpt)*\n\n{excerpt}\n\n"
179
+ f"<small>Error: {error}</small>"
180
+ )
181
+ return f"⚠️ Could not generate answer. Error: {error}"
182
+
183
+
184
# ─── Helper ────────────────────────────────────────────────────────────────────
def path_suffix(name: str) -> str:
    """Return the lowercased file extension of *name* (e.g. ``".pdf"``).

    Falls back to ``".txt"`` when the name has no extension, so extension-less
    uploads are handled by the plain-text loader.
    """
    return os.path.splitext(name)[-1].lower() or ".txt"


# Backward-compatible alias: in-file callers use the original non-PEP 8 name.
Path_suffix = path_suffix