Zubaish committed · Commit f6f60e8 · 1 Parent(s): 7167638 · update

rag.py CHANGED
@@ -1,63 +1,33 @@
  # rag.py
-
  import os
- from datasets import load_dataset
  from transformers import pipeline
- from
- from
- from
-
- from config import HF_DATASET_REPO, EMBEDDING_MODEL, LLM_MODEL
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR
-
-
- # -----------------------------
- # Load documents from HF dataset
- # -----------------------------
- def load_documents():
-     documents = []
-
-     try:
-         ds = load_dataset(HF_DATASET_REPO, split="train")
-     except Exception as e:
-         print(f"❌ Could not load dataset: {e}")
-         return []
-
-     # Expecting dataset rows like: { "text": "..." }
-     for row in ds:
-         text = row.get("text")
-         if text and isinstance(text, str):
-             documents.append(Document(page_content=text))
-
-     print(f"✅ Loaded {len(documents)} documents from dataset")
-     return documents
-

  # -----------------------------
- # Embeddings
+ # 1. Initialize Embeddings (LangChain-HuggingFace)
  # -----------------------------
  embeddings = HuggingFaceEmbeddings(
      model_name=EMBEDDING_MODEL
  )

-
  # -----------------------------
- # Vector DB (
+ # 2. Load Vector DB (Safe Loading)
  # -----------------------------
+ # We expect the DB to be pre-built by ingest.py during Docker build
+ if os.path.exists(CHROMA_DIR) and os.listdir(CHROMA_DIR):
+     vectordb = Chroma(
+         persist_directory=CHROMA_DIR,
+         embedding_function=embeddings
-
-
-
-
-
- else:
-     vectordb = Chroma.from_documents(
-         documents=documents,
-         embedding=embeddings
      )
-     print("✅ Vector DB
+     print(f"✅ Vector DB loaded from {CHROMA_DIR}")
+ else:
+     print(f"⚠️ Vector DB not found at {CHROMA_DIR}. Please check ingestion.")
+     vectordb = None
-

  # -----------------------------
- # LLM Pipeline (CPU safe)
+ # 3. LLM Pipeline (CPU safe)
  # -----------------------------
  qa_pipeline = pipeline(
      task="text-generation",
@@ -65,22 +35,22 @@ qa_pipeline = pipeline(
      max_new_tokens=256
  )

-
  # -----------------------------
- # RAG Query Function
+ # 4. RAG Query Function
  # -----------------------------
  def ask_rag_with_status(question: str):
      if vectordb is None:
-         return "Knowledge base is empty.", "NO_KB"
+         return "Knowledge base is empty. Technical error during ingestion.", "NO_KB"

+     # Search for relevant context
      docs = vectordb.similarity_search(question, k=3)

      if not docs:
-         return "No relevant documents found.", "NO_MATCH"
+         return "No relevant documents found in the knowledge base.", "NO_MATCH"

      context = "\n\n".join(d.page_content for d in docs)

-     prompt = f"""Use the context below to answer the question.
+     prompt = f"""Use the context below to answer the question accurately.

  Context:
  {context}
@@ -91,6 +61,9 @@ Question:
  Answer:"""

      result = qa_pipeline(prompt)
-
+
+     # Extract only the generated answer
+     full_text = result[0]["generated_text"]
+     answer = full_text.split("Answer:")[-1].strip()

-     return answer, "
+     return answer, ["Context retrieved", "LLM processed"]
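For reference, rag.py imports EMBEDDING_MODEL, LLM_MODEL, and CHROMA_DIR from a config module that is not part of this commit. A minimal config.py consistent with those imports might look like the sketch below; every value shown is a placeholder assumption, not something taken from this repo.

# config.py -- hypothetical sketch; the repo's actual values are not shown in this commit.
HF_DATASET_REPO = "user/some-dataset"                       # assumed; used by the removed loader and by ingest.py
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # assumed embedding model name
LLM_MODEL = "distilgpt2"                                    # assumed small, CPU-friendly LLM
CHROMA_DIR = "./chroma_db"                                  # assumed persistence directory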
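The new loader comments that the Chroma store is pre-built by ingest.py during the Docker build, but ingest.py itself is not part of this commit. A minimal sketch of what such a script could look like, reusing the dataset-loading logic that the removed load_documents() had and assuming HF_DATASET_REPO still lives in config.py:

# ingest.py -- hypothetical sketch, not the repo's actual script.
# Pre-builds the persisted Chroma store that rag.py opens at startup.
from datasets import load_dataset
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

from config import HF_DATASET_REPO, EMBEDDING_MODEL, CHROMA_DIR  # HF_DATASET_REPO assumed


def build_vector_db():
    # Expecting dataset rows like {"text": "..."}, as the old load_documents() did.
    ds = load_dataset(HF_DATASET_REPO, split="train")
    documents = [
        Document(page_content=row["text"])
        for row in ds
        if isinstance(row.get("text"), str) and row["text"]
    ]

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # Writing with persist_directory is what lets rag.py reopen the store later.
    Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=CHROMA_DIR,
    )
    print(f"✅ Persisted {len(documents)} documents to {CHROMA_DIR}")


if __name__ == "__main__":
    build_vector_db()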
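A short sketch of how a caller might consume ask_rag_with_status; the question string and the calling module are illustrative only:

# Hypothetical caller -- not part of this commit.
from rag import ask_rag_with_status

answer, status = ask_rag_with_status("What topics does the knowledge base cover?")
if status in ("NO_KB", "NO_MATCH"):
    print(f"Lookup failed: {answer} ({status})")
else:
    print(answer)  # status is ["Context retrieved", "LLM processed"] on success

Note that after this commit the function returns a string status code on failure but a list of stage labels on success, so callers have to check for the failure codes explicitly, as above.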