Zubaish committed on
Commit
ffadad7
·
1 Parent(s): 1e98153

rag update

Browse files
Files changed (1) hide show
  1. rag.py +40 -27
rag.py CHANGED
@@ -14,43 +14,51 @@ from config import (
14
  LLM_MODEL,
15
  )
16
 
17
- # -----------------------------
18
- # Load embeddings (CPU-safe)
19
- # -----------------------------
20
  embeddings = HuggingFaceEmbeddings(
21
  model_name=EMBEDDING_MODEL
22
  )
23
 
24
- # -----------------------------
25
- # Load documents
26
- # -----------------------------
27
- docs = []
 
28
  if os.path.exists(KB_DIR):
29
  for file in os.listdir(KB_DIR):
30
- if file.endswith(".pdf"):
31
  loader = PyPDFLoader(os.path.join(KB_DIR, file))
32
- docs.extend(loader.load())
33
 
 
 
 
34
  splitter = RecursiveCharacterTextSplitter(
35
  chunk_size=500,
36
  chunk_overlap=50
37
  )
38
- splits = splitter.split_documents(docs)
39
-
40
- # -----------------------------
41
- # Vector store
42
- # -----------------------------
43
- vectordb = Chroma.from_documents(
44
- splits,
45
- embedding=embeddings,
46
- persist_directory=VECTOR_DB_DIR
47
- )
48
 
49
- retriever = vectordb.as_retriever(search_kwargs={"k": 3})
 
 
 
 
 
 
50
 
51
- # -----------------------------
 
 
 
 
 
 
 
 
52
  # Load LLM (CPU ONLY, NO ACCELERATE)
53
- # -----------------------------
54
  tokenizer = AutoTokenizer.from_pretrained(
55
  LLM_MODEL,
56
  trust_remote_code=True
@@ -58,8 +66,7 @@ tokenizer = AutoTokenizer.from_pretrained(
58
 
59
  model = AutoModelForCausalLM.from_pretrained(
60
  LLM_MODEL,
61
- trust_remote_code=True,
62
- torch_dtype=None, # CPU-safe
63
  )
64
 
65
  llm = pipeline(
@@ -70,12 +77,18 @@ llm = pipeline(
70
  do_sample=False
71
  )
72
 
73
- # -----------------------------
74
- # RAG function
75
- # -----------------------------
76
  def ask_rag_with_status(question: str):
77
  status = []
78
 
 
 
 
 
 
 
79
  status.append("🔍 Retrieving documents...")
80
  docs = retriever.get_relevant_documents(question)
81
 
 
14
  LLM_MODEL,
15
  )
16
 
17
+ # --------------------------------------------------
18
+ # Embeddings (CPU-safe)
19
+ # --------------------------------------------------
20
  embeddings = HuggingFaceEmbeddings(
21
  model_name=EMBEDDING_MODEL
22
  )
23
 
24
+ # --------------------------------------------------
25
+ # Load PDFs (if any)
26
+ # --------------------------------------------------
27
+ documents = []
28
+
29
  if os.path.exists(KB_DIR):
30
  for file in os.listdir(KB_DIR):
31
+ if file.lower().endswith(".pdf"):
32
  loader = PyPDFLoader(os.path.join(KB_DIR, file))
33
+ documents.extend(loader.load())
34
 
35
+ # --------------------------------------------------
36
+ # Split documents
37
+ # --------------------------------------------------
38
  splitter = RecursiveCharacterTextSplitter(
39
  chunk_size=500,
40
  chunk_overlap=50
41
  )
 
 
 
 
 
 
 
 
 
 
42
 
43
+ splits = splitter.split_documents(documents) if documents else []
44
+
45
+ # --------------------------------------------------
46
+ # Vector DB (ONLY if docs exist)
47
+ # --------------------------------------------------
48
+ vectordb = None
49
+ retriever = None
50
 
51
+ if splits:
52
+ vectordb = Chroma.from_documents(
53
+ splits,
54
+ embedding=embeddings,
55
+ persist_directory=VECTOR_DB_DIR
56
+ )
57
+ retriever = vectordb.as_retriever(search_kwargs={"k": 3})
58
+
59
+ # --------------------------------------------------
60
  # Load LLM (CPU ONLY, NO ACCELERATE)
61
+ # --------------------------------------------------
62
  tokenizer = AutoTokenizer.from_pretrained(
63
  LLM_MODEL,
64
  trust_remote_code=True
 
66
 
67
  model = AutoModelForCausalLM.from_pretrained(
68
  LLM_MODEL,
69
+ trust_remote_code=True
 
70
  )
71
 
72
  llm = pipeline(
 
77
  do_sample=False
78
  )
79
 
80
+ # --------------------------------------------------
81
+ # Public RAG API
82
+ # --------------------------------------------------
83
  def ask_rag_with_status(question: str):
84
  status = []
85
 
86
+ if retriever is None:
87
+ return {
88
+ "answer": "❌ Knowledge base is empty. Please upload PDFs to the dataset or storage.",
89
+ "status": ["⚠️ No documents indexed"]
90
+ }
91
+
92
  status.append("🔍 Retrieving documents...")
93
  docs = retriever.get_relevant_documents(question)
94