Zubaish committed
Commit · 9a9d2bd · 1 Parent(s): d322d09
Fix: stable RAG implementation
rag.py CHANGED
@@ -1,42 +1,114 @@
+import os
+from typing import Dict
+
+from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
-from
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from config import VECTOR_DIR, EMBED_MODEL
+from langchain_huggingface import HuggingFaceEmbeddings

+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    pipeline,
+)

+from config import (
+    KB_DIR,
+    CHROMA_DIR,
+    EMBEDDING_MODEL,
+    LLM_MODEL,
+    CHUNK_SIZE,
+    CHUNK_OVERLAP,
+    TOP_K,
 )

-#
+# ---------------------------
+# Load & index documents
+# ---------------------------
+
+def load_documents():
+    loader = DirectoryLoader(
+        KB_DIR,
+        glob="**/*.pdf",
+        loader_cls=PyPDFLoader,
+    )
+    return loader.load()
+
+
+def build_vectorstore():
+    documents = load_documents()
+
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=CHUNK_OVERLAP,
+    )
+    chunks = splitter.split_documents(documents)
+
+    embeddings = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL
+    )
+
+    vectordb = Chroma.from_documents(
+        documents=chunks,
+        embedding=embeddings,
+        persist_directory=CHROMA_DIR,
+    )
+
+    vectordb.persist()
+    return vectordb
+
+
+# Build or load Chroma DB
+if os.path.exists(CHROMA_DIR):
+    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+    vectordb = Chroma(
+        persist_directory=CHROMA_DIR,
+        embedding_function=embeddings,
+    )
+else:
+    vectordb = build_vectorstore()
+
+
+# ---------------------------
+# Load LLM (HF Space safe)
+# ---------------------------

 tokenizer = AutoTokenizer.from_pretrained(
-    trust_remote_code=True
+    LLM_MODEL,
+    trust_remote_code=True,
 )

 model = AutoModelForCausalLM.from_pretrained(
-    trust_remote_code=True
+    LLM_MODEL,
+    trust_remote_code=True,
+    device_map="cpu",
 )

-llm = pipeline(
+generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=256
+    max_new_tokens=256,
+    do_sample=True,
+    temperature=0.7,
 )

-def ask_rag_with_status(question: str):
-    docs = db.similarity_search(question, k=3)
-    context = "\n\n".join(d.page_content for d in docs)

+# ---------------------------
+# RAG Query
+# ---------------------------
+
+def ask_rag_with_status(question: str) -> Dict:
+    docs = vectordb.similarity_search(question, k=TOP_K)
+
+    context = "\n\n".join(
+        [doc.page_content for doc in docs]
+    )
+
+    prompt = f"""
+You are a helpful assistant.
+Answer the question using ONLY the context below.
+If the answer is not in the context, say "I don't know".

 Context:
 {context}
@@ -44,10 +116,15 @@ Context:
 Question:
 {question}

-Answer:
+Answer:
+""".strip()
+
+    output = generator(prompt)[0]["generated_text"]
+
+    answer = output.split("Answer:")[-1].strip()

     return {
-        "
-        "
+        "question": question,
+        "answer": answer,
+        "sources": [doc.metadata for doc in docs],
     }
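The new module pulls every tunable from config, whose contents are not part of this diff. A minimal sketch of what rag.py expects config.py to export, with placeholder values only (every value below is an assumption, not the repository's real configuration):

# config.py - hypothetical sketch; the diff does not show the real values.
KB_DIR = "knowledge_base"          # assumed: directory of source PDFs
CHROMA_DIR = "chroma_db"           # assumed: Chroma persistence directory
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # placeholder embedding model
LLM_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"                    # placeholder small CPU-friendly LLM
CHUNK_SIZE = 1000                  # assumed: characters per chunk
CHUNK_OVERLAP = 200                # assumed: overlap between adjacent chunks
TOP_K = 3                          # assumed: number of chunks retrieved per query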
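Since the index is built or loaded at import time, the module can then be queried directly. A minimal usage sketch (the question string and printed fields are illustrative, based on the dict returned by ask_rag_with_status above):

# Usage sketch for the dict returned by ask_rag_with_status.
from rag import ask_rag_with_status

result = ask_rag_with_status("What topics does the knowledge base cover?")
print(result["answer"])
for meta in result["sources"]:
    print(meta)  # PyPDFLoader metadata, e.g. source path and page number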