Zubaish committed on
Commit
cd319c6
·
1 Parent(s): c2d3414

Working RAG with kb folder

Browse files
Files changed (6) hide show
  1. Dockerfile +4 -5
  2. app.py +6 -10
  3. config.py +4 -8
  4. ingest.py +23 -11
  5. rag.py +22 -89
  6. requirements.txt +1 -6
Dockerfile CHANGED
@@ -2,16 +2,15 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- git \
7
- && rm -rf /var/lib/apt/lists/*
8
 
9
  COPY requirements.txt .
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
- COPY app.py rag.py ingest.py guardrails.py config.py ./
 
13
 
14
- RUN mkdir -p kb_docs
15
 
16
  EXPOSE 7860
17
 
 
2
 
3
  WORKDIR /app
4
 
5
+ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
 
 
6
 
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
+ COPY app.py rag.py ingest.py config.py ./
11
+ COPY kb ./kb
12
 
13
+ RUN python ingest.py
14
 
15
  EXPOSE 7860
16
 
app.py CHANGED
@@ -1,16 +1,12 @@
1
  from fastapi import FastAPI
2
- from pydantic import BaseModel
3
  from rag import ask_rag_with_status
4
 
5
- app = FastAPI(title="HubRAG API")
6
-
7
- class Question(BaseModel):
8
- question: str
9
 
10
  @app.get("/")
11
- def root():
12
- return {"status": "ok", "message": "RAG API running"}
13
 
14
- @app.post("/chat")
15
- def chat(req: Question):
16
- return ask_rag_with_status(req.question)
 
1
  from fastapi import FastAPI
 
2
  from rag import ask_rag_with_status
3
 
4
+ app = FastAPI()
 
 
 
5
 
6
  @app.get("/")
7
+ def health():
8
+ return {"status": "ok"}
9
 
10
+ @app.get("/ask")
11
+ def ask(q: str):
12
+ return ask_rag_with_status(q)
config.py CHANGED
@@ -1,9 +1,5 @@
1
- import os
2
 
3
- HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
4
-
5
- if not HUGGINGFACEHUB_API_TOKEN:
6
- raise RuntimeError(
7
- "HUGGINGFACEHUB_API_TOKEN is not set. "
8
- "Set it as an environment variable or HF Space Secret."
9
- )
 
1
+ # config.py
2
 
3
+ KB_DIR = "kb"
4
+ VECTOR_DIR = "vectorstore"
5
+ EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 
 
 
 
ingest.py CHANGED
@@ -1,24 +1,36 @@
 
1
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
- import os
 
 
4
 
5
- def load_and_split_docs(path="kb_docs"):
6
- if not os.path.exists(path):
7
- return []
8
 
9
  loader = DirectoryLoader(
10
- path,
11
  glob="**/*.pdf",
12
  loader_cls=PyPDFLoader
13
  )
14
-
15
  docs = loader.load()
16
- if not docs:
17
- return []
18
 
19
  splitter = RecursiveCharacterTextSplitter(
20
- chunk_size=800,
21
- chunk_overlap=100
22
  )
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- return splitter.split_documents(docs)
 
 
1
+ import os
2
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from langchain_community.vectorstores import Chroma
6
+ from config import KB_DIR, VECTOR_DIR, EMBED_MODEL
7
 
8
+ def ingest():
9
+ if not os.path.exists(KB_DIR):
10
+ raise RuntimeError(f"{KB_DIR} folder not found")
11
 
12
  loader = DirectoryLoader(
13
+ KB_DIR,
14
  glob="**/*.pdf",
15
  loader_cls=PyPDFLoader
16
  )
 
17
  docs = loader.load()
 
 
18
 
19
  splitter = RecursiveCharacterTextSplitter(
20
+ chunk_size=500,
21
+ chunk_overlap=50
22
  )
23
+ splits = splitter.split_documents(docs)
24
+
25
+ embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
26
+
27
+ Chroma.from_documents(
28
+ documents=splits,
29
+ embedding=embeddings,
30
+ persist_directory=VECTOR_DIR
31
+ )
32
+
33
+ print("✅ Ingestion complete")
34
 
35
+ if __name__ == "__main__":
36
+ ingest()
rag.py CHANGED
@@ -1,104 +1,42 @@
1
- import os
2
-
3
- from langchain_community.document_loaders import PyPDFLoader
4
- from langchain_text_splitters import RecursiveCharacterTextSplitter
5
- from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import Chroma
7
-
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
9
 
10
- from config import (
11
- KB_DIR,
12
- PERSIST_DIR,
13
- EMBEDDING_MODEL,
14
- LLM_MODEL,
15
- CHUNK_SIZE,
16
- CHUNK_OVERLAP,
17
- TOP_K,
18
- )
19
 
20
- # -----------------------------
21
- # Load embeddings
22
- # -----------------------------
23
- embeddings = HuggingFaceEmbeddings(
24
- model_name=EMBEDDING_MODEL
25
  )
26
 
27
- # -----------------------------
28
- # Load or build vector DB
29
- # -----------------------------
30
- if not os.path.exists(PERSIST_DIR):
31
- os.makedirs(PERSIST_DIR, exist_ok=True)
32
-
33
- if not os.listdir(PERSIST_DIR):
34
- print("⏳ Loading documents...")
35
-
36
- docs = []
37
- for filename in os.listdir(KB_DIR):
38
- if filename.lower().endswith(".pdf"):
39
- loader = PyPDFLoader(os.path.join(KB_DIR, filename))
40
- docs.extend(loader.load())
41
-
42
- splitter = RecursiveCharacterTextSplitter(
43
- chunk_size=CHUNK_SIZE,
44
- chunk_overlap=CHUNK_OVERLAP
45
- )
46
- splits = splitter.split_documents(docs)
47
-
48
- vectorstore = Chroma.from_documents(
49
- documents=splits,
50
- embedding=embeddings,
51
- persist_directory=PERSIST_DIR
52
- )
53
- vectorstore.persist()
54
- else:
55
- vectorstore = Chroma(
56
- persist_directory=PERSIST_DIR,
57
- embedding_function=embeddings
58
- )
59
-
60
- retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
61
-
62
- # -----------------------------
63
- # Load LLM (NON-INTERACTIVE)
64
- # -----------------------------
65
- print("⏳ Loading LLM...")
66
 
67
  tokenizer = AutoTokenizer.from_pretrained(
68
- LLM_MODEL,
69
  trust_remote_code=True
70
  )
71
 
72
  model = AutoModelForCausalLM.from_pretrained(
73
- LLM_MODEL,
74
- trust_remote_code=True,
75
- low_cpu_mem_usage=False
76
  )
77
 
78
- generator = pipeline(
79
  "text-generation",
80
  model=model,
81
  tokenizer=tokenizer,
82
- max_new_tokens=512,
83
- do_sample=True,
84
- temperature=0.3,
85
  )
86
 
87
- # -----------------------------
88
- # RAG Query Function
89
- # -----------------------------
90
  def ask_rag_with_status(question: str):
91
- status = []
 
92
 
93
- status.append("🔍 Searching knowledge base...")
94
- docs = retriever.get_relevant_documents(question)
95
-
96
- context = "\n\n".join(doc.page_content for doc in docs)
97
-
98
- prompt = f"""
99
- You are a helpful assistant.
100
- Answer the question using ONLY the context below.
101
- If the answer is not in the context, say you don't know.
102
 
103
  Context:
104
  {context}
@@ -106,15 +44,10 @@ Context:
106
  Question:
107
  {question}
108
 
109
- Answer:
110
- """
111
-
112
- status.append("🧠 Generating answer...")
113
- output = generator(prompt)[0]["generated_text"]
114
-
115
- answer = output.split("Answer:")[-1].strip()
116
 
 
117
  return {
118
- "answer": answer,
119
- "status": status
120
  }
 
 
 
 
 
 
1
  from langchain_community.vectorstores import Chroma
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ from config import VECTOR_DIR, EMBED_MODEL
5
 
6
+ # Embeddings
7
+ embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
 
 
 
 
 
 
 
8
 
9
+ # Vector DB
10
+ db = Chroma(
11
+ persist_directory=VECTOR_DIR,
12
+ embedding_function=embeddings
 
13
  )
14
 
15
+ # LLM (CPU-safe)
16
+ MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(
19
+ MODEL_ID,
20
  trust_remote_code=True
21
  )
22
 
23
  model = AutoModelForCausalLM.from_pretrained(
24
+ MODEL_ID,
25
+ trust_remote_code=True
 
26
  )
27
 
28
+ llm = pipeline(
29
  "text-generation",
30
  model=model,
31
  tokenizer=tokenizer,
32
+ max_new_tokens=256
 
 
33
  )
34
 
 
 
 
35
  def ask_rag_with_status(question: str):
36
+ docs = db.similarity_search(question, k=3)
37
+ context = "\n\n".join(d.page_content for d in docs)
38
 
39
+ prompt = f"""Use the context below to answer.
 
 
 
 
 
 
 
 
40
 
41
  Context:
42
  {context}
 
44
  Question:
45
  {question}
46
 
47
+ Answer:"""
 
 
 
 
 
 
48
 
49
+ output = llm(prompt)[0]["generated_text"]
50
  return {
51
+ "answer": output,
52
+ "sources": len(docs)
53
  }
requirements.txt CHANGED
@@ -1,16 +1,11 @@
1
  fastapi
2
  uvicorn
3
- python-dotenv
4
-
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
  langchain-text-splitters==0.2.4
8
-
9
  chromadb==0.5.5
10
  sentence-transformers
11
  pypdf
12
-
13
  transformers>=4.39.0
14
  huggingface_hub<1.0.0
15
- numpy<2
16
- SQLAlchemy<3
 
1
  fastapi
2
  uvicorn
 
 
3
  langchain==0.2.17
4
  langchain-community==0.2.17
5
  langchain-text-splitters==0.2.4
 
6
  chromadb==0.5.5
7
  sentence-transformers
8
  pypdf
 
9
  transformers>=4.39.0
10
  huggingface_hub<1.0.0
11
+ torch