Zubaish committed on
Commit
abd4e0b
·
1 Parent(s): 45d6be3

Final stable HF-ready RAG

Browse files
Files changed (6) hide show
  1. Dockerfile +1 -2
  2. app.py +10 -4
  3. config.py +2 -18
  4. ingest.py +12 -18
  5. rag.py +57 -113
  6. requirements.txt +5 -5
Dockerfile CHANGED
@@ -8,9 +8,8 @@ COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
  COPY app.py rag.py ingest.py config.py ./
11
- COPY kb ./kb
12
 
13
- RUN python ingest.py
14
 
15
  EXPOSE 7860
16
 
 
8
# Install dependencies first so this layer stays cached across code-only edits.
RUN pip install --no-cache-dir -r requirements.txt

# Application modules only; no knowledge base is baked into the image.
COPY app.py rag.py ingest.py config.py ./

# Create empty kb/ and vectordb/ dirs — populated at runtime, not build time.
RUN mkdir -p kb vectordb

# 7860 is the default port Hugging Face Spaces routes traffic to.
EXPOSE 7860
app.py CHANGED
@@ -1,12 +1,18 @@
1
  from fastapi import FastAPI
 
2
  from rag import ask_rag_with_status
3
 
4
- app = FastAPI()
 
 
 
 
5
 
6
  @app.get("/")
7
  def health():
8
  return {"status": "ok"}
9
 
10
- @app.get("/ask")
11
- def ask(q: str):
12
- return ask_rag_with_status(q)
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from rag import ask_rag_with_status

app = FastAPI(title="RAG Knowledge Bot")


class Query(BaseModel):
    """Request body for /chat: a single free-text question."""

    question: str


@app.get("/")
def health():
    """Liveness probe — always reports ok."""
    return {"status": "ok"}


@app.post("/chat")
def chat(payload: Query):
    """Run the RAG pipeline on the submitted question and return its result."""
    return ask_rag_with_status(payload.question)
config.py CHANGED
@@ -1,25 +1,9 @@
1
  import os
2
 
3
- # -----------------------
4
- # Paths
5
- # -----------------------
6
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
7
 
8
- # Your knowledge base folder (this MUST exist in the repo)
9
  KB_DIR = os.path.join(BASE_DIR, "kb")
 
10
 
11
- # Chroma persistence directory
12
- CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
13
-
14
- # -----------------------
15
- # Models
16
- # -----------------------
17
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
18
- LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
19
-
20
- # -----------------------
21
- # RAG params
22
- # -----------------------
23
- CHUNK_SIZE = 500
24
- CHUNK_OVERLAP = 50
25
- TOP_K = 3
 
1
import os

# Directory that contains this config module; all paths are anchored here so
# the app works regardless of the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Knowledge-base PDFs are dropped into kb/; the Chroma index persists in vectordb/.
KB_DIR = os.path.join(BASE_DIR, "kb")
VECTOR_DB_DIR = os.path.join(BASE_DIR, "vectordb")

# Hugging Face Hub model identifiers.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
ingest.py CHANGED
@@ -1,33 +1,27 @@
1
  import os
2
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
- from config import KB_DIR, VECTOR_DIR, EMBED_MODEL
 
7
 
8
  def ingest():
9
- if not os.path.exists(KB_DIR):
10
- raise RuntimeError(f"{KB_DIR} folder not found")
 
11
 
12
- loader = DirectoryLoader(
13
- KB_DIR,
14
- glob="**/*.pdf",
15
- loader_cls=PyPDFLoader
16
- )
17
  docs = loader.load()
18
 
19
- splitter = RecursiveCharacterTextSplitter(
20
- chunk_size=500,
21
- chunk_overlap=50
22
- )
23
- splits = splitter.split_documents(docs)
24
 
25
- embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
26
 
27
  Chroma.from_documents(
28
- documents=splits,
29
- embedding=embeddings,
30
- persist_directory=VECTOR_DIR
31
  )
32
 
33
  print("✅ Ingestion complete")
 
1
import os
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from config import KB_DIR, VECTOR_DB_DIR, EMBEDDING_MODEL


def ingest():
    """Load every PDF under KB_DIR, chunk it, and persist a Chroma index.

    Skips silently (with a warning) when the knowledge base is absent or
    empty, so container startup never fails on a missing kb/ folder.
    """
    # Best-effort guard: no kb/ dir, or an empty one, means nothing to index.
    if not os.path.exists(KB_DIR) or not os.listdir(KB_DIR):
        print("⚠️ No PDFs found in kb/. Skipping ingestion.")
        return

    loader = DirectoryLoader(KB_DIR, glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()

    # 500-char chunks with 50-char overlap keep retrieval granular while
    # preserving sentence continuity across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # from_documents both embeds and persists the index to VECTOR_DB_DIR.
    Chroma.from_documents(
        chunks,
        embeddings,
        persist_directory=VECTOR_DB_DIR
    )

    print("✅ Ingestion complete")


if __name__ == "__main__":
    # Fix: without this guard `python ingest.py` imported the module and
    # exited without ever running the ingestion.
    ingest()
rag.py CHANGED
@@ -1,114 +1,58 @@
1
- import os
2
- from typing import Dict
3
-
4
- from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
- from langchain_huggingface import HuggingFaceEmbeddings
8
-
9
- from transformers import (
10
- AutoTokenizer,
11
- AutoModelForCausalLM,
12
- pipeline,
13
- )
14
-
15
- from config import (
16
- KB_DIR,
17
- CHROMA_DIR,
18
- EMBEDDING_MODEL,
19
- LLM_MODEL,
20
- CHUNK_SIZE,
21
- CHUNK_OVERLAP,
22
- TOP_K,
23
- )
24
-
25
- # ---------------------------
26
- # Load & index documents
27
- # ---------------------------
28
-
29
- def load_documents():
30
- loader = DirectoryLoader(
31
- KB_DIR,
32
- glob="**/*.pdf",
33
- loader_cls=PyPDFLoader,
34
- )
35
- return loader.load()
36
-
37
-
38
- def build_vectorstore():
39
- documents = load_documents()
40
-
41
- splitter = RecursiveCharacterTextSplitter(
42
- chunk_size=CHUNK_SIZE,
43
- chunk_overlap=CHUNK_OVERLAP,
44
- )
45
- chunks = splitter.split_documents(documents)
46
-
47
- embeddings = HuggingFaceEmbeddings(
48
- model_name=EMBEDDING_MODEL
49
- )
50
-
51
- vectordb = Chroma.from_documents(
52
- documents=chunks,
53
- embedding=embeddings,
54
- persist_directory=CHROMA_DIR,
55
- )
56
-
57
- vectordb.persist()
58
- return vectordb
59
-
60
-
61
- # Build or load Chroma DB
62
- if os.path.exists(CHROMA_DIR):
63
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
64
- vectordb = Chroma(
65
- persist_directory=CHROMA_DIR,
66
- embedding_function=embeddings,
67
- )
68
- else:
69
- vectordb = build_vectorstore()
70
-
71
-
72
- # ---------------------------
73
- # Load LLM (HF Space safe)
74
- # ---------------------------
75
-
76
- tokenizer = AutoTokenizer.from_pretrained(
77
- LLM_MODEL,
78
- trust_remote_code=True,
79
- )
80
-
81
- model = AutoModelForCausalLM.from_pretrained(
82
- LLM_MODEL,
83
- trust_remote_code=True,
84
- device_map="cpu",
85
- )
86
-
87
- generator = pipeline(
88
- "text-generation",
89
- model=model,
90
- tokenizer=tokenizer,
91
- max_new_tokens=256,
92
- do_sample=True,
93
- temperature=0.7,
94
- )
95
-
96
-
97
- # ---------------------------
98
- # RAG Query
99
- # ---------------------------
100
-
101
- def ask_rag_with_status(question: str) -> Dict:
102
- docs = vectordb.similarity_search(question, k=TOP_K)
103
-
104
- context = "\n\n".join(
105
- [doc.page_content for doc in docs]
106
- )
107
 
108
  prompt = f"""
109
  You are a helpful assistant.
110
- Answer the question using ONLY the context below.
111
- If the answer is not in the context, say "I don't know".
112
 
113
  Context:
114
  {context}
@@ -116,15 +60,15 @@ Context:
116
  Question:
117
  {question}
118
 
119
- Answer:
120
- """.strip()
121
 
122
- output = generator(prompt)[0]["generated_text"]
 
123
 
124
- answer = output.split("Answer:")[-1].strip()
125
 
126
  return {
127
- "question": question,
128
  "answer": answer,
129
- "sources": [doc.metadata for doc in docs],
130
  }
 
 
 
 
 
 
1
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from config import VECTOR_DB_DIR, EMBEDDING_MODEL, LLM_MODEL

# Lazily initialized singletons: the embedding model, vector store, and LLM
# load on first request rather than at import time, keeping startup fast.
_embeddings = None
_db = None
_tokenizer = None
_model = None


def get_vector_db():
    """Return the persisted Chroma vector store, loading it on first call."""
    global _embeddings, _db

    if _db is None:
        _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
        _db = Chroma(
            persist_directory=VECTOR_DB_DIR,
            embedding_function=_embeddings,
        )
    return _db


def get_llm():
    """Return (tokenizer, model) for LLM_MODEL, loading them on first call."""
    global _tokenizer, _model

    if _model is None:
        _tokenizer = AutoTokenizer.from_pretrained(
            LLM_MODEL, trust_remote_code=True
        )
        _model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL,
            trust_remote_code=True,
            torch_dtype=torch.float32,  # CPU-safe dtype (no fp16 kernels needed)
        )
        _model.eval()  # inference mode; no dropout
    return _tokenizer, _model


def ask_rag_with_status(question: str):
    """Answer *question* from retrieved context and report pipeline progress.

    Returns a dict with:
        "answer": the model's generated answer (prompt excluded),
        "status": ordered list of human-readable pipeline stage messages.
    """
    status = []

    db = get_vector_db()
    status.append("📚 Vector DB loaded")

    docs = db.similarity_search(question, k=3)
    context = "\n\n".join(d.page_content for d in docs)
    status.append("🔍 Retrieved relevant context")

    tokenizer, model = get_llm()
    status.append("🤖 LLM loaded")

    prompt = f"""
You are a helpful assistant.

Context:
{context}

Question:
{question}

Answer clearly and concisely.
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only: no_grad avoids building autograd state (saves memory/CPU).
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=300)

    # BUG FIX: causal-LM generate() returns prompt + completion tokens.
    # Decoding outputs[0] whole echoed the entire prompt back as the "answer";
    # slice off the prompt tokens and decode only the new ones.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    return {
        "answer": answer,
        "status": status
    }
requirements.txt CHANGED
@@ -5,13 +5,13 @@ python-dotenv
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
  langchain-text-splitters==0.2.4
8
- langchain-huggingface==0.0.8
9
 
10
  chromadb==0.5.5
11
  sentence-transformers
12
  pypdf
13
 
14
- transformers==4.39.3
15
- huggingface_hub==0.36.0
16
- torch
17
- numpy<2
 
 
5
langchain==0.2.17
langchain-community==0.2.17
langchain-text-splitters==0.2.4

chromadb==0.5.5
sentence-transformers
pypdf

# rag.py imports torch directly — declare it explicitly instead of relying
# on the transitive dependency pulled in by sentence-transformers.
torch
transformers>=4.39.0
huggingface_hub<1.0.0
numpy<2
SQLAlchemy<3
requests<3