Zubaish committed on
Commit
cd319c6
·
1 Parent(s): c2d3414

Working RAG with kb folder

Browse files
Files changed (6) hide show
  1. Dockerfile +4 -5
  2. app.py +6 -10
  3. config.py +4 -8
  4. ingest.py +23 -11
  5. rag.py +22 -89
  6. requirements.txt +1 -6
Dockerfile CHANGED
@@ -2,16 +2,15 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- git \
7
- && rm -rf /var/lib/apt/lists/*
8
 
9
  COPY requirements.txt .
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
- COPY app.py rag.py ingest.py guardrails.py config.py ./
 
13
 
14
- RUN mkdir -p kb_docs
15
 
16
  EXPOSE 7860
17
 
 
2
 
3
  WORKDIR /app
4
 
5
+ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
 
 
6
 
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
+ COPY app.py rag.py ingest.py config.py ./
11
+ COPY kb ./kb
12
 
13
+ RUN python ingest.py
14
 
15
  EXPOSE 7860
16
 
app.py CHANGED
@@ -1,16 +1,12 @@
1
  from fastapi import FastAPI
2
- from pydantic import BaseModel
3
  from rag import ask_rag_with_status
4
 
5
- app = FastAPI(title="HubRAG API")
6
-
7
- class Question(BaseModel):
8
- question: str
9
 
10
  @app.get("/")
11
- def root():
12
- return {"status": "ok", "message": "RAG API running"}
13
 
14
- @app.post("/chat")
15
- def chat(req: Question):
16
- return ask_rag_with_status(req.question)
 
1
  from fastapi import FastAPI
 
2
  from rag import ask_rag_with_status
3
 
4
+ app = FastAPI()
 
 
 
5
 
6
  @app.get("/")
7
+ def health():
8
+ return {"status": "ok"}
9
 
10
+ @app.get("/ask")
11
+ def ask(q: str):
12
+ return ask_rag_with_status(q)
config.py CHANGED
@@ -1,9 +1,5 @@
1
- import os
2
 
3
- HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
4
-
5
- if not HUGGINGFACEHUB_API_TOKEN:
6
- raise RuntimeError(
7
- "HUGGINGFACEHUB_API_TOKEN is not set. "
8
- "Set it as an environment variable or HF Space Secret."
9
- )
 
1
+ # config.py
2
 
3
+ KB_DIR = "kb"
4
+ VECTOR_DIR = "vectorstore"
5
+ EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 
 
 
 
ingest.py CHANGED
@@ -1,24 +1,36 @@
 
1
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
2
  from langchain_text_splitters import RecursiveCharacterTextSplitter
3
- import os
 
 
4
 
5
- def load_and_split_docs(path="kb_docs"):
6
- if not os.path.exists(path):
7
- return []
8
 
9
  loader = DirectoryLoader(
10
- path,
11
  glob="**/*.pdf",
12
  loader_cls=PyPDFLoader
13
  )
14
-
15
  docs = loader.load()
16
- if not docs:
17
- return []
18
 
19
  splitter = RecursiveCharacterTextSplitter(
20
- chunk_size=800,
21
- chunk_overlap=100
22
  )
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- return splitter.split_documents(docs)
 
 
1
+ import os
2
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from langchain_community.vectorstores import Chroma
6
+ from config import KB_DIR, VECTOR_DIR, EMBED_MODEL
7
 
8
+ def ingest():
9
+ if not os.path.exists(KB_DIR):
10
+ raise RuntimeError(f"{KB_DIR} folder not found")
11
 
12
  loader = DirectoryLoader(
13
+ KB_DIR,
14
  glob="**/*.pdf",
15
  loader_cls=PyPDFLoader
16
  )
 
17
  docs = loader.load()
 
 
18
 
19
  splitter = RecursiveCharacterTextSplitter(
20
+ chunk_size=500,
21
+ chunk_overlap=50
22
  )
23
+ splits = splitter.split_documents(docs)
24
+
25
+ embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
26
+
27
+ Chroma.from_documents(
28
+ documents=splits,
29
+ embedding=embeddings,
30
+ persist_directory=VECTOR_DIR
31
+ )
32
+
33
+ print("✅ Ingestion complete")
34
 
35
+ if __name__ == "__main__":
36
+ ingest()
rag.py CHANGED
@@ -1,104 +1,42 @@
1
- import os
2
-
3
- from langchain_community.document_loaders import PyPDFLoader
4
- from langchain_text_splitters import RecursiveCharacterTextSplitter
5
- from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.vectorstores import Chroma
7
-
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
9
 
10
- from config import (
11
- KB_DIR,
12
- PERSIST_DIR,
13
- EMBEDDING_MODEL,
14
- LLM_MODEL,
15
- CHUNK_SIZE,
16
- CHUNK_OVERLAP,
17
- TOP_K,
18
- )
19
 
20
- # -----------------------------
21
- # Load embeddings
22
- # -----------------------------
23
- embeddings = HuggingFaceEmbeddings(
24
- model_name=EMBEDDING_MODEL
25
  )
26
 
27
- # -----------------------------
28
- # Load or build vector DB
29
- # -----------------------------
30
- if not os.path.exists(PERSIST_DIR):
31
- os.makedirs(PERSIST_DIR, exist_ok=True)
32
-
33
- if not os.listdir(PERSIST_DIR):
34
- print("⏳ Loading documents...")
35
-
36
- docs = []
37
- for filename in os.listdir(KB_DIR):
38
- if filename.lower().endswith(".pdf"):
39
- loader = PyPDFLoader(os.path.join(KB_DIR, filename))
40
- docs.extend(loader.load())
41
-
42
- splitter = RecursiveCharacterTextSplitter(
43
- chunk_size=CHUNK_SIZE,
44
- chunk_overlap=CHUNK_OVERLAP
45
- )
46
- splits = splitter.split_documents(docs)
47
-
48
- vectorstore = Chroma.from_documents(
49
- documents=splits,
50
- embedding=embeddings,
51
- persist_directory=PERSIST_DIR
52
- )
53
- vectorstore.persist()
54
- else:
55
- vectorstore = Chroma(
56
- persist_directory=PERSIST_DIR,
57
- embedding_function=embeddings
58
- )
59
-
60
- retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
61
-
62
- # -----------------------------
63
- # Load LLM (NON-INTERACTIVE)
64
- # -----------------------------
65
- print("⏳ Loading LLM...")
66
 
67
  tokenizer = AutoTokenizer.from_pretrained(
68
- LLM_MODEL,
69
  trust_remote_code=True
70
  )
71
 
72
  model = AutoModelForCausalLM.from_pretrained(
73
- LLM_MODEL,
74
- trust_remote_code=True,
75
- low_cpu_mem_usage=False
76
  )
77
 
78
- generator = pipeline(
79
  "text-generation",
80
  model=model,
81
  tokenizer=tokenizer,
82
- max_new_tokens=512,
83
- do_sample=True,
84
- temperature=0.3,
85
  )
86
 
87
- # -----------------------------
88
- # RAG Query Function
89
- # -----------------------------
90
  def ask_rag_with_status(question: str):
91
- status = []
 
92
 
93
- status.append("🔍 Searching knowledge base...")
94
- docs = retriever.get_relevant_documents(question)
95
-
96
- context = "\n\n".join(doc.page_content for doc in docs)
97
-
98
- prompt = f"""
99
- You are a helpful assistant.
100
- Answer the question using ONLY the context below.
101
- If the answer is not in the context, say you don't know.
102
 
103
  Context:
104
  {context}
@@ -106,15 +44,10 @@ Context:
106
  Question:
107
  {question}
108
 
109
- Answer:
110
- """
111
-
112
- status.append("🧠 Generating answer...")
113
- output = generator(prompt)[0]["generated_text"]
114
-
115
- answer = output.split("Answer:")[-1].strip()
116
 
 
117
  return {
118
- "answer": answer,
119
- "status": status
120
  }
 
 
 
 
 
 
1
  from langchain_community.vectorstores import Chroma
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ from config import VECTOR_DIR, EMBED_MODEL
5
 
6
+ # Embeddings
7
+ embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
 
 
 
 
 
 
 
8
 
9
+ # Vector DB
10
+ db = Chroma(
11
+ persist_directory=VECTOR_DIR,
12
+ embedding_function=embeddings
 
13
  )
14
 
15
+ # LLM (CPU-safe)
16
+ MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  tokenizer = AutoTokenizer.from_pretrained(
19
+ MODEL_ID,
20
  trust_remote_code=True
21
  )
22
 
23
  model = AutoModelForCausalLM.from_pretrained(
24
+ MODEL_ID,
25
+ trust_remote_code=True
 
26
  )
27
 
28
+ llm = pipeline(
29
  "text-generation",
30
  model=model,
31
  tokenizer=tokenizer,
32
+ max_new_tokens=256
 
 
33
  )
34
 
 
 
 
35
  def ask_rag_with_status(question: str):
36
+ docs = db.similarity_search(question, k=3)
37
+ context = "\n\n".join(d.page_content for d in docs)
38
 
39
+ prompt = f"""Use the context below to answer.
 
 
 
 
 
 
 
 
40
 
41
  Context:
42
  {context}
 
44
  Question:
45
  {question}
46
 
47
+ Answer:"""
 
 
 
 
 
 
48
 
49
+ output = llm(prompt)[0]["generated_text"]
50
  return {
51
+ "answer": output,
52
+ "sources": len(docs)
53
  }
requirements.txt CHANGED
@@ -1,16 +1,11 @@
1
  fastapi
2
  uvicorn
3
- python-dotenv
4
-
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
  langchain-text-splitters==0.2.4
8
-
9
  chromadb==0.5.5
10
  sentence-transformers
11
  pypdf
12
-
13
  transformers>=4.39.0
14
  huggingface_hub<1.0.0
15
- numpy<2
16
- SQLAlchemy<3
 
1
  fastapi
2
  uvicorn
 
 
3
  langchain==0.2.17
4
  langchain-community==0.2.17
5
  langchain-text-splitters==0.2.4
 
6
  chromadb==0.5.5
7
  sentence-transformers
8
  pypdf
 
9
  transformers>=4.39.0
10
  huggingface_hub<1.0.0
11
+ torch