Zubaish committed on
Commit
abd4e0b
·
1 Parent(s): 45d6be3

Final stable HF-ready RAG

Browse files
Files changed (6) hide show
  1. Dockerfile +1 -2
  2. app.py +10 -4
  3. config.py +2 -18
  4. ingest.py +12 -18
  5. rag.py +57 -113
  6. requirements.txt +5 -5
Dockerfile CHANGED
@@ -8,9 +8,8 @@ COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
  COPY app.py rag.py ingest.py config.py ./
11
- COPY kb ./kb
12
 
13
- RUN python ingest.py
14
 
15
  EXPOSE 7860
16
 
 
8
# Install dependencies first so this layer stays cached across code-only edits.
RUN pip install --no-cache-dir -r requirements.txt

# Application modules only; no knowledge base is baked into the image.
COPY app.py rag.py ingest.py config.py ./

# Create empty kb/ and vectordb/ dirs — populated at runtime, not build time.
RUN mkdir -p kb vectordb

# 7860 is the default port Hugging Face Spaces routes traffic to.
EXPOSE 7860
app.py CHANGED
@@ -1,12 +1,18 @@
1
  from fastapi import FastAPI
 
2
  from rag import ask_rag_with_status
3
 
4
- app = FastAPI()
 
 
 
 
5
 
6
  @app.get("/")
7
  def health():
8
  return {"status": "ok"}
9
 
10
- @app.get("/ask")
11
- def ask(q: str):
12
- return ask_rag_with_status(q)
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from rag import ask_rag_with_status

app = FastAPI(title="RAG Knowledge Bot")


class Query(BaseModel):
    """Request body for /chat: a single free-text question."""

    question: str


@app.get("/")
def health():
    """Liveness probe — always reports ok."""
    return {"status": "ok"}


@app.post("/chat")
def chat(payload: Query):
    """Run the RAG pipeline on the submitted question and return its result."""
    return ask_rag_with_status(payload.question)
config.py CHANGED
@@ -1,25 +1,9 @@
1
  import os
2
 
3
- # -----------------------
4
- # Paths
5
- # -----------------------
6
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
7
 
8
- # Your knowledge base folder (this MUST exist in the repo)
9
  KB_DIR = os.path.join(BASE_DIR, "kb")
 
10
 
11
- # Chroma persistence directory
12
- CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
13
-
14
- # -----------------------
15
- # Models
16
- # -----------------------
17
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
18
- LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
19
-
20
- # -----------------------
21
- # RAG params
22
- # -----------------------
23
- CHUNK_SIZE = 500
24
- CHUNK_OVERLAP = 50
25
- TOP_K = 3
 
1
import os

# Directory that contains this config module; all paths are anchored here so
# the app works regardless of the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Knowledge-base PDFs are dropped into kb/; the Chroma index persists in vectordb/.
KB_DIR = os.path.join(BASE_DIR, "kb")
VECTOR_DB_DIR = os.path.join(BASE_DIR, "vectordb")

# Hugging Face Hub model identifiers.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
ingest.py CHANGED
@@ -1,33 +1,27 @@
1
  import os
2
  from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
- from config import KB_DIR, VECTOR_DIR, EMBED_MODEL
 
7
 
8
  def ingest():
9
- if not os.path.exists(KB_DIR):
10
- raise RuntimeError(f"{KB_DIR} folder not found")
 
11
 
12
- loader = DirectoryLoader(
13
- KB_DIR,
14
- glob="**/*.pdf",
15
- loader_cls=PyPDFLoader
16
- )
17
  docs = loader.load()
18
 
19
- splitter = RecursiveCharacterTextSplitter(
20
- chunk_size=500,
21
- chunk_overlap=50
22
- )
23
- splits = splitter.split_documents(docs)
24
 
25
- embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
26
 
27
  Chroma.from_documents(
28
- documents=splits,
29
- embedding=embeddings,
30
- persist_directory=VECTOR_DIR
31
  )
32
 
33
  print("✅ Ingestion complete")
 
1
import os
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from config import KB_DIR, VECTOR_DB_DIR, EMBEDDING_MODEL


def ingest():
    """Load every PDF under KB_DIR, chunk it, and persist a Chroma index.

    Skips silently (with a warning) when the knowledge base is absent or
    empty, so container startup never fails on a missing kb/ folder.
    """
    # Best-effort guard: no kb/ dir, or an empty one, means nothing to index.
    if not os.path.exists(KB_DIR) or not os.listdir(KB_DIR):
        print("⚠️ No PDFs found in kb/. Skipping ingestion.")
        return

    loader = DirectoryLoader(KB_DIR, glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()

    # 500-char chunks with 50-char overlap keep retrieval granular while
    # preserving sentence continuity across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # from_documents both embeds and persists the index to VECTOR_DB_DIR.
    Chroma.from_documents(
        chunks,
        embeddings,
        persist_directory=VECTOR_DB_DIR
    )

    print("✅ Ingestion complete")


if __name__ == "__main__":
    # Fix: without this guard `python ingest.py` imported the module and
    # exited without ever running the ingestion.
    ingest()
rag.py CHANGED
@@ -1,114 +1,58 @@
1
- import os
2
- from typing import Dict
3
-
4
- from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
- from langchain_huggingface import HuggingFaceEmbeddings
8
-
9
- from transformers import (
10
- AutoTokenizer,
11
- AutoModelForCausalLM,
12
- pipeline,
13
- )
14
-
15
- from config import (
16
- KB_DIR,
17
- CHROMA_DIR,
18
- EMBEDDING_MODEL,
19
- LLM_MODEL,
20
- CHUNK_SIZE,
21
- CHUNK_OVERLAP,
22
- TOP_K,
23
- )
24
-
25
- # ---------------------------
26
- # Load & index documents
27
- # ---------------------------
28
-
29
- def load_documents():
30
- loader = DirectoryLoader(
31
- KB_DIR,
32
- glob="**/*.pdf",
33
- loader_cls=PyPDFLoader,
34
- )
35
- return loader.load()
36
-
37
-
38
- def build_vectorstore():
39
- documents = load_documents()
40
-
41
- splitter = RecursiveCharacterTextSplitter(
42
- chunk_size=CHUNK_SIZE,
43
- chunk_overlap=CHUNK_OVERLAP,
44
- )
45
- chunks = splitter.split_documents(documents)
46
-
47
- embeddings = HuggingFaceEmbeddings(
48
- model_name=EMBEDDING_MODEL
49
- )
50
-
51
- vectordb = Chroma.from_documents(
52
- documents=chunks,
53
- embedding=embeddings,
54
- persist_directory=CHROMA_DIR,
55
- )
56
-
57
- vectordb.persist()
58
- return vectordb
59
-
60
-
61
- # Build or load Chroma DB
62
- if os.path.exists(CHROMA_DIR):
63
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
64
- vectordb = Chroma(
65
- persist_directory=CHROMA_DIR,
66
- embedding_function=embeddings,
67
- )
68
- else:
69
- vectordb = build_vectorstore()
70
-
71
-
72
- # ---------------------------
73
- # Load LLM (HF Space safe)
74
- # ---------------------------
75
-
76
- tokenizer = AutoTokenizer.from_pretrained(
77
- LLM_MODEL,
78
- trust_remote_code=True,
79
- )
80
-
81
- model = AutoModelForCausalLM.from_pretrained(
82
- LLM_MODEL,
83
- trust_remote_code=True,
84
- device_map="cpu",
85
- )
86
-
87
- generator = pipeline(
88
- "text-generation",
89
- model=model,
90
- tokenizer=tokenizer,
91
- max_new_tokens=256,
92
- do_sample=True,
93
- temperature=0.7,
94
- )
95
-
96
-
97
- # ---------------------------
98
- # RAG Query
99
- # ---------------------------
100
-
101
- def ask_rag_with_status(question: str) -> Dict:
102
- docs = vectordb.similarity_search(question, k=TOP_K)
103
-
104
- context = "\n\n".join(
105
- [doc.page_content for doc in docs]
106
- )
107
 
108
  prompt = f"""
109
  You are a helpful assistant.
110
- Answer the question using ONLY the context below.
111
- If the answer is not in the context, say "I don't know".
112
 
113
  Context:
114
  {context}
@@ -116,15 +60,15 @@ Context:
116
  Question:
117
  {question}
118
 
119
- Answer:
120
- """.strip()
121
 
122
- output = generator(prompt)[0]["generated_text"]
 
123
 
124
- answer = output.split("Answer:")[-1].strip()
125
 
126
  return {
127
- "question": question,
128
  "answer": answer,
129
- "sources": [doc.metadata for doc in docs],
130
  }
 
 
 
 
 
 
1
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from config import VECTOR_DB_DIR, EMBEDDING_MODEL, LLM_MODEL

# Lazily initialized singletons: the embedding model, vector store, and LLM
# load on first request rather than at import time, keeping startup fast.
_embeddings = None
_db = None
_tokenizer = None
_model = None


def get_vector_db():
    """Return the persisted Chroma vector store, loading it on first call."""
    global _embeddings, _db

    if _db is None:
        _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
        _db = Chroma(
            persist_directory=VECTOR_DB_DIR,
            embedding_function=_embeddings,
        )
    return _db


def get_llm():
    """Return (tokenizer, model) for LLM_MODEL, loading them on first call."""
    global _tokenizer, _model

    if _model is None:
        _tokenizer = AutoTokenizer.from_pretrained(
            LLM_MODEL, trust_remote_code=True
        )
        _model = AutoModelForCausalLM.from_pretrained(
            LLM_MODEL,
            trust_remote_code=True,
            torch_dtype=torch.float32,  # CPU-safe dtype (no fp16 kernels needed)
        )
        _model.eval()  # inference mode; no dropout
    return _tokenizer, _model


def ask_rag_with_status(question: str):
    """Answer *question* from retrieved context and report pipeline progress.

    Returns a dict with:
        "answer": the model's generated answer (prompt excluded),
        "status": ordered list of human-readable pipeline stage messages.
    """
    status = []

    db = get_vector_db()
    status.append("📚 Vector DB loaded")

    docs = db.similarity_search(question, k=3)
    context = "\n\n".join(d.page_content for d in docs)
    status.append("🔍 Retrieved relevant context")

    tokenizer, model = get_llm()
    status.append("🤖 LLM loaded")

    prompt = f"""
You are a helpful assistant.

Context:
{context}

Question:
{question}

Answer clearly and concisely.
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only: no_grad avoids building autograd state (saves memory/CPU).
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=300)

    # BUG FIX: causal-LM generate() returns prompt + completion tokens.
    # Decoding outputs[0] whole echoed the entire prompt back as the "answer";
    # slice off the prompt tokens and decode only the new ones.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    return {
        "answer": answer,
        "status": status
    }
requirements.txt CHANGED
@@ -5,13 +5,13 @@ python-dotenv
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
  langchain-text-splitters==0.2.4
8
- langchain-huggingface==0.0.8
9
 
10
  chromadb==0.5.5
11
  sentence-transformers
12
  pypdf
13
 
14
- transformers==4.39.3
15
- huggingface_hub==0.36.0
16
- torch
17
- numpy<2
 
 
5
langchain==0.2.17
langchain-community==0.2.17
langchain-text-splitters==0.2.4

chromadb==0.5.5
sentence-transformers
pypdf

# rag.py imports torch directly — declare it explicitly instead of relying
# on the transitive dependency pulled in by sentence-transformers.
torch
transformers>=4.39.0
huggingface_hub<1.0.0
numpy<2
SQLAlchemy<3
requests<3