Zubaish committed on
Commit
2fd2129
·
1 Parent(s): b6d77d3

Final stable HF RAG (dataset-backed, CPU-safe)

Browse files
Files changed (5) hide show
  1. app.py +11 -5
  2. config.py +2 -13
  3. frontend/index.html +19 -5
  4. rag.py +50 -79
  5. requirements.txt +14 -6
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from fastapi import FastAPI
2
  from fastapi.responses import HTMLResponse
3
  from pydantic import BaseModel
@@ -5,17 +7,21 @@ from rag import ask_rag_with_status
5
 
6
  app = FastAPI()
7
 
 
8
  class Query(BaseModel):
9
  question: str
10
 
11
- # Serve frontend
12
  @app.get("/", response_class=HTMLResponse)
13
  def index():
14
- with open("index.html", "r", encoding="utf-8") as f:
15
  return f.read()
16
 
17
- # Chat endpoint
18
  @app.post("/chat")
19
  def chat(q: Query):
20
- result = ask_rag_with_status(q.question)
21
- return result
 
 
 
 
1
+ # app.py
2
+
3
  from fastapi import FastAPI
4
  from fastapi.responses import HTMLResponse
5
  from pydantic import BaseModel
 
7
 
8
  app = FastAPI()
9
 
10
+
11
class Query(BaseModel):
    """Request body for the POST /chat endpoint."""

    # The user's natural-language question to run through the RAG pipeline.
    question: str
13
 
14
+
15
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the single-page chat UI from frontend/index.html."""
    with open("frontend/index.html", "r", encoding="utf-8") as page:
        html = page.read()
    return html
19
 
20
+
21
@app.post("/chat")
def chat(q: Query):
    """Run the RAG pipeline on the submitted question.

    Returns a JSON object with the generated answer and the list of
    status messages produced while answering.
    """
    answer, status = ask_rag_with_status(q.question)
    return {"answer": answer, "status": status}
config.py CHANGED
@@ -1,16 +1,5 @@
1
- import os
2
 
3
- # Hugging Face dataset repo containing PDFs
4
  HF_DATASET_REPO = "Zubaish/hubrag-kb"
5
-
6
- # Embedding model (lightweight, CPU-safe)
7
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
8
-
9
- # Chroma persistence (local to container)
10
- CHROMA_DIR = "/tmp/chroma"
11
-
12
- # LLM via HF Inference API (NOT local)
13
- LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
14
-
15
- # Safety
16
- MAX_CONTEXT_CHUNKS = 4
 
1
# config.py

# Hugging Face dataset repo holding the knowledge-base documents.
HF_DATASET_REPO = "Zubaish/hubrag-kb"

# Sentence-transformers model used to embed document chunks.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Seq2seq model used for answer generation.
LLM_MODEL = "google/flan-t5-small"  # SAFE on HF CPU
 
 
 
 
 
 
 
 
frontend/index.html CHANGED
@@ -4,17 +4,31 @@
4
  <meta charset="UTF-8" />
5
  <title>HubRAG</title>
6
  <style>
7
- body { font-family: sans-serif; max-width: 800px; margin: 40px auto; }
8
- textarea { width: 100%; padding: 10px; }
9
- button { margin-top: 10px; padding: 8px 16px; }
10
- pre { background: #f5f5f5; padding: 10px; white-space: pre-wrap; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  </style>
12
  </head>
13
  <body>
14
 
15
  <h2>📄 HubRAG (HF Space)</h2>
16
 
17
- <textarea id="q" rows="4" placeholder="Ask a question..."></textarea>
18
  <br/>
19
  <button onclick="ask()">Ask</button>
20
 
 
4
  <meta charset="UTF-8" />
5
  <title>HubRAG</title>
6
  <style>
7
+ body {
8
+ font-family: sans-serif;
9
+ max-width: 800px;
10
+ margin: 40px auto;
11
+ }
12
+ textarea {
13
+ width: 100%;
14
+ padding: 10px;
15
+ }
16
+ button {
17
+ margin-top: 10px;
18
+ padding: 8px 16px;
19
+ }
20
+ pre {
21
+ background: #f5f5f5;
22
+ padding: 10px;
23
+ white-space: pre-wrap;
24
+ }
25
  </style>
26
  </head>
27
  <body>
28
 
29
  <h2>📄 HubRAG (HF Space)</h2>
30
 
31
+ <textarea id="q" rows="4" placeholder="Ask a question about the documents..."></textarea>
32
  <br/>
33
  <button onclick="ask()">Ask</button>
34
 
rag.py CHANGED
@@ -1,99 +1,78 @@
1
- import os
 
2
  from datasets import load_dataset
3
- from langchain_community.document_loaders import PyPDFLoader
4
- from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import Chroma
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from huggingface_hub import InferenceClient
8
-
9
- from config import (
10
- HF_DATASET_REPO,
11
- EMBEDDING_MODEL,
12
- CHROMA_DIR,
13
- LLM_MODEL,
14
- MAX_CONTEXT_CHUNKS,
15
- )
16
 
17
- # --- Globals (lazy loaded) ---
18
- _vectordb = None
19
 
20
- # --- Embeddings ---
21
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
22
-
23
- # --- HF Inference Client ---
24
- llm = InferenceClient(
25
- model=LLM_MODEL,
26
- token=os.environ.get("HF_TOKEN"),
27
- )
28
-
29
- # --- Load PDFs from HF Dataset ---
30
  def load_documents():
31
- docs = []
32
  ds = load_dataset(HF_DATASET_REPO, split="train")
33
 
34
- for item in ds:
35
- pdf_path = item["file"]
36
- loader = PyPDFLoader(pdf_path)
37
- docs.extend(loader.load())
 
38
 
39
  return docs
40
 
41
 
42
- def get_vectordb():
43
- global _vectordb
 
 
44
 
45
- if _vectordb is not None:
46
- return _vectordb
47
 
48
- documents = load_documents()
49
- if not documents:
50
- return None
51
-
52
- splitter = RecursiveCharacterTextSplitter(
53
- chunk_size=800,
54
- chunk_overlap=150
55
- )
56
 
57
- chunks = splitter.split_documents(documents)
58
 
59
- if not chunks:
60
- return None
61
 
62
- _vectordb = Chroma.from_documents(
63
- chunks,
64
- embedding=embeddings,
65
- persist_directory=CHROMA_DIR
66
- )
67
 
68
- return _vectordb
69
 
 
 
 
 
 
 
 
 
70
 
71
- # --- RAG Query ---
 
 
72
  def ask_rag_with_status(question: str):
73
  status = []
74
 
75
- vectordb = get_vectordb()
76
- if vectordb is None:
77
- return {
78
- "answer": "No documents indexed.",
79
- "status": ["Vector DB not available"]
80
- }
81
-
82
- status.append("🔍 Searching documents")
83
-
84
- docs = vectordb.similarity_search(question, k=MAX_CONTEXT_CHUNKS)
85
 
86
  if not docs:
87
- return {
88
- "answer": "No relevant context found.",
89
- "status": status
90
- }
91
 
92
  context = "\n\n".join(d.page_content for d in docs)
93
 
94
- prompt = f"""You are a helpful assistant.
95
- Answer ONLY from the context below.
96
- If the answer is not present, say "I don't know".
97
 
98
  Context:
99
  {context}
@@ -105,14 +84,6 @@ Answer:
105
  """
106
 
107
  status.append("🧠 Generating answer")
 
108
 
109
- answer = llm.text_generation(
110
- prompt,
111
- max_new_tokens=256,
112
- temperature=0.2,
113
- )
114
-
115
- return {
116
- "answer": answer.strip(),
117
- "status": status
118
- }
 
1
+ # rag.py
2
+
3
  from datasets import load_dataset
 
 
4
  from langchain_community.vectorstores import Chroma
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain.schema import Document
8
+ from transformers import pipeline
 
 
 
 
 
 
9
 
10
+ from config import HF_DATASET_REPO, EMBEDDING_MODEL, LLM_MODEL
 
11
 
12
+ # ------------------------
13
+ # Load documents from HF Dataset
14
+ # ------------------------
 
 
 
 
 
 
 
15
def load_documents():
    """Fetch the knowledge-base dataset from the Hub and wrap each row.

    Rows may carry their body under either a ``text`` or a ``content``
    column; rows with no non-blank body are dropped.

    Returns:
        list[Document]: one Document per usable dataset row, in dataset order.
    """
    rows = load_dataset(HF_DATASET_REPO, split="train")
    bodies = (row.get("text") or row.get("content") for row in rows)
    return [
        Document(page_content=body)
        for body in bodies
        if body and body.strip()
    ]
25
 
26
 
27
# ------------------------
# Build Vector DB (ONCE)
# ------------------------
# NOTE(review): everything below runs at import time, so the web server
# cannot start serving until the dataset is downloaded, chunked, embedded
# and indexed — confirm this startup cost is acceptable.
documents = load_documents()

# Fail fast: an empty knowledge base would make every query useless.
if not documents:
    raise RuntimeError("No documents loaded from HF Dataset")

# Small chunks with modest overlap keep prompts short for the CPU-only
# generator configured in config.LLM_MODEL.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

chunks = splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# No persist_directory is passed, so the index is presumably ephemeral and
# rebuilt from scratch on every process restart — TODO confirm intended.
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

# Retrieve the top-3 most similar chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# ------------------------
# LLM (CPU SAFE)
# ------------------------
# Seq2seq generation pipeline; answer length capped at 256 new tokens.
llm = pipeline(
    "text2text-generation",
    model=LLM_MODEL,
    max_new_tokens=256,
)
59
 
60
+ # ------------------------
61
+ # RAG Query
62
+ # ------------------------
63
  def ask_rag_with_status(question: str):
64
  status = []
65
 
66
+ status.append("🔎 Retrieving documents")
67
+ docs = retriever.get_relevant_documents(question)
 
 
 
 
 
 
 
 
68
 
69
  if not docs:
70
+ return "No relevant documents found.", status
 
 
 
71
 
72
  context = "\n\n".join(d.page_content for d in docs)
73
 
74
+ prompt = f"""
75
+ Answer the question using the context below.
 
76
 
77
  Context:
78
  {context}
 
84
  """
85
 
86
  status.append("🧠 Generating answer")
87
+ result = llm(prompt)[0]["generated_text"]
88
 
89
+ return result.strip(), status
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,10 +1,18 @@
1
  fastapi
2
  uvicorn
3
  pydantic
4
- datasets
5
- huggingface_hub
 
 
 
 
 
6
  sentence-transformers
7
- langchain
8
- langchain-community
9
- chromadb
10
- pypdf
 
 
 
 
1
  fastapi
2
  uvicorn
3
  pydantic
4
+ python-dotenv
5
+
6
+ langchain==0.2.17
7
+ langchain-community==0.2.17
8
+ langchain-text-splitters==0.2.4
9
+
10
+ chromadb==0.5.5
11
  sentence-transformers
12
+ pypdf
13
+
14
+ transformers>=4.39.0
15
+ huggingface_hub
16
+ datasets
17
+
18
+ torch