Zubaish committed on
Commit
b6d77d3
·
1 Parent(s): e34c59e

Fix: proper frontend/backend separation

Browse files
Files changed (5) hide show
  1. app.py +9 -9
  2. config.py +12 -6
  3. frontend/index.html +53 -21
  4. rag.py +82 -36
  5. requirements.txt +5 -5
app.py CHANGED
@@ -1,5 +1,5 @@
1
- # app.py
2
  from fastapi import FastAPI
 
3
  from pydantic import BaseModel
4
  from rag import ask_rag_with_status
5
 
@@ -8,14 +8,14 @@ app = FastAPI()
8
  class Query(BaseModel):
9
  question: str
10
 
11
- @app.get("/")
12
- def health():
13
- return {"status": "ok"}
 
 
14
 
 
15
  @app.post("/chat")
16
  def chat(q: Query):
17
- answer, status = ask_rag_with_status(q.question)
18
- return {
19
- "answer": answer,
20
- "status": status,
21
- }
 
 
1
  from fastapi import FastAPI
2
+ from fastapi.responses import HTMLResponse
3
  from pydantic import BaseModel
4
  from rag import ask_rag_with_status
5
 
 
8
  class Query(BaseModel):
9
  question: str
10
 
11
+ # Serve frontend
12
+ @app.get("/", response_class=HTMLResponse)
13
+ def index():
14
+ with open("index.html", "r", encoding="utf-8") as f:
15
+ return f.read()
16
 
17
+ # Chat endpoint
18
  @app.post("/chat")
19
  def chat(q: Query):
20
+ result = ask_rag_with_status(q.question)
21
+ return result
 
 
 
config.py CHANGED
@@ -1,10 +1,16 @@
1
- # config.py
2
 
3
- MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
4
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
5
 
6
- # Hugging Face Dataset repo where PDFs live
7
- HF_DATASET_REPO = "Zubaish/HubRAG-docs"
 
 
 
8
 
9
- # Retrieval
10
- TOP_K = 3
 
1
+ import os
2
 
3
+ # Hugging Face dataset repo containing PDFs
4
+ HF_DATASET_REPO = "Zubaish/hubrag-kb"
5
+
6
+ # Embedding model (lightweight, CPU-safe)
7
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
8
 
9
+ # Chroma persistence (local to container)
10
+ CHROMA_DIR = "/tmp/chroma"
11
+
12
+ # LLM via HF Inference API (NOT local)
13
+ LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
14
 
15
+ # Safety
16
+ MAX_CONTEXT_CHUNKS = 4
frontend/index.html CHANGED
@@ -1,21 +1,53 @@
1
- # app.py
2
- from fastapi import FastAPI
3
- from pydantic import BaseModel
4
- from rag import ask_rag_with_status
5
-
6
- app = FastAPI()
7
-
8
- class Query(BaseModel):
9
- question: str
10
-
11
- @app.get("/")
12
- def health():
13
- return {"status": "ok"}
14
-
15
- @app.post("/chat")
16
- def chat(q: Query):
17
- answer, status = ask_rag_with_status(q.question)
18
- return {
19
- "answer": answer,
20
- "status": status,
21
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <title>HubRAG</title>
6
+ <style>
7
+ body { font-family: sans-serif; max-width: 800px; margin: 40px auto; }
8
+ textarea { width: 100%; padding: 10px; }
9
+ button { margin-top: 10px; padding: 8px 16px; }
10
+ pre { background: #f5f5f5; padding: 10px; white-space: pre-wrap; }
11
+ </style>
12
+ </head>
13
+ <body>
14
+
15
+ <h2>📄 HubRAG (HF Space)</h2>
16
+
17
+ <textarea id="q" rows="4" placeholder="Ask a question..."></textarea>
18
+ <br/>
19
+ <button onclick="ask()">Ask</button>
20
+
21
+ <h3>Status</h3>
22
+ <ul id="status"></ul>
23
+
24
+ <h3>Answer</h3>
25
+ <pre id="answer"></pre>
26
+
27
+ <script>
28
+ async function ask() {
29
+ const q = document.getElementById("q").value;
30
+ document.getElementById("answer").textContent = "Thinking...";
31
+ document.getElementById("status").innerHTML = "";
32
+
33
+ const res = await fetch("/chat", {
34
+ method: "POST",
35
+ headers: { "Content-Type": "application/json" },
36
+ body: JSON.stringify({ question: q })
37
+ });
38
+
39
+ const data = await res.json();
40
+
41
+ document.getElementById("answer").textContent =
42
+ data.answer || "No answer";
43
+
44
+ (data.status || []).forEach(s => {
45
+ const li = document.createElement("li");
46
+ li.textContent = s;
47
+ document.getElementById("status").appendChild(li);
48
+ });
49
+ }
50
+ </script>
51
+
52
+ </body>
53
+ </html>
rag.py CHANGED
@@ -1,57 +1,99 @@
 
1
  from datasets import load_dataset
2
- from langchain.schema import Document
 
3
  from langchain_community.vectorstores import Chroma
4
- from langchain_huggingface import HuggingFaceEmbeddings
5
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- HF_DATASET_REPO = "Zubaish/hubrag-kb"
8
- EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
9
- CHROMA_DIR = "./chroma"
10
 
 
 
 
 
 
 
 
11
  def load_documents():
12
  docs = []
13
  ds = load_dataset(HF_DATASET_REPO, split="train")
14
 
15
- for row in ds:
16
- text = row.get("text")
17
- if text and text.strip():
18
- docs.append(Document(page_content=text))
19
 
20
  return docs
21
 
22
- documents = load_documents()
23
 
24
- if not documents:
25
- print("⚠️ No text documents found in dataset. PDFs must be converted to text.")
 
 
 
26
 
27
- embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
 
 
28
 
29
- vectordb = None
30
- if documents:
31
- vectordb = Chroma.from_documents(
32
- documents,
 
 
 
 
 
 
 
 
33
  embedding=embeddings,
34
  persist_directory=CHROMA_DIR
35
  )
36
 
37
- llm = pipeline(
38
- "text-generation",
39
- model="microsoft/Phi-3-mini-4k-instruct",
40
- trust_remote_code=True,
41
- max_new_tokens=256
42
- )
43
 
 
 
44
  def ask_rag_with_status(question: str):
45
- if not vectordb:
 
 
 
46
  return {
47
- "answer": "Knowledge base is empty. Please upload text documents to the dataset.",
48
- "status": ["No text documents loaded"]
 
 
 
 
 
 
 
 
 
 
49
  }
50
 
51
- docs = vectordb.similarity_search(question, k=3)
52
  context = "\n\n".join(d.page_content for d in docs)
53
 
54
- prompt = f"""Answer the question using only the context.
 
 
55
 
56
  Context:
57
  {context}
@@ -59,14 +101,18 @@ Context:
59
  Question:
60
  {question}
61
 
62
- Answer:"""
 
63
 
64
- result = llm(prompt)[0]["generated_text"]
 
 
 
 
 
 
65
 
66
  return {
67
- "answer": result.split("Answer:")[-1].strip(),
68
- "status": [
69
- f"Loaded {len(documents)} documents",
70
- f"Retrieved {len(docs)} chunks"
71
- ]
72
  }
 
1
+ import os
2
  from datasets import load_dataset
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import Chroma
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from huggingface_hub import InferenceClient
8
+
9
+ from config import (
10
+ HF_DATASET_REPO,
11
+ EMBEDDING_MODEL,
12
+ CHROMA_DIR,
13
+ LLM_MODEL,
14
+ MAX_CONTEXT_CHUNKS,
15
+ )
16
+
17
+ # --- Globals (lazy loaded) ---
18
+ _vectordb = None
19
 
20
+ # --- Embeddings ---
21
+ embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 
22
 
23
+ # --- HF Inference Client ---
24
+ llm = InferenceClient(
25
+ model=LLM_MODEL,
26
+ token=os.environ.get("HF_TOKEN"),
27
+ )
28
+
29
+ # --- Load PDFs from HF Dataset ---
30
  def load_documents():
31
  docs = []
32
  ds = load_dataset(HF_DATASET_REPO, split="train")
33
 
34
+ for item in ds:
35
+ pdf_path = item["file"]
36
+ loader = PyPDFLoader(pdf_path)
37
+ docs.extend(loader.load())
38
 
39
  return docs
40
 
 
41
 
42
+ def get_vectordb():
43
+ global _vectordb
44
+
45
+ if _vectordb is not None:
46
+ return _vectordb
47
 
48
+ documents = load_documents()
49
+ if not documents:
50
+ return None
51
 
52
+ splitter = RecursiveCharacterTextSplitter(
53
+ chunk_size=800,
54
+ chunk_overlap=150
55
+ )
56
+
57
+ chunks = splitter.split_documents(documents)
58
+
59
+ if not chunks:
60
+ return None
61
+
62
+ _vectordb = Chroma.from_documents(
63
+ chunks,
64
  embedding=embeddings,
65
  persist_directory=CHROMA_DIR
66
  )
67
 
68
+ return _vectordb
 
 
 
 
 
69
 
70
+
71
+ # --- RAG Query ---
72
  def ask_rag_with_status(question: str):
73
+ status = []
74
+
75
+ vectordb = get_vectordb()
76
+ if vectordb is None:
77
  return {
78
+ "answer": "No documents indexed.",
79
+ "status": ["Vector DB not available"]
80
+ }
81
+
82
+ status.append("🔍 Searching documents")
83
+
84
+ docs = vectordb.similarity_search(question, k=MAX_CONTEXT_CHUNKS)
85
+
86
+ if not docs:
87
+ return {
88
+ "answer": "No relevant context found.",
89
+ "status": status
90
  }
91
 
 
92
  context = "\n\n".join(d.page_content for d in docs)
93
 
94
+ prompt = f"""You are a helpful assistant.
95
+ Answer ONLY from the context below.
96
+ If the answer is not present, say "I don't know".
97
 
98
  Context:
99
  {context}
 
101
  Question:
102
  {question}
103
 
104
+ Answer:
105
+ """
106
 
107
+ status.append("🧠 Generating answer")
108
+
109
+ answer = llm.text_generation(
110
+ prompt,
111
+ max_new_tokens=256,
112
+ temperature=0.2,
113
+ )
114
 
115
  return {
116
+ "answer": answer.strip(),
117
+ "status": status
 
 
 
118
  }
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
  fastapi
2
  uvicorn
3
- transformers
4
- torch
5
  datasets
6
- chromadb
 
7
  langchain
8
  langchain-community
9
- langchain-huggingface==0.1.0
10
- sentence-transformers
 
1
  fastapi
2
  uvicorn
3
+ pydantic
 
4
  datasets
5
+ huggingface_hub
6
+ sentence-transformers
7
  langchain
8
  langchain-community
9
+ chromadb
10
+ pypdf