Zubaish committed on
Commit
772c22e
·
1 Parent(s): 98b93b7

Add frontend UI and serve it via FastAPI (HF Space)

Browse files
Files changed (6) hide show
  1. Dockerfile +2 -1
  2. app.py +16 -7
  3. config.py +11 -13
  4. frontend/index.html +65 -0
  5. rag.py +47 -48
  6. requirements.txt +9 -2
Dockerfile CHANGED
@@ -7,7 +7,8 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
- COPY app.py rag.py ingest.py config.py ./
 
11
 
12
  EXPOSE 7860
13
 
 
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
10
+ COPY app.py rag.py ingest.py config.py ./
11
+ COPY frontend ./frontend
12
 
13
  EXPOSE 7860
14
 
app.py CHANGED
@@ -1,13 +1,22 @@
1
  from fastapi import FastAPI
 
 
 
2
  from rag import ask_rag_with_status
3
 
4
  app = FastAPI()
5
 
6
- @app.get("/")
7
- def health():
8
- return {"status": "ok"}
9
 
10
- @app.post("/ask")
11
- def ask(payload: dict):
12
- question = payload.get("question", "")
13
- return ask_rag_with_status(question)
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from pydantic import BaseModel
5
  from rag import ask_rag_with_status
6
 
7
  app = FastAPI()
8
 
9
+ app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
 
 
10
 
11
class Query(BaseModel):
    # Request body for POST /chat: the user's question as plain text.
    question: str
13
+
14
@app.get("/", response_class=HTMLResponse)
def home():
    """Return the contents of ``frontend/index.html`` as the landing page."""
    index_path = "frontend/index.html"
    # The file is re-read on every request, so edits show up without a restart.
    with open(index_path, mode="r", encoding="utf-8") as page:
        html = page.read()
    return html
18
+
19
@app.post("/chat")
def chat(q: Query):
    """Answer a question sent by the frontend using the RAG pipeline.

    Args:
        q: Request body carrying the user's ``question``.

    Returns:
        A JSON-serialisable dict with the generated ``"answer"`` and the
        list of ``"status"`` messages produced while answering.
    """
    # ask_rag_with_status returns a dict with "answer" and "status" keys.
    # Tuple-unpacking a dict binds its *keys*, which would send the literal
    # strings "answer"/"status" to the client, so read the values by key.
    result = ask_rag_with_status(q.question)
    return {"answer": result["answer"], "status": result["status"]}
config.py CHANGED
@@ -1,18 +1,16 @@
1
  import os
2
- from huggingface_hub import snapshot_download
3
- from config import HF_DATASET_ID, KB_DIR
4
 
5
- def download_kb():
6
- os.makedirs(KB_DIR, exist_ok=True)
7
 
8
- snapshot_download(
9
- repo_id=HF_DATASET_ID,
10
- repo_type="dataset",
11
- local_dir=KB_DIR,
12
- local_dir_use_symlinks=False
13
- )
14
 
15
- print("✅ Knowledge base downloaded")
 
16
 
17
- if __name__ == "__main__":
18
- download_kb()
 
 
 
 
1
  import os
 
 
2
 
3
+ # Folder where PDFs are downloaded at runtime
4
+ KB_DIR = "kb"
5
 
6
+ # HF dataset containing PDFs
7
+ HF_DATASET_REPO = "Zubaish/hubrags-docs" # change if needed
 
 
 
 
8
 
9
+ # Embeddings
10
+ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
11
 
12
+ # LLM
13
+ LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
14
+
15
+ # Chroma
16
+ CHROMA_DIR = "chroma_db"
frontend/index.html ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <title>HubRAG</title>
6
+ <style>
7
+ body {
8
+ font-family: sans-serif;
9
+ max-width: 800px;
10
+ margin: 40px auto;
11
+ }
12
+ textarea {
13
+ width: 100%;
14
+ padding: 10px;
15
+ }
16
+ button {
17
+ margin-top: 10px;
18
+ padding: 8px 16px;
19
+ }
20
+ pre {
21
+ background: #f5f5f5;
22
+ padding: 10px;
23
+ white-space: pre-wrap;
24
+ }
25
+ </style>
26
+ </head>
27
+ <body>
28
+
29
+ <h2>📄 HubRAG (HF Space)</h2>
30
+
31
+ <textarea id="q" rows="4" placeholder="Ask a question about the documents..."></textarea>
32
+ <br/>
33
+ <button onclick="ask()">Ask</button>
34
+
35
+ <h3>Status</h3>
36
+ <ul id="status"></ul>
37
+
38
+ <h3>Answer</h3>
39
+ <pre id="answer"></pre>
40
+
41
+ <script>
42
/**
 * Send the question from the textarea to POST /chat and render the reply.
 *
 * Shows "Thinking..." while the request is in flight, then fills the answer
 * box and the status list. Any failure (network error, non-2xx response,
 * invalid JSON) is reported in the answer box instead of being left as an
 * unhandled promise rejection.
 */
async function ask() {
  const answerEl = document.getElementById("answer");
  const statusEl = document.getElementById("status");
  const q = document.getElementById("q").value;

  answerEl.textContent = "Thinking...";
  statusEl.innerHTML = "";

  try {
    const res = await fetch("/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ question: q })
    });

    // fetch() only rejects on network failure; HTTP error codes must be
    // checked explicitly.
    if (!res.ok) {
      throw new Error("Request failed with status " + res.status);
    }

    const data = await res.json();

    answerEl.textContent = data.answer || "No answer";

    // Only iterate when the server actually sent a list; a string or object
    // here would otherwise throw on .forEach.
    const steps = Array.isArray(data.status) ? data.status : [];
    steps.forEach(s => {
      const li = document.createElement("li");
      li.textContent = s;
      statusEl.appendChild(li);
    });
  } catch (err) {
    // Without this, a failed request leaves "Thinking..." on screen forever.
    answerEl.textContent = "Error: " + err.message;
  }
}
62
+ </script>
63
+
64
+ </body>
65
+ </html>
rag.py CHANGED
@@ -1,66 +1,65 @@
1
- import os
2
- from typing import Dict
3
-
4
- from langchain_community.document_loaders import PyPDFLoader
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain_community.vectorstores import Chroma
 
 
8
 
9
- from ingest import download_kb
10
- from config import (
11
- KB_DIR,
12
- CHROMA_DIR,
13
- EMBED_MODEL,
14
- CHUNK_SIZE,
15
- CHUNK_OVERLAP,
16
- )
17
 
18
- # -------------------------
19
- # Startup: download + index
20
- # -------------------------
21
 
22
- print("⬇️ Downloading KB...")
23
- download_kb()
24
 
25
- print("📄 Loading documents...")
26
- documents = []
27
- for file in os.listdir(KB_DIR):
28
- if file.endswith(".pdf"):
29
- loader = PyPDFLoader(os.path.join(KB_DIR, file))
30
- documents.extend(loader.load())
31
 
32
- print(f"📚 Loaded {len(documents)} pages")
 
 
 
 
33
 
34
- splitter = RecursiveCharacterTextSplitter(
35
- chunk_size=CHUNK_SIZE,
36
- chunk_overlap=CHUNK_OVERLAP,
 
 
37
  )
38
 
39
- splits = splitter.split_documents(documents)
 
 
40
 
41
- embeddings = HuggingFaceEmbeddings(
42
- model_name=EMBED_MODEL
43
- )
44
 
45
- vectorstore = Chroma.from_documents(
46
- documents=splits,
47
- embedding=embeddings,
48
- persist_directory=CHROMA_DIR
49
- )
50
 
51
- retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
 
52
 
53
- # -------------------------
54
- # Query API
55
- # -------------------------
56
 
57
- def ask_rag_with_status(question: str) -> Dict:
58
- docs = retriever.get_relevant_documents(question)
59
 
60
- context = "\n\n".join(d.page_content for d in docs)
 
 
 
 
 
 
 
 
61
 
 
62
  return {
63
- "question": question,
64
- "chunks_used": len(docs),
65
- "context_preview": context[:500]
66
  }
 
1
+ from langchain_chroma import Chroma
 
 
 
 
2
  from langchain_community.embeddings import HuggingFaceEmbeddings
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+ from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR
6
 
7
+ status_log = []
 
 
 
 
 
 
 
8
 
9
def log(msg):
    """Append ``msg`` to the module-level ``status_log`` list."""
    status_log.extend([msg])
 
11
 
12
# One-time startup work, executed when this module is imported:
# embeddings -> vector store -> tokenizer + LLM.
log("🔹 Loading embeddings...")
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

log("🔹 Loading vector store...")
# Opens the Chroma collection persisted under CHROMA_DIR.
vectordb = Chroma(embedding_function=embeddings, persist_directory=CHROMA_DIR)

log("🔹 Loading LLM...")
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, trust_remote_code=True)

# NOTE(review): passing device_map typically requires the `accelerate`
# package - confirm it is installed in the image.
model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,
    trust_remote_code=True,
)
33
 
34
def ask_rag_with_status(question: str):
    """Answer ``question`` from the indexed documents.

    Retrieves the 3 most similar chunks from the vector store, wraps them in
    a prompt together with the question, and generates a completion with the
    local LLM.

    Args:
        question: The user's question as plain text.

    Returns:
        A dict with two keys: ``"answer"`` (the generated text, stripped of
        surrounding whitespace) and ``"status"`` (the list of progress
        messages produced during this call).
    """
    # Per-call list: the module-level ``status_log`` is shared, so clearing
    # and appending to it here would let overlapping requests wipe or mix
    # each other's messages.
    status = ["🔍 Searching documents..."]

    docs = vectordb.similarity_search(question, k=3)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = (
        "Use the context below to answer the question.\n\n"
        f"Context:\n{context}\n\n"
        f"Question:\n{question}\n\n"
        "Answer:"
    )

    status.append("🤖 Generating answer...")
    inputs = tokenizer(prompt, return_tensors="pt")

    output = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.3,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new ones. Decode only the new tokens rather than splitting the full
    # text on "Answer:", which would cut off any generated text that itself
    # contains "Answer:".
    prompt_len = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

    return {
        "answer": answer.strip(),
        "status": status,
    }
requirements.txt CHANGED
@@ -1,10 +1,17 @@
1
  fastapi
2
  uvicorn
 
 
3
  langchain==0.2.17
4
  langchain-community==0.2.17
5
- langchain-text-splitters==0.2.4
 
6
  chromadb==0.5.5
7
  sentence-transformers
8
- huggingface_hub
9
  pypdf
 
 
 
 
 
10
  numpy<2
 
1
  fastapi
2
  uvicorn
3
+ python-dotenv
4
+
5
  langchain==0.2.17
6
  langchain-community==0.2.17
7
+ langchain-chroma==0.1.2
8
+
9
  chromadb==0.5.5
10
  sentence-transformers
 
11
  pypdf
12
+ datasets
13
+
14
+ transformers>=4.39.0
15
+ huggingface_hub<1.0.0
16
+ torch
17
  numpy<2