AsyncRAG

Sleeping

App Files Files Community

Zubaish committed 18 days ago

Commit

772c22e

1 Parent(s): 98b93b7

Add frontend UI and serve it via FastAPI (HF Space)

Browse files

Files changed (6) hide show

Dockerfile +2 -1
app.py +16 -7
config.py +11 -13
frontend/index.html +65 -0
rag.py +47 -48
requirements.txt +9 -2

Dockerfile CHANGED Viewed

@@ -7,7 +7,8 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-COPY app.py rag.py ingest.py config.py ./
 EXPOSE 7860

 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+COPY app.py rag.py ingest.py config.py ./
+COPY frontend ./frontend
 EXPOSE 7860

app.py CHANGED Viewed

@@ -1,13 +1,22 @@
 from fastapi import FastAPI
 from rag import ask_rag_with_status
 app = FastAPI()
-@app.get("/")
-def health():
-    return {"status": "ok"}
-@app.post("/ask")
-def ask(payload: dict):
-    question = payload.get("question", "")
-    return ask_rag_with_status(question)

 from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
 from rag import ask_rag_with_status
 app = FastAPI()
+app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
+class Query(BaseModel):
+    question: str
+@app.get("/", response_class=HTMLResponse)
+def home():
+    """Return the contents of frontend/index.html as the landing page."""
+    # The file is re-read on every request, so edits show up without a restart.
+    with open("frontend/index.html", "r", encoding="utf-8") as page:
+        html = page.read()
+    return html
+@app.post("/chat")
+def chat(q: Query):
+    """Answer a question through the RAG pipeline.
+
+    Returns a JSON object with two fields: ``answer`` (the generated text)
+    and ``status`` (the list of progress messages for this request).
+    """
+    # ask_rag_with_status returns a dict; tuple-unpacking it would bind the
+    # key names ("answer", "status") instead of the values, so read by key.
+    result = ask_rag_with_status(q.question)
+    return {"answer": result["answer"], "status": result["status"]}

config.py CHANGED Viewed

@@ -1,18 +1,16 @@
 import os
-from huggingface_hub import snapshot_download
-from config import HF_DATASET_ID, KB_DIR
-def download_kb():
-    os.makedirs(KB_DIR, exist_ok=True)
-    snapshot_download(
-        repo_id=HF_DATASET_ID,
-        repo_type="dataset",
-        local_dir=KB_DIR,
-        local_dir_use_symlinks=False
-    )
-    print("✅ Knowledge base downloaded")
-if __name__ == "__main__":
-    download_kb()

 import os
+# Folder where PDFs are downloaded at runtime
+KB_DIR = "kb"
+# HF dataset containing PDFs
+HF_DATASET_REPO = "Zubaish/hubrags-docs"  # change if needed
+# Embeddings
+EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+# LLM
+LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
+# Chroma
+CHROMA_DIR = "chroma_db"

frontend/index.html ADDED Viewed

	@@ -0,0 +1,65 @@

+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="UTF-8" />
+  <title>HubRAG</title>
+  <style>
+    body {
+      font-family: sans-serif;
+      max-width: 800px;
+      margin: 40px auto;
+    }
+    textarea {
+      width: 100%;
+      padding: 10px;
+    }
+    button {
+      margin-top: 10px;
+      padding: 8px 16px;
+    }
+    pre {
+      background: #f5f5f5;
+      padding: 10px;
+      white-space: pre-wrap;
+    }
+  </style>
+</head>
+<body>
+<h2>📄 HubRAG (HF Space)</h2>
+<textarea id="q" rows="4" placeholder="Ask a question about the documents..."></textarea>
+<br/>
+<button onclick="ask()">Ask</button>
+<h3>Status</h3>
+<ul id="status"></ul>
+<h3>Answer</h3>
+<pre id="answer"></pre>
+<script>
+// Send the question to POST /chat and render the answer and status list.
+async function ask() {
+  const q = document.getElementById("q").value;
+  const answerEl = document.getElementById("answer");
+  const statusEl = document.getElementById("status");
+  answerEl.textContent = "Thinking...";
+  statusEl.innerHTML = "";
+  try {
+    const res = await fetch("/chat", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ question: q })
+    });
+    // fetch() only rejects on network failure; HTTP errors must be checked.
+    if (!res.ok) {
+      throw new Error("Request failed with status " + res.status);
+    }
+    const data = await res.json();
+    answerEl.textContent = data.answer || "No answer";
+    // Guard against a non-array status so a bad payload cannot throw here.
+    const steps = Array.isArray(data.status) ? data.status : [];
+    steps.forEach(s => {
+      const li = document.createElement("li");
+      li.textContent = s;
+      statusEl.appendChild(li);
+    });
+  } catch (err) {
+    // Replace the "Thinking..." placeholder so the UI never hangs silently.
+    answerEl.textContent = "Error: " + err.message;
+  }
+}
+</script>
+</body>
+</html>

rag.py CHANGED Viewed

@@ -1,66 +1,65 @@
-import os
-from typing import Dict
-from langchain_community.document_loaders import PyPDFLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import Chroma
-from ingest import download_kb
-from config import (
-    KB_DIR,
-    CHROMA_DIR,
-    EMBED_MODEL,
-    CHUNK_SIZE,
-    CHUNK_OVERLAP,
-)
-# -------------------------
-# Startup: download + index
-# -------------------------
-print("⬇️ Downloading KB...")
-download_kb()
-print("📄 Loading documents...")
-documents = []
-for file in os.listdir(KB_DIR):
-    if file.endswith(".pdf"):
-        loader = PyPDFLoader(os.path.join(KB_DIR, file))
-        documents.extend(loader.load())
-print(f"📚 Loaded {len(documents)} pages")
-splitter = RecursiveCharacterTextSplitter(
-    chunk_size=CHUNK_SIZE,
-    chunk_overlap=CHUNK_OVERLAP,
 )
-splits = splitter.split_documents(documents)
-embeddings = HuggingFaceEmbeddings(
-    model_name=EMBED_MODEL
-)
-vectorstore = Chroma.from_documents(
-    documents=splits,
-    embedding=embeddings,
-    persist_directory=CHROMA_DIR
-)
-retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
-# -------------------------
-# Query API
-# -------------------------
-def ask_rag_with_status(question: str) -> Dict:
-    docs = retriever.get_relevant_documents(question)
-    context = "\n\n".join(d.page_content for d in docs)
     return {
-        "question": question,
-        "chunks_used": len(docs),
-        "context_preview": context[:500]
     }

+from langchain_chroma import Chroma
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR
+status_log = []
+def log(msg):
+    status_log.append(msg)
+log("🔹 Loading embeddings...")
+embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+log("🔹 Loading vector store...")
+vectordb = Chroma(
+    persist_directory=CHROMA_DIR,
+    embedding_function=embeddings
+)
+log("🔹 Loading LLM...")
+tokenizer = AutoTokenizer.from_pretrained(
+    LLM_MODEL,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    LLM_MODEL,
+    trust_remote_code=True,
+    torch_dtype=torch.float32,
+    device_map="cpu"
 )
+def ask_rag_with_status(question: str):
+    """Retrieve context for *question*, generate an answer, and report progress.
+
+    Returns a dict with ``answer`` (the generated text) and ``status`` (a copy
+    of the progress messages logged while handling this call).
+    """
+    # NOTE(review): status_log is a module-level list shared by every caller;
+    # overlapping requests would interleave messages - confirm whether the
+    # server can run this function concurrently.
+    status_log.clear()
+    log("🔍 Searching documents...")
+    docs = vectordb.similarity_search(question, k=3)
+    context = "\n\n".join(d.page_content for d in docs)
+    prompt = f"""Use the context below to answer the question.
+Context:
+{context}
+Question:
+{question}
+Answer:"""
+    log("🤖 Generating answer...")
+    inputs = tokenizer(prompt, return_tensors="pt")
+    output = model.generate(
+        **inputs,
+        max_new_tokens=300,
+        do_sample=True,
+        temperature=0.3
+    )
+    # generate() returns prompt + continuation for causal LMs. Decode only the
+    # new tokens instead of splitting on "Answer:", which would truncate the
+    # reply whenever the generated text itself contains that marker.
+    prompt_len = inputs["input_ids"].shape[-1]
+    answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
     return {
+        "answer": answer.strip(),
+        "status": status_log.copy()
     }

requirements.txt CHANGED Viewed

@@ -1,10 +1,17 @@
 fastapi
 uvicorn
 langchain==0.2.17
 langchain-community==0.2.17
-langchain-text-splitters==0.2.4
 chromadb==0.5.5
 sentence-transformers
-huggingface_hub
 pypdf
 numpy<2

 fastapi
 uvicorn
+python-dotenv
 langchain==0.2.17
 langchain-community==0.2.17
+langchain-chroma==0.1.2
 chromadb==0.5.5
 sentence-transformers
 pypdf
+datasets
+transformers>=4.39.0
+huggingface_hub<1.0.0
+torch
 numpy<2