Zubaish committed on
Commit
b6d77d3
·
1 Parent(s): e34c59e

Fix: proper frontend/backend separation

Browse files
Files changed (5) hide show
  1. app.py +9 -9
  2. config.py +12 -6
  3. frontend/index.html +53 -21
  4. rag.py +82 -36
  5. requirements.txt +5 -5
app.py CHANGED
@@ -1,5 +1,5 @@
1
- # app.py
2
  from fastapi import FastAPI
 
3
  from pydantic import BaseModel
4
  from rag import ask_rag_with_status
5
 
@@ -8,14 +8,14 @@ app = FastAPI()
8
  class Query(BaseModel):
9
  question: str
10
 
11
- @app.get("/")
12
- def health():
13
- return {"status": "ok"}
 
 
14
 
 
15
  @app.post("/chat")
16
  def chat(q: Query):
17
- answer, status = ask_rag_with_status(q.question)
18
- return {
19
- "answer": answer,
20
- "status": status,
21
- }
 
 
1
  from fastapi import FastAPI
2
+ from fastapi.responses import HTMLResponse
3
  from pydantic import BaseModel
4
  from rag import ask_rag_with_status
5
 
 
8
  class Query(BaseModel):
9
  question: str
10
 
11
+ # Serve frontend
12
+ @app.get("/", response_class=HTMLResponse)
13
+ def index():
14
+ with open("index.html", "r", encoding="utf-8") as f:
15
+ return f.read()
16
 
17
+ # Chat endpoint
18
  @app.post("/chat")
19
  def chat(q: Query):
20
+ result = ask_rag_with_status(q.question)
21
+ return result
 
 
 
config.py CHANGED
@@ -1,10 +1,16 @@
1
- # config.py
2
 
3
- MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
 
 
 
4
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
5
 
6
- # Hugging Face Dataset repo where PDFs live
7
- HF_DATASET_REPO = "Zubaish/HubRAG-docs"
 
 
 
8
 
9
- # Retrieval
10
- TOP_K = 3
 
1
+ import os
2
 
3
+ # Hugging Face dataset repo containing PDFs
4
+ HF_DATASET_REPO = "Zubaish/hubrag-kb"
5
+
6
+ # Embedding model (lightweight, CPU-safe)
7
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
8
 
9
+ # Chroma persistence (local to container)
10
+ CHROMA_DIR = "/tmp/chroma"
11
+
12
+ # LLM via HF Inference API (NOT local)
13
+ LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
14
 
15
+ # Safety
16
+ MAX_CONTEXT_CHUNKS = 4
frontend/index.html CHANGED
@@ -1,21 +1,53 @@
1
- # app.py
2
- from fastapi import FastAPI
3
- from pydantic import BaseModel
4
- from rag import ask_rag_with_status
5
-
6
- app = FastAPI()
7
-
8
- class Query(BaseModel):
9
- question: str
10
-
11
- @app.get("/")
12
- def health():
13
- return {"status": "ok"}
14
-
15
- @app.post("/chat")
16
- def chat(q: Query):
17
- answer, status = ask_rag_with_status(q.question)
18
- return {
19
- "answer": answer,
20
- "status": status,
21
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <title>HubRAG</title>
6
+ <style>
7
+ body { font-family: sans-serif; max-width: 800px; margin: 40px auto; }
8
+ textarea { width: 100%; padding: 10px; }
9
+ button { margin-top: 10px; padding: 8px 16px; }
10
+ pre { background: #f5f5f5; padding: 10px; white-space: pre-wrap; }
11
+ </style>
12
+ </head>
13
+ <body>
14
+
15
+ <h2>📄 HubRAG (HF Space)</h2>
16
+
17
+ <textarea id="q" rows="4" placeholder="Ask a question..."></textarea>
18
+ <br/>
19
+ <button onclick="ask()">Ask</button>
20
+
21
+ <h3>Status</h3>
22
+ <ul id="status"></ul>
23
+
24
+ <h3>Answer</h3>
25
+ <pre id="answer"></pre>
26
+
27
+ <script>
28
+ async function ask() {
29
+ const q = document.getElementById("q").value;
30
+ document.getElementById("answer").textContent = "Thinking...";
31
+ document.getElementById("status").innerHTML = "";
32
+
33
+ const res = await fetch("/chat", {
34
+ method: "POST",
35
+ headers: { "Content-Type": "application/json" },
36
+ body: JSON.stringify({ question: q })
37
+ });
38
+
39
+ const data = await res.json();
40
+
41
+ document.getElementById("answer").textContent =
42
+ data.answer || "No answer";
43
+
44
+ (data.status || []).forEach(s => {
45
+ const li = document.createElement("li");
46
+ li.textContent = s;
47
+ document.getElementById("status").appendChild(li);
48
+ });
49
+ }
50
+ </script>
51
+
52
+ </body>
53
+ </html>
rag.py CHANGED
@@ -1,57 +1,99 @@
 
1
  from datasets import load_dataset
2
- from langchain.schema import Document
 
3
  from langchain_community.vectorstores import Chroma
4
- from langchain_huggingface import HuggingFaceEmbeddings
5
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- HF_DATASET_REPO = "Zubaish/hubrag-kb"
8
- EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
9
- CHROMA_DIR = "./chroma"
10
 
 
 
 
 
 
 
 
11
  def load_documents():
12
  docs = []
13
  ds = load_dataset(HF_DATASET_REPO, split="train")
14
 
15
- for row in ds:
16
- text = row.get("text")
17
- if text and text.strip():
18
- docs.append(Document(page_content=text))
19
 
20
  return docs
21
 
22
- documents = load_documents()
23
 
24
- if not documents:
25
- print("⚠️ No text documents found in dataset. PDFs must be converted to text.")
 
 
 
26
 
27
- embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
 
 
28
 
29
- vectordb = None
30
- if documents:
31
- vectordb = Chroma.from_documents(
32
- documents,
 
 
 
 
 
 
 
 
33
  embedding=embeddings,
34
  persist_directory=CHROMA_DIR
35
  )
36
 
37
- llm = pipeline(
38
- "text-generation",
39
- model="microsoft/Phi-3-mini-4k-instruct",
40
- trust_remote_code=True,
41
- max_new_tokens=256
42
- )
43
 
 
 
44
  def ask_rag_with_status(question: str):
45
- if not vectordb:
 
 
 
46
  return {
47
- "answer": "Knowledge base is empty. Please upload text documents to the dataset.",
48
- "status": ["No text documents loaded"]
 
 
 
 
 
 
 
 
 
 
49
  }
50
 
51
- docs = vectordb.similarity_search(question, k=3)
52
  context = "\n\n".join(d.page_content for d in docs)
53
 
54
- prompt = f"""Answer the question using only the context.
 
 
55
 
56
  Context:
57
  {context}
@@ -59,14 +101,18 @@ Context:
59
  Question:
60
  {question}
61
 
62
- Answer:"""
 
63
 
64
- result = llm(prompt)[0]["generated_text"]
 
 
 
 
 
 
65
 
66
  return {
67
- "answer": result.split("Answer:")[-1].strip(),
68
- "status": [
69
- f"Loaded {len(documents)} documents",
70
- f"Retrieved {len(docs)} chunks"
71
- ]
72
  }
 
1
+ import os
2
  from datasets import load_dataset
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import Chroma
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from huggingface_hub import InferenceClient
8
+
9
+ from config import (
10
+ HF_DATASET_REPO,
11
+ EMBEDDING_MODEL,
12
+ CHROMA_DIR,
13
+ LLM_MODEL,
14
+ MAX_CONTEXT_CHUNKS,
15
+ )
16
+
17
+ # --- Globals (lazy loaded) ---
18
+ _vectordb = None
19
 
20
+ # --- Embeddings ---
21
+ embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 
22
 
23
+ # --- HF Inference Client ---
24
+ llm = InferenceClient(
25
+ model=LLM_MODEL,
26
+ token=os.environ.get("HF_TOKEN"),
27
+ )
28
+
29
+ # --- Load PDFs from HF Dataset ---
30
  def load_documents():
31
  docs = []
32
  ds = load_dataset(HF_DATASET_REPO, split="train")
33
 
34
+ for item in ds:
35
+ pdf_path = item["file"]
36
+ loader = PyPDFLoader(pdf_path)
37
+ docs.extend(loader.load())
38
 
39
  return docs
40
 
 
41
 
42
+ def get_vectordb():
43
+ global _vectordb
44
+
45
+ if _vectordb is not None:
46
+ return _vectordb
47
 
48
+ documents = load_documents()
49
+ if not documents:
50
+ return None
51
 
52
+ splitter = RecursiveCharacterTextSplitter(
53
+ chunk_size=800,
54
+ chunk_overlap=150
55
+ )
56
+
57
+ chunks = splitter.split_documents(documents)
58
+
59
+ if not chunks:
60
+ return None
61
+
62
+ _vectordb = Chroma.from_documents(
63
+ chunks,
64
  embedding=embeddings,
65
  persist_directory=CHROMA_DIR
66
  )
67
 
68
+ return _vectordb
 
 
 
 
 
69
 
70
+
71
+ # --- RAG Query ---
72
  def ask_rag_with_status(question: str):
73
+ status = []
74
+
75
+ vectordb = get_vectordb()
76
+ if vectordb is None:
77
  return {
78
+ "answer": "No documents indexed.",
79
+ "status": ["Vector DB not available"]
80
+ }
81
+
82
+ status.append("🔍 Searching documents")
83
+
84
+ docs = vectordb.similarity_search(question, k=MAX_CONTEXT_CHUNKS)
85
+
86
+ if not docs:
87
+ return {
88
+ "answer": "No relevant context found.",
89
+ "status": status
90
  }
91
 
 
92
  context = "\n\n".join(d.page_content for d in docs)
93
 
94
+ prompt = f"""You are a helpful assistant.
95
+ Answer ONLY from the context below.
96
+ If the answer is not present, say "I don't know".
97
 
98
  Context:
99
  {context}
 
101
  Question:
102
  {question}
103
 
104
+ Answer:
105
+ """
106
 
107
+ status.append("🧠 Generating answer")
108
+
109
+ answer = llm.text_generation(
110
+ prompt,
111
+ max_new_tokens=256,
112
+ temperature=0.2,
113
+ )
114
 
115
  return {
116
+ "answer": answer.strip(),
117
+ "status": status
 
 
 
118
  }
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
  fastapi
2
  uvicorn
3
- transformers
4
- torch
5
  datasets
6
- chromadb
 
7
  langchain
8
  langchain-community
9
- langchain-huggingface==0.1.0
10
- sentence-transformers
 
1
  fastapi
2
  uvicorn
3
+ pydantic
 
4
  datasets
5
+ huggingface_hub
6
+ sentence-transformers
7
  langchain
8
  langchain-community
9
+ chromadb
10
+ pypdf