Zubaish committed on
Commit
e8fa82e
·
1 Parent(s): 1afe1ea
Files changed (5) hide show
  1. Dockerfile +1 -0
  2. config.py +2 -2
  3. download_models.py +5 -6
  4. ingest.py +8 -30
  5. rag.py +20 -30
Dockerfile CHANGED
@@ -4,6 +4,7 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
4
  COPY requirements.txt .
5
  RUN pip install --no-cache-dir -r requirements.txt
6
  COPY . .
 
7
  RUN python download_models.py
8
  RUN python ingest.py
9
  EXPOSE 7860
 
4
  COPY requirements.txt .
5
  RUN pip install --no-cache-dir -r requirements.txt
6
  COPY . .
7
+ # These run during the 'Building' phase on Hugging Face
8
  RUN python download_models.py
9
  RUN python ingest.py
10
  EXPOSE 7860
config.py CHANGED
@@ -1,14 +1,14 @@
1
  import os
2
 
3
  BASE_DIR = "/app"
4
- HF_DATASET_REPO = "Zubaish/hubrag-kb"
5
  HF_TOKEN = os.getenv("HF_TOKEN")
6
 
7
  CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
8
  KB_DIR = os.path.join(BASE_DIR, "kb")
9
 
10
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
11
- LLM_MODEL = "google/flan-t5-small"
12
  LLM_TASK = "text-generation"
13
 
14
  CHUNK_SIZE = 1000
 
1
  import os
2
 
3
  BASE_DIR = "/app"
4
+ HF_DATASET_REPO = "Zubaish/gandhi-kb-docx" # Ensure this points to your NEW docx repo
5
  HF_TOKEN = os.getenv("HF_TOKEN")
6
 
7
  CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
8
  KB_DIR = os.path.join(BASE_DIR, "kb")
9
 
10
  EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
11
+ LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
12
  LLM_TASK = "text-generation"
13
 
14
  CHUNK_SIZE = 1000
download_models.py CHANGED
@@ -1,11 +1,10 @@
1
- # download_models.py
2
  from transformers import pipeline
3
  from langchain_huggingface import HuggingFaceEmbeddings
4
- from config import EMBEDDING_MODEL, LLM_MODEL
5
 
6
  print("⏳ Pre-downloading models...")
7
- # Download Embedding Model
8
  HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
9
- # Download LLM
10
- pipeline("text-generation", model=LLM_MODEL, trust_remote_code=True)
11
- print("✅ Models downloaded successfully")
 
 
1
  from transformers import pipeline
2
  from langchain_huggingface import HuggingFaceEmbeddings
3
+ from config import EMBEDDING_MODEL, LLM_MODEL, LLM_TASK
4
 
5
  print("⏳ Pre-downloading models...")
6
+ # Cache Embedding Model
7
  HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
8
+ # Cache Qwen LLM
9
+ pipeline(LLM_TASK, model=LLM_MODEL, device_map="cpu", trust_remote_code=True)
10
+ print("✅ Models cached successfully")
ingest.py CHANGED
@@ -1,6 +1,4 @@
1
- # ingest.py
2
- import os
3
- import shutil
4
  from huggingface_hub import hf_hub_download, list_repo_files
5
  from langchain_community.document_loaders import Docx2txtLoader
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
@@ -9,51 +7,31 @@ from langchain_chroma import Chroma
9
  from config import KB_DIR, HF_DATASET_REPO, EMBEDDING_MODEL, CHROMA_DIR, CHUNK_SIZE, CHUNK_OVERLAP, HF_TOKEN
10
 
11
  def run_ingestion():
12
- # 1. Clean Environment
13
  if os.path.exists(KB_DIR): shutil.rmtree(KB_DIR)
14
  if os.path.exists(CHROMA_DIR): shutil.rmtree(CHROMA_DIR)
15
  os.makedirs(KB_DIR, exist_ok=True)
16
 
17
- print(f"⬇️ Downloading files from NEW repo: {HF_DATASET_REPO}...")
18
 
19
  try:
20
- # List files using the hub API instead of load_dataset
21
  all_files = list_repo_files(repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
22
  docx_files = [f for f in all_files if f.lower().endswith(".docx")]
23
 
24
  docs = []
25
- for i, file_name in enumerate(docx_files):
26
- # Download file directly to local folder
27
- local_path = hf_hub_download(
28
- repo_id=HF_DATASET_REPO,
29
- filename=file_name,
30
- repo_type="dataset",
31
- local_dir=KB_DIR,
32
- token=HF_TOKEN
33
- )
34
-
35
- # Load the text from docx
36
  loader = Docx2txtLoader(local_path)
37
  docs.extend(loader.load())
38
  print(f"✅ Loaded: {file_name}")
39
 
40
  if not docs:
41
- print("❌ No documents found. Check repo files.")
42
  return
43
 
44
- # 2. Chunking
45
- splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
46
- splits = splitter.split_documents(docs)
47
-
48
- # 3. Embedding and Storage
49
- print(f"🧠 Indexing {len(splits)} chunks into ChromaDB...")
50
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
51
- Chroma.from_documents(
52
- documents=splits,
53
- embedding=embeddings,
54
- persist_directory=CHROMA_DIR
55
- )
56
- print(f"✅ Knowledge base initialized successfully at {CHROMA_DIR}")
57
 
58
  except Exception as e:
59
  print(f"❌ Ingestion failed: {e}")
 
1
+ import os, shutil
 
 
2
  from huggingface_hub import hf_hub_download, list_repo_files
3
  from langchain_community.document_loaders import Docx2txtLoader
4
  from langchain_text_splitters import RecursiveCharacterTextSplitter
 
7
  from config import KB_DIR, HF_DATASET_REPO, EMBEDDING_MODEL, CHROMA_DIR, CHUNK_SIZE, CHUNK_OVERLAP, HF_TOKEN
8
 
9
  def run_ingestion():
 
10
  if os.path.exists(KB_DIR): shutil.rmtree(KB_DIR)
11
  if os.path.exists(CHROMA_DIR): shutil.rmtree(CHROMA_DIR)
12
  os.makedirs(KB_DIR, exist_ok=True)
13
 
14
+ print(f"⬇️ Downloading files from: {HF_DATASET_REPO}...")
15
 
16
  try:
 
17
  all_files = list_repo_files(repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN)
18
  docx_files = [f for f in all_files if f.lower().endswith(".docx")]
19
 
20
  docs = []
21
+ for file_name in docx_files:
22
+ local_path = hf_hub_download(repo_id=HF_DATASET_REPO, filename=file_name, repo_type="dataset", local_dir=KB_DIR, token=HF_TOKEN)
 
 
 
 
 
 
 
 
 
23
  loader = Docx2txtLoader(local_path)
24
  docs.extend(loader.load())
25
  print(f"✅ Loaded: {file_name}")
26
 
27
  if not docs:
28
+ print("❌ No documents found.")
29
  return
30
 
31
+ splits = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP).split_documents(docs)
 
 
 
 
 
32
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
33
+ Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=CHROMA_DIR)
34
+ print(f"✅ Knowledge base initialized successfully.")
 
 
 
 
35
 
36
  except Exception as e:
37
  print(f"❌ Ingestion failed: {e}")
rag.py CHANGED
@@ -2,47 +2,37 @@ import os
2
  from transformers import pipeline
3
  from langchain_huggingface import HuggingFaceEmbeddings
4
  from langchain_chroma import Chroma
5
- from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR
6
 
7
- # 1. Initialize Embeddings
8
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
9
 
10
- # 2. Load Vector DB
11
- if os.path.exists(CHROMA_DIR) and os.path.isdir(CHROMA_DIR):
12
  vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
13
- print("✅ Vector DB loaded successfully")
14
  else:
15
  vectordb = None
16
- print("⚠️ Vector DB folder missing")
17
 
18
- # 3. LLM Pipeline - Using the explicit class to avoid task errors
19
- qa_pipeline = pipeline(
20
- "text2text-generation", # T5 specifically needs this task name
21
- model=LLM_MODEL,
22
- max_new_tokens=128, # Reduced to keep responses concise
23
- model_kwargs={"torch_dtype": "auto"}
24
- )
25
 
26
  def ask_rag_with_status(question: str):
27
  if vectordb is None:
28
- return "The knowledge base is not initialized properly.", "ERROR"
29
 
30
- # Search for only 2 docs (k=2) to stay under the 512 token limit
31
- docs = vectordb.similarity_search(question, k=2)
32
 
33
- # Extract text and keep it short
34
- context = " ".join([d.page_content[:400] for d in docs])
 
 
 
35
 
36
- # Specific T5 Prompt Format: "question: ... context: ..."
37
- prompt = f"question: {question} context: {context}"
38
 
39
- try:
40
- result = qa_pipeline(prompt)
41
- answer = result[0]["generated_text"].strip()
42
-
43
- if not answer:
44
- answer = "I couldn't find a specific answer in the documents provided."
45
-
46
- return answer, ["Context retrieved", "T5 generating"]
47
- except Exception as e:
48
- return f"Error generating answer: {str(e)}", "ERROR"
 
2
  from transformers import pipeline
3
  from langchain_huggingface import HuggingFaceEmbeddings
4
  from langchain_chroma import Chroma
5
+ from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR, LLM_TASK
6
 
 
7
  embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
8
 
9
+ if os.path.exists(CHROMA_DIR) and any(os.scandir(CHROMA_DIR)):
 
10
  vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
11
+ print("✅ Vector DB loaded")
12
  else:
13
  vectordb = None
14
+ print("⚠️ Vector DB missing")
15
 
16
+ qa_pipeline = pipeline(task=LLM_TASK, model=LLM_MODEL, device_map="cpu", max_new_tokens=512, trust_remote_code=True)
 
 
 
 
 
 
17
 
18
  def ask_rag_with_status(question: str):
19
  if vectordb is None:
20
+ return "Knowledge base not ready.", "ERROR"
21
 
22
+ docs = vectordb.similarity_search(question, k=3)
23
+ context = "\n\n".join(d.page_content for d in docs)
24
 
25
+ # Qwen Chat Template
26
+ messages = [
27
+ {"role": "system", "content": "You are a Gandhi ji expert. Answer the question using ONLY the provided context."},
28
+ {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
29
+ ]
30
 
31
+ prompt = qa_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
32
+ result = qa_pipeline(prompt, pad_token_id=qa_pipeline.tokenizer.eos_token_id)
33
 
34
+ # Extract Qwen answer
35
+ full_text = result[0]["generated_text"]
36
+ answer = full_text.split("<|im_start|>assistant")[-1].strip().replace("<|im_end|>", "")
37
+
38
+ return answer, ["Success"]