Zubaish
committed on
Commit
·
1afe1ea
1
Parent(s):
19be3af
update
Browse files
rag.py
CHANGED
|
@@ -1,35 +1,48 @@
|
|
| 1 |
-
# rag.py
|
| 2 |
import os
|
| 3 |
from transformers import pipeline
|
| 4 |
from langchain_huggingface import HuggingFaceEmbeddings
|
| 5 |
from langchain_chroma import Chroma
|
| 6 |
from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR
|
| 7 |
|
|
|
|
| 8 |
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
if os.path.exists(CHROMA_DIR) and
|
| 12 |
vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
|
| 13 |
-
print("✅ Vector DB
|
| 14 |
else:
|
| 15 |
vectordb = None
|
| 16 |
-
print("⚠️ Vector DB
|
| 17 |
|
|
|
|
| 18 |
qa_pipeline = pipeline(
|
| 19 |
-
|
| 20 |
model=LLM_MODEL,
|
| 21 |
-
max_new_tokens=
|
| 22 |
-
|
| 23 |
)
|
| 24 |
|
| 25 |
def ask_rag_with_status(question: str):
|
| 26 |
if vectordb is None:
|
| 27 |
-
return "The knowledge base is not initialized
|
| 28 |
|
| 29 |
-
docs
|
| 30 |
-
|
| 31 |
-
prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# rag.py — RAG bootstrap: embedding model, persisted vector store, LLM pipeline.
import os

from transformers import pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

from config import EMBEDDING_MODEL, LLM_MODEL, CHROMA_DIR

# 1. Initialize the embedding model used to query the vector store.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# 2. Load the persisted Chroma vector DB if its directory is present.
#    os.path.isdir already returns False for a missing path, so the extra
#    os.path.exists check in the original was redundant.
if os.path.isdir(CHROMA_DIR):
    vectordb = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
    print("✅ Vector DB loaded successfully")
else:
    # ask_rag_with_status() treats None as "knowledge base unavailable".
    vectordb = None
    print("⚠️ Vector DB folder missing")

# 3. LLM pipeline. The "text2text-generation" task name is passed explicitly
#    because T5-family models need it; task auto-detection can fail.
qa_pipeline = pipeline(
    "text2text-generation",
    model=LLM_MODEL,
    max_new_tokens=128,  # kept small so answers stay concise
    model_kwargs={"torch_dtype": "auto"},
)
|
| 25 |
|
| 26 |
def ask_rag_with_status(question: str, k: int = 2, max_chars: int = 400):
    """Answer *question* from the vector DB and report pipeline status.

    Args:
        question: Natural-language question to answer.
        k: Number of documents to retrieve. Defaults to 2 to keep the
           prompt under the model's 512-token input limit.
        max_chars: Per-document character cap when assembling the context.

    Returns:
        A ``(answer, status)`` tuple. On success ``status`` is a list of
        progress strings; on any failure it is the string ``"ERROR"``.
        NOTE(review): the two status shapes differ (list vs. str) — confirm
        how callers consume it before unifying the type.
    """
    if vectordb is None:
        return "The knowledge base is not initialized properly.", "ERROR"

    # Retrieve only a few documents so the prompt stays under the token limit.
    docs = vectordb.similarity_search(question, k=k)

    # Truncate each document; a generator avoids building a throwaway list.
    context = " ".join(d.page_content[:max_chars] for d in docs)

    # T5-style prompt format: "question: ... context: ..."
    prompt = f"question: {question} context: {context}"

    try:
        result = qa_pipeline(prompt)
        answer = result[0]["generated_text"].strip()

        if not answer:
            answer = "I couldn't find a specific answer in the documents provided."

        return answer, ["Context retrieved", "T5 generating"]
    except Exception as e:  # boundary: surface any pipeline failure to the caller
        return f"Error generating answer: {e}", "ERROR"
|