pikamomo committed on
Commit
d7ef75e
·
1 Parent(s): 8a70dca

add multi query

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/chatbot.py +23 -8
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  # ======================= LangChain Core =======================
2
- langchain==1.0.2
 
3
  langchain-openai==1.0.1
4
  langchain-qdrant==1.1.0
5
  langchain-community==0.4.1
 
1
  # ======================= LangChain Core =======================
2
+ langchain==1.2.9
3
+ langchain-classic
4
  langchain-openai==1.0.1
5
  langchain-qdrant==1.1.0
6
  langchain-community==0.4.1
src/chatbot.py CHANGED
@@ -1,9 +1,11 @@
1
  """
2
  RAG chatbot module using latest LangChain with LCEL
3
  Handles question-answering with conversation memory using modern patterns
 
4
  """
5
 
6
  import os
 
7
  from dotenv import load_dotenv
8
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
9
  from langchain_qdrant import QdrantVectorStore
@@ -14,10 +16,15 @@ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
14
  from langchain_core.runnables.history import RunnableWithMessageHistory
15
  from langchain_core.output_parsers import StrOutputParser
16
  from langchain_core.documents import Document
 
17
  from qdrant_client import QdrantClient
18
  from typing import Tuple, List, Dict, Any
19
  from operator import itemgetter
20
 
 
 
 
 
21
  load_dotenv()
22
 
23
  # Store for chat sessions
@@ -77,8 +84,8 @@ def create_rag_chain():
77
  embedding=embeddings
78
  )
79
 
80
- # 2. Create retriever
81
- retriever = vectorstore.as_retriever(
82
  search_type="similarity",
83
  search_kwargs={"k": 8}
84
  )
@@ -89,7 +96,15 @@ def create_rag_chain():
89
  temperature=0.3
90
  )
91
 
92
- # 4. System prompt
 
 
 
 
 
 
 
 
93
  system_prompt = """You are an HR assistant for nonprofit organizations in Canada.
94
  Use the following context to answer questions accurately and helpfully.
95
 
@@ -110,11 +125,11 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
110
  ("human", "{input}")
111
  ])
112
 
113
- # 5. Build RAG chain using LCEL (pipe operator)
114
- # This is the modern LangChain approach for better composability
115
  rag_chain = (
116
  {
117
- "context": itemgetter("input") | retriever | format_docs,
118
  "input": itemgetter("input"),
119
  "chat_history": itemgetter("chat_history")
120
  }
@@ -123,7 +138,7 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
123
  | StrOutputParser()
124
  )
125
 
126
- # 6. Add chat history with message management
127
  conversational_rag_chain = RunnableWithMessageHistory(
128
  rag_chain,
129
  get_session_history,
@@ -131,7 +146,7 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
131
  history_messages_key="chat_history",
132
  )
133
 
134
- return conversational_rag_chain, retriever
135
 
136
 
137
  def ask_question(
 
1
  """
2
  RAG chatbot module using latest LangChain with LCEL
3
  Handles question-answering with conversation memory using modern patterns
4
+ Uses MultiQueryRetriever for improved document retrieval
5
  """
6
 
7
  import os
8
+ import logging
9
  from dotenv import load_dotenv
10
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
11
  from langchain_qdrant import QdrantVectorStore
 
16
  from langchain_core.runnables.history import RunnableWithMessageHistory
17
  from langchain_core.output_parsers import StrOutputParser
18
  from langchain_core.documents import Document
19
+ from langchain_classic.retrievers.multi_query import MultiQueryRetriever
20
  from qdrant_client import QdrantClient
21
  from typing import Tuple, List, Dict, Any
22
  from operator import itemgetter
23
 
24
+ # Configure logging for MultiQueryRetriever to see generated query variations
25
+ logging.basicConfig()
26
+ logging.getLogger("langchain_classic.retrievers.multi_query").setLevel(logging.INFO)
27
+
28
  load_dotenv()
29
 
30
  # Store for chat sessions
 
84
  embedding=embeddings
85
  )
86
 
87
+ # 2. Create base retriever
88
+ base_retriever = vectorstore.as_retriever(
89
  search_type="similarity",
90
  search_kwargs={"k": 8}
91
  )
 
96
  temperature=0.3
97
  )
98
 
99
+ # 4. Wrap with MultiQueryRetriever for improved recall
100
+ # Generates multiple query variations from the original question,
101
+ # retrieves documents for each, and returns the unique union of results
102
+ multi_query_retriever = MultiQueryRetriever.from_llm(
103
+ retriever=base_retriever,
104
+ llm=llm,
105
+ )
106
+
107
+ # 5. System prompt
108
  system_prompt = """You are an HR assistant for nonprofit organizations in Canada.
109
  Use the following context to answer questions accurately and helpfully.
110
 
 
125
  ("human", "{input}")
126
  ])
127
 
128
+ # 6. Build RAG chain using LCEL (pipe operator)
129
+ # Uses MultiQueryRetriever instead of base retriever for broader document coverage
130
  rag_chain = (
131
  {
132
+ "context": itemgetter("input") | multi_query_retriever | format_docs,
133
  "input": itemgetter("input"),
134
  "chat_history": itemgetter("chat_history")
135
  }
 
138
  | StrOutputParser()
139
  )
140
 
141
+ # 7. Add chat history with message management
142
  conversational_rag_chain = RunnableWithMessageHistory(
143
  rag_chain,
144
  get_session_history,
 
146
  history_messages_key="chat_history",
147
  )
148
 
149
+ return conversational_rag_chain, multi_query_retriever
150
 
151
 
152
  def ask_question(