pikamomo committed on
Commit
d7ef75e
·
1 Parent(s): 8a70dca

add multi query

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/chatbot.py +23 -8
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  # ======================= LangChain Core =======================
2
- langchain==1.0.2
 
3
  langchain-openai==1.0.1
4
  langchain-qdrant==1.1.0
5
  langchain-community==0.4.1
 
1
  # ======================= LangChain Core =======================
2
+ langchain==1.2.9
3
+ langchain-classic
4
  langchain-openai==1.0.1
5
  langchain-qdrant==1.1.0
6
  langchain-community==0.4.1
src/chatbot.py CHANGED
@@ -1,9 +1,11 @@
1
  """
2
  RAG chatbot module using latest LangChain with LCEL
3
  Handles question-answering with conversation memory using modern patterns
 
4
  """
5
 
6
  import os
 
7
  from dotenv import load_dotenv
8
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
9
  from langchain_qdrant import QdrantVectorStore
@@ -14,10 +16,15 @@ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
14
  from langchain_core.runnables.history import RunnableWithMessageHistory
15
  from langchain_core.output_parsers import StrOutputParser
16
  from langchain_core.documents import Document
 
17
  from qdrant_client import QdrantClient
18
  from typing import Tuple, List, Dict, Any
19
  from operator import itemgetter
20
 
 
 
 
 
21
  load_dotenv()
22
 
23
  # Store for chat sessions
@@ -77,8 +84,8 @@ def create_rag_chain():
77
  embedding=embeddings
78
  )
79
 
80
- # 2. Create retriever
81
- retriever = vectorstore.as_retriever(
82
  search_type="similarity",
83
  search_kwargs={"k": 8}
84
  )
@@ -89,7 +96,15 @@ def create_rag_chain():
89
  temperature=0.3
90
  )
91
 
92
- # 4. System prompt
 
 
 
 
 
 
 
 
93
  system_prompt = """You are an HR assistant for nonprofit organizations in Canada.
94
  Use the following context to answer questions accurately and helpfully.
95
 
@@ -110,11 +125,11 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
110
  ("human", "{input}")
111
  ])
112
 
113
- # 5. Build RAG chain using LCEL (pipe operator)
114
- # This is the modern LangChain approach for better composability
115
  rag_chain = (
116
  {
117
- "context": itemgetter("input") | retriever | format_docs,
118
  "input": itemgetter("input"),
119
  "chat_history": itemgetter("chat_history")
120
  }
@@ -123,7 +138,7 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
123
  | StrOutputParser()
124
  )
125
 
126
- # 6. Add chat history with message management
127
  conversational_rag_chain = RunnableWithMessageHistory(
128
  rag_chain,
129
  get_session_history,
@@ -131,7 +146,7 @@ Provide a clear, helpful answer. If you're not certain, say so. Always remind us
131
  history_messages_key="chat_history",
132
  )
133
 
134
- return conversational_rag_chain, retriever
135
 
136
 
137
  def ask_question(
 
1
  """
2
  RAG chatbot module using latest LangChain with LCEL
3
  Handles question-answering with conversation memory using modern patterns
4
+ Uses MultiQueryRetriever for improved document retrieval
5
  """
6
 
7
  import os
8
+ import logging
9
  from dotenv import load_dotenv
10
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
11
  from langchain_qdrant import QdrantVectorStore
 
16
  from langchain_core.runnables.history import RunnableWithMessageHistory
17
  from langchain_core.output_parsers import StrOutputParser
18
  from langchain_core.documents import Document
19
+ from langchain_classic.retrievers.multi_query import MultiQueryRetriever
20
  from qdrant_client import QdrantClient
21
  from typing import Tuple, List, Dict, Any
22
  from operator import itemgetter
23
 
24
+ # Configure logging for MultiQueryRetriever to see generated query variations
25
+ logging.basicConfig()
26
+ logging.getLogger("langchain_classic.retrievers.multi_query").setLevel(logging.INFO)
27
+
28
  load_dotenv()
29
 
30
  # Store for chat sessions
 
84
  embedding=embeddings
85
  )
86
 
87
+ # 2. Create base retriever
88
+ base_retriever = vectorstore.as_retriever(
89
  search_type="similarity",
90
  search_kwargs={"k": 8}
91
  )
 
96
  temperature=0.3
97
  )
98
 
99
+ # 4. Wrap with MultiQueryRetriever for improved recall
100
+ # Generates multiple query variations from the original question,
101
+ # retrieves documents for each, and returns the unique union of results
102
+ multi_query_retriever = MultiQueryRetriever.from_llm(
103
+ retriever=base_retriever,
104
+ llm=llm,
105
+ )
106
+
107
+ # 5. System prompt
108
  system_prompt = """You are an HR assistant for nonprofit organizations in Canada.
109
  Use the following context to answer questions accurately and helpfully.
110
 
 
125
  ("human", "{input}")
126
  ])
127
 
128
+ # 6. Build RAG chain using LCEL (pipe operator)
129
+ # Uses MultiQueryRetriever instead of base retriever for broader document coverage
130
  rag_chain = (
131
  {
132
+ "context": itemgetter("input") | multi_query_retriever | format_docs,
133
  "input": itemgetter("input"),
134
  "chat_history": itemgetter("chat_history")
135
  }
 
138
  | StrOutputParser()
139
  )
140
 
141
+ # 7. Add chat history with message management
142
  conversational_rag_chain = RunnableWithMessageHistory(
143
  rag_chain,
144
  get_session_history,
 
146
  history_messages_key="chat_history",
147
  )
148
 
149
+ return conversational_rag_chain, multi_query_retriever
150
 
151
 
152
  def ask_question(