XQ committed on
Commit
cc7b6b4
·
1 Parent(s): cce63da

Add LangChain

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. requirements.txt +1 -0
  3. src/agent/router.py +214 -51
  4. src/ui/app.py +2 -2
README.md CHANGED
@@ -20,14 +20,14 @@ The system follows a three-stage RAG pipeline:
20
 
21
  **Retrieval:** User queries run through both dense (Qdrant cosine similarity) and sparse (BM25) search paths. Results are merged via reciprocal rank fusion, then a cross-encoder reranker scores each candidate for final ordering. An intent classifier routes queries to the appropriate retrieval strategy.
22
 
23
- **Generation:** Top-ranked chunks are assembled into a prompt context and passed to the LLM through LangChain. The response is returned via a FastAPI endpoint and displayed in a Streamlit UI. Retrieval quality can be measured offline using RAGAS metrics.
24
 
25
  ## Tech Stack
26
 
27
  | Category | Technology |
28
  |---|---|
29
  | Framework | FastAPI, uvicorn |
30
- | Orchestration | LangChain |
31
  | Vector Store | Qdrant (local mode, no server required) |
32
  | Embedding | HuggingFace `paraphrase-multilingual-MiniLM-L12-v2` (384 dim) |
33
  | LLM | `gemma3:4b` (default, runs locally via Ollama) |
 
20
 
21
  **Retrieval:** User queries run through both dense (Qdrant cosine similarity) and sparse (BM25) search paths. Results are merged via reciprocal rank fusion, then a cross-encoder reranker scores each candidate for final ordering. An intent classifier routes queries to the appropriate retrieval strategy.
22
 
23
+ **Generation:** Top-ranked chunks are assembled into a prompt context and passed to the LLM. The routing pipeline is orchestrated as a stateful LangGraph graph — each step (language detection, translation, retrieval, reranking, generation) runs as a node with full intermediate state preserved. The response is returned via a FastAPI endpoint and displayed in a Streamlit UI. Retrieval quality can be measured offline using RAGAS metrics.
24
 
25
  ## Tech Stack
26
 
27
  | Category | Technology |
28
  |---|---|
29
  | Framework | FastAPI, uvicorn |
30
+ | Orchestration | LangChain, LangGraph |
31
  | Vector Store | Qdrant (local mode, no server required) |
32
  | Embedding | HuggingFace `paraphrase-multilingual-MiniLM-L12-v2` (384 dim) |
33
  | LLM | `gemma3:4b` (default, runs locally via Ollama) |
requirements.txt CHANGED
@@ -3,6 +3,7 @@ fastapi==0.135.3
3
  uvicorn==0.43.0
4
 
5
  # LLM orchestration
 
6
  langchain==1.2.15
7
  langchain-core==1.2.26
8
  langchain-openai==1.1.12
 
3
  uvicorn==0.43.0
4
 
5
  # LLM orchestration
6
+ langgraph==1.1.6
7
  langchain==1.2.15
8
  langchain-core==1.2.26
9
  langchain-openai==1.1.12
src/agent/router.py CHANGED
@@ -2,10 +2,12 @@
2
 
3
  import logging
4
  import unicodedata
 
5
 
6
  from langchain_core.runnables import Runnable
 
7
 
8
- from src.models import IntentType, GenerationResponse, PipelineDetails
9
  from src.agent.intent_classifier import IntentClassifier
10
  from src.retrieval.hybrid import HybridRetriever
11
  from src.retrieval.reranker import Reranker
@@ -13,6 +15,38 @@ from src.retrieval.reranker import Reranker
13
  logger = logging.getLogger(__name__)
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class QueryRouter:
17
  """Routes queries to appropriate retrieval and generation pipelines."""
18
 
@@ -41,6 +75,7 @@ class QueryRouter:
41
  self._reranker = reranker
42
  self._generator = generator
43
  self._translate_query_enabled = translate_query
 
44
 
45
  @staticmethod
46
  def _detect_script(text: str) -> str | None:
@@ -168,8 +203,100 @@ class QueryRouter:
168
  logger.info("Translated query to Danish: %s", translated)
169
  return translated
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  def route(self, query: str, top_k: int) -> GenerationResponse:
172
- """Route a query through the full RAG pipeline.
173
 
174
  Args:
175
  query: The user's natural language query.
@@ -180,65 +307,101 @@ class QueryRouter:
180
  """
181
  logger.info("Routing query: %s", query)
182
 
183
- # Single LLM call for both language detection and intent classification
184
- user_language, intent = self._detect_language_and_intent(query)
185
- retrieval_query = self._translate_query(query, user_language)
186
- translated = retrieval_query != query
187
-
188
- logger.info("Classified intent: %s", intent.value)
189
- logger.debug("Intent classification result: %s for query='%s'", intent.value, query)
 
 
 
 
 
 
 
190
 
191
- should_retrieve = intent != IntentType.UNKNOWN
192
- logger.debug("Retrieval executed: %s (intent=%s)", should_retrieve, intent.value)
193
 
194
- # Use detailed search to capture intermediate results
195
  pipeline = PipelineDetails(
196
  original_query=query,
197
- retrieval_query=retrieval_query,
198
- detected_language=user_language,
199
- translated=translated,
 
 
 
 
200
  )
201
 
202
- if should_retrieve:
203
- hybrid_result = self._hybrid_retriever.search_detailed(retrieval_query, top_k=top_k)
204
- pipeline.dense_results = hybrid_result.dense_results
205
- pipeline.sparse_results = hybrid_result.sparse_results
206
- pipeline.fused_results = hybrid_result.fused_results
207
- results = hybrid_result.fused_results
208
- else:
209
- results = []
210
-
211
- logger.info("Retrieved %d results from hybrid search", len(results))
212
- logger.debug("Retrieval returned %d results", len(results))
213
-
214
- reranked = self._reranker.rerank(retrieval_query, results, top_k=top_k) if results else []
215
- pipeline.reranked_results = reranked
216
- logger.info("Reranked to %d results", len(reranked))
217
-
218
- if reranked and intent == IntentType.FACTUAL:
219
- intent = IntentType.RAG
220
- logger.info("Overriding intent to RAG (sources retrieved)")
221
-
222
- context = "\n\n".join(r.chunk.text for r in reranked)
223
- prompt = self._build_prompt(query, intent, context, user_language)
224
-
225
- answer = self._generator.invoke(prompt)
226
- logger.info("Generated answer for intent=%s", intent.value)
227
-
228
- if reranked:
229
- confidence = max(r.score for r in reranked)
230
- logger.info("Confidence: %.4f (sigmoid-normalized by reranker)", confidence)
231
- else:
232
- confidence = 0.0
233
-
234
  return GenerationResponse(
235
- answer=str(answer),
236
- sources=reranked,
237
- intent=intent,
238
- confidence=confidence,
239
  pipeline_details=pipeline,
240
  )
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  def _build_prompt(
243
  self, query: str, intent: IntentType, context: str, user_language: str
244
  ) -> str:
 
2
 
3
  import logging
4
  import unicodedata
5
+ from typing import TypedDict
6
 
7
  from langchain_core.runnables import Runnable
8
+ from langgraph.graph import END, StateGraph
9
 
10
+ from src.models import IntentType, GenerationResponse, PipelineDetails, QueryResult
11
  from src.agent.intent_classifier import IntentClassifier
12
  from src.retrieval.hybrid import HybridRetriever
13
  from src.retrieval.reranker import Reranker
 
15
  logger = logging.getLogger(__name__)
16
 
17
 
18
+ class RouterState(TypedDict):
19
+ """LangGraph state passed between routing nodes.
20
+
21
+ Attributes:
22
+ query: The user's original query.
23
+ top_k: Number of results to retrieve.
24
+ user_language: Detected language of the query.
25
+ intent: Classified intent type.
26
+ retrieval_query: Query used for retrieval (may be translated).
27
+ translated: Whether the query was translated.
28
+ dense_results: Results from vector retrieval.
29
+ sparse_results: Results from BM25 retrieval.
30
+ fused_results: Results after RRF fusion.
31
+ reranked: Results after cross-encoder reranking.
32
+ confidence: Max reranker score (0.0-1.0).
33
+ answer: Final generated answer.
34
+ """
35
+
36
+ query: str
37
+ top_k: int
38
+ user_language: str
39
+ intent: IntentType
40
+ retrieval_query: str
41
+ translated: bool
42
+ dense_results: list[QueryResult]
43
+ sparse_results: list[QueryResult]
44
+ fused_results: list[QueryResult]
45
+ reranked: list[QueryResult]
46
+ confidence: float
47
+ answer: str
48
+
49
+
50
  class QueryRouter:
51
  """Routes queries to appropriate retrieval and generation pipelines."""
52
 
 
75
  self._reranker = reranker
76
  self._generator = generator
77
  self._translate_query_enabled = translate_query
78
+ self._graph = self._build_graph()
79
 
80
  @staticmethod
81
  def _detect_script(text: str) -> str | None:
 
203
  logger.info("Translated query to Danish: %s", translated)
204
  return translated
205
 
206
+ def _build_graph(self) -> object:
207
+ """Build the LangGraph routing graph.
208
+
209
+ Nodes:
210
+ detect → detect language and intent
211
+ translate → translate query to Danish if needed
212
+ retrieve → hybrid search (skipped when intent is UNKNOWN)
213
+ rerank → cross-encoder reranking
214
+ update_intent → promote FACTUAL to RAG when sources are found
215
+ generate → build prompt and call LLM
216
+
217
+ Returns:
218
+ Compiled LangGraph graph.
219
+ """
220
+
221
+ def detect_node(state: RouterState) -> dict:
222
+ user_language, intent = self._detect_language_and_intent(state["query"])
223
+ return {"user_language": user_language, "intent": intent}
224
+
225
+ def translate_node(state: RouterState) -> dict:
226
+ retrieval_query = self._translate_query(state["query"], state["user_language"])
227
+ return {
228
+ "retrieval_query": retrieval_query,
229
+ "translated": retrieval_query != state["query"],
230
+ }
231
+
232
+ def retrieve_node(state: RouterState) -> dict:
233
+ hybrid_result = self._hybrid_retriever.search_detailed(
234
+ state["retrieval_query"], top_k=state["top_k"]
235
+ )
236
+ logger.info("Retrieved %d results from hybrid search", len(hybrid_result.fused_results))
237
+ return {
238
+ "dense_results": hybrid_result.dense_results,
239
+ "sparse_results": hybrid_result.sparse_results,
240
+ "fused_results": hybrid_result.fused_results,
241
+ }
242
+
243
+ def rerank_node(state: RouterState) -> dict:
244
+ results = state.get("fused_results", [])
245
+ reranked = (
246
+ self._reranker.rerank(state["retrieval_query"], results, top_k=state["top_k"])
247
+ if results
248
+ else []
249
+ )
250
+ confidence = max(r.score for r in reranked) if reranked else 0.0
251
+ logger.info("Reranked to %d results", len(reranked))
252
+ if reranked:
253
+ logger.info("Confidence: %.4f (sigmoid-normalized by reranker)", confidence)
254
+ return {"reranked": reranked, "confidence": confidence}
255
+
256
+ def update_intent_node(state: RouterState) -> dict:
257
+ if state.get("reranked") and state["intent"] == IntentType.FACTUAL:
258
+ logger.info("Overriding intent to RAG (sources retrieved)")
259
+ return {"intent": IntentType.RAG}
260
+ return {}
261
+
262
+ def generate_node(state: RouterState) -> dict:
263
+ reranked = state.get("reranked", [])
264
+ context = "\n\n".join(r.chunk.text for r in reranked)
265
+ prompt = self._build_prompt(
266
+ state["query"], state["intent"], context, state["user_language"]
267
+ )
268
+ answer = self._generator.invoke(prompt)
269
+ logger.info("Generated answer for intent=%s", state["intent"].value)
270
+ return {"answer": str(answer)}
271
+
272
+ def should_retrieve(state: RouterState) -> str:
273
+ """Skip retrieval when intent is UNKNOWN."""
274
+ return "retrieve" if state["intent"] != IntentType.UNKNOWN else "rerank"
275
+
276
+ graph: StateGraph = StateGraph(RouterState)
277
+ graph.add_node("detect", detect_node)
278
+ graph.add_node("translate", translate_node)
279
+ graph.add_node("retrieve", retrieve_node)
280
+ graph.add_node("rerank", rerank_node)
281
+ graph.add_node("update_intent", update_intent_node)
282
+ graph.add_node("generate", generate_node)
283
+
284
+ graph.set_entry_point("detect")
285
+ graph.add_edge("detect", "translate")
286
+ graph.add_conditional_edges(
287
+ "translate",
288
+ should_retrieve,
289
+ {"retrieve": "retrieve", "rerank": "rerank"},
290
+ )
291
+ graph.add_edge("retrieve", "rerank")
292
+ graph.add_edge("rerank", "update_intent")
293
+ graph.add_edge("update_intent", "generate")
294
+ graph.add_edge("generate", END)
295
+
296
+ return graph.compile()
297
+
298
  def route(self, query: str, top_k: int) -> GenerationResponse:
299
+ """Route a query through the full RAG pipeline via LangGraph.
300
 
301
  Args:
302
  query: The user's natural language query.
 
307
  """
308
  logger.info("Routing query: %s", query)
309
 
310
+ initial_state: RouterState = {
311
+ "query": query,
312
+ "top_k": top_k,
313
+ "user_language": "Danish",
314
+ "intent": IntentType.UNKNOWN,
315
+ "retrieval_query": query,
316
+ "translated": False,
317
+ "dense_results": [],
318
+ "sparse_results": [],
319
+ "fused_results": [],
320
+ "reranked": [],
321
+ "confidence": 0.0,
322
+ "answer": "",
323
+ }
324
 
325
+ final_state: RouterState = self._graph.invoke(initial_state)
 
326
 
 
327
  pipeline = PipelineDetails(
328
  original_query=query,
329
+ retrieval_query=final_state["retrieval_query"],
330
+ detected_language=final_state["user_language"],
331
+ translated=final_state["translated"],
332
+ dense_results=final_state.get("dense_results", []),
333
+ sparse_results=final_state.get("sparse_results", []),
334
+ fused_results=final_state.get("fused_results", []),
335
+ reranked_results=final_state.get("reranked", []),
336
  )
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  return GenerationResponse(
339
+ answer=final_state["answer"],
340
+ sources=final_state.get("reranked", []),
341
+ intent=final_state["intent"],
342
+ confidence=final_state["confidence"],
343
  pipeline_details=pipeline,
344
  )
345
 
346
+ # --- Old if/else routing (replaced by LangGraph above) ---
347
+ #
348
+ # user_language, intent = self._detect_language_and_intent(query)
349
+ # retrieval_query = self._translate_query(query, user_language)
350
+ # translated = retrieval_query != query
351
+ #
352
+ # logger.info("Classified intent: %s", intent.value)
353
+ # logger.debug("Intent classification result: %s for query='%s'", intent.value, query)
354
+ #
355
+ # should_retrieve = intent != IntentType.UNKNOWN
356
+ # logger.debug("Retrieval executed: %s (intent=%s)", should_retrieve, intent.value)
357
+ #
358
+ # pipeline = PipelineDetails(
359
+ # original_query=query,
360
+ # retrieval_query=retrieval_query,
361
+ # detected_language=user_language,
362
+ # translated=translated,
363
+ # )
364
+ #
365
+ # if should_retrieve:
366
+ # hybrid_result = self._hybrid_retriever.search_detailed(retrieval_query, top_k=top_k)
367
+ # pipeline.dense_results = hybrid_result.dense_results
368
+ # pipeline.sparse_results = hybrid_result.sparse_results
369
+ # pipeline.fused_results = hybrid_result.fused_results
370
+ # results = hybrid_result.fused_results
371
+ # else:
372
+ # results = []
373
+ #
374
+ # logger.info("Retrieved %d results from hybrid search", len(results))
375
+ # logger.debug("Retrieval returned %d results", len(results))
376
+ #
377
+ # reranked = self._reranker.rerank(retrieval_query, results, top_k=top_k) if results else []
378
+ # pipeline.reranked_results = reranked
379
+ # logger.info("Reranked to %d results", len(reranked))
380
+ #
381
+ # if reranked and intent == IntentType.FACTUAL:
382
+ # intent = IntentType.RAG
383
+ # logger.info("Overriding intent to RAG (sources retrieved)")
384
+ #
385
+ # context = "\n\n".join(r.chunk.text for r in reranked)
386
+ # prompt = self._build_prompt(query, intent, context, user_language)
387
+ #
388
+ # answer = self._generator.invoke(prompt)
389
+ # logger.info("Generated answer for intent=%s", intent.value)
390
+ #
391
+ # if reranked:
392
+ # confidence = max(r.score for r in reranked)
393
+ # logger.info("Confidence: %.4f (sigmoid-normalized by reranker)", confidence)
394
+ # else:
395
+ # confidence = 0.0
396
+ #
397
+ # return GenerationResponse(
398
+ # answer=str(answer),
399
+ # sources=reranked,
400
+ # intent=intent,
401
+ # confidence=confidence,
402
+ # pipeline_details=pipeline,
403
+ # )
404
+
405
  def _build_prompt(
406
  self, query: str, intent: IntentType, context: str, user_language: str
407
  ) -> str:
src/ui/app.py CHANGED
@@ -49,7 +49,7 @@ TEXTS: dict[str, dict[str, str]] = {
49
  "- **Vektordatabase + hybrid soegning** — Qdrant (semantisk) "
50
  "+ BM25 (leksikalsk)\n"
51
  "- **Reranking** — cross-encoder for praecis relevans\n"
52
- "- **RAG-arkitektur** — LangChain-orkestreret pipeline\n"
53
  "- **LLM-integration** — provider-agnostisk, prompt-styret "
54
  "svargenerering\n"
55
  "- **Evaluering** — RAGAS-baseret kvalitetsmaaling\n"
@@ -121,7 +121,7 @@ TEXTS: dict[str, dict[str, str]] = {
121
  "- **Vector database + hybrid search** — Qdrant (semantic) "
122
  "+ BM25 (lexical)\n"
123
  "- **Reranking** — cross-encoder for precise relevance\n"
124
- "- **RAG architecture** — LangChain-orchestrated pipeline\n"
125
  "- **LLM integration** — provider-agnostic, prompt-driven "
126
  "answer generation\n"
127
  "- **Evaluation** — RAGAS-based quality measurement\n"
 
49
  "- **Vektordatabase + hybrid soegning** — Qdrant (semantisk) "
50
  "+ BM25 (leksikalsk)\n"
51
  "- **Reranking** — cross-encoder for praecis relevans\n"
52
+ "- **RAG-arkitektur** — LangChain + LangGraph-orkestreret pipeline\n"
53
  "- **LLM-integration** — provider-agnostisk, prompt-styret "
54
  "svargenerering\n"
55
  "- **Evaluering** — RAGAS-baseret kvalitetsmaaling\n"
 
121
  "- **Vector database + hybrid search** — Qdrant (semantic) "
122
  "+ BM25 (lexical)\n"
123
  "- **Reranking** — cross-encoder for precise relevance\n"
124
+ "- **RAG architecture** — LangChain + LangGraph-orchestrated pipeline\n"
125
  "- **LLM integration** — provider-agnostic, prompt-driven "
126
  "answer generation\n"
127
  "- **Evaluation** — RAGAS-based quality measurement\n"