Spaces:

XQ
/

Dokumentassistent

Sleeping

App Files Files

XQ committed on Apr 7

Commit

0a7ef90

1 Parent(s): 1f01595

Fix pipeline details display, routing, and search logic

Browse files

Files changed (10) hide show

src/agent/react_router.py +17 -6
src/agent/router.py +1 -58
src/agent/tools.py +2 -2
src/api/routes.py +12 -1
src/ingestion/chunker.py +21 -13
src/ingestion/pipeline.py +3 -1
src/models.py +5 -4
src/retrieval/bm25_search.py +2 -2
src/retrieval/vector_store.py +3 -2
src/ui/app.py +79 -39

src/agent/react_router.py CHANGED Viewed

@@ -145,7 +145,7 @@ class ReActRouter:
             confidence=confidence,
             pipeline_details=PipelineDetails(
                 original_query=query,
-                retrieval_query=", ".join(q for _, q in store.tool_calls) or query,
                 dense_results=store.dense_results,
                 sparse_results=store.sparse_results,
                 fused_results=store.fused_results,
@@ -175,6 +175,7 @@ class ReActRouter:
         graph = self._make_graph(store)
         all_messages: list = []
         for chunk in graph.stream(
             {
@@ -194,20 +195,30 @@ class ReActRouter:
                 for msg in node_messages:
                     if isinstance(msg, AIMessage):
                         for tc in getattr(msg, "tool_calls", []):
                             yield {
                                 "step": "tool_call",
                                 "tool": tc.get("name", ""),
-                                "query": tc.get("args", {}).get("query", ""),
                             }
                         if msg.content and not getattr(msg, "tool_calls", None):
                             yield {"step": "generate"}
                     elif isinstance(msg, ToolMessage):
                         yield {
                             "step": "tool_result",
-                            "tool": getattr(msg, "name", ""),
-                            "result_count": len(store.retrieved),
                         }
         answer = self._extract_answer(all_messages)
         sources = store.retrieved[:top_k]
@@ -222,8 +233,8 @@ class ReActRouter:
                 "confidence": confidence,
                 "pipeline_details": {
                     "original_query": query,
-                    "retrieval_query": ", ".join(q for _, q in store.tool_calls) or query,
-                    "detected_language": "unknown",
                     "translated": False,
                     "dense_results": [r.to_dict(include_text=False) for r in store.dense_results],
                     "sparse_results": [r.to_dict(include_text=False) for r in store.sparse_results],

             confidence=confidence,
             pipeline_details=PipelineDetails(
                 original_query=query,
+                retrieval_query=", ".join(q for name, q in store.tool_calls if name == "hybrid_search") or query,
                 dense_results=store.dense_results,
                 sparse_results=store.sparse_results,
                 fused_results=store.fused_results,
         graph = self._make_graph(store)
         all_messages: list = []
+        prev_retrieved_count = 0
         for chunk in graph.stream(
             {
                 for msg in node_messages:
                     if isinstance(msg, AIMessage):
                         for tc in getattr(msg, "tool_calls", []):
+                            tc_args = tc.get("args", {})
+                            # Extract the most relevant argument for display
+                            tc_detail = (
+                                tc_args.get("query", "")
+                                or tc_args.get("document_id", "")
+                            )
                             yield {
                                 "step": "tool_call",
                                 "tool": tc.get("name", ""),
+                                "query": tc_detail,
                             }
                         if msg.content and not getattr(msg, "tool_calls", None):
                             yield {"step": "generate"}
                     elif isinstance(msg, ToolMessage):
+                        tool_name = getattr(msg, "name", "")
+                        current_count = len(store.retrieved)
                         yield {
                             "step": "tool_result",
+                            "tool": tool_name,
+                            "result_count": current_count - prev_retrieved_count,
+                            "total_count": current_count,
                         }
+                        prev_retrieved_count = current_count
         answer = self._extract_answer(all_messages)
         sources = store.retrieved[:top_k]
                 "confidence": confidence,
                 "pipeline_details": {
                     "original_query": query,
+                    "retrieval_query": ", ".join(q for name, q in store.tool_calls if name == "hybrid_search") or query,
+                    "detected_language": "",
                     "translated": False,
                     "dense_results": [r.to_dict(include_text=False) for r in store.dense_results],
                     "sparse_results": [r.to_dict(include_text=False) for r in store.sparse_results],

src/agent/router.py CHANGED Viewed

@@ -11,7 +11,6 @@ explicit and testable without hand-rolled flags or callback plumbing.
 """
 import logging
-import unicodedata
 from collections.abc import Generator
 from typing import TypedDict
@@ -123,76 +122,20 @@ class QueryRouter:
         self._translate_query_enabled = translate_query
         self._graph = self._build_graph()
-    @staticmethod
-    def _detect_script(text: str) -> str | None:
-        """Detect language from Unicode script for non-Latin text.
-        Returns a language name (e.g. "Chinese") if the script is
-        unambiguously identifiable, or None to fall back to LLM detection.
-        """
-        script_counts: dict[str, int] = {}
-        for ch in text:
-            if ch.isspace() or ch in ".,!?;:\"'()[]{}":
-                continue
-            try:
-                name = unicodedata.name(ch, "")
-            except ValueError:
-                continue
-            if name.startswith("CJK") or name.startswith("KANGXI"):
-                script_counts["CJK"] = script_counts.get("CJK", 0) + 1
-            elif name.startswith("HIRAGANA") or name.startswith("KATAKANA"):
-                script_counts["Japanese"] = script_counts.get("Japanese", 0) + 1
-            elif name.startswith("HANGUL"):
-                script_counts["Korean"] = script_counts.get("Korean", 0) + 1
-            elif name.startswith("ARABIC"):
-                script_counts["Arabic"] = script_counts.get("Arabic", 0) + 1
-            elif name.startswith("DEVANAGARI"):
-                script_counts["Hindi"] = script_counts.get("Hindi", 0) + 1
-            elif name.startswith("THAI"):
-                script_counts["Thai"] = script_counts.get("Thai", 0) + 1
-            elif name.startswith("CYRILLIC"):
-                script_counts["Russian"] = script_counts.get("Russian", 0) + 1
-        if not script_counts:
-            return None
-        dominant = max(script_counts, key=lambda k: script_counts[k])
-        # CJK characters alone -> Chinese; if mixed with Hiragana/Katakana -> Japanese
-        if dominant == "CJK" and "Japanese" in script_counts:
-            return "Japanese"
-        if dominant == "CJK":
-            return "Chinese"
-        return dominant
     def _detect_language_and_intent(self, query: str) -> tuple[str, IntentType]:
         """Detect the query language and classify intent in a single LLM call.
-        Uses Unicode script detection first for non-Latin scripts.  For
-        Latin-script text, a single LLM call returns both language and intent,
-        saving one full round-trip compared to two separate calls.
         Args:
             query: The user's original query.
         Returns:
             Tuple of (detected_language, intent).
         """
-        # Fast path: detect non-Latin scripts via Unicode
-        script_language = self._detect_script(query)
-        if script_language is not None:
-            # Language is known; still need intent from LLM
-            intent = self._intent_classifier.classify(query)
-            logger.info("Detected query language: %s", script_language)
-            logger.info("Classified intent: %s", intent.value)
-            return script_language, intent
-        # Latin-script text — combine language detection + intent classification
         valid_intents = "factual, summary, comparison, procedural, unknown"
         prompt = (
             "You are given a user query. Do TWO things:\n"
             "1. Detect the language of the query (reply with the language name in English, "
-            "e.g. 'Danish', 'English', 'German').\n"
             "2. Classify the intent into exactly one of: "
             f"{valid_intents}.\n\n"
             "Reply with EXACTLY two lines, nothing else:\n"

 """
 import logging
 from collections.abc import Generator
 from typing import TypedDict
         self._translate_query_enabled = translate_query
         self._graph = self._build_graph()
     def _detect_language_and_intent(self, query: str) -> tuple[str, IntentType]:
         """Detect the query language and classify intent in a single LLM call.
         Args:
             query: The user's original query.
         Returns:
             Tuple of (detected_language, intent).
         """
         valid_intents = "factual, summary, comparison, procedural, unknown"
         prompt = (
             "You are given a user query. Do TWO things:\n"
             "1. Detect the language of the query (reply with the language name in English, "
+            "e.g. 'Danish', 'English', 'German', 'Chinese', 'Japanese').\n"
             "2. Classify the intent into exactly one of: "
             f"{valid_intents}.\n\n"
             "Reply with EXACTLY two lines, nothing else:\n"

src/agent/tools.py CHANGED Viewed

@@ -161,8 +161,8 @@ def make_retrieval_tools(
                 f"(Document not found. Use list_documents to see available IDs.)"
             )
-        # Sort chunks by chunk_id to preserve document order
-        chunks.sort(key=lambda c: c.chunk_id)
         # Register chunks as QueryResult so confidence and sources are surfaced in the UI.
         # Score 1.0 indicates a direct full-document fetch (no ranking involved).

                 f"(Document not found. Use list_documents to see available IDs.)"
             )
+        # Sort chunks by chunk_index to preserve document order
+        chunks.sort(key=lambda c: c.metadata.get("chunk_index", 0))
         # Register chunks as QueryResult so confidence and sources are surfaced in the UI.
         # Score 1.0 indicates a direct full-document fetch (no ranking involved).

src/api/routes.py CHANGED Viewed

@@ -90,11 +90,22 @@ class PipelineDetailsResponse(BaseModel):
     reranked_results: list[PipelineResultItem] = []
 class QueryResponse(BaseModel):
     """Response body for the query endpoint."""
     answer: str
-    sources: list[dict[str, str | float]]
     intent: str
     confidence: float
     pipeline_details: PipelineDetailsResponse = PipelineDetailsResponse()

     reranked_results: list[PipelineResultItem] = []
+class SourceItem(BaseModel):
+    """A single source item in the query response."""
+    chunk_id: str
+    document_id: str
+    score: float
+    source: str
+    text: str = ""
+    metadata: dict[str, str | int] = {}
 class QueryResponse(BaseModel):
     """Response body for the query endpoint."""
     answer: str
+    sources: list[SourceItem]
     intent: str
     confidence: float
     pipeline_details: PipelineDetailsResponse = PipelineDetailsResponse()

src/ingestion/chunker.py CHANGED Viewed

@@ -32,7 +32,8 @@ class BaseChunker:
         self.chunk_overlap = chunk_overlap
     def chunk(
-        self, text: str, document_id: str, metadata: dict[str, str | int]
     ) -> list[DocumentChunk]:
         """Split text into chunks.
@@ -40,6 +41,7 @@ class BaseChunker:
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
         Returns:
             List of DocumentChunk objects.
@@ -51,7 +53,8 @@ class FixedSizeChunker(BaseChunker):
     """Splits text into fixed-size character chunks with overlap."""
     def chunk(
-        self, text: str, document_id: str, metadata: dict[str, str | int]
     ) -> list[DocumentChunk]:
         """Split text into fixed-size chunks using LangChain CharacterTextSplitter.
@@ -59,6 +62,7 @@ class FixedSizeChunker(BaseChunker):
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
         Returns:
             List of DocumentChunk with strategy=FIXED_SIZE.
@@ -73,13 +77,13 @@ class FixedSizeChunker(BaseChunker):
         texts = splitter.split_text(text)
         chunks = [
             DocumentChunk(
-                chunk_id=_make_chunk_id(document_id, index),
                 document_id=document_id,
                 text=chunk_text,
-                metadata={**metadata, "chunk_index": index},
                 strategy=ChunkStrategy.FIXED_SIZE,
             )
-            for index, chunk_text in enumerate(texts)
         ]
         logger.debug("FixedSizeChunker produced %d chunks for %s", len(chunks), document_id)
         return chunks
@@ -89,7 +93,8 @@ class RecursiveChunker(BaseChunker):
     """Recursively splits text using LangChain's RecursiveCharacterTextSplitter."""
     def chunk(
-        self, text: str, document_id: str, metadata: dict[str, str | int]
     ) -> list[DocumentChunk]:
         """Split text using recursive character splitting.
@@ -97,6 +102,7 @@ class RecursiveChunker(BaseChunker):
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
         Returns:
             List of DocumentChunk with strategy=RECURSIVE.
@@ -107,12 +113,12 @@ class RecursiveChunker(BaseChunker):
         )
         texts = splitter.split_text(text)
         chunks: list[DocumentChunk] = []
-        for index, chunk_text in enumerate(texts):
             chunks.append(DocumentChunk(
-                chunk_id=_make_chunk_id(document_id, index),
                 document_id=document_id,
                 text=chunk_text,
-                metadata={**metadata, "chunk_index": index},
                 strategy=ChunkStrategy.RECURSIVE,
             ))
         logger.debug("RecursiveChunker produced %d chunks for %s", len(chunks), document_id)
@@ -136,7 +142,8 @@ class SemanticChunker(BaseChunker):
         self._embeddings = embeddings
     def chunk(
-        self, text: str, document_id: str, metadata: dict[str, str | int]
     ) -> list[DocumentChunk]:
         """Split text at semantic boundaries.
@@ -144,6 +151,7 @@ class SemanticChunker(BaseChunker):
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
         Returns:
             List of DocumentChunk with strategy=SEMANTIC.
@@ -151,12 +159,12 @@ class SemanticChunker(BaseChunker):
         splitter = LCSemanticChunker(embeddings=self._embeddings)
         docs = splitter.create_documents([text])
         chunks: list[DocumentChunk] = []
-        for index, doc in enumerate(docs):
             chunks.append(DocumentChunk(
-                chunk_id=_make_chunk_id(document_id, index),
                 document_id=document_id,
                 text=doc.page_content,
-                metadata={**metadata, "chunk_index": index},
                 strategy=ChunkStrategy.SEMANTIC,
             ))
         logger.debug("SemanticChunker produced %d chunks for %s", len(chunks), document_id)

         self.chunk_overlap = chunk_overlap
     def chunk(
+        self, text: str, document_id: str, metadata: dict[str, str | int],
+        start_index: int = 0,
     ) -> list[DocumentChunk]:
         """Split text into chunks.
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
+            start_index: Starting chunk index for globally unique IDs.
         Returns:
             List of DocumentChunk objects.
     """Splits text into fixed-size character chunks with overlap."""
     def chunk(
+        self, text: str, document_id: str, metadata: dict[str, str | int],
+        start_index: int = 0,
     ) -> list[DocumentChunk]:
         """Split text into fixed-size chunks using LangChain CharacterTextSplitter.
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
+            start_index: Starting chunk index for globally unique IDs.
         Returns:
             List of DocumentChunk with strategy=FIXED_SIZE.
         texts = splitter.split_text(text)
         chunks = [
             DocumentChunk(
+                chunk_id=_make_chunk_id(document_id, start_index + i),
                 document_id=document_id,
                 text=chunk_text,
+                metadata={**metadata, "chunk_index": start_index + i},
                 strategy=ChunkStrategy.FIXED_SIZE,
             )
+            for i, chunk_text in enumerate(texts)
         ]
         logger.debug("FixedSizeChunker produced %d chunks for %s", len(chunks), document_id)
         return chunks
     """Recursively splits text using LangChain's RecursiveCharacterTextSplitter."""
     def chunk(
+        self, text: str, document_id: str, metadata: dict[str, str | int],
+        start_index: int = 0,
     ) -> list[DocumentChunk]:
         """Split text using recursive character splitting.
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
+            start_index: Starting chunk index for globally unique IDs.
         Returns:
             List of DocumentChunk with strategy=RECURSIVE.
         )
         texts = splitter.split_text(text)
         chunks: list[DocumentChunk] = []
+        for i, chunk_text in enumerate(texts):
             chunks.append(DocumentChunk(
+                chunk_id=_make_chunk_id(document_id, start_index + i),
                 document_id=document_id,
                 text=chunk_text,
+                metadata={**metadata, "chunk_index": start_index + i},
                 strategy=ChunkStrategy.RECURSIVE,
             ))
         logger.debug("RecursiveChunker produced %d chunks for %s", len(chunks), document_id)
         self._embeddings = embeddings
     def chunk(
+        self, text: str, document_id: str, metadata: dict[str, str | int],
+        start_index: int = 0,
     ) -> list[DocumentChunk]:
         """Split text at semantic boundaries.
             text: The full text to chunk.
             document_id: Identifier of the source document.
             metadata: Metadata to attach to each chunk.
+            start_index: Starting chunk index for globally unique IDs.
         Returns:
             List of DocumentChunk with strategy=SEMANTIC.
         splitter = LCSemanticChunker(embeddings=self._embeddings)
         docs = splitter.create_documents([text])
         chunks: list[DocumentChunk] = []
+        for i, doc in enumerate(docs):
             chunks.append(DocumentChunk(
+                chunk_id=_make_chunk_id(document_id, start_index + i),
                 document_id=document_id,
                 text=doc.page_content,
+                metadata={**metadata, "chunk_index": start_index + i},
                 strategy=ChunkStrategy.SEMANTIC,
             ))
         logger.debug("SemanticChunker produced %d chunks for %s", len(chunks), document_id)

src/ingestion/pipeline.py CHANGED Viewed

@@ -77,6 +77,7 @@ class IngestionPipeline:
         pages = self.parser.parse(file_path)
         all_chunks: list[DocumentChunk] = []
         for page in pages:
             raw_text = str(page["text"])
@@ -88,8 +89,9 @@ class IngestionPipeline:
                 "source": str(page["source"]),
                 "page_number": int(page["page_number"]),
             }
-            chunks = self.chunker.chunk(cleaned, document_id, metadata)
             all_chunks.extend(chunks)
         logger.info("Ingested %d chunks from %s", len(all_chunks), file_path)
         return all_chunks

         pages = self.parser.parse(file_path)
         all_chunks: list[DocumentChunk] = []
+        chunk_offset = 0
         for page in pages:
             raw_text = str(page["text"])
                 "source": str(page["source"]),
                 "page_number": int(page["page_number"]),
             }
+            chunks = self.chunker.chunk(cleaned, document_id, metadata, start_index=chunk_offset)
             all_chunks.extend(chunks)
+            chunk_offset += len(chunks)
         logger.info("Ingested %d chunks from %s", len(all_chunks), file_path)
         return all_chunks

src/models.py CHANGED Viewed

@@ -56,21 +56,22 @@ class QueryResult:
     score: float
     source: str
-    def to_dict(self, *, include_text: bool = True) -> dict[str, str | float]:
         """Serialise to a JSON-safe dictionary.
         Args:
             include_text: Whether to include the chunk text (default True).
         Returns:
-            Dictionary with chunk_id, document_id, score, source, and
-            optionally text.
         """
-        d: dict[str, str | float] = {
             "chunk_id": self.chunk.chunk_id,
             "document_id": self.chunk.document_id,
             "score": self.score,
             "source": self.source,
         }
         if include_text:
             d["text"] = self.chunk.text

     score: float
     source: str
+    def to_dict(self, *, include_text: bool = True) -> dict:
         """Serialise to a JSON-safe dictionary.
         Args:
             include_text: Whether to include the chunk text (default True).
         Returns:
+            Dictionary with chunk_id, document_id, score, source, metadata,
+            and optionally text.
         """
+        d: dict = {
             "chunk_id": self.chunk.chunk_id,
             "document_id": self.chunk.document_id,
             "score": self.score,
             "source": self.source,
+            "metadata": self.chunk.metadata,
         }
         if include_text:
             d["text"] = self.chunk.text

src/retrieval/bm25_search.py CHANGED Viewed

@@ -45,7 +45,8 @@ class BM25Search:
         tokenized_query = self._tokenize(query)
         scores = self._index.get_scores(tokenized_query)
-        ranked_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
         results = [
             QueryResult(
@@ -54,7 +55,6 @@ class BM25Search:
                 source="bm25",
             )
             for i in ranked_indices
-            if scores[i] > 0.0
         ]
         logger.debug("BM25 search returned %d results for query: %s", len(results), query)
         return results

         tokenized_query = self._tokenize(query)
         scores = self._index.get_scores(tokenized_query)
+        positive_indices = [i for i in range(len(scores)) if scores[i] > 0.0]
+        ranked_indices = sorted(positive_indices, key=lambda i: scores[i], reverse=True)[:top_k]
         results = [
             QueryResult(
                 source="bm25",
             )
             for i in ranked_indices
         ]
         logger.debug("BM25 search returned %d results for query: %s", len(results), query)
         return results

src/retrieval/vector_store.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Qdrant vector store for dense retrieval."""
 import json
 import logging
@@ -77,7 +78,7 @@ class VectorStore:
         points = [
             PointStruct(
-                id=idx,
                 vector=embedding,
                 payload={
                     "chunk_id": chunk.chunk_id,
@@ -87,7 +88,7 @@ class VectorStore:
                     "strategy": chunk.strategy.value,
                 },
             )
-            for idx, (chunk, embedding) in enumerate(zip(chunks, embeddings))
         ]
         self._client.upsert(collection_name=self._collection_name, points=points)

 """Qdrant vector store for dense retrieval."""
+import hashlib
 import json
 import logging
         points = [
             PointStruct(
+                id=int(hashlib.sha256(chunk.chunk_id.encode()).hexdigest()[:15], 16),
                 vector=embedding,
                 payload={
                     "chunk_id": chunk.chunk_id,
                     "strategy": chunk.strategy.value,
                 },
             )
+            for chunk, embedding in zip(chunks, embeddings)
         ]
         self._client.upsert(collection_name=self._collection_name, points=points)

src/ui/app.py CHANGED Viewed

@@ -4,6 +4,7 @@ Calls the FastAPI backend at http://localhost:8000.
 Single-page document search interface with clean sans-serif design.
 """
 import json
 import os
 import random
@@ -721,10 +722,35 @@ if search_clicked and question.strip():
                     elif _step == "tool_result":
                         _rc = _event.get("result_count", 0)
                         st.write(
-                            (f"Hentet **{_rc}** dokumenter")
                             if lang == "da"
-                            else (f"Retrieved **{_rc}** documents")
                         )
                     elif _step == "generate":
@@ -783,7 +809,7 @@ if search_clicked and question.strip():
     # -- Answer --
     answer = data.get("answer", t["no_answer"])
-    st.markdown(f'<div class="answer-block">{answer}</div>', unsafe_allow_html=True)
     # -- Sources --
     sources = data.get("sources", [])
@@ -795,17 +821,17 @@ if search_clicked and question.strip():
                 score = src.get("score", 0.0)
                 retrieval_source = src.get("source", "")
                 metadata = src.get("metadata", {})
-                page = metadata.get("page", "") if isinstance(metadata, dict) else ""
                 page_info = f' &middot; {t["page_label"]} {page}' if page else ""
                 score_display = f"{score:.3f}"
                 st.markdown(
                     f'<div class="source-card">'
-                    f'<div class="source-card-title">{doc_name}{page_info}</div>'
-                    f'<div class="source-card-text">{text[:500]}</div>'
                     f'<div class="source-card-meta">'
-                    f"Score: {score_display} &nbsp;&middot;&nbsp; {retrieval_source}"
                     f"</div>"
                     f"</div>",
                     unsafe_allow_html=True,
@@ -844,48 +870,62 @@ if search_clicked and question.strip():
                 )
                 st.markdown(f"{header}\n{rows}")
-            # 2) BM25 results
-            _render_result_table(pd.get("sparse_results", []), t["pipeline_bm25"])
-            st.markdown("---")
-            # 3) Vector search results
-            _render_result_table(pd.get("dense_results", []), t["pipeline_dense"])
-            st.markdown("---")
-            # 4) RRF fused ranking
-            _render_result_table(pd.get("fused_results", []), t["pipeline_fused"])
-            st.markdown("---")
-            # 5) Reranked results with score change
             reranked = pd.get("reranked_results", [])
             st.markdown(f'**{t["pipeline_reranked"]}**')
             if reranked:
-                # Build a map from chunk_id -> fused score for comparison
-                fused_scores: dict[str, float] = {
-                    r.get("chunk_id", ""): r.get("score", 0.0)
-                    for r in pd.get("fused_results", [])
-                }
-                header = (
-                    f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | '
-                    f'{t["pipeline_score"]} | {t["pipeline_score_change"]} |\n'
-                    f"|---|---|---|---|"
-                )
-                rows_list = []
-                for i, r in enumerate(reranked):
-                    cid = r.get("chunk_id", "")
-                    new_score = r.get("score", 0.0)
-                    old_score = fused_scores.get(cid)
-                    if old_score is not None:
-                        change = f"RRF {old_score:.4f} -> {new_score:.4f}"
-                    else:
-                        change = "-"
-                    rows_list.append(
-                        f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {new_score:.4f} | {change} |'
                     )
-                st.markdown(f"{header}\n" + "\n".join(rows_list))
             else:
                 st.caption(t["pipeline_no_results"])

 Single-page document search interface with clean sans-serif design.
 """
+import html
 import json
 import os
 import random
                     elif _step == "tool_result":
                         _rc = _event.get("result_count", 0)
+                        _tool_name = _event.get("tool", "")
+                        if _tool_name == "list_documents":
+                            # list_documents returns doc list in its text,
+                            # parse count from the tool output or show generic
+                            st.write(
+                                "Dokumentliste hentet"
+                                if lang == "da"
+                                else "Document list retrieved"
+                            )
+                        elif _tool_name == "fetch_document":
+                            st.write(
+                                (f"Hentet dokument (**{_rc}** afsnit)")
+                                if lang == "da"
+                                else (f"Fetched document (**{_rc}** chunks)")
+                            )
+                        else:
+                            st.write(
+                                (f"Fandt **{_rc}** relevante passager")
+                                if lang == "da"
+                                else (f"Found **{_rc}** relevant passages")
+                            )
+                    elif _step == "broaden_query":
+                        _retry = _event.get("retry_count", 1)
+                        _rq = _event.get("retrieval_query", "")
                         st.write(
+                            (f"Lav konfidensgrad – forsøg {_retry} med udvidet søgning: _{_rq}_")
                             if lang == "da"
+                            else (f"Low confidence – retry {_retry} with broadened query: _{_rq}_")
                         )
                     elif _step == "generate":
     # -- Answer --
     answer = data.get("answer", t["no_answer"])
+    st.markdown(answer)
     # -- Sources --
     sources = data.get("sources", [])
                 score = src.get("score", 0.0)
                 retrieval_source = src.get("source", "")
                 metadata = src.get("metadata", {})
+                page = metadata.get("page_number", "") if isinstance(metadata, dict) else ""
                 page_info = f' &middot; {t["page_label"]} {page}' if page else ""
                 score_display = f"{score:.3f}"
                 st.markdown(
                     f'<div class="source-card">'
+                    f'<div class="source-card-title">{html.escape(doc_name)}{page_info}</div>'
+                    f'<div class="source-card-text">{html.escape(text[:500])}</div>'
                     f'<div class="source-card-meta">'
+                    f"Score: {score_display} &nbsp;&middot;&nbsp; {html.escape(retrieval_source)}"
                     f"</div>"
                     f"</div>",
                     unsafe_allow_html=True,
                 )
                 st.markdown(f"{header}\n{rows}")
+            _has_retrieval = bool(
+                pd.get("dense_results") or pd.get("sparse_results") or pd.get("fused_results")
+            )
+            if _has_retrieval:
+                # 2) BM25 results
+                _render_result_table(pd.get("sparse_results", []), t["pipeline_bm25"])
+                st.markdown("---")
+                # 3) Vector search results
+                _render_result_table(pd.get("dense_results", []), t["pipeline_dense"])
+                st.markdown("---")
+                # 4) RRF fused ranking
+                _render_result_table(pd.get("fused_results", []), t["pipeline_fused"])
+                st.markdown("---")
+            # 5) Reranked / fetched results
             reranked = pd.get("reranked_results", [])
             st.markdown(f'**{t["pipeline_reranked"]}**')
             if reranked:
+                if _has_retrieval:
+                    # Show score change from RRF → reranking
+                    fused_scores: dict[str, float] = {
+                        r.get("chunk_id", ""): r.get("score", 0.0)
+                        for r in pd.get("fused_results", [])
+                    }
+                    header = (
+                        f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | '
+                        f'{t["pipeline_score"]} | {t["pipeline_score_change"]} |\n'
+                        f"|---|---|---|---|"
+                    )
+                    rows_list = []
+                    for i, r in enumerate(reranked):
+                        cid = r.get("chunk_id", "")
+                        new_score = r.get("score", 0.0)
+                        old_score = fused_scores.get(cid)
+                        if old_score is not None:
+                            change = f"RRF {old_score:.4f} -> {new_score:.4f}"
+                        else:
+                            change = "-"
+                        rows_list.append(
+                            f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {new_score:.4f} | {change} |'
+                        )
+                    st.markdown(f"{header}\n" + "\n".join(rows_list))
+                else:
+                    # No hybrid search was used (e.g. fetch_document only) — simple table
+                    header = f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | {t["pipeline_score"]} |\n|---|---|---|'
+                    rows = "\n".join(
+                        f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {r.get("score", 0):.4f} |'
+                        for i, r in enumerate(reranked)
                     )
+                    st.markdown(f"{header}\n{rows}")
             else:
                 st.caption(t["pipeline_no_results"])