akryldigital commited on
Commit
865589a
·
verified ·
1 Parent(s): b0fe395

add saliency map components

Browse files
src/agents/__init__.py CHANGED
@@ -1,21 +1,46 @@
1
  """
2
- Agent modules for chatbot implementations
3
- """
4
 
5
- from .gemini_chatbot import get_gemini_chatbot
6
- from .visual_chatbot import get_visual_chatbot
7
- from .multi_agent_chatbot import get_multi_agent_chatbot
8
- from .smart_chatbot import get_chatbot as get_smart_chatbot
9
- from .visual_multi_agent_chatbot import get_visual_multi_agent_chatbot
10
 
11
- # Alias for backward compatibility
12
- get_visual_chatbot_v2 = get_visual_multi_agent_chatbot
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  __all__ = [
15
- "get_smart_chatbot",
16
- "get_multi_agent_chatbot",
17
- "get_gemini_chatbot",
18
- "get_visual_chatbot",
19
- "get_visual_multi_agent_chatbot",
20
- "get_visual_chatbot_v2"
 
 
 
 
 
 
 
 
21
  ]
 
 
 
 
1
  """
2
+ UI Components Module
 
3
 
4
+ This module contains UI-related components including styles, visualizations,
5
+ and utility functions for the Streamlit application.
6
+ """
 
 
7
 
8
+ from .styles import get_custom_css
9
+ from .components import (
10
+ display_chunk_statistics_charts,
11
+ display_chunk_statistics_table
12
+ )
13
+ from .utils import extract_chunk_statistics
14
+ from .visual_documents import (
15
+ display_visual_search_results,
16
+ display_visual_document_statistics,
17
+ display_visual_document_details
18
+ )
19
+ from .saliency import (
20
+ generate_tile_aware_saliency,
21
+ can_generate_saliency,
22
+ get_saliency_metadata_summary,
23
+ DEFAULT_ALPHA,
24
+ DEFAULT_COLORMAP,
25
+ DEFAULT_THRESHOLD_PERCENTILE
26
+ )
27
 
28
  __all__ = [
29
+ "get_custom_css",
30
+ "display_chunk_statistics_charts",
31
+ "display_chunk_statistics_table",
32
+ "extract_chunk_statistics",
33
+ "display_visual_search_results",
34
+ "display_visual_document_statistics",
35
+ "display_visual_document_details",
36
+ # Saliency functions
37
+ "generate_tile_aware_saliency",
38
+ "can_generate_saliency",
39
+ "get_saliency_metadata_summary",
40
+ "DEFAULT_ALPHA",
41
+ "DEFAULT_COLORMAP",
42
+ "DEFAULT_THRESHOLD_PERCENTILE"
43
  ]
44
+
45
+
46
+
src/agents/saliency.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Saliency Map Generation for Visual RAG
3
+
4
+ This module provides saliency map generation for visual document search results.
5
+ It implements the tile-aware ColBERT MaxSim strategy for accurate visualization
6
+ of which image regions are relevant to a query.
7
+
8
+ Key features:
9
+ 1. Tile-aware architecture (understands 4Γ—3 grid of 512Γ—512 tiles)
10
+ 2. Excludes global tile for cleaner saliency
11
+ 3. Maps patches to resized image, then scales to original
12
+ 4. Uses "hot" colormap by default for better visibility
13
+ """
14
+
15
+ import logging
16
+ from typing import Any, Optional, Tuple
17
+ from io import BytesIO
18
+ from base64 import b64decode
19
+
20
+ import numpy as np
21
+ import requests
22
+ from PIL import Image
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Default saliency configuration
27
+ DEFAULT_ALPHA = 0.4
28
+ DEFAULT_COLORMAP = 'hot' # Better visibility than 'jet'
29
+ DEFAULT_THRESHOLD_PERCENTILE = 50
30
+
31
+
32
def convert_to_numpy(embedding, dtype: np.dtype = np.float32) -> np.ndarray:
    """Coerce *embedding* into a numpy array of the requested dtype.

    Accepts plain Python lists, numpy arrays, and (optionally) PyTorch
    tensors. Torch tensors are moved to CPU first; bfloat16 tensors are
    widened to float32 on the way, since numpy has no bfloat16 type.
    """
    try:
        import torch
    except ImportError:
        # torch is optional; lists and ndarrays still convert fine.
        torch = None

    if torch is not None and isinstance(embedding, torch.Tensor):
        tensor = embedding.cpu()
        if tensor.dtype == torch.bfloat16:
            tensor = tensor.float()
        embedding = tensor.numpy()

    return np.array(embedding, dtype=dtype)
53
+
54
+
55
def validate_embeddings(
    doc_embedding: np.ndarray,
    query_embedding: np.ndarray
) -> Tuple[bool, str]:
    """Check that both embeddings are well-formed 2D arrays of finite values.

    Returns:
        (True, "") when valid, otherwise (False, reason).
    """
    # Both sides must be [num_patches, dim] matrices.
    for name, emb in (("Document", doc_embedding), ("Query", query_embedding)):
        if emb.ndim != 2:
            return False, f"{name} embedding must be 2D, got {emb.ndim}D"

    # Dot products only make sense when the feature dimensions agree.
    if doc_embedding.shape[1] != query_embedding.shape[1]:
        return False, (
            f"Embedding dimensions don't match: "
            f"doc={doc_embedding.shape[1]}, query={query_embedding.shape[1]}"
        )

    # NaN/Inf anywhere would poison the similarity computation.
    for name, emb in (("Document", doc_embedding), ("Query", query_embedding)):
        if not np.all(np.isfinite(emb)):
            return False, f"{name} embedding contains NaN or Inf values"

    return True, ""
76
+
77
+
78
def compute_maxsim_scores(
    doc_embedding: np.ndarray,
    query_embedding: np.ndarray,
    normalize: bool = True
) -> np.ndarray:
    """ColBERT-style MaxSim: per document patch, the best match over the query.

    Args:
        doc_embedding: [num_doc_patches, dim] document patch embeddings.
        query_embedding: [num_query_patches, dim] query token embeddings.
        normalize: L2-normalize rows first so dot products become cosine
            similarities.

    Returns:
        1D array (length num_doc_patches) of per-patch maximum similarities.
    """
    def _unit_rows(matrix: np.ndarray) -> np.ndarray:
        # Small epsilon keeps all-zero rows from dividing by zero.
        return matrix / (np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-8)

    if normalize:
        doc_embedding = _unit_rows(doc_embedding)
        query_embedding = _unit_rows(query_embedding)

    # [num_doc, num_query] similarity matrix, reduced over the query axis.
    return (doc_embedding @ query_embedding.T).max(axis=1)
100
+
101
+
102
def normalize_scores(
    score_grid: np.ndarray,
    threshold_percentile: Optional[int] = None
) -> np.ndarray:
    """
    Normalize score grid to the 0-1 range with optional thresholding.

    Args:
        score_grid: 2D array of raw patch scores.
        threshold_percentile: If given, positions whose raw score falls below
            this percentile of the raw grid are zeroed out after normalization
            (hides weakly-matching patches). Was annotated as a plain ``int``
            with a ``None`` default; fixed to ``Optional[int]`` per PEP 484.

    Returns:
        Array of the same shape scaled to [0, 1]. If all scores are identical
        there is no dynamic range to show, so an all-zero float32 grid is
        returned instead of dividing by ~0.
    """
    score_min = score_grid.min()
    score_max = score_grid.max()

    # Degenerate case: a flat grid carries no saliency signal.
    if score_max - score_min < 1e-8:
        logger.warning("All scores are identical, returning zeros")
        return np.zeros_like(score_grid, dtype=np.float32)

    score_grid_norm = (score_grid - score_min) / (score_max - score_min)

    if threshold_percentile is not None:
        # Percentile is taken on the raw scores; masking the normalized grid
        # at the same positions is equivalent because min-max scaling is
        # monotonic.
        score_threshold = np.percentile(score_grid, threshold_percentile)
        mask = score_grid < score_threshold
        score_grid_norm[mask] = 0.0

        visible_count = np.sum(~mask)
        total_count = score_grid.size
        logger.debug(f"Threshold: {score_threshold:.3f} ({threshold_percentile}th percentile)")
        logger.debug(f"Visible patches: {visible_count} / {total_count}")

    return score_grid_norm
127
+
128
+
129
def download_image(page_url: str) -> Optional[Image.Image]:
    """Load an image from an http(s) URL, a ``data:image`` URI, or a local path.

    The result is always converted to RGB. Returns None (after logging the
    error) when the image cannot be fetched or decoded.
    """
    try:
        if page_url.startswith(("http://", "https://")):
            # Remote image: fetch raw bytes over HTTP with a hard timeout.
            response = requests.get(page_url, timeout=15)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        elif page_url.startswith("data:image"):
            # Inline data URI: the base64 payload follows the first comma.
            _, encoded = page_url.split(",", 1)
            image = Image.open(BytesIO(b64decode(encoded)))
        else:
            # Anything else is treated as a filesystem path.
            image = Image.open(page_url)

        return image if image.mode == "RGB" else image.convert("RGB")

    except Exception as e:
        logger.error(f"Failed to load image: {e}")
        return None
150
+
151
+
152
def apply_colormap_and_blend(
    score_grid: np.ndarray,
    image: Image.Image,
    alpha: float = DEFAULT_ALPHA,
    colormap: str = DEFAULT_COLORMAP
) -> Image.Image:
    """Render normalized scores as a colored heatmap blended onto *image*.

    Args:
        score_grid: 2D array of scores in [0, 1] (see normalize_scores).
        image: RGB PIL image the heatmap is laid over.
        alpha: Blend weight (0.0 = image only, 1.0 = heatmap only).
        colormap: Matplotlib colormap name.

    Returns:
        A new PIL image, same size as *image*, with the overlay applied.
    """
    # `cm.get_cmap` was deprecated in matplotlib 3.7 and removed in 3.9;
    # use the `matplotlib.colormaps` registry, falling back for old versions.
    try:
        from matplotlib import colormaps
        cmap = colormaps[colormap]
    except ImportError:
        from matplotlib import cm
        cmap = cm.get_cmap(colormap)

    img_width, img_height = image.size

    # Upscale the coarse patch grid to full image resolution.
    heatmap_pil = Image.fromarray((score_grid * 255).astype(np.uint8), mode='L')
    heatmap_resized = heatmap_pil.resize((img_width, img_height), Image.BILINEAR)
    heatmap_array = np.array(heatmap_resized) / 255.0

    # Map scalar scores to RGB via the colormap (drop the alpha channel).
    heatmap_colored = cmap(heatmap_array)[:, :, :3]
    heatmap_colored = (heatmap_colored * 255).astype(np.uint8)
    heatmap_img = Image.fromarray(heatmap_colored, mode='RGB')

    # Blend with original image
    overlay = Image.blend(image, heatmap_img, alpha=alpha)

    return overlay
178
+
179
+
180
def generate_tile_aware_saliency(
    qdrant_client: Any,
    collection_name: str,
    point_id: str,
    query_embedding: np.ndarray,
    alpha: float = DEFAULT_ALPHA,
    colormap: str = DEFAULT_COLORMAP,
    threshold_percentile: int = DEFAULT_THRESHOLD_PERCENTILE
) -> Optional[Image.Image]:
    """
    Generate tile-aware saliency map for a document-query pair.

    This is the main function to call for saliency generation.

    Pipeline: fetch the stored multi-vector + payload from Qdrant, compute
    MaxSim scores per document patch, rebuild the 2D patch grid from the
    tile layout recorded in the payload, normalize/threshold it, and blend
    a heatmap over the resized page image (scaled back to original size).

    Args:
        qdrant_client: Qdrant client instance
        collection_name: Name of the collection
        point_id: ID of the document point
        query_embedding: Query multi-vector embedding [num_query_patches, dim]
        alpha: Overlay transparency (0.0-1.0)
        colormap: Matplotlib colormap name (default: 'hot')
        threshold_percentile: Hide patches below this percentile (default: 50)

    Returns:
        PIL Image with saliency overlay, or None if generation fails
    """
    try:
        # Step 1: Fetch full multi-vector embedding AND payload
        logger.debug(f"Fetching point {point_id} with tile metadata from {collection_name}")
        points = qdrant_client.retrieve(
            collection_name=collection_name,
            ids=[point_id],
            with_vectors=["initial"],
            with_payload=True
        )

        if not points or len(points) == 0:
            logger.error(f"Point {point_id} not found in collection")
            return None

        point = points[0]
        # "initial" is the named multi-vector carrying per-patch embeddings
        # — presumably written by the indexer; confirm against the
        # collection schema.
        doc_vector = point.vector.get("initial")
        payload = point.payload

        if doc_vector is None:
            logger.error("No 'initial' vector found for point")
            return None

        # Step 2: Get tile structure from payload
        num_tiles = payload.get('num_tiles')
        tile_rows = payload.get('tile_rows')
        tile_cols = payload.get('tile_cols')
        # 64 patches = 8x8 per tile — assumed embedder default; TODO confirm.
        patches_per_tile = payload.get('patches_per_tile', 64)

        resized_width = payload.get('resized_width')
        resized_height = payload.get('resized_height')
        # Fall back to the legacy 'page' URL when no resized URL is stored.
        resized_url = payload.get('resized_url') or payload.get('page')

        original_width = payload.get('original_width')
        original_height = payload.get('original_height')

        if not all([num_tiles, tile_rows, tile_cols, resized_width, resized_height]):
            logger.warning("Missing tile metadata - cannot generate saliency")
            return None

        logger.info(f"βœ… Tile structure: {tile_rows}Γ—{tile_cols} tiles, {patches_per_tile} patches/tile")
        logger.info(f"βœ… Resized image: {resized_width}Γ—{resized_height}")
        logger.info(f"βœ… Original image: {original_width}Γ—{original_height}")

        # Step 3: Convert embeddings
        doc_embedding = convert_to_numpy(doc_vector)
        query_emb = convert_to_numpy(query_embedding)

        is_valid, error_msg = validate_embeddings(doc_embedding, query_emb)
        if not is_valid:
            logger.error(f"Embedding validation failed: {error_msg}")
            return None

        logger.info(f"Document embedding: {doc_embedding.shape}")
        logger.info(f"Query embedding: {query_emb.shape}")

        # Step 4: Separate tile embeddings from global tile
        # NOTE(review): num_tiles is assumed to include one trailing global
        # tile (num_tiles == tile_rows*tile_cols + 1), so only the leading
        # spatial patches are kept — confirm with the indexer.
        total_patches = num_tiles * patches_per_tile
        tile_patches = total_patches - patches_per_tile  # Exclude global

        if len(doc_embedding) < total_patches:
            logger.warning(f"Embedding size mismatch: got {len(doc_embedding)}, expected {total_patches}")
            tile_embeddings = doc_embedding[:tile_patches] if len(doc_embedding) > tile_patches else doc_embedding
        else:
            tile_embeddings = doc_embedding[:tile_patches]

        logger.info(f"Using {len(tile_embeddings)} tile patches (excluding global)")

        # Step 5: Compute MaxSim scores
        patch_scores = compute_maxsim_scores(tile_embeddings, query_emb, normalize=True)
        logger.info(f"Computed scores for {len(patch_scores)} patches")

        # Step 6: Reshape patches into tile structure
        patches_per_tile_side = int(np.sqrt(patches_per_tile))  # 8 for 64 patches

        try:
            num_actual_tiles = tile_rows * tile_cols

            if len(patch_scores) != num_actual_tiles * patches_per_tile:
                logger.error(f"Patch count mismatch: {len(patch_scores)} patches")
                return None

            tile_scores = patch_scores.reshape(num_actual_tiles, patches_per_tile)

            # Reshape each tile's patches to 8Γ—8 grid (F-order)
            # NOTE(review): column-major order implies the embedder emits
            # patches column-first within a tile — verify if maps look
            # transposed.
            tile_grids = []
            for tile_idx in range(num_actual_tiles):
                tile_patch_scores = tile_scores[tile_idx]
                tile_grid = tile_patch_scores.reshape(
                    patches_per_tile_side, patches_per_tile_side, order='F'
                )
                tile_grids.append(tile_grid)

            # Arrange tiles into full image grid
            full_grid_rows = []
            for row_idx in range(tile_rows):
                row_tiles = []
                for col_idx in range(tile_cols):
                    # Tiles are stored row-major across the page.
                    tile_idx = row_idx * tile_cols + col_idx
                    row_tiles.append(tile_grids[tile_idx])
                row_grid = np.concatenate(row_tiles, axis=1)
                full_grid_rows.append(row_grid)

            score_grid = np.concatenate(full_grid_rows, axis=0)

            logger.info(f"βœ… Reconstructed grid: {score_grid.shape} (from {tile_rows}Γ—{tile_cols} tiles)")

        except ValueError as e:
            logger.error(f"❌ Failed to reshape patches: {e}")
            return None

        # Step 7: Normalize scores
        score_grid_norm = normalize_scores(score_grid, threshold_percentile=threshold_percentile)

        # Step 8: Download RESIZED image
        # The patch grid aligns with the resized image the embedder saw,
        # so the heatmap must be applied at that resolution first.
        logger.info(f"Downloading resized image from: {resized_url}")
        resized_image = download_image(resized_url)
        if resized_image is None:
            logger.error("Failed to download resized image")
            return None

        # Step 9: Apply heatmap to resized image
        overlay_resized = apply_colormap_and_blend(
            score_grid_norm, resized_image, alpha, colormap
        )

        # Step 10: Resize back to original dimensions
        if original_width and original_height:
            overlay_final = overlay_resized.resize(
                (original_width, original_height), Image.BILINEAR
            )
            logger.info(f"βœ… Resized saliency map to original: {original_width}Γ—{original_height}")
        else:
            overlay_final = overlay_resized

        logger.info(f"βœ… Saliency map generated successfully")
        return overlay_final

    except Exception as e:
        # Broad catch is deliberate: saliency is a best-effort visualization
        # and must never break the caller's UI flow.
        logger.error(f"Saliency generation failed: {e}")
        import traceback
        logger.debug(traceback.format_exc())
        return None
348
+
349
+
350
def can_generate_saliency(metadata: dict) -> bool:
    """
    Report whether a document's metadata carries everything saliency needs.

    Generation requires the tile layout and the resized-image dimensions;
    a field that is absent or None disqualifies the document.

    Args:
        metadata: Document metadata dictionary

    Returns:
        True if all required tile metadata is present
    """
    for field in ('num_tiles', 'tile_rows', 'tile_cols',
                  'resized_width', 'resized_height'):
        if metadata.get(field) is None:
            return False
    return True
362
+
363
+
364
def get_saliency_metadata_summary(metadata: dict) -> str:
    """
    Build a short human-readable description of a document's tile layout.

    Args:
        metadata: Document metadata dictionary

    Returns:
        Summary string such as "3Γ—4 tiles (13 total), 64 patches/tile",
        or a placeholder when the tile fields are missing.
    """
    missing = 'N/A'
    num_tiles = metadata.get('num_tiles', missing)
    tile_rows = metadata.get('tile_rows', missing)
    tile_cols = metadata.get('tile_cols', missing)
    patches_per_tile = metadata.get('patches_per_tile', 64)

    if missing in (num_tiles, tile_rows, tile_cols):
        return "Tile metadata not available"
    return f"{tile_rows}Γ—{tile_cols} tiles ({num_tiles} total), {patches_per_tile} patches/tile"
383
+
src/agents/visual_documents.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Visual Document Display Components
3
+
4
+ UI components for displaying visual search results with enhanced metadata.
5
+ Includes saliency map visualization for tile-aware ColPali embeddings.
6
+ """
7
+
8
+ import streamlit as st
9
+ import pandas as pd
10
+ import numpy as np
11
+ import logging
12
+ from typing import List, Any, Dict, Optional
13
+ from collections import Counter
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def display_visual_document_statistics(sources: List[Any]) -> None:
    """
    Display statistics for visual search results in a bordered box with tables.

    Renders a metrics row (chunk/file/year/source counts) followed by four
    distribution tables (districts, sources, years, files) side by side.
    No-op when *sources* is empty.

    Args:
        sources: List of VisualSearchResult objects
    """
    if not sources:
        return

    # Extract statistics
    filenames = []
    years = []
    sources_list = []
    districts = []

    for doc in sources:
        metadata = getattr(doc, 'metadata', {})
        filenames.append(metadata.get('filename', 'Unknown'))
        year = metadata.get('year')
        if year:
            years.append(year)
        source = metadata.get('source')
        if source:
            sources_list.append(source)
        district = metadata.get('district')
        # NOTE(review): the literal string 'None' is filtered out —
        # presumably upstream serializes missing districts that way; confirm.
        if district and district != 'None':
            districts.append(district)

    # Count unique values
    unique_files = len(set(filenames))
    unique_years = len(set(years))
    unique_sources = len(set(sources_list))

    # Create bordered container
    with st.container():
        st.markdown("""
        <style>
        .stats-container {
            border: 2px solid #e0e0e0;
            border-radius: 10px;
            padding: 20px;
            margin: 10px 0;
            background-color: #f9f9f9;
        }
        </style>
        """, unsafe_allow_html=True)

        st.markdown('<div class="stats-container">', unsafe_allow_html=True)
        st.markdown("### πŸ“Š Retrieval Statistics")

        # Metrics in columns
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Total Chunks", len(sources))
        with col2:
            st.metric("Unique Files", unique_files)
        with col3:
            st.metric("Unique Years", unique_years if unique_years > 0 else "N/A")
        with col4:
            st.metric("Unique Sources", unique_sources if unique_sources > 0 else "N/A")

        st.markdown("---")

        # Distribution tables in columns
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            # District distribution
            if districts:
                district_counts = Counter(districts)
                st.markdown("**🏘️ Districts**")
                district_df = pd.DataFrame([
                    {"District": dist, "Count": count}
                    for dist, count in district_counts.most_common(10)
                ])
                st.dataframe(district_df, hide_index=True, use_container_width=True)

        with col2:
            # Source distribution
            if sources_list:
                source_counts = Counter(sources_list)
                st.markdown("**πŸ›οΈ Sources**")
                source_df = pd.DataFrame([
                    {"Source": src, "Count": count}
                    for src, count in source_counts.most_common()
                ])
                st.dataframe(source_df, hide_index=True, use_container_width=True)

        with col3:
            # Year distribution
            if years:
                year_counts = Counter(years)
                st.markdown("**πŸ“… Years**")
                year_df = pd.DataFrame([
                    {"Year": year, "Count": count}
                    for year, count in sorted(year_counts.items(), reverse=True)
                ])
                st.dataframe(year_df, hide_index=True, use_container_width=True)

        with col4:
            # File distribution (top 10)
            file_counts = Counter(filenames)
            st.markdown("**πŸ“„ Files**")
            # Long filenames are elided to keep the table column readable.
            file_df = pd.DataFrame([
                {"File": filename[:30] + "..." if len(filename) > 30 else filename, "Count": count}
                for filename, count in file_counts.most_common(10)
            ])
            st.dataframe(file_df, hide_index=True, use_container_width=True)

        st.markdown('</div>', unsafe_allow_html=True)
130
+
131
+
132
def display_visual_document_details(
    sources: List[Any],
    show_images: bool = False,
    show_saliency: bool = False,
    qdrant_client: Any = None,
    collection_name: Optional[str] = None,
    query_embedding: Optional[np.ndarray] = None,
    saliency_alpha: float = 0.4,
    saliency_colormap: str = 'hot',
    saliency_threshold: int = 50
) -> None:
    """
    Display detailed information for each visual search result.

    Renders one expander per result with a metadata column on the left and
    the page image (optionally overlaid with a saliency map) on the right.

    Args:
        sources: List of VisualSearchResult objects
        show_images: Whether to display document images (from Cloudinary)
        show_saliency: Whether to generate and display saliency maps
        qdrant_client: Qdrant client (required for saliency)
        collection_name: Qdrant collection name (required for saliency)
        query_embedding: Query embedding for saliency computation
        saliency_alpha: Saliency overlay transparency (0.0-1.0)
        saliency_colormap: Matplotlib colormap for saliency (default: 'hot')
        saliency_threshold: Threshold percentile for saliency (default: 50)
    """
    st.markdown("### πŸ“„ Document Details")

    # Import saliency functions if needed
    if show_saliency:
        from .saliency import generate_tile_aware_saliency, can_generate_saliency

    for i, doc in enumerate(sources):
        metadata = getattr(doc, 'metadata', {})

        # Get basic metadata
        filename = metadata.get('filename', 'Unknown')
        page_number = metadata.get('page_number', '?')
        year = metadata.get('year', 'Unknown')
        source = metadata.get('source', 'Unknown')
        district = metadata.get('district')
        score = getattr(doc, 'score', 0.0)

        # Get visual-specific metadata
        num_tiles = metadata.get('num_tiles')
        tile_rows = metadata.get('tile_rows')
        tile_cols = metadata.get('tile_cols')
        num_visual_tokens = metadata.get('num_visual_tokens')
        original_width = metadata.get('original_width')
        original_height = metadata.get('original_height')
        resized_width = metadata.get('resized_width')
        resized_height = metadata.get('resized_height')

        # Get image URLs
        original_url = metadata.get('original_url')
        resized_url = metadata.get('resized_url')
        page_url = metadata.get('page')  # Fallback

        # Get point_id for saliency (check doc.id first, then metadata)
        point_id = getattr(doc, 'id', None) or metadata.get('point_id') or metadata.get('_id')

        # Debug logging for saliency
        if show_saliency:
            logger.debug(f"Doc {i+1}: point_id={point_id}, has_tiles={metadata.get('num_tiles') is not None}")

        # Build title
        score_text = f" (Score: {score:.3f})"
        title = f"πŸ“„ Document {i+1}: {filename[:50]}...{score_text}"

        with st.expander(title, expanded=(i == 0)):  # Expand first result
            # Two-column layout: Metadata (left) and Image (right)
            col_meta, col_image = st.columns([1, 2])

            with col_meta:
                st.markdown("### πŸ“‹ Metadata")

                # Basic metadata
                # Fix: the f-string previously contained the literal text
                # "(unknown)" instead of interpolating the filename.
                st.write(f"πŸ“„ **File:** {filename}")
                st.write(f"πŸ›οΈ **Source:** {source}")
                st.write(f"πŸ“… **Year:** {year}")
                st.write(f"πŸ“– **Page:** {page_number}")

                if district and district != 'None':
                    st.write(f"πŸ“ **District:** {district}")

                # Relevance score
                st.markdown("---")
                st.markdown("### 🎯 Relevance")
                score_color = "🟒" if score > 0.7 else "🟑" if score > 0.5 else "πŸ”΄"
                st.markdown(f"**Score:** {score_color} **{score:.3f}**")

                # Visual metadata (if available)
                if num_tiles or num_visual_tokens:
                    st.markdown("---")
                    st.markdown("### 🎨 Visual Metadata")

                    if num_tiles:
                        st.write(f"πŸ”² **Tiles:** {num_tiles} ({tile_rows}Γ—{tile_cols})")
                    if num_visual_tokens:
                        st.write(f"πŸ”’ **Visual Tokens:** {num_visual_tokens}")
                    if original_width and original_height:
                        st.write(f"πŸ“ **Original Size:** {original_width}Γ—{original_height}")
                    if resized_width and resized_height:
                        st.write(f"πŸ“ **Resized Size:** {resized_width}Γ—{resized_height}")

                    processing_version = metadata.get('processing_version')
                    if processing_version:
                        st.write(f"βš™οΈ **Processing:** {processing_version}")

                # Text content preview
                content = getattr(doc, 'page_content', '')
                if content:
                    st.markdown("---")
                    with st.expander("πŸ“ Extracted Text", expanded=True):
                        st.text_area(
                            "Content",
                            value=content[:500] + ("..." if len(content) > 500 else ""),
                            height=150,
                            disabled=True,
                            label_visibility="collapsed",
                            key=f"visual_doc_text_{i}"
                        )
                else:
                    st.markdown("---")
                    st.caption("_No text extracted (image-only page)_")

                # Show image URLs under text
                if original_url and resized_url:
                    with st.expander("πŸ”— Image URLs", expanded=True):
                        st.markdown(f"**Original:** [{original_url}]({original_url})")
                        st.markdown(f"**Resized (for embeddings):** [{resized_url}]({resized_url})")

            with col_image:
                st.markdown("### πŸ“Έ Document Page")

                # Check if we should generate saliency
                saliency_generated = False

                if show_saliency and show_images:
                    # Check if we have all requirements for saliency
                    has_client = qdrant_client is not None
                    has_collection = collection_name is not None
                    has_query = query_embedding is not None
                    has_point_id = point_id is not None
                    has_tile_metadata = can_generate_saliency(metadata)

                    can_saliency = has_client and has_collection and has_query and has_point_id and has_tile_metadata

                    if not can_saliency:
                        missing = []
                        if not has_client: missing.append("qdrant_client")
                        if not has_collection: missing.append("collection_name")
                        if not has_query: missing.append("query_embedding")
                        if not has_point_id: missing.append("point_id")
                        if not has_tile_metadata: missing.append("tile_metadata")
                        logger.warning(f"Doc {i+1}: Saliency unavailable, missing: {missing}")

                    if can_saliency:
                        try:
                            with st.spinner(f"πŸ”₯ Generating saliency map for Doc {i+1}..."):
                                # Convert query embedding if needed
                                query_emb = query_embedding
                                if hasattr(query_emb, 'cpu'):
                                    query_emb = query_emb.cpu().float().numpy()
                                if query_emb.ndim == 3:
                                    query_emb = query_emb.squeeze(0)  # Remove batch dimension

                                logger.info(f"πŸ”₯ Generating saliency for doc {i+1}: point_id={point_id}, colormap={saliency_colormap}")

                                saliency_img = generate_tile_aware_saliency(
                                    qdrant_client=qdrant_client,
                                    collection_name=collection_name,
                                    point_id=point_id,
                                    query_embedding=query_emb,
                                    alpha=saliency_alpha,
                                    colormap=saliency_colormap,
                                    threshold_percentile=saliency_threshold
                                )

                                if saliency_img:
                                    # Display saliency map
                                    st.image(saliency_img, width=700, caption=f"πŸ”₯ Saliency Map - Page {page_number}")
                                    saliency_generated = True
                                    logger.info(f"βœ… Saliency map displayed for doc {i+1}")
                                else:
                                    logger.warning(f"Saliency generation returned None for doc {i+1}")
                                    st.caption("_Saliency map could not be generated_")
                        except Exception as e:
                            logger.error(f"Saliency generation failed for doc {i+1}: {e}")
                            import traceback
                            logger.debug(traceback.format_exc())
                            st.warning(f"⚠️ Saliency generation failed: {str(e)[:100]}")
                    else:
                        if not has_tile_metadata:
                            st.caption("_Saliency unavailable: missing tile metadata_")
                        elif not has_point_id:
                            st.caption("_Saliency unavailable: missing point_id_")

                # Display original image if saliency wasn't generated
                if show_images and not saliency_generated:
                    # Use ORIGINAL image (not resized) for display
                    image_url = original_url or resized_url or page_url

                    if image_url and isinstance(image_url, str) and image_url.startswith('http'):
                        try:
                            # Use width parameter for medium-sized image
                            st.image(image_url, width=700, caption=f"Page {page_number}")
                        except Exception as e:
                            st.error(f"Failed to load image: {e}")
                    else:
                        st.info("No image URL available")
                elif not show_images:
                    st.info("Enable image display in settings to view document pages")
344
+
345
+
346
def display_visual_search_results(
    sources: List[Any],
    show_statistics: bool = True,
    show_images: bool = False,
    show_saliency: bool = False,
    qdrant_client: Any = None,
    collection_name: str = None,
    query_embedding: Optional[np.ndarray] = None,
    saliency_alpha: float = 0.4,
    saliency_colormap: str = 'hot',
    saliency_threshold: int = 50,
    max_display: int = 20
) -> None:
    """
    Display visual search results with statistics and details.

    Top-level entry point: prints a summary line, optionally the statistics
    panel, then delegates per-document rendering (capped at *max_display*)
    to display_visual_document_details.

    Args:
        sources: List of VisualSearchResult objects
        show_statistics: Whether to show statistics
        show_images: Whether to show document images
        show_saliency: Whether to generate and display saliency maps
        qdrant_client: Qdrant client (required for saliency)
        collection_name: Qdrant collection name (required for saliency)
        query_embedding: Query embedding for saliency computation
        saliency_alpha: Saliency overlay transparency (0.0-1.0)
        saliency_colormap: Matplotlib colormap for saliency (default: 'hot')
        saliency_threshold: Threshold percentile for saliency (default: 50)
        max_display: Maximum number of documents to display in detail
    """
    if not sources:
        st.info("No documents were retrieved for the last query.")
        return

    # Count unique filenames
    unique_filenames = set()
    for doc in sources:
        filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
        unique_filenames.add(filename)

    st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents:**")

    # More chunks than files means at least one document contributed
    # multiple chunks — explain that to the user.
    if len(unique_filenames) < len(sources):
        st.info(f"πŸ’‘ **Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")

    # Show saliency info if enabled
    if show_saliency:
        st.info(f"πŸ”₯ **Saliency Maps Enabled**: Showing which image regions are most relevant to your query (using '{saliency_colormap}' colormap)")

    # Show statistics
    if show_statistics:
        display_visual_document_statistics(sources)
        st.markdown("---")

    # Show detailed results (limit to max_display)
    display_sources = sources[:max_display]
    if len(sources) > max_display:
        st.warning(f"⚠️ Showing top {max_display} of {len(sources)} results")

    display_visual_document_details(
        display_sources,
        show_images=show_images,
        show_saliency=show_saliency,
        qdrant_client=qdrant_client,
        collection_name=collection_name,
        query_embedding=query_embedding,
        saliency_alpha=saliency_alpha,
        saliency_colormap=saliency_colormap,
        saliency_threshold=saliency_threshold
    )

    if len(sources) > max_display:
        st.info(f"πŸ’‘ {len(sources) - max_display} more results not shown")
418
+