File size: 3,145 Bytes
1739591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Add this function to your agent_direct_llm_sections.py or a new api_callers.py / utils.py

import json
from pathlib import Path

# JSON file holding the per-image metadata records.
IMAGE_METADATA_FILE = "./data/images/image_metadata.json"
# Base directory where the image files themselves are stored.
IMAGE_BASE_STORAGE_PATH = "./data/images"


def _read_image_metadata():
    """Return the parsed contents of IMAGE_METADATA_FILE.

    Falls back to an empty list when the file does not exist, and prints a
    warning and falls back to an empty list when reading or parsing fails.
    """
    if not Path(IMAGE_METADATA_FILE).exists():
        return []
    try:
        with open(IMAGE_METADATA_FILE, 'r', encoding='utf-8') as metadata_file:
            return json.load(metadata_file)
    except Exception as load_error:
        print(f"⚠️ Error loading image metadata: {load_error}")
        return []


# Loaded once, when this module is first imported.
_image_metadata_store = _read_image_metadata()

# Punctuation stripped from both ends of every token, so that "pump." or
# "(pump)" inside a caption still matches the query word "pump".
_EDGE_PUNCTUATION = ".,;:!?\"'()[]{}<>"


def _keyword_set(text: str) -> set:
    """Return the distinct lowercase words of *text* with edge punctuation removed."""
    stripped = (word.strip(_EDGE_PUNCTUATION) for word in text.lower().split())
    return {word for word in stripped if word}


def _searchable_text(image_data: dict) -> str:
    """Join a record's description, keywords and filename stem into one string."""
    keywords = image_data.get("keywords") or []
    if isinstance(keywords, str):
        # A bare string would otherwise be iterated character by character.
        keywords = [keywords]

    parts = [str(image_data.get("caption_or_description") or "")]
    parts.extend(str(kw) for kw in keywords)
    # The filename without its extension is searchable too; underscores
    # become word separators ("pump_diagram" -> "pump diagram").
    filename = str(image_data.get("image_filename") or "")
    parts.append(Path(filename).stem.replace("_", " "))
    return " ".join(parts)


def find_relevant_image_info(image_description_query: str) -> str:
    """Find the stored image record that best matches a free-text description.

    Matching is a simple keyword overlap: the query words are compared with
    the words of each record's description, keywords and filename stem. The
    record sharing the most words wins; on a tie the record with the shorter
    description is preferred. At least one shared word is required.

    Records that are not dicts are skipped, and missing or null fields are
    treated as empty instead of raising.

    Args:
        image_description_query: Free-text description of the wanted image.

    Returns:
        A human-readable, multi-line summary of the best match (file,
        description, source and path), or a message saying that the store
        is empty or that nothing matched.
    """
    print(f"--- Image Retrieval Tool: Searching for image matching: '{image_description_query}' ---")

    if not _image_metadata_store:
        return "Image metadata store is not loaded or is empty. Cannot search for images."

    # Simple keyword matching for now. Can be enhanced with semantic search later if needed.
    query_keywords = _keyword_set(image_description_query)
    best_match = None
    max_shared_keywords = 0

    for image_data in _image_metadata_store:
        if not isinstance(image_data, dict):
            continue

        shared = len(query_keywords & _keyword_set(_searchable_text(image_data)))
        if shared == 0:
            # Require at least one keyword match.
            continue

        if shared > max_shared_keywords:
            max_shared_keywords = shared
            best_match = image_data
        elif shared == max_shared_keywords:
            # Simple tie-breaking: prefer the shorter description.
            candidate_len = len(str(image_data.get("caption_or_description") or ""))
            current_len = len(str(best_match.get("caption_or_description") or ""))
            if candidate_len < current_len:
                best_match = image_data

    if best_match is None:
        print(f"   Image tool: No specific image found for '{image_description_query}'.")
        return f"No specific image found closely matching the description: '{image_description_query}'."

    # Use .get() here as well: a record can match through its keywords alone
    # and still lack a filename or a description.
    filename = str(best_match.get("image_filename") or "")
    shown_filename = filename or "N/A"
    shown_description = best_match.get("caption_or_description") or "N/A"
    full_image_path = (Path(IMAGE_BASE_STORAGE_PATH) / filename) if filename else "N/A"

    response_str = (
        f"Found image related to '{image_description_query}':\n"
        f"  File: {shown_filename}\n"
        f"  Description: {shown_description}\n"
        f"  Source: Section {best_match.get('section_id_source', 'N/A')}, Page {best_match.get('original_pdf_page', 'N/A')}\n"
        f"  (Image would be displayed here in a UI, path: {full_image_path})"
    )
    print(f"   Image tool found: {shown_filename}")
    return response_str