Spaces:
Sleeping
Sleeping
| # Add this function to your agent_direct_llm_sections.py or a new api_callers.py / utils.py | |
| import json | |
| from pathlib import Path | |
# JSON file holding the image metadata records searched by the image tool.
IMAGE_METADATA_FILE = "./data/images/image_metadata.json"
IMAGE_BASE_STORAGE_PATH = "./data/images"  # Base path where image files are stored

# Load metadata once when the script starts (or when the tool is initialized).
# Loading is best-effort: a missing file silently leaves the store empty, and
# any read/parse failure is reported on stdout instead of aborting the import.
_image_metadata_store = []
if Path(IMAGE_METADATA_FILE).exists():
    try:
        with open(IMAGE_METADATA_FILE, 'r', encoding='utf-8') as f:
            _image_metadata_store = json.load(f)
    except Exception as e:  # deliberate catch-all at the load boundary
        print(f"⚠️ Error loading image metadata: {e}")
    # The search code iterates the store as a list of records; any other
    # top-level JSON shape (object, string, number, null) is unusable.
    if not isinstance(_image_metadata_store, list):
        print(
            "⚠️ Error loading image metadata: expected a JSON list, "
            f"got {type(_image_metadata_store).__name__}"
        )
        _image_metadata_store = []
def find_relevant_image_info(image_description_query: str) -> str:
    """Return a text summary of the stored image that best matches a query.

    Matching is a simple case-insensitive keyword overlap: the query is split
    on whitespace and compared against the whitespace-separated words of each
    record's description, keywords and filename stem (underscores treated as
    spaces). The record sharing the most words wins; ties go to the record
    with the shorter description.

    Args:
        image_description_query: Free-text description of the wanted image.

    Returns:
        A multi-line string describing the best match (file, description,
        source section/page and storage path), or a message saying that the
        store is empty or that nothing matched.
    """
    print(f"--- Image Retrieval Tool: Searching for image matching: '{image_description_query}' ---")
    if not _image_metadata_store:
        return "Image metadata store is not loaded or is empty. Cannot search for images."

    # Simple keyword matching for now. Can be enhanced with semantic search later if needed.
    query_keywords = set(image_description_query.lower().split())
    best_match = None
    best_description = ""
    max_shared_keywords = 0

    for image_data in _image_metadata_store:
        # Skip malformed records: a non-dict entry cannot be searched, and a
        # record without a filename cannot be reported or displayed.
        if not isinstance(image_data, dict) or not image_data.get("image_filename"):
            continue

        # `or` also covers an explicit JSON null, which .get() would pass through.
        description = str(image_data.get("caption_or_description") or "")
        keywords = [str(kw) for kw in (image_data.get("keywords") or [])]
        # Add filename to searchable text too, without extension.
        filename_text = Path(str(image_data["image_filename"])).stem.replace("_", " ")

        searchable_words = set(
            " ".join([description, *keywords, filename_text]).lower().split()
        )
        current_shared_keywords = len(query_keywords & searchable_words)

        if current_shared_keywords > max_shared_keywords:
            max_shared_keywords = current_shared_keywords
            best_match, best_description = image_data, description
        elif (
            best_match is not None
            and current_shared_keywords == max_shared_keywords
            and len(description) < len(best_description)
        ):
            # Simple tie-breaking: prefer shorter description if scores are equal.
            best_match, best_description = image_data, description

    # best_match is only ever set on a strictly positive score, so a match
    # always has at least one shared keyword.
    if best_match is None:
        print(f" Image tool: No specific image found for '{image_description_query}'.")
        return f"No specific image found closely matching the description: '{image_description_query}'."

    image_filename = str(best_match["image_filename"])
    full_image_path = Path(IMAGE_BASE_STORAGE_PATH) / image_filename
    description_text = best_match.get("caption_or_description") or "N/A"
    section_id = best_match.get("section_id_source", "N/A")
    pdf_page = best_match.get("original_pdf_page", "N/A")
    response_str = (
        f"Found image related to '{image_description_query}':\n"
        f" File: {image_filename}\n"
        f" Description: {description_text}\n"
        f" Source: Section {section_id}, Page {pdf_page}\n"
        f" (Image would be displayed here in a UI, path: {full_image_path})"
    )
    print(f" Image tool found: {image_filename}")
    return response_str