import json
import os
from urllib.parse import urlparse

from pydantic import BaseModel

from .template import DIVERSIFICATION_PROMPT, LOCATION_PROMPT, VERIFICATION_PROMPT

# Domains whose pages are dropped from search-candidate lists.
_EXCLUDED_DOMAINS = (
    "x.com",
    "twitter.com",
    "linkedin.com",
    "bbc.com",
    "bbc.co.uk",
    "instagram.com",
    "tiktok.com",
)

# (JSON key, human-readable label) pairs, in the order they appear in the
# metadata prompt.
_METADATA_FIELDS = (
    ("location", "Location"),
    ("violence level", "Violence level"),
    ("title", "Title"),
    ("social media link", "Social media link"),
    ("description", "Description"),
    ("category", "Category"),
)


class Evidence(BaseModel):
    """A single piece of analysis together with its supporting references."""

    analysis: str
    # pydantic copies field defaults per instance, so this list literal is not
    # shared between models the way a plain mutable default argument would be.
    references: list[str] = []


class LocationPrediction(BaseModel):
    """A predicted location (coordinates plus a name) backed by evidence."""

    latitude: float
    longitude: float
    location: str
    evidence: list[Evidence]


class GPSPrediction(BaseModel):
    """A predicted GPS coordinate with free-text analysis and references."""

    latitude: float
    longitude: float
    analysis: str
    references: list[str]


def rag_prompt(index_search_json: str, n_coords: int | None = None) -> str:
    """
    Build a reference sentence listing GPS coordinates of similar and
    dissimilar images read from an index-search JSON file.

    The file is expected to hold an object with optional ``candidates_gps``
    and ``reverse_gps`` keys, each a list of ``[lat, lon]`` pairs.

    Args:
        index_search_json (str): Path to the index-search JSON file.
        n_coords (int, optional): Number of coordinates to keep from each
            list. Defaults to all.

    Returns:
        str: Formatted sentence with both coordinate lists, or an empty
            string when the file does not exist.
    """
    if not os.path.exists(index_search_json):
        return ""

    with open(index_search_json, "r", encoding="utf-8") as file:
        data = json.load(file)

    candidates_gps = data.get("candidates_gps", [])
    reverse_gps = data.get("reverse_gps", [])

    if n_coords is not None:
        # Slicing already clamps to the list length.
        candidates_gps = candidates_gps[:n_coords]
        reverse_gps = reverse_gps[:n_coords]

    candidates_str = (
        "[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in candidates_gps) + "]"
    )
    reverse_str = "[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in reverse_gps) + "]"

    return f"For your reference, these are coordinates of some similar images: {candidates_str}, and these are coordinates of some dissimilar images: {reverse_str}."


def metadata_prompt(metadata_file_path: str) -> str:
    """
    Read a metadata JSON file and return its known fields as one sentence-like
    string.

    Only non-empty values of the keys ``location``, ``violence level``,
    ``title``, ``social media link``, ``description`` and ``category`` are
    included, in that order.

    Args:
        metadata_file_path (str): Path to the metadata JSON file.

    Returns:
        str: Formatted metadata string, or an empty string when the path is
            empty or missing, the file cannot be read or parsed, the content
            is not a JSON object, or none of the known fields has a value.
    """
    if not metadata_file_path or not os.path.exists(metadata_file_path):
        return ""

    try:
        with open(metadata_file_path, "r", encoding="utf-8") as file:
            metadata = json.load(file)
    except (OSError, ValueError):
        # Best effort: unreadable or malformed metadata contributes nothing.
        # ValueError covers both JSON syntax errors and decoding errors.
        return ""

    if not metadata or not isinstance(metadata, dict):
        return ""

    metadata_parts = [
        f"{label}: {metadata[key]}"
        for key, label in _METADATA_FIELDS
        if metadata.get(key)
    ]
    if not metadata_parts:
        return ""

    return "Metadata for the image is: " + ". ".join(metadata_parts) + "."


def _is_excluded_url(url: str) -> bool:
    """Return True when the URL's host is an excluded domain or a subdomain of one."""
    # urlparse only populates the host when a "//" authority marker is
    # present, so scheme-less candidates get one prepended.
    target = url if "://" in url else f"//{url}"
    try:
        host = urlparse(target).hostname or ""
    except ValueError:
        # Malformed authority (e.g. a broken IPv6 literal): cannot be matched
        # against the exclusion list, so keep the candidate.
        return False
    host = host.lower().rstrip(".")
    return any(
        host == domain or host.endswith(f".{domain}") for domain in _EXCLUDED_DOMAINS
    )


def search_prompt(search_candidates: list[str], n_search: int | None = None) -> str:
    """
    Format search candidate links into a newline-separated string.

    Links hosted on an excluded domain (or one of its subdomains) are dropped
    before the result is truncated to ``n_search`` entries. Matching is done
    on the URL's hostname, so unrelated hosts that merely contain an excluded
    name as a substring (e.g. ``netflix.com`` vs ``x.com``) are kept.

    Args:
        search_candidates (list[str]): Candidate URLs from image search.
        n_search (int, optional): Maximum number of links to include.
            Defaults to all.

    Returns:
        str: The remaining links, one per line, or an empty string when the
            input is empty or not a list.

    Example:
        >>> print(search_prompt(["https://example1.com", "https://example2.com"], n_search=3))
        https://example1.com
        https://example2.com
    """
    if not search_candidates or not isinstance(search_candidates, list):
        return ""

    kept = [
        url
        for url in search_candidates
        if isinstance(url, str) and not _is_excluded_url(url)
    ]

    if n_search is not None:
        kept = kept[:n_search]

    return "\n".join(kept)


def image_search_prompt(image_search_json: str, n_search: int | None = None) -> str:
    """
    Read an image-search JSON file and build a prompt from the pages that
    contain matching images.

    The file is expected to hold a list of objects; links are collected from
    their ``pages_with_matching_images``, ``full_matching_images`` and
    ``partial_matching_images`` keys. Only the pages list is currently
    emitted; the other two only influence whether anything is returned.

    Args:
        image_search_json (str): Path to the image-search JSON file.
        n_search (int, optional): Maximum number of links to include.
            Defaults to all.

    Returns:
        str: Prompt listing pages with matching images, or an empty string
            when the file does not exist or no links were collected.
    """
    if not os.path.exists(image_search_json):
        return ""

    # Dicts are used as insertion-ordered sets so that the emitted links (and
    # the n_search truncation) are deterministic across runs.
    pages_with_matching_images: dict[str, None] = {}
    full_matching_images: dict[str, None] = {}
    partial_matching_images: dict[str, None] = {}

    with open(image_search_json, "r", encoding="utf-8") as file:
        data_list = json.load(file)

    for json_data in data_list:
        # NOTE(review): the elif chain means only the first key present in an
        # entry is read; confirm entries never carry more than one of them.
        if "pages_with_matching_images" in json_data:
            pages_with_matching_images.update(
                dict.fromkeys(json_data["pages_with_matching_images"])
            )
        elif "full_matching_images" in json_data:
            full_matching_images.update(
                dict.fromkeys(json_data["full_matching_images"])
            )
        elif "partial_matching_images" in json_data:
            partial_matching_images.update(
                dict.fromkeys(json_data["partial_matching_images"])
            )

    if (
        not pages_with_matching_images
        and not full_matching_images
        and not partial_matching_images
    ):
        return ""

    prompt = "Those are pages with matching images:\n"
    prompt += search_prompt(list(pages_with_matching_images), n_search=n_search)
    # Full and partial matches are collected above but intentionally not
    # emitted at the moment.
    return prompt


def search_content_prompt(search_content_json: str) -> str:
    """
    Read a JSON file containing search content and return it pretty-printed.

    Args:
        search_content_json (str): Path to the JSON file with search content.

    Returns:
        str: The content re-serialised with two-space indentation, or an
            empty string when the file is missing, unreadable, malformed,
            empty, or not a JSON list.
    """
    if not os.path.exists(search_content_json):
        return ""

    try:
        with open(search_content_json, "r", encoding="utf-8") as file:
            data = json.load(file)
    except (OSError, ValueError):
        # Best effort: unreadable or malformed content contributes nothing.
        return ""

    if not data or not isinstance(data, list):
        return ""

    return json.dumps(data, indent=2)


def transcript_prompt(audio_dir: str) -> str:
    """
    Read every ``.txt`` transcript in a directory and combine them into one
    prompt string.

    Files are processed in sorted name order so the result is deterministic.

    Args:
        audio_dir (str): Path to the directory containing transcript files.

    Returns:
        str: The transcripts joined by newlines behind a fixed lead-in, or an
            empty string when the directory is missing or holds no transcript
            text.
    """
    if not os.path.isdir(audio_dir):
        return ""

    transcript_content = []
    for txt_file in sorted(os.listdir(audio_dir)):
        if not txt_file.endswith(".txt"):
            continue
        txt_path = os.path.join(audio_dir, txt_file)
        with open(txt_path, "r", encoding="utf-8") as file:
            transcript_content.append(file.read().strip())

    combined_transcript = "\n".join(transcript_content)
    if not combined_transcript:
        return ""
    return f"This is the transcript of the video: {combined_transcript}"


def combine_prompt_data(
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Combine all available prompt data found under a directory into one string.

    Files read from ``prompt_dir``: ``index_search.json``, ``metadata.json``,
    ``image_search_content.json``, ``text_search_content.json`` and the
    ``audio`` sub-directory.

    Args:
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Currently unused; accepted for interface
            compatibility.
        n_coords (int, optional): Number of coordinates to include in the RAG
            section. The RAG section is skipped entirely when this is None.
        image_prediction (bool): Include image-search content.
        text_prediction (bool): Include metadata and text-search content.

    Returns:
        str: The non-empty sections joined by blank lines.

    Example:
        >>> prompt = combine_prompt_data("path/to/prompt_dir", n_coords=5)
    """
    prompt_parts = []

    # 1. RAG prompt (optional)
    if n_coords is not None:
        rag_text = rag_prompt(os.path.join(prompt_dir, "index_search.json"), n_coords)
        if rag_text:
            prompt_parts.append(rag_text)

    # 2. Metadata prompt
    if text_prediction:
        metadata_text = metadata_prompt(os.path.join(prompt_dir, "metadata.json"))
        if metadata_text:
            prompt_parts.append(metadata_text)

    # 3. Search prompt
    if image_prediction:
        image_search_text = search_content_prompt(
            os.path.join(prompt_dir, "image_search_content.json")
        )
        if image_search_text:
            prompt_parts.append(image_search_text)

    if text_prediction:
        search_content_text = search_content_prompt(
            os.path.join(prompt_dir, "text_search_content.json")
        )
        if search_content_text:
            prompt_parts.append(search_content_text)

    # 4. Transcript prompt
    transcript_text = transcript_prompt(os.path.join(prompt_dir, "audio"))
    if transcript_text:
        prompt_parts.append(transcript_text)

    # Combine all parts with double newlines for readability
    combined_prompt = "\n\n".join(part for part in prompt_parts if part.strip())

    return combined_prompt


def diversification_prompt(
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Build the diversification prompt by filling DIVERSIFICATION_PROMPT with
    the combined prompt data for a directory.

    Args:
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Forwarded to ``combine_prompt_data``.
        n_coords (int, optional): Forwarded to ``combine_prompt_data``.
        image_prediction (bool): Forwarded to ``combine_prompt_data``.
        text_prediction (bool): Forwarded to ``combine_prompt_data``.

    Returns:
        str: The stripped template with ``prompt_data`` substituted.

    Example:
        >>> prompt = diversification_prompt("path/to/prompt_dir", n_coords=5)
    """
    prompt_data = combine_prompt_data(
        prompt_dir,
        n_search=n_search,
        n_coords=n_coords,
        image_prediction=image_prediction,
        text_prediction=text_prediction,
    )

    prompt = DIVERSIFICATION_PROMPT.strip().format(prompt_data=prompt_data)

    return prompt


def location_prompt(location: str) -> str:
    """
    Create a prompt string for the given location.

    Args:
        location (str): The location to include in the prompt.

    Returns:
        str: The stripped LOCATION_PROMPT template with ``location``
            substituted, or an empty string when ``location`` is empty.
    """
    if not location:
        return ""

    return LOCATION_PROMPT.strip().format(location=location)


def verification_prompt(
    satellite_image_id: int,
    prediction: dict,
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Build the verification prompt for a prediction.

    Args:
        satellite_image_id (int): Identifier of the satellite image; rendered
            zero-padded to three digits.
        prediction (dict): The prediction to verify; rendered as indented JSON.
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Forwarded to ``combine_prompt_data``.
        n_coords (int, optional): Forwarded to ``combine_prompt_data``.
        image_prediction (bool): Forwarded to ``combine_prompt_data``.
        text_prediction (bool): Forwarded to ``combine_prompt_data``.

    Returns:
        str: The stripped VERIFICATION_PROMPT template with ``prompt_data``,
            ``prediction`` and ``satellite_image_id`` substituted.
    """
    prompt_data = combine_prompt_data(
        prompt_dir,
        n_search=n_search,
        n_coords=n_coords,
        image_prediction=image_prediction,
        text_prediction=text_prediction,
    )

    prompt = VERIFICATION_PROMPT.strip().format(
        prompt_data=prompt_data,
        prediction=json.dumps(prediction, indent=2),
        satellite_image_id=f"{satellite_image_id:03d}",
    )

    return prompt