# geolocation/src/prompt/factory.py
# Repository page header captured with this file: "3v324v23's picture", commit "init prj" (eff2be4).
import json
import os
from urllib.parse import urlparse

from pydantic import BaseModel

from .template import DIVERSIFICATION_PROMPT, LOCATION_PROMPT, VERIFICATION_PROMPT
# Pydantic model holding one piece of evidence: an analysis text plus the
# references attached to it.
class Evidence(BaseModel):
    # Required analysis text.
    analysis: str
    # Optional list of reference strings; defaults to an empty list.
    references: list[str] = []
# Pydantic model for a predicted location: a coordinate pair, a location
# string, and the list of Evidence items attached to the prediction.
class LocationPrediction(BaseModel):
    # Predicted coordinates (presumably decimal degrees -- confirm with callers).
    latitude: float
    longitude: float
    # Location as a string (presumably a human-readable place name -- confirm).
    location: str
    # Required list of Evidence entries; no default is provided.
    evidence: list[Evidence]
# Pydantic model for a coordinate-only prediction that carries its analysis
# text and references inline instead of a list of Evidence items.
class GPSPrediction(BaseModel):
    # Predicted coordinates (presumably decimal degrees -- confirm with callers).
    latitude: float
    longitude: float
    # Required analysis text.
    analysis: str
    # Required list of reference strings (unlike Evidence, there is no default).
    references: list[str]
def rag_prompt(index_search_json: str, n_coords: int | None = None) -> str:
"""
Creates a formatted string with GPS coordinates for similar and dissimilar images.
Args:
candidates_gps (list[tuple]): List of (lat, lon) tuples for similar images.
reverse_gps (list[tuple]): List of (lat, lon) tuples for dissimilar images.
n_coords (int, optional): Number of coords to include from each list. Defaults to all.
Returns:
str: Formatted string with coordinates for reference.
"""
if not os.path.exists(index_search_json):
return ""
with open(index_search_json, "r", encoding="utf-8") as file:
data = json.load(file)
candidates_gps = data.get("candidates_gps", [])
reverse_gps = data.get("reverse_gps", [])
if n_coords is not None:
candidates_gps = candidates_gps[: min(n_coords, len(candidates_gps))]
reverse_gps = reverse_gps[: min(n_coords, len(reverse_gps))]
else:
candidates_gps = candidates_gps
reverse_gps = reverse_gps
candidates_str = (
"[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in candidates_gps) + "]"
)
reverse_str = "[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in reverse_gps) + "]"
return f"For your reference, these are coordinates of some similar images: {candidates_str}, and these are coordinates of some dissimilar images: {reverse_str}."
def metadata_prompt(metadata_file_path: str) -> str:
    """
    Reads a metadata JSON file and returns a sentence combining its known fields.

    Only the keys ``location``, ``violence level``, ``title``, ``social media link``,
    ``description`` and ``category`` are used, in that order; keys that are missing
    or hold a falsy value are skipped.

    Args:
        metadata_file_path (str): Path to the metadata JSON file.

    Returns:
        str: ``"Metadata for the image is: <Label>: <value>. ... ."`` or an empty
            string when the path is empty/missing, the file cannot be read or
            parsed, the JSON root is not an object, or no known field has a value.
    """
    if not metadata_file_path or not os.path.exists(metadata_file_path):
        return ""

    # Keep the try body limited to the I/O that can legitimately fail, so real
    # programming errors are not silently turned into an empty prompt.
    try:
        with open(metadata_file_path, "r", encoding="utf-8") as file:
            metadata = json.load(file)
    except (OSError, ValueError):
        return ""
    if not isinstance(metadata, dict):
        return ""

    # (JSON key, label shown in the prompt), in output order.
    fields = (
        ("location", "Location"),
        ("violence level", "Violence level"),
        ("title", "Title"),
        ("social media link", "Social media link"),
        ("description", "Description"),
        ("category", "Category"),
    )
    metadata_parts = [
        f"{label}: {metadata[key]}" for key, label in fields if metadata.get(key)
    ]
    if not metadata_parts:
        return ""
    return "Metadata for the image is: " + ". ".join(metadata_parts) + "."
def search_prompt(search_candidates: list[str], n_search: int | None = None) -> str:
"""
Formats search candidate links into a prompt string.
Args:
search_candidates (list[str]): List of candidate URLs from image search
n_search (int): Number of results to include (default: 5)
Returns:
str: Formatted string with candidate links, each on a new line
Example:
>>> candidates = search_prompt(["https://example1.com", "https://example2.com"], n_search=3)
>>> print(candidates)
Similar image can be found in those links:
https://example1.com
https://example2.com
"""
if not search_candidates or not isinstance(search_candidates, list):
return ""
EXCLUDE_DOMAINS = [
"x.com",
"twitter.com",
"linkedin.com",
"bbc.com",
"bbc.co.uk",
"instagram.com",
"tiktok.com",
]
for domain in EXCLUDE_DOMAINS:
search_candidates = [url for url in search_candidates if domain not in url]
if n_search is not None:
search_candidates = search_candidates[: min(n_search, len(search_candidates))]
try:
prompt = "\n".join(search_candidates)
return prompt
except Exception:
return ""
def image_search_prompt(image_search_json: str, n_search: int | None = None) -> str:
    """
    Reads an image-search JSON file and lists the pages that contain matching images.

    The file is expected to hold a JSON list of objects; the links under each
    object's ``pages_with_matching_images`` key are merged (duplicates removed,
    first-seen order kept) and passed through ``search_prompt``.

    Args:
        image_search_json (str): Path to the image-search JSON file.
        n_search (int, optional): Maximum number of links to include. Defaults to
            None, which keeps all of them.

    Returns:
        str: A header line followed by one link per line, or an empty string when
            the file is missing, unreadable, malformed, not a JSON list, or no
            link is left after filtering.
    """
    if not os.path.exists(image_search_json):
        return ""
    try:
        with open(image_search_json, "r", encoding="utf-8") as file:
            data_list = json.load(file)
    except (OSError, ValueError):
        return ""
    if not isinstance(data_list, list):
        return ""

    # A dict is used as an insertion-ordered set: duplicates collapse, but unlike
    # a real set the iteration order is the same on every interpreter run, so the
    # n_search cut-off always keeps the same links.
    pages_with_matching_images: dict[str, None] = {}
    for json_data in data_list:
        if isinstance(json_data, dict):
            pages_with_matching_images.update(
                dict.fromkeys(json_data.get("pages_with_matching_images") or [])
            )

    # NOTE: "full_matching_images" and "partial_matching_images" entries are
    # currently not included in the prompt.
    links = search_prompt(list(pages_with_matching_images), n_search=n_search)
    if not links:
        # Never emit the header on its own: a heading with no links under it
        # would only mislead the model reading the prompt.
        return ""
    return "Those are pages with matching images:\n" + links
def search_content_prompt(search_content_json: str) -> str:
    """
    Loads a search-content JSON file and re-serialises it as an indented JSON string.

    Args:
        search_content_json (str): Path to the JSON file with search content.

    Returns:
        str: The file's content dumped with ``indent=2`` when it is a non-empty
            JSON list; an empty string when the file is missing, fails to load,
            or holds anything else.
    """
    if os.path.exists(search_content_json):
        try:
            with open(search_content_json, "r", encoding="utf-8") as handle:
                payload = json.load(handle)
            # Only a non-empty list counts as usable search content.
            if isinstance(payload, list) and payload:
                return json.dumps(payload, indent=2)
        except Exception:
            pass
    return ""
def transcript_prompt(audio_dir: str) -> str:
    """
    Reads every ``.txt`` transcript in a directory and combines them into one prompt.

    Files are read in sorted file-name order, stripped of surrounding whitespace,
    and joined with newlines; transcripts that are empty after stripping are skipped.

    Args:
        audio_dir (str): Path to the directory containing transcript files.

    Returns:
        str: ``"This is the transcript of the video: <text>"``, or an empty string
            when the path is not a directory or no transcript has any content.
    """
    # isdir (rather than exists) also rejects a path that points at a regular file.
    if not os.path.isdir(audio_dir):
        return ""

    transcript_content = []
    # os.listdir returns names in arbitrary order; sorting keeps multi-part
    # transcripts in a stable order from run to run.
    for txt_file in sorted(os.listdir(audio_dir)):
        if not txt_file.endswith(".txt"):
            continue
        with open(os.path.join(audio_dir, txt_file), "r", encoding="utf-8") as file:
            text = file.read().strip()
        # Skip blank transcripts so they cannot produce a prompt made of newlines only.
        if text:
            transcript_content.append(text)

    combined_transcript = "\n".join(transcript_content)
    return (
        f"This is the transcript of the video: {combined_transcript}"
        if combined_transcript
        else ""
    )
def combine_prompt_data(
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Assembles the prompt sections found under ``prompt_dir`` into a single string.

    Sections are added in this order, each only when it yields non-blank text:
    the RAG coordinates from ``index_search.json`` (only when ``n_coords`` is
    given), the metadata from ``metadata.json`` (``text_prediction``), the content
    of ``image_search_content.json`` (``image_prediction``), the content of
    ``text_search_content.json`` (``text_prediction``), and the transcripts in the
    ``audio`` sub-directory (always).

    Args:
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Accepted for interface compatibility; this
            function does not use it.
        n_coords (int, optional): Number of coordinates for the RAG section;
            None disables that section.
        image_prediction (bool): Whether to include image-search content.
        text_prediction (bool): Whether to include metadata and text-search content.

    Returns:
        str: The non-blank sections joined by blank lines.
    """

    def in_prompt_dir(name: str) -> str:
        return os.path.join(prompt_dir, name)

    sections = []

    # RAG coordinates are opt-in through n_coords.
    if n_coords is not None:
        sections.append(rag_prompt(in_prompt_dir("index_search.json"), n_coords))

    # Metadata belongs to the text-based inputs.
    if text_prediction:
        sections.append(metadata_prompt(in_prompt_dir("metadata.json")))

    # Search content: image-derived first, then text-derived.
    if image_prediction:
        sections.append(search_content_prompt(in_prompt_dir("image_search_content.json")))
    if text_prediction:
        sections.append(search_content_prompt(in_prompt_dir("text_search_content.json")))

    # Transcripts are always considered.
    sections.append(transcript_prompt(in_prompt_dir("audio")))

    # Blank sections are dropped here, so no per-section emptiness checks are needed.
    return "\n\n".join(section for section in sections if section.strip())
def diversification_prompt(
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Fills the diversification template with the prompt data gathered from ``prompt_dir``.

    All arguments are forwarded unchanged to ``combine_prompt_data``; its result is
    substituted for the ``prompt_data`` placeholder of the stripped
    ``DIVERSIFICATION_PROMPT`` template.

    Args:
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Forwarded to ``combine_prompt_data``.
        n_coords (int, optional): Forwarded to ``combine_prompt_data``.
        image_prediction (bool): Forwarded to ``combine_prompt_data``.
        text_prediction (bool): Forwarded to ``combine_prompt_data``.

    Returns:
        str: The formatted diversification prompt.
    """
    assembled = combine_prompt_data(
        prompt_dir,
        n_search=n_search,
        n_coords=n_coords,
        image_prediction=image_prediction,
        text_prediction=text_prediction,
    )
    return DIVERSIFICATION_PROMPT.strip().format(prompt_data=assembled)
def location_prompt(location: str) -> str:
    """
    Fills the location template with the given location.

    Args:
        location (str): Value substituted for the ``location`` placeholder.

    Returns:
        str: The stripped ``LOCATION_PROMPT`` template formatted with the
            location, or an empty string when ``location`` is falsy.
    """
    if location:
        return LOCATION_PROMPT.strip().format(location=location)
    return ""
def verification_prompt(
    satellite_image_id: int,
    prediction: dict,
    prompt_dir: str,
    n_search: int | None = None,
    n_coords: int | None = None,
    image_prediction: bool = True,
    text_prediction: bool = True,
) -> str:
    """
    Fills the verification template with prompt data, a prediction and an image id.

    Args:
        satellite_image_id (int): Image id; rendered zero-padded to three digits.
        prediction (dict): Prediction to verify; embedded as JSON with ``indent=2``.
        prompt_dir (str): Directory holding the prompt input files.
        n_search (int, optional): Forwarded to ``combine_prompt_data``.
        n_coords (int, optional): Forwarded to ``combine_prompt_data``.
        image_prediction (bool): Forwarded to ``combine_prompt_data``.
        text_prediction (bool): Forwarded to ``combine_prompt_data``.

    Returns:
        str: The stripped ``VERIFICATION_PROMPT`` template with its
            ``prompt_data``, ``prediction`` and ``satellite_image_id``
            placeholders filled in.
    """
    context = combine_prompt_data(
        prompt_dir,
        n_search=n_search,
        n_coords=n_coords,
        image_prediction=image_prediction,
        text_prediction=text_prediction,
    )
    template = VERIFICATION_PROMPT.strip()
    prediction_json = json.dumps(prediction, indent=2)
    image_tag = format(satellite_image_id, "03d")
    return template.format(
        prompt_data=context,
        prediction=prediction_json,
        satellite_image_id=image_tag,
    )