Spaces:

tokyotechlab
/

geolocation

Sleeping

App Files Files Community

geolocation / src /prompt /factory.py

3v324v23

init prj

eff2be4 3 months ago

raw

history blame contribute delete

12.7 kB

	import json
	import os

	from pydantic import BaseModel

	from .template import DIVERSIFICATION_PROMPT, LOCATION_PROMPT, VERIFICATION_PROMPT


	class Evidence(BaseModel):
	    """A single piece of evidence: an analysis text plus optional references."""

	    # Free-text analysis for this piece of evidence.
	    analysis: str
	    # Reference strings backing the analysis (presumably URLs -- confirm with
	    # the producer); defaults to an empty list when omitted.
	    references: list[str] = []


	class LocationPrediction(BaseModel):
	    """A predicted location: coordinates, a location string, and supporting evidence."""

	    # Predicted coordinates (presumably decimal degrees -- confirm with the caller).
	    latitude: float
	    longitude: float
	    # Textual form of the predicted location.
	    location: str
	    # Evidence items supporting the prediction; required, no default.
	    evidence: list[Evidence]


	class GPSPrediction(BaseModel):
	    """A GPS prediction with a single analysis text and its references."""

	    # Predicted coordinates (presumably decimal degrees -- confirm with the caller).
	    latitude: float
	    longitude: float
	    # Free-text analysis for the prediction.
	    analysis: str
	    # Reference strings for the analysis; required here (no default), unlike
	    # Evidence.references.
	    references: list[str]


	def rag_prompt(index_search_json: str, n_coords: int \| None = None) -> str:
	"""
	Creates a formatted string with GPS coordinates for similar and dissimilar images.

	Args:
	candidates_gps (list[tuple]): List of (lat, lon) tuples for similar images.
	reverse_gps (list[tuple]): List of (lat, lon) tuples for dissimilar images.
	n_coords (int, optional): Number of coords to include from each list. Defaults to all.

	Returns:
	str: Formatted string with coordinates for reference.
	"""
	if not os.path.exists(index_search_json):
	return ""

	with open(index_search_json, "r", encoding="utf-8") as file:
	data = json.load(file)

	candidates_gps = data.get("candidates_gps", [])
	reverse_gps = data.get("reverse_gps", [])

	if n_coords is not None:
	candidates_gps = candidates_gps[: min(n_coords, len(candidates_gps))]
	reverse_gps = reverse_gps[: min(n_coords, len(reverse_gps))]
	else:
	candidates_gps = candidates_gps
	reverse_gps = reverse_gps

	candidates_str = (
	"[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in candidates_gps) + "]"
	)
	reverse_str = "[" + ", ".join(f"[{lat}, {lon}]" for (lat, lon) in reverse_gps) + "]"
	return f"For your reference, these are coordinates of some similar images: {candidates_str}, and these are coordinates of some dissimilar images: {reverse_str}."


	def metadata_prompt(metadata_file_path: str) -> str:
	    """
	    Turn a metadata JSON file into a single descriptive sentence block.

	    Only the known fields that are present and non-empty are included, always
	    in the same fixed order.

	    Args:
	        metadata_file_path (str): Path to the metadata JSON file

	    Returns:
	        str: "Metadata for the image is: <Label>: <value>. ..." or "" when the
	            path is empty or missing, no known field has a value, or anything
	            goes wrong while reading the file
	    """
	    if not (metadata_file_path and os.path.exists(metadata_file_path)):
	        return ""

	    # (JSON key, label shown in the prompt), listed in output order.
	    labelled_keys = (
	        ("location", "Location"),
	        ("violence level", "Violence level"),
	        ("title", "Title"),
	        ("social media link", "Social media link"),
	        ("description", "Description"),
	        ("category", "Category"),
	    )

	    try:
	        with open(metadata_file_path, "r", encoding="utf-8") as handle:
	            loaded = json.load(handle)

	        if not loaded:
	            return ""

	        segments = [
	            f"{label}: {loaded[key]}"
	            for key, label in labelled_keys
	            if key in loaded and loaded[key]
	        ]
	        if not segments:
	            return ""

	        return "Metadata for the image is: " + ". ".join(segments) + "."

	    except Exception:
	        # Deliberately best-effort: any failure means "no metadata text".
	        return ""


	def search_prompt(search_candidates: list[str], n_search: int \| None = None) -> str:
	"""
	Formats search candidate links into a prompt string.

	Args:
	search_candidates (list[str]): List of candidate URLs from image search
	n_search (int): Number of results to include (default: 5)

	Returns:
	str: Formatted string with candidate links, each on a new line

	Example:
	>>> candidates = search_prompt(["https://example1.com", "https://example2.com"], n_search=3)
	>>> print(candidates)
	Similar image can be found in those links:
	https://example1.com
	https://example2.com
	"""

	if not search_candidates or not isinstance(search_candidates, list):
	return ""

	EXCLUDE_DOMAINS = [
	"x.com",
	"twitter.com",
	"linkedin.com",
	"bbc.com",
	"bbc.co.uk",
	"instagram.com",
	"tiktok.com",
	]

	for domain in EXCLUDE_DOMAINS:
	search_candidates = [url for url in search_candidates if domain not in url]

	if n_search is not None:
	search_candidates = search_candidates[: min(n_search, len(search_candidates))]

	try:
	prompt = "\n".join(search_candidates)
	return prompt

	except Exception:
	return ""


	def image_search_prompt(image_search_json: str, n_search: int | None = None) -> str:
	    """
	    Reads an image-search JSON file and lists the pages that contain matching images.

	    The file is expected to hold a list of entries; the links under each
	    entry's "pages_with_matching_images" key are merged, de-duplicated in
	    first-seen order (so the result is the same on every run), and formatted
	    with search_prompt(). The "full_matching_images" and
	    "partial_matching_images" keys are currently not part of the prompt.

	    Args:
	        image_search_json (str): Path to the image search JSON file
	        n_search (int, optional): Maximum number of links to include; None
	            keeps all of them

	    Returns:
	        str: A header line followed by one link per line, or "" when the file
	            does not exist or no link is left to show
	    """
	    if not os.path.exists(image_search_json):
	        return ""

	    with open(image_search_json, "r", encoding="utf-8") as file:
	        data_list = json.load(file)

	    # A dict is used as an insertion-ordered set: iterating a set of strings
	    # gives a different order from run to run, which made the links selected
	    # by n_search unpredictable.
	    pages_with_matching_images: dict[str, None] = {}
	    for json_data in data_list:
	        if "pages_with_matching_images" in json_data:
	            pages_with_matching_images.update(
	                dict.fromkeys(json_data["pages_with_matching_images"])
	            )

	    links = search_prompt(list(pages_with_matching_images), n_search=n_search)
	    if not links:
	        # Do not emit a header announcing links when there are none under it.
	        return ""

	    return "Those are pages with matching images:\n" + links


	def search_content_prompt(search_content_json: str) -> str:
	    """
	    Load search content from a JSON file and render it as pretty-printed JSON text.

	    Args:
	        search_content_json (str): Path to the JSON file with search content

	    Returns:
	        str: The content re-serialized with a 2-space indent, or "" when the
	            file is missing, cannot be read or parsed, or does not hold a
	            non-empty JSON list
	    """
	    if os.path.exists(search_content_json):
	        try:
	            with open(search_content_json, "r", encoding="utf-8") as handle:
	                payload = json.load(handle)

	            # Only a non-empty list counts as usable search content.
	            if payload and isinstance(payload, list):
	                return json.dumps(payload, indent=2)

	        except Exception:
	            # Best-effort: any failure falls through to the empty result.
	            pass

	    return ""


	def transcript_prompt(audio_dir: str) -> str:
	    """
	    Reads all transcript text files in the audio directory and returns a formatted string.

	    Files ending in ".txt" are read in sorted file-name order so the combined
	    transcript is the same on every run; files that are empty after stripping
	    whitespace are skipped.

	    Args:
	        audio_dir (str): Path to the audio directory containing transcript files

	    Returns:
	        str: "This is the transcript of the video: " followed by the transcripts
	            joined with newlines, or "" when the path is not a directory or no
	            transcript text was found
	    """
	    # isdir rather than exists: a path that exists but is a regular file
	    # would make os.listdir raise.
	    if not os.path.isdir(audio_dir):
	        return ""

	    transcript_content = []

	    # os.listdir returns entries in arbitrary order, hence the sort.
	    for txt_file in sorted(os.listdir(audio_dir)):
	        if not txt_file.endswith(".txt"):
	            continue
	        txt_path = os.path.join(audio_dir, txt_file)
	        with open(txt_path, "r", encoding="utf-8") as file:
	            text = file.read().strip()
	        if text:
	            transcript_content.append(text)

	    combined_transcript = "\n".join(transcript_content)
	    return (
	        f"This is the transcript of the video: {combined_transcript}"
	        if combined_transcript
	        else ""
	    )


	def combine_prompt_data(
	    prompt_dir: str,
	    n_search: int | None = None,
	    n_coords: int | None = None,
	    image_prediction: bool = True,
	    text_prediction: bool = True,
	) -> str:
	    """
	    Combines all prompt data found in a prompt directory into one prompt string.

	    The sections are added in this order, each read from a fixed name inside
	    prompt_dir, and empty sections are left out:
	        1. RAG coordinates from "index_search.json" (only when n_coords is given)
	        2. Metadata from "metadata.json" (only when text_prediction is true)
	        3. Image search content from "image_search_content.json" (only when
	           image_prediction is true), then text search content from
	           "text_search_content.json" (only when text_prediction is true)
	        4. Transcripts from the "audio" sub-directory

	    Args:
	        prompt_dir (str): Directory containing the prompt input files
	        n_search (int, optional): Accepted for interface compatibility; none of
	            the sections combined here uses it
	        n_coords (int, optional): Number of coordinates to include in the RAG
	            section. None leaves the RAG section out entirely.
	        image_prediction (bool): Whether to include the image search content
	        text_prediction (bool): Whether to include the metadata and the text
	            search content

	    Returns:
	        str: The non-empty sections joined by blank lines ("" if there are none)

	    Example:
	        >>> prompt = combine_prompt_data("path/to/prompt_dir", n_coords=5)
	    """
	    prompt_parts = []

	    # 1. RAG prompt (optional)
	    if n_coords is not None:
	        prompt_parts.append(
	            rag_prompt(os.path.join(prompt_dir, "index_search.json"), n_coords)
	        )

	    # 2. Metadata prompt
	    if text_prediction:
	        prompt_parts.append(metadata_prompt(os.path.join(prompt_dir, "metadata.json")))

	    # 3. Search prompt
	    if image_prediction:
	        prompt_parts.append(
	            search_content_prompt(os.path.join(prompt_dir, "image_search_content.json"))
	        )

	    if text_prediction:
	        prompt_parts.append(
	            search_content_prompt(os.path.join(prompt_dir, "text_search_content.json"))
	        )

	    # 4. Transcript prompt
	    prompt_parts.append(transcript_prompt(os.path.join(prompt_dir, "audio")))

	    # Empty or whitespace-only sections are dropped here, in one place; the
	    # rest are combined with double newlines for readability.
	    return "\n\n".join(part for part in prompt_parts if part.strip())


	def diversification_prompt(
	    prompt_dir: str,
	    n_search: int | None = None,
	    n_coords: int | None = None,
	    image_prediction: bool = True,
	    text_prediction: bool = True,
	) -> str:
	    """
	    Builds the diversification prompt from the data in a prompt directory.

	    The data is gathered with combine_prompt_data() and substituted for the
	    "prompt_data" field of DIVERSIFICATION_PROMPT after stripping the
	    template's surrounding whitespace.

	    Args:
	        prompt_dir (str): Directory containing the prompt input files
	        n_search (int, optional): Forwarded to combine_prompt_data()
	        n_coords (int, optional): Forwarded to combine_prompt_data()
	        image_prediction (bool): Forwarded to combine_prompt_data()
	        text_prediction (bool): Forwarded to combine_prompt_data()

	    Returns:
	        str: The formatted diversification prompt

	    Example:
	        >>> prompt = diversification_prompt("path/to/prompt_dir", n_coords=5)
	    """
	    prompt_data = combine_prompt_data(
	        prompt_dir,
	        n_search=n_search,
	        n_coords=n_coords,
	        image_prediction=image_prediction,
	        text_prediction=text_prediction,
	    )

	    return DIVERSIFICATION_PROMPT.strip().format(prompt_data=prompt_data)


	def location_prompt(location: str) -> str:
	    """
	    Fill the location template with the given location.

	    Args:
	        location (str): The location to insert into the prompt.

	    Returns:
	        str: LOCATION_PROMPT, stripped of surrounding whitespace and formatted
	            with the location, or "" when no location is given.
	    """
	    # An empty or missing location produces no prompt at all.
	    return LOCATION_PROMPT.strip().format(location=location) if location else ""


	def verification_prompt(
	    satellite_image_id: int,
	    prediction: dict,
	    prompt_dir: str,
	    n_search: int | None = None,
	    n_coords: int | None = None,
	    image_prediction: bool = True,
	    text_prediction: bool = True,
	) -> str:
	    """
	    Creates a verification prompt from the prompt directory data and a prediction.

	    The data is gathered with combine_prompt_data() and substituted into
	    VERIFICATION_PROMPT (stripped of surrounding whitespace) together with the
	    prediction and the satellite image id.

	    Args:
	        satellite_image_id (int): Id of the satellite image; rendered
	            zero-padded to three digits (e.g. 7 -> "007")
	        prediction (dict): The prediction to verify; must be JSON-serializable,
	            it is embedded as JSON with a 2-space indent
	        prompt_dir (str): Directory containing the prompt input files
	        n_search (int, optional): Forwarded to combine_prompt_data()
	        n_coords (int, optional): Forwarded to combine_prompt_data()
	        image_prediction (bool): Forwarded to combine_prompt_data()
	        text_prediction (bool): Forwarded to combine_prompt_data()

	    Returns:
	        str: Formatted verification prompt string.
	    """
	    prompt_data = combine_prompt_data(
	        prompt_dir,
	        n_search=n_search,
	        n_coords=n_coords,
	        image_prediction=image_prediction,
	        text_prediction=text_prediction,
	    )

	    return VERIFICATION_PROMPT.strip().format(
	        prompt_data=prompt_data,
	        prediction=json.dumps(prediction, indent=2),
	        satellite_image_id=f"{satellite_image_id:03d}",
	    )