Spaces:

bimaardhia
/

rag_api_ui

Sleeping

rag_api_ui / user_agent /tools /utils.py

Bima Ardhia

initttt

d2d5a16 5 months ago

3.9 kB

	"""
	Utility functions for the RAG tools.
	"""

	import logging
	import re

	from google.adk.tools.tool_context import ToolContext
	from vertexai import rag

	from ..config import (
	LOCATION,
	PROJECT_ID,
	)

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	def get_corpus_resource_name(corpus_name: str) -> str:
	    """
	    Convert a corpus name to its full resource name if needed.

	    Resolution order:
	      1. A value already shaped like
	         ``projects/<p>/locations/<l>/ragCorpora/<id>`` is returned unchanged.
	      2. Otherwise the corpora returned by ``rag.list_corpora()`` are scanned
	         and the ``name`` of the first one whose ``display_name`` equals
	         ``corpus_name`` is returned. A failure during this lookup is logged
	         as a warning and resolution continues with step 3.
	      3. Otherwise a resource name is built from ``PROJECT_ID``, ``LOCATION``
	         and a corpus ID derived from ``corpus_name``: its last non-empty
	         ``/``-separated segment, with every character outside
	         ``[a-zA-Z0-9_-]`` replaced by ``_``.

	    Args:
	        corpus_name (str): The corpus name, display name, or (partial)
	            resource path

	    Returns:
	        str: The full resource name of the corpus
	    """
	    logger.info("Getting resource name for corpus: %s", corpus_name)

	    # If it's already a full resource name, there is nothing to resolve.
	    if re.match(r"^projects/[^/]+/locations/[^/]+/ragCorpora/[^/]+$", corpus_name):
	        return corpus_name

	    # Check if this is the display name of an existing corpus.
	    try:
	        for corpus in rag.list_corpora():
	            if hasattr(corpus, "display_name") and corpus.display_name == corpus_name:
	                return corpus.name
	    except Exception as e:
	        # Best effort: if the listing fails, fall through and build the name
	        # from the input instead of propagating the error.
	        logger.warning("Error when checking for corpus display name: %s", e)

	    # Use the last non-empty path segment as the corpus ID. Stripping trailing
	    # slashes first keeps an input such as "ragCorpora/123/" from yielding an
	    # empty ID; for an input without any "/" this is the input itself.
	    corpus_id = corpus_name.rstrip("/").split("/")[-1]

	    # Replace any character that is not alphanumeric, "_" or "-".
	    corpus_id = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_id)

	    # Construct the standardized resource name.
	    return f"projects/{PROJECT_ID}/locations/{LOCATION}/ragCorpora/{corpus_id}"


	def check_corpus_exists(corpus_name: str, tool_context: ToolContext) -> bool:
	    """
	    Check if a corpus with the given name exists.

	    A positive result is remembered in ``tool_context.state`` under the key
	    ``corpus_exists_<corpus_name>``, so later calls for the same name return
	    True without listing corpora again. When a corpus is found by listing and
	    the state holds no truthy ``current_corpus``, ``corpus_name`` is also
	    stored as ``current_corpus``.

	    Args:
	        corpus_name (str): The name of the corpus to check
	        tool_context (ToolContext): The tool context for state management

	    Returns:
	        bool: True if the corpus exists, False otherwise (including when the
	            lookup itself raises; the error is logged)
	    """
	    state_key = f"corpus_exists_{corpus_name}"

	    # Fast path: an earlier call already confirmed this corpus.
	    if tool_context.state.get(state_key):
	        return True

	    try:
	        # Get full resource name
	        corpus_resource_name = get_corpus_resource_name(corpus_name)

	        # List all corpora and match on either resource name or display name.
	        for corpus in rag.list_corpora():
	            # getattr keeps one corpus object that lacks an attribute from
	            # aborting the whole scan via the except clause below.
	            if (
	                getattr(corpus, "name", None) == corpus_resource_name
	                or getattr(corpus, "display_name", None) == corpus_name
	            ):
	                # Update state
	                tool_context.state[state_key] = True
	                # Also set this as the current corpus if no current corpus is set
	                if not tool_context.state.get("current_corpus"):
	                    tool_context.state["current_corpus"] = corpus_name
	                return True

	        return False
	    except Exception as e:
	        logger.error("Error checking if corpus exists: %s", e)
	        # If we can't check, assume it doesn't exist
	        return False


	def set_current_corpus(corpus_name: str, tool_context: ToolContext) -> bool:
	    """
	    Make the given corpus the current one in the tool context state.

	    The corpus is first verified through ``check_corpus_exists``; only when
	    that succeeds is ``corpus_name`` written to
	    ``tool_context.state["current_corpus"]``.

	    Args:
	        corpus_name (str): The name of the corpus to set as current
	        tool_context (ToolContext): The tool context for state management

	    Returns:
	        bool: True if the corpus exists and was set as current, False otherwise
	    """
	    # Guard: leave "current_corpus" untouched when the corpus is not found.
	    if not check_corpus_exists(corpus_name, tool_context):
	        return False

	    tool_context.state["current_corpus"] = corpus_name
	    return True