# Bima Ardhia
# initttt
# d2d5a16
"""
Utility functions for the RAG tools.
"""
import logging
import re
from google.adk.tools.tool_context import ToolContext
from vertexai import rag
from ..config import (
LOCATION,
PROJECT_ID,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def get_corpus_resource_name(corpus_name: str) -> str:
    """
    Convert a corpus name to its full resource name if needed.

    Resolution happens in this order:
      1. A string already shaped like
         ``projects/<p>/locations/<l>/ragCorpora/<id>`` is returned unchanged.
      2. Otherwise the name is compared against the ``display_name`` of each
         corpus returned by ``rag.list_corpora()``; on a match that corpus's
         ``name`` is returned. Any error raised during this lookup is logged
         as a warning and the lookup is skipped (best effort).
      3. Otherwise the last path segment of the input is used as the corpus
         ID, every character outside ``[a-zA-Z0-9_-]`` is replaced with
         ``_``, and a resource name is built from the configured
         ``PROJECT_ID`` and ``LOCATION``.

    Args:
        corpus_name (str): The corpus name, partial path, or display name

    Returns:
        str: The full resource name of the corpus
    """
    logger.info("Getting resource name for corpus: %s", corpus_name)

    # Already a full resource name: nothing to resolve.
    if re.match(r"^projects/[^/]+/locations/[^/]+/ragCorpora/[^/]+$", corpus_name):
        return corpus_name

    # Best-effort lookup by display name. Failures here must not prevent the
    # fallback construction below, so they are logged and otherwise ignored.
    try:
        for corpus in rag.list_corpora():
            if getattr(corpus, "display_name", None) == corpus_name:
                return corpus.name
    except Exception as e:
        logger.warning("Error when checking for corpus display name: %s", e)

    # Use the last path segment as the corpus ID. Trailing slashes are
    # dropped first so an input such as "my-corpus/" resolves to "my-corpus"
    # instead of an empty ID (which would yield a name ending in "ragCorpora/").
    corpus_id = corpus_name.rstrip("/").rsplit("/", 1)[-1]

    # Replace any character that is not a letter, digit, underscore or hyphen.
    corpus_id = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_id)

    # Construct the standardized resource name from the configured project/location.
    return f"projects/{PROJECT_ID}/locations/{LOCATION}/ragCorpora/{corpus_id}"
def check_corpus_exists(corpus_name: str, tool_context: ToolContext) -> bool:
    """
    Report whether a corpus with the given name exists.

    A truthy ``corpus_exists_<corpus_name>`` entry in the tool context state
    short-circuits the check. Otherwise the corpora returned by
    ``rag.list_corpora()`` are scanned for one whose resource name matches
    the resolved name or whose display name matches ``corpus_name``. On a
    match the state entry is set, and ``current_corpus`` is set to
    ``corpus_name`` when no current corpus is recorded yet.

    Args:
        corpus_name (str): The name of the corpus to check
        tool_context (ToolContext): The tool context for state management

    Returns:
        bool: True if the corpus exists, False otherwise (including when the
        lookup raises an error, which is logged)
    """
    cache_key = f"corpus_exists_{corpus_name}"

    # A previously recorded positive result is trusted as-is.
    if tool_context.state.get(cache_key):
        return True

    try:
        # Resolve to the full resource name, then scan the listing for a match
        # on either the resource name or the display name.
        resource_name = get_corpus_resource_name(corpus_name)
        matched = any(
            entry.name == resource_name or entry.display_name == corpus_name
            for entry in rag.list_corpora()
        )
        if not matched:
            return False

        # Remember the positive result for later calls.
        tool_context.state[cache_key] = True
        # Adopt this corpus as the current one only if none is set.
        if not tool_context.state.get("current_corpus"):
            tool_context.state["current_corpus"] = corpus_name
        return True
    except Exception as e:
        # If the check itself fails, treat the corpus as not existing.
        logger.error(f"Error checking if corpus exists: {str(e)}")
        return False
def set_current_corpus(corpus_name: str, tool_context: ToolContext) -> bool:
    """
    Record ``corpus_name`` as the current corpus in the tool context state.

    The state is only updated when ``check_corpus_exists`` reports that the
    corpus exists.

    Args:
        corpus_name (str): The name of the corpus to set as current
        tool_context (ToolContext): The tool context for state management

    Returns:
        bool: True if the corpus exists and was set as current, False otherwise
    """
    # Guard clause: leave the state untouched for an unknown corpus.
    exists = check_corpus_exists(corpus_name, tool_context)
    if not exists:
        return False

    tool_context.state["current_corpus"] = corpus_name
    return True