Spaces:
Sleeping
Sleeping
| """ | |
| Utility functions for the RAG tools. | |
| """ | |
| import logging | |
| import re | |
| from google.adk.tools.tool_context import ToolContext | |
| from vertexai import rag | |
| from ..config import ( | |
| LOCATION, | |
| PROJECT_ID, | |
| ) | |
| logger = logging.getLogger(__name__) | |
def get_corpus_resource_name(corpus_name: str) -> str:
    """
    Convert a corpus name to its full resource name if needed.

    Resolution order:
      1. A name already in ``projects/*/locations/*/ragCorpora/*`` form is
         returned (with any trailing slashes removed).
      2. A name equal to the display name of a corpus returned by
         ``rag.list_corpora()`` resolves to that corpus's ``name``.
      3. Otherwise the last path segment is sanitized and placed under the
         configured ``PROJECT_ID`` and ``LOCATION``.

    Args:
        corpus_name (str): The corpus name, display name, or (partial) resource path

    Returns:
        str: The full resource name of the corpus
    """
    logger.info(f"Getting resource name for corpus: {corpus_name}")

    # Trailing slashes carry no meaning, but left in place they make the
    # full-name pattern miss and leave the last path segment (the ID) empty.
    trimmed_name = corpus_name.rstrip("/")

    # Already a full resource name: nothing to resolve
    if re.match(r"^projects/[^/]+/locations/[^/]+/ragCorpora/[^/]+$", trimmed_name):
        return trimmed_name

    # Check if this is the display name of an existing corpus
    try:
        for corpus in rag.list_corpora():
            if hasattr(corpus, "display_name") and corpus.display_name == corpus_name:
                return corpus.name
    except Exception as e:
        # Best effort: if the listing fails, fall through to building the
        # name from the input instead of propagating the error.
        logger.warning(f"Error when checking for corpus display name: {str(e)}")

    # Keep only the final path segment; without a "/" this is the whole name
    corpus_id = trimmed_name.rsplit("/", 1)[-1]

    # Replace anything outside letters, digits, "_" and "-" with "_"
    corpus_id = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_id)

    # Construct the standardized resource name
    return f"projects/{PROJECT_ID}/locations/{LOCATION}/ragCorpora/{corpus_id}"
def check_corpus_exists(corpus_name: str, tool_context: ToolContext) -> bool:
    """
    Report whether a corpus with the given name exists.

    A positive result is remembered in ``tool_context.state`` under
    ``corpus_exists_<corpus_name>`` so later calls can skip the listing.
    On a fresh match, the corpus also becomes ``current_corpus`` when no
    current corpus is set yet.

    Args:
        corpus_name (str): The name of the corpus to check
        tool_context (ToolContext): The tool context for state management

    Returns:
        bool: True if the corpus exists, False otherwise (including when
            the lookup raises)
    """
    cache_key = f"corpus_exists_{corpus_name}"

    # A previously recorded positive result short-circuits the lookup
    if tool_context.state.get(cache_key):
        return True

    try:
        resource_name = get_corpus_resource_name(corpus_name)

        # Match on either the full resource name or the display name
        found = any(
            entry.name == resource_name or entry.display_name == corpus_name
            for entry in rag.list_corpora()
        )
        if not found:
            return False

        # Remember the hit for subsequent calls
        tool_context.state[cache_key] = True

        # Adopt this corpus as current only when none is selected yet
        if not tool_context.state.get("current_corpus"):
            tool_context.state["current_corpus"] = corpus_name
        return True
    except Exception as e:
        logger.error(f"Error checking if corpus exists: {str(e)}")
        # The lookup failed, so treat the corpus as not existing
        return False
def set_current_corpus(corpus_name: str, tool_context: ToolContext) -> bool:
    """
    Make the given corpus the current one in the tool context state.

    The state is only changed when ``check_corpus_exists`` confirms the
    corpus.

    Args:
        corpus_name (str): The name of the corpus to set as current
        tool_context (ToolContext): The tool context for state management

    Returns:
        bool: True if the corpus exists and was set as current, False otherwise
    """
    # Refuse to select a corpus that cannot be confirmed
    if not check_corpus_exists(corpus_name, tool_context):
        return False

    tool_context.state["current_corpus"] = corpus_name
    return True