Spaces:

bimaardhia
/

rag_api_ui

Sleeping

App Files Files Community

Bima Ardhia committed on Aug 20, 2025

Commit

d2d5a16

1 Parent(s): e7d5bbb

initttt

Browse files

Files changed (15) hide show

.gitignore +4 -0
DockerFile +25 -0
requirements.txt +5 -0
user_agent/__init__.py +35 -0
user_agent/agent.py +63 -0
user_agent/config.py +26 -0
user_agent/tools/__init__.py +29 -0
user_agent/tools/add_data.py +156 -0
user_agent/tools/create_corpus.py +78 -0
user_agent/tools/delete_corpus.py +67 -0
user_agent/tools/delete_document.py +58 -0
user_agent/tools/get_corpus_info.py +99 -0
user_agent/tools/list_corpora.py +51 -0
user_agent/tools/rag_query.py +112 -0
user_agent/tools/utils.py +117 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.env
+__pycache__/
+.venv/
+rag_agent/

DockerFile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Step 1: Use an official Python runtime as a parent image
+# Using 'slim' is a good practice for smaller image sizes
+FROM python:3.11-slim
+# Step 2: Set the working directory inside the container
+WORKDIR /app
+# Step 3: Copy the requirements file into the container
+# This is done first to leverage Docker's layer caching.
+# Dependencies won't be re-installed unless requirements.txt changes.
+COPY requirements.txt .
+# Step 4: Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Step 5: Copy the rest of your application's code into the container
+COPY . .
+# Step 6: Expose the port the app runs on
+# `adk web` listens on port 8000 unless --port is given, so the port is
+# passed explicitly in CMD below to keep it in sync with this EXPOSE.
+EXPOSE 8080
+# Step 7: Define the command to run your application
+# Use --host=0.0.0.0 to make the server accessible from outside the container
+# and --port=8080 so the server actually binds the exposed port.
+CMD ["adk", "web", "--host=0.0.0.0", "--port=8080"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+google-cloud-aiplatform==1.92.0
+google-cloud-storage==2.19.0
+google-genai==1.14.0
+gitpython==3.1.40
+google-adk==0.5.0

user_agent/__init__.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""
+Vertex AI RAG Agent
+A package for interacting with Google Cloud Vertex AI RAG capabilities.
+"""
+import os
+import vertexai
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Get Vertex AI configuration from environment
+PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
+LOCATION = os.environ.get("GOOGLE_CLOUD_LOCATION")
+# Initialize Vertex AI at package load time
+try:
+    if PROJECT_ID and LOCATION:
+        print(f"Initializing Vertex AI with project={PROJECT_ID}, location={LOCATION}")
+        vertexai.init(project=PROJECT_ID, location=LOCATION)
+        print("Vertex AI initialization successful")
+    else:
+        print(
+            f"Missing Vertex AI configuration. PROJECT_ID={PROJECT_ID}, LOCATION={LOCATION}. "
+            f"Tools requiring Vertex AI may not work properly."
+        )
+except Exception as e:
+    print(f"Failed to initialize Vertex AI: {str(e)}")
+    print("Please check your Google Cloud credentials and project settings.")
+# Import agent after initialization is complete
+from . import agent

user_agent/agent.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from google.adk.agents import Agent
+# Import the necessary tools for the agent.
+from .tools.get_corpus_info import get_corpus_info
+from .tools.list_corpora import list_corpora
+from .tools.rag_query import rag_query
+# Module-level agent instance. It registers only the query, list and inspect
+# tools; its behavior is driven by the instruction prompt below.
+# The agent's instructions have been updated to enforce a specific workflow.
+root_agent = Agent(
+    name="RagAgent",
+    # Using Gemini 2.5 Flash for best performance with RAG operations
+    model="gemini-2.5-flash",
+    description="An assistant for finding information, listing available document collections, and viewing their details.",
+    # Tool functions the model may call; the prompt refers to them by these names.
+    tools=[
+        rag_query,
+        list_corpora,
+        get_corpus_info,
+    ],
+    # The instructions have been significantly updated to enforce a mandatory workflow.
+    # NOTE: this string is sent to the model verbatim, so the tool and parameter
+    # names mentioned in it must stay in sync with the functions listed in `tools`.
+    instruction="""
+    # 🧠 Document Query Assistant
+    You are a helpful assistant designed to answer questions based on a collection of documents.
+    You must follow a strict workflow to ensure you always have the right context before acting.
+    ## Mandatory Workflow for Every User Request
+    **For every single request from the user, you MUST follow these steps in order:**
+    1.  **Always Run `list_corpora` First**: Before doing anything else, you must call the `list_corpora` tool. This step is mandatory to get the current context of available document collections.
+    2.  **Analyze the Context**: Review the output from `list_corpora` and analyze the user's request.
+    3.  **Decide the Next Action**: Based on the list of corpora and the user's prompt, choose one of the following paths:
+        * **If the user is asking a knowledge-based question**: Determine the most relevant corpus from the list for the user's query. Then, call the `rag_query` tool using that `corpus_name`. If no corpus seems relevant, inform the user.
+        * **If the user is asking for details about a specific corpus**: Find the corpus in the list and then call the `get_corpus_info` tool with the correct `corpus_name`.
+        * **If the user is asking to see the list of corpora**: Simply present the results from the `list_corpora` call you already made in step 1.
+    ## Your Capabilities (Derived from your workflow)
+    * **Answer Questions**: By first checking available corpora and then querying the most relevant one.
+    * **List Document Collections**: By running your mandatory first step and presenting the result.
+    * **Get Corpus Details**: By first checking that the corpus exists and then fetching its details.
+    ## Available Tools (To be used only after step 1)
+    1.  `rag_query`: Searches a document collection to answer a specific question.
+        - Parameters:
+            - `corpus_name`: The name of the document collection to search.
+            - `query`: The user's question.
+    2.  `list_corpora`: Lists all available document collections. (Your mandatory first step).
+        - This tool takes no parameters.
+    3.  `get_corpus_info`: Gets detailed information and metadata about a specific corpus.
+        - Parameters:
+            - `corpus_name`: The name of the corpus to get information about.
+    ## Communication Guidelines
+    - Be clear and direct in your responses.
+    - When you answer a question, state which document collection you used.
+    - If you cannot find a relevant document collection for a query, inform the user clearly. Do not try to answer without a source.
+    """,
+)

user_agent/config.py ADDED Viewed

	@@ -0,0 +1,26 @@

+"""
+Configuration settings for the RAG Agent.
+These settings are used by the various RAG tools.
+Vertex AI initialization is performed in the package's __init__.py
+"""
+import os
+from dotenv import load_dotenv
+# Load environment variables (this is redundant if __init__.py is imported first,
+# but included for safety when importing config directly)
+load_dotenv()
+# Vertex AI settings
+# Both values are None when the corresponding environment variable is unset.
+PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
+LOCATION = os.environ.get("GOOGLE_CLOUD_LOCATION")
+# RAG settings
+# Chunking parameters passed to rag.ChunkingConfig when files are imported (add_data)
+DEFAULT_CHUNK_SIZE = 512
+DEFAULT_CHUNK_OVERLAP = 100
+# Retrieval parameters passed to rag.RagRetrievalConfig / rag.Filter (rag_query)
+DEFAULT_TOP_K = 3
+DEFAULT_DISTANCE_THRESHOLD = 0.5
+# Publisher embedding model configured on newly created corpora (create_corpus)
+DEFAULT_EMBEDDING_MODEL = "publishers/google/models/text-embedding-005"
+# Passed as max_embedding_requests_per_min to rag.import_files (add_data)
+DEFAULT_EMBEDDING_REQUESTS_PER_MIN = 1000

user_agent/tools/__init__.py ADDED Viewed

	@@ -0,0 +1,29 @@

+"""
+RAG Tools package for interacting with Vertex AI RAG corpora.
+"""
+from .add_data import add_data
+from .create_corpus import create_corpus
+from .delete_corpus import delete_corpus
+from .delete_document import delete_document
+from .get_corpus_info import get_corpus_info
+from .list_corpora import list_corpora
+from .rag_query import rag_query
+from .utils import (
+    check_corpus_exists,
+    get_corpus_resource_name,
+    set_current_corpus,
+)
+# Names re-exported by the tools package: the seven tool functions followed by
+# the three shared helpers from .utils.
+__all__ = [
+    "add_data",
+    "create_corpus",
+    "list_corpora",
+    "rag_query",
+    "get_corpus_info",
+    "delete_corpus",
+    "delete_document",
+    "check_corpus_exists",
+    "get_corpus_resource_name",
+    "set_current_corpus",
+]

user_agent/tools/add_data.py ADDED Viewed

	@@ -0,0 +1,156 @@

+"""
+Tool for adding new data sources to a Vertex AI RAG corpus.
+"""
+import re
+from typing import List
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from ..config import (
+    DEFAULT_CHUNK_OVERLAP,
+    DEFAULT_CHUNK_SIZE,
+    DEFAULT_EMBEDDING_REQUESTS_PER_MIN,
+)
+from .utils import check_corpus_exists, get_corpus_resource_name
+def add_data(
+    corpus_name: str,
+    paths: List[str],
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Add new data sources to a Vertex AI RAG corpus.
+    Args:
+        corpus_name (str): The name of the corpus to add data to. If empty, the current corpus will be used.
+        paths (List[str]): List of URLs or GCS paths to add to the corpus.
+                          Supported formats:
+                          - Google Drive: "https://drive.google.com/file/d/{FILE_ID}/view"
+                          - Google Docs/Sheets/Slides: "https://docs.google.com/{type}/d/{FILE_ID}/..."
+                          - Google Cloud Storage: "gs://{BUCKET}/{PATH}"
+                          Example: ["https://drive.google.com/file/d/123", "gs://my_bucket/my_files_dir"]
+        tool_context (ToolContext): The tool context
+    Returns:
+        dict: Information about the added data and status
+    """
+    # Check if the corpus exists
+    if not check_corpus_exists(corpus_name, tool_context):
+        return {
+            "status": "error",
+            "message": f"Corpus '{corpus_name}' does not exist. Please create it first using the create_corpus tool.",
+            "corpus_name": corpus_name,
+            "paths": paths,
+        }
+    # Validate inputs
+    if not paths or not all(isinstance(path, str) for path in paths):
+        return {
+            "status": "error",
+            "message": "Invalid paths: Please provide a list of URLs or GCS paths",
+            "corpus_name": corpus_name,
+            "paths": paths,
+        }
+    # Pre-process paths to validate and convert Google Docs URLs to Drive format if needed
+    validated_paths = []
+    invalid_paths = []
+    conversions = []
+    for path in paths:
+        if not path or not isinstance(path, str):
+            invalid_paths.append(f"{path} (Not a valid string)")
+            continue
+        # Check for Google Docs/Sheets/Slides URLs and convert them to Drive format
+        docs_match = re.match(
+            r"https:\/\/docs\.google\.com\/(?:document|spreadsheets|presentation)\/d\/([a-zA-Z0-9_-]+)(?:\/|$)",
+            path,
+        )
+        if docs_match:
+            file_id = docs_match.group(1)
+            drive_url = f"https://drive.google.com/file/d/{file_id}/view"
+            validated_paths.append(drive_url)
+            conversions.append(f"{path} → {drive_url}")
+            continue
+        # Check for valid Drive URL format
+        drive_match = re.match(
+            r"https:\/\/drive\.google\.com\/(?:file\/d\/|open\?id=)([a-zA-Z0-9_-]+)(?:\/|$)",
+            path,
+        )
+        if drive_match:
+            # Normalize to the standard Drive URL format
+            file_id = drive_match.group(1)
+            drive_url = f"https://drive.google.com/file/d/{file_id}/view"
+            validated_paths.append(drive_url)
+            if drive_url != path:
+                conversions.append(f"{path} → {drive_url}")
+            continue
+        # Check for GCS paths
+        if path.startswith("gs://"):
+            validated_paths.append(path)
+            continue
+        # If we're here, the path wasn't in a recognized format
+        invalid_paths.append(f"{path} (Invalid format)")
+    # Check if we have any valid paths after validation
+    if not validated_paths:
+        return {
+            "status": "error",
+            "message": "No valid paths provided. Please provide Google Drive URLs or GCS paths.",
+            "corpus_name": corpus_name,
+            "invalid_paths": invalid_paths,
+        }
+    try:
+        # Get the corpus resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # Set up chunking configuration
+        transformation_config = rag.TransformationConfig(
+            chunking_config=rag.ChunkingConfig(
+                chunk_size=DEFAULT_CHUNK_SIZE,
+                chunk_overlap=DEFAULT_CHUNK_OVERLAP,
+            ),
+        )
+        # Import files to the corpus
+        import_result = rag.import_files(
+            corpus_resource_name,
+            validated_paths,
+            transformation_config=transformation_config,
+            max_embedding_requests_per_min=DEFAULT_EMBEDDING_REQUESTS_PER_MIN,
+        )
+        # Set this as the current corpus if not already set
+        if not tool_context.state.get("current_corpus"):
+            tool_context.state["current_corpus"] = corpus_name
+        # Build the success message
+        conversion_msg = ""
+        if conversions:
+            conversion_msg = " (Converted Google Docs URLs to Drive format)"
+        return {
+            "status": "success",
+            "message": f"Successfully added {import_result.imported_rag_files_count} file(s) to corpus '{corpus_name}'{conversion_msg}",
+            "corpus_name": corpus_name,
+            "files_added": import_result.imported_rag_files_count,
+            "paths": validated_paths,
+            "invalid_paths": invalid_paths,
+            "conversions": conversions,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error adding data to corpus: {str(e)}",
+            "corpus_name": corpus_name,
+            "paths": paths,
+        }

user_agent/tools/create_corpus.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""
+Tool for creating a new Vertex AI RAG corpus.
+"""
+import re
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from ..config import (
+    DEFAULT_EMBEDDING_MODEL,
+)
+from .utils import check_corpus_exists
+def create_corpus(
+    corpus_name: str,
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Create a new Vertex AI RAG corpus with the specified name.
+    Args:
+        corpus_name (str): The name for the new corpus
+        tool_context (ToolContext): The tool context for state management
+    Returns:
+        dict: Status information about the operation
+    """
+    # Check if corpus already exists
+    if check_corpus_exists(corpus_name, tool_context):
+        return {
+            "status": "info",
+            "message": f"Corpus '{corpus_name}' already exists",
+            "corpus_name": corpus_name,
+            "corpus_created": False,
+        }
+    try:
+        # Clean corpus name for use as display name
+        display_name = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_name)
+        # Configure embedding model
+        embedding_model_config = rag.RagEmbeddingModelConfig(
+            vertex_prediction_endpoint=rag.VertexPredictionEndpoint(
+                publisher_model=DEFAULT_EMBEDDING_MODEL
+            )
+        )
+        # Create the corpus
+        rag_corpus = rag.create_corpus(
+            display_name=display_name,
+            backend_config=rag.RagVectorDbConfig(
+                rag_embedding_model_config=embedding_model_config
+            ),
+        )
+        # Update state to track corpus existence
+        tool_context.state[f"corpus_exists_{corpus_name}"] = True
+        # Set this as the current corpus
+        tool_context.state["current_corpus"] = corpus_name
+        return {
+            "status": "success",
+            "message": f"Successfully created corpus '{corpus_name}'",
+            "corpus_name": rag_corpus.name,
+            "display_name": rag_corpus.display_name,
+            "corpus_created": True,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error creating corpus: {str(e)}",
+            "corpus_name": corpus_name,
+            "corpus_created": False,
+        }

user_agent/tools/delete_corpus.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""
+Tool for deleting a Vertex AI RAG corpus when it's no longer needed.
+"""
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from .utils import check_corpus_exists, get_corpus_resource_name
+def delete_corpus(
+    corpus_name: str,
+    confirm: bool,
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Delete a Vertex AI RAG corpus when it's no longer needed.
+    Requires confirmation to prevent accidental deletion.
+    Args:
+        corpus_name (str): The full resource name of the corpus to delete.
+                           Preferably use the resource_name from list_corpora results.
+        confirm (bool): Must be set to True to confirm deletion
+        tool_context (ToolContext): The tool context
+    Returns:
+        dict: Status information about the deletion operation
+    """
+    # Check if corpus exists
+    if not check_corpus_exists(corpus_name, tool_context):
+        return {
+            "status": "error",
+            "message": f"Corpus '{corpus_name}' does not exist",
+            "corpus_name": corpus_name,
+        }
+    # Check if deletion is confirmed
+    if not confirm:
+        return {
+            "status": "error",
+            "message": "Deletion requires explicit confirmation. Set confirm=True to delete this corpus.",
+            "corpus_name": corpus_name,
+        }
+    try:
+        # Get the corpus resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # Delete the corpus
+        rag.delete_corpus(corpus_resource_name)
+        # Remove from state by setting to False
+        state_key = f"corpus_exists_{corpus_name}"
+        if state_key in tool_context.state:
+            tool_context.state[state_key] = False
+        return {
+            "status": "success",
+            "message": f"Successfully deleted corpus '{corpus_name}'",
+            "corpus_name": corpus_name,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error deleting corpus: {str(e)}",
+            "corpus_name": corpus_name,
+        }

user_agent/tools/delete_document.py ADDED Viewed

	@@ -0,0 +1,58 @@

+"""
+Tool for deleting a specific document from a Vertex AI RAG corpus.
+"""
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from .utils import check_corpus_exists, get_corpus_resource_name
+def delete_document(
+    corpus_name: str,
+    document_id: str,
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Delete a specific document from a Vertex AI RAG corpus.
+    Args:
+        corpus_name (str): The full resource name of the corpus containing the document.
+                          Preferably use the resource_name from list_corpora results.
+        document_id (str): The ID of the specific document/file to delete. This can be
+                          obtained from get_corpus_info results.
+        tool_context (ToolContext): The tool context
+    Returns:
+        dict: Status information about the deletion operation
+    """
+    # Check if corpus exists
+    if not check_corpus_exists(corpus_name, tool_context):
+        return {
+            "status": "error",
+            "message": f"Corpus '{corpus_name}' does not exist",
+            "corpus_name": corpus_name,
+            "document_id": document_id,
+        }
+    try:
+        # Get the corpus resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # Delete the document
+        rag_file_path = f"{corpus_resource_name}/ragFiles/{document_id}"
+        rag.delete_file(rag_file_path)
+        return {
+            "status": "success",
+            "message": f"Successfully deleted document '{document_id}' from corpus '{corpus_name}'",
+            "corpus_name": corpus_name,
+            "document_id": document_id,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error deleting document: {str(e)}",
+            "corpus_name": corpus_name,
+            "document_id": document_id,
+        }

user_agent/tools/get_corpus_info.py ADDED Viewed

	@@ -0,0 +1,99 @@

+"""
+Tool for retrieving detailed information about a specific RAG corpus.
+"""
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from .utils import check_corpus_exists, get_corpus_resource_name
+def get_corpus_info(
+    corpus_name: str,
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Get detailed information about a specific RAG corpus, including its files.
+    Args:
+        corpus_name (str): The full resource name of the corpus to get information about.
+                           Preferably use the resource_name from list_corpora results.
+        tool_context (ToolContext): The tool context
+    Returns:
+        dict: Information about the corpus and its files
+    """
+    try:
+        # Check if corpus exists
+        if not check_corpus_exists(corpus_name, tool_context):
+            return {
+                "status": "error",
+                "message": f"Corpus '{corpus_name}' does not exist",
+                "corpus_name": corpus_name,
+            }
+        # Get the corpus resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # Try to get corpus details first
+        corpus_display_name = corpus_name  # Default if we can't get actual display name
+        # Process file information
+        file_details = []
+        try:
+            # Get the list of files
+            files = rag.list_files(corpus_resource_name)
+            for rag_file in files:
+                # Get document specific details
+                try:
+                    # Extract the file ID from the name
+                    file_id = rag_file.name.split("/")[-1]
+                    file_info = {
+                        "file_id": file_id,
+                        "display_name": (
+                            rag_file.display_name
+                            if hasattr(rag_file, "display_name")
+                            else ""
+                        ),
+                        "source_uri": (
+                            rag_file.source_uri
+                            if hasattr(rag_file, "source_uri")
+                            else ""
+                        ),
+                        "create_time": (
+                            str(rag_file.create_time)
+                            if hasattr(rag_file, "create_time")
+                            else ""
+                        ),
+                        "update_time": (
+                            str(rag_file.update_time)
+                            if hasattr(rag_file, "update_time")
+                            else ""
+                        ),
+                    }
+                    file_details.append(file_info)
+                except Exception:
+                    # Continue to the next file
+                    continue
+        except Exception:
+            # Continue without file details
+            pass
+        # Basic corpus info
+        return {
+            "status": "success",
+            "message": f"Successfully retrieved information for corpus '{corpus_display_name}'",
+            "corpus_name": corpus_name,
+            "corpus_display_name": corpus_display_name,
+            "file_count": len(file_details),
+            "files": file_details,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error getting corpus information: {str(e)}",
+            "corpus_name": corpus_name,
+        }

user_agent/tools/list_corpora.py ADDED Viewed

	@@ -0,0 +1,51 @@

+"""
+Tool for listing all available Vertex AI RAG corpora.
+"""
+from typing import Dict, List, Union
+from vertexai import rag
+def list_corpora() -> dict:
+    """
+    List all available Vertex AI RAG corpora.
+    Returns:
+        dict: A list of available corpora and status, with each corpus containing:
+            - resource_name: The full resource name to use with other tools
+            - display_name: The human-readable name of the corpus
+            - create_time: When the corpus was created
+            - update_time: When the corpus was last updated
+    """
+    try:
+        # Get the list of corpora
+        corpora = rag.list_corpora()
+        # Process corpus information into a more usable format
+        corpus_info: List[Dict[str, Union[str, int]]] = []
+        for corpus in corpora:
+            corpus_data: Dict[str, Union[str, int]] = {
+                "resource_name": corpus.name,  # Full resource name for use with other tools
+                "display_name": corpus.display_name,
+                "create_time": (
+                    str(corpus.create_time) if hasattr(corpus, "create_time") else ""
+                ),
+                "update_time": (
+                    str(corpus.update_time) if hasattr(corpus, "update_time") else ""
+                ),
+            }
+            corpus_info.append(corpus_data)
+        return {
+            "status": "success",
+            "message": f"Found {len(corpus_info)} available corpora",
+            "corpora": corpus_info,
+        }
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error listing corpora: {str(e)}",
+            "corpora": [],
+        }

user_agent/tools/rag_query.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""
+Tool for querying Vertex AI RAG corpora and retrieving relevant information.
+"""
+import logging
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from ..config import (
+    DEFAULT_DISTANCE_THRESHOLD,
+    DEFAULT_TOP_K,
+)
+from .utils import check_corpus_exists, get_corpus_resource_name
+def rag_query(
+    corpus_name: str,
+    query: str,
+    tool_context: ToolContext,
+) -> dict:
+    """
+    Query a Vertex AI RAG corpus with a user question and return relevant information.
+    Args:
+        corpus_name (str): The name of the corpus to query. If empty, the current corpus will be used.
+                          Preferably use the resource_name from list_corpora results.
+        query (str): The text query to search for in the corpus
+        tool_context (ToolContext): The tool context
+    Returns:
+        dict: The query results and status
+    """
+    try:
+        # Check if the corpus exists
+        if not check_corpus_exists(corpus_name, tool_context):
+            return {
+                "status": "error",
+                "message": f"Corpus '{corpus_name}' does not exist. Please create it first using the create_corpus tool.",
+                "query": query,
+                "corpus_name": corpus_name,
+            }
+        # Get the corpus resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # Configure retrieval parameters
+        rag_retrieval_config = rag.RagRetrievalConfig(
+            top_k=DEFAULT_TOP_K,
+            filter=rag.Filter(vector_distance_threshold=DEFAULT_DISTANCE_THRESHOLD),
+        )
+        # Perform the query
+        print("Performing retrieval query...")
+        response = rag.retrieval_query(
+            rag_resources=[
+                rag.RagResource(
+                    rag_corpus=corpus_resource_name,
+                )
+            ],
+            text=query,
+            rag_retrieval_config=rag_retrieval_config,
+        )
+        # Process the response into a more usable format
+        results = []
+        if hasattr(response, "contexts") and response.contexts:
+            for ctx_group in response.contexts.contexts:
+                result = {
+                    "source_uri": (
+                        ctx_group.source_uri if hasattr(ctx_group, "source_uri") else ""
+                    ),
+                    "source_name": (
+                        ctx_group.source_display_name
+                        if hasattr(ctx_group, "source_display_name")
+                        else ""
+                    ),
+                    "text": ctx_group.text if hasattr(ctx_group, "text") else "",
+                    "score": ctx_group.score if hasattr(ctx_group, "score") else 0.0,
+                }
+                results.append(result)
+        # If we didn't find any results
+        if not results:
+            return {
+                "status": "warning",
+                "message": f"No results found in corpus '{corpus_name}' for query: '{query}'",
+                "query": query,
+                "corpus_name": corpus_name,
+                "results": [],
+                "results_count": 0,
+            }
+        return {
+            "status": "success",
+            "message": f"Successfully queried corpus '{corpus_name}'",
+            "query": query,
+            "corpus_name": corpus_name,
+            "results": results,
+            "results_count": len(results),
+        }
+    except Exception as e:
+        error_msg = f"Error querying corpus: {str(e)}"
+        logging.error(error_msg)
+        return {
+            "status": "error",
+            "message": error_msg,
+            "query": query,
+            "corpus_name": corpus_name,
+        }

user_agent/tools/utils.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""
+Utility functions for the RAG tools.
+"""
+import logging
+import re
+from google.adk.tools.tool_context import ToolContext
+from vertexai import rag
+from ..config import (
+    LOCATION,
+    PROJECT_ID,
+)
+logger = logging.getLogger(__name__)
+def get_corpus_resource_name(corpus_name: str) -> str:
+    """
+    Convert a corpus name to its full resource name if needed.
+    Handles various input formats and ensures the returned name follows Vertex AI's requirements.
+    Args:
+        corpus_name (str): The corpus name or display name
+    Returns:
+        str: The full resource name of the corpus
+    """
+    logger.info(f"Getting resource name for corpus: {corpus_name}")
+    # If it's already a full resource name with the projects/locations/ragCorpora format
+    if re.match(r"^projects/[^/]+/locations/[^/]+/ragCorpora/[^/]+$", corpus_name):
+        return corpus_name
+    # Check if this is a display name of an existing corpus
+    try:
+        # List all corpora and check if there's a match with the display name
+        corpora = rag.list_corpora()
+        for corpus in corpora:
+            if hasattr(corpus, "display_name") and corpus.display_name == corpus_name:
+                return corpus.name
+    except Exception as e:
+        logger.warning(f"Error when checking for corpus display name: {str(e)}")
+        # If we can't check, continue with the default behavior
+        pass
+    # If it contains partial path elements, extract just the corpus ID
+    if "/" in corpus_name:
+        # Extract the last part of the path as the corpus ID
+        corpus_id = corpus_name.split("/")[-1]
+    else:
+        corpus_id = corpus_name
+    # Remove any special characters that might cause issues
+    corpus_id = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_id)
+    # Construct the standardized resource name
+    return f"projects/{PROJECT_ID}/locations/{LOCATION}/ragCorpora/{corpus_id}"
+def check_corpus_exists(corpus_name: str, tool_context: ToolContext) -> bool:
+    """
+    Check if a corpus with the given name exists.
+    Args:
+        corpus_name (str): The name of the corpus to check
+        tool_context (ToolContext): The tool context for state management
+    Returns:
+        bool: True if the corpus exists, False otherwise
+    """
+    # Check state first if tool_context is provided
+    if tool_context.state.get(f"corpus_exists_{corpus_name}"):
+        return True
+    try:
+        # Get full resource name
+        corpus_resource_name = get_corpus_resource_name(corpus_name)
+        # List all corpora and check if this one exists
+        corpora = rag.list_corpora()
+        for corpus in corpora:
+            if (
+                corpus.name == corpus_resource_name
+                or corpus.display_name == corpus_name
+            ):
+                # Update state
+                tool_context.state[f"corpus_exists_{corpus_name}"] = True
+                # Also set this as the current corpus if no current corpus is set
+                if not tool_context.state.get("current_corpus"):
+                    tool_context.state["current_corpus"] = corpus_name
+                return True
+        return False
+    except Exception as e:
+        logger.error(f"Error checking if corpus exists: {str(e)}")
+        # If we can't check, assume it doesn't exist
+        return False
+def set_current_corpus(corpus_name: str, tool_context: ToolContext) -> bool:
+    """
+    Set the current corpus in the tool context state.
+    Args:
+        corpus_name (str): The name of the corpus to set as current
+        tool_context (ToolContext): The tool context for state management
+    Returns:
+        bool: True if the corpus exists and was set as current, False otherwise
+    """
+    # Check if corpus exists first
+    if check_corpus_exists(corpus_name, tool_context):
+        tool_context.state["current_corpus"] = corpus_name
+        return True
+    return False