Spaces:
Sleeping
Sleeping
Bima Ardhia commited on
Commit ·
d2d5a16
1
Parent(s): e7d5bbb
initttt
Browse files- .gitignore +4 -0
- DockerFile +25 -0
- requirements.txt +5 -0
- user_agent/__init__.py +35 -0
- user_agent/agent.py +63 -0
- user_agent/config.py +26 -0
- user_agent/tools/__init__.py +29 -0
- user_agent/tools/add_data.py +156 -0
- user_agent/tools/create_corpus.py +78 -0
- user_agent/tools/delete_corpus.py +67 -0
- user_agent/tools/delete_document.py +58 -0
- user_agent/tools/get_corpus_info.py +99 -0
- user_agent/tools/list_corpora.py +51 -0
- user_agent/tools/rag_query.py +112 -0
- user_agent/tools/utils.py +117 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__/
|
| 3 |
+
.venv/
|
| 4 |
+
rag_agent/
|
DockerFile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Step 1: Use an official Python runtime as a parent image
|
| 2 |
+
# Using 'slim' is a good practice for smaller image sizes
|
| 3 |
+
FROM python:3.11-slim
|
| 4 |
+
|
| 5 |
+
# Step 2: Set the working directory inside the container
|
| 6 |
+
WORKDIR /app
|
| 7 |
+
|
| 8 |
+
# Step 3: Copy the requirements file into the container
|
| 9 |
+
# This is done first to leverage Docker's layer caching.
|
| 10 |
+
# Dependencies won't be re-installed unless requirements.txt changes.
|
| 11 |
+
COPY requirements.txt .
|
| 12 |
+
|
| 13 |
+
# Step 4: Install any needed packages specified in requirements.txt
|
| 14 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
+
|
| 16 |
+
# Step 5: Copy the rest of your application's code into the container
|
| 17 |
+
COPY . .
|
| 18 |
+
|
| 19 |
+
# Step 6: Expose the port the app runs on
|
| 20 |
+
# The ADK web server defaults to port 8080
|
| 21 |
+
EXPOSE 8080
|
| 22 |
+
|
| 23 |
+
# Step 7: Define the command to run your application
|
| 24 |
+
# Use --host=0.0.0.0 to make the server accessible from outside the container
|
| 25 |
+
CMD ["adk", "web", "--host=0.0.0.0"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
google-cloud-aiplatform==1.92.0
|
| 2 |
+
google-cloud-storage==2.19.0
|
| 3 |
+
google-genai==1.14.0
|
| 4 |
+
gitpython==3.1.40
|
| 5 |
+
google-adk==0.5.0
|
user_agent/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Vertex AI RAG Agent
|
| 3 |
+
|
| 4 |
+
A package for interacting with Google Cloud Vertex AI RAG capabilities.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
import vertexai
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
# Load environment variables
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
+
# Get Vertex AI configuration from environment
|
| 16 |
+
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
|
| 17 |
+
LOCATION = os.environ.get("GOOGLE_CLOUD_LOCATION")
|
| 18 |
+
|
| 19 |
+
# Initialize Vertex AI at package load time
|
| 20 |
+
try:
|
| 21 |
+
if PROJECT_ID and LOCATION:
|
| 22 |
+
print(f"Initializing Vertex AI with project={PROJECT_ID}, location={LOCATION}")
|
| 23 |
+
vertexai.init(project=PROJECT_ID, location=LOCATION)
|
| 24 |
+
print("Vertex AI initialization successful")
|
| 25 |
+
else:
|
| 26 |
+
print(
|
| 27 |
+
f"Missing Vertex AI configuration. PROJECT_ID={PROJECT_ID}, LOCATION={LOCATION}. "
|
| 28 |
+
f"Tools requiring Vertex AI may not work properly."
|
| 29 |
+
)
|
| 30 |
+
except Exception as e:
|
| 31 |
+
print(f"Failed to initialize Vertex AI: {str(e)}")
|
| 32 |
+
print("Please check your Google Cloud credentials and project settings.")
|
| 33 |
+
|
| 34 |
+
# Import agent after initialization is complete
|
| 35 |
+
from . import agent
|
user_agent/agent.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from google.adk.agents import Agent
|
| 2 |
+
|
| 3 |
+
# Import the necessary tools for the agent.
|
| 4 |
+
from .tools.get_corpus_info import get_corpus_info
|
| 5 |
+
from .tools.list_corpora import list_corpora
|
| 6 |
+
from .tools.rag_query import rag_query
|
| 7 |
+
|
| 8 |
+
# The agent's instructions have been updated to enforce a specific workflow.
|
| 9 |
+
root_agent = Agent(
|
| 10 |
+
name="RagAgent",
|
| 11 |
+
# Using Gemini 2.5 Flash for best performance with RAG operations
|
| 12 |
+
model="gemini-2.5-flash",
|
| 13 |
+
description="An assistant for finding information, listing available document collections, and viewing their details.",
|
| 14 |
+
tools=[
|
| 15 |
+
rag_query,
|
| 16 |
+
list_corpora,
|
| 17 |
+
get_corpus_info,
|
| 18 |
+
],
|
| 19 |
+
# The instructions have been significantly updated to enforce a mandatory workflow.
|
| 20 |
+
instruction="""
|
| 21 |
+
# 🧠 Document Query Assistant
|
| 22 |
+
|
| 23 |
+
You are a helpful assistant designed to answer questions based on a collection of documents.
|
| 24 |
+
You must follow a strict workflow to ensure you always have the right context before acting.
|
| 25 |
+
|
| 26 |
+
## Mandatory Workflow for Every User Request
|
| 27 |
+
|
| 28 |
+
**For every single request from the user, you MUST follow these steps in order:**
|
| 29 |
+
|
| 30 |
+
1. **Always Run `list_corpora` First**: Before doing anything else, you must call the `list_corpora` tool. This step is mandatory to get the current context of available document collections.
|
| 31 |
+
2. **Analyze the Context**: Review the output from `list_corpora` and analyze the user's request.
|
| 32 |
+
3. **Decide the Next Action**: Based on the list of corpora and the user's prompt, choose one of the following paths:
|
| 33 |
+
* **If the user is asking a knowledge-based question**: Determine the most relevant corpus from the list for the user's query. Then, call the `rag_query` tool using that `corpus_name`. If no corpus seems relevant, inform the user.
|
| 34 |
+
* **If the user is asking for details about a specific corpus**: Find the corpus in the list and then call the `get_corpus_info` tool with the correct `corpus_name`.
|
| 35 |
+
* **If the user is asking to see the list of corpora**: Simply present the results from the `list_corpora` call you already made in step 1.
|
| 36 |
+
|
| 37 |
+
## Your Capabilities (Derived from your workflow)
|
| 38 |
+
|
| 39 |
+
* **Answer Questions**: By first checking available corpora and then querying the most relevant one.
|
| 40 |
+
* **List Document Collections**: By running your mandatory first step and presenting the result.
|
| 41 |
+
* **Get Corpus Details**: By first checking that the corpus exists and then fetching its details.
|
| 42 |
+
|
| 43 |
+
## Available Tools (To be used only after step 1)
|
| 44 |
+
|
| 45 |
+
1. `rag_query`: Searches a document collection to answer a specific question.
|
| 46 |
+
- Parameters:
|
| 47 |
+
- `corpus_name`: The name of the document collection to search.
|
| 48 |
+
- `query`: The user's question.
|
| 49 |
+
|
| 50 |
+
2. `list_corpora`: Lists all available document collections. (Your mandatory first step).
|
| 51 |
+
- This tool takes no parameters.
|
| 52 |
+
|
| 53 |
+
3. `get_corpus_info`: Gets detailed information and metadata about a specific corpus.
|
| 54 |
+
- Parameters:
|
| 55 |
+
- `corpus_name`: The name of the corpus to get information about.
|
| 56 |
+
|
| 57 |
+
## Communication Guidelines
|
| 58 |
+
|
| 59 |
+
- Be clear and direct in your responses.
|
| 60 |
+
- When you answer a question, state which document collection you used.
|
| 61 |
+
- If you cannot find a relevant document collection for a query, inform the user clearly. Do not try to answer without a source.
|
| 62 |
+
""",
|
| 63 |
+
)
|
user_agent/config.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration settings for the RAG Agent.
|
| 3 |
+
|
| 4 |
+
These settings are used by the various RAG tools.
|
| 5 |
+
Vertex AI initialization is performed in the package's __init__.py
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
# Load environment variables (this is redundant if __init__.py is imported first,
|
| 13 |
+
# but included for safety when importing config directly)
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
# Vertex AI settings
|
| 17 |
+
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
|
| 18 |
+
LOCATION = os.environ.get("GOOGLE_CLOUD_LOCATION")
|
| 19 |
+
|
| 20 |
+
# RAG settings
|
| 21 |
+
DEFAULT_CHUNK_SIZE = 512
|
| 22 |
+
DEFAULT_CHUNK_OVERLAP = 100
|
| 23 |
+
DEFAULT_TOP_K = 3
|
| 24 |
+
DEFAULT_DISTANCE_THRESHOLD = 0.5
|
| 25 |
+
DEFAULT_EMBEDDING_MODEL = "publishers/google/models/text-embedding-005"
|
| 26 |
+
DEFAULT_EMBEDDING_REQUESTS_PER_MIN = 1000
|
user_agent/tools/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Tools package for interacting with Vertex AI RAG corpora.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from .add_data import add_data
|
| 6 |
+
from .create_corpus import create_corpus
|
| 7 |
+
from .delete_corpus import delete_corpus
|
| 8 |
+
from .delete_document import delete_document
|
| 9 |
+
from .get_corpus_info import get_corpus_info
|
| 10 |
+
from .list_corpora import list_corpora
|
| 11 |
+
from .rag_query import rag_query
|
| 12 |
+
from .utils import (
|
| 13 |
+
check_corpus_exists,
|
| 14 |
+
get_corpus_resource_name,
|
| 15 |
+
set_current_corpus,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
__all__ = [
|
| 19 |
+
"add_data",
|
| 20 |
+
"create_corpus",
|
| 21 |
+
"list_corpora",
|
| 22 |
+
"rag_query",
|
| 23 |
+
"get_corpus_info",
|
| 24 |
+
"delete_corpus",
|
| 25 |
+
"delete_document",
|
| 26 |
+
"check_corpus_exists",
|
| 27 |
+
"get_corpus_resource_name",
|
| 28 |
+
"set_current_corpus",
|
| 29 |
+
]
|
user_agent/tools/add_data.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for adding new data sources to a Vertex AI RAG corpus.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import re
|
| 6 |
+
from typing import List
|
| 7 |
+
|
| 8 |
+
from google.adk.tools.tool_context import ToolContext
|
| 9 |
+
from vertexai import rag
|
| 10 |
+
|
| 11 |
+
from ..config import (
|
| 12 |
+
DEFAULT_CHUNK_OVERLAP,
|
| 13 |
+
DEFAULT_CHUNK_SIZE,
|
| 14 |
+
DEFAULT_EMBEDDING_REQUESTS_PER_MIN,
|
| 15 |
+
)
|
| 16 |
+
from .utils import check_corpus_exists, get_corpus_resource_name
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def add_data(
|
| 20 |
+
corpus_name: str,
|
| 21 |
+
paths: List[str],
|
| 22 |
+
tool_context: ToolContext,
|
| 23 |
+
) -> dict:
|
| 24 |
+
"""
|
| 25 |
+
Add new data sources to a Vertex AI RAG corpus.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
corpus_name (str): The name of the corpus to add data to. If empty, the current corpus will be used.
|
| 29 |
+
paths (List[str]): List of URLs or GCS paths to add to the corpus.
|
| 30 |
+
Supported formats:
|
| 31 |
+
- Google Drive: "https://drive.google.com/file/d/{FILE_ID}/view"
|
| 32 |
+
- Google Docs/Sheets/Slides: "https://docs.google.com/{type}/d/{FILE_ID}/..."
|
| 33 |
+
- Google Cloud Storage: "gs://{BUCKET}/{PATH}"
|
| 34 |
+
Example: ["https://drive.google.com/file/d/123", "gs://my_bucket/my_files_dir"]
|
| 35 |
+
tool_context (ToolContext): The tool context
|
| 36 |
+
|
| 37 |
+
Returns:
|
| 38 |
+
dict: Information about the added data and status
|
| 39 |
+
"""
|
| 40 |
+
# Check if the corpus exists
|
| 41 |
+
if not check_corpus_exists(corpus_name, tool_context):
|
| 42 |
+
return {
|
| 43 |
+
"status": "error",
|
| 44 |
+
"message": f"Corpus '{corpus_name}' does not exist. Please create it first using the create_corpus tool.",
|
| 45 |
+
"corpus_name": corpus_name,
|
| 46 |
+
"paths": paths,
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
# Validate inputs
|
| 50 |
+
if not paths or not all(isinstance(path, str) for path in paths):
|
| 51 |
+
return {
|
| 52 |
+
"status": "error",
|
| 53 |
+
"message": "Invalid paths: Please provide a list of URLs or GCS paths",
|
| 54 |
+
"corpus_name": corpus_name,
|
| 55 |
+
"paths": paths,
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
# Pre-process paths to validate and convert Google Docs URLs to Drive format if needed
|
| 59 |
+
validated_paths = []
|
| 60 |
+
invalid_paths = []
|
| 61 |
+
conversions = []
|
| 62 |
+
|
| 63 |
+
for path in paths:
|
| 64 |
+
if not path or not isinstance(path, str):
|
| 65 |
+
invalid_paths.append(f"{path} (Not a valid string)")
|
| 66 |
+
continue
|
| 67 |
+
|
| 68 |
+
# Check for Google Docs/Sheets/Slides URLs and convert them to Drive format
|
| 69 |
+
docs_match = re.match(
|
| 70 |
+
r"https:\/\/docs\.google\.com\/(?:document|spreadsheets|presentation)\/d\/([a-zA-Z0-9_-]+)(?:\/|$)",
|
| 71 |
+
path,
|
| 72 |
+
)
|
| 73 |
+
if docs_match:
|
| 74 |
+
file_id = docs_match.group(1)
|
| 75 |
+
drive_url = f"https://drive.google.com/file/d/{file_id}/view"
|
| 76 |
+
validated_paths.append(drive_url)
|
| 77 |
+
conversions.append(f"{path} → {drive_url}")
|
| 78 |
+
continue
|
| 79 |
+
|
| 80 |
+
# Check for valid Drive URL format
|
| 81 |
+
drive_match = re.match(
|
| 82 |
+
r"https:\/\/drive\.google\.com\/(?:file\/d\/|open\?id=)([a-zA-Z0-9_-]+)(?:\/|$)",
|
| 83 |
+
path,
|
| 84 |
+
)
|
| 85 |
+
if drive_match:
|
| 86 |
+
# Normalize to the standard Drive URL format
|
| 87 |
+
file_id = drive_match.group(1)
|
| 88 |
+
drive_url = f"https://drive.google.com/file/d/{file_id}/view"
|
| 89 |
+
validated_paths.append(drive_url)
|
| 90 |
+
if drive_url != path:
|
| 91 |
+
conversions.append(f"{path} → {drive_url}")
|
| 92 |
+
continue
|
| 93 |
+
|
| 94 |
+
# Check for GCS paths
|
| 95 |
+
if path.startswith("gs://"):
|
| 96 |
+
validated_paths.append(path)
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
# If we're here, the path wasn't in a recognized format
|
| 100 |
+
invalid_paths.append(f"{path} (Invalid format)")
|
| 101 |
+
|
| 102 |
+
# Check if we have any valid paths after validation
|
| 103 |
+
if not validated_paths:
|
| 104 |
+
return {
|
| 105 |
+
"status": "error",
|
| 106 |
+
"message": "No valid paths provided. Please provide Google Drive URLs or GCS paths.",
|
| 107 |
+
"corpus_name": corpus_name,
|
| 108 |
+
"invalid_paths": invalid_paths,
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
try:
|
| 112 |
+
# Get the corpus resource name
|
| 113 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 114 |
+
|
| 115 |
+
# Set up chunking configuration
|
| 116 |
+
transformation_config = rag.TransformationConfig(
|
| 117 |
+
chunking_config=rag.ChunkingConfig(
|
| 118 |
+
chunk_size=DEFAULT_CHUNK_SIZE,
|
| 119 |
+
chunk_overlap=DEFAULT_CHUNK_OVERLAP,
|
| 120 |
+
),
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
# Import files to the corpus
|
| 124 |
+
import_result = rag.import_files(
|
| 125 |
+
corpus_resource_name,
|
| 126 |
+
validated_paths,
|
| 127 |
+
transformation_config=transformation_config,
|
| 128 |
+
max_embedding_requests_per_min=DEFAULT_EMBEDDING_REQUESTS_PER_MIN,
|
| 129 |
+
)
|
| 130 |
+
|
| 131 |
+
# Set this as the current corpus if not already set
|
| 132 |
+
if not tool_context.state.get("current_corpus"):
|
| 133 |
+
tool_context.state["current_corpus"] = corpus_name
|
| 134 |
+
|
| 135 |
+
# Build the success message
|
| 136 |
+
conversion_msg = ""
|
| 137 |
+
if conversions:
|
| 138 |
+
conversion_msg = " (Converted Google Docs URLs to Drive format)"
|
| 139 |
+
|
| 140 |
+
return {
|
| 141 |
+
"status": "success",
|
| 142 |
+
"message": f"Successfully added {import_result.imported_rag_files_count} file(s) to corpus '{corpus_name}'{conversion_msg}",
|
| 143 |
+
"corpus_name": corpus_name,
|
| 144 |
+
"files_added": import_result.imported_rag_files_count,
|
| 145 |
+
"paths": validated_paths,
|
| 146 |
+
"invalid_paths": invalid_paths,
|
| 147 |
+
"conversions": conversions,
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
except Exception as e:
|
| 151 |
+
return {
|
| 152 |
+
"status": "error",
|
| 153 |
+
"message": f"Error adding data to corpus: {str(e)}",
|
| 154 |
+
"corpus_name": corpus_name,
|
| 155 |
+
"paths": paths,
|
| 156 |
+
}
|
user_agent/tools/create_corpus.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for creating a new Vertex AI RAG corpus.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
from google.adk.tools.tool_context import ToolContext
|
| 8 |
+
from vertexai import rag
|
| 9 |
+
|
| 10 |
+
from ..config import (
|
| 11 |
+
DEFAULT_EMBEDDING_MODEL,
|
| 12 |
+
)
|
| 13 |
+
from .utils import check_corpus_exists
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def create_corpus(
|
| 17 |
+
corpus_name: str,
|
| 18 |
+
tool_context: ToolContext,
|
| 19 |
+
) -> dict:
|
| 20 |
+
"""
|
| 21 |
+
Create a new Vertex AI RAG corpus with the specified name.
|
| 22 |
+
|
| 23 |
+
Args:
|
| 24 |
+
corpus_name (str): The name for the new corpus
|
| 25 |
+
tool_context (ToolContext): The tool context for state management
|
| 26 |
+
|
| 27 |
+
Returns:
|
| 28 |
+
dict: Status information about the operation
|
| 29 |
+
"""
|
| 30 |
+
# Check if corpus already exists
|
| 31 |
+
if check_corpus_exists(corpus_name, tool_context):
|
| 32 |
+
return {
|
| 33 |
+
"status": "info",
|
| 34 |
+
"message": f"Corpus '{corpus_name}' already exists",
|
| 35 |
+
"corpus_name": corpus_name,
|
| 36 |
+
"corpus_created": False,
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
try:
|
| 40 |
+
# Clean corpus name for use as display name
|
| 41 |
+
display_name = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_name)
|
| 42 |
+
|
| 43 |
+
# Configure embedding model
|
| 44 |
+
embedding_model_config = rag.RagEmbeddingModelConfig(
|
| 45 |
+
vertex_prediction_endpoint=rag.VertexPredictionEndpoint(
|
| 46 |
+
publisher_model=DEFAULT_EMBEDDING_MODEL
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
# Create the corpus
|
| 51 |
+
rag_corpus = rag.create_corpus(
|
| 52 |
+
display_name=display_name,
|
| 53 |
+
backend_config=rag.RagVectorDbConfig(
|
| 54 |
+
rag_embedding_model_config=embedding_model_config
|
| 55 |
+
),
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
# Update state to track corpus existence
|
| 59 |
+
tool_context.state[f"corpus_exists_{corpus_name}"] = True
|
| 60 |
+
|
| 61 |
+
# Set this as the current corpus
|
| 62 |
+
tool_context.state["current_corpus"] = corpus_name
|
| 63 |
+
|
| 64 |
+
return {
|
| 65 |
+
"status": "success",
|
| 66 |
+
"message": f"Successfully created corpus '{corpus_name}'",
|
| 67 |
+
"corpus_name": rag_corpus.name,
|
| 68 |
+
"display_name": rag_corpus.display_name,
|
| 69 |
+
"corpus_created": True,
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
except Exception as e:
|
| 73 |
+
return {
|
| 74 |
+
"status": "error",
|
| 75 |
+
"message": f"Error creating corpus: {str(e)}",
|
| 76 |
+
"corpus_name": corpus_name,
|
| 77 |
+
"corpus_created": False,
|
| 78 |
+
}
|
user_agent/tools/delete_corpus.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for deleting a Vertex AI RAG corpus when it's no longer needed.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from google.adk.tools.tool_context import ToolContext
|
| 6 |
+
from vertexai import rag
|
| 7 |
+
|
| 8 |
+
from .utils import check_corpus_exists, get_corpus_resource_name
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def delete_corpus(
|
| 12 |
+
corpus_name: str,
|
| 13 |
+
confirm: bool,
|
| 14 |
+
tool_context: ToolContext,
|
| 15 |
+
) -> dict:
|
| 16 |
+
"""
|
| 17 |
+
Delete a Vertex AI RAG corpus when it's no longer needed.
|
| 18 |
+
Requires confirmation to prevent accidental deletion.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
corpus_name (str): The full resource name of the corpus to delete.
|
| 22 |
+
Preferably use the resource_name from list_corpora results.
|
| 23 |
+
confirm (bool): Must be set to True to confirm deletion
|
| 24 |
+
tool_context (ToolContext): The tool context
|
| 25 |
+
|
| 26 |
+
Returns:
|
| 27 |
+
dict: Status information about the deletion operation
|
| 28 |
+
"""
|
| 29 |
+
# Check if corpus exists
|
| 30 |
+
if not check_corpus_exists(corpus_name, tool_context):
|
| 31 |
+
return {
|
| 32 |
+
"status": "error",
|
| 33 |
+
"message": f"Corpus '{corpus_name}' does not exist",
|
| 34 |
+
"corpus_name": corpus_name,
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# Check if deletion is confirmed
|
| 38 |
+
if not confirm:
|
| 39 |
+
return {
|
| 40 |
+
"status": "error",
|
| 41 |
+
"message": "Deletion requires explicit confirmation. Set confirm=True to delete this corpus.",
|
| 42 |
+
"corpus_name": corpus_name,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
try:
|
| 46 |
+
# Get the corpus resource name
|
| 47 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 48 |
+
|
| 49 |
+
# Delete the corpus
|
| 50 |
+
rag.delete_corpus(corpus_resource_name)
|
| 51 |
+
|
| 52 |
+
# Remove from state by setting to False
|
| 53 |
+
state_key = f"corpus_exists_{corpus_name}"
|
| 54 |
+
if state_key in tool_context.state:
|
| 55 |
+
tool_context.state[state_key] = False
|
| 56 |
+
|
| 57 |
+
return {
|
| 58 |
+
"status": "success",
|
| 59 |
+
"message": f"Successfully deleted corpus '{corpus_name}'",
|
| 60 |
+
"corpus_name": corpus_name,
|
| 61 |
+
}
|
| 62 |
+
except Exception as e:
|
| 63 |
+
return {
|
| 64 |
+
"status": "error",
|
| 65 |
+
"message": f"Error deleting corpus: {str(e)}",
|
| 66 |
+
"corpus_name": corpus_name,
|
| 67 |
+
}
|
user_agent/tools/delete_document.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for deleting a specific document from a Vertex AI RAG corpus.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from google.adk.tools.tool_context import ToolContext
|
| 6 |
+
from vertexai import rag
|
| 7 |
+
|
| 8 |
+
from .utils import check_corpus_exists, get_corpus_resource_name
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def delete_document(
|
| 12 |
+
corpus_name: str,
|
| 13 |
+
document_id: str,
|
| 14 |
+
tool_context: ToolContext,
|
| 15 |
+
) -> dict:
|
| 16 |
+
"""
|
| 17 |
+
Delete a specific document from a Vertex AI RAG corpus.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
corpus_name (str): The full resource name of the corpus containing the document.
|
| 21 |
+
Preferably use the resource_name from list_corpora results.
|
| 22 |
+
document_id (str): The ID of the specific document/file to delete. This can be
|
| 23 |
+
obtained from get_corpus_info results.
|
| 24 |
+
tool_context (ToolContext): The tool context
|
| 25 |
+
|
| 26 |
+
Returns:
|
| 27 |
+
dict: Status information about the deletion operation
|
| 28 |
+
"""
|
| 29 |
+
# Check if corpus exists
|
| 30 |
+
if not check_corpus_exists(corpus_name, tool_context):
|
| 31 |
+
return {
|
| 32 |
+
"status": "error",
|
| 33 |
+
"message": f"Corpus '{corpus_name}' does not exist",
|
| 34 |
+
"corpus_name": corpus_name,
|
| 35 |
+
"document_id": document_id,
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
try:
|
| 39 |
+
# Get the corpus resource name
|
| 40 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 41 |
+
|
| 42 |
+
# Delete the document
|
| 43 |
+
rag_file_path = f"{corpus_resource_name}/ragFiles/{document_id}"
|
| 44 |
+
rag.delete_file(rag_file_path)
|
| 45 |
+
|
| 46 |
+
return {
|
| 47 |
+
"status": "success",
|
| 48 |
+
"message": f"Successfully deleted document '{document_id}' from corpus '{corpus_name}'",
|
| 49 |
+
"corpus_name": corpus_name,
|
| 50 |
+
"document_id": document_id,
|
| 51 |
+
}
|
| 52 |
+
except Exception as e:
|
| 53 |
+
return {
|
| 54 |
+
"status": "error",
|
| 55 |
+
"message": f"Error deleting document: {str(e)}",
|
| 56 |
+
"corpus_name": corpus_name,
|
| 57 |
+
"document_id": document_id,
|
| 58 |
+
}
|
user_agent/tools/get_corpus_info.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for retrieving detailed information about a specific RAG corpus.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from google.adk.tools.tool_context import ToolContext
|
| 6 |
+
from vertexai import rag
|
| 7 |
+
|
| 8 |
+
from .utils import check_corpus_exists, get_corpus_resource_name
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_corpus_info(
|
| 12 |
+
corpus_name: str,
|
| 13 |
+
tool_context: ToolContext,
|
| 14 |
+
) -> dict:
|
| 15 |
+
"""
|
| 16 |
+
Get detailed information about a specific RAG corpus, including its files.
|
| 17 |
+
|
| 18 |
+
Args:
|
| 19 |
+
corpus_name (str): The full resource name of the corpus to get information about.
|
| 20 |
+
Preferably use the resource_name from list_corpora results.
|
| 21 |
+
tool_context (ToolContext): The tool context
|
| 22 |
+
|
| 23 |
+
Returns:
|
| 24 |
+
dict: Information about the corpus and its files
|
| 25 |
+
"""
|
| 26 |
+
try:
|
| 27 |
+
# Check if corpus exists
|
| 28 |
+
if not check_corpus_exists(corpus_name, tool_context):
|
| 29 |
+
return {
|
| 30 |
+
"status": "error",
|
| 31 |
+
"message": f"Corpus '{corpus_name}' does not exist",
|
| 32 |
+
"corpus_name": corpus_name,
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# Get the corpus resource name
|
| 36 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 37 |
+
|
| 38 |
+
# Try to get corpus details first
|
| 39 |
+
corpus_display_name = corpus_name # Default if we can't get actual display name
|
| 40 |
+
|
| 41 |
+
# Process file information
|
| 42 |
+
file_details = []
|
| 43 |
+
try:
|
| 44 |
+
# Get the list of files
|
| 45 |
+
files = rag.list_files(corpus_resource_name)
|
| 46 |
+
for rag_file in files:
|
| 47 |
+
# Get document specific details
|
| 48 |
+
try:
|
| 49 |
+
# Extract the file ID from the name
|
| 50 |
+
file_id = rag_file.name.split("/")[-1]
|
| 51 |
+
|
| 52 |
+
file_info = {
|
| 53 |
+
"file_id": file_id,
|
| 54 |
+
"display_name": (
|
| 55 |
+
rag_file.display_name
|
| 56 |
+
if hasattr(rag_file, "display_name")
|
| 57 |
+
else ""
|
| 58 |
+
),
|
| 59 |
+
"source_uri": (
|
| 60 |
+
rag_file.source_uri
|
| 61 |
+
if hasattr(rag_file, "source_uri")
|
| 62 |
+
else ""
|
| 63 |
+
),
|
| 64 |
+
"create_time": (
|
| 65 |
+
str(rag_file.create_time)
|
| 66 |
+
if hasattr(rag_file, "create_time")
|
| 67 |
+
else ""
|
| 68 |
+
),
|
| 69 |
+
"update_time": (
|
| 70 |
+
str(rag_file.update_time)
|
| 71 |
+
if hasattr(rag_file, "update_time")
|
| 72 |
+
else ""
|
| 73 |
+
),
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
file_details.append(file_info)
|
| 77 |
+
except Exception:
|
| 78 |
+
# Continue to the next file
|
| 79 |
+
continue
|
| 80 |
+
except Exception:
|
| 81 |
+
# Continue without file details
|
| 82 |
+
pass
|
| 83 |
+
|
| 84 |
+
# Basic corpus info
|
| 85 |
+
return {
|
| 86 |
+
"status": "success",
|
| 87 |
+
"message": f"Successfully retrieved information for corpus '{corpus_display_name}'",
|
| 88 |
+
"corpus_name": corpus_name,
|
| 89 |
+
"corpus_display_name": corpus_display_name,
|
| 90 |
+
"file_count": len(file_details),
|
| 91 |
+
"files": file_details,
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
except Exception as e:
|
| 95 |
+
return {
|
| 96 |
+
"status": "error",
|
| 97 |
+
"message": f"Error getting corpus information: {str(e)}",
|
| 98 |
+
"corpus_name": corpus_name,
|
| 99 |
+
}
|
user_agent/tools/list_corpora.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for listing all available Vertex AI RAG corpora.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from typing import Dict, List, Union
|
| 6 |
+
|
| 7 |
+
from vertexai import rag
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def list_corpora() -> dict:
|
| 11 |
+
"""
|
| 12 |
+
List all available Vertex AI RAG corpora.
|
| 13 |
+
|
| 14 |
+
Returns:
|
| 15 |
+
dict: A list of available corpora and status, with each corpus containing:
|
| 16 |
+
- resource_name: The full resource name to use with other tools
|
| 17 |
+
- display_name: The human-readable name of the corpus
|
| 18 |
+
- create_time: When the corpus was created
|
| 19 |
+
- update_time: When the corpus was last updated
|
| 20 |
+
"""
|
| 21 |
+
try:
|
| 22 |
+
# Get the list of corpora
|
| 23 |
+
corpora = rag.list_corpora()
|
| 24 |
+
|
| 25 |
+
# Process corpus information into a more usable format
|
| 26 |
+
corpus_info: List[Dict[str, Union[str, int]]] = []
|
| 27 |
+
for corpus in corpora:
|
| 28 |
+
corpus_data: Dict[str, Union[str, int]] = {
|
| 29 |
+
"resource_name": corpus.name, # Full resource name for use with other tools
|
| 30 |
+
"display_name": corpus.display_name,
|
| 31 |
+
"create_time": (
|
| 32 |
+
str(corpus.create_time) if hasattr(corpus, "create_time") else ""
|
| 33 |
+
),
|
| 34 |
+
"update_time": (
|
| 35 |
+
str(corpus.update_time) if hasattr(corpus, "update_time") else ""
|
| 36 |
+
),
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
corpus_info.append(corpus_data)
|
| 40 |
+
|
| 41 |
+
return {
|
| 42 |
+
"status": "success",
|
| 43 |
+
"message": f"Found {len(corpus_info)} available corpora",
|
| 44 |
+
"corpora": corpus_info,
|
| 45 |
+
}
|
| 46 |
+
except Exception as e:
|
| 47 |
+
return {
|
| 48 |
+
"status": "error",
|
| 49 |
+
"message": f"Error listing corpora: {str(e)}",
|
| 50 |
+
"corpora": [],
|
| 51 |
+
}
|
user_agent/tools/rag_query.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tool for querying Vertex AI RAG corpora and retrieving relevant information.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
from google.adk.tools.tool_context import ToolContext
|
| 8 |
+
from vertexai import rag
|
| 9 |
+
|
| 10 |
+
from ..config import (
|
| 11 |
+
DEFAULT_DISTANCE_THRESHOLD,
|
| 12 |
+
DEFAULT_TOP_K,
|
| 13 |
+
)
|
| 14 |
+
from .utils import check_corpus_exists, get_corpus_resource_name
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def rag_query(
|
| 18 |
+
corpus_name: str,
|
| 19 |
+
query: str,
|
| 20 |
+
tool_context: ToolContext,
|
| 21 |
+
) -> dict:
|
| 22 |
+
"""
|
| 23 |
+
Query a Vertex AI RAG corpus with a user question and return relevant information.
|
| 24 |
+
|
| 25 |
+
Args:
|
| 26 |
+
corpus_name (str): The name of the corpus to query. If empty, the current corpus will be used.
|
| 27 |
+
Preferably use the resource_name from list_corpora results.
|
| 28 |
+
query (str): The text query to search for in the corpus
|
| 29 |
+
tool_context (ToolContext): The tool context
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
dict: The query results and status
|
| 33 |
+
"""
|
| 34 |
+
try:
|
| 35 |
+
|
| 36 |
+
# Check if the corpus exists
|
| 37 |
+
if not check_corpus_exists(corpus_name, tool_context):
|
| 38 |
+
return {
|
| 39 |
+
"status": "error",
|
| 40 |
+
"message": f"Corpus '{corpus_name}' does not exist. Please create it first using the create_corpus tool.",
|
| 41 |
+
"query": query,
|
| 42 |
+
"corpus_name": corpus_name,
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
# Get the corpus resource name
|
| 46 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 47 |
+
|
| 48 |
+
# Configure retrieval parameters
|
| 49 |
+
rag_retrieval_config = rag.RagRetrievalConfig(
|
| 50 |
+
top_k=DEFAULT_TOP_K,
|
| 51 |
+
filter=rag.Filter(vector_distance_threshold=DEFAULT_DISTANCE_THRESHOLD),
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# Perform the query
|
| 55 |
+
print("Performing retrieval query...")
|
| 56 |
+
response = rag.retrieval_query(
|
| 57 |
+
rag_resources=[
|
| 58 |
+
rag.RagResource(
|
| 59 |
+
rag_corpus=corpus_resource_name,
|
| 60 |
+
)
|
| 61 |
+
],
|
| 62 |
+
text=query,
|
| 63 |
+
rag_retrieval_config=rag_retrieval_config,
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
# Process the response into a more usable format
|
| 67 |
+
results = []
|
| 68 |
+
if hasattr(response, "contexts") and response.contexts:
|
| 69 |
+
for ctx_group in response.contexts.contexts:
|
| 70 |
+
result = {
|
| 71 |
+
"source_uri": (
|
| 72 |
+
ctx_group.source_uri if hasattr(ctx_group, "source_uri") else ""
|
| 73 |
+
),
|
| 74 |
+
"source_name": (
|
| 75 |
+
ctx_group.source_display_name
|
| 76 |
+
if hasattr(ctx_group, "source_display_name")
|
| 77 |
+
else ""
|
| 78 |
+
),
|
| 79 |
+
"text": ctx_group.text if hasattr(ctx_group, "text") else "",
|
| 80 |
+
"score": ctx_group.score if hasattr(ctx_group, "score") else 0.0,
|
| 81 |
+
}
|
| 82 |
+
results.append(result)
|
| 83 |
+
|
| 84 |
+
# If we didn't find any results
|
| 85 |
+
if not results:
|
| 86 |
+
return {
|
| 87 |
+
"status": "warning",
|
| 88 |
+
"message": f"No results found in corpus '{corpus_name}' for query: '{query}'",
|
| 89 |
+
"query": query,
|
| 90 |
+
"corpus_name": corpus_name,
|
| 91 |
+
"results": [],
|
| 92 |
+
"results_count": 0,
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
return {
|
| 96 |
+
"status": "success",
|
| 97 |
+
"message": f"Successfully queried corpus '{corpus_name}'",
|
| 98 |
+
"query": query,
|
| 99 |
+
"corpus_name": corpus_name,
|
| 100 |
+
"results": results,
|
| 101 |
+
"results_count": len(results),
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
except Exception as e:
|
| 105 |
+
error_msg = f"Error querying corpus: {str(e)}"
|
| 106 |
+
logging.error(error_msg)
|
| 107 |
+
return {
|
| 108 |
+
"status": "error",
|
| 109 |
+
"message": error_msg,
|
| 110 |
+
"query": query,
|
| 111 |
+
"corpus_name": corpus_name,
|
| 112 |
+
}
|
user_agent/tools/utils.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for the RAG tools.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
from google.adk.tools.tool_context import ToolContext
|
| 9 |
+
from vertexai import rag
|
| 10 |
+
|
| 11 |
+
from ..config import (
|
| 12 |
+
LOCATION,
|
| 13 |
+
PROJECT_ID,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_corpus_resource_name(corpus_name: str) -> str:
|
| 20 |
+
"""
|
| 21 |
+
Convert a corpus name to its full resource name if needed.
|
| 22 |
+
Handles various input formats and ensures the returned name follows Vertex AI's requirements.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
corpus_name (str): The corpus name or display name
|
| 26 |
+
|
| 27 |
+
Returns:
|
| 28 |
+
str: The full resource name of the corpus
|
| 29 |
+
"""
|
| 30 |
+
logger.info(f"Getting resource name for corpus: {corpus_name}")
|
| 31 |
+
|
| 32 |
+
# If it's already a full resource name with the projects/locations/ragCorpora format
|
| 33 |
+
if re.match(r"^projects/[^/]+/locations/[^/]+/ragCorpora/[^/]+$", corpus_name):
|
| 34 |
+
return corpus_name
|
| 35 |
+
|
| 36 |
+
# Check if this is a display name of an existing corpus
|
| 37 |
+
try:
|
| 38 |
+
# List all corpora and check if there's a match with the display name
|
| 39 |
+
corpora = rag.list_corpora()
|
| 40 |
+
for corpus in corpora:
|
| 41 |
+
if hasattr(corpus, "display_name") and corpus.display_name == corpus_name:
|
| 42 |
+
return corpus.name
|
| 43 |
+
except Exception as e:
|
| 44 |
+
logger.warning(f"Error when checking for corpus display name: {str(e)}")
|
| 45 |
+
# If we can't check, continue with the default behavior
|
| 46 |
+
pass
|
| 47 |
+
|
| 48 |
+
# If it contains partial path elements, extract just the corpus ID
|
| 49 |
+
if "/" in corpus_name:
|
| 50 |
+
# Extract the last part of the path as the corpus ID
|
| 51 |
+
corpus_id = corpus_name.split("/")[-1]
|
| 52 |
+
else:
|
| 53 |
+
corpus_id = corpus_name
|
| 54 |
+
|
| 55 |
+
# Remove any special characters that might cause issues
|
| 56 |
+
corpus_id = re.sub(r"[^a-zA-Z0-9_-]", "_", corpus_id)
|
| 57 |
+
|
| 58 |
+
# Construct the standardized resource name
|
| 59 |
+
return f"projects/{PROJECT_ID}/locations/{LOCATION}/ragCorpora/{corpus_id}"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def check_corpus_exists(corpus_name: str, tool_context: ToolContext) -> bool:
|
| 63 |
+
"""
|
| 64 |
+
Check if a corpus with the given name exists.
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
corpus_name (str): The name of the corpus to check
|
| 68 |
+
tool_context (ToolContext): The tool context for state management
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
bool: True if the corpus exists, False otherwise
|
| 72 |
+
"""
|
| 73 |
+
# Check state first if tool_context is provided
|
| 74 |
+
if tool_context.state.get(f"corpus_exists_{corpus_name}"):
|
| 75 |
+
return True
|
| 76 |
+
|
| 77 |
+
try:
|
| 78 |
+
# Get full resource name
|
| 79 |
+
corpus_resource_name = get_corpus_resource_name(corpus_name)
|
| 80 |
+
|
| 81 |
+
# List all corpora and check if this one exists
|
| 82 |
+
corpora = rag.list_corpora()
|
| 83 |
+
for corpus in corpora:
|
| 84 |
+
if (
|
| 85 |
+
corpus.name == corpus_resource_name
|
| 86 |
+
or corpus.display_name == corpus_name
|
| 87 |
+
):
|
| 88 |
+
# Update state
|
| 89 |
+
tool_context.state[f"corpus_exists_{corpus_name}"] = True
|
| 90 |
+
# Also set this as the current corpus if no current corpus is set
|
| 91 |
+
if not tool_context.state.get("current_corpus"):
|
| 92 |
+
tool_context.state["current_corpus"] = corpus_name
|
| 93 |
+
return True
|
| 94 |
+
|
| 95 |
+
return False
|
| 96 |
+
except Exception as e:
|
| 97 |
+
logger.error(f"Error checking if corpus exists: {str(e)}")
|
| 98 |
+
# If we can't check, assume it doesn't exist
|
| 99 |
+
return False
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def set_current_corpus(corpus_name: str, tool_context: ToolContext) -> bool:
|
| 103 |
+
"""
|
| 104 |
+
Set the current corpus in the tool context state.
|
| 105 |
+
|
| 106 |
+
Args:
|
| 107 |
+
corpus_name (str): The name of the corpus to set as current
|
| 108 |
+
tool_context (ToolContext): The tool context for state management
|
| 109 |
+
|
| 110 |
+
Returns:
|
| 111 |
+
bool: True if the corpus exists and was set as current, False otherwise
|
| 112 |
+
"""
|
| 113 |
+
# Check if corpus exists first
|
| 114 |
+
if check_corpus_exists(corpus_name, tool_context):
|
| 115 |
+
tool_context.state["current_corpus"] = corpus_name
|
| 116 |
+
return True
|
| 117 |
+
return False
|