Spaces:

jedick
/

plotmydata

Sleeping

App Files Files Community

jedick committed on Jan 26

Commit

8f7fb71

1 Parent(s): ed0d360

Add app files

Browse files

Files changed (12) hide show

.gitignore +8 -0
Dockerfile +44 -0
PlotMyData/__init__.py +13 -0
PlotMyData/agent.py +352 -0
README.md +4 -3
entrypoint.sh +31 -0
functions.R +101 -0
profile.R +12 -0
prompts.R +133 -0
prompts.py +147 -0
requirements.txt +3 -0
server.R +134 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Any secret files (including secret.openai-api-key)
+secret.*
+# Copied by Dockerfile from entrypoint.sh
+startup.sh
+# Created by entrypoint.sh
+.Rprofile
+# We can ignore __pycache__
+__pycache__

Dockerfile ADDED Viewed

	@@ -0,0 +1,44 @@

+# Declare the base image
+FROM rocker/r-ver:latest
+# Considerations for local development: reduce Docker cache size and rebuild time
+#   Single RUN directive and two COPY directives
+#     Pre-RUN COPY for relatively stable files, post-RUN COPY for app files
+#   Avoid other directives like USER and ENV
+#     startup.sh activates the virtual environment for running the app
+# Considerations for remote development (HF Spaces Dev Mode)
+#   Dev Mode requires useradd, chown and USER
+#   Use CMD instead of ENTRYPOINT
+# Set working directory and copy non-app files
+WORKDIR /app
+COPY requirements.txt entrypoint.sh .
+# Install Python and system tools
+# Create and activate virtual environment for installing packages
+# Install required Python and R packages
+# Rename startup script and make it executable
+# Add user with uid=1000 and chown /app directory for HF Spaces Dev Mode
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv screen vim git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    python3 -m venv /opt/venv && \
+    export PATH="/opt/venv/bin:$PATH" && \
+    pip --no-cache-dir install -r requirements.txt && \
+    R -q -e 'install.packages(c("ellmer", "mcptools", "readr", "ggplot2", "tidyverse"))' && \
+    cp entrypoint.sh startup.sh && \
+    chmod +x startup.sh && \
+    useradd -m -u 1000 user && \
+    chown -R user /app
+# Copy app files with user permissions
+# NOTE: This overwrites all copied files, rendering them non-executable. That is why we
+# created an executable file with a different name (startup.sh) that is not overwritten here.
+COPY --chown=user . /app
+# Set the user for Dev Mode
+USER user
+# Set default command (executable file in WORKDIR)
+CMD [ "/app/startup.sh" ]

PlotMyData/__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from pathlib import Path
+import warnings
+import os
+from . import agent
+# Ensure upload directory exists
+upload_dir = "/tmp/uploads"
+Path(upload_dir).mkdir(parents=True, exist_ok=True)
+# Read, write, execute for owner; read and execute for others
+os.chmod(upload_dir, 0o755)
+# Suppress Pydantic serialization warnings
+warnings.filterwarnings("ignore", message="Pydantic serializer warnings")

PlotMyData/agent.py ADDED Viewed

	@@ -0,0 +1,352 @@

+from google.adk.plugins.save_files_as_artifacts_plugin import SaveFilesAsArtifactsPlugin
+from google.adk.tools.mcp_tool.mcp_session_manager import StdioConnectionParams
+from google.adk.tools.mcp_tool.mcp_session_manager import SseConnectionParams
+from google.adk.tools.mcp_tool.mcp_toolset import McpToolset
+from google.adk.tools.tool_context import ToolContext
+from google.adk.tools.base_tool import BaseTool
+from google.adk.agents.callback_context import CallbackContext
+from google.adk.agents import LlmAgent
+from google.adk.models import LlmResponse, LlmRequest
+from google.adk.models.lite_llm import LiteLlm
+from google.adk.apps import App
+from google.genai import types
+from mcp import ClientSession, StdioServerParameters
+from mcp.types import CallToolResult, TextContent
+from mcp.client.stdio import stdio_client
+from typing import Dict, Any, Optional, Tuple
+from prompts import Root, Run, Data, Plot, Install
+import base64
+import os
+# Define MCP server parameters
+server_params = StdioServerParameters(
+    command="Rscript",
+    args=[
+        # Use --vanilla to ignore .Rprofile, which is meant for the R instance running mcp_session()
+        "--vanilla",
+        "server.R",
+    ],
+)
+# STDIO transport to local R MCP server
+connection_params = StdioConnectionParams(server_params=server_params, timeout=60)
+# Define model
+# If we're using the OpenAI API, get the value of OPENAI_MODEL_NAME set by entrypoint.sh
+# If we're using an OpenAI-compatible endpoint (Docker Model Runner), use a fake API key
+model = LiteLlm(
+    model=os.environ.get("OPENAI_MODEL_NAME", ""),
+    api_key=os.environ.get("OPENAI_API_KEY", "fake-API-key"),
+)
+async def select_r_session(
+    callback_context: CallbackContext,
+) -> Optional[types.Content]:
+    """
+    Callback function to select the first R session.
+    """
+    async with stdio_client(server_params) as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+            await session.call_tool("select_r_session", {"session": 1})
+            print("[select_r_session] R session selected!")
+    # Return None to allow the LlmAgent's normal execution
+    return None
+async def catch_tool_errors(tool: BaseTool, args: dict, tool_context: ToolContext):
+    """
+    Callback function to catch errors from tool calls and turn them into a message.
+    Modified from https://github.com/google/adk-python/discussions/795#discussioncomment-13460659
+    """
+    try:
+        return await tool.run_async(args=args, tool_context=tool_context)
+    except Exception as e:
+        # Format the error as a tool response
+        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
+        response = CallToolResult(
+            # The error has class McpError; use e.error.message to get the text
+            content=[TextContent(type="text", text=e.error.message)],
+            isError=True,
+        )
+        return response.model_dump(exclude_none=True, mode="json")
+async def preprocess_artifact(
+    callback_context: CallbackContext, llm_request: LlmRequest
+) -> Optional[LlmResponse]:
+    """
+    Callback function to copy the latest artifact to a temporary file.
+    """
+    # Callback and artifact handling code modified from:
+    # https://google.github.io/adk-docs/callbacks/types-of-callbacks/#before-model-callback
+    # https://github.com/google/adk-python/issues/2176#issuecomment-3395469070
+    # Get the last user message in the request contents
+    last_user_message = llm_request.contents[-1].parts[-1].text
+    # Function call events have no text part, so set this to "" for string search in the next step
+    if last_user_message is None:
+        last_user_message = ""
+    # If a file was uploaded then SaveFilesAsArtifactsPlugin() adds "[Uploaded Artifact: file_name.csv]" to the user message
+    # Check for "Uploaded Artifact:" in the last user message
+    if "Uploaded Artifact:" in last_user_message:
+        # Add a text part only if there are any issues with accessing or saving the artifact
+        added_text = ""
+        # List available artifacts
+        artifacts = await callback_context.list_artifacts()
+        if len(artifacts) == 0:
+            added_text = "No uploaded file is available"
+        else:
+            most_recent_file = artifacts[-1]
+            try:
+                # Get artifact and byte data
+                artifact = await callback_context.load_artifact(
+                    filename=most_recent_file
+                )
+                byte_data = artifact.inline_data.data
+                # Save artifact as temporary file
+                tmp_dir = "/tmp/uploads"
+                tmp_file_path = os.path.join(tmp_dir, most_recent_file)
+                # Write the file
+                with open(tmp_file_path, "wb") as f:
+                    f.write(byte_data)
+                # Set appropriate permissions
+                os.chmod(tmp_file_path, 0o644)
+                print(f"[preprocess_artifact] Saved artifact as '{tmp_file_path}'")
+            except Exception as e:
+                added_text = f"Error processing artifact: {str(e)}"
+        # If there were any issues, add a new part to the user message
+        if added_text:
+            # llm_request.contents[-1].parts.append(types.Part(text=added_text))
+            llm_request.contents[0].parts.append(types.Part(text=added_text))
+            print(
+                f"[preprocess_artifact] Added text part to user message: '{added_text}'"
+            )
+    # Return None to allow the possibly modified request to go to the LLM
+    return None
+async def preprocess_messages(
+    callback_context: CallbackContext, llm_request: LlmRequest
+) -> Optional[LlmResponse]:
+    """
+    Callback function to modify user messages to point to temporary artifact file paths.
+    """
+    # Changes to session state made by callbacks are not preserved across events
+    # See: https://github.com/google/adk-docs/issues/904
+    # Therefore, for every callback invocation we need to loop over all events, not just the most recent one
+    for i in range(len(llm_request.contents)):
+        # Inspect the user message in the request contents
+        user_message = llm_request.contents[i].parts[-1].text
+        if user_message:
+            # Modify file path in user message
+            # Original file path inserted by SaveFilesAsArtifactsPlugin():
+            #   [Uploaded Artifact: "breast-cancer.csv"]
+            # Modified file path used by preprocess_artifact():
+            #   [Uploaded File: "/tmp/uploads/breast-cancer.csv"]
+            tmp_dir = "/tmp/uploads/"
+            if '[Uploaded Artifact: "' in user_message:
+                user_message = user_message.replace(
+                    '[Uploaded Artifact: "', f'[Uploaded File: "{tmp_dir}'
+                )
+                llm_request.contents[i].parts[-1].text = user_message
+                print(f"[preprocess_messages] Modified user message: '{user_message}'")
+    return None
+def detect_file_type(byte_data: bytes) -> Tuple[str, str]:
+    """
+    Detect file type from magic number/bytes and return (mime_type, file_extension).
+    Supports BMP, JPEG, PNG, TIFF, and PDF.
+    """
+    if len(byte_data) < 8:
+        # Default to PNG if we can't determine
+        return "image/png", "png"
+    # Check magic numbers
+    if byte_data.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "image/png", "png"
+    elif byte_data.startswith(b"\xff\xd8\xff"):
+        return "image/jpeg", "jpg"
+    elif byte_data.startswith(b"BM"):
+        return "image/bmp", "bmp"
+    elif byte_data.startswith(b"II*\x00") or byte_data.startswith(b"MM\x00*"):
+        return "image/tiff", "tiff"
+    elif byte_data.startswith(b"%PDF"):
+        return "application/pdf", "pdf"
+    else:
+        # Default to PNG if we can't determine
+        return "image/png", "png"
+async def skip_summarization_for_plot_success(
+    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
+) -> Optional[Dict]:
+    """
+    Callback function to turn off summarization if plot succeeded.
+    """
+    # If there was an error making the plot, the LLM tells the user what happened.
+    # This happens because skip_summarization is False by default.
+    # But if the plot was created successfully, there's
+    # no need for an extra LLM call to tell us it's there.
+    if tool.name in ["make_plot", "make_ggplot"]:
+        if not tool_response["isError"]:
+            tool_context.actions.skip_summarization = True
+    return None
+async def save_plot_artifact(
+    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
+) -> Optional[Dict]:
+    """
+    Callback function to save plot files as an ADK artifact.
+    """
+    # Look for plot tool (so we don't bother with transfer_to_agent or other functions)
+    if tool.name in ["make_plot", "make_ggplot"]:
+        # In ADK 1.17.0, tool_response is a dict (i.e. result of model_dump method invoked on MCP CallToolResult instance):
+        # https://github.com/google/adk-python/commit/4df926388b6e9ebcf517fbacf2f5532fd73b0f71
+        # https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#parsing-tool-results
+        if "content" in tool_response and not tool_response["isError"]:
+            for content in tool_response["content"]:
+                if "type" in content and content["type"] == "text":
+                    # Convert tool response (hex string) to bytes
+                    byte_data = bytes.fromhex(content["text"])
+                    # Detect file type from magic number
+                    mime_type, file_extension = detect_file_type(byte_data)
+                    # Encode binary data to Base64 format
+                    encoded = base64.b64encode(byte_data).decode("utf-8")
+                    artifact_part = types.Part(
+                        inline_data={
+                            "data": encoded,
+                            "mime_type": mime_type,
+                        }
+                    )
+                    # Use second part of tool name (e.g. make_ggplot -> ggplot.png)
+                    filename = f"{tool.name.split("_", 1)[1]}.{file_extension}"
+                    await tool_context.save_artifact(
+                        filename=filename, artifact=artifact_part
+                    )
+                    # Format the success message as a tool response
+                    text = f"Plot created and saved as an artifact: {filename}"
+                    response = CallToolResult(
+                        content=[TextContent(type="text", text=text)],
+                    )
+                    return response.model_dump(exclude_none=True, mode="json")
+    # Passthrough for other tools or no matching content (e.g. tool error)
+    return None
+# Create agent to run R code
+run_agent = LlmAgent(
+    name="Run",
+    description="Runs R code without making plots. Use the `Run` agent for executing code that does not load data or make a plot.",
+    model=model,
+    instruction=Run,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["run_visible", "run_hidden"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Create agent to load data
+data_agent = LlmAgent(
+    name="Data",
+    description="Loads data into an R data frame and summarizes it. Use the `Data` agent for loading data from a file or URL before making a plot.",
+    model=model,
+    instruction=Data,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["run_visible"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Create agent to make plots using R code
+plot_agent = LlmAgent(
+    name="Plot",
+    description="Makes plots using R code. Use the `Plot` agent after loading any required data.",
+    model=model,
+    instruction=Plot,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["make_plot", "make_ggplot"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+    after_tool_callback=[skip_summarization_for_plot_success, save_plot_artifact],
+)
+# Create agent to install R packages
+install_agent = LlmAgent(
+    name="Install",
+    description="Installs R packages. Use the `Install` agent when an R package needs to be installed.",
+    model=model,
+    instruction=Install,
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["run_visible"],
+        )
+    ],
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+# Create parent agent and assign children via sub_agents
+root_agent = LlmAgent(
+    name="Coordinator",
+    # "Use the..." tells sub-agents to transfer to Coordinator for help requests
+    description="Multi-agent system for performing actions in R. Use the `Coordinator` agent for getting help on packages, datasets, and functions.",
+    model=model,
+    instruction=Root,
+    # To pass control back to root, the help and run functions should be tools or a ToolAgent (not sub_agent)
+    tools=[
+        McpToolset(
+            connection_params=connection_params,
+            tool_filter=["help_package", "help_topic"],
+        )
+    ],
+    sub_agents=[
+        run_agent,
+        data_agent,
+        plot_agent,
+        install_agent,
+    ],
+    # Select R session
+    before_agent_callback=select_r_session,
+    # Save user-uploaded artifact as a temporary file and modify messages to point to this file
+    before_model_callback=[preprocess_artifact, preprocess_messages],
+    before_tool_callback=catch_tool_errors,
+)
+app = App(
+    name="PlotMyData",
+    root_agent=root_agent,
+    # This inserts user messages like '[Uploaded Artifact: "breast-cancer.csv"]'
+    plugins=[SaveFilesAsArtifactsPlugin()],
+)

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
 ---
-title: Plotmydata
-emoji: 💻
-colorFrom: blue
 colorTo: purple
 sdk: docker
 pinned: false
 license: mit
 short_description: Data analysis and plotting with Google ADK, MCP, and R
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: PlotMyData
+emoji: 👀
+colorFrom: yellow
 colorTo: purple
 sdk: docker
 pinned: false
 license: mit
 short_description: Data analysis and plotting with Google ADK, MCP, and R
+app_port: 8080
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

entrypoint.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/bin/sh
+# Exit immediately on errors
+set -e
+# Use profile for persistent R session
+cp profile.R .Rprofile
+# Start R in a detached screen session
+# TODO: Look at using supervisord for another way to run multiple services
+# https://docs.docker.com/engine/containers/multi-service_container/#use-a-process-manager
+screen -d -m R
+# Activate virtual environment
+export PATH="/opt/venv/bin:$PATH"
+# Set OpenAI model
+export OPENAI_MODEL_NAME=gpt-4o
+echo "Using OpenAI with ${OPENAI_MODEL_NAME}"
+# Suppress e.g. UserWarning: [EXPERIMENTAL] BaseAuthenticatedTool: This feature is experimental ...
+# https://github.com/google/adk-python/commit/4afc9b2f33d63381583cea328f97c02213611529
+export ADK_SUPPRESS_EXPERIMENTAL_FEATURE_WARNINGS=true
+# For local development, the API key is read from a file
+# (not needed on HF Spaces, where secrets are injected into container's environment)
+if [ -z "$OPENAI_API_KEY" ]; then
+  export OPENAI_API_KEY=$(cat /run/secrets/openai-api-key)
+fi
+exec adk web --host 0.0.0.0 --port 8080 --reload_agents

functions.R ADDED Viewed

	@@ -0,0 +1,101 @@

+# Summarize a data frame, for example:
+# Data frame dimensions: 10 rows x 3 columns
+# Data Summary:
+# col1: integer
+# col2: numeric, missing=3
+# col3: character
+data_summary <- function(df) {
+  # Label a column by type; numeric columns whose non-missing values are all
+  # whole numbers (within floating-point tolerance) are labelled "integer"
+  type_map <- function(x) {
+    if (is.factor(x)) return("factor")
+    if (is.character(x)) return("character")
+    if (is.logical(x)) return("logical")
+    if (inherits(x, "Date")) return("Date")
+    if (is.numeric(x)) {
+      vals <- x[!is.na(x)]
+      if (length(vals) > 0 && all(abs(vals - round(vals)) < .Machine$double.eps^0.5)) return("integer")
+      return("numeric")
+    }
+    return(class(x)[1])
+  }
+  # One line per column: "name: type", plus the missing count when there is one
+  describe_column <- function(col) {
+    dtype <- type_map(df[[col]])
+    miss <- sum(is.na(df[[col]]))
+    if (miss > 0) {
+      sprintf("%s: %s, missing=%d", col, dtype, miss)
+    } else {
+      sprintf("%s: %s", col, dtype)
+    }
+  }
+  header <- c(
+    sprintf("Data frame dimensions: %d rows x %d columns", nrow(df), ncol(df)),
+    "Data Summary:"
+  )
+  column_lines <- vapply(names(df), describe_column, character(1), USE.NAMES = FALSE)
+  paste(c(header, column_lines), collapse = "\n")
+}
+# Check if packages are installed and return status message
+# Example: check_packages(c("nlme", "ggplot2", "scatterplot3d"))
+# Returns: "nlme and ggplot2 are already installed" if all are installed
+# Or: "scatterplot3d needs to be installed" if some are missing
+# The message format makes it easy to determine if installation is needed:
+# - If message contains "already installed" and does NOT contain "to be installed", all packages are installed
+# - If message contains "needs to be installed" (one package) or "need to be installed" (several packages), some packages need installation
+check_packages <- function(packages) {
+  if (length(packages) == 0) {
+    return("No packages specified")
+  }
+  # Join names for a sentence: "a", "a and b", or "a, b and c"
+  # (no comma before "and")
+  join_names <- function(pkgs) {
+    n <- length(pkgs)
+    if (n <= 2) {
+      return(paste(pkgs, collapse = " and "))
+    }
+    paste(paste(pkgs[-n], collapse = ", "), "and", pkgs[n])
+  }
+  # Build "<names> <status>", choosing the singular or plural wording
+  status_message <- function(pkgs, singular, plural) {
+    paste(join_names(pkgs), if (length(pkgs) == 1) singular else plural)
+  }
+  # Check which packages are installed (one TRUE/FALSE per package)
+  installed <- vapply(packages, function(pkg) {
+    requireNamespace(pkg, quietly = TRUE)
+  }, logical(1))
+  installed_pkgs <- packages[installed]
+  missing_pkgs <- packages[!installed]
+  if (length(missing_pkgs) == 0) {
+    # All packages are installed
+    return(status_message(installed_pkgs, "is already installed", "are already installed"))
+  }
+  if (length(installed_pkgs) == 0) {
+    # No packages are installed
+    return(status_message(missing_pkgs, "needs to be installed", "need to be installed"))
+  }
+  # Some packages are installed, some are missing
+  installed_msg <- status_message(installed_pkgs, "is already installed", "are already installed")
+  missing_msg <- status_message(missing_pkgs, "needs to be installed", "need to be installed")
+  paste(installed_msg, ";", missing_msg)
+}

profile.R ADDED Viewed

	@@ -0,0 +1,12 @@

+# Set a default CRAN mirror
+options(repos = c(CRAN = "https://cloud.r-project.org"))
+# Load a commonly used package
+library(tidyverse)
+# Use our own data summary function
+source("functions.R")
+# Make this R session visible to the mcptools MCP server
+# NOTE: mcp_session() needs to be run in an *interactive* R session, so we can't put it in server.R
+mcptools::mcp_session()

prompts.R ADDED Viewed

	@@ -0,0 +1,133 @@

+# Prompt text describing the make_plot tool (base R graphics).
+# The examples label each axis with the variable actually plotted on it.
+make_plot_prompt <- '
+Runs R code to make a plot with base R graphics.
+Args:
+  code: R code to run
+Returns:
+  Binary image data
+Details:
+`code` should be R code that begins with e.g. `png(filename)` and ends with `dev.off()`.
+Always use the variable `filename` instead of an actual file name.
+Example: User requests "Plot x (1,2,3) and y (10,20,30)", then `code` is:
+png(filename)
+x <- c(1, 2, 3)
+y <- c(10, 20, 30)
+plot(x, y)
+dev.off()
+Example: User requests "Give me a 8.5x11 inch PDF of y = x^2 from -1 to 1, large font, titled with the function", then `code` is:
+pdf(filename, width = 8.5, height = 11)
+par(cex = 2)
+x <- seq(-1, 1, length.out = 100)
+y <- x^2
+plot(x, y, type = "l")
+title(main = quote(y == x^2))
+dev.off()
+Example: User requests "Plot radius_worst (y) vs radius_mean (x) from https://zenodo.org/records/3608984/files/breastcancer.csv?download=1", then `code` is:
+png(filename)
+df <- read.csv("https://zenodo.org/records/3608984/files/breastcancer.csv?download=1")
+plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
+dev.off()
+Example: User requests "Plot radius_worst (y) vs radius_mean (x)" and [Uploaded File: "/tmp/uploads/breast-cancer.csv"], then `code` is:
+png(filename)
+df <- read.csv("/tmp/uploads/breast-cancer.csv")
+plot(df$radius_mean, df$radius_worst, xlab = "radius_mean", ylab = "radius_worst")
+dev.off()
+'
+make_ggplot_prompt <- '
+Runs R code to make a plot with ggplot/ggplot2.
+Args:
+  code: R code to run
+Returns:
+  Binary image data
+Details:
+`code` should be R code that begins with `library(ggplot2)` and ends with `ggsave(filename, device = "png")`.
+Example: User requests "ggplot wt vs mpg from mtcars", then `code` is:
+library(ggplot2)
+ggplot(mtcars, aes(mpg, wt)) +
+  geom_point()
+ggsave(filename, device = "png")
+Example: User requests "ggplot wt vs mpg from mtcars as pdf", then `code` is:
+library(ggplot2)
+ggplot(mtcars, aes(mpg, wt)) +
+  geom_point()
+ggsave(filename, device = "pdf")
+Important notes:
+- `code` must end with ggsave(filename, device = ) with a specified device.
+- Use `device = "png"` unless the user requests a different format.
+- Always use the variable `filename` instead of an actual file name.
+'
+help_topic_prompt <- '
+Gets documentation for a dataset, function, or other topic.
+Args:
+  topic: Topic or function to get help for.
+Returns:
+  Documentation text. May include runnable R examples.
+Examples:
+- Show the arguments of the `lm` function: help_topic("lm").
+- Show the format of the `airquality` dataset: help_topic("airquality").
+- Get variables in `Titanic`: help_topic("Titanic").
+'
+help_package_prompt <- '
+Summarizes datasets and functions in an R package.
+Args:
+  package: Package to get help for.
+Returns:
+  Documentation text. Includes a package description and index of functions and datasets.
+Examples:
+- Get the names of R datasets: help_package("datasets").
+- List graphics functions in base R: help_package("graphics").
+'
+run_visible_prompt <- '
+Runs R code and returns the result.
+Does not make plots.
+Args:
+  code: R code to run.
+Returns:
+  Result of R code execution.
+'
+run_hidden_prompt <- '
+Run R code without returning the result.
+Does not make plots.
+Args:
+  code: R code to run.
+Returns:
+  Nothing.
+NOTE: Choose this tool if:
+  - The user asks to save the result in a variable, or
+  - You are performing intermediate calculations before making a plot.
+'

prompts.py ADDED Viewed

	@@ -0,0 +1,147 @@

+Root = """
+Your purpose is to interact with an R session to perform data analysis and visualization on the user's behalf.
+You cannot run code directly, but may use the `Data`, `Plot`, `Run`, and `Install` agents.
+Only use the `Run` agent if the following conditions are both true:
+- The operation is requested by the user ("calculate" or "run"), and
+- The code does not make a plot, chart, graph, or any other visualization.
+You may call a help tool before transfering control to an agent:
+- If an R dataset ("dataset") is requested, use help_package('datasets') to find the correct dataset name.
+- If the user requests documentation for specific datasets or functions, use the `help_topic` tool.
+Examples:
+- Query includes "?boxplot": The user is requesting documentation. Call help_topic('boxplot') then transfer to an agent.
+- "Plot distance vs speed from the cars dataset": This is a plot request using an R dataset. Call help_package('datasets') then transfer to the `Data` agent.
+- "Calculate x = cos(x) for x = 0 to 12 and make a plot": This is a plot that does not require data. Transfer to the `Plot` agent.
+- "Run x <- 2": This is code execution without data or plot. Transfer to the `Run` agent.
+- "Load the data": The user is asking to load data from an uploaded file. Transfer to the `Data` agent.
+Important notes:
+- Data may be provided directly by the user, in a URL, in an "Uploaded File" message, or an R dataset.
+- You must not use the `Run` agent to make a plot or execute any other plotting commands.
+- The only way to make a plot, chart, graph, or other visualization is to transfer to the `Data` or `Plot` agents.
+- If an R package needs to be installed, transfer to the `Install` agent. Do not use install.packages(), library(), or any other commands for package installation and loading.
+"""
+Run = """
+You are an agent that runs R code using the `run_visible` and `run_hidden` tools.
+You cannot make plots.
+Perform the following actions:
+- Interpret the user's request as R code.
+- If the code makes a plot (including ggplot or any other type of graph or visualization), transfer to the `Plot` agent.
+- If the code assigns the result to a variable, pass the code to the `run_hidden` tool.
+- Otherwise, pass the code to the `run_visible` tool.
+Important notes:
+- The `run_hidden` tool runs R commands without returning the result. This is useful for reducing LLM token usage while working with large variables.
+- You can use dplyr, tidyr, and other tidyverse packages.
+- Your response should always be valid, self-contained R code.
+- If the tool response is an error (isError: true), respond with the exact text of the error message and stop running code.
+- If you need an R package that is not installed, transfer to the `Install` agent to install it, then transfer back to continue running the code.
+"""
+Data = """
+You are an agent that loads and summarizes data.
+Your main task has three parts:
+1. Generate R code to create a `df` object and summarize it with `data_summary(df)`.
+2. Use the `run_visible` tool to execute the code.
+3. Transfer to the `Plot` agent to make a plot.
+Choose the first available data source:
+1: Data provided directly by the user.
+2: File provided in an "Uploaded File" message. Do not use other files.
+3: URL provided by the user. Do not use other URLs.
+4: Available R dataset that matches the user's request.
+Examples of code for `run_visible`:
+- User requests "plot 1,2,3 10,20,30": code is `df <- data.frame(x = c(1,2,3), y = (10, 20, 30))
+data_summary(df)`.
+- User requests "plot cars data": code is `df <- data.frame(cars)
+data_summary(df)`
+- To read CSV data from a URL, use `df <- read.csv(csv_url)`, where csv_url is the exact URL provided by the user.
+- To read CSV data from a file, use `df <- read.csv(file_path)`, where file_path is provided in an "Uploaded File" user message.
+What to do after calling `run_visible`:
+- If "Data Summary" exists and the user requested a plot, then pass control to the `Plot` agent.
+- If "Data Summary" exists and the user did not request a plot, then stop the workflow.
+- If the user provided data but "Data Summary" does not exist, then stop and report a problem.
+Important notes:
+- Do not use the `run_visible` tool to make a plot.
+- Run `data_summary(df)` in your code. Do not run `summary(df)`.
+- You can use dplyr, tidyr, and other tidyverse packages.
+- If you need an R package that is not installed, transfer to the `Install` agent to install it, then transfer back to continue loading the data.
+"""
+Plot = """
+You are an agent that makes plots with R code using the `make_plot` and `make_ggplot` tools.
+Coding strategy:
+- Use previously assigned variables (especially `df`) in your code.
+    - Do not load data yourself.
+    - Use a specific variable other than `df` if it is better for making the plot.
+- Choose column names in `df` based on the user's request.
+    - Column names are case-sensitive, syntactically valid R names.
+    - Look in the Data Summary for details.
+- No data are required for plotting functions and simulations.
+Plot tools:
+- For base R graphics use the `make_plot` tool.
+- For ggplot/ggplot2 use the `make_ggplot` tool.
+- Both of these tools save the plot as a conversation artifact that is visible to the user.
+Examples:
+- User requests to plot "dates", but the Data Summary lists a "Date" column. Answer: use `df$Date`.
+- User requests to plot "volcano", but `df` also exists. Answer: The `volcano` matrix is better for images; use `image(volcano)`.
+Important notes:
+- Use base R graphics unless the user asks for ggplot or ggplot2.
+- Pay attention to the user's request and use your knowledge of R to write code that gives the best-looking plot.
+- Your response should always be valid, self-contained R code.
+- If you need an R package that is not installed, transfer to the `Install` agent to install it, then transfer back to continue making the plot.
+"""
+Install = """
+You are an agent that installs R packages using the `run_visible` tool.
+Your workflow:
+1. Identify which packages need to be installed.
+2. First, check package installation status by calling `check_packages()` function using the `run_visible` tool. For example: `check_packages(c("package1", "package2"))`.
+3. Examine the result from `check_packages()`:
+   - If the result indicates all packages are already installed (contains "are already installed" and does NOT contain "needs to be installed"), then immediately transfer control back to the agent that requested the installation WITHOUT asking for confirmation.
+   - If the result indicates some or all packages need to be installed (contains "needs to be installed"), proceed to step 4.
+4. Clearly state which packages you will install (e.g., "I need to install the following packages: scatterplot3d, plotly").
+5. Ask the user for confirmation before proceeding (e.g., "Should I proceed with installing these packages?").
+6. Wait for the user to confirm before installing.
+7. Once confirmed, use the `run_visible` tool with R code like: `install.packages(c("package1", "package2"))` to install only the packages that are missing.
+8. After successful installation, transfer control back to the agent that requested the installation (e.g., transfer to the `Plot` agent if it was making a plot).
+Important notes:
+- ALWAYS call `check_packages()` first to check installation status before attempting to install.
+- If all packages are already installed, return to the previous agent immediately without asking for confirmation.
+- Only ask for user confirmation if some packages actually need to be installed.
+- ALWAYS clearly state which packages will be installed.
+- Use `run_visible` with `install.packages()` to install packages.
+- For multiple packages, use: `install.packages(c("package1", "package2"))`.
+- For a single package, use: `install.packages("package1")`.
+- If installation fails, report the error to the user and do not transfer control.
+- If installation succeeds, transfer control back to the calling agent to continue the original task.
+- Do not install packages without explicit user confirmation (unless all packages are already installed).
+"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+google-adk==1.23.0
+litellm==1.80.13
+mcp==1.26.0

server.R ADDED Viewed

	@@ -0,0 +1,134 @@

+# 20251009 Added plot tool
+# 20251023 Added help tools
+# Load ellmer for tool() and type_*()
+library(ellmer)
+# Read prompts
+source("prompts.R")
+# Return the help index of a package as a single newline-separated string
+# `package` is wrapped in parentheses so help() uses the value of the
+# argument instead of deparsing the name "package" itself
+help_package <- function(package) {
+  pkg_help <- help(package = (package), help_type = "text")
+  info_lines <- unlist(pkg_help$info)
+  paste(info_lines, collapse = "\n")
+}
+# Get help for a topic
+# Adapted from https://github.com/posit-dev/btw:::help_to_rd
+# Returns a character vector: a summary heading followed by the Rd source of
+# each matching help file, or a single "No help found" message
+help_topic <- function(topic) {
+  found <- help(topic = (topic), help_type = "text")
+  # Guard clause: nothing matched the topic
+  if(length(found) == 0) {
+    return(paste0("No help found for '", topic, "'. Please check the name and try again."))
+  }
+  # A topic can resolve to several help files
+  # e.g. help_topic(plot) returns the help for both base::plot and graphics::plot.default
+  help_paths <- as.character(found)
+  n_files <- length(help_paths)
+  # Pull the Rd source for one help file out of its package's Rd database
+  read_rd <- function(path) {
+    pkg <- basename(dirname(dirname(path)))
+    rd_file <- paste0(basename(path), ".Rd")
+    paste(as.character(tools::Rd_db(pkg)[[rd_file]]), collapse = "")
+  }
+  rd_text <- vapply(help_paths, read_rd, character(1))
+  # Number each help file so the LLM can tell them apart (Help file 1, Help file 2, ...)
+  sections <- paste0("## Help file ", seq_along(rd_text), ":\n", rd_text)
+  # Summary heading at the start of the message, with singular or plural wording
+  wording <- if(n_files == 1) " help file was retrieved: " else " help files were retrieved: "
+  heading <- paste0("# ", n_files, wording, paste(help_paths, collapse = ", "), ":\n")
+  c(heading, sections)
+}
+# Evaluate R code in the global environment and hand back its value
+# https://github.com/posit-dev/mcptools/issues/71
+run_visible <- function(code) {
+  expressions <- parse(text = code)
+  eval(expressions, envir = globalenv())
+}
+# Evaluate R code in the global environment, discarding its value
+# A fixed confirmation message is returned in place of the result
+# https://github.com/posit-dev/mcptools/issues/71
+run_hidden <- function(code) {
+  expressions <- parse(text = code)
+  eval(expressions, envir = globalenv())
+  "The code executed successfully"
+}
+# Run R code to make a plot and return the image data
+# `code` is a string of R code that is expected to write the plot to the
+# path held in the local variable `filename`
+# Returns the contents of that file as a raw vector
+make_plot <- function(code) {
+  # Cursor, Bing and Google AI all suggest this but it causes an error:
+  # Error in png(filename = raw_conn) :
+  #   'filename' must be a non-empty character string
+  ## Write plot to an in-memory PNG
+  #raw_conn <- rawConnection(raw(), open = "wb")
+  #png(filename = raw_conn)
+  # Use a temporary file to save the plot
+  filename <- tempfile(fileext = ".dat")
+  # Delete the temporary file when this function exits
+  on.exit(unlink(filename))
+  # Run the plotting code (this should include e.g. png() and dev.off())
+  # The code uses a local variable (filename), so don't use envir = globalenv() here
+  # With no envir argument, eval() runs the code in this function's frame
+  eval(parse(text = code))
+  # Return a PNG image as raw bytes so ADK can save it as an artifact
+  readr::read_file_raw(filename)
+}
+# This is the same code as make_plot() but has a different tool description
+# `code` is a string of R code that is expected to write the plot to the
+# path held in the local variable `filename`
+# Returns the contents of that file as a raw vector
+make_ggplot <- function(code) {
+  # Temporary file that the plotting code writes to
+  filename <- tempfile(fileext = ".dat")
+  # Delete the temporary file when this function exits
+  on.exit(unlink(filename))
+  # Evaluate in this function's frame so the code can see `filename`
+  eval(parse(text = code))
+  # Read the saved plot back as raw bytes
+  readr::read_file_raw(filename)
+}
+# Start the MCP server with the six tools defined above
+# Each tool() call pairs a function with a *_prompt variable and declares
+# its single string argument
+# NOTE(review): the *_prompt variables are presumably defined in prompts.R
+# (sourced at the top of this file) and used as tool descriptions -- confirm
+mcptools::mcp_server(tools = list(
+  # Help tools
+  tool(
+    help_package,
+    help_package_prompt,
+    arguments = list(
+      package = type_string("Package to get help for.")
+    )
+  ),
+  tool(
+    help_topic,
+    help_topic_prompt,
+    arguments = list(
+      topic = type_string("Topic or function to get help for.")
+    )
+  ),
+  # Code execution tools (both evaluate in the global environment)
+  tool(
+    run_visible,
+    run_visible_prompt,
+    arguments = list(
+      code = type_string("R code to run.")
+    )
+  ),
+  tool(
+    run_hidden,
+    run_hidden_prompt,
+    arguments = list(
+      code = type_string("R code to run.")
+    )
+  ),
+  # Plot tools (same implementation, different prompts)
+  tool(
+    make_plot,
+    make_plot_prompt,
+    arguments = list(
+      code = type_string("R code to make the plot.")
+    )
+  ),
+  tool(
+    make_ggplot,
+    make_ggplot_prompt,
+    arguments = list(
+      code = type_string("R code to make the plot.")
+    )
+  )
+))