Spaces:
Running
Running
File size: 11,076 Bytes
399b80c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | from typing import List, cast
from enum import Enum
from datetime import datetime
from openspace.utils.logging import Logger
logger = Logger.get_logger(__name__)
try:
from anthropic.types.beta import (
BetaCacheControlEphemeralParam,
BetaContentBlockParam,
BetaImageBlockParam,
BetaMessage,
BetaMessageParam,
BetaTextBlock,
BetaTextBlockParam,
BetaToolResultBlockParam,
BetaToolUseBlockParam,
)
ANTHROPIC_AVAILABLE = True
except ImportError:
ANTHROPIC_AVAILABLE = False
# Anthropic beta feature flags.
# Computer-use tool beta (this file targets claude-sonnet-4-5 with this flag).
COMPUTER_USE_BETA_FLAG = "computer-use-2025-01-24"
# Prompt-caching beta.
PROMPT_CACHING_BETA_FLAG = "prompt-caching-2024-07-31"
class APIProvider(Enum):
    """Enumeration of the API providers a model can be reached through.

    Only the direct Anthropic API is enabled; the Bedrock and Vertex members
    are kept below as commented-out placeholders.
    """

    ANTHROPIC = "anthropic"
    # BEDROCK = "bedrock"
    # VERTEX = "vertex"
# Provider to model name mapping (simplified for claude-sonnet-4-5 only).
# Keys are (provider, model name) tuples; values are the model identifier
# string to use with that provider. The Bedrock and Vertex entries are
# commented out, matching the commented-out APIProvider members.
PROVIDER_TO_DEFAULT_MODEL_NAME: dict = {
    (APIProvider.ANTHROPIC, "claude-sonnet-4-5"): "claude-sonnet-4-5",
    # (APIProvider.BEDROCK, "claude-sonnet-4-5"): "us.anthropic.claude-sonnet-4-5-v1:0",
    # (APIProvider.VERTEX, "claude-sonnet-4-5"): "claude-sonnet-4-5-v1",
}
def get_system_prompt(platform: str = "Ubuntu") -> str:
    """
    Build the <SYSTEM_CAPABILITY> system prompt for a platform.

    The platform name is matched case-insensitively: "windows"/"win32" select
    the Windows prompt, "macos"/"darwin"/"mac" select the macOS prompt, and
    any other value falls back to the Ubuntu prompt.

    Args:
        platform: Platform type (Ubuntu, Windows, macOS, or Darwin)

    Returns:
        System prompt string, with the current local date filled in
    """
    platform_key = platform.lower()
    today = datetime.today().strftime('%A, %B %d, %Y')

    # Platform-specific pieces: the opening line, the notes that follow the
    # date line, and any notes that come after the shared closing lines.
    if platform_key in ("windows", "win32"):
        opening = "* You are utilising a Windows virtual machine using x86_64 architecture with internet access."
        platform_notes = [
            "* Home directory of this Windows system is 'C:\\Users\\user'.",
            "* When you want to open some applications on Windows, please use Double Click on it instead of clicking once.",
        ]
        final_notes = []
    elif platform_key in ("macos", "darwin", "mac"):
        opening = "* You are utilising a macOS system with internet access."
        platform_notes = [
            "* Home directory of this macOS system is typically '/Users/[username]' or can be accessed via '~'.",
            "* On macOS, use Command (⌘) key combinations instead of Ctrl (e.g., Command+C for copy).",
        ]
        final_notes = [
            "* When the task is completed, simply describe what you've done in your response WITHOUT using the tool again.",
        ]
    else:  # Ubuntu/Linux
        opening = "* You are utilising an Ubuntu virtual machine using x86_64 architecture with internet access."
        platform_notes = [
            "* Home directory of this Ubuntu system is '/home/user'.",
        ]
        final_notes = []

    # Lines every platform shares, in the order they appear in the prompt.
    prompt_lines = [
        "<SYSTEM_CAPABILITY>",
        opening,
        "* You can use the computer tool to interact with the desktop: take screenshots, click, type, and control applications.",
        "* To accomplish tasks, you MUST use the computer tool to see the screen and take actions.",
        "* To open browser, please just click on the Chrome icon. Note, Chrome is what is installed on your system.",
        "* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.",
        "* DO NOT ask users for clarification during task execution. DO NOT stop to request more information from users. Always take action using available tools.",
        "* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.",
        f"* The current date is {today}.",
        *platform_notes,
        "* After each action, the system will provide you with a new screenshot showing the result.",
        "* Continue taking actions until the task is complete.",
        *final_notes,
        "</SYSTEM_CAPABILITY>",
    ]
    return "\n".join(prompt_lines)
def inject_prompt_caching(messages: "List[BetaMessageParam]") -> None:
    """
    Set cache breakpoints for the 3 most recent turns.

    One cache breakpoint is left for tools/system prompt, to be shared across sessions.

    The history is walked from newest to oldest. For each of the three most
    recent user messages whose content is a non-empty list, the last content
    block gets an ephemeral ``cache_control`` marker. On the next such
    message the marker is removed from its last block and the walk stops.
    Does nothing when the anthropic SDK could not be imported.

    The parameter annotation is a string so that defining this function does
    not raise NameError when the anthropic types failed to import.

    Args:
        messages: Message history (modified in place)
    """
    if not ANTHROPIC_AVAILABLE:
        return

    breakpoints_remaining = 3
    for message in reversed(messages):
        if message["role"] != "user":
            continue
        content = message["content"]
        # Plain-string content has no blocks to tag, and an empty list has no
        # last block (content[-1] would raise IndexError), so skip both.
        if not isinstance(content, list) or not content:
            continue
        if breakpoints_remaining:
            breakpoints_remaining -= 1
            # Use type ignore to bypass TypedDict check until SDK types are updated
            content[-1]["cache_control"] = BetaCacheControlEphemeralParam(  # type: ignore
                {"type": "ephemeral"}
            )
        else:
            content[-1].pop("cache_control", None)
            # we'll only ever have one extra turn per loop
            break
def maybe_filter_to_n_most_recent_images(
    messages: "List[BetaMessageParam]",
    images_to_keep: int,
    min_removal_threshold: int,
) -> None:
    """
    With the assumption that images are screenshots that are of diminishing value as
    the conversation progresses, remove all but the final `images_to_keep` tool_result
    images in place, with a chunk of min_removal_threshold to reduce the amount we
    break the implicit prompt cache.

    Images are removed oldest-first (in message order). The number removed is
    rounded down to a multiple of `min_removal_threshold`, so more than
    `images_to_keep` images may remain. Does nothing when the anthropic SDK
    could not be imported or when `images_to_keep` is None.

    The `messages` annotation is a string so that defining this function does
    not raise NameError when the anthropic types failed to import.

    Args:
        messages: Message history (modified in place)
        images_to_keep: Number of recent images to keep
        min_removal_threshold: Minimum number of images to remove at once (for
            cache efficiency). Values of 1 or less disable chunking.
    """
    if not ANTHROPIC_AVAILABLE or images_to_keep is None:
        return

    tool_result_blocks = cast(
        list[BetaToolResultBlockParam],
        [
            item
            for message in messages
            for item in (
                message["content"] if isinstance(message["content"], list) else []
            )
            if isinstance(item, dict) and item.get("type") == "tool_result"
        ],
    )

    # Only list-valued content can hold image blocks; string or None content
    # contributes nothing (and iterating None would raise TypeError).
    total_images = sum(
        1
        for tool_result in tool_result_blocks
        if isinstance(tool_result.get("content"), list)
        for content in tool_result["content"]
        if isinstance(content, dict) and content.get("type") == "image"
    )

    images_to_remove = total_images - images_to_keep
    # for better cache behavior, we want to remove in chunks; a threshold of
    # 0 would divide by zero and 1 is a no-op, so only chunk above 1
    if min_removal_threshold > 1:
        images_to_remove -= images_to_remove % min_removal_threshold
    if images_to_remove <= 0:
        return

    for tool_result in tool_result_blocks:
        content_blocks = tool_result.get("content")
        if not isinstance(content_blocks, list):
            continue
        kept = []
        for block in content_blocks:
            is_image = isinstance(block, dict) and block.get("type") == "image"
            if is_image and images_to_remove > 0:
                images_to_remove -= 1
                continue
            kept.append(block)
        tool_result["content"] = kept
        # Quota met: later (more recent) tool results are left untouched.
        if images_to_remove <= 0:
            break
def response_to_params(response: "BetaMessage") -> "List[BetaContentBlockParam]":
    """
    Convert Anthropic response to parameter list.

    Handles text blocks, tool use blocks, and thinking blocks. Text blocks
    with empty text are dropped. Any other block type is passed through via
    its ``model_dump()``; if that fails, a warning is logged and the block is
    skipped.

    The annotations are strings so that defining this function does not raise
    NameError when the anthropic types failed to import.

    Args:
        response: Anthropic API response

    Returns:
        List of content blocks (empty when the SDK is unavailable or the
        response has no content)
    """
    if not ANTHROPIC_AVAILABLE or not response.content:
        return []

    res: List[BetaContentBlockParam] = []
    for block in response.content:
        # Check block type using type attribute.
        # Note: type may be a string or enum. str() of an ordinary Enum member
        # is "Class.MEMBER", so unwrap `.value` before converting to string.
        raw_type = getattr(block, "type", "")
        block_type = str(getattr(raw_type, "value", raw_type))

        if block_type == "text":
            # Regular text block
            if isinstance(block, BetaTextBlock) and block.text:
                res.append(BetaTextBlockParam(type="text", text=block.text))
        elif block_type == "thinking":
            # Thinking block (for Claude 4 and Sonnet 3.7)
            thinking_block = {
                "type": "thinking",
                "thinking": getattr(block, "thinking", ""),
            }
            if hasattr(block, "signature"):
                thinking_block["signature"] = getattr(block, "signature", None)
            res.append(cast(BetaContentBlockParam, thinking_block))
        elif block_type == "tool_use":
            # Tool use block - only include required fields to avoid API errors
            # (e.g., 'caller' field is not permitted by Anthropic API)
            tool_use_dict = {
                "type": "tool_use",
                "id": block.id,
                "name": block.name,
                "input": block.input,
            }
            res.append(cast(BetaToolUseBlockParam, tool_use_dict))
        else:
            # Unknown block type - try to handle generically (best effort:
            # a block that cannot be dumped is logged and skipped).
            try:
                res.append(cast(BetaContentBlockParam, block.model_dump()))
            except Exception as e:
                logger.warning(f"Failed to parse block type {block_type}: {e}")
    return res
|