| """Anthropic prompt caching breakpoints for outgoing LLM requests. |
| |
| Caching is GA on Anthropic's API and natively supported by litellm >=1.83 |
| via ``cache_control`` blocks. We apply two breakpoints (out of 4 allowed): |
| |
| 1. The tool block — caches all tool definitions as a single prefix. |
| 2. The system message — caches the rendered system prompt. |
| |
| Together these cover the ~4-5K static tokens that were being re-billed on |
| every turn. Subsequent turns within the 5-minute TTL hit cache_read pricing |
| (~10% of input cost) instead of full input. |
| |
| Non-Anthropic models (HF router, OpenAI) are passed through unchanged. |
| """ |
|
|
| from typing import Any |
|
|
|
|
| def with_prompt_caching( |
| messages: list[Any], |
| tools: list[dict] | None, |
| model_name: str | None, |
| ) -> tuple[list[Any], list[dict] | None]: |
| """Return (messages, tools) with cache_control breakpoints for Anthropic. |
| |
| No-op for non-Anthropic models. Original objects are not mutated; a fresh |
| list with replaced first message and last tool is returned, so callers |
| that share the underlying ``ContextManager.items`` list don't see their |
| persisted history rewritten. |
| """ |
| if not model_name or "anthropic" not in model_name: |
| return messages, tools |
|
|
| if tools: |
| new_tools = list(tools) |
| last = dict(new_tools[-1]) |
| last["cache_control"] = {"type": "ephemeral"} |
| new_tools[-1] = last |
| tools = new_tools |
|
|
| if messages: |
| first = messages[0] |
| role = ( |
| first.get("role") |
| if isinstance(first, dict) |
| else getattr(first, "role", None) |
| ) |
| if role == "system": |
| content = ( |
| first.get("content") |
| if isinstance(first, dict) |
| else getattr(first, "content", None) |
| ) |
| if isinstance(content, str) and content: |
| cached_block = [ |
| { |
| "type": "text", |
| "text": content, |
| "cache_control": {"type": "ephemeral"}, |
| } |
| ] |
| new_first = {"role": "system", "content": cached_block} |
| messages = [new_first] + list(messages[1:]) |
|
|
| return messages, tools |
|
|