[
  {
    "id": "token_saver",
    "name": "Token Saver",
    "meta": {
      "description": "Inlet filter that middle-truncates already consumed tool results before they are sent to the LLM, saving tokens and cost.",
      "type": "filter",
      "manifest": {
        "title": "Token Saver Filter",
        "author": "Filip Pytloun",
        "version": "4.1.0",
        "license": "MIT",
        "description": "Inlet filter that middle-truncates consumed tool results before they are sent to the LLM, saving tokens and cost."
      }
    },
    "content": "\"\"\"\ntitle: Token Saver Filter\nauthor: Filip Pytloun\nversion: 4.1.0\nlicense: MIT\ndescription: Inlet filter that middle-truncates consumed tool results before they are sent to the LLM, saving tokens and cost.\n\nA tool result is \"consumed\" when the model has already processed it\nand written a response. Unconsumed results (the model hasn't\nresponded yet) are never touched — the model needs the full output.\nThis means large tool outputs cost tokens only once, on the turn\nthey are first returned.\n\nHandles both Open WebUI tool result formats:\n- Native tool calling: role=\"tool\" messages\n- Legacy/details mode: <details ... result=\"...\"> tags\n  in assistant messages\n\nNever truncates assistant text, user, system, or developer messages.\n\"\"\"\n\nimport logging\nimport re\nfrom typing import Callable, Optional\n\nfrom pydantic import BaseModel, Field\n\n_log = logging.getLogger(__name__)\n\n# Regex to match <details ... result=\"...\"> attributes.\n# Group 1 is the tag text up to and including the opening quote of\n# result=, group 2 the attribute value (embedded quotes appear as &quot;),\n# group 3 the closing quote.\n# NOTE(review): the tag-name part of this pattern was restored from a\n# damaged copy of this file - confirm against the upstream release.\n_DETAILS_RESULT_RE = re.compile(\n    r'(<details\\b[^>]*?\\bresult=\")' r\"((?:&quot;|[^\\\"])*?)\" r'(\")',\n    re.DOTALL,\n)\n\n\nclass Filter:\n    class Valves(BaseModel):\n        priority: int = Field(\n            default=5,\n            description=(\n                \"Filter priority (lower = runs first). Set higher than other filters like mnemory.\"\n            ),\n        )\n        enabled: bool = Field(\n            default=True,\n            description=\"Enable or disable the token saver filter.\",\n        )\n        max_tool_result_chars: int = Field(\n            default=2000,\n            description=(\n                \"Max chars for consumed tool results (both native \"\n                \"role='tool' messages and <details> result \"\n                \"attributes). The model already processed and \"\n                \"summarized these — the truncated version is just \"\n                \"a reminder of what was returned.\"\n            ),\n        )\n        head_ratio: float = Field(\n            default=0.5,\n            description=\"Head/tail split ratio. 0.5 = equal head and tail.\",\n        )\n        show_status: bool = Field(\n            default=True,\n            description=\"Show truncation summary as a chat status message.\",\n        )\n        exclude_tools: str = Field(\n            default=\"mnemory_initialize_memory,mnemory_get_core_memories\",\n            description=(\n                \"Comma-separated list of tool names whose results \"\n                \"should never be truncated. These tools provide \"\n                \"context that must remain intact across the entire \"\n                \"conversation.\"\n            ),\n        )\n        debug: bool = Field(\n            default=False,\n            description=\"Show detailed per-message debug info in chat status.\",\n        )\n\n    class UserValves(BaseModel):\n        enabled: bool = Field(\n            default=True,\n            description=\"Enable token saver for this user.\",\n        )\n\n    def __init__(self):\n        self.valves = self.Valves()\n\n    # ------------------------------------------------------------------\n    # Helpers\n    # ------------------------------------------------------------------\n\n    @staticmethod\n    def _middle_truncate(\n        text: str, max_chars: int, head_ratio: float = 0.5\n    ) -> tuple[str, bool]:\n        \"\"\"Truncate text preserving head and tail, removing the middle.\n\n        Returns (text, was_truncated). Text that already fits in\n        max_chars is returned unchanged. When fewer than 100 chars\n        would remain next to the marker, the text is simply cut to its\n        first max_chars characters.\n        \"\"\"\n        # A negative budget would make text[:max_chars] slice from the end.\n        max_chars = max(max_chars, 0)\n        if len(text) <= max_chars:\n            return text, False\n\n        original_len = len(text)\n        marker = f\"\\n... [trimmed: {original_len:,} -> {max_chars:,} chars] ...\\n\"\n        available = max_chars - len(marker)\n        if available < 100:\n            return text[:max_chars], True\n\n        # head_ratio is a free-form valve; clamp it to [0, 1] so neither\n        # slice can exceed the budget.\n        head_ratio = min(max(head_ratio, 0.0), 1.0)\n        head_size = int(available * head_ratio)\n        tail_size = available - head_size\n        # text[-0:] is the whole string, so an empty tail needs its own case.\n        tail = text[-tail_size:] if tail_size > 0 else \"\"\n        return text[:head_size] + marker + tail, True\n\n    def _truncate_details_results(\n        self, content: str, max_result: int, head_ratio: float\n    ) -> tuple[str, int]:\n        \"\"\"Find <details ... result=\"...\"> attributes and truncate large ones.\n\n        Returns the rewritten content and the number of chars removed.\n        \"\"\"\n        saved = 0\n\n        def _replace(m: re.Match) -> str:\n            nonlocal saved\n            prefix, value, suffix = m.group(1), m.group(2), m.group(3)\n            if len(value) <= max_result:\n                return m.group(0)\n            truncated, was = self._middle_truncate(value, max_result, head_ratio)\n            if was:\n                saved += len(value) - len(truncated)\n                return prefix + truncated + suffix\n            return m.group(0)\n\n        new_content = _DETAILS_RESULT_RE.sub(_replace, content)\n        return new_content, saved\n\n    @staticmethod\n    def _find_consumed_boundary(messages: list[dict]) -> int:\n        \"\"\"Find the index before which all tool results are consumed.\n\n        Walk backwards through messages. A tool result is \"consumed\"\n        if there is an assistant message with non-empty content after\n        it — meaning the model has already seen and responded to it.\n\n        Returns the index of the latest unconsumed boundary: all tool\n        results at indices < boundary are consumed and eligible for\n        truncation. Tool results at indices >= boundary have not been\n        processed by the model yet and must be left untouched.\n        \"\"\"\n        # Find the last assistant message with actual content (not just\n        # an empty message with tool_calls).\n        last_response_idx = -1\n        for i in range(len(messages) - 1, -1, -1):\n            msg = messages[i]\n            if msg.get(\"role\") != \"assistant\":\n                continue\n            content = msg.get(\"content\", \"\")\n            if isinstance(content, str) and content.strip():\n                last_response_idx = i\n                break\n\n        # Everything before the last substantive assistant response\n        # has been consumed. If no assistant response exists, nothing\n        # is consumed (boundary = 0, nothing gets truncated).\n        return last_response_idx if last_response_idx > 0 else 0\n\n    async def _status(\n        self, emitter: Callable | None, msg: str, done: bool = True\n    ) -> None:\n        \"\"\"Send a status event through the emitter, if one was provided.\"\"\"\n        if not emitter:\n            return\n        await emitter({\"type\": \"status\", \"data\": {\"description\": msg, \"done\": done}})\n\n    # ------------------------------------------------------------------\n    # Inlet\n    # ------------------------------------------------------------------\n\n    async def inlet(\n        self,\n        body: dict,\n        __user__: Optional[dict] = None,\n        __event_emitter__: Optional[Callable] = None,\n    ) -> dict:\n        \"\"\"Truncate consumed tool results in body[\"messages\"] and return body.\"\"\"\n        if not self.valves.enabled:\n            return body\n\n        user_valves = __user__.get(\"valves\") if __user__ else None\n        if user_valves and hasattr(user_valves, \"enabled\") and not user_valves.enabled:\n            return body\n\n        messages = body.get(\"messages\", [])\n        if not messages:\n            return body\n\n        max_chars = self.valves.max_tool_result_chars\n        head_ratio = self.valves.head_ratio\n        excluded_tools = {\n            t.strip() for t in self.valves.exclude_tools.split(\",\") if t.strip()\n        }\n        boundary = self._find_consumed_boundary(messages)\n\n        if boundary <= 0:\n            if self.valves.debug:\n                await self._status(\n                    __event_emitter__,\n                    \"[TokenSaver] No consumed tool results found\",\n                )\n            return body\n\n        total_chars_saved = 0\n        truncated_count = 0\n\n        for i in range(boundary):\n            msg = messages[i]\n            role = msg.get(\"role\", \"\")\n            content = msg.get(\"content\", \"\")\n            if not isinstance(content, str):\n                continue\n\n            original_len = len(content)\n            msg_saved = 0\n\n            if role == \"tool\":\n                tool_name = msg.get(\"name\", \"\")\n                if tool_name in excluded_tools:\n                    if self.valves.debug:\n                        _log.debug(\"token_saver: skipping excluded tool %s\", tool_name)\n                    continue\n                if len(content) > max_chars:\n                    content, was = self._middle_truncate(content, max_chars, head_ratio)\n                    if was:\n                        msg_saved = original_len - len(content)\n\n            elif role == \"assistant\":\n                # NOTE(review): this guard and call were restored from a\n                # damaged copy of this file - confirm against the upstream release.\n                if \"<details\" in content:\n                    content, msg_saved = self._truncate_details_results(\n                        content, max_chars, head_ratio\n                    )\n\n            if msg_saved > 0:\n                total_chars_saved += msg_saved\n                truncated_count += 1\n                messages[i] = {**msg, \"content\": content}\n\n                if self.valves.debug:\n                    await self._status(\n                        __event_emitter__,\n                        f\"[TokenSaver] msg[{i}] role={role}: \"\n                        f\"{original_len:,} -> {len(content):,} chars \"\n                        f\"(saved {msg_saved:,})\",\n                    )\n\n        if truncated_count > 0:\n            estimated_tokens_saved = total_chars_saved // 4\n            summary = (\n                f\"[TokenSaver] Trimmed {truncated_count} message(s), \"\n                f\"saved ~{estimated_tokens_saved:,} tokens\"\n            )\n            _log.info(\n                \"token_saver: trimmed %d messages, saved ~%d chars (~%d tokens)\",\n                truncated_count,\n                total_chars_saved,\n                estimated_tokens_saved,\n            )\n            if self.valves.show_status:\n                await self._status(__event_emitter__, summary)\n        elif self.valves.debug:\n            await self._status(\n                __event_emitter__,\n                f\"[TokenSaver] {boundary} message(s) before boundary, \"\n                f\"none exceeded {max_chars:,} chars\",\n            )\n\n        return body\n"
  }
]