webui / token_compressor.py
Nerdur's picture
Upload 9 files
a76c39e verified
Raw
History Blame Contribute Delete
2.08 kB
"""
title: Token Saver Filter
author: nerdur
version: 0.1
description: Štedi tokene tako što ograničava dužinu istorije razgovora.
"""
from pydantic import BaseModel
from typing import Optional
class Filter:
    """Trim long chat histories before the request body is passed on.

    ``inlet`` keeps the leading system prompt (if any), the first
    non-system message and the last ``keep_last_messages`` messages, and
    replaces everything in between with a single placeholder message.
    ``outlet`` passes the body through unchanged.

    NOTE(review): the ``inlet``/``outlet``/``Valves`` shape looks like an
    Open WebUI filter plugin; confirm against the host that loads this file.
    """

    class Valves(BaseModel):
        # Character budget for all non-system messages combined; trimming
        # only happens when their summed content length exceeds this value.
        max_chars: int = 8000
        # Number of trailing messages that are always kept when trimming.
        keep_last_messages: int = 4
        # When False, ``inlet`` returns the body untouched.
        enabled: bool = True

    def __init__(self):
        self.valves = self.Valves()

    @staticmethod
    def _content_length(message: dict) -> int:
        """Return the number of text characters carried by *message*.

        Handles a plain string ``content`` as well as a list of parts
        (strings, or dicts with a string ``"text"`` entry). A missing or
        ``None`` content, and any non-text part, counts as zero characters.
        """
        content = message.get("content")
        if isinstance(content, str):
            return len(content)
        if isinstance(content, list):
            total = 0
            for part in content:
                if isinstance(part, str):
                    total += len(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    total += len(part["text"])
            return total
        return 0

    def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
        """Shorten ``body["messages"]`` when the history exceeds the budget.

        Args:
            body: Request payload; its ``"messages"`` entry is expected to be
                a list of dicts with ``"role"`` and ``"content"`` keys.
            __user__: Unused; accepted for interface compatibility.

        Returns:
            The same ``body`` object. ``body["messages"]`` is replaced with a
            shortened list only when the filter is enabled, the non-system
            messages exceed ``max_chars`` and at least one message can
            actually be dropped; otherwise ``body`` is returned unmodified.
        """
        if not self.valves.enabled:
            return body

        # ``or []`` also covers an explicit ``"messages": None``.
        messages = body.get("messages") or []
        # A negative setting is treated as "keep none of the tail".
        keep_last = max(0, self.valves.keep_last_messages)
        if len(messages) <= keep_last + 1:
            return body

        # Always keep the system prompt (usually the first message).
        system_message = None
        other_messages = messages
        if messages[0].get("role") == "system":
            system_message = messages[0]
            other_messages = messages[1:]

        total_chars = sum(self._content_length(m) for m in other_messages)
        if total_chars <= self.valves.max_chars:
            return body

        # Messages strictly between the first one and the kept tail. If there
        # are none, inserting the placeholder would only make the history
        # longer, so leave the body alone.
        dropped = len(other_messages) - keep_last - 1
        if dropped <= 0:
            return body

        # Slice from an explicit start index: ``lst[-0:]`` would return the
        # whole list instead of an empty tail when keep_last is 0.
        last_msgs = other_messages[len(other_messages) - keep_last:]
        # Keep the first message (usually the start of the context).
        first_msg = other_messages[0]

        new_messages = []
        if system_message is not None:
            new_messages.append(system_message)
        new_messages.append(first_msg)
        new_messages.append({"role": "system", "content": "...[Stariji dio razgovora je uklonjen radi uštede tokena]..."})
        new_messages.extend(last_msgs)

        body["messages"] = new_messages
        print(f"Token Saver: Skraćeno sa {len(messages)} na {len(new_messages)} poruka.")
        return body

    def outlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
        """Return the response body unchanged."""
        return body