| """Hindsight memory plugin — MemoryProvider interface. |
| |
| Long-term memory with knowledge graph, entity resolution, and multi-strategy |
| retrieval. Supports cloud (API key) and local modes. |
| |
| Original PR #1811 by benfrank241, adapted to MemoryProvider ABC. |
| |
| Config via environment variables: |
| HINDSIGHT_API_KEY — API key for Hindsight Cloud |
| HINDSIGHT_BANK_ID — memory bank identifier (default: hermes) |
| HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) |
| HINDSIGHT_API_URL — API endpoint |
| HINDSIGHT_MODE — cloud or local (default: cloud) |
| HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories |
| HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories |
| HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts |
| HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts |
| |
| Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to |
| ~/.hindsight/config.json (legacy, shared) for backward compatibility. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import asyncio |
| import json |
| import logging |
| import os |
| import threading |
|
|
| from datetime import datetime, timezone |
| from typing import Any, Dict, List |
|
|
| from agent.memory_provider import MemoryProvider |
| from hermes_constants import get_hermes_home |
| from tools.registry import tool_error |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Endpoint used when the configured mode is not one of the local modes.
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
# Endpoint used for the local_embedded / local_external modes.
_DEFAULT_LOCAL_URL = "http://localhost:8888"
# Minimum hindsight-client version; initialize() warns and attempts an
# upgrade when the installed version is older.
_MIN_CLIENT_VERSION = "0.4.22"
# Accepted recall budgets; initialize() falls back to "mid" for anything else.
_VALID_BUDGETS = {"low", "mid", "high"}
# Default LLM model per provider, offered by the local-embedded setup flow.
_PROVIDER_DEFAULT_MODELS = {
    "openai": "gpt-4o-mini",
    "anthropic": "claude-haiku-4-5",
    "gemini": "gemini-2.5-flash",
    "groq": "openai/gpt-oss-120b",
    "openrouter": "qwen/qwen3.5-9b",
    "minimax": "MiniMax-M2.7",
    "ollama": "gemma3:12b",
    "lmstudio": "local-model",
    "openai_compatible": "your-model-name",
}
|
|
|
|
| |
| |
| |
| |
|
|
| _loop: asyncio.AbstractEventLoop | None = None |
| _loop_thread: threading.Thread | None = None |
| _loop_lock = threading.Lock() |
|
|
|
|
| def _get_loop() -> asyncio.AbstractEventLoop: |
| """Return a long-lived event loop running on a background thread.""" |
| global _loop, _loop_thread |
| with _loop_lock: |
| if _loop is not None and _loop.is_running(): |
| return _loop |
| _loop = asyncio.new_event_loop() |
|
|
| def _run(): |
| asyncio.set_event_loop(_loop) |
| _loop.run_forever() |
|
|
| _loop_thread = threading.Thread(target=_run, daemon=True, name="hindsight-loop") |
| _loop_thread.start() |
| return _loop |
|
|
|
|
def _run_sync(coro, timeout: float = 120.0):
    """Run *coro* on the shared background loop and block until it finishes.

    Args:
        coro: Coroutine object to schedule on the loop from ``_get_loop()``.
        timeout: Maximum number of seconds to wait for the result.

    Returns:
        The value returned by the coroutine.

    Raises:
        TimeoutError: (``concurrent.futures.TimeoutError`` before Python
            3.11) if the coroutine does not finish within *timeout*.
        Exception: whatever the coroutine itself raises.
    """
    future = asyncio.run_coroutine_threadsafe(coro, _get_loop())
    try:
        return future.result(timeout=timeout)
    finally:
        # If we stop waiting (timeout or interruption) the coroutine would
        # otherwise keep running on the shared loop with nobody consuming
        # its result; cancel it. This is a no-op once the future is done.
        if not future.done():
            future.cancel()
|
|
|
|
| |
| |
| |
|
|
# JSON-schema style description of the "hindsight_retain" tool: stores a piece
# of content, with an optional context label and optional per-call tags.
# NOTE(review): the consumer of these schemas is outside this view —
# presumably they are exposed to the agent as tool definitions; confirm.
RETAIN_SCHEMA = {
    "name": "hindsight_retain",
    "description": (
        "Store information to long-term memory. Hindsight automatically "
        "extracts structured facts, resolves entities, and indexes for retrieval."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
            "tags": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Optional per-call tags to merge with configured default retain tags.",
            },
        },
        "required": ["content"],
    },
}


# Description of the "hindsight_recall" tool: a search driven by a single
# required "query" string.
RECALL_SCHEMA = {
    "name": "hindsight_recall",
    "description": (
        "Search long-term memory. Returns memories ranked by relevance using "
        "semantic search, keyword matching, entity graph traversal, and reranking."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "What to search for."},
        },
        "required": ["query"],
    },
}


# Description of the "hindsight_reflect" tool: a synthesized answer driven by
# a single required "query" string.
REFLECT_SCHEMA = {
    "name": "hindsight_reflect",
    "description": (
        "Synthesize a reasoned answer from long-term memories. Unlike recall, "
        "this reasons across all stored memories to produce a coherent response."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The question to reflect on."},
        },
        "required": ["query"],
    },
}
|
|
|
|
| |
| |
| |
|
|
def _read_config_file(path) -> dict | None:
    """Return the JSON object stored at *path*, or None if missing or unusable."""
    if not path.exists():
        return None
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both malformed JSON and undecodable bytes.
        logger.warning("Hindsight: ignoring unreadable config %s: %s", path, exc)
        return None
    if not isinstance(data, dict):
        # Callers use .get() on the result, so only a JSON object is usable.
        logger.warning(
            "Hindsight: ignoring config %s (expected a JSON object, got %s)",
            path, type(data).__name__,
        )
        return None
    return data


def _load_config() -> dict:
    """Load the provider config from disk, falling back to environment variables.

    Resolution order:
    1. $HERMES_HOME/hindsight/config.json (profile-scoped)
    2. ~/.hindsight/config.json (legacy, shared)
    3. Environment variables

    A file that cannot be read, is not valid JSON, or does not contain a JSON
    object is logged and skipped, so resolution continues with the next source.

    Returns:
        The config mapping from the first usable source.
    """
    from pathlib import Path

    candidates = (
        get_hermes_home() / "hindsight" / "config.json",
        Path.home() / ".hindsight" / "config.json",
    )
    for candidate in candidates:
        loaded = _read_config_file(candidate)
        if loaded is not None:
            return loaded

    return {
        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
        "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""),
        "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""),
        "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"),
        "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"),
        "banks": {
            "hermes": {
                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
                "budget": os.environ.get("HINDSIGHT_BUDGET", "mid"),
                "enabled": True,
            }
        },
    }
|
|
|
|
| def _normalize_retain_tags(value: Any) -> List[str]: |
| """Normalize tag config/tool values to a deduplicated list of strings.""" |
| if value is None: |
| return [] |
|
|
| raw_items: list[Any] |
| if isinstance(value, list): |
| raw_items = value |
| elif isinstance(value, str): |
| text = value.strip() |
| if not text: |
| return [] |
| if text.startswith("["): |
| try: |
| parsed = json.loads(text) |
| except Exception: |
| parsed = None |
| if isinstance(parsed, list): |
| raw_items = parsed |
| else: |
| raw_items = text.split(",") |
| else: |
| raw_items = text.split(",") |
| else: |
| raw_items = [value] |
|
|
| normalized = [] |
| seen = set() |
| for item in raw_items: |
| tag = str(item).strip() |
| if not tag or tag in seen: |
| continue |
| seen.add(tag) |
| normalized.append(tag) |
| return normalized |
|
|
|
|
| def _utc_timestamp() -> str: |
| """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix.""" |
| return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") |
|
|
|
|
| |
| |
| |
|
|
| class HindsightMemoryProvider(MemoryProvider): |
| """Hindsight long-term memory with knowledge graph and multi-strategy retrieval.""" |
|
|
def __init__(self):
    """Set every attribute to its built-in default; initialize() applies the real config."""
    # Connection and client state.
    self._config = None
    self._client = None
    self._api_key = None
    self._api_url = _DEFAULT_API_URL
    self._mode = "cloud"
    self._llm_base_url = ""

    # Bank selection and how memory is surfaced to the agent.
    self._bank_id = "hermes"
    self._budget = "mid"
    self._memory_mode = "hybrid"
    self._prefetch_method = "recall"
    self._bank_mission = ""
    self._bank_retain_mission: str | None = None

    # Session and conversation identity, supplied through initialize().
    self._session_id = ""
    self._platform = self._user_id = self._user_name = ""
    self._chat_id = self._chat_name = self._chat_type = ""
    self._thread_id = self._agent_identity = ""

    # Turn bookkeeping.
    self._turn_index = 0
    self._turn_counter = 0
    self._session_turns: list[str] = []

    # Background work: prefetch result hand-off and worker threads.
    self._prefetch_result = ""
    self._prefetch_lock = threading.Lock()
    self._prefetch_thread = None
    self._sync_thread = None

    # Tagging.
    self._retain_tags: List[str] = []
    self._tags: list[str] | None = None
    self._recall_tags: list[str] | None = None
    self._recall_tags_match = "any"

    # Retain settings.
    self._auto_retain = True
    self._retain_every_n_turns = 1
    self._retain_async = True
    self._retain_context = "conversation between Hermes Agent and the User"
    self._retain_source = ""
    self._retain_user_prefix = "User"
    self._retain_assistant_prefix = "Assistant"

    # Recall settings.
    self._auto_recall = True
    self._recall_max_tokens = 4096
    self._recall_types: list[str] | None = None
    self._recall_prompt_preamble = ""
    self._recall_max_input_chars = 800
|
|
@property
def name(self) -> str:
    """Identifier of this memory provider."""
    provider_name = "hindsight"
    return provider_name
|
|
def is_available(self) -> bool:
    """Report whether enough configuration exists to use Hindsight.

    Local modes are always considered available. Otherwise an API key or an
    API URL must be present in the loaded config or in the environment
    (``HINDSIGHT_API_KEY`` / ``HINDSIGHT_API_URL``).

    Returns:
        True when the provider can be used; False when nothing is configured
        or the config cannot be inspected.
    """
    try:
        cfg = _load_config()
        mode = cfg.get("mode", "cloud")
        if mode in ("local", "local_embedded", "local_external"):
            return True
        # Accept both key spellings, matching what initialize() reads.
        has_key = bool(
            cfg.get("apiKey")
            or cfg.get("api_key")
            or os.environ.get("HINDSIGHT_API_KEY", "")
        )
        has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
        return has_key or has_url
    except Exception:
        # Availability probing must never raise.
        return False
|
|
def save_config(self, values, hermes_home):
    """Merge *values* into ``<hermes_home>/hindsight/config.json``.

    Keys already stored in the file are kept unless *values* overrides them.
    The directory is created when missing. An existing file that cannot be
    read or does not hold a JSON object is logged and replaced.

    Args:
        values: Mapping of settings to write.
        hermes_home: Profile directory that contains the ``hindsight`` folder.
    """
    import json
    from pathlib import Path

    config_path = Path(hermes_home) / "hindsight" / "config.json"
    config_path.parent.mkdir(parents=True, exist_ok=True)

    existing = {}
    if config_path.exists():
        try:
            # Read as UTF-8, the same encoding _load_config uses, so a
            # locale-dependent decode failure cannot silently drop settings.
            loaded = json.loads(config_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            logger.warning("Hindsight: replacing unreadable config %s: %s", config_path, exc)
        else:
            if isinstance(loaded, dict):
                existing = loaded
            else:
                logger.warning(
                    "Hindsight: replacing config %s (expected a JSON object, got %s)",
                    config_path, type(loaded).__name__,
                )
    existing.update(values)
    config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
|
def post_setup(self, hermes_home: str, config: dict) -> None:
    """Run the interactive Hindsight setup wizard.

    Steps:
    1. Ask for the connection mode (cloud, local embedded, local external).
    2. Install or upgrade the package that mode needs, using ``uv`` when it
       is on PATH; otherwise print the manual command.
    3. Ask for the mode-specific settings (API key/URL, or LLM provider,
       model and key).
    4. Persist the results: mark Hindsight as the memory provider in the
       Hermes config, write the provider settings with ``self.save_config``,
       and store secrets in ``<hermes_home>/.env``.

    Args:
        hermes_home: Profile directory that receives ``hindsight/config.json``
            and ``.env``.
        config: Hermes config mapping; its ``memory.provider`` entry is set to
            ``"hindsight"`` and the mapping is handed to
            ``hermes_cli.config.save_config``.
    """
    import getpass
    import subprocess
    import shutil
    import sys
    from pathlib import Path

    from hermes_cli.config import save_config
    from hermes_cli.memory_setup import _curses_select

    def _prompt_secret(label: str) -> str:
        """Print *label*, then read a secret: hidden on a TTY, a plain line otherwise."""
        sys.stdout.write(label)
        sys.stdout.flush()
        if sys.stdin.isatty():
            return getpass.getpass(prompt="")
        return sys.stdin.readline().strip()

    print("\n Configuring Hindsight memory:\n")

    # Step 1: mode selection.
    mode_items = [
        ("Cloud", "Hindsight Cloud API (lightweight, just needs an API key)"),
        ("Local Embedded", "Run Hindsight locally (downloads ~200MB, needs LLM key)"),
        ("Local External", "Connect to an existing Hindsight instance"),
    ]
    mode_idx = _curses_select(" Select mode", mode_items, default=0)
    mode = ["cloud", "local_embedded", "local_external"][mode_idx]

    provider_config: dict = {"mode": mode}
    env_writes: dict = {}

    # Step 2: dependencies. Only the embedded mode needs the full local
    # package; both remote modes use the client pinned to the module-level
    # minimum version.
    cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
    local_dep = "hindsight-all"
    deps_to_install = [local_dep] if mode == "local_embedded" else [cloud_dep]

    print("\n Checking dependencies...")
    uv_path = shutil.which("uv")
    if not uv_path:
        print(" ⚠ uv not found — install it: curl -LsSf https://astral.sh/uv/install.sh | sh")
        print(f" Then run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")
    else:
        try:
            subprocess.run(
                [uv_path, "pip", "install", "--python", sys.executable, "--quiet", "--upgrade"] + deps_to_install,
                check=True, timeout=120, capture_output=True,
            )
            print(" ✓ Dependencies up to date")
        except Exception as e:
            # A failed install must not abort the wizard; tell the user how
            # to finish it by hand.
            print(f" ⚠ Install failed: {e}")
            print(f" Run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")

    # Step 3: mode-specific settings.
    if mode == "cloud":
        print("\n Get your API key at https://ui.hindsight.vectorize.io\n")
        existing_key = os.environ.get("HINDSIGHT_API_KEY", "")
        if existing_key:
            # Show at most the last four characters of the current key.
            masked = f"...{existing_key[-4:]}" if len(existing_key) > 4 else "set"
            api_key = _prompt_secret(f" API key (current: {masked}, blank to keep): ")
        else:
            api_key = _prompt_secret(" API key: ")
        if api_key:
            env_writes["HINDSIGHT_API_KEY"] = api_key

        val = input(f" API URL [{_DEFAULT_API_URL}]: ").strip()
        if val:
            provider_config["api_url"] = val

    elif mode == "local_external":
        val = input(f" Hindsight API URL [{_DEFAULT_LOCAL_URL}]: ").strip()
        provider_config["api_url"] = val or _DEFAULT_LOCAL_URL

        api_key = _prompt_secret(" API key (optional, blank to skip): ")
        if api_key:
            env_writes["HINDSIGHT_API_KEY"] = api_key

    else:
        # local_embedded: choose the LLM Hindsight will use.
        providers_list = list(_PROVIDER_DEFAULT_MODELS.keys())
        llm_items = [
            (p, f"default model: {_PROVIDER_DEFAULT_MODELS[p]}")
            for p in providers_list
        ]
        llm_idx = _curses_select(" Select LLM provider", llm_items, default=0)
        llm_provider = providers_list[llm_idx]

        provider_config["llm_provider"] = llm_provider

        if llm_provider == "openai_compatible":
            val = input(" LLM endpoint URL (e.g. http://192.168.1.10:8080/v1): ").strip()
            if val:
                provider_config["llm_base_url"] = val
        elif llm_provider == "openrouter":
            provider_config["llm_base_url"] = "https://openrouter.ai/api/v1"

        default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
        val = input(f" LLM model [{default_model}]: ").strip()
        provider_config["llm_model"] = val or default_model

        llm_key = _prompt_secret(" LLM API key: ")
        if llm_key:
            env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key

    # Step 4: persist everything.
    provider_config["bank_id"] = "hermes"
    provider_config["recall_budget"] = "mid"
    # Tolerate a Hermes config that has no "memory" section yet.
    config.setdefault("memory", {})["provider"] = "hindsight"
    save_config(config)

    self.save_config(provider_config, hermes_home)

    if env_writes:
        env_path = Path(hermes_home) / ".env"
        env_path.parent.mkdir(parents=True, exist_ok=True)
        existing_lines = []
        if env_path.exists():
            existing_lines = env_path.read_text(encoding="utf-8").splitlines()
        updated_keys = set()
        new_lines = []
        for line in existing_lines:
            # Replace assignments for keys we are writing; keep comments and
            # every other line untouched.
            key_match = line.split("=", 1)[0].strip() if "=" in line and not line.startswith("#") else None
            if key_match and key_match in env_writes:
                new_lines.append(f"{key_match}={env_writes[key_match]}")
                updated_keys.add(key_match)
            else:
                new_lines.append(line)
        for k, v in env_writes.items():
            if k not in updated_keys:
                new_lines.append(f"{k}={v}")
        env_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")

    print(f"\n ✓ Hindsight memory configured ({mode} mode)")
    if env_writes:
        print(" API keys saved to .env")
    print("\n Start a new session to activate.\n")
|
|
def get_config_schema(self):
    """Return the list of configurable settings for this provider.

    Each entry is a dict with a ``key`` and ``description`` and, where
    applicable, ``default``, ``choices``, ``secret``, ``env_var``, ``url``,
    ``default_from`` and a ``when`` condition naming the mode/provider the
    setting applies to.

    Returns:
        A new list of setting descriptors on every call.
    """
    # Derive the provider list and fallback model from the shared table so
    # this schema and the setup wizard cannot drift apart.
    llm_providers = list(_PROVIDER_DEFAULT_MODELS)
    return [
        {"key": "mode", "description": "Connection mode", "default": "cloud", "choices": ["cloud", "local_embedded", "local_external"]},
        # Cloud mode.
        {"key": "api_url", "description": "Hindsight Cloud API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
        {"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}},
        # Local external mode.
        {"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_LOCAL_URL, "when": {"mode": "local_external"}},
        {"key": "api_key", "description": "API key (optional)", "secret": True, "env_var": "HINDSIGHT_API_KEY", "when": {"mode": "local_external"}},
        # Local embedded mode.
        {"key": "llm_provider", "description": "LLM provider", "default": "openai", "choices": llm_providers, "when": {"mode": "local_embedded"}},
        {"key": "llm_base_url", "description": "Endpoint URL (e.g. http://192.168.1.10:8080/v1)", "default": "", "when": {"mode": "local_embedded", "llm_provider": "openai_compatible"}},
        {"key": "llm_api_key", "description": "LLM API key (optional for openai_compatible)", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local_embedded"}},
        {"key": "llm_model", "description": "LLM model", "default": _PROVIDER_DEFAULT_MODELS["openai"], "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local_embedded"}},
        # Settings shared by all modes.
        {"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
        {"key": "bank_mission", "description": "Mission/purpose description for the memory bank"},
        {"key": "bank_retain_mission", "description": "Custom extraction prompt for memory retention"},
        {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
        {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
        {"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
        {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""},
        {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""},
        {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"},
        {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
        {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
        {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
        {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
        {"key": "auto_retain", "description": "Automatically retain conversation turns", "default": True},
        {"key": "retain_every_n_turns", "description": "Retain every N turns (1 = every turn)", "default": 1},
        {"key": "retain_async", "description": "Process retain asynchronously on the Hindsight server", "default": True},
        {"key": "retain_context", "description": "Context label for retained memories", "default": "conversation between Hermes Agent and the User"},
        {"key": "recall_max_tokens", "description": "Maximum tokens for recall results", "default": 4096},
        {"key": "recall_max_input_chars", "description": "Maximum input query length for auto-recall", "default": 800},
        {"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
    ]
|
|
def _get_client(self):
    """Return the Hindsight client, building and caching it on first use.

    In ``local_embedded`` mode a ``HindsightEmbedded`` client is built from
    the LLM settings in the loaded config; in every other mode a
    ``Hindsight`` HTTP client is built for the configured API URL.
    """
    if self._client is not None:
        return self._client

    if self._mode == "local_embedded":
        from hindsight import HindsightEmbedded
        # Neutralise the class finalizer.
        # NOTE(review): presumably avoids cleanup side effects during
        # garbage collection — confirm against the hindsight package.
        HindsightEmbedded.__del__ = lambda self: None

        cfg = self._config
        provider = cfg.get("llm_provider", "")
        if provider in ("openai_compatible", "openrouter"):
            # Both are passed to the client under the "openai" provider name.
            provider = "openai"
        logger.debug("Creating HindsightEmbedded client (profile=%s, provider=%s)",
                     cfg.get("profile", "hermes"), provider)
        options = {
            "profile": cfg.get("profile", "hermes"),
            "llm_provider": provider,
            "llm_api_key": (
                cfg.get("llmApiKey")
                or cfg.get("llm_api_key")
                or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
            ),
            "llm_model": cfg.get("llm_model", ""),
        }
        if self._llm_base_url:
            options["llm_base_url"] = self._llm_base_url
        self._client = HindsightEmbedded(**options)
    else:
        from hindsight_client import Hindsight
        options = {"base_url": self._api_url, "timeout": 30.0}
        if self._api_key:
            options["api_key"] = self._api_key
        logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s)",
                     self._api_url, bool(self._api_key))
        self._client = Hindsight(**options)

    return self._client
|
|
def initialize(self, session_id: str, **kwargs) -> None:
    """Load configuration and prepare the provider for a session.

    Performs, in order:
    1. A best-effort check that ``hindsight-client`` meets
       ``_MIN_CLIENT_VERSION``, attempting a ``uv`` upgrade when it does not.
    2. Loading the config and copying session identity values from *kwargs*.
    3. Resolving every setting, with fallbacks to environment variables and
       built-in defaults. Invalid enum-like values fall back to their
       default, and integer settings that cannot be parsed fall back with a
       warning.
    4. In ``local_embedded`` mode, starting the embedded daemon on a
       background thread; its progress is appended to
       ``<hermes_home>/logs/hindsight-embed.log``.

    Args:
        session_id: Identifier of the session being started.
        **kwargs: Optional identity values: ``platform``, ``user_id``,
            ``user_name``, ``chat_id``, ``chat_name``, ``chat_type``,
            ``thread_id`` and ``agent_identity``. Missing values become "".
    """
    self._session_id = str(session_id or "").strip()

    # Step 1: client version check. Entirely best-effort: any failure here
    # (missing package, missing `packaging`, ...) must not block startup.
    try:
        from importlib.metadata import version as pkg_version
        from packaging.version import Version
        installed = pkg_version("hindsight-client")
        if Version(installed) < Version(_MIN_CLIENT_VERSION):
            logger.warning("hindsight-client %s is outdated (need >=%s), attempting upgrade...",
                           installed, _MIN_CLIENT_VERSION)
            import shutil
            import subprocess
            import sys
            uv_path = shutil.which("uv")
            if uv_path:
                try:
                    subprocess.run(
                        [uv_path, "pip", "install", "--python", sys.executable,
                         "--quiet", "--upgrade", f"hindsight-client>={_MIN_CLIENT_VERSION}"],
                        check=True, timeout=120, capture_output=True,
                    )
                    logger.info("hindsight-client upgraded to >=%s", _MIN_CLIENT_VERSION)
                except Exception as e:
                    logger.warning("Auto-upgrade failed: %s. Run: uv pip install 'hindsight-client>=%s'",
                                   e, _MIN_CLIENT_VERSION)
            else:
                logger.warning("uv not found. Run: pip install 'hindsight-client>=%s'", _MIN_CLIENT_VERSION)
    except Exception as exc:
        logger.debug("Hindsight client version check skipped: %s", exc)

    # Step 2: config and session identity.
    self._config = _load_config()

    def _int_setting(key: str, default: int) -> int:
        """Read an integer setting, falling back to *default* when it is null or unparsable."""
        raw = self._config.get(key, default)
        try:
            return int(raw)
        except (TypeError, ValueError):
            logger.warning("Hindsight: invalid %s=%r, using default %d", key, raw, default)
            return default

    self._platform = str(kwargs.get("platform") or "").strip()
    self._user_id = str(kwargs.get("user_id") or "").strip()
    self._user_name = str(kwargs.get("user_name") or "").strip()
    self._chat_id = str(kwargs.get("chat_id") or "").strip()
    self._chat_name = str(kwargs.get("chat_name") or "").strip()
    self._chat_type = str(kwargs.get("chat_type") or "").strip()
    self._thread_id = str(kwargs.get("thread_id") or "").strip()
    self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
    self._turn_index = 0
    self._session_turns = []

    # Step 3: connection settings.
    self._mode = self._config.get("mode", "cloud")
    # "local" is accepted as an alias of "local_embedded".
    if self._mode == "local":
        self._mode = "local_embedded"
    self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
    default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
    self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
    self._llm_base_url = self._config.get("llm_base_url", "")

    # Bank and budget: flat keys win over the nested banks.hermes section.
    banks = self._config.get("banks", {}).get("hermes", {})
    self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
    budget = self._config.get("recall_budget") or self._config.get("budget") or banks.get("budget", "mid")
    self._budget = budget if budget in _VALID_BUDGETS else "mid"

    memory_mode = self._config.get("memory_mode", "hybrid")
    self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"

    prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
    self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"

    # Bank missions.
    self._bank_mission = self._config.get("bank_mission", "")
    self._bank_retain_mission = self._config.get("bank_retain_mission") or None

    # Tags and retained-transcript labels.
    self._retain_tags = _normalize_retain_tags(
        self._config.get("retain_tags")
        or os.environ.get("HINDSIGHT_RETAIN_TAGS", "")
    )
    self._tags = self._retain_tags or None
    # recall_tags is configured as a comma-separated string (or a list);
    # normalize it the same way as retain_tags so recall receives a list.
    self._recall_tags = _normalize_retain_tags(self._config.get("recall_tags")) or None
    self._recall_tags_match = self._config.get("recall_tags_match", "any")
    self._retain_source = str(
        self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "")
    ).strip()
    self._retain_user_prefix = str(
        self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User")
    ).strip() or "User"
    self._retain_assistant_prefix = str(
        self._config.get("retain_assistant_prefix") or os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant")
    ).strip() or "Assistant"

    # Retain settings.
    self._auto_retain = self._config.get("auto_retain", True)
    self._retain_every_n_turns = max(1, _int_setting("retain_every_n_turns", 1))
    self._retain_context = self._config.get("retain_context", "conversation between Hermes Agent and the User")

    # Recall settings.
    self._auto_recall = self._config.get("auto_recall", True)
    self._recall_max_tokens = _int_setting("recall_max_tokens", 4096)
    self._recall_types = self._config.get("recall_types") or None
    self._recall_prompt_preamble = self._config.get("recall_prompt_preamble", "")
    self._recall_max_input_chars = _int_setting("recall_max_input_chars", 800)
    self._retain_async = self._config.get("retain_async", True)

    _client_version = "unknown"
    try:
        from importlib.metadata import version as pkg_version
        _client_version = pkg_version("hindsight-client")
    except Exception:
        # Only used for the log line below; "unknown" is good enough.
        pass
    logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
                self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
    logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
                 "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
                 self._auto_retain, self._auto_recall, self._retain_every_n_turns,
                 self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars,
                 self._tags, self._recall_tags)

    # Step 4: embedded daemon. Started on a background thread so that
    # initialize() returns without waiting for it.
    if self._mode == "local_embedded":
        def _start_daemon():
            import traceback
            log_dir = get_hermes_home() / "logs"
            log_dir.mkdir(parents=True, exist_ok=True)
            log_path = log_dir / "hindsight-embed.log"
            try:
                # Point the daemon manager's module-level console at the log
                # file. The handle is handed to the console and not closed
                # here.
                import hindsight_embed.daemon_embed_manager as dem
                from rich.console import Console
                dem.console = Console(file=open(log_path, "a"), force_terminal=False)

                client = self._get_client()
                profile = self._config.get("profile", "hermes")

                # Keep ~/.hindsight/profiles/<profile>.env in sync with the
                # current LLM settings.
                # NOTE(review): presumably the daemon reads this file at
                # startup — confirm against hindsight_embed.
                from pathlib import Path as _Path
                profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
                # Resolve the key exactly as _get_client does, so the change
                # detection below compares against the key actually in use.
                current_key = (
                    self._config.get("llmApiKey")
                    or self._config.get("llm_api_key")
                    or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
                )
                current_provider = self._config.get("llm_provider", "")
                current_model = self._config.get("llm_model", "")
                current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
                # Same provider mapping as _get_client.
                daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider

                # Parse the KEY=VALUE lines of the saved profile env, if any.
                saved = {}
                if profile_env.exists():
                    for line in profile_env.read_text().splitlines():
                        if "=" in line and not line.startswith("#"):
                            k, v = line.split("=", 1)
                            saved[k.strip()] = v.strip()

                config_changed = (
                    saved.get("HINDSIGHT_API_LLM_PROVIDER") != daemon_provider or
                    saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
                    saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or
                    saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url
                )

                if config_changed:
                    # Rewrite the profile env and stop a running daemon so
                    # the start below runs after the new settings are saved.
                    profile_env.parent.mkdir(parents=True, exist_ok=True)
                    env_lines = (
                        f"HINDSIGHT_API_LLM_PROVIDER={daemon_provider}\n"
                        f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
                        f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
                        f"HINDSIGHT_API_LOG_LEVEL=info\n"
                    )
                    if current_base_url:
                        env_lines += f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n"
                    profile_env.write_text(env_lines)
                    if client._manager.is_running(profile):
                        with open(log_path, "a") as f:
                            f.write("\n=== Config changed, restarting daemon ===\n")
                        client._manager.stop(profile)

                client._ensure_started()
                with open(log_path, "a") as f:
                    f.write("\n=== Daemon started successfully ===\n")
            except Exception as e:
                # Runs on a background thread: record the failure in the
                # log file instead of raising.
                with open(log_path, "a") as f:
                    f.write(f"\n=== Daemon startup failed: {e} ===\n")
                    traceback.print_exc(file=f)

        t = threading.Thread(target=_start_daemon, daemon=True, name="hindsight-daemon-start")
        t.start()
|
|
def system_prompt_block(self) -> str:
    """Build the system-prompt section describing how memory is available.

    The wording depends on the memory mode: ``context`` mentions automatic
    injection only, ``tools`` mentions the tools only, and any other mode
    mentions both.
    """
    injected = "Relevant memories are automatically injected into context."
    tool_hint = (
        "Use hindsight_recall to search, hindsight_reflect for synthesis, "
        "hindsight_retain to store facts."
    )

    mode = self._memory_mode
    if mode == "context":
        status, guidance = "Active (context mode).", injected
    elif mode == "tools":
        status, guidance = "Active (tools mode).", tool_hint
    else:
        status, guidance = "Active.", f"{injected} {tool_hint}"

    return (
        "# Hindsight Memory\n"
        f"{status} Bank: {self._bank_id}, budget: {self._budget}.\n"
        f"{guidance}"
    )
|
|
def prefetch(self, query: str, *, session_id: str = "") -> str:
    """Return the memory context produced by the last queued prefetch.

    Waits up to three seconds for a prefetch thread that is still running,
    then takes and clears the stored result. *query* and *session_id* are
    not used here.

    Returns:
        The preamble (custom or default) followed by the recalled text, or
        an empty string when nothing is available.
    """
    worker = self._prefetch_thread
    if worker and worker.is_alive():
        logger.debug("Prefetch: waiting for background thread to complete")
        worker.join(timeout=3.0)

    # Take the result and reset the slot in one step under the lock.
    with self._prefetch_lock:
        result, self._prefetch_result = self._prefetch_result, ""

    if result:
        logger.debug("Prefetch: returning %d chars of context", len(result))
        header = self._recall_prompt_preamble or (
            "# Hindsight Memory (persistent cross-session context)\n"
            "Use this to answer questions about the user and prior sessions. "
            "Do not call tools to look up information that is already present here."
        )
        return f"{header}\n\n{result}"

    logger.debug("Prefetch: no results available")
    return ""
|
|
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
    """Start a background recall (or reflect) whose result ``prefetch`` picks up.

    Does nothing in tools-only mode or when auto-recall is disabled. The
    query is truncated to the configured maximum input length before use.
    Failures in the background thread are logged at debug level and
    otherwise ignored.
    """
    if self._memory_mode == "tools":
        logger.debug("Prefetch: skipped (tools-only mode)")
        return
    if not self._auto_recall:
        logger.debug("Prefetch: skipped (auto_recall disabled)")
        return

    # Cap the query length; a falsy limit disables the cap.
    if self._recall_max_input_chars and len(query) > self._recall_max_input_chars:
        query = query[:self._recall_max_input_chars]

    def _worker():
        try:
            client = self._get_client()
            if self._prefetch_method == "reflect":
                logger.debug("Prefetch: calling reflect (bank=%s, query_len=%d)", self._bank_id, len(query))
                response = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
                text = response.text or ""
            else:
                params: dict = {
                    "bank_id": self._bank_id,
                    "query": query,
                    "budget": self._budget,
                    "max_tokens": self._recall_max_tokens,
                }
                # Tag and type filters are sent only when configured.
                if self._recall_tags:
                    params["tags"] = self._recall_tags
                    params["tags_match"] = self._recall_tags_match
                if self._recall_types:
                    params["types"] = self._recall_types
                logger.debug("Prefetch: calling recall (bank=%s, query_len=%d, budget=%s)",
                             self._bank_id, len(query), self._budget)
                response = _run_sync(client.arecall(**params))
                hit_count = len(response.results) if response.results else 0
                logger.debug("Prefetch: recall returned %d results", hit_count)
                if response.results:
                    text = "\n".join(f"- {hit.text}" for hit in response.results if hit.text)
                else:
                    text = ""
            # An empty outcome leaves any previously stored result in place.
            if text:
                with self._prefetch_lock:
                    self._prefetch_result = text
        except Exception as e:
            logger.debug("Hindsight prefetch failed: %s", e, exc_info=True)

    self._prefetch_thread = threading.Thread(target=_worker, daemon=True, name="hindsight-prefetch")
    self._prefetch_thread.start()
|
|
| def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]: |
| now = datetime.now(timezone.utc).isoformat() |
| return [ |
| { |
| "role": "user", |
| "content": f"{self._retain_user_prefix}: {user_content}", |
| "timestamp": now, |
| }, |
| { |
| "role": "assistant", |
| "content": f"{self._retain_assistant_prefix}: {assistant_content}", |
| "timestamp": now, |
| }, |
| ] |
|
|
def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]:
    """Assemble the string-valued metadata attached to a retained item.

    The retention timestamp, message count and turn index are always present.
    Every other field is included only when the matching instance attribute
    is truthy.
    """
    metadata: Dict[str, str] = {
        "retained_at": _utc_timestamp(),
        "message_count": str(message_count),
        "turn_index": str(turn_index),
    }
    # (metadata key, current value) pairs, in the order they are emitted.
    optional_fields = (
        ("source", self._retain_source),
        ("session_id", self._session_id),
        ("platform", self._platform),
        ("user_id", self._user_id),
        ("user_name", self._user_name),
        ("chat_id", self._chat_id),
        ("chat_name", self._chat_name),
        ("chat_type", self._chat_type),
        ("thread_id", self._thread_id),
        ("agent_identity", self._agent_identity),
    )
    metadata.update((key, value) for key, value in optional_fields if value)
    return metadata
|
|
def _build_retain_kwargs(
    self,
    content: str,
    *,
    context: str | None = None,
    document_id: str | None = None,
    metadata: Dict[str, str] | None = None,
    tags: List[str] | None = None,
    retain_async: bool | None = None,
) -> Dict[str, Any]:
    """Build the keyword arguments for a retain call.

    ``bank_id``, ``content`` and ``metadata`` are always set; when no (or an
    empty) ``metadata`` is given, default single-message metadata is generated.
    ``context`` and ``retain_async`` are included when not ``None``,
    ``document_id`` when truthy. ``tags`` combines the configured retain tags
    with the caller's tags, configured tags first, skipping duplicates; the
    key is omitted when the combined list is empty.
    """
    if not metadata:
        metadata = self._build_metadata(message_count=1, turn_index=self._turn_index)
    payload: Dict[str, Any] = {
        "bank_id": self._bank_id,
        "content": content,
        "metadata": metadata,
    }

    # (key, value, include?) triples, kept in the original insertion order.
    optional_fields = (
        ("context", context, context is not None),
        ("document_id", document_id, bool(document_id)),
        ("retain_async", retain_async, retain_async is not None),
    )
    for key, value, include in optional_fields:
        if include:
            payload[key] = value

    combined_tags = _normalize_retain_tags(self._retain_tags)
    for candidate in _normalize_retain_tags(tags):
        if candidate in combined_tags:
            continue
        combined_tags.append(candidate)
    if combined_tags:
        payload["tags"] = combined_tags
    return payload
|
|
def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
    """Retain conversation turn in background (non-blocking).

    Respects retain_every_n_turns for batching: every call appends the
    serialized turn to the session buffer, but a retain request is only sent
    when the turn counter is a multiple of ``retain_every_n_turns``. The
    request carries all turns buffered so far as a single item and uses the
    current session id as ``document_id``.

    The payload (content, metadata, document id, async flag) is captured on
    the calling thread, so a later ``sync_turn`` call that mutates session
    state cannot alter what an in-flight worker sends.

    If the previous sync thread is still running, this call waits up to five
    seconds for it before starting the next one.

    Args:
        user_content: Text of the user's message for this turn.
        assistant_content: Text of the assistant's reply for this turn.
        session_id: When non-empty, replaces the stored session id (stripped).
    """
    if not self._auto_retain:
        logger.debug("sync_turn: skipped (auto_retain disabled)")
        return

    if session_id:
        self._session_id = str(session_id).strip()

    turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
    self._session_turns.append(turn)
    self._turn_counter += 1
    self._turn_index = self._turn_counter

    remainder = self._turn_counter % self._retain_every_n_turns
    if remainder != 0:
        logger.debug("sync_turn: buffered turn %d (will retain at turn %d)",
                     self._turn_counter,
                     self._turn_counter + (self._retain_every_n_turns - remainder))
        return

    num_turns = len(self._session_turns)
    logger.debug("sync_turn: retaining %d turns, total session content %d chars",
                 num_turns, sum(len(t) for t in self._session_turns))
    # Each buffered turn is already a JSON array, so joining them yields a
    # JSON array of per-turn arrays.
    content = "[" + ",".join(self._session_turns) + "]"

    # Snapshot everything the worker needs now; reading it from ``self``
    # inside the thread could observe values changed by a later call.
    bank_id = self._bank_id
    document_id = self._session_id
    retain_async = self._retain_async
    try:
        item = self._build_retain_kwargs(
            content,
            context=self._retain_context,
            metadata=self._build_metadata(
                message_count=num_turns * 2,
                turn_index=self._turn_index,
            ),
        )
    except Exception as e:
        # Same best-effort handling the worker applies: log, never raise.
        logger.warning("Hindsight sync failed: %s", e, exc_info=True)
        return
    # bank_id and retain_async are passed at the batch level, not per item.
    item.pop("bank_id", None)
    item.pop("retain_async", None)

    def _sync():
        try:
            client = self._get_client()
            logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
                         bank_id, document_id, retain_async, len(content), num_turns)
            _run_sync(client.aretain_batch(
                bank_id=bank_id,
                items=[item],
                document_id=document_id,
                retain_async=retain_async,
            ))
            logger.debug("Hindsight retain succeeded")
        except Exception as e:
            logger.warning("Hindsight sync failed: %s", e, exc_info=True)

    # Give a still-running previous sync a bounded chance to finish first.
    if self._sync_thread and self._sync_thread.is_alive():
        self._sync_thread.join(timeout=5.0)
    self._sync_thread = threading.Thread(target=_sync, daemon=True, name="hindsight-sync")
    self._sync_thread.start()
|
|
def get_tool_schemas(self) -> List[Dict[str, Any]]:
    """Return the retain/recall/reflect tool schemas, or nothing in "context" mode."""
    exposes_tools = self._memory_mode != "context"
    return [RETAIN_SCHEMA, RECALL_SCHEMA, REFLECT_SCHEMA] if exposes_tools else []
|
|
def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
    """Execute one Hindsight tool call and return its result as a string.

    Supported tools are ``hindsight_retain`` (requires ``content``; optional
    ``context`` and ``tags``), ``hindsight_recall`` (requires ``query``) and
    ``hindsight_reflect`` (requires ``query``).

    Args:
        tool_name: Name of the tool to run.
        args: Tool arguments; ``None`` is treated as an empty mapping.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        On success, a JSON object string with a single ``"result"`` key.
        On any failure (client unavailable, missing parameter, backend error,
        unknown tool) the value produced by ``tool_error``. This method does
        not raise for backend errors.
    """
    try:
        client = self._get_client()
    except Exception as e:
        logger.warning("Hindsight client init failed: %s", e)
        return tool_error(f"Hindsight client unavailable: {e}")

    # A missing argument payload should surface as "missing parameter",
    # not as an AttributeError from ``None.get``.
    args = args or {}

    if tool_name == "hindsight_retain":
        content = args.get("content", "")
        if not content:
            return tool_error("Missing required parameter: content")
        context = args.get("context")
        try:
            retain_kwargs = self._build_retain_kwargs(
                content,
                context=context,
                tags=args.get("tags"),
            )
            logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
                         self._bank_id, len(content), context)
            _run_sync(client.aretain(**retain_kwargs))
            logger.debug("Tool hindsight_retain: success")
            return json.dumps({"result": "Memory stored successfully."})
        except Exception as e:
            logger.warning("hindsight_retain failed: %s", e, exc_info=True)
            return tool_error(f"Failed to store memory: {e}")

    elif tool_name == "hindsight_recall":
        query = args.get("query", "")
        if not query:
            return tool_error("Missing required parameter: query")
        try:
            recall_kwargs: dict = {
                "bank_id": self._bank_id, "query": query, "budget": self._budget,
                "max_tokens": self._recall_max_tokens,
            }
            # Tag and type filters are only sent when configured.
            if self._recall_tags:
                recall_kwargs["tags"] = self._recall_tags
                recall_kwargs["tags_match"] = self._recall_tags_match
            if self._recall_types:
                recall_kwargs["types"] = self._recall_types
            logger.debug("Tool hindsight_recall: bank=%s, query_len=%d, budget=%s",
                         self._bank_id, len(query), self._budget)
            resp = _run_sync(client.arecall(**recall_kwargs))
            num_results = len(resp.results) if resp.results else 0
            logger.debug("Tool hindsight_recall: %d results", num_results)
            # Drop results without text so the output never contains
            # "N. None" or empty numbered lines.
            texts = [r.text for r in (resp.results or []) if r.text]
            if not texts:
                return json.dumps({"result": "No relevant memories found."})
            lines = [f"{i}. {text}" for i, text in enumerate(texts, 1)]
            return json.dumps({"result": "\n".join(lines)})
        except Exception as e:
            logger.warning("hindsight_recall failed: %s", e, exc_info=True)
            return tool_error(f"Failed to search memory: {e}")

    elif tool_name == "hindsight_reflect":
        query = args.get("query", "")
        if not query:
            return tool_error("Missing required parameter: query")
        try:
            logger.debug("Tool hindsight_reflect: bank=%s, query_len=%d, budget=%s",
                         self._bank_id, len(query), self._budget)
            resp = _run_sync(client.areflect(
                bank_id=self._bank_id, query=query, budget=self._budget
            ))
            logger.debug("Tool hindsight_reflect: response_len=%d", len(resp.text or ""))
            return json.dumps({"result": resp.text or "No relevant memories found."})
        except Exception as e:
            logger.warning("hindsight_reflect failed: %s", e, exc_info=True)
            return tool_error(f"Failed to reflect: {e}")

    return tool_error(f"Unknown tool: {tool_name}")
|
|
def shutdown(self) -> None:
    """Wait for background work, close the client and stop the shared event loop.

    Steps, all best-effort (nothing here raises for a failed close):
      1. Join the prefetch and sync threads, up to five seconds each.
      2. Close the client, synchronously via ``close()`` in
         ``local_embedded`` mode and via ``aclose()`` otherwise, then drop
         the reference. Failures are logged at debug level.
      3. If the module-level event loop is running, ask it to stop, wait up
         to five seconds for its thread, and reset both module globals.
    """
    logger.debug("Hindsight shutdown: waiting for background threads")
    global _loop, _loop_thread
    for t in (self._prefetch_thread, self._sync_thread):
        if t and t.is_alive():
            t.join(timeout=5.0)
    if self._client is not None:
        try:
            if self._mode == "local_embedded":
                try:
                    self._client.close()
                except RuntimeError as e:
                    # NOTE(review): RuntimeError from close() is deliberately
                    # tolerated in embedded mode; presumably an event-loop
                    # mismatch. Confirm against the client library.
                    logger.debug("Hindsight shutdown: ignoring RuntimeError from close(): %s", e)
            else:
                _run_sync(self._client.aclose())
        except Exception as e:
            # Shutdown must not fail, but leave a trace for debugging.
            logger.debug("Hindsight shutdown: client close failed: %s", e, exc_info=True)
        self._client = None

    # stop() must be scheduled through call_soon_threadsafe because the loop
    # is being stopped from a thread other than the one joined below.
    if _loop is not None and _loop.is_running():
        _loop.call_soon_threadsafe(_loop.stop)
        if _loop_thread is not None:
            _loop_thread.join(timeout=5.0)
        _loop = None
        _loop_thread = None
|
|
|
|
def register(ctx) -> None:
    """Plugin entry point: register a new Hindsight memory provider with ``ctx``."""
    register_provider = ctx.register_memory_provider
    register_provider(HindsightMemoryProvider())
|
|