# hmc-rag / src/persistence.py
# Last commit: baa1903 by webmuppet —
# "Saved responses: store just the pair, not the full thread"
"""
Browser-local conversation persistence via localStorage.
Stores all conversations under a single localStorage key as a JSON array. The
session_state acts as an in-memory cache so we only round-trip to JS for the
initial load and saves.
Lifecycle:
- App boot calls `bootstrap_load()` once at the top of app.py to populate
the cache from localStorage.
- `save_conversation()` upserts the current conversation, capping the list
at MAX_CONVERSATIONS by `last_active_at`.
- `load_conversation(id)` and `delete_conversation(id)` work against the
cache + write through to localStorage.
streamlit-js-eval was chosen over streamlit-local-storage because the latter
has a documented `st.rerun()` race condition (issue #233) that breaks our
"New conversation" handler.
"""
from __future__ import annotations
import json
import uuid
from datetime import datetime, timezone
from typing import Optional
import streamlit as st
from streamlit_js_eval import (
get_local_storage,
streamlit_js_eval,
)
# Single localStorage key holding the entire conversations array as JSON
STORAGE_KEY = "hmc-conversations"
# Cap enforced in save_conversation(); oldest by last_active_at are dropped
MAX_CONVERSATIONS = 20
# Saved responses are a separate concept from conversations:
# - Conversations: auto-saved whole threads (rejoin model, click to resume)
# - Saved responses: explicit per-Q&A bookmarks (fork model, click to start
#   a divergent conversation from that point)
SAVED_RESPONSES_KEY = "hmc-saved-responses"
MAX_SAVED_RESPONSES = 50  # higher than conversations — smaller atomic unit
# Internal cache keys on session_state — read by load_all()/load_all_saved(),
# written by _save_all()/_save_all_saved() and the bootstrap loaders
_CACHE_KEY = "_conversations_cache"
_SAVED_CACHE_KEY = "_saved_responses_cache"
# ── Helpers ───────────────────────────────────────────────────────────────
def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return datetime.now(tz=timezone.utc).isoformat()
def new_conversation_id() -> str:
    """Mint a random version-4 UUID string identifying a conversation."""
    return f"{uuid.uuid4()}"
def title_from_messages(messages: list[dict]) -> str:
    """Derive a display title from the first user message.

    The content is stripped of surrounding whitespace and truncated to 80
    characters plus an ellipsis when longer. Returns 'Untitled conversation'
    when there is no user message or its content is empty.
    """
    for msg in messages:
        if msg.get("role") == "user":
            content = msg["content"]
            break
    else:
        content = None
    if not content:
        return "Untitled conversation"
    text = content.strip()
    return text[:80].rstrip() + "…" if len(text) > 80 else text
# ── Storage I/O ───────────────────────────────────────────────────────────
def bootstrap_load() -> None:
    """Read localStorage once at app boot and cache the result in session_state.

    Call at the top of app.py after session_state init; subsequent reads go
    through load_all()'s cache.

    streamlit-js-eval semantics: on the first render the component is still
    mounting and get_local_storage() returns None; Streamlit auto-reruns when
    the component completes, and the second render sees the real value (the
    stored string, or "" when localStorage has no entry). We must NOT cache
    the first-render None, or we short-circuit before the real value arrives —
    the cache is only populated on a non-None response.
    """
    if _CACHE_KEY in st.session_state:
        return  # already populated with a real value
    raw = get_local_storage(STORAGE_KEY, component_key="ls_load")
    if raw is None:
        # Component still mounting — skip caching; the auto-rerun retries.
        return
    conversations: list = []
    if raw != "":
        # Non-empty response: parse defensively; anything malformed → []
        try:
            decoded = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            decoded = []
        if isinstance(decoded, list):
            conversations = decoded
    st.session_state[_CACHE_KEY] = conversations
def load_all() -> list[dict]:
    """Return the cached conversations; [] until bootstrap_load() completes."""
    conversations = st.session_state.get(_CACHE_KEY, [])
    return conversations
def _save_all(conversations: list[dict]) -> None:
    """Write the full conversations array to localStorage and refresh the cache.

    Bypasses streamlit-js-eval's set_local_storage helper: v1.0.0 interpolates
    the value into a single-quoted JS string, so any apostrophe in the JSON
    (common in compliance answers — "I'll", "doesn't") breaks the JS with
    'missing ) after argument list' and the save silently fails.

    Instead we double-encode: the inner json.dumps produces the payload, the
    outer json.dumps turns it into a properly-escaped, JS-safe quoted string
    literal regardless of content.
    """
    literal = json.dumps(json.dumps(conversations))
    streamlit_js_eval(
        js_expressions=f"localStorage.setItem('{STORAGE_KEY}', {literal})",
        key="ls_save",
    )
    st.session_state[_CACHE_KEY] = conversations
# ── Public API ────────────────────────────────────────────────────────────
def save_conversation(
    conversation_id: str,
    messages: list[dict],
    history: list[dict],
    results: dict,
) -> None:
    """Upsert a conversation, capping the list at MAX_CONVERSATIONS.

    Args:
        conversation_id: UUID of the conversation being saved.
        messages: full chat transcript (role/content dicts).
        history: condensed LLM-side history.
        results: per-assistant-message result dicts, keyed by int index.

    Skips if conversation_id is falsy or there are no messages yet (don't
    persist empty conversations).
    """
    if not conversation_id or not messages:
        return
    existing = load_all()
    # Preserve original created_at if this conversation already exists.
    # Use .get() — a record written by an older schema may lack the field,
    # and prior["created_at"] would raise KeyError and abort the save.
    prior = next((c for c in existing if c.get("id") == conversation_id), None)
    created_at = (prior.get("created_at") if prior else None) or now_iso()
    record = {
        "id": conversation_id,
        "title": title_from_messages(messages),
        "created_at": created_at,
        "last_active_at": now_iso(),
        "messages": messages,
        "history": history,
        # JSON object keys must be strings — convert int keys for results dict
        "results": {str(k): v for k, v in results.items()},
    }
    # Replace any existing entry with the same ID, then prepend the updated one
    filtered = [c for c in existing if c.get("id") != conversation_id]
    updated = [record] + filtered
    # Sort by last_active_at desc and cap
    updated.sort(key=lambda c: c.get("last_active_at", ""), reverse=True)
    updated = updated[:MAX_CONVERSATIONS]
    _save_all(updated)
def load_conversation(conversation_id: str) -> Optional[dict]:
    """Return the conversation whose id matches, or None when not found."""
    matches = (c for c in load_all() if c.get("id") == conversation_id)
    return next(matches, None)
def delete_conversation(conversation_id: str) -> None:
    """Remove a conversation by ID; only writes through if something was removed."""
    before = load_all()
    after = [c for c in before if c.get("id") != conversation_id]
    if len(after) != len(before):
        _save_all(after)
def restore_state(conversation: dict) -> None:
    """Hydrate st.session_state from a stored conversation record.

    Inverse of save_conversation()'s serialization — results keys come back
    from JSON as strings and are converted back to ints here.
    """
    state = st.session_state
    state.messages = conversation.get("messages", [])
    state.history = conversation.get("history", [])
    state.results = {
        int(index): value
        for index, value in conversation.get("results", {}).items()
    }
    state.conversation_id = conversation["id"]
    # Drop starter-pill flags so the restored thread doesn't refire the pill
    for key in ("starter_pill", "_starter_handled"):
        state.pop(key, None)
# ── Deferred-write hook ───────────────────────────────────────────────────
#
# streamlit-js-eval's component renders LATE in the script lifecycle. If we
# call st.rerun() immediately after a delete (so the UI reflects the
# removal), Streamlit aborts the script before the component is sent to the
# frontend β€” the localStorage write is silently dropped. UI looks correct
# (cache was updated) but on browser refresh the deleted conversation comes
# back as a "zombie" from localStorage.
#
# Pattern: queue the delete on session_state, call st.rerun() to refresh
# the UI, then in the next render call process_pending() at the top of the
# script β€” that runs the actual delete + write inside a render that runs
# to completion (no st.rerun() between the write and the script ending), so
# the JS component lands.
# session_state key holding the ID of a conversation awaiting deletion
_PENDING_KEY = "_pending_delete_conversation_id"
def queue_delete(conversation_id: str) -> None:
    """Record a delete request for process_pending() to execute on the next
    render — the localStorage write must happen in a render that runs to
    completion, not one cut short by st.rerun()."""
    st.session_state[_PENDING_KEY] = conversation_id
def process_pending() -> None:
    """Execute a queued conversation delete, if any. Call near the top of
    the script, after bootstrap_load()."""
    queued = st.session_state.pop(_PENDING_KEY, None)
    if queued:
        delete_conversation(queued)
# ══════════════════════════════════════════════════════════════════════════
# Saved responses β€” per-Q&A bookmarks, fork-on-click
# ══════════════════════════════════════════════════════════════════════════
#
# A saved response is a snapshot of the conversation up to and including a
# specific Q&A pair. Clicking a saved response in the sidebar restores that
# snapshot into a NEW conversation (fork), leaving the original conversation
# untouched. The save action is idempotent β€” re-saving the same Q&A pair
# overwrites the existing record rather than creating a duplicate.
#
# Storage shape (one record per saved response):
# {
# "id": "<source_conversation_id>:<msg_index>",
# "question": "<user message text>",
# "answer": "<assistant message text>",
# "saved_at": "<iso timestamp>",
# "source_conversation_id": "<original conversation uuid>",
# "source_conversation_title": "<title at save time, for display>",
# "msg_index": <int β€” original position; traceability>,
# "messages_pair": [<user msg>, <assistant msg>],
# "history_pair": [<user>, <assistant snippet>],
# "result": <single result dict β€” citations, etc.>,
# }
#
# The pair is the atomic unit β€” restoring a saved response brings JUST
# that Q&A pair back into the chat, not the full conversation thread
# that led to it. The next question forks into a new conversation seeded
# only with this pair as context.
#
# The ID being deterministic (conversation_id + msg_index) is what gives
# us free idempotency β€” upsert against the same key always overwrites.
def saved_response_id(conversation_id: str, msg_index: int) -> str:
    """Deterministic record key for a (conversation, message-index) pair —
    re-saving the same Q&A pair upserts against the same key, giving free
    idempotency."""
    return ":".join((conversation_id, str(msg_index)))
def bootstrap_load_saved() -> None:
    """Read the saved-responses array from localStorage once at app boot.

    Same first-render dance as bootstrap_load(): a None response means the
    JS component is still mounting, so we skip caching and let the
    auto-rerun retry; only a non-None response populates the cache.
    """
    if _SAVED_CACHE_KEY in st.session_state:
        return
    raw = get_local_storage(SAVED_RESPONSES_KEY, component_key="ls_load_saved")
    if raw is None:
        return  # component mounting — retry on the auto-rerun
    records: list = []
    if raw != "":
        # Parse defensively; anything malformed collapses to []
        try:
            decoded = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            decoded = []
        if isinstance(decoded, list):
            records = decoded
    st.session_state[_SAVED_CACHE_KEY] = records
def load_all_saved() -> list[dict]:
    """Cached saved-responses list; [] until bootstrap_load_saved() completes."""
    saved = st.session_state.get(_SAVED_CACHE_KEY, [])
    return saved
def _save_all_saved(saved: list[dict]) -> None:
    """Persist the saved-responses array to localStorage and refresh the cache.

    Double json.dumps: the inner call produces the payload, the outer turns it
    into a safely-escaped JS string literal — same trick as _save_all(), which
    documents why we bypass streamlit-js-eval's set_local_storage helper.
    """
    literal = json.dumps(json.dumps(saved))
    streamlit_js_eval(
        js_expressions=f"localStorage.setItem('{SAVED_RESPONSES_KEY}', {literal})",
        key="ls_save_saved",
    )
    st.session_state[_SAVED_CACHE_KEY] = saved
def save_response(
    conversation_id: str,
    msg_index: int,
    question: str,
    answer: str,
    result: dict,
    source_conversation_title: str = "",
) -> None:
    """Save a Q&A pair as a forkable artefact.

    Idempotent on (conversation_id, msg_index) — re-saving overwrites the
    existing record. Stores ONLY the pair (question + answer + that single
    result), not the full conversation thread; clicking the saved response
    in the sidebar restores just this pair into the chat, and new questions
    fork from there.

    Args:
        conversation_id: source conversation UUID.
        msg_index: position of the assistant message in the source thread.
        question: the user message text.
        answer: the assistant message text.
        result: the single result dict (citations, timing, sections, usage).
        source_conversation_title: title at save time, for sidebar display.
    """
    if not conversation_id or msg_index < 0 or not question:
        return
    saved_id = saved_response_id(conversation_id, msg_index)
    existing = load_all_saved()
    # Preserve original saved_at on re-save so the sidebar order is stable.
    # Use .get() — a record written by an older schema may lack the field,
    # and prior["saved_at"] would raise KeyError and abort the save.
    prior = next((s for s in existing if s.get("id") == saved_id), None)
    saved_at = (prior.get("saved_at") if prior else None) or now_iso()
    record = {
        "id": saved_id,
        "question": question,
        "answer": answer,
        "saved_at": saved_at,
        "source_conversation_id": conversation_id,
        "source_conversation_title": source_conversation_title,
        "msg_index": msg_index,
        # The atomic unit — just the pair, no preceding context
        "messages_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ],
        # Condensed history mirrors the snippet shape used by run_query_stream
        # (answer truncated to 500 chars) so a forked follow-up has consistent
        # LLM-side context
        "history_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": (answer or "")[:500]},
        ],
        # The single result dict — citations, timing, sections, token usage
        "result": result or {},
    }
    filtered = [s for s in existing if s.get("id") != saved_id]
    updated = [record] + filtered
    # Sort by saved_at desc and cap
    updated.sort(key=lambda s: s.get("saved_at", ""), reverse=True)
    updated = updated[:MAX_SAVED_RESPONSES]
    _save_all_saved(updated)
def is_response_saved(conversation_id: str, msg_index: int) -> bool:
    """True when this Q&A pair already has a saved-response record. Drives
    the Save button's 'Saved ✓' state."""
    if not conversation_id:
        return False
    target = saved_response_id(conversation_id, msg_index)
    for record in load_all_saved():
        if record.get("id") == target:
            return True
    return False
def delete_saved_response(saved_id: str) -> None:
    """Remove a saved response by ID; only writes through if something was removed."""
    before = load_all_saved()
    after = [s for s in before if s.get("id") != saved_id]
    if len(after) != len(before):
        _save_all_saved(after)
def restore_from_saved(saved: dict) -> None:
    """Fork a saved Q&A pair into a brand-new conversation.

    Restores ONLY the stored pair (and its single result) into
    session_state — none of the source thread's preceding context. A fresh
    conversation_id is minted so follow-up questions grow a new thread; the
    original conversation in localStorage is left untouched.
    """
    pair = list(saved.get("messages_pair") or [])
    st.session_state.messages = pair
    st.session_state.history = list(saved.get("history_pair") or [])
    # In the restored 2-element transcript the assistant message sits at
    # index 1, so its result is keyed against 1 regardless of where it sat
    # in the source conversation.
    st.session_state.results = {1: saved.get("result") or {}} if pair else {}
    # Mint a fresh conversation_id — this is the fork
    st.session_state.conversation_id = new_conversation_id()
    # Clear starter-pill flags so the restored fork doesn't refire the pill
    st.session_state.pop("starter_pill", None)
    st.session_state.pop("_starter_handled", None)
# Deferred-delete pattern for saved responses β€” same rationale as the
# conversation queue_delete/process_pending pair above.
# session_state key holding the ID of a saved response awaiting deletion
_PENDING_SAVED_KEY = "_pending_delete_saved_id"
def queue_delete_saved(saved_id: str) -> None:
    """Record a saved-response delete for process_pending_saved() to execute
    on the next render — the localStorage write must happen in a render that
    runs to completion, not one cut short by st.rerun()."""
    st.session_state[_PENDING_SAVED_KEY] = saved_id
def process_pending_saved() -> None:
    """Execute a queued saved-response delete, if any. Call at the top of
    the script alongside process_pending()."""
    queued = st.session_state.pop(_PENDING_SAVED_KEY, None)
    if queued:
        delete_saved_response(queued)