# hmc-rag / src/persistence.py
# Last commit: baa1903 by webmuppet —
# "Saved responses: store just the pair, not the full thread"
"""
Browser-local conversation persistence via localStorage.
Stores all conversations under a single localStorage key as a JSON array. The
session_state acts as an in-memory cache so we only round-trip to JS for the
initial load and saves.
Lifecycle:
- App boot calls `bootstrap_load()` once at the top of app.py to populate
the cache from localStorage.
- `save_conversation()` upserts the current conversation, capping the list
at MAX_CONVERSATIONS by `last_active_at`.
- `load_conversation(id)` and `delete_conversation(id)` work against the
cache + write through to localStorage.
streamlit-js-eval was chosen over streamlit-local-storage because the latter
has a documented `st.rerun()` race condition (issue #233) that breaks our
"New conversation" handler.
"""
from __future__ import annotations
import json
import uuid
from datetime import datetime, timezone
from typing import Optional
import streamlit as st
from streamlit_js_eval import (
get_local_storage,
streamlit_js_eval,
)
# Single localStorage key holding the entire conversations array as JSON
STORAGE_KEY = "hmc-conversations"
# Cap enforced in save_conversation(); oldest by last_active_at are dropped
MAX_CONVERSATIONS = 20
# Saved responses are a separate concept from conversations:
# - Conversations: auto-saved whole threads (rejoin model, click to resume)
# - Saved responses: explicit per-Q&A bookmarks (fork model, click to start
#   a divergent conversation from that point)
SAVED_RESPONSES_KEY = "hmc-saved-responses"
MAX_SAVED_RESPONSES = 50  # higher than conversations — smaller atomic unit
# Internal cache keys on session_state — read by load_all()/load_all_saved(),
# written by _save_all()/_save_all_saved() and the bootstrap loaders
_CACHE_KEY = "_conversations_cache"
_SAVED_CACHE_KEY = "_saved_responses_cache"
# ── Helpers ───────────────────────────────────────────────────────────────
def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return datetime.now(tz=timezone.utc).isoformat()
def new_conversation_id() -> str:
    """Mint a random version-4 UUID string identifying a conversation."""
    return f"{uuid.uuid4()}"
def title_from_messages(messages: list[dict]) -> str:
    """Derive a display title from the first user message.

    The content is stripped of surrounding whitespace and truncated to 80
    characters plus an ellipsis when longer. Returns 'Untitled conversation'
    when there is no user message or its content is empty.
    """
    for msg in messages:
        if msg.get("role") == "user":
            content = msg["content"]
            break
    else:
        content = None
    if not content:
        return "Untitled conversation"
    text = content.strip()
    return text[:80].rstrip() + "…" if len(text) > 80 else text
# ── Storage I/O ───────────────────────────────────────────────────────────
def bootstrap_load() -> None:
    """Read localStorage once at app boot and cache the result in session_state.

    Call at the top of app.py after session_state init; subsequent reads go
    through load_all()'s cache.

    streamlit-js-eval semantics: on the first render the component is still
    mounting and get_local_storage() returns None; Streamlit auto-reruns when
    the component completes, and the second render sees the real value (the
    stored string, or "" when localStorage has no entry). We must NOT cache
    the first-render None, or we short-circuit before the real value arrives —
    the cache is only populated on a non-None response.
    """
    if _CACHE_KEY in st.session_state:
        return  # already populated with a real value
    raw = get_local_storage(STORAGE_KEY, component_key="ls_load")
    if raw is None:
        # Component still mounting — skip caching; the auto-rerun retries.
        return
    conversations: list = []
    if raw != "":
        # Non-empty response: parse defensively; anything malformed → []
        try:
            decoded = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            decoded = []
        if isinstance(decoded, list):
            conversations = decoded
    st.session_state[_CACHE_KEY] = conversations
def load_all() -> list[dict]:
    """Return the cached conversations; [] until bootstrap_load() completes."""
    conversations = st.session_state.get(_CACHE_KEY, [])
    return conversations
def _save_all(conversations: list[dict]) -> None:
    """Write the full conversations array to localStorage and refresh the cache.

    Bypasses streamlit-js-eval's set_local_storage helper: v1.0.0 interpolates
    the value into a single-quoted JS string, so any apostrophe in the JSON
    (common in compliance answers — "I'll", "doesn't") breaks the JS with
    'missing ) after argument list' and the save silently fails.

    Instead we double-encode: the inner json.dumps produces the payload, the
    outer json.dumps turns it into a properly-escaped, JS-safe quoted string
    literal regardless of content.
    """
    literal = json.dumps(json.dumps(conversations))
    streamlit_js_eval(
        js_expressions=f"localStorage.setItem('{STORAGE_KEY}', {literal})",
        key="ls_save",
    )
    st.session_state[_CACHE_KEY] = conversations
# ── Public API ────────────────────────────────────────────────────────────
def save_conversation(
    conversation_id: str,
    messages: list[dict],
    history: list[dict],
    results: dict,
) -> None:
    """Upsert a conversation, capping the list at MAX_CONVERSATIONS.

    Args:
        conversation_id: UUID of the conversation being saved.
        messages: full chat transcript (role/content dicts).
        history: condensed LLM-side history.
        results: per-assistant-message result dicts, keyed by int index.

    Skips if conversation_id is falsy or there are no messages yet (don't
    persist empty conversations).
    """
    if not conversation_id or not messages:
        return
    existing = load_all()
    # Preserve original created_at if this conversation already exists.
    # Use .get() — a record written by an older schema may lack the field,
    # and prior["created_at"] would raise KeyError and abort the save.
    prior = next((c for c in existing if c.get("id") == conversation_id), None)
    created_at = (prior.get("created_at") if prior else None) or now_iso()
    record = {
        "id": conversation_id,
        "title": title_from_messages(messages),
        "created_at": created_at,
        "last_active_at": now_iso(),
        "messages": messages,
        "history": history,
        # JSON object keys must be strings — convert int keys for results dict
        "results": {str(k): v for k, v in results.items()},
    }
    # Replace any existing entry with the same ID, then prepend the updated one
    filtered = [c for c in existing if c.get("id") != conversation_id]
    updated = [record] + filtered
    # Sort by last_active_at desc and cap
    updated.sort(key=lambda c: c.get("last_active_at", ""), reverse=True)
    updated = updated[:MAX_CONVERSATIONS]
    _save_all(updated)
def load_conversation(conversation_id: str) -> Optional[dict]:
    """Return the conversation whose id matches, or None when not found."""
    matches = (c for c in load_all() if c.get("id") == conversation_id)
    return next(matches, None)
def delete_conversation(conversation_id: str) -> None:
    """Remove a conversation by ID; only writes through if something was removed."""
    before = load_all()
    after = [c for c in before if c.get("id") != conversation_id]
    if len(after) != len(before):
        _save_all(after)
def restore_state(conversation: dict) -> None:
    """Hydrate st.session_state from a stored conversation record.

    Inverse of save_conversation()'s serialization — results keys come back
    from JSON as strings and are converted back to ints here.
    """
    state = st.session_state
    state.messages = conversation.get("messages", [])
    state.history = conversation.get("history", [])
    state.results = {
        int(index): value
        for index, value in conversation.get("results", {}).items()
    }
    state.conversation_id = conversation["id"]
    # Drop starter-pill flags so the restored thread doesn't refire the pill
    for key in ("starter_pill", "_starter_handled"):
        state.pop(key, None)
# ── Deferred-write hook ───────────────────────────────────────────────────
#
# streamlit-js-eval's component renders LATE in the script lifecycle. If we
# call st.rerun() immediately after a delete (so the UI reflects the
# removal), Streamlit aborts the script before the component is sent to the
# frontend β€” the localStorage write is silently dropped. UI looks correct
# (cache was updated) but on browser refresh the deleted conversation comes
# back as a "zombie" from localStorage.
#
# Pattern: queue the delete on session_state, call st.rerun() to refresh
# the UI, then in the next render call process_pending() at the top of the
# script β€” that runs the actual delete + write inside a render that runs
# to completion (no st.rerun() between the write and the script ending), so
# the JS component lands.
# session_state key holding the ID of a conversation awaiting deletion
_PENDING_KEY = "_pending_delete_conversation_id"
def queue_delete(conversation_id: str) -> None:
    """Record a delete request for process_pending() to execute on the next
    render — the localStorage write must happen in a render that runs to
    completion, not one cut short by st.rerun()."""
    st.session_state[_PENDING_KEY] = conversation_id
def process_pending() -> None:
    """Execute a queued conversation delete, if any. Call near the top of
    the script, after bootstrap_load()."""
    queued = st.session_state.pop(_PENDING_KEY, None)
    if queued:
        delete_conversation(queued)
# ══════════════════════════════════════════════════════════════════════════
# Saved responses β€” per-Q&A bookmarks, fork-on-click
# ══════════════════════════════════════════════════════════════════════════
#
# A saved response is a snapshot of the conversation up to and including a
# specific Q&A pair. Clicking a saved response in the sidebar restores that
# snapshot into a NEW conversation (fork), leaving the original conversation
# untouched. The save action is idempotent β€” re-saving the same Q&A pair
# overwrites the existing record rather than creating a duplicate.
#
# Storage shape (one record per saved response):
# {
# "id": "<source_conversation_id>:<msg_index>",
# "question": "<user message text>",
# "answer": "<assistant message text>",
# "saved_at": "<iso timestamp>",
# "source_conversation_id": "<original conversation uuid>",
# "source_conversation_title": "<title at save time, for display>",
# "msg_index": <int β€” original position; traceability>,
# "messages_pair": [<user msg>, <assistant msg>],
# "history_pair": [<user>, <assistant snippet>],
# "result": <single result dict β€” citations, etc.>,
# }
#
# The pair is the atomic unit β€” restoring a saved response brings JUST
# that Q&A pair back into the chat, not the full conversation thread
# that led to it. The next question forks into a new conversation seeded
# only with this pair as context.
#
# The ID being deterministic (conversation_id + msg_index) is what gives
# us free idempotency β€” upsert against the same key always overwrites.
def saved_response_id(conversation_id: str, msg_index: int) -> str:
    """Deterministic record key for a (conversation, message-index) pair —
    re-saving the same Q&A pair upserts against the same key, giving free
    idempotency."""
    return ":".join((conversation_id, str(msg_index)))
def bootstrap_load_saved() -> None:
    """Read the saved-responses array from localStorage once at app boot.

    Same first-render dance as bootstrap_load(): a None response means the
    JS component is still mounting, so we skip caching and let the
    auto-rerun retry; only a non-None response populates the cache.
    """
    if _SAVED_CACHE_KEY in st.session_state:
        return
    raw = get_local_storage(SAVED_RESPONSES_KEY, component_key="ls_load_saved")
    if raw is None:
        return  # component mounting — retry on the auto-rerun
    records: list = []
    if raw != "":
        # Parse defensively; anything malformed collapses to []
        try:
            decoded = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            decoded = []
        if isinstance(decoded, list):
            records = decoded
    st.session_state[_SAVED_CACHE_KEY] = records
def load_all_saved() -> list[dict]:
    """Cached saved-responses list; [] until bootstrap_load_saved() completes."""
    saved = st.session_state.get(_SAVED_CACHE_KEY, [])
    return saved
def _save_all_saved(saved: list[dict]) -> None:
    """Persist the saved-responses array to localStorage and refresh the cache.

    Double json.dumps: the inner call produces the payload, the outer turns it
    into a safely-escaped JS string literal — same trick as _save_all(), which
    documents why we bypass streamlit-js-eval's set_local_storage helper.
    """
    literal = json.dumps(json.dumps(saved))
    streamlit_js_eval(
        js_expressions=f"localStorage.setItem('{SAVED_RESPONSES_KEY}', {literal})",
        key="ls_save_saved",
    )
    st.session_state[_SAVED_CACHE_KEY] = saved
def save_response(
    conversation_id: str,
    msg_index: int,
    question: str,
    answer: str,
    result: dict,
    source_conversation_title: str = "",
) -> None:
    """Save a Q&A pair as a forkable artefact.

    Idempotent on (conversation_id, msg_index) — re-saving overwrites the
    existing record. Stores ONLY the pair (question + answer + that single
    result), not the full conversation thread; clicking the saved response
    in the sidebar restores just this pair into the chat, and new questions
    fork from there.

    Args:
        conversation_id: source conversation UUID.
        msg_index: position of the assistant message in the source thread.
        question: the user message text.
        answer: the assistant message text.
        result: the single result dict (citations, timing, sections, usage).
        source_conversation_title: title at save time, for sidebar display.
    """
    if not conversation_id or msg_index < 0 or not question:
        return
    saved_id = saved_response_id(conversation_id, msg_index)
    existing = load_all_saved()
    # Preserve original saved_at on re-save so the sidebar order is stable.
    # Use .get() — a record written by an older schema may lack the field,
    # and prior["saved_at"] would raise KeyError and abort the save.
    prior = next((s for s in existing if s.get("id") == saved_id), None)
    saved_at = (prior.get("saved_at") if prior else None) or now_iso()
    record = {
        "id": saved_id,
        "question": question,
        "answer": answer,
        "saved_at": saved_at,
        "source_conversation_id": conversation_id,
        "source_conversation_title": source_conversation_title,
        "msg_index": msg_index,
        # The atomic unit — just the pair, no preceding context
        "messages_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ],
        # Condensed history mirrors the snippet shape used by run_query_stream
        # (answer truncated to 500 chars) so a forked follow-up has consistent
        # LLM-side context
        "history_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": (answer or "")[:500]},
        ],
        # The single result dict — citations, timing, sections, token usage
        "result": result or {},
    }
    filtered = [s for s in existing if s.get("id") != saved_id]
    updated = [record] + filtered
    # Sort by saved_at desc and cap
    updated.sort(key=lambda s: s.get("saved_at", ""), reverse=True)
    updated = updated[:MAX_SAVED_RESPONSES]
    _save_all_saved(updated)
def is_response_saved(conversation_id: str, msg_index: int) -> bool:
    """True when this Q&A pair already has a saved-response record. Drives
    the Save button's 'Saved ✓' state."""
    if not conversation_id:
        return False
    target = saved_response_id(conversation_id, msg_index)
    for record in load_all_saved():
        if record.get("id") == target:
            return True
    return False
def delete_saved_response(saved_id: str) -> None:
    """Remove a saved response by ID; only writes through if something was removed."""
    before = load_all_saved()
    after = [s for s in before if s.get("id") != saved_id]
    if len(after) != len(before):
        _save_all_saved(after)
def restore_from_saved(saved: dict) -> None:
    """Fork a saved Q&A pair into a brand-new conversation.

    Restores ONLY the stored pair (and its single result) into
    session_state — none of the source thread's preceding context. A fresh
    conversation_id is minted so follow-up questions grow a new thread; the
    original conversation in localStorage is left untouched.
    """
    pair = list(saved.get("messages_pair") or [])
    st.session_state.messages = pair
    st.session_state.history = list(saved.get("history_pair") or [])
    # In the restored 2-element transcript the assistant message sits at
    # index 1, so its result is keyed against 1 regardless of where it sat
    # in the source conversation.
    st.session_state.results = {1: saved.get("result") or {}} if pair else {}
    # Mint a fresh conversation_id — this is the fork
    st.session_state.conversation_id = new_conversation_id()
    # Clear starter-pill flags so the restored fork doesn't refire the pill
    st.session_state.pop("starter_pill", None)
    st.session_state.pop("_starter_handled", None)
# Deferred-delete pattern for saved responses β€” same rationale as the
# conversation queue_delete/process_pending pair above.
# session_state key holding the ID of a saved response awaiting deletion
_PENDING_SAVED_KEY = "_pending_delete_saved_id"
def queue_delete_saved(saved_id: str) -> None:
    """Record a saved-response delete for process_pending_saved() to execute
    on the next render — the localStorage write must happen in a render that
    runs to completion, not one cut short by st.rerun()."""
    st.session_state[_PENDING_SAVED_KEY] = saved_id
def process_pending_saved() -> None:
    """Execute a queued saved-response delete, if any. Call at the top of
    the script alongside process_pending()."""
    queued = st.session_state.pop(_PENDING_SAVED_KEY, None)
    if queued:
        delete_saved_response(queued)