Spaces:

webmuppetnz
/

hmc-rag

Running

File size: 17,981 Bytes

"""
Browser-local conversation persistence via localStorage.

Stores all conversations under a single localStorage key as a JSON array. The
session_state acts as an in-memory cache so we only round-trip to JS for the
initial load and saves.

Lifecycle:
- App boot calls `bootstrap_load()` once at the top of app.py to populate
  the cache from localStorage.
- `save_conversation()` upserts the current conversation, capping the list
  at MAX_CONVERSATIONS by `last_active_at`.
- `load_conversation(id)` and `delete_conversation(id)` work against the
  cache + write through to localStorage.

streamlit-js-eval was chosen over streamlit-local-storage because the latter
has a documented `st.rerun()` race condition (issue #233) that breaks our
"New conversation" handler.
"""

from __future__ import annotations

import json
import uuid
from datetime import datetime, timezone
from typing import Optional

import streamlit as st
from streamlit_js_eval import (
    get_local_storage,
    streamlit_js_eval,
)

# localStorage key under which the conversations JSON array is stored
STORAGE_KEY = "hmc-conversations"
# Upper bound on stored conversations — save_conversation() keeps only the
# most recently active ones
MAX_CONVERSATIONS = 20

# Saved responses are a separate concept from conversations:
# - Conversations: auto-saved whole threads (rejoin model, click to resume)
# - Saved responses: explicit per-Q&A bookmarks (fork model, click to start
#   a divergent conversation from that point)
SAVED_RESPONSES_KEY = "hmc-saved-responses"
MAX_SAVED_RESPONSES = 50  # higher than conversations — smaller atomic unit

# Internal cache keys on session_state.
# _CACHE_KEY is read by load_all() and written by bootstrap_load() and
# _save_all(); _SAVED_CACHE_KEY is the saved-responses counterpart
# (load_all_saved(), bootstrap_load_saved(), _save_all_saved()).
_CACHE_KEY = "_conversations_cache"
_SAVED_CACHE_KEY = "_saved_responses_cache"


# ── Helpers ───────────────────────────────────────────────────────────────


def now_iso() -> str:
    """Return the present moment as a timezone-aware UTC ISO 8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()


def new_conversation_id() -> str:
    """Mint a random (version 4) UUID and return its string form."""
    fresh = uuid.uuid4()
    return str(fresh)


def title_from_messages(messages: list[dict]) -> str:
    """Derive a readable title from the first user message.

    The first message whose role is "user" supplies the title. Its content
    is stripped of surrounding whitespace and, if longer than 80 characters,
    cut to the first 80 (trailing whitespace removed) with an ellipsis
    appended.

    Returns 'Untitled conversation' when there is no user message, or when
    that message's content is missing, not a string, or blank — so a
    malformed record can't raise here or produce an empty title.
    """
    fallback = "Untitled conversation"
    max_chars = 80

    first_user = next(
        (
            m
            for m in (messages or [])
            if isinstance(m, dict) and m.get("role") == "user"
        ),
        None,
    )
    if first_user is None:
        return fallback

    content = first_user.get("content")
    if not isinstance(content, str):
        return fallback

    # Strip BEFORE the emptiness check so whitespace-only input falls back
    # instead of yielding an empty title.
    text = content.strip()
    if not text:
        return fallback
    if len(text) > max_chars:
        return text[:max_chars].rstrip() + "…"
    return text


# ── Storage I/O ───────────────────────────────────────────────────────────


def bootstrap_load() -> None:
    """Read conversations from localStorage once and cache them in session_state.

    Call this at the top of app.py, after session_state init. Subsequent
    reads via load_all() use the cache.

    streamlit-js-eval semantics:
    - First render: component is mounting, get_local_storage() returns None.
    - Streamlit reruns automatically once the component completes.
    - Second render: get_local_storage() returns the actual value (the
      stored string, or empty string if localStorage has no entry).

    Critically we must NOT cache the first-render None as 'loaded' or we
    short-circuit before the real value comes back. We only mark the cache
    populated once we get a non-None response.

    Anything that is not a JSON array (unparseable text, a JSON object, a
    number, ...) is treated as 'no conversations'. Array entries that are
    not dicts are dropped: every consumer of the cache calls `.get("id")`
    on each entry, so a single stray value in storage would otherwise raise
    AttributeError on every render.
    """
    if _CACHE_KEY in st.session_state:
        return  # Already loaded with a real value

    raw = get_local_storage(STORAGE_KEY, component_key="ls_load")

    if raw is None:
        # First render — component still mounting. Don't cache; let the
        # next render re-attempt after the auto-rerun.
        return

    conversations: list[dict] = []
    if raw != "":
        try:
            # The component may hand back an already-decoded list; only
            # decode when we were given something else (normally a string).
            data = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            data = None
        if isinstance(data, list):
            conversations = [c for c in data if isinstance(c, dict)]

    # Real response received (possibly empty) — mark the cache populated.
    st.session_state[_CACHE_KEY] = conversations


def load_all() -> list[dict]:
    """Fetch the conversations list cached in session_state.

    Returns an empty list when the cache has not been populated yet.
    """
    state = st.session_state
    if _CACHE_KEY in state:
        return state[_CACHE_KEY]
    return []


def _save_all(conversations: list[dict]) -> None:
    """Persist the whole conversations array to localStorage, then refresh the cache.

    streamlit-js-eval's own set_local_storage helper is deliberately not
    used: v1.0.0 interpolates the value into a single-quoted JS string, so
    any apostrophe in the JSON (very common in compliance answers, e.g.
    "I'll", "you're", "doesn't") breaks the JS with 'missing ) after
    argument list' and the save silently fails.

    Instead the setItem call is assembled here, with the payload run
    through json.dumps a second time so it arrives as a properly-escaped,
    JS-safe quoted string regardless of what's in the content.
    """
    # Inner dumps: Python list -> JSON text.
    # Outer dumps: JSON text -> quoted string literal, e.g. "[{\"id\": ...}]"
    quoted_json = json.dumps(json.dumps(conversations))
    script = "localStorage.setItem('{}', {})".format(STORAGE_KEY, quoted_json)
    streamlit_js_eval(js_expressions=script, key="ls_save")
    st.session_state[_CACHE_KEY] = conversations


# ── Public API ────────────────────────────────────────────────────────────


def save_conversation(
    conversation_id: str,
    messages: list[dict],
    history: list[dict],
    results: dict,
) -> None:
    """Upsert a conversation, capping the list at MAX_CONVERSATIONS.

    Skips if conversation_id is falsy or there are no messages yet (don't
    persist empty conversations).

    The record's `created_at` is carried over from an existing entry with
    the same ID; `last_active_at` is always refreshed. After the upsert the
    list is ordered by `last_active_at` (newest first) and truncated to
    MAX_CONVERSATIONS before being written via _save_all().

    Tolerates imperfect input: an existing record without `created_at`
    gets the current timestamp instead of raising KeyError, `results` may
    be None, and a missing/null `last_active_at` on an old record sorts
    last instead of breaking the sort.
    """
    if not conversation_id or not messages:
        return

    existing = load_all()
    # One clock read, so a brand-new record has created_at == last_active_at
    timestamp = now_iso()

    # Preserve original created_at if this conversation already exists
    prior = next((c for c in existing if c.get("id") == conversation_id), None)
    created_at = (prior or {}).get("created_at") or timestamp

    record = {
        "id": conversation_id,
        "title": title_from_messages(messages),
        "created_at": created_at,
        "last_active_at": timestamp,
        "messages": messages,
        "history": history,
        # JSON object keys must be strings — convert int keys for results dict
        "results": {str(k): v for k, v in (results or {}).items()},
    }

    # Drop any existing entry with the same ID, then put the new one first
    others = [c for c in existing if c.get("id") != conversation_id]

    # Sort by last_active_at desc and cap. `or ""` keeps a null timestamp
    # comparable with the ISO strings.
    updated = sorted(
        [record, *others],
        key=lambda c: c.get("last_active_at") or "",
        reverse=True,
    )[:MAX_CONVERSATIONS]

    _save_all(updated)


def load_conversation(conversation_id: str) -> Optional[dict]:
    """Find the cached conversation whose "id" equals conversation_id.

    Returns the first matching record, or None when nothing matches.
    """
    matches = (c for c in load_all() if c.get("id") == conversation_id)
    return next(matches, None)


def delete_conversation(conversation_id: str) -> None:
    """Drop the conversation with the given ID and persist the result.

    When no record carries that ID nothing is written.
    """
    current = load_all()
    remaining = [c for c in current if c.get("id") != conversation_id]
    # Only write when the filter actually removed something
    if len(remaining) != len(current):
        _save_all(remaining)


def restore_state(conversation: dict) -> None:
    """Populate st.session_state with a conversation's content.

    Reverses the save_conversation transformation — converts string-keyed
    results dict back to int keys.

    `messages` and `history` are copied (shallowly) rather than assigned
    directly, so appending to the live session lists does not also mutate
    the record the caller passed in. Missing or null `messages`, `history`
    and `results` fields are treated as empty.

    Everything that can fail is computed before session_state is touched,
    so a bad record leaves the current session intact.

    Raises:
        KeyError: the record has no "id".
        ValueError: a key of the results dict is not an integer string.
    """
    # Validate / convert first — no session_state writes above this line.
    conversation_id = conversation["id"]
    raw_results = conversation.get("results") or {}
    results = {int(k): v for k, v in raw_results.items()}
    messages = list(conversation.get("messages") or [])
    history = list(conversation.get("history") or [])

    st.session_state.messages = messages
    st.session_state.history = history
    st.session_state.results = results
    st.session_state.conversation_id = conversation_id
    # Reset the starter-pill state so a restored conversation doesn't refire it
    st.session_state.pop("starter_pill", None)
    st.session_state.pop("_starter_handled", None)


# ── Deferred-write hook ───────────────────────────────────────────────────
#
# streamlit-js-eval's component renders LATE in the script lifecycle. If we
# call st.rerun() immediately after a delete (so the UI reflects the
# removal), Streamlit aborts the script before the component is sent to the
# frontend — the localStorage write is silently dropped. UI looks correct
# (cache was updated) but on browser refresh the deleted conversation comes
# back as a "zombie" from localStorage.
#
# Pattern: queue the delete on session_state, call st.rerun() to refresh
# the UI, then in the next render call process_pending() at the top of the
# script — that runs the actual delete + write inside a render that runs
# to completion (no st.rerun() between the write and the script ending), so
# the JS component lands.

# session_state key holding the ID of the conversation queued for deletion.
# Written by queue_delete(), popped by process_pending().
_PENDING_KEY = "_pending_delete_conversation_id"


def queue_delete(conversation_id: str) -> None:
    """Record a conversation ID in session_state for deletion on the next render.

    Nothing is deleted here; process_pending() picks the ID up later.
    """
    state = st.session_state
    state[_PENDING_KEY] = conversation_id


def process_pending() -> None:
    """Carry out a conversation delete queued by queue_delete(), if any.

    Call at the top of the script, after bootstrap_load. The queued ID is
    removed from session_state whether or not a delete ends up happening.
    """
    queued = st.session_state.pop(_PENDING_KEY, None)
    if not queued:
        return
    delete_conversation(queued)


# ══════════════════════════════════════════════════════════════════════════
# Saved responses — per-Q&A bookmarks, fork-on-click
# ══════════════════════════════════════════════════════════════════════════
#
# A saved response is a snapshot of one specific Q&A pair (the question, the
# answer, and that answer's result), not of the thread that led up to it.
# Clicking a saved response in the sidebar restores that pair into a NEW
# conversation (fork), leaving the original conversation untouched. The save
# action is idempotent — re-saving the same Q&A pair overwrites the existing
# record rather than creating a duplicate.
#
# Storage shape (one record per saved response):
#   {
#     "id":                          "<source_conversation_id>:<msg_index>",
#     "question":                    "<user message text>",
#     "answer":                      "<assistant message text>",
#     "saved_at":                    "<iso timestamp>",
#     "source_conversation_id":      "<original conversation uuid>",
#     "source_conversation_title":   "<title at save time, for display>",
#     "msg_index":                   <int — original position; traceability>,
#     "messages_pair":               [<user msg>, <assistant msg>],
#     "history_pair":                [<user>, <assistant snippet>],
#     "result":                      <single result dict — citations, etc.>,
#   }
#
# The pair is the atomic unit — restoring a saved response brings JUST
# that Q&A pair back into the chat, not the full conversation thread
# that led to it. The next question forks into a new conversation seeded
# only with this pair as context.
#
# The ID being deterministic (conversation_id + msg_index) is what gives
# us free idempotency — upsert against the same key always overwrites.


def saved_response_id(conversation_id: str, msg_index: int) -> str:
    """Build the "<conversation_id>:<msg_index>" ID for a saved response.

    The same inputs always give the same ID, which is what lets a re-save
    of the same Q&A pair replace the earlier record instead of adding one.
    """
    return "{}:{}".format(conversation_id, msg_index)


def bootstrap_load_saved() -> None:
    """Read saved responses from localStorage once and cache them in session_state.

    Same lifecycle semantics as bootstrap_load() — see that function's
    docstring for the streamlit-js-eval first-render dance. In short: a
    None response means the component is still mounting and must not be
    cached; any other response marks the cache as populated.

    Anything that is not a JSON array is treated as 'no saved responses'.
    Array entries that are not dicts are dropped: every consumer of the
    cache calls `.get("id")` on each entry, so a single stray value in
    storage would otherwise raise AttributeError on every render.
    """
    if _SAVED_CACHE_KEY in st.session_state:
        return

    raw = get_local_storage(SAVED_RESPONSES_KEY, component_key="ls_load_saved")

    if raw is None:
        # Component still mounting — don't cache, let auto-rerun re-attempt
        return

    saved: list[dict] = []
    if raw != "":
        try:
            # The component may hand back an already-decoded list; only
            # decode when we were given something else (normally a string).
            data = raw if isinstance(raw, list) else json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            data = None
        if isinstance(data, list):
            saved = [s for s in data if isinstance(s, dict)]

    st.session_state[_SAVED_CACHE_KEY] = saved


def load_all_saved() -> list[dict]:
    """Fetch the saved-responses list cached in session_state.

    Returns an empty list when the cache has not been populated yet.
    """
    state = st.session_state
    if _SAVED_CACHE_KEY in state:
        return state[_SAVED_CACHE_KEY]
    return []


def _save_all_saved(saved: list[dict]) -> None:
    """Persist the whole saved-responses array to localStorage, then refresh the cache.

    Uses the same JSON-double-encode trick as _save_all() — see that
    function's docstring for why streamlit-js-eval's set_local_storage is
    bypassed.
    """
    # Inner dumps: Python list -> JSON text.
    # Outer dumps: JSON text -> quoted string literal for the JS call.
    quoted_json = json.dumps(json.dumps(saved))
    script = "localStorage.setItem('{}', {})".format(SAVED_RESPONSES_KEY, quoted_json)
    streamlit_js_eval(js_expressions=script, key="ls_save_saved")
    st.session_state[_SAVED_CACHE_KEY] = saved


def save_response(
    conversation_id: str,
    msg_index: int,
    question: str,
    answer: str,
    result: dict,
    source_conversation_title: str = "",
) -> None:
    """Save a Q&A pair as a forkable artefact. Idempotent on
    (conversation_id, msg_index) — re-saving overwrites the existing record.

    Stores ONLY the pair (question + answer + that single result), not the
    full conversation thread. Clicking the saved response in the sidebar
    restores just this pair into the chat; new questions fork from there.

    Does nothing when conversation_id is falsy, msg_index is negative, or
    question is empty. After the upsert the list is ordered by `saved_at`
    (newest first) and truncated to MAX_SAVED_RESPONSES.

    Tolerates imperfect stored data: an existing record without `saved_at`
    gets the current timestamp instead of raising KeyError, and a
    missing/null `saved_at` on another record sorts last instead of
    breaking the sort.
    """
    if not conversation_id or msg_index < 0 or not question:
        return

    saved_id = saved_response_id(conversation_id, msg_index)
    existing = load_all_saved()

    # Preserve original saved_at on re-save so the sidebar order is stable
    prior = next((s for s in existing if s.get("id") == saved_id), None)
    saved_at = (prior or {}).get("saved_at") or now_iso()

    record = {
        "id": saved_id,
        "question": question,
        "answer": answer,
        "saved_at": saved_at,
        "source_conversation_id": conversation_id,
        "source_conversation_title": source_conversation_title,
        "msg_index": msg_index,
        # The atomic unit — just the pair, no preceding context
        "messages_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ],
        # Condensed history mirrors the snippet shape used by run_query_stream
        # (answer truncated to 500 chars) so a forked follow-up has consistent
        # LLM-side context
        "history_pair": [
            {"role": "user", "content": question},
            {"role": "assistant", "content": (answer or "")[:500]},
        ],
        # The single result dict — citations, timing, sections, token usage
        "result": result or {},
    }

    # Drop any existing entry with the same ID, then put the new one first
    others = [s for s in existing if s.get("id") != saved_id]

    # Sort by saved_at desc and cap. `or ""` keeps a null timestamp
    # comparable with the ISO strings.
    updated = sorted(
        [record, *others],
        key=lambda s: s.get("saved_at") or "",
        reverse=True,
    )[:MAX_SAVED_RESPONSES]

    _save_all_saved(updated)


def is_response_saved(conversation_id: str, msg_index: int) -> bool:
    """Report whether a Q&A pair already has a saved-response record.

    Used by the Save button to render its 'Saved ✓' state. A falsy
    conversation_id is never considered saved.
    """
    if not conversation_id:
        return False
    target = saved_response_id(conversation_id, msg_index)
    for entry in load_all_saved():
        if entry.get("id") == target:
            return True
    return False


def delete_saved_response(saved_id: str) -> None:
    """Drop the saved response with the given ID and persist the result.

    When no record carries that ID nothing is written.
    """
    current = load_all_saved()
    remaining = [s for s in current if s.get("id") != saved_id]
    # Only write when the filter actually removed something
    if len(remaining) != len(current):
        _save_all_saved(remaining)


def restore_from_saved(saved: dict) -> None:
    """Fork a saved response into the live session.

    Loads ONLY the saved Q&A pair (copies of `messages_pair` and
    `history_pair`) into session_state and assigns a freshly generated
    conversation_id, so follow-up questions build a new thread seeded with
    just this pair. Nothing is written to localStorage here, so the
    conversation the pair came from is left as it was.
    """
    state = st.session_state

    pair = saved.get("messages_pair") or []
    state.messages = list(pair)
    state.history = list(saved.get("history_pair") or [])

    # In the restored two-message list the assistant reply sits at index 1,
    # so its result is keyed by 1 no matter where the reply was positioned
    # in the source conversation.
    if pair:
        state.results = {1: saved.get("result") or {}}
    else:
        state.results = {}

    # A new ID is what makes this a fork rather than a resume
    state.conversation_id = new_conversation_id()

    # Clear starter-pill state so the restored fork doesn't refire it
    for stale_key in ("starter_pill", "_starter_handled"):
        state.pop(stale_key, None)


# Deferred-delete pattern for saved responses — same rationale as the
# conversation queue_delete/process_pending pair above.
# This is the session_state key holding the queued saved-response ID:
# written by queue_delete_saved(), popped by process_pending_saved().
_PENDING_SAVED_KEY = "_pending_delete_saved_id"


def queue_delete_saved(saved_id: str) -> None:
    """Record a saved-response ID in session_state for deletion on the next render.

    Nothing is deleted here; process_pending_saved() picks the ID up later.
    """
    state = st.session_state
    state[_PENDING_SAVED_KEY] = saved_id


def process_pending_saved() -> None:
    """Carry out a saved-response delete queued by queue_delete_saved(), if any.

    Call at the top of the script alongside process_pending(). The queued
    ID is removed from session_state whether or not a delete ends up
    happening.
    """
    queued = st.session_state.pop(_PENDING_SAVED_KEY, None)
    if not queued:
        return
    delete_saved_response(queued)