Spaces:

implicit-personalization
/

persona-ui

Running

App Files Files Community

Jac-Zac committed 23 days ago

Commit

db3d901

1 Parent(s): 99c28ab

Big refactor of the analysis tab with new analysis features + support for the latest persona-vectors

Browse files

Files changed (19) hide show

README.md +2 -2
app.py +35 -26
pyproject.toml +1 -2
state.py +5 -5
tabs/analysis.py +1 -0
tabs/{compare.py → analysis_core.py} +649 -198
tabs/chat.py +63 -68
tabs/chat_shared.py +105 -0
tabs/chat_ui.py +16 -12
tabs/compare_chat.py +26 -28
tabs/extract.py +20 -18
tabs/probe_ui.py +1 -1
utils/{compare_sources.py → analysis_sources.py} +44 -192
utils/chat.py +6 -10
utils/chat_export.py +2 -2
utils/contrast.py +3 -1
utils/datasets.py +11 -5
utils/helpers.py +26 -16
uv.lock +13 -15

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ Streamlit interface for persona vector extraction, analysis, and chat.
 A web app built on top of [persona-vectors](../persona-vectors) that provides three tabs:
 - **Chat** — interactive conversations with a model using persona-based system prompts (templated or biography)
-- **Compare** — load local or Hub persona vectors and explore cosine similarity, PCA, UMAP, and similarity views
 - **Extract** — run activation extraction from HuggingFace persona datasets or a local JSONL dataset directly from the browser
 ## Repository Layout
@@ -31,7 +31,7 @@ persona-ui/
 ├── state.py                 # Session state management (chat history, KV cache)
 ├── tabs/
 │   ├── chat.py              # Chat tab
-│   ├── compare.py           # Activation comparison tab
 │   ├── compare_chat.py      # Side-by-side chat comparison mode
 │   ├── extract.py           # Extraction tab
 │   └── probe_ui.py          # Probe upload and tracing controls

 A web app built on top of [persona-vectors](../persona-vectors) that provides three tabs:
 - **Chat** — interactive conversations with a model using persona-based system prompts (templated or biography)
+- **Analysis** — load local or Hub persona vectors and explore cosine similarity, PCA, UMAP, Isomap, attribute-colored projections, and dendrograms
 - **Extract** — run activation extraction from HuggingFace persona datasets or a local JSONL dataset directly from the browser
 ## Repository Layout
 ├── state.py                 # Session state management (chat history, KV cache)
 ├── tabs/
 │   ├── chat.py              # Chat tab
+│   ├── analysis.py          # Analysis tab (cosine similarity, PCA, UMAP, Isomap, dendrogram)
 │   ├── compare_chat.py      # Side-by-side chat comparison mode
 │   ├── extract.py           # Extraction tab
 │   └── probe_ui.py          # Probe upload and tracing controls

app.py CHANGED Viewed

@@ -4,13 +4,26 @@ from dataclasses import dataclass
 import streamlit as st
 from dotenv import load_dotenv
-from utils.helpers import DATASET_SOURCES
 load_dotenv()
 DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "google/gemma-2-2b-it")
 REMOTE_DEFAULT_MODEL = os.environ.get("REMOTE_DEFAULT_MODEL", "google/gemma-2-9b-it")
-_LAST_LOCAL_MODEL_KEY = "sidebar:last_local_model"
-_LAST_REMOTE_MODEL_KEY = "sidebar:last_remote_model"
 _TABS = ["Chat", "Analysis", "Extract"]
@@ -35,9 +48,9 @@ def _remote_model_input(remote_models: list[str]) -> str:
         model_name = st.text_input(
             "Model",
             value=st.session_state.get(
-                "sidebar__remote_model_custom_value", last_remote
             ),
-            key="sidebar__remote_model_custom_value",
             help="NDIF model id. Use this to cold-load a remote model.",
         )
         st.session_state[_LAST_REMOTE_MODEL_KEY] = model_name
@@ -46,16 +59,16 @@ def _remote_model_input(remote_models: list[str]) -> str:
     custom = st.toggle(
         "Custom remote model",
         value=False,
-        key="sidebar__remote_model_custom_enabled",
         help="Enter any NDIF-loadable model id, even if it is not currently running.",
     )
     if custom:
         model_name = st.text_input(
             "Model",
             value=st.session_state.get(
-                "sidebar__remote_model_custom_value", last_remote
             ),
-            key="sidebar__remote_model_custom_value",
             help="NDIF model id. Example: openai/gpt-oss-20b",
         )
         st.caption(
@@ -63,20 +76,20 @@ def _remote_model_input(remote_models: list[str]) -> str:
             "Custom model ids can cold-load if your NDIF account allows it."
         )
     else:
-        default_model = st.session_state.get("sidebar__remote_model", last_remote)
         if default_model not in remote_models:
             default_model = (
                 REMOTE_DEFAULT_MODEL
                 if REMOTE_DEFAULT_MODEL in remote_models
                 else remote_models[0]
             )
-        if st.session_state.get("sidebar__remote_model") not in remote_models:
-            st.session_state["sidebar__remote_model"] = default_model
         model_name = st.selectbox(
             "Model",
             options=remote_models,
             index=remote_models.index(default_model),
-            key="sidebar__remote_model",
             help="Running NDIF model.",
         )
     st.session_state[_LAST_REMOTE_MODEL_KEY] = model_name
@@ -84,15 +97,13 @@ def _remote_model_input(remote_models: list[str]) -> str:
 def _sidebar_controls() -> SidebarState:
-    from utils.runtime import list_remote_models
     with st.sidebar:
         st.markdown("## Persona UI")
-        if "sidebar__active_tab" not in st.session_state:
-            st.session_state["sidebar__active_tab"] = "Chat"
-        active_tab = st.session_state["sidebar__active_tab"]
         for tab_name, icon in zip(_TABS, _TAB_ICONS, strict=True):
             is_selected = tab_name == active_tab
             if st.button(
@@ -102,13 +113,13 @@ def _sidebar_controls() -> SidebarState:
                 type="primary" if is_selected else "secondary",
                 icon=icon,
             ):
-                st.session_state["sidebar__active_tab"] = tab_name
                 st.rerun()
         if active_tab == "Analysis":
             model_name = st.session_state.get(_LAST_LOCAL_MODEL_KEY, DEFAULT_MODEL)
             dataset_source = st.session_state.get(
-                "sidebar__dataset_source",
                 DATASET_SOURCES[0],
             )
             return SidebarState(
@@ -120,7 +131,7 @@ def _sidebar_controls() -> SidebarState:
         st.divider()
         st.caption("Runtime")
-        remote = st.toggle("Remote (NDIF)", value=False, key="sidebar__remote")
         if remote:
             model_name = _remote_model_input(list_remote_models())
@@ -128,7 +139,7 @@ def _sidebar_controls() -> SidebarState:
             model_name = st.text_input(
                 "Model",
                 value=st.session_state.get(_LAST_LOCAL_MODEL_KEY, DEFAULT_MODEL),
-                key="sidebar__local_model",
                 help="Local model id or path.",
             )
             st.session_state[_LAST_LOCAL_MODEL_KEY] = model_name
@@ -137,7 +148,7 @@ def _sidebar_controls() -> SidebarState:
         dataset_source = st.selectbox(
             "Source",
             DATASET_SOURCES,
-            key="sidebar__dataset_source",
             help="Dataset for Chat and Extract.",
         )
@@ -153,8 +164,6 @@ def main() -> None:
     """Run the Streamlit app."""
     st.set_page_config(page_title="Persona UI", layout="wide")
-    from utils.theme import install_catppuccin_theme
     install_catppuccin_theme(st.get_option("theme.base"))
     sidebar = _sidebar_controls()
@@ -164,9 +173,9 @@ def main() -> None:
         render_extract_tab(sidebar.remote, sidebar.model_name, sidebar.dataset_source)
     elif sidebar.active_tab == "Analysis":
-        from tabs.compare import render_compare_tab
-        render_compare_tab()
     else:
         from tabs.chat import render_chat_tab

 import streamlit as st
 from dotenv import load_dotenv
+from utils.helpers import DATASET_SOURCES, session_key
+from utils.runtime import list_remote_models
+from utils.theme import install_catppuccin_theme
 load_dotenv()
 DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "google/gemma-2-2b-it")
 REMOTE_DEFAULT_MODEL = os.environ.get("REMOTE_DEFAULT_MODEL", "google/gemma-2-9b-it")
+_LAST_LOCAL_MODEL_KEY = session_key("sidebar", "last_local_model")
+_LAST_REMOTE_MODEL_KEY = session_key("sidebar", "last_remote_model")
+_SIDEBAR_ACTIVE_TAB_KEY = session_key("sidebar", "active_tab")
+_SIDEBAR_REMOTE_MODEL_CUSTOM_VALUE_KEY = session_key(
+    "sidebar", "remote_model_custom_value"
+)
+_SIDEBAR_REMOTE_MODEL_CUSTOM_ENABLED_KEY = session_key(
+    "sidebar", "remote_model_custom_enabled"
+)
+_SIDEBAR_REMOTE_MODEL_KEY = session_key("sidebar", "remote_model")
+_SIDEBAR_LOCAL_MODEL_KEY = session_key("sidebar", "local_model")
+_SIDEBAR_REMOTE_KEY = session_key("sidebar", "remote")
+_SIDEBAR_DATASET_SOURCE_KEY = session_key("sidebar", "dataset_source")
 _TABS = ["Chat", "Analysis", "Extract"]
         model_name = st.text_input(
             "Model",
             value=st.session_state.get(
+                _SIDEBAR_REMOTE_MODEL_CUSTOM_VALUE_KEY, last_remote
             ),
+            key=_SIDEBAR_REMOTE_MODEL_CUSTOM_VALUE_KEY,
             help="NDIF model id. Use this to cold-load a remote model.",
         )
         st.session_state[_LAST_REMOTE_MODEL_KEY] = model_name
     custom = st.toggle(
         "Custom remote model",
         value=False,
+        key=_SIDEBAR_REMOTE_MODEL_CUSTOM_ENABLED_KEY,
         help="Enter any NDIF-loadable model id, even if it is not currently running.",
     )
     if custom:
         model_name = st.text_input(
             "Model",
             value=st.session_state.get(
+                _SIDEBAR_REMOTE_MODEL_CUSTOM_VALUE_KEY, last_remote
             ),
+            key=_SIDEBAR_REMOTE_MODEL_CUSTOM_VALUE_KEY,
             help="NDIF model id. Example: openai/gpt-oss-20b",
         )
         st.caption(
             "Custom model ids can cold-load if your NDIF account allows it."
         )
     else:
+        default_model = st.session_state.get(_SIDEBAR_REMOTE_MODEL_KEY, last_remote)
         if default_model not in remote_models:
             default_model = (
                 REMOTE_DEFAULT_MODEL
                 if REMOTE_DEFAULT_MODEL in remote_models
                 else remote_models[0]
             )
+        if st.session_state.get(_SIDEBAR_REMOTE_MODEL_KEY) not in remote_models:
+            st.session_state[_SIDEBAR_REMOTE_MODEL_KEY] = default_model
         model_name = st.selectbox(
             "Model",
             options=remote_models,
             index=remote_models.index(default_model),
+            key=_SIDEBAR_REMOTE_MODEL_KEY,
             help="Running NDIF model.",
         )
     st.session_state[_LAST_REMOTE_MODEL_KEY] = model_name
 def _sidebar_controls() -> SidebarState:
     with st.sidebar:
         st.markdown("## Persona UI")
+        if _SIDEBAR_ACTIVE_TAB_KEY not in st.session_state:
+            st.session_state[_SIDEBAR_ACTIVE_TAB_KEY] = "Chat"
+        active_tab = st.session_state[_SIDEBAR_ACTIVE_TAB_KEY]
         for tab_name, icon in zip(_TABS, _TAB_ICONS, strict=True):
             is_selected = tab_name == active_tab
             if st.button(
                 type="primary" if is_selected else "secondary",
                 icon=icon,
             ):
+                st.session_state[_SIDEBAR_ACTIVE_TAB_KEY] = tab_name
                 st.rerun()
         if active_tab == "Analysis":
             model_name = st.session_state.get(_LAST_LOCAL_MODEL_KEY, DEFAULT_MODEL)
             dataset_source = st.session_state.get(
+                _SIDEBAR_DATASET_SOURCE_KEY,
                 DATASET_SOURCES[0],
             )
             return SidebarState(
         st.divider()
         st.caption("Runtime")
+        remote = st.toggle("Remote (NDIF)", value=False, key=_SIDEBAR_REMOTE_KEY)
         if remote:
             model_name = _remote_model_input(list_remote_models())
             model_name = st.text_input(
                 "Model",
                 value=st.session_state.get(_LAST_LOCAL_MODEL_KEY, DEFAULT_MODEL),
+                key=_SIDEBAR_LOCAL_MODEL_KEY,
                 help="Local model id or path.",
             )
             st.session_state[_LAST_LOCAL_MODEL_KEY] = model_name
         dataset_source = st.selectbox(
             "Source",
             DATASET_SOURCES,
+            key=_SIDEBAR_DATASET_SOURCE_KEY,
             help="Dataset for Chat and Extract.",
         )
     """Run the Streamlit app."""
     st.set_page_config(page_title="Persona UI", layout="wide")
     install_catppuccin_theme(st.get_option("theme.base"))
     sidebar = _sidebar_controls()
         render_extract_tab(sidebar.remote, sidebar.model_name, sidebar.dataset_source)
     elif sidebar.active_tab == "Analysis":
+        from tabs.analysis import render_analysis_tab
+        render_analysis_tab()
     else:
         from tabs.chat import render_chat_tab

pyproject.toml CHANGED Viewed

@@ -5,8 +5,7 @@ description = "Streamlit UI for persona-vectors"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
-    "persona-vectors>=0.7.3",
-    "persona-data>=0.4.2",
     "datasets>=4.8.5",
     "huggingface-hub>=1.14.0",
     "streamlit>=1.44.0",

 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "persona-vectors>=0.8.0",
     "datasets>=4.8.5",
     "huggingface-hub>=1.14.0",
     "streamlit>=1.44.0",

state.py CHANGED Viewed

@@ -2,7 +2,8 @@ from typing import Literal, NotRequired, TypedDict
 import streamlit as st
-_CHAT_STATE_PREFIX = "chat_state::"
 PendingChatAction = Literal["new_user_prompt", "regenerate_after_edit"]
@@ -22,7 +23,7 @@ class ChatState(TypedDict):
 def chat_session_key(model_name: str, dataset_source: str) -> str:
     """Build the session-state key for a chat context."""
-    return f"{_CHAT_STATE_PREFIX}{model_name}::{dataset_source}"
 def default_chat_state() -> ChatState:
@@ -48,9 +49,8 @@ def reset_chat_context_state(
         st.session_state.pop(key, None)
-def get_chat_state(model_name: str, _remote: bool, dataset_source: str) -> ChatState:
     """Return the mutable chat state for the active context."""
     key = chat_session_key(model_name, dataset_source)
-    state = st.session_state.setdefault(key, default_chat_state())
-    return state

 import streamlit as st
+from utils.helpers import session_key
 PendingChatAction = Literal["new_user_prompt", "regenerate_after_edit"]
 def chat_session_key(model_name: str, dataset_source: str) -> str:
     """Build the session-state key for a chat context."""
+    return session_key("chat_state", model_name, dataset_source)
 def default_chat_state() -> ChatState:
         st.session_state.pop(key, None)
+def get_chat_state(model_name: str, dataset_source: str) -> ChatState:
     """Return the mutable chat state for the active context."""
     key = chat_session_key(model_name, dataset_source)
+    return st.session_state.setdefault(key, default_chat_state())

tabs/analysis.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .analysis_core import render_analysis_tab

tabs/{compare.py → analysis_core.py} RENAMED Viewed

@@ -7,7 +7,12 @@ from pathlib import Path
 import plotly.graph_objects as go
 import streamlit as st
 from persona_data.environment import get_artifacts_dir
-from persona_data.synth_persona import BASELINE_PERSONA_ID
 from persona_vectors.extraction import MaskStrategy
 from persona_vectors.plots import (
     build_layered_figure,
@@ -15,10 +20,11 @@ from persona_vectors.plots import (
     build_similarity_figures,
     plot_layer_similarity,
     plot_persona_dendrogram,
     save_plot_html,
 )
-from utils.compare_sources import (
     DEFAULT_COMPARE_MODEL,
     DEFAULT_HUB_REPO,
     SOURCE_HUB,
@@ -28,13 +34,13 @@ from utils.compare_sources import (
     activation_store_cached,
     available_variants,
     hub_models_by_mask_strategy,
-    load_persona_vectors_lean,
-    load_variant_vectors_lean,
     local_model_matches,
     local_model_options_cached,
     persona_names_cached,
     personas_cached,
-    release_store_cache,
     store_cache_parts,
     store_id,
     store_layers_cached,
@@ -57,32 +63,45 @@ def _filename(*parts: str) -> str:
 # Keep analysis-tab selection state separate so projection defaults do not
 # overwrite cosine similarity defaults.
-_LAST_COSINE_PERSONAS_KEY = "compare:last_personas:cosine"
-_LAST_PROJECTION_PERSONAS_KEY = "compare:last_personas:projection"
-_LAST_SIMILARITY_PERSONAS_KEY = "compare:last_personas:similarity"
-_LAST_MASK_STRATEGY_KEY = "compare:last_mask_strategy"
-_LAST_SOURCE_KEY = "compare:last_source"
 _DEFAULT_LAYER_FRAMES = 16
 _DEFAULT_PERSONA_LIMITS = {
     "similarity": 120,
     "pca": 500,
     "umap": 500,
     "dendro": 160,
 }
 _MAX_SIMILARITY_CELLS = 4_000_000
 _MAX_PAIR_TRAJECTORY_TRACES = 500
-_CLUSTER_METHODS = {
-    "K-means": "kmeans",
-    "Agglomerative": "agglomerative",
-    "HDBSCAN": "hdbscan",
-}
 _CLUSTER_MODES = {
     "Mean across layers": "mean_across_layers",
     "First selected layer": "first_layer",
     "Per layer": "per_layer",
 }
-_CLUSTER_LINKAGES = ["ward", "complete", "average", "single"]
 def _is_assistant_persona(persona_id: str, persona_name: str | None = None) -> bool:
@@ -107,6 +126,98 @@ class CosineSelection:
 class PersonaOptions:
     regular_ids: list[str]
     assistant_id: str | None
 def _layers_for_variant(
@@ -133,7 +244,7 @@ def _load_persona_vectors(
     persona_ids: list[str],
 ):
     source, location, model_name = store_cache_parts(store)
-    return load_persona_vectors_lean(
         source,
         location,
         model_name,
@@ -150,7 +261,7 @@ def _load_variant_vectors(
     persona_ids: list[str],
 ):
     source, location, model_name = store_cache_parts(store)
-    return load_variant_vectors_lean(
         source,
         location,
         model_name,
@@ -160,22 +271,55 @@ def _load_variant_vectors(
     )
-def _clear_old_figure_states(current_key: str) -> None:
     for key in list(st.session_state):
         if key == current_key or not isinstance(key, str):
             continue
         parts = key.split("::", 2)
-        if len(parts) >= 2 and parts[0] == "load" and parts[1].endswith("_fig_state"):
             st.session_state.pop(key, None)
 def _store_figure_state(key: str, value: object) -> None:
     _clear_old_figure_states(key)
     st.session_state[key] = value
 def _release_vector_memory(store: Store, variants: list[str] | tuple[str, ...]) -> None:
-    release_store_cache(store, variants)
     gc.collect()
@@ -203,10 +347,22 @@ def _render_layer_frame_controls(
         "Layer frames",
         min_value=2,
         max_value=len(layers),
-        value=_DEFAULT_LAYER_FRAMES,
         key=widget_key("load", "layer_frames", scope, store_id(store)),
         help="Limit animated Plotly frames to keep browser and RAM usage bounded.",
     )
     selected = _evenly_spaced_layers(layers, frame_count)
     st.caption(f"Using {len(selected)} of {len(layers)} layers.")
     return selected
@@ -259,7 +415,11 @@ def _load_persona_options(
     if not regular_ids and assistant_id is None:
         st.info("No personas found for this model and variant.")
         return None
-    return PersonaOptions(regular_ids=regular_ids, assistant_id=assistant_id)
 def _seed_persona_memory(
@@ -366,6 +526,7 @@ def _select_artifact_personas(
         empty_message=empty_message,
     )
     if options is None:
         return []
     default_count, include_assistant_default = _seed_persona_memory(
@@ -393,6 +554,7 @@ def _select_artifact_personas(
     st.session_state[remembered_count_key] = persona_count
     st.session_state[remembered_assistant_key] = include_assistant
     st.session_state[remember_key] = persona_ids
     if not persona_ids:
         st.info("Select at least one persona or include the Assistant persona.")
@@ -415,7 +577,9 @@ def _render_save_buttons(
     if st.button("Save HTML", key=widget_key("load", "save_html", key_suffix)):
         try:
             _style_plotly_figures(figs)
-            paths = [save_plot_html(fig, fn) for fig, fn in zip(figs, filenames)]
             st.success(f"Saved {len(paths)} HTML file(s) to `artifacts/plots`.")
         except Exception as exc:
             st.error(f"Could not save HTML: {exc}")
@@ -430,7 +594,11 @@ def _style_plotly_figures(figs: list[object]) -> None:
 def _plotly_chart(fig: object) -> None:
     _style_plotly_figures([fig])
-    st.plotly_chart(fig, width="stretch")
 def _render_mask_strategy_select(scope: str) -> MaskStrategy:
@@ -584,7 +752,7 @@ def _render_cosine_similarity(
         selection.persona_key,
     )
     filename = _filename(
-        "compare",
         "cosine",
         store.model_name,
         mask_strategy.value,
@@ -592,7 +760,7 @@ def _render_cosine_similarity(
         selection.variant_b,
     )
     pairs_filename = _filename(
-        "compare",
         "cosine_pairs",
         store.model_name,
         mask_strategy.value,
@@ -605,7 +773,7 @@ def _render_cosine_similarity(
         type="primary",
         key=widget_key(
             "load",
-            "compare_vectors",
             store_id(store),
             store.model_name,
             mask_strategy.value,
@@ -650,19 +818,29 @@ def _select_single_variant_samples(
     scope: str,
     *,
     remember_key: str,
     default_count_limit: int,
 ) -> tuple[str, list[str], str, list[int]] | None:
     variants = available_variants(store, mask_strategy)
     if not variants:
         st.info("No variants with saved vectors for this model.")
         return None
     variant = st.selectbox(
         "Variant",
         options=variants,
-        index=variants.index("biography") if "biography" in variants else 0,
         format_func=prompt_variant_label,
-        key=widget_key("load", "variant", scope, store_id(store)),
     )
     persona_ids = _select_artifact_personas(
         store,
         [variant],
@@ -684,6 +862,352 @@ def _select_single_variant_samples(
     return variant, persona_ids, persona_key, selected_layers
 def _render_layered_figure_analysis(
     store: Store,
     mask_strategy: MaskStrategy,
@@ -707,124 +1231,60 @@ def _render_layered_figure_analysis(
         mask_strategy,
         scope,
         remember_key=remember_key,
         default_count_limit=default_count_limit,
     )
     if selected is None:
         return
     variant, persona_ids, persona_key, selected_layers = selected
-    pair_trajectories = False
-    if include_pair_trajectories:
-        pair_count = len(persona_ids) * (len(persona_ids) - 1) // 2
-        if pair_count > _MAX_PAIR_TRAJECTORY_TRACES:
-            st.caption(
-                "Pair trajectories hidden because this selection would create "
-                f"{pair_count:,} Plotly traces."
-            )
-        else:
-            pair_trajectories = st.checkbox(
-                "Pair trajectories",
-                value=False,
-                key=widget_key("load", "pair_trajectories", scope, store_id(store)),
-                help="Adds one line per persona pair. Keep this off for larger selections.",
-            )
-    if figure_kind == "similarity":
-        similarity_cells = len(persona_ids) * len(persona_ids) * len(selected_layers)
-        if similarity_cells > _MAX_SIMILARITY_CELLS:
-            st.error(
-                "Reduce personas or layer frames before generating the similarity "
-                f"matrix ({similarity_cells:,} cells selected)."
-            )
             return
-    n_clusters = None
-    cluster_mode = None
-    cluster_method = None
-    cluster_linkage = None
-    min_cluster_size = None
-    if figure_kind in {"pca", "umap"}:
-        use_clusters = st.toggle(
-            "Color by clusters",
-            value=False,
-            key=widget_key("load", "clusters_enabled", scope, store_id(store)),
-            help="Cluster persona vectors and color points by cluster.",
-        )
-        if use_clusters:
-            method_label = st.selectbox(
-                "Cluster algorithm",
-                options=list(_CLUSTER_METHODS),
-                index=0,
-                key=widget_key("load", "cluster_method", scope, store_id(store)),
-            )
-            cluster_method = _CLUSTER_METHODS[method_label]
-            if cluster_method in {"kmeans", "agglomerative"}:
-                n_clusters = st.slider(
-                    "K (clusters)",
-                    min_value=2,
-                    max_value=min(10, len(persona_ids)),
-                    value=min(3, len(persona_ids)),
-                    key=widget_key("load", "cluster_k", scope, store_id(store)),
-                )
-            if cluster_method == "agglomerative":
-                cluster_linkage = st.selectbox(
-                    "Linkage",
-                    options=_CLUSTER_LINKAGES,
-                    index=0,
-                    key=widget_key("load", "cluster_linkage", scope, store_id(store)),
-                )
-            if cluster_method == "hdbscan":
-                min_cluster_size = st.slider(
-                    "Minimum cluster size",
-                    min_value=2,
-                    max_value=len(persona_ids),
-                    value=min(5, len(persona_ids)),
-                    key=widget_key(
-                        "load",
-                        "cluster_min_cluster_size",
-                        scope,
-                        store_id(store),
-                    ),
-                )
-            mode_label = st.selectbox(
-                "Cluster fit",
-                options=list(_CLUSTER_MODES),
-                index=0,
-                key=widget_key("load", "cluster_mode", scope, store_id(store)),
-                help=(
-                    "Mean across layers is the previous behavior. First selected "
-                    "layer keeps one fixed clustering from the first frame. Per layer "
-                    "recomputes clustering for each animation frame."
-                ),
-            )
-            cluster_mode = _CLUSTER_MODES[mode_label]
-    fig_key = widget_key(
-        "load",
-        f"{scope}_fig_state",
-        store_id(store),
-        store.model_name,
-        mask_strategy.value,
-        figure_kind,
-        str(n_components),
-        str(n_clusters),
-        str(cluster_mode),
-        str(cluster_method),
-        str(cluster_linkage),
-        str(min_cluster_size),
-        variant,
-        "persona_vector",
-        persona_key,
-        "_".join(map(str, selected_layers)),
-        str(pair_trajectories),
     )
     filename = scope
-    _clear_old_figure_states(fig_key)
     if st.button(button_label, type="primary"):
         build_label = {
             "umap": "Computing UMAP projections…",
             "pca": "Computing PCA projections…",
             "similarity": "Computing similarity matrices…",
         }.get(figure_kind, "Building figure…")
         progress = st.progress(0, text="Loading activation vectors…")
@@ -837,63 +1297,44 @@ def _render_layered_figure_analysis(
                 persona_ids,
             )
             progress.progress(55, text=build_label)
-            build_kwargs = {}
-            if figure_kind in {"umap", "pca"}:
-                build_kwargs["n_components"] = n_components
-                if cluster_method is not None:
-                    build_kwargs["cluster_method"] = cluster_method
-                    build_kwargs["n_clusters"] = n_clusters
-                    build_kwargs["cluster_mode"] = cluster_mode
-                    if cluster_linkage is not None:
-                        build_kwargs["cluster_linkage"] = cluster_linkage
-                    if min_cluster_size is not None:
-                        build_kwargs["min_cluster_size"] = min_cluster_size
-            if figure_kind == "similarity" and pair_trajectories:
-                main_fig, extra_fig = build_similarity_figures(
-                    samples,
-                    layers=selected_layers,
-                    title=title_fn(variant),
-                    pair_title=(
-                        "Pair similarity trajectories - "
-                        f"{prompt_variant_label(variant)} - persona vectors"
-                    ),
-                )
-            else:
-                main_fig = build_layered_figure(
-                    samples,
-                    figure_kind,
-                    layers=selected_layers,
-                    title=title_fn(variant),
-                    **build_kwargs,
-                )
-                if figure_kind in {"umap", "pca"}:
-                    main_fig.update_layout(height=700)
-                extra_fig = (
-                    build_pair_similarity_figure(
-                        samples,
-                        layers=selected_layers,
-                        title=(
-                            "Pair similarity trajectories - "
-                            f"{prompt_variant_label(variant)} - persona vectors"
-                        ),
-                    )
-                    if pair_trajectories
-                    else None
-                )
             progress.progress(90, text="Storing figure state…")
             n_samples = samples.vectors.shape[0]
             del samples
-            _store_figure_state(fig_key, (main_fig, extra_fig, n_samples))
             progress.progress(100, text="Done.")
         except Exception as exc:
             st.error(f"Could not build figure: {exc}")
-            st.session_state.pop(fig_key, None)
         finally:
             _release_vector_memory(store, [variant])
             progress.empty()
-    if fig_key in st.session_state:
-        main_fig, extra_fig, n_samples = st.session_state[fig_key]
         _plotly_chart(main_fig)
         figs = [main_fig]
         filenames = [filename]
@@ -906,7 +1347,7 @@ def _render_layered_figure_analysis(
         st.success(f"Loaded {n_samples} samples.")
-_LAST_DENDRO_PERSONAS_KEY = "compare:last_personas:dendro"
 _DENDRO_LINKAGE_OPTIONS = ["ward", "complete", "average", "single"]
@@ -1108,7 +1549,7 @@ def _render_hub_model_select(
     mask_strategy: MaskStrategy,
 ) -> str:
     fallback_model = st.session_state.get(
-        "compare:hub_model_fallback",
         DEFAULT_COMPARE_MODEL,
     )
     try:
@@ -1118,7 +1559,7 @@ def _render_hub_model_select(
         return st.text_input(
             "Hub model",
             value=fallback_model,
-            key="compare:hub_model_fallback",
             help="Compare-only model id to use if Hub config discovery is unavailable.",
         )
@@ -1130,7 +1571,7 @@ def _render_hub_model_select(
         return st.text_input(
             "Hub model",
             value=fallback_model,
-            key="compare:hub_model_fallback",
             help="Compare-only model id to use for this Hub repo.",
         )
@@ -1155,31 +1596,31 @@ def _render_local_model_select(
     artifacts_root: str,
     mask_strategy: MaskStrategy,
 ) -> str:
-    fallback_model = st.session_state.get("compare:local_model", DEFAULT_COMPARE_MODEL)
     model_options = local_model_options_cached(artifacts_root, mask_strategy.value)
     if not model_options:
         return st.text_input(
             "Local model",
             value=fallback_model,
-            key="compare:local_model",
             help="Compare-only local model id or path.",
         )
     custom = st.toggle(
         "Custom local model",
         value=False,
-        key="compare:local_model_custom_enabled",
         help="Enter a model id/path manually instead of choosing from activation directories.",
     )
     if custom:
         return st.text_input(
             "Local model",
             value=fallback_model,
-            key="compare:local_model",
             help="Compare-only local model id or path.",
         )
-    previous_model = st.session_state.get("compare:local_model_select", fallback_model)
     if not any(local_model_matches(previous_model, option) for option in model_options):
         previous_model = fallback_model
     default_model = next(
@@ -1194,10 +1635,10 @@ def _render_local_model_select(
         "Local model",
         options=model_options,
         index=model_options.index(default_model),
-        key="compare:local_model_select",
         help="Models discovered under the selected artifacts root.",
     )
-    st.session_state["compare:local_model"] = selected
     return selected
@@ -1205,8 +1646,8 @@ def _build_store(source: str, mask_strategy: MaskStrategy) -> Store:
     if source == SOURCE_HUB:
         repo = st.text_input(
             "Hub repo",
-            value=st.session_state.get("compare:hub_repo", DEFAULT_HUB_REPO),
-            key="compare:hub_repo",
             help="Hugging Face dataset published by `scripts/push_to_hf.py`.",
         )
         hub_model_name = _render_hub_model_select(repo, mask_strategy)
@@ -1219,7 +1660,7 @@ def _build_store(source: str, mask_strategy: MaskStrategy) -> Store:
     artifacts_root = st.text_input(
         "Artifacts root",
         value=str(get_artifacts_dir() / "activations"),
-        key="compare:artifacts_root",
     )
     artifacts_root = str(Path(artifacts_root).expanduser())
     local_model_name = _render_local_model_select(artifacts_root, mask_strategy)
@@ -1231,12 +1672,12 @@ def _build_store(source: str, mask_strategy: MaskStrategy) -> Store:
     )
-def render_compare_tab() -> None:
     """Render the analysis tab."""
     st.title("Analysis")
     st.caption(
-        "Analyse persona vectors by cosine similarity, PCA, UMAP, or hierarchical clustering."
     )
     source = _render_source_select()
@@ -1279,13 +1720,23 @@ def render_compare_tab() -> None:
         _render_dendrogram_analysis(store, mask_strategy)
         return
     dimension_choice = st.segmented_control(
         "Projection dimensions",
-        options=["2D", "3D"],
-        default="2D",
-        key=widget_key("load", "projection_dims", analysis_mode),
         label_visibility="collapsed",
     )
     n_components = 3 if dimension_choice == "3D" else 2
     dim_suffix = "" if n_components == 2 else " (3D)"
     _render_layered_figure_analysis(

 import plotly.graph_objects as go
 import streamlit as st
 from persona_data.environment import get_artifacts_dir
+from persona_data.synth_persona import BASELINE_PERSONA_ID, SynthPersonaDataset
+from persona_vectors.attributes import (
+    DEFAULT_MAX_ATTRIBUTE_CATEGORIES,
+    attribute_color_kwargs,
+    attribute_display_label,
+)
 from persona_vectors.extraction import MaskStrategy
 from persona_vectors.plots import (
     build_layered_figure,
     build_similarity_figures,
     plot_layer_similarity,
     plot_persona_dendrogram,
+    prepare_layered_projection_data,
     save_plot_html,
 )
+from utils.analysis_sources import (
     DEFAULT_COMPARE_MODEL,
     DEFAULT_HUB_REPO,
     SOURCE_HUB,
     activation_store_cached,
     available_variants,
     hub_models_by_mask_strategy,
+    load_persona_vectors_cached,
+    load_variant_vectors_cached,
     local_model_matches,
     local_model_options_cached,
     persona_names_cached,
     personas_cached,
+    release_hf_store_cache,
     store_cache_parts,
     store_id,
     store_layers_cached,
 # Keep compare-tab selection state separate so projection defaults do not
 # overwrite cosine similarity defaults.
+_LAST_COSINE_PERSONAS_KEY = "analysis:last_personas:cosine"
+_LAST_PROJECTION_PERSONAS_KEY = "analysis:last_personas:projection"
+_LAST_SIMILARITY_PERSONAS_KEY = "analysis:last_personas:similarity"
+_LAST_MASK_STRATEGY_KEY = "analysis:last_mask_strategy"
+_LAST_SOURCE_KEY = "analysis:last_source"
+_LAST_PROJECTION_VARIANT_KEY = "analysis:last_projection_variant"
+_LAST_SIMILARITY_VARIANT_KEY = "analysis:last_similarity_variant"
+_LAST_PROJECTION_COLOR_MODE_KEY = "analysis:last_projection_color_mode"
+_LAST_PROJECTION_ATTRIBUTE_KEY = "analysis:last_projection_attribute"
+_LAST_PROJECTION_CLUSTER_K_KEY = "analysis:last_projection_cluster_k"
+_LAST_PROJECTION_CLUSTER_MODE_KEY = "analysis:last_projection_cluster_mode"
+_LAST_PROJECTION_HIGHLIGHTS_KEY = "analysis:last_projection_highlights"
+_LAST_PROJECTION_DIMS_KEY = "analysis:last_projection_dims"
+_LAST_LAYER_FRAMES_KEY = "analysis:last_layer_frames"
 _DEFAULT_LAYER_FRAMES = 16
 _DEFAULT_PERSONA_LIMITS = {
     "similarity": 120,
     "pca": 500,
     "umap": 500,
+    "isomap": 500,
     "dendro": 160,
 }
 _MAX_SIMILARITY_CELLS = 4_000_000
 _MAX_PAIR_TRAJECTORY_TRACES = 500
+_DEFAULT_GRAPH_NEIGHBORS = 5
+_PROJECTION_KINDS = {"pca", "umap", "isomap"}
 _CLUSTER_MODES = {
     "Mean across layers": "mean_across_layers",
     "First selected layer": "first_layer",
     "Per layer": "per_layer",
 }
+_PROJECTION_COLOR_MODES = ["Persona", "K-means clusters", "Persona attribute"]
+_MAX_ATTRIBUTE_CATEGORIES = DEFAULT_MAX_ATTRIBUTE_CATEGORIES
+@st.cache_resource(show_spinner=False)
+def _synth_persona_dataset() -> SynthPersonaDataset:
+    """Construct the ``SynthPersonaDataset`` used for attribute colouring.
+
+    Wrapped in ``st.cache_resource`` (spinner disabled) so the constructor is
+    not re-run on every Streamlit rerun.
+    """
+    dataset = SynthPersonaDataset()
+    return dataset
 def _is_assistant_persona(persona_id: str, persona_name: str | None = None) -> bool:
 class PersonaOptions:
     regular_ids: list[str]
     assistant_id: str | None
+    persona_names: dict[str, str]
+@dataclass(frozen=True)
+class ProjectionColorConfig:
+    """Immutable description of how points in a projection plot are coloured.
+
+    The defaults describe plain per-persona colouring with no highlights.
+    """
+    # Label chosen in the "Color by" selectbox (see _PROJECTION_COLOR_MODES).
+    color_mode: str = "Persona"
+    # Number of K-means clusters; left as None outside "K-means clusters" mode.
+    n_clusters: int | None = None
+    # Internal cluster-fit mode (a value of _CLUSTER_MODES); None outside K-means mode.
+    cluster_mode: str | None = None
+    # Attribute to colour by; None outside "Persona attribute" mode.
+    attribute_name: str | None = None
+    # Persona ids that keep their colour while the rest are grayed out.
+    highlight_persona_ids: tuple[str, ...] = ()
+    # Fingerprint of highlight_persona_ids ("" when nothing is highlighted);
+    # it is folded into the figure cache key.
+    highlight_persona_key: str = ""
+@dataclass(frozen=True)
+class LayeredFigureStateKeys:
+    """Session-state keys used to cache a layered figure and its projection."""
+    # Key under which the built figure state is stored in st.session_state.
+    figure: str
+    # Key for the cached projection data; None for non-projection figure kinds.
+    projection: str | None = None
+_HIGHLIGHT_OTHER_LABEL = "Other"
+_HIGHLIGHT_OTHER_COLOR = "rgba(148, 163, 184, 0.35)"
+def _persona_names_state_key(widget_scope: str) -> str:
+    """Return the session-state key that holds persona names for *widget_scope*."""
+    key_parts = ("load", "persona_names", widget_scope)
+    return widget_key(*key_parts)
+def _persona_display_label(persona_names: dict[str, str], persona_id: str) -> str:
+    """Return ``"<name> (<id>)"`` for a persona, or just the id.
+
+    The bare id is returned when no name is known for *persona_id* or when
+    the stored name is identical to the id.
+    """
+    display_name = persona_names.get(persona_id, persona_id)
+    if display_name == persona_id:
+        return persona_id
+    return f"{display_name} ({persona_id})"
+def _highlight_persona_groups(
+    persona_ids: list[str],
+    persona_names: dict[str, str],
+    highlight_persona_ids: tuple[str, ...],
+) -> list[str] | None:
+    """Map each persona to a group label for highlight colouring.
+
+    Returns ``None`` when nothing is highlighted. Otherwise returns one label
+    per entry of *persona_ids*, in order: the persona's display label if it is
+    highlighted, and ``_HIGHLIGHT_OTHER_LABEL`` for everything else.
+    """
+    if not highlight_persona_ids:
+        return None
+    keep = frozenset(highlight_persona_ids)
+    groups: list[str] = []
+    for pid in persona_ids:
+        if pid in keep:
+            groups.append(_persona_display_label(persona_names, pid))
+        else:
+            groups.append(_HIGHLIGHT_OTHER_LABEL)
+    return groups
+def _sequence_to_list(value: object) -> list[object] | None:
+    """Coerce an iterable into a list, or return ``None`` if that makes no sense.
+
+    Lists are returned as-is (same object, not a copy). ``None``, ``str`` and
+    ``bytes`` are rejected even though the latter two are iterable, and any
+    value that ``list()`` cannot consume yields ``None``.
+    """
+    if isinstance(value, list):
+        return value
+    if value is None or isinstance(value, (str, bytes)):
+        return None
+    try:
+        # Covers tuples as well as any other iterable.
+        return list(value)
+    except TypeError:
+        return None
+def _gray_out_unselected_personas(fig: go.Figure) -> None:
+    """Recolour the "Other" points of *fig* in place, including animation frames.
+
+    A trace whose marker colours and ``customdata`` are sequences of equal
+    length is recoloured point by point: entries whose label stringifies to
+    ``_HIGHLIGHT_OTHER_LABEL`` get ``_HIGHLIGHT_OTHER_COLOR``. Otherwise a trace
+    named ``_HIGHLIGHT_OTHER_LABEL`` is recoloured as a whole and dimmed.
+    """
+    def _recolor(trace: object) -> None:
+        marker = getattr(trace, "marker", None)
+        if marker is None:
+            return
+        point_colors = _sequence_to_list(getattr(marker, "color", None))
+        point_labels = _sequence_to_list(getattr(trace, "customdata", None))
+        per_point = (
+            point_colors is not None
+            and point_labels is not None
+            and len(point_colors) == len(point_labels)
+        )
+        if per_point:
+            recolored = []
+            for label, color in zip(point_labels, point_colors, strict=True):
+                is_other = str(label) == _HIGHLIGHT_OTHER_LABEL
+                recolored.append(_HIGHLIGHT_OTHER_COLOR if is_other else color)
+            trace.marker.color = recolored
+            return
+        if getattr(trace, "name", None) == _HIGHLIGHT_OTHER_LABEL:
+            trace.marker.color = _HIGHLIGHT_OTHER_COLOR
+            trace.opacity = 0.28
+    frame_traces = [trace for frame in fig.frames for trace in frame.data]
+    for trace in (*fig.data, *frame_traces):
+        _recolor(trace)
 def _layers_for_variant(
     persona_ids: list[str],
 ):
     source, location, model_name = store_cache_parts(store)
+    return load_persona_vectors_cached(
         source,
         location,
         model_name,
     persona_ids: list[str],
 ):
     source, location, model_name = store_cache_parts(store)
+    return load_variant_vectors_cached(
         source,
         location,
         model_name,
     )
+def _clear_old_load_states(current_key: str, suffix: str) -> None:
     for key in list(st.session_state):
         if key == current_key or not isinstance(key, str):
             continue
         parts = key.split("::", 2)
+        if len(parts) >= 2 and parts[0] == "load" and parts[1].endswith(suffix):
             st.session_state.pop(key, None)
+def _clear_old_figure_states(current_key: str) -> None:
+    """Clear stale ``*_fig_state`` load entries, keeping *current_key*."""
+    _clear_old_load_states(current_key, suffix="_fig_state")
+def _clear_old_projection_states(current_key: str) -> None:
+    """Clear stale ``*_projection_state`` load entries, keeping *current_key*."""
+    _clear_old_load_states(current_key, suffix="_projection_state")
 def _store_figure_state(key: str, value: object) -> None:
     _clear_old_figure_states(key)
     st.session_state[key] = value
+def _seed_selectbox_key(
+    *,
+    key: str,
+    remember_key: str,
+    options: list[str],
+    default: str,
+) -> str:
+    """Resolve the option a selectbox should start on.
+
+    Preference order: the widget's own session-state value under *key*, then
+    the value remembered under *remember_key*, then *default*. A candidate
+    that is not in *options* is replaced by *default*. This function only
+    reads session state; it does not write to it.
+    """
+    state = st.session_state
+    if key in state:
+        candidate = state[key]
+    else:
+        candidate = state.get(remember_key, default)
+    return candidate if candidate in options else default
+def _remember_multiselect(
+    *,
+    key: str,
+    remember_key: str,
+    options: list[str],
+) -> list[str]:
+    """Return the previously selected values that are still valid options.
+
+    The widget's own value under *key* wins over the value remembered under
+    *remember_key*. Anything that is not a list is treated as "nothing
+    selected", and entries no longer present in *options* are dropped.
+    """
+    state = st.session_state
+    previous = state[key] if key in state else state.get(remember_key, [])
+    if not isinstance(previous, list):
+        return []
+    return [choice for choice in previous if choice in options]
 def _release_vector_memory(store: Store, variants: list[str] | tuple[str, ...]) -> None:
+    release_hf_store_cache(store, variants)
     gc.collect()
         "Layer frames",
         min_value=2,
         max_value=len(layers),
+        value=min(
+            max(
+                int(
+                    st.session_state.get(
+                        _LAST_LAYER_FRAMES_KEY,
+                        _DEFAULT_LAYER_FRAMES,
+                    )
+                ),
+                2,
+            ),
+            len(layers),
+        ),
         key=widget_key("load", "layer_frames", scope, store_id(store)),
         help="Limit animated Plotly frames to keep browser and RAM usage bounded.",
     )
+    st.session_state[_LAST_LAYER_FRAMES_KEY] = frame_count
     selected = _evenly_spaced_layers(layers, frame_count)
     st.caption(f"Using {len(selected)} of {len(layers)} layers.")
     return selected
     if not regular_ids and assistant_id is None:
         st.info("No personas found for this model and variant.")
         return None
+    return PersonaOptions(
+        regular_ids=regular_ids,
+        assistant_id=assistant_id,
+        persona_names=persona_names,
+    )
 def _seed_persona_memory(
         empty_message=empty_message,
     )
     if options is None:
+        st.session_state.pop(_persona_names_state_key(widget_scope), None)
         return []
     default_count, include_assistant_default = _seed_persona_memory(
     st.session_state[remembered_count_key] = persona_count
     st.session_state[remembered_assistant_key] = include_assistant
     st.session_state[remember_key] = persona_ids
+    st.session_state[_persona_names_state_key(widget_scope)] = options.persona_names
     if not persona_ids:
         st.info("Select at least one persona or include the Assistant persona.")
     if st.button("Save HTML", key=widget_key("load", "save_html", key_suffix)):
         try:
             _style_plotly_figures(figs)
+            paths = [
+                save_plot_html(fig, fn) for fig, fn in zip(figs, filenames, strict=True)
+            ]
             st.success(f"Saved {len(paths)} HTML file(s) to `artifacts/plots`.")
         except Exception as exc:
             st.error(f"Could not save HTML: {exc}")
 def _plotly_chart(fig: object) -> None:
     _style_plotly_figures([fig])
+    st.plotly_chart(
+        fig,
+        width="stretch",
+        config={"responsive": True, "displaylogo": False},
+    )
 def _render_mask_strategy_select(scope: str) -> MaskStrategy:
         selection.persona_key,
     )
     filename = _filename(
+        "analysis",
         "cosine",
         store.model_name,
         mask_strategy.value,
         selection.variant_b,
     )
     pairs_filename = _filename(
+        "analysis",
         "cosine_pairs",
         store.model_name,
         mask_strategy.value,
         type="primary",
         key=widget_key(
             "load",
+            "analysis_vectors",
             store_id(store),
             store.model_name,
             mask_strategy.value,
     scope: str,
     *,
     remember_key: str,
+    variant_remember_key: str,
     default_count_limit: int,
 ) -> tuple[str, list[str], str, list[int]] | None:
     variants = available_variants(store, mask_strategy)
     if not variants:
         st.info("No variants with saved vectors for this model.")
         return None
+    variant_key = widget_key("load", "variant", scope, store_id(store))
+    default_variant = "biography" if "biography" in variants else variants[0]
+    selected_variant = _seed_selectbox_key(
+        key=variant_key,
+        remember_key=variant_remember_key,
+        options=variants,
+        default=default_variant,
+    )
     variant = st.selectbox(
         "Variant",
         options=variants,
+        index=variants.index(selected_variant),
         format_func=prompt_variant_label,
+        key=variant_key,
     )
+    st.session_state[variant_remember_key] = variant
     persona_ids = _select_artifact_personas(
         store,
         [variant],
     return variant, persona_ids, persona_key, selected_layers
+def _render_pair_trajectory_control(
+    *,
+    enabled: bool,
+    persona_count: int,
+    scope: str,
+    store: Store,
+) -> bool:
+    """Render the "Pair trajectories" checkbox and return whether it is on.
+
+    Returns ``False`` without rendering a checkbox when the control is not
+    *enabled*, or when the number of unordered persona pairs exceeds
+    ``_MAX_PAIR_TRAJECTORY_TRACES`` (a caption explains why in that case).
+    """
+    if not enabled:
+        return False
+    # n choose 2: one trace per unordered pair of personas.
+    pair_count = persona_count * (persona_count - 1) // 2
+    if pair_count <= _MAX_PAIR_TRAJECTORY_TRACES:
+        checkbox_key = widget_key("load", "pair_trajectories", scope, store_id(store))
+        return st.checkbox(
+            "Pair trajectories",
+            value=False,
+            key=checkbox_key,
+            help="Adds one line per persona pair. Keep this off for larger selections.",
+        )
+    st.caption(
+        "Pair trajectories hidden because this selection would create "
+        f"{pair_count:,} Plotly traces."
+    )
+    return False
+def _validate_layered_figure_size(
+    figure_kind: str,
+    persona_count: int,
+    selected_layers: list[int],
+) -> bool:
+    """Return whether the requested figure is small enough to build.
+
+    Only ``"similarity"`` figures are limited: personas squared times the
+    number of selected layers must not exceed ``_MAX_SIMILARITY_CELLS``. An
+    error is shown when the limit is exceeded.
+    """
+    if figure_kind != "similarity":
+        return True
+    cells = persona_count**2 * len(selected_layers)
+    within_budget = cells <= _MAX_SIMILARITY_CELLS
+    if not within_budget:
+        st.error(
+            "Reduce personas or layer frames before generating the similarity "
+            f"matrix ({cells:,} cells selected)."
+        )
+    return within_budget
+def _render_projection_color_config(
+    store: Store,
+    scope: str,
+    persona_ids: list[str],
+) -> ProjectionColorConfig | None:
+    """Render the "Color by" controls for a projection and return the choice.
+
+    Depending on the selected mode this renders the K-means controls, the
+    persona-attribute selector, or the persona highlight multiselect. Each
+    choice is also written to a ``_LAST_PROJECTION_*`` session-state key.
+
+    Returns ``None`` when the selected mode cannot be configured: fewer than
+    two personas for K-means, or no attributes in the persona dataset.
+    """
+    widget_scope = f"{scope}:{store_id(store)}"
+    persona_key = personas_fingerprint(persona_ids)
+    persona_names = st.session_state.get(
+        _persona_names_state_key(widget_scope),
+        {},
+    )
+    color_mode_key = widget_key("load", "color_mode", scope, store_id(store))
+    selected_color_mode = _seed_selectbox_key(
+        key=color_mode_key,
+        remember_key=_LAST_PROJECTION_COLOR_MODE_KEY,
+        options=_PROJECTION_COLOR_MODES,
+        default="Persona",
+    )
+    color_mode = st.selectbox(
+        "Color by",
+        options=_PROJECTION_COLOR_MODES,
+        index=_PROJECTION_COLOR_MODES.index(selected_color_mode),
+        key=color_mode_key,
+    )
+    st.session_state[_LAST_PROJECTION_COLOR_MODE_KEY] = color_mode
+    if color_mode == "K-means clusters":
+        max_clusters = min(10, len(persona_ids))
+        if max_clusters < 2:
+            st.info("Select at least two personas to use K-means coloring.")
+            return None
+        cluster_key = widget_key("load", "cluster_k", scope, store_id(store))
+        default_clusters = min(3, len(persona_ids))
+        # Re-clamp on every run, not only when the key is missing: the slider
+        # key does not depend on the persona selection, so a K stored while
+        # more personas were selected could otherwise exceed the new maximum.
+        try:
+            stored_clusters = int(
+                st.session_state.get(
+                    cluster_key,
+                    st.session_state.get(
+                        _LAST_PROJECTION_CLUSTER_K_KEY,
+                        default_clusters,
+                    ),
+                )
+            )
+        except (TypeError, ValueError):
+            # A non-numeric remembered value falls back to the default.
+            stored_clusters = default_clusters
+        st.session_state[cluster_key] = min(max(stored_clusters, 2), max_clusters)
+        n_clusters = st.slider(
+            "K (clusters)",
+            min_value=2,
+            max_value=max_clusters,
+            key=cluster_key,
+        )
+        mode_key = widget_key("load", "cluster_mode", scope, store_id(store))
+        mode_options = list(_CLUSTER_MODES)
+        selected_mode = _seed_selectbox_key(
+            key=mode_key,
+            remember_key=_LAST_PROJECTION_CLUSTER_MODE_KEY,
+            options=mode_options,
+            default=mode_options[0],
+        )
+        mode_label = st.selectbox(
+            "Cluster fit",
+            options=mode_options,
+            index=mode_options.index(selected_mode),
+            key=mode_key,
+            help=(
+                "Mean across layers is the previous behavior. First selected "
+                "layer keeps one fixed clustering from the first frame. Per layer "
+                "recomputes clustering for each animation frame."
+            ),
+        )
+        st.session_state[_LAST_PROJECTION_CLUSTER_K_KEY] = n_clusters
+        st.session_state[_LAST_PROJECTION_CLUSTER_MODE_KEY] = mode_label
+        return ProjectionColorConfig(
+            color_mode=color_mode,
+            n_clusters=n_clusters,
+            # Translate the UI label into the internal mode identifier.
+            cluster_mode=_CLUSTER_MODES[mode_label],
+        )
+    if color_mode == "Persona attribute":
+        persona_dataset = _synth_persona_dataset()
+        attribute_options = list(persona_dataset.attribute_names)
+        if not attribute_options:
+            st.info("No persona attributes are available for this dataset.")
+            return None
+        # Prefer "sex" as the initial attribute when the dataset offers it.
+        default_attribute = (
+            attribute_options.index("sex") if "sex" in attribute_options else 0
+        )
+        attribute_key = widget_key("load", "attribute", scope, store_id(store))
+        selected_attribute = _seed_selectbox_key(
+            key=attribute_key,
+            remember_key=_LAST_PROJECTION_ATTRIBUTE_KEY,
+            options=attribute_options,
+            default=attribute_options[default_attribute],
+        )
+        attribute_name = st.selectbox(
+            "Attribute",
+            options=attribute_options,
+            index=attribute_options.index(selected_attribute),
+            format_func=lambda name: attribute_display_label(persona_dataset, name),
+            key=attribute_key,
+        )
+        st.session_state[_LAST_PROJECTION_ATTRIBUTE_KEY] = attribute_name
+        info = persona_dataset.attribute_info(attribute_name)
+        if info.get("high_cardinality"):
+            st.caption(
+                "High-cardinality categorical attributes are grouped to the "
+                f"top {_MAX_ATTRIBUTE_CATEGORIES} values plus Other."
+            )
+        return ProjectionColorConfig(
+            color_mode=color_mode,
+            attribute_name=attribute_name,
+        )
+    # Remaining mode ("Persona"): optionally highlight a subset of personas.
+    highlight_persona_ids: tuple[str, ...] = ()
+    if persona_ids:
+        # The key includes the persona fingerprint, so a different persona
+        # selection gets its own multiselect widget state.
+        highlight_key = widget_key(
+            "load", "persona_highlight", scope, store_id(store), persona_key
+        )
+        highlighted = st.multiselect(
+            "Highlight personas",
+            options=persona_ids,
+            default=_remember_multiselect(
+                key=highlight_key,
+                remember_key=_LAST_PROJECTION_HIGHLIGHTS_KEY,
+                options=persona_ids,
+            ),
+            format_func=lambda persona_id: _persona_display_label(
+                persona_names, persona_id
+            ),
+            key=highlight_key,
+            help=(
+                "Select a few personas to keep their default colors while the rest "
+                "are grayed out."
+            ),
+        )
+        highlight_persona_ids = tuple(highlighted)
+        st.session_state[_LAST_PROJECTION_HIGHLIGHTS_KEY] = list(highlighted)
+    highlight_persona_key = (
+        personas_fingerprint(highlight_persona_ids) if highlight_persona_ids else ""
+    )
+    return ProjectionColorConfig(
+        color_mode=color_mode,
+        highlight_persona_ids=highlight_persona_ids,
+        highlight_persona_key=highlight_persona_key,
+    )
+def _layered_figure_state_keys(
+    store: Store,
+    mask_strategy: MaskStrategy,
+    *,
+    scope: str,
+    figure_kind: str,
+    n_components: int,
+    color_config: ProjectionColorConfig,
+    variant: str,
+    persona_key: str,
+    selected_layers: list[int],
+    pair_trajectories: bool,
+) -> LayeredFigureStateKeys:
+    """Build the session-state keys that identify a layered figure.
+
+    The figure key covers every input that changes the rendered figure,
+    colouring and pair trajectories included. The projection key is produced
+    only for kinds in ``_PROJECTION_KINDS`` and leaves out the colouring
+    fields.
+    """
+    layer_key = "_".join(str(layer) for layer in selected_layers)
+    # Parts shared by both keys, in the order they appear in each key.
+    source_parts = (
+        store_id(store),
+        store.model_name,
+        mask_strategy.value,
+        figure_kind,
+        str(n_components),
+    )
+    sample_parts = (variant, "persona_vector", persona_key, layer_key)
+    color_parts = (
+        color_config.color_mode,
+        str(color_config.attribute_name),
+        str(color_config.n_clusters),
+        str(color_config.cluster_mode),
+        str(color_config.highlight_persona_key),
+    )
+    figure_key = widget_key(
+        "load",
+        f"{scope}_fig_state",
+        *source_parts,
+        *color_parts,
+        *sample_parts,
+        str(pair_trajectories),
+    )
+    if figure_kind not in _PROJECTION_KINDS:
+        return LayeredFigureStateKeys(figure=figure_key)
+    graph_overlay = figure_kind == "isomap"
+    projection_key = widget_key(
+        "load",
+        f"{scope}_projection_state",
+        *source_parts,
+        str(graph_overlay),
+        str(_DEFAULT_GRAPH_NEIGHBORS),
+        *sample_parts,
+    )
+    return LayeredFigureStateKeys(figure=figure_key, projection=projection_key)
+def _projection_build_kwargs(
+    samples,
+    *,
+    figure_kind: str,
+    selected_layers: list[int],
+    n_components: int,
+    color_config: ProjectionColorConfig,
+    persona_ids: list[str],
+    persona_names: dict[str, str],
+    projection_key: str | None,
+) -> dict:
+    """Assemble the extra keyword arguments for a projection figure build.
+
+    Returns an empty dict for figure kinds outside ``_PROJECTION_KINDS``.
+    When *projection_key* is given, projection data is read from session
+    state under that key, or computed and stored there on a miss.
+    """
+    if figure_kind not in _PROJECTION_KINDS:
+        return {}
+    # The neighbour graph overlay is enabled for Isomap only.
+    use_graph = figure_kind == "isomap"
+    kwargs: dict = {
+        "n_components": n_components,
+        "graph_overlay": use_graph,
+        "graph_n_neighbors": _DEFAULT_GRAPH_NEIGHBORS,
+    }
+    if color_config.n_clusters is not None:
+        kwargs["n_clusters"] = color_config.n_clusters
+        kwargs["cluster_mode"] = color_config.cluster_mode
+    if projection_key is not None:
+        cached_projection = st.session_state.get(projection_key)
+        if cached_projection is None:
+            cached_projection = prepare_layered_projection_data(
+                samples,
+                figure_kind,
+                layers=selected_layers,
+                n_components=n_components,
+                graph_overlay=use_graph,
+                graph_n_neighbors=_DEFAULT_GRAPH_NEIGHBORS,
+            )
+            st.session_state[projection_key] = cached_projection
+        kwargs["projection_data"] = cached_projection
+    if color_config.attribute_name is not None:
+        attribute_kwargs = attribute_color_kwargs(
+            _synth_persona_dataset(),
+            color_config.attribute_name,
+            persona_ids,
+            max_categories=_MAX_ATTRIBUTE_CATEGORIES,
+        )
+        kwargs.update(attribute_kwargs)
+    wants_highlight = (
+        color_config.color_mode == "Persona" and color_config.highlight_persona_ids
+    )
+    if wants_highlight:
+        groups = _highlight_persona_groups(
+            persona_ids,
+            persona_names,
+            color_config.highlight_persona_ids,
+        )
+        if groups is not None:
+            kwargs["groups"] = groups
+    return kwargs
+def _build_layered_analysis_figures(
+    samples,
+    *,
+    figure_kind: str,
+    selected_layers: list[int],
+    variant: str,
+    title_fn: Callable[[str], str],
+    pair_trajectories: bool,
+    build_kwargs: dict,
+) -> tuple[go.Figure, go.Figure | None]:
+    """Build the main figure and, optionally, the pair-trajectory figure.
+
+    Similarity figures with pair trajectories are delegated entirely to
+    ``build_similarity_figures``. Every other case builds the main figure
+    with ``build_layered_figure``; projection kinds are given a fixed height
+    of 700. The second element is ``None`` unless *pair_trajectories* is set.
+    """
+    def _pair_title() -> str:
+        # Evaluated lazily so the label lookup only runs when a pair figure is built.
+        return (
+            "Pair similarity trajectories - "
+            f"{prompt_variant_label(variant)} - persona vectors"
+        )
+    if pair_trajectories and figure_kind == "similarity":
+        return build_similarity_figures(
+            samples,
+            layers=selected_layers,
+            title=title_fn(variant),
+            pair_title=_pair_title(),
+        )
+    main_fig = build_layered_figure(
+        samples,
+        figure_kind,
+        layers=selected_layers,
+        title=title_fn(variant),
+        **build_kwargs,
+    )
+    if figure_kind in _PROJECTION_KINDS:
+        main_fig.update_layout(height=700)
+    if not pair_trajectories:
+        return main_fig, None
+    # NOTE(review): build_pair_similarity_figure is not among the
+    # persona_vectors.plots imports visible in this diff; confirm it is in scope.
+    extra_fig = build_pair_similarity_figure(
+        samples,
+        layers=selected_layers,
+        title=_pair_title(),
+    )
+    return main_fig, extra_fig
 def _render_layered_figure_analysis(
     store: Store,
     mask_strategy: MaskStrategy,
         mask_strategy,
         scope,
         remember_key=remember_key,
+        variant_remember_key=(
+            _LAST_PROJECTION_VARIANT_KEY
+            if figure_kind in _PROJECTION_KINDS
+            else _LAST_SIMILARITY_VARIANT_KEY
+        ),
         default_count_limit=default_count_limit,
     )
     if selected is None:
         return
     variant, persona_ids, persona_key, selected_layers = selected
+    pair_trajectories = _render_pair_trajectory_control(
+        enabled=include_pair_trajectories,
+        persona_count=len(persona_ids),
+        scope=scope,
+        store=store,
+    )
+    if not _validate_layered_figure_size(
+        figure_kind, len(persona_ids), selected_layers
+    ):
+        return
+    color_config = ProjectionColorConfig()
+    if figure_kind in _PROJECTION_KINDS:
+        color_config = _render_projection_color_config(store, scope, persona_ids)
+        if color_config is None:
             return
+    state_keys = _layered_figure_state_keys(
+        store,
+        mask_strategy,
+        scope=scope,
+        figure_kind=figure_kind,
+        n_components=n_components,
+        color_config=color_config,
+        variant=variant,
+        persona_key=persona_key,
+        selected_layers=selected_layers,
+        pair_trajectories=pair_trajectories,
     )
+    if state_keys.projection is not None:
+        _clear_old_projection_states(state_keys.projection)
     filename = scope
+    _clear_old_figure_states(state_keys.figure)
+    persona_names = st.session_state.get(
+        _persona_names_state_key(f"{scope}:{store_id(store)}"),
+        {},
+    )
     if st.button(button_label, type="primary"):
         build_label = {
             "umap": "Computing UMAP projections…",
             "pca": "Computing PCA projections…",
+            "isomap": "Computing Isomap projections…",
             "similarity": "Computing similarity matrices…",
         }.get(figure_kind, "Building figure…")
         progress = st.progress(0, text="Loading activation vectors…")
                 persona_ids,
             )
             progress.progress(55, text=build_label)
+            build_kwargs = _projection_build_kwargs(
+                samples,
+                figure_kind=figure_kind,
+                selected_layers=selected_layers,
+                n_components=n_components,
+                color_config=color_config,
+                persona_ids=persona_ids,
+                persona_names=persona_names,
+                projection_key=state_keys.projection,
+            )
+            main_fig, extra_fig = _build_layered_analysis_figures(
+                samples,
+                figure_kind=figure_kind,
+                selected_layers=selected_layers,
+                variant=variant,
+                title_fn=title_fn,
+                pair_trajectories=pair_trajectories,
+                build_kwargs=build_kwargs,
+            )
+            if (
+                color_config.color_mode == "Persona"
+                and color_config.highlight_persona_ids
+            ):
+                _gray_out_unselected_personas(main_fig)
             progress.progress(90, text="Storing figure state…")
             n_samples = samples.vectors.shape[0]
             del samples
+            _store_figure_state(state_keys.figure, (main_fig, extra_fig, n_samples))
             progress.progress(100, text="Done.")
         except Exception as exc:
             st.error(f"Could not build figure: {exc}")
+            st.session_state.pop(state_keys.figure, None)
         finally:
             _release_vector_memory(store, [variant])
             progress.empty()
+    if state_keys.figure in st.session_state:
+        main_fig, extra_fig, n_samples = st.session_state[state_keys.figure]
         _plotly_chart(main_fig)
         figs = [main_fig]
         filenames = [filename]
         st.success(f"Loaded {n_samples} samples.")
+_LAST_DENDRO_PERSONAS_KEY = "analysis:last_personas:dendro"
 _DENDRO_LINKAGE_OPTIONS = ["ward", "complete", "average", "single"]
     mask_strategy: MaskStrategy,
 ) -> str:
     fallback_model = st.session_state.get(
+        "analysis:hub_model_fallback",
         DEFAULT_COMPARE_MODEL,
     )
     try:
         return st.text_input(
             "Hub model",
             value=fallback_model,
+            key="analysis:hub_model_fallback",
             help="Compare-only model id to use if Hub config discovery is unavailable.",
         )
         return st.text_input(
             "Hub model",
             value=fallback_model,
+            key="analysis:hub_model_fallback",
             help="Compare-only model id to use for this Hub repo.",
         )
     artifacts_root: str,
     mask_strategy: MaskStrategy,
 ) -> str:
+    fallback_model = st.session_state.get("analysis:local_model", DEFAULT_COMPARE_MODEL)
     model_options = local_model_options_cached(artifacts_root, mask_strategy.value)
     if not model_options:
         return st.text_input(
             "Local model",
             value=fallback_model,
+            key="analysis:local_model",
             help="Compare-only local model id or path.",
         )
     custom = st.toggle(
         "Custom local model",
         value=False,
+        key="analysis:local_model_custom_enabled",
         help="Enter a model id/path manually instead of choosing from activation directories.",
     )
     if custom:
         return st.text_input(
             "Local model",
             value=fallback_model,
+            key="analysis:local_model",
             help="Compare-only local model id or path.",
         )
+    previous_model = st.session_state.get("analysis:local_model_select", fallback_model)
     if not any(local_model_matches(previous_model, option) for option in model_options):
         previous_model = fallback_model
     default_model = next(
         "Local model",
         options=model_options,
         index=model_options.index(default_model),
+        key="analysis:local_model_select",
         help="Models discovered under the selected artifacts root.",
     )
+    st.session_state["analysis:local_model"] = selected
     return selected
     if source == SOURCE_HUB:
         repo = st.text_input(
             "Hub repo",
+            value=st.session_state.get("analysis:hub_repo", DEFAULT_HUB_REPO),
+            key="analysis:hub_repo",
             help="Hugging Face dataset published by `scripts/push_to_hf.py`.",
         )
         hub_model_name = _render_hub_model_select(repo, mask_strategy)
     artifacts_root = st.text_input(
         "Artifacts root",
         value=str(get_artifacts_dir() / "activations"),
+        key="analysis:artifacts_root",
     )
     artifacts_root = str(Path(artifacts_root).expanduser())
     local_model_name = _render_local_model_select(artifacts_root, mask_strategy)
     )
+def render_analysis_tab() -> None:
     """Render the analysis tab."""
     st.title("Analysis")
     st.caption(
+        "Analyse persona vectors by cosine similarity, PCA, UMAP, Isomap, or hierarchical clustering."
     )
     source = _render_source_select()
         _render_dendrogram_analysis(store, mask_strategy)
         return
+    dim_options = ["2D", "3D"]
+    dim_key = widget_key("load", "projection_dims", analysis_mode)
+    remembered_dim = st.session_state.get(
+        dim_key,
+        st.session_state.get(_LAST_PROJECTION_DIMS_KEY, "2D"),
+    )
+    if remembered_dim not in dim_options:
+        remembered_dim = "2D"
     dimension_choice = st.segmented_control(
         "Projection dimensions",
+        options=dim_options,
+        default=remembered_dim,
+        key=dim_key,
         label_visibility="collapsed",
     )
+    if dimension_choice is not None:
+        st.session_state[_LAST_PROJECTION_DIMS_KEY] = dimension_choice
     n_components = 3 if dimension_choice == "3D" else 2
     dim_suffix = "" if n_components == 2 else " (3D)"
     _render_layered_figure_analysis(

tabs/chat.py CHANGED Viewed

@@ -1,50 +1,39 @@
 import streamlit as st
 from persona_data.synth_persona import PersonaData
-from state import ChatState, chat_session_key, get_chat_state, reset_chat_context_state
 from tabs.chat_ui import (
     GenerationConfig,
     render_advanced_settings,
     render_chat_window,
-    render_persona_prompt_controls,
     render_system_prompt,
 )
-from utils.chat import (
-    ChatReply,
-    build_chat_messages,
-    generate_chat_reply,
-    resolve_system_prompt,
-)
 from utils.chat_export import save_chat_export
-from utils.datasets import load_persona_list
-from utils.helpers import widget_key
 from utils.runtime import cached_model
-_LAST_PERSONA_ID_KEY = "chat:last_persona_id"
-_LAST_PROMPT_MODE_KEY = "chat:last_prompt_mode"
-_LAST_COMPARE_MODE_KEY = "chat:last_compare_mode"
-_LAST_PROBE_ENABLED_KEY = "chat:last_probe_enabled"
-_LAST_TOKEN_CONTRAST_KEY = "chat:last_token_contrast"
-def _load_personas(dataset_source: str) -> list[PersonaData] | None:
-    try:
-        personas, dataset_status = load_persona_list(
-            dataset_source,
-            personas_file=st.session_state.get("extract__personas_file"),
-            qa_file=st.session_state.get("extract__qa_file"),
-        )
-        st.caption(dataset_status)
-    except Exception as exc:
-        st.error(f"Could not load data: {exc}")
-        st.info("Check the selected dataset source or upload both JSONL files.")
-        return None
-    if not personas:
-        st.warning("No personas found in the selected dataset.")
-        st.info("Try a different dataset source or upload a non-empty personas file.")
-        return None
-    return personas
 def _render_single_chat_footer(
@@ -99,27 +88,32 @@ def _handle_single_chat_generation(
     chat_state: ChatState,
     active_system_prompt: str | None,
     generation: GenerationConfig,
-    pending_action: object,
     chat_log,
 ) -> None:
     messages = build_chat_messages(active_system_prompt, chat_state["messages"])
     with st.spinner("Generating reply..."):
         model = cached_model(model_name=model_name)
-        try:
-            reply: ChatReply = generate_chat_reply(
-                model=model,
-                messages=messages,
-                remote=remote,
-                **generation.to_generate_kwargs(),
-            )
-        except Exception as exc:
             with chat_log:
                 st.error(f"Could not generate a reply: {exc}")
                 st.info("Try a shorter prompt, reset the chat, or switch personas.")
             if pending_action == "new_user_prompt" and chat_state["messages"]:
                 chat_state["messages"].pop()
             return
     chat_state["messages"].append({"role": "assistant", "content": reply.text})
     st.rerun()
@@ -132,16 +126,14 @@ def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
     st.caption("Chat with a persona, optionally side-by-side or with token contrast.")
     context_key = chat_session_key(model_name, dataset_source)
-    chat_state = get_chat_state(model_name, remote, dataset_source)
-    # Carry over persona / prompt selections across model or remote switches.
-    if chat_state["persona_id"] is None:
-        chat_state["persona_id"] = st.session_state.get(_LAST_PERSONA_ID_KEY)
-        chat_state["prompt_mode"] = st.session_state.get(
-            _LAST_PROMPT_MODE_KEY, "templated"
-        )
-    personas = _load_personas(dataset_source)
     if personas is None:
         return
@@ -166,7 +158,6 @@ def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
         )
         return
-    # ── Single-chat mode ──────────────────────────────────────────────────────
     persona_select_key = widget_key(context_key, "persona_select")
     prompt_mode_select_key = widget_key(context_key, "system_prompt_select")
     prompt_key = widget_key(context_key, "custom_system_prompt")
@@ -176,6 +167,20 @@ def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
     reset_key = widget_key(context_key, "reset")
     edit_key = widget_key(context_key, "edit_idx")
     def _reset_active_chat_context() -> None:
         reset_chat_context_state(
             chat_state,
@@ -187,17 +192,6 @@ def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
         )
         st.session_state.pop(edit_key, None)
-    selected_persona, prompt_mode, changed_context = render_persona_prompt_controls(
-        personas,
-        chat_state["persona_id"],
-        chat_state["prompt_mode"],
-        persona_select_key,
-        prompt_mode_select_key,
-        column_widths=(2, 1),
-    )
-    st.session_state[_LAST_PERSONA_ID_KEY] = selected_persona.id
-    st.session_state[_LAST_PROMPT_MODE_KEY] = prompt_mode
     active_system_prompt = resolve_system_prompt(
         persona=selected_persona,
         mode=prompt_mode,
@@ -259,14 +253,15 @@ def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
     user_prompt = st.chat_input("Ask something...", key=chat_input_key)
-    # Pass 1: user submitted — append message and rerun so it renders before generation.
     if user_prompt:
         chat_state["messages"].append({"role": "user", "content": user_prompt})
         st.session_state[pending_key] = "new_user_prompt"
         st.rerun()
-    # Pass 2: message is already rendered above; now run generation.
-    pending_action = st.session_state.pop(pending_key, None)
     if not pending_action:
         return

+from __future__ import annotations
+from typing import cast
 import streamlit as st
 from persona_data.synth_persona import PersonaData
+from state import (
+    ChatState,
+    PendingChatAction,
+    chat_session_key,
+    get_chat_state,
+    reset_chat_context_state,
+)
+from tabs.chat_shared import (
+    generate_chat_reply_result,
+    hydrate_chat_state,
+    load_chat_personas,
+    render_chat_selection,
+)
 from tabs.chat_ui import (
     GenerationConfig,
     render_advanced_settings,
     render_chat_window,
     render_system_prompt,
 )
+from utils.chat import build_chat_messages, resolve_system_prompt
 from utils.chat_export import save_chat_export
+from utils.helpers import session_key, widget_key
 from utils.runtime import cached_model
+_LAST_PERSONA_ID_KEY = session_key("chat", "last_persona_id")
+_LAST_PROMPT_MODE_KEY = session_key("chat", "last_prompt_mode")
+_LAST_COMPARE_MODE_KEY = session_key("chat", "last_compare_mode")
+_LAST_PROBE_ENABLED_KEY = session_key("chat", "last_probe_enabled")
+_LAST_TOKEN_CONTRAST_KEY = session_key("chat", "last_token_contrast")
 def _render_single_chat_footer(
     chat_state: ChatState,
     active_system_prompt: str | None,
     generation: GenerationConfig,
+    pending_action: PendingChatAction,
     chat_log,
 ) -> None:
     messages = build_chat_messages(active_system_prompt, chat_state["messages"])
     with st.spinner("Generating reply..."):
         model = cached_model(model_name=model_name)
+        def _show_error(exc: Exception) -> None:
             with chat_log:
                 st.error(f"Could not generate a reply: {exc}")
                 st.info("Try a shorter prompt, reset the chat, or switch personas.")
+        reply, error = generate_chat_reply_result(
+            model=model,
+            messages=messages,
+            remote=remote,
+            generation=generation,
+            on_error=_show_error,
+        )
+        if error is not None:
             if pending_action == "new_user_prompt" and chat_state["messages"]:
                 chat_state["messages"].pop()
             return
+        if reply is None:
+            return
     chat_state["messages"].append({"role": "assistant", "content": reply.text})
     st.rerun()
     st.caption("Chat with a persona, optionally side-by-side or with token contrast.")
     context_key = chat_session_key(model_name, dataset_source)
+    chat_state = get_chat_state(model_name, dataset_source)
+    hydrate_chat_state(
+        chat_state,
+        persisted_persona_key=_LAST_PERSONA_ID_KEY,
+        persisted_prompt_key=_LAST_PROMPT_MODE_KEY,
+    )
+    personas = load_chat_personas(dataset_source)
     if personas is None:
         return
         )
         return
     persona_select_key = widget_key(context_key, "persona_select")
     prompt_mode_select_key = widget_key(context_key, "system_prompt_select")
     prompt_key = widget_key(context_key, "custom_system_prompt")
     reset_key = widget_key(context_key, "reset")
     edit_key = widget_key(context_key, "edit_idx")
+    selection = render_chat_selection(
+        personas,
+        chat_state["persona_id"],
+        chat_state["prompt_mode"],
+        persona_select_key,
+        prompt_mode_select_key,
+        persisted_persona_key=_LAST_PERSONA_ID_KEY,
+        persisted_prompt_key=_LAST_PROMPT_MODE_KEY,
+        column_widths=(2, 1),
+    )
+    selected_persona = selection.persona
+    prompt_mode = selection.prompt_mode
+    changed_context = selection.changed
     def _reset_active_chat_context() -> None:
         reset_chat_context_state(
             chat_state,
         )
         st.session_state.pop(edit_key, None)
     active_system_prompt = resolve_system_prompt(
         persona=selected_persona,
         mode=prompt_mode,
     user_prompt = st.chat_input("Ask something...", key=chat_input_key)
     if user_prompt:
         chat_state["messages"].append({"role": "user", "content": user_prompt})
         st.session_state[pending_key] = "new_user_prompt"
         st.rerun()
+    pending_action = cast(
+        PendingChatAction | None,
+        st.session_state.pop(pending_key, None),
+    )
     if not pending_action:
         return

tabs/chat_shared.py ADDED Viewed

	@@ -0,0 +1,105 @@

+from __future__ import annotations
+from collections.abc import Callable
+from dataclasses import dataclass
+import streamlit as st
+from persona_data.synth_persona import PersonaData
+from state import ChatState
+from tabs.chat_ui import GenerationConfig, render_persona_prompt_controls
+from utils.chat import ChatReply, generate_chat_reply
+from utils.datasets import load_persona_list
+from utils.helpers import session_key
+@dataclass(frozen=True)
+class ChatSelection:
+    """Frozen bundle of the values produced by `render_chat_selection`."""
+    # Persona returned by `render_persona_prompt_controls`.
+    persona: PersonaData
+    # Prompt mode returned by `render_persona_prompt_controls`.
+    prompt_mode: str
+    # Third value returned by `render_persona_prompt_controls`; callers use it
+    # to decide whether the chat context should be reset.
+    changed: bool
+def load_chat_personas(dataset_source: str) -> list[PersonaData] | None:
+    """Load the persona list for ``dataset_source`` and report the outcome in the UI.
+
+    The optional personas / QA files are read from ``st.session_state`` under
+    the ``extract`` session keys and forwarded to ``load_persona_list``.
+
+    Returns:
+        The loaded personas, or ``None`` when loading raised or the list is
+        empty. In both ``None`` cases a message has already been rendered.
+    """
+    personas_file_key = session_key("extract", "personas_file")
+    qa_file_key = session_key("extract", "qa_file")
+    try:
+        personas, dataset_status = load_persona_list(
+            dataset_source,
+            personas_file=st.session_state.get(personas_file_key),
+            qa_file=st.session_state.get(qa_file_key),
+        )
+        st.caption(dataset_status)
+    except Exception as exc:
+        # Any failure is surfaced to the user instead of propagating.
+        st.error(f"Could not load data: {exc}")
+        st.info("Check the selected dataset source or upload both JSONL files.")
+        return None
+    if not personas:
+        st.warning("No personas found in the selected dataset.")
+        st.info("Try a different dataset source or upload a non-empty personas file.")
+        return None
+    return personas
+def hydrate_chat_state(
+    state: ChatState,
+    *,
+    persisted_persona_key: str,
+    persisted_prompt_key: str,
+    default_prompt_mode: str = "templated",
+) -> None:
+    """Fill an unset persona selection in ``state`` from persisted session values.
+
+    Does nothing when ``state["persona_id"]`` is already set. Otherwise
+    ``state`` is mutated in place: ``persona_id`` is read from
+    ``st.session_state[persisted_persona_key]`` (``None`` if absent) and
+    ``prompt_mode`` from ``st.session_state[persisted_prompt_key]``, falling
+    back to ``default_prompt_mode``.
+    """
+    if state["persona_id"] is None:
+        state["persona_id"] = st.session_state.get(persisted_persona_key)
+        state["prompt_mode"] = st.session_state.get(
+            persisted_prompt_key,
+            default_prompt_mode,
+        )
+def render_chat_selection(
+    personas: list[PersonaData],
+    current_persona_id: str | None,
+    current_prompt_mode: str,
+    persona_key: str,
+    prompt_key: str,
+    *,
+    persisted_persona_key: str,
+    persisted_prompt_key: str,
+    column_widths: tuple[int, int] = (3, 2),
+) -> ChatSelection:
+    """Render the persona / prompt-mode controls and persist the selection.
+
+    Delegates rendering to ``render_persona_prompt_controls``, then stores the
+    selected persona id and prompt mode in ``st.session_state`` under
+    ``persisted_persona_key`` and ``persisted_prompt_key``.
+
+    Returns:
+        A ``ChatSelection`` holding the selected persona, the prompt mode, and
+        the change flag reported by the controls.
+    """
+    selected_persona, prompt_mode, changed = render_persona_prompt_controls(
+        personas,
+        current_persona_id,
+        current_prompt_mode,
+        persona_key,
+        prompt_key,
+        column_widths=column_widths,
+    )
+    # Written on every call, not only when the selection changed.
+    st.session_state[persisted_persona_key] = selected_persona.id
+    st.session_state[persisted_prompt_key] = prompt_mode
+    return ChatSelection(selected_persona, prompt_mode, changed)
+def generate_chat_reply_result(
+    *,
+    model: object,
+    messages: list[dict[str, str]],
+    remote: bool,
+    generation: GenerationConfig,
+    on_error: Callable[[Exception], None] | None = None,
+) -> tuple[ChatReply | None, Exception | None]:
+    """Call ``generate_chat_reply`` and return the outcome as a ``(reply, error)`` pair.
+
+    Generation keyword arguments come from ``generation.to_generate_kwargs()``.
+
+    Returns:
+        ``(reply, None)`` on success, or ``(None, exc)`` when an ``Exception``
+        was raised. In the failure case ``on_error`` (if given) is called with
+        the exception before returning.
+    """
+    try:
+        return (
+            generate_chat_reply(
+                model=model,
+                messages=messages,
+                remote=remote,
+                **generation.to_generate_kwargs(),
+            ),
+            None,
+        )
+    except Exception as exc:
+        # The exception is handed back to the caller rather than re-raised.
+        if on_error is not None:
+            on_error(exc)
+        return None, exc

tabs/chat_ui.py CHANGED Viewed

@@ -29,19 +29,21 @@ GENERATION_DEFAULTS = {
 _LAST_GEN_PREFIX = "chat:last_gen:"
-def _persisted_key(context_key: str, name: str, default) -> str:
     """Per-context widget key, seeded from the last cross-context value."""
-    last_key = f"{_LAST_GEN_PREFIX}{name}"
     key = widget_key(context_key, name)
     if key not in st.session_state:
-        st.session_state[key] = st.session_state.get(last_key, default)
     return key
-def _remember(name: str, value) -> None:
-    st.session_state[f"{_LAST_GEN_PREFIX}{name}"] = value
 @dataclass(frozen=True)
 class GenerationConfig:
     max_new_tokens: int
@@ -100,7 +102,7 @@ def _open_edit_dialog(
     save_col, cancel_col = st.columns(2)
     with save_col:
-        if st.button("Save", type="primary", use_container_width=True):
             messages[msg_index]["content"] = new_content
             messages[msg_index].pop("_contrast", None)
             if role == "assistant":
@@ -110,7 +112,7 @@ def _open_edit_dialog(
                 st.session_state[pending_key] = "regenerate_after_edit"
             st.rerun()
     with cancel_col:
-        if st.button("Cancel", use_container_width=True):
             st.rerun()
@@ -129,13 +131,13 @@ def _open_system_prompt_dialog(
     )
     save_col, cancel_col = st.columns(2)
     with save_col:
-        if st.button("Save", type="primary", use_container_width=True):
             st.session_state[prompt_key] = new_value
             if on_save is not None:
                 on_save()
             st.rerun()
     with cancel_col:
-        if st.button("Cancel", use_container_width=True):
             st.rerun()
@@ -307,7 +309,9 @@ def _render_generation_fragment(context_key: str, remote: bool) -> GenerationCon
         ("top_k", top_k),
         ("seed_enabled", seed_enabled),
     ):
-        _remember(name, value)
     do_sample = bool(use_sampling)
     return GenerationConfig(

 _LAST_GEN_PREFIX = "chat:last_gen:"
+def _last_generation_key(name: str) -> str:
+    """Return the session-state key for ``name``, prefixed with ``_LAST_GEN_PREFIX``."""
+    return f"{_LAST_GEN_PREFIX}{name}"
+def _persisted_key(context_key: str, name: str, default: object) -> str:
     """Per-context widget key, seeded from the last cross-context value."""
     key = widget_key(context_key, name)
+    # Seed only when the key is absent, so an existing per-context value is
+    # never overwritten; `default` applies when no last value is stored.
     if key not in st.session_state:
+        st.session_state[key] = st.session_state.get(
+            _last_generation_key(name),
+            default,
+        )
     return key
 @dataclass(frozen=True)
 class GenerationConfig:
     max_new_tokens: int
     save_col, cancel_col = st.columns(2)
     with save_col:
+        if st.button("Save", type="primary", width="stretch"):
             messages[msg_index]["content"] = new_content
             messages[msg_index].pop("_contrast", None)
             if role == "assistant":
                 st.session_state[pending_key] = "regenerate_after_edit"
             st.rerun()
     with cancel_col:
+        if st.button("Cancel", width="stretch"):
             st.rerun()
     )
     save_col, cancel_col = st.columns(2)
     with save_col:
+        if st.button("Save", type="primary", width="stretch"):
             st.session_state[prompt_key] = new_value
             if on_save is not None:
                 on_save()
             st.rerun()
     with cancel_col:
+        if st.button("Cancel", width="stretch"):
             st.rerun()
         ("top_k", top_k),
         ("seed_enabled", seed_enabled),
     ):
+        st.session_state[_last_generation_key(name)] = value
+    if seed is not None:
+        st.session_state[_last_generation_key("seed")] = seed
     do_sample = bool(use_sampling)
     return GenerationConfig(

tabs/compare_chat.py CHANGED Viewed

@@ -6,22 +6,21 @@ from nnterp import StandardizedTransformer
 from persona_data.synth_persona import PersonaData
 from state import ChatState, default_chat_state, reset_chat_context_state
-from utils.chat import (
-    ChatReply,
-    build_chat_messages,
-    generate_chat_reply,
-    resolve_system_prompt,
 )
 from utils.chat_export import save_chat_export
 from utils.contrast import compute_contrast, compute_contrast_pair
-from utils.helpers import persona_label, widget_key
 from utils.runtime import cached_model
 from .chat_ui import (
     GenerationConfig,
     render_chat_message,
     render_chat_window,
-    render_persona_prompt_controls,
     render_system_prompt,
 )
@@ -68,21 +67,26 @@ def _render_compare_panel(
     edit_key = widget_key(panel_key, "edit_idx")
     pending_key = widget_key(panel_key, "pending_regen")
-    persist_persona_key = f"chat:last_cmp_{side}_persona"
-    persist_prompt_key = f"chat:last_cmp_{side}_prompt"
-    if state["persona_id"] is None:
-        state["persona_id"] = st.session_state.get(persist_persona_key)
-        state["prompt_mode"] = st.session_state.get(persist_prompt_key, "templated")
-    selected_persona, prompt_mode, changed = render_persona_prompt_controls(
         personas,
         state["persona_id"],
         state["prompt_mode"],
         widget_key(panel_key, "persona"),
         widget_key(panel_key, "prompt_mode"),
     )
-    st.session_state[persist_persona_key] = selected_persona.id
-    st.session_state[persist_prompt_key] = prompt_mode
     if changed:
         reset_chat_context_state(
@@ -136,19 +140,13 @@ def _generate_panels(
     results: list[ChatReply | Exception] = []
     with st.spinner(spinner_label):
         for panel in panels:
-            try:
-                results.append(
-                    generate_chat_reply(
-                        model=model,
-                        messages=build_chat_messages(
-                            panel.prompt, panel.state["messages"]
-                        ),
-                        remote=remote,
-                        **generation.to_generate_kwargs(),
-                    )
-                )
-            except Exception as exc:
-                results.append(exc)
     return results

 from persona_data.synth_persona import PersonaData
 from state import ChatState, default_chat_state, reset_chat_context_state
+from tabs.chat_shared import (
+    generate_chat_reply_result,
+    hydrate_chat_state,
+    render_chat_selection,
 )
+from utils.chat import ChatReply, build_chat_messages, resolve_system_prompt
 from utils.chat_export import save_chat_export
 from utils.contrast import compute_contrast, compute_contrast_pair
+from utils.helpers import persona_label, session_key, widget_key
 from utils.runtime import cached_model
 from .chat_ui import (
     GenerationConfig,
     render_chat_message,
     render_chat_window,
     render_system_prompt,
 )
     edit_key = widget_key(panel_key, "edit_idx")
     pending_key = widget_key(panel_key, "pending_regen")
+    persist_persona_key = session_key("chat", f"last_cmp_{side}_persona")
+    persist_prompt_key = session_key("chat", f"last_cmp_{side}_prompt")
+    hydrate_chat_state(
+        state,
+        persisted_persona_key=persist_persona_key,
+        persisted_prompt_key=persist_prompt_key,
+    )
+    selection = render_chat_selection(
         personas,
         state["persona_id"],
         state["prompt_mode"],
         widget_key(panel_key, "persona"),
         widget_key(panel_key, "prompt_mode"),
+        persisted_persona_key=persist_persona_key,
+        persisted_prompt_key=persist_prompt_key,
     )
+    selected_persona = selection.persona
+    prompt_mode = selection.prompt_mode
+    changed = selection.changed
     if changed:
         reset_chat_context_state(
     results: list[ChatReply | Exception] = []
     with st.spinner(spinner_label):
         for panel in panels:
+            reply, error = generate_chat_reply_result(
+                model=model,
+                messages=build_chat_messages(panel.prompt, panel.state["messages"]),
+                remote=remote,
+                generation=generation,
+            )
+            results.append(reply if error is None else error)
     return results

tabs/extract.py CHANGED Viewed

@@ -5,7 +5,7 @@ import streamlit as st
 from catppuccin import PALETTE
 from persona_data.prompts import format_prompt
 from persona_data.synth_persona import BASELINE_PERSONA_ID, PersonaData, QAPair
-from persona_vectors.artifacts import PERSONA_VARIANTS
 from persona_vectors.extraction import (
     MaskStrategy,
     prepare_inputs_for_strategy,
@@ -14,11 +14,12 @@ from persona_vectors.extraction import (
 from persona_vectors.preview import TokenSegment, preview_token_segments
 from utils.controls import render_mask_strategy_select
-from utils.datasets import load_dataset, load_persona_list
 from utils.helpers import (
     NDIF_STATUS_ICONS,
     persona_label,
     prompt_variant_label,
     widget_key,
 )
 from utils.runtime import cached_model
@@ -29,6 +30,9 @@ _LAST_PERSONA_IDS_KEY = "extract:last_persona_ids"
 _LAST_MAX_QUESTIONS_KEY = "extract:last_max_questions"
 _LAST_MASK_STRATEGY_KEY = "extract:last_mask_strategy"
 _DEFAULT_MAX_QUESTIONS = 50
@@ -42,7 +46,7 @@ def _build_run_plan(
     selected_variants: list[str],
     runs: list[tuple[PersonaData, list[QAPair]]],
 ) -> list[tuple[PersonaData, list[QAPair], str]]:
-    """Cartesian product of personas × variants."""
     return [(p, qa, v) for v in selected_variants for p, qa in runs]
@@ -63,13 +67,13 @@ def _render_local_dataset_upload(dataset_source: str) -> None:
         st.file_uploader(
             "personas.jsonl",
             type=["jsonl"],
-            key="extract__personas_file",
             help="Expected fields: id, persona, templated_view, biography_view",
         )
         st.file_uploader(
             "qa.jsonl",
             type=["jsonl"],
-            key="extract__qa_file",
             help="Expected fields: id, qid, type, item_type, scope, question, answer",
         )
@@ -80,12 +84,14 @@ def _render_variant_controls(
     remote: bool,
     dataset_source: str,
 ) -> tuple[list[str], bool] | None:
-    default_variants = st.session_state.get(_LAST_VARIANTS_KEY, list(PERSONA_VARIANTS))
     selected_variants = st.multiselect(
         "Persona variants",
-        options=PERSONA_VARIANTS,
-        default=[v for v in default_variants if v in PERSONA_VARIANTS]
-        or list(PERSONA_VARIANTS),
         format_func=prompt_variant_label,
         key=_extract_widget_key(model_name, remote, dataset_source, "persona_variants"),
         help="Extract these variants for each selected persona.",
@@ -110,14 +116,10 @@ def _load_qa_dataset_personas(
     try:
         dataset, dataset_status = load_dataset(
             dataset_source,
-            personas_file=st.session_state.get("extract__personas_file"),
-            qa_file=st.session_state.get("extract__qa_file"),
-        )
-        personas, _ = load_persona_list(
-            dataset_source,
-            personas_file=st.session_state.get("extract__personas_file"),
-            qa_file=st.session_state.get("extract__qa_file"),
         )
         st.caption(dataset_status)
     except Exception as exc:
         st.error(f"Could not load data: {exc}")
@@ -289,10 +291,10 @@ def _render_extract_actions() -> tuple[bool, bool]:
         run_clicked = st.button(
             "Run extraction",
             type="primary",
-            use_container_width=True,
         )
     with preview_col:
-        preview_clicked = st.button("Preview tokens", use_container_width=True)
     return run_clicked, preview_clicked

 from catppuccin import PALETTE
 from persona_data.prompts import format_prompt
 from persona_data.synth_persona import BASELINE_PERSONA_ID, PersonaData, QAPair
+from persona_vectors.artifacts import SUPPORTED_VARIANTS
 from persona_vectors.extraction import (
     MaskStrategy,
     prepare_inputs_for_strategy,
 from persona_vectors.preview import TokenSegment, preview_token_segments
 from utils.controls import render_mask_strategy_select
+from utils.datasets import load_dataset, load_persona_list_from_dataset
 from utils.helpers import (
     NDIF_STATUS_ICONS,
     persona_label,
     prompt_variant_label,
+    session_key,
     widget_key,
 )
 from utils.runtime import cached_model
 _LAST_MAX_QUESTIONS_KEY = "extract:last_max_questions"
 _LAST_MASK_STRATEGY_KEY = "extract:last_mask_strategy"
+_PERSONAS_FILE_KEY = session_key("extract", "personas_file")
+_QA_FILE_KEY = session_key("extract", "qa_file")
 _DEFAULT_MAX_QUESTIONS = 50
     selected_variants: list[str],
     runs: list[tuple[PersonaData, list[QAPair]]],
 ) -> list[tuple[PersonaData, list[QAPair], str]]:
+    """Cartesian product of personas x variants."""
     return [(p, qa, v) for v in selected_variants for p, qa in runs]
         st.file_uploader(
             "personas.jsonl",
             type=["jsonl"],
+            key=_PERSONAS_FILE_KEY,
             help="Expected fields: id, persona, templated_view, biography_view",
         )
         st.file_uploader(
             "qa.jsonl",
             type=["jsonl"],
+            key=_QA_FILE_KEY,
             help="Expected fields: id, qid, type, item_type, scope, question, answer",
         )
     remote: bool,
     dataset_source: str,
 ) -> tuple[list[str], bool] | None:
+    default_variants = st.session_state.get(
+        _LAST_VARIANTS_KEY, list(SUPPORTED_VARIANTS)
+    )
     selected_variants = st.multiselect(
         "Persona variants",
+        options=SUPPORTED_VARIANTS,
+        default=[v for v in default_variants if v in SUPPORTED_VARIANTS]
+        or list(SUPPORTED_VARIANTS),
         format_func=prompt_variant_label,
         key=_extract_widget_key(model_name, remote, dataset_source, "persona_variants"),
         help="Extract these variants for each selected persona.",
     try:
         dataset, dataset_status = load_dataset(
             dataset_source,
+            personas_file=st.session_state.get(_PERSONAS_FILE_KEY),
+            qa_file=st.session_state.get(_QA_FILE_KEY),
         )
+        personas = load_persona_list_from_dataset(dataset)
         st.caption(dataset_status)
     except Exception as exc:
         st.error(f"Could not load data: {exc}")
         run_clicked = st.button(
             "Run extraction",
             type="primary",
+            width="stretch",
         )
     with preview_col:
+        preview_clicked = st.button("Preview tokens", width="stretch")
     return run_clicked, preview_clicked

tabs/probe_ui.py CHANGED Viewed

@@ -197,7 +197,7 @@ def _trace_requested(context_key: str) -> bool:
     if st.button(
         "Trace conversation",
         key=widget_key(context_key, "probe_trace"),
-        use_container_width=True,
     ):
         st.session_state[trace_key] = True
     return bool(st.session_state.get(trace_key, False))

     if st.button(
         "Trace conversation",
         key=widget_key(context_key, "probe_trace"),
+        width="stretch",
     ):
         st.session_state[trace_key] = True
     return bool(st.session_state.get(trace_key, False))

utils/{compare_sources.py → analysis_sources.py} RENAMED Viewed

@@ -1,12 +1,10 @@
 import os
 import streamlit as st
-import torch
-from persona_vectors.analysis import LayeredSamples
 from persona_vectors.artifacts import (
     ActivationStore,
     HFActivationStore,
-    activation_config_name,
     discover_activation_models,
     model_dir_name,
 )
@@ -25,28 +23,6 @@ SOURCE_LOCAL = "Local activations"
 SOURCES = (SOURCE_HUB, SOURCE_LOCAL)
-def _hub_split(repo_id: str, model_name: str, mask_strategy_value: str, variant: str):
-    from datasets import load_dataset
-    return load_dataset(
-        repo_id,
-        name=activation_config_name(model_name, mask_strategy_value),
-        split=variant,
-        keep_in_memory=False,
-    )
-def _hub_split_columns(
-    repo_id: str,
-    model_name: str,
-    mask_strategy_value: str,
-    variant: str,
-    columns: list[str],
-):
-    dataset = _hub_split(repo_id, model_name, mask_strategy_value, variant)
-    return dataset.select_columns(columns)
 @st.cache_resource(show_spinner=False, max_entries=1)
 def activation_store_cached(
     source: str,
@@ -67,8 +43,9 @@ def available_variants_cached(
     model_name: str,
     mask_strategy_value: str,
 ) -> list[str]:
-    store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    return store.available_variants()
 @st.cache_data(show_spinner=False)
@@ -79,31 +56,9 @@ def personas_cached(
     mask_strategy_value: str,
     variants: tuple[str, ...],
 ) -> list[str]:
-    if source == SOURCE_HUB:
-        variant_ids = [
-            list(
-                _hub_split_columns(
-                    location,
-                    model_name,
-                    mask_strategy_value,
-                    variant,
-                    ["persona_id"],
-                )["persona_id"]
-            )
-            for variant in variants
-        ]
-        if not variant_ids:
-            return []
-        shared = set(variant_ids[0])
-        for ids in variant_ids[1:]:
-            shared &= set(ids)
-        return [persona_id for persona_id in variant_ids[0] if persona_id in shared]
-    store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    return store.list_personas(
-        list(variants),
-        mask_strategy=MaskStrategy(mask_strategy_value),
-    )
 @st.cache_data(show_spinner=False)
@@ -115,31 +70,24 @@ def persona_names_cached(
     variants: tuple[str, ...],
     persona_ids: tuple[str, ...],
 ) -> dict[str, str]:
-    if source == SOURCE_HUB:
-        requested = set(persona_ids)
-        names: dict[str, str] = {}
-        for variant in variants:
-            metadata = _hub_split_columns(
-                location,
-                model_name,
-                mask_strategy_value,
-                variant,
-                ["persona_id", "name"],
-            )
-            for row in metadata:
-                persona_id = row["persona_id"]
-                if persona_id in requested and persona_id not in names:
-                    names[persona_id] = row.get("name") or persona_id
-                    if len(names) == len(requested):
-                        return {pid: names.get(pid, pid) for pid in persona_ids}
-        return {pid: names.get(pid, pid) for pid in persona_ids}
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    return store.persona_names(
-        list(persona_ids),
-        variants=list(variants),
-        mask_strategy=MaskStrategy(mask_strategy_value),
-    )
 @st.cache_data(show_spinner=False)
@@ -151,11 +99,11 @@ def local_model_options_cached(
 @st.cache_data(show_spinner=False)
 def hub_models_by_mask_strategy(repo_id: str) -> dict[MaskStrategy, list[str]]:
-    raw = list_hub_vector_models(repo_id)
     return {
         MaskStrategy(strategy_value): models
-        for strategy_value, models in raw.items()
-        if strategy_value in {strategy.value for strategy in MaskStrategy}
     }
@@ -173,56 +121,14 @@ def store_id(store: Store) -> str:
 def available_variants(store: Store, mask_strategy: MaskStrategy) -> list[str]:
     source, location, model_name = store_cache_parts(store)
-    return available_variants_cached(
-        source,
-        location,
-        model_name,
-        mask_strategy.value,
-    )
-@st.cache_data(show_spinner=False)
-def store_layers_cached(
-    source: str,
-    location: str,
-    model_name: str,
-    mask_strategy_value: str,
-    variants: tuple[str, ...],
-    persona_ids: tuple[str, ...],
-) -> list[int]:
-    if source == SOURCE_HUB:
-        shared_layers: set[int] | None = None
-        requested = list(persona_ids)
-        for variant in variants:
-            dataset = _hub_split(location, model_name, mask_strategy_value, variant)
-            ids = list(dataset.select_columns(["persona_id"])["persona_id"])
-            sample_id = requested[0] if requested else (ids[0] if ids else None)
-            if sample_id is None:
-                return []
-            if requested and any(persona_id not in ids for persona_id in requested):
-                return []
-            vector = torch.as_tensor(dataset[ids.index(sample_id)]["vector"])
-            if vector.ndim != 2:
-                raise ValueError(
-                    f"tensor for {sample_id!r} must have shape (num_layers, hidden_size)"
-                )
-            layers = set(range(int(vector.shape[0])))
-            shared_layers = layers if shared_layers is None else shared_layers & layers
-        return sorted(shared_layers or set())
-    store = activation_store_cached(source, location, model_name, mask_strategy_value)
-    return store.list_layers(
-        list(variants),
-        list(persona_ids),
-        mask_strategy=MaskStrategy(mask_strategy_value),
-    )
 def local_model_matches(left: str, right: str) -> bool:
     return model_dir_name(left) == model_dir_name(right)
-def load_persona_vectors_lean(
     source: str,
     location: str,
     model_name: str,
@@ -230,61 +136,16 @@ def load_persona_vectors_lean(
     variant: str,
     persona_ids: tuple[str, ...],
 ) -> LayeredSamples:
-    if source != SOURCE_HUB:
-        from persona_vectors.analysis import load_persona_vectors
-        store = activation_store_cached(
-            source,
-            location,
-            model_name,
-            mask_strategy_value,
-        )
-        return load_persona_vectors(
-            store,
-            variant,
-            mask_strategy=MaskStrategy(mask_strategy_value),
-            persona_ids=list(persona_ids),
-        )
-    dataset = _hub_split(location, model_name, mask_strategy_value, variant)
-    metadata = dataset.select_columns(["persona_id", "name"])
-    index_by_id: dict[str, int] = {}
-    name_by_id: dict[str, str] = {}
-    requested = set(persona_ids)
-    for index, row in enumerate(metadata):
-        persona_id = row["persona_id"]
-        if persona_id in requested:
-            index_by_id[persona_id] = index
-            name_by_id[persona_id] = row.get("name") or persona_id
-            if len(index_by_id) == len(requested):
-                break
-    missing = [
-        persona_id for persona_id in persona_ids if persona_id not in index_by_id
-    ]
-    if missing:
-        raise FileNotFoundError(
-            f"Missing {len(missing)} persona vector(s) in {variant!r}: {missing[:3]}"
-        )
-    vectors, labels, hover_text = [], [], []
-    for persona_id in persona_ids:
-        name = name_by_id.get(persona_id, persona_id)
-        vector = torch.as_tensor(
-            dataset[index_by_id[persona_id]]["vector"],
-            dtype=torch.float32,
-        )
-        if vector.ndim != 2:
-            raise ValueError(
-                f"tensor for {persona_id!r} must have shape (num_layers, hidden_size)"
-            )
-        vectors.append(vector)
-        labels.append(name)
-        hover_text.append(f"Persona: {name}<br>ID: {persona_id}")
-    return LayeredSamples(torch.stack(vectors), labels, hover_text)
-def load_variant_vectors_lean(
     source: str,
     location: str,
     model_name: str,
@@ -293,27 +154,18 @@ def load_variant_vectors_lean(
     persona_ids: tuple[str, ...],
 ) -> dict[str, LayeredSamples]:
     return {
-        variant: load_persona_vectors_lean(
-            source,
-            location,
-            model_name,
-            mask_strategy_value,
-            variant,
-            persona_ids,
         )
         for variant in variants
     }
-def release_store_cache(
     store: Store,
     variants: list[str] | tuple[str, ...] | None = None,
 ) -> None:
-    cache = getattr(store, "_cache", None)
-    if not isinstance(cache, dict):
-        return
-    if variants is None:
-        cache.clear()
-        return
-    for variant in variants:
-        cache.pop(variant, None)

 import os
 import streamlit as st
+from persona_vectors.analysis import LayeredSamples, load_persona_vectors
 from persona_vectors.artifacts import (
     ActivationStore,
     HFActivationStore,
     discover_activation_models,
     model_dir_name,
 )
 SOURCES = (SOURCE_HUB, SOURCE_LOCAL)
 @st.cache_resource(show_spinner=False, max_entries=1)
 def activation_store_cached(
     source: str,
     model_name: str,
     mask_strategy_value: str,
 ) -> list[str]:
+    return activation_store_cached(
+        source, location, model_name, mask_strategy_value
+    ).available_variants()
 @st.cache_data(show_spinner=False)
     mask_strategy_value: str,
     variants: tuple[str, ...],
 ) -> list[str]:
+    return activation_store_cached(
+        source, location, model_name, mask_strategy_value
+    ).list_personas(list(variants))
 @st.cache_data(show_spinner=False)
     variants: tuple[str, ...],
     persona_ids: tuple[str, ...],
 ) -> dict[str, str]:
     store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    names = store.persona_names(list(persona_ids), variants=list(variants))
+    # Preserve input order, fall back to the id when the row has no display name.
+    return {pid: names.get(pid, pid) for pid in persona_ids}
+@st.cache_data(show_spinner=False)
+def store_layers_cached(
+    source: str,
+    location: str,
+    model_name: str,
+    mask_strategy_value: str,
+    variants: tuple[str, ...],
+    persona_ids: tuple[str, ...],
+) -> list[int]:
+    return activation_store_cached(
+        source, location, model_name, mask_strategy_value
+    ).list_layers(list(variants), list(persona_ids))
 @st.cache_data(show_spinner=False)
 @st.cache_data(show_spinner=False)
 def hub_models_by_mask_strategy(repo_id: str) -> dict[MaskStrategy, list[str]]:
+    valid = {strategy.value for strategy in MaskStrategy}
     return {
         MaskStrategy(strategy_value): models
+        for strategy_value, models in list_hub_vector_models(repo_id).items()
+        if strategy_value in valid
     }
 def available_variants(store: Store, mask_strategy: MaskStrategy) -> list[str]:
     source, location, model_name = store_cache_parts(store)
+    return available_variants_cached(source, location, model_name, mask_strategy.value)
 def local_model_matches(left: str, right: str) -> bool:
     return model_dir_name(left) == model_dir_name(right)
+def load_persona_vectors_cached(
     source: str,
     location: str,
     model_name: str,
     variant: str,
     persona_ids: tuple[str, ...],
 ) -> LayeredSamples:
+    store = activation_store_cached(source, location, model_name, mask_strategy_value)
+    return load_persona_vectors(
+        store,
+        variant,
+        mask_strategy=MaskStrategy(mask_strategy_value),
+        persona_ids=list(persona_ids),
+    )
+def load_variant_vectors_cached(
     source: str,
     location: str,
     model_name: str,
     persona_ids: tuple[str, ...],
 ) -> dict[str, LayeredSamples]:
     return {
+        variant: load_persona_vectors_cached(
+            source, location, model_name, mask_strategy_value, variant, persona_ids
         )
         for variant in variants
     }
+def release_hf_store_cache(
     store: Store,
     variants: list[str] | tuple[str, ...] | None = None,
 ) -> None:
+    """Drop cached HF data for ``variants`` (or all) on Hub stores."""
+    release_cache = getattr(store, "release_cache", None)
+    if isinstance(store, HFActivationStore) and callable(release_cache):
+        release_cache(variants)

utils/chat.py CHANGED Viewed

@@ -5,11 +5,11 @@ from contextlib import contextmanager, nullcontext
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Literal
 from persona_data.prompts import format_messages, format_prompt, normalize_messages
 from persona_data.synth_persona import PersonaData
 if TYPE_CHECKING:
-    import torch
     from nnterp import StandardizedTransformer
 logger = logging.getLogger(__name__)
@@ -133,8 +133,6 @@ def format_generation_prompt(
 def resolve_saved_tensor(value: object) -> torch.Tensor:
     """Resolve an nnsight ``.save()`` proxy (or raw tensor) to a CPU tensor."""
-    import torch
     resolved = value.value if getattr(value, "value", None) is not None else value
     if not isinstance(resolved, torch.Tensor):
         raise TypeError(f"Trace result did not resolve to a tensor: {type(resolved)!r}")
@@ -160,8 +158,6 @@ def _seeded_rng(seed: int | None):
         yield
         return
-    import torch
     cuda_ctx = torch.random.fork_rng(devices=range(torch.cuda.device_count()))
     mps_ctx = (
         torch.random.fork_rng(devices=range(1), device_type="mps")
@@ -207,8 +203,6 @@ def generate_chat_reply(
         ChatReply with generated text and token ids.
     """
-    import torch
     tokenizer = model.tokenizer
     prompt, prompt_token_count = format_generation_prompt(messages, tokenizer)
@@ -228,9 +222,11 @@ def generate_chat_reply(
         generation_kwargs["repetition_penalty"] = repetition_penalty
     # `remote` is captured by nnsight's RemoteableMixin.trace() and is NOT
     # forwarded to the underlying model's generate
-    with _seeded_rng(seed if do_sample and not remote else None):
-        with model.generate(prompt, remote=remote, **generation_kwargs) as tracer:
-            generated = tracer.result.save()
     if getattr(generated, "value", None) is not None:
         generated = generated.value

 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Literal
+import torch
 from persona_data.prompts import format_messages, format_prompt, normalize_messages
 from persona_data.synth_persona import PersonaData
 if TYPE_CHECKING:
     from nnterp import StandardizedTransformer
 logger = logging.getLogger(__name__)
 def resolve_saved_tensor(value: object) -> torch.Tensor:
     """Resolve an nnsight ``.save()`` proxy (or raw tensor) to a CPU tensor."""
     resolved = value.value if getattr(value, "value", None) is not None else value
     if not isinstance(resolved, torch.Tensor):
         raise TypeError(f"Trace result did not resolve to a tensor: {type(resolved)!r}")
         yield
         return
     cuda_ctx = torch.random.fork_rng(devices=range(torch.cuda.device_count()))
     mps_ctx = (
         torch.random.fork_rng(devices=range(1), device_type="mps")
         ChatReply with generated text and token ids.
     """
     tokenizer = model.tokenizer
     prompt, prompt_token_count = format_generation_prompt(messages, tokenizer)
         generation_kwargs["repetition_penalty"] = repetition_penalty
     # `remote` is captured by nnsight's RemoteableMixin.trace() and is NOT
     # forwarded to the underlying model's generate
+    with (
+        _seeded_rng(seed if do_sample and not remote else None),
+        model.generate(prompt, remote=remote, **generation_kwargs) as tracer,
+    ):
+        generated = tracer.result.save()
     if getattr(generated, "value", None) is not None:
         generated = generated.value

utils/chat_export.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 from dataclasses import asdict, is_dataclass
-from datetime import datetime, timezone
 from pathlib import Path
 from utils.helpers import slugify
@@ -72,7 +72,7 @@ def save_chat_export(
     )
     export_dir.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
     filename_parts = [
         timestamp,
         slugify(persona_name or persona_id),

 import json
 from dataclasses import asdict, is_dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from utils.helpers import slugify
     )
     export_dir.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
     filename_parts = [
         timestamp,
         slugify(persona_name or persona_id),

utils/contrast.py CHANGED Viewed

@@ -244,7 +244,9 @@ def render_contrast_html(result: TokenContrast) -> str:
     it is, with a hover tooltip showing the raw Δlog P, plus a legend.
     """
     spans: list[str] = []
-    for token, weight, raw in zip(result.tokens, result.weights, result.raw_diffs):
         bg = _weight_to_bg(weight)
         tip = escape(f"Δlog P(A−B): {raw:+.3f}")
         text = escape(token)

     it is, with a hover tooltip showing the raw Δlog P, plus a legend.
     """
     spans: list[str] = []
+    for token, weight, raw in zip(
+        result.tokens, result.weights, result.raw_diffs, strict=True
+    ):
         bg = _weight_to_bg(weight)
         tip = escape(f"Δlog P(A−B): {raw:+.3f}")
         text = escape(token)

utils/datasets.py CHANGED Viewed

@@ -13,7 +13,7 @@ from persona_data.nemotron_personas import (
 from persona_data.synth_persona import PersonaDataset as LocalPersonaDataset
 from persona_data.synth_persona import SynthPersonaDataset
-from .helpers import DATASET_SOURCES
 @st.cache_resource(show_spinner=False)
@@ -63,6 +63,12 @@ def load_persona_list(
     """
     dataset, status = load_dataset(dataset_source, personas_file, qa_file)
     cached = getattr(dataset, "_persona_list_cache", None)
     if cached is None:
         cached = list(dataset)
@@ -70,7 +76,7 @@ def load_persona_list(
             dataset._persona_list_cache = cached
         except (AttributeError, TypeError):
             pass
-    return cached, status
 def load_dataset(
@@ -86,13 +92,13 @@ def load_dataset(
 ]:
     """Load the selected dataset source for the UI."""
-    if dataset_source == DATASET_SOURCES[0]:
         return _cached_dataset(SynthPersonaDataset), "SynthPersona"
-    if dataset_source == DATASET_SOURCES[1]:
         return _cached_dataset(NemotronPersonasFranceDataset), "Nemotron France"
-    if dataset_source == DATASET_SOURCES[2]:
         return _cached_dataset(NemotronPersonasUSADataset), "Nemotron USA"
     if personas_file is None or qa_file is None:

 from persona_data.synth_persona import PersonaDataset as LocalPersonaDataset
 from persona_data.synth_persona import SynthPersonaDataset
+from .helpers import DatasetSource
 @st.cache_resource(show_spinner=False)
     """
     dataset, status = load_dataset(dataset_source, personas_file, qa_file)
+    return load_persona_list_from_dataset(dataset), status
+def load_persona_list_from_dataset(dataset: Any) -> list:
+    """Materialize and cache personas from an already-loaded dataset."""
     cached = getattr(dataset, "_persona_list_cache", None)
     if cached is None:
         cached = list(dataset)
             dataset._persona_list_cache = cached
         except (AttributeError, TypeError):
             pass
+    return cached
 def load_dataset(
 ]:
     """Load the selected dataset source for the UI."""
+    if dataset_source == DatasetSource.SYNTH_PERSONA.value:
         return _cached_dataset(SynthPersonaDataset), "SynthPersona"
+    if dataset_source == DatasetSource.NEMOTRON_FRANCE.value:
         return _cached_dataset(NemotronPersonasFranceDataset), "Nemotron France"
+    if dataset_source == DatasetSource.NEMOTRON_USA.value:
         return _cached_dataset(NemotronPersonasUSADataset), "Nemotron USA"
     if personas_file is None or qa_file is None:

utils/helpers.py CHANGED Viewed

@@ -1,9 +1,21 @@
 import hashlib
 import re
 from collections.abc import Iterable
 from persona_data.synth_persona import PersonaData
 # Variant key -> human-readable label mapping
 VARIANT_LABELS = {
     "empty": "None",
@@ -16,21 +28,21 @@ VARIANT_LABELS = {
 CHAT_PROMPT_MODES = ("empty", "templated", "biography", "custom")
 CHAT_PROMPT_MODE_LABELS = [VARIANT_LABELS[key] for key in CHAT_PROMPT_MODES]
 CHAT_PROMPT_MODE_LABEL_TO_KEY = {VARIANT_LABELS[key]: key for key in CHAT_PROMPT_MODES}
-DATASET_SOURCES = [
-    "HuggingFace: synth-persona",
-    "HuggingFace: nemotron-france",
-    "HuggingFace: nemotron-usa",
-    "Local JSONL upload",
 ]
-ANALYSIS_MODES = ["Cosine similarity", "Similarity matrix", "PCA", "UMAP", "Dendrogram"]
 ANALYSIS_HELP_TEXT = {
     "Cosine similarity": "Compare layer-wise alignment between variants.",
     "Similarity matrix": "Compare centered pairwise similarity between persona vectors by layer, with pair trajectories across layers.",
     "PCA": "Project per-persona vectors into a 2D or 3D global view.",
     "UMAP": "Project per-persona vectors into a 2D or 3D local-neighborhood view.",
     "Dendrogram": "Hierarchical clustering of persona vectors — shows biography and templated side by side for direct comparison.",
 }
@@ -56,6 +68,12 @@ def widget_key(*parts: str) -> str:
     return "::".join(parts)
 def personas_fingerprint(persona_ids: Iterable[str]) -> str:
     """Stable short fingerprint for a set of persona ids.
@@ -78,11 +96,3 @@ def persona_label(persona: PersonaData) -> str:
     """Format a persona for selection widgets."""
     return f"{persona.name} ({persona.id})"
-def persona_display_label(persona_id: str, persona_name: str | None) -> str:
-    """Format a persona id with an optional display name."""
-    if persona_name:
-        return f"{persona_name} ({persona_id})"
-    return persona_id

 import hashlib
 import re
 from collections.abc import Iterable
+from enum import Enum
 from persona_data.synth_persona import PersonaData
+class DatasetSource(str, Enum):
+    SYNTH_PERSONA = "HuggingFace: synth-persona"
+    NEMOTRON_FRANCE = "HuggingFace: nemotron-france"
+    NEMOTRON_USA = "HuggingFace: nemotron-usa"
+    LOCAL_UPLOAD = "Local JSONL upload"
+DATASET_SOURCES = [s.value for s in DatasetSource]
 # Variant key -> human-readable label mapping
 VARIANT_LABELS = {
     "empty": "None",
 CHAT_PROMPT_MODES = ("empty", "templated", "biography", "custom")
 CHAT_PROMPT_MODE_LABELS = [VARIANT_LABELS[key] for key in CHAT_PROMPT_MODES]
 CHAT_PROMPT_MODE_LABEL_TO_KEY = {VARIANT_LABELS[key]: key for key in CHAT_PROMPT_MODES}
+ANALYSIS_MODES = [
+    "Cosine similarity",
+    "Similarity matrix",
+    "PCA",
+    "UMAP",
+    "Isomap",
+    "Dendrogram",
 ]
 ANALYSIS_HELP_TEXT = {
     "Cosine similarity": "Compare layer-wise alignment between variants.",
     "Similarity matrix": "Compare centered pairwise similarity between persona vectors by layer, with pair trajectories across layers.",
     "PCA": "Project per-persona vectors into a 2D or 3D global view.",
     "UMAP": "Project per-persona vectors into a 2D or 3D local-neighborhood view.",
+    "Isomap": "Project per-persona vectors with graph-geodesic distances to probe manifold-like geometry.",
     "Dendrogram": "Hierarchical clustering of persona vectors — shows biography and templated side by side for direct comparison.",
 }
     return "::".join(parts)
+def session_key(*parts: str) -> str:
+    """Generate a colon-separated Streamlit session-state key from parts."""
+    return ":".join(parts)
 def personas_fingerprint(persona_ids: Iterable[str]) -> str:
     """Stable short fingerprint for a set of persona ids.
     """Format a persona for selection widgets."""
     return f"{persona.name} ({persona.id})"

uv.lock CHANGED Viewed

@@ -748,11 +748,11 @@ wheels = [
 [[package]]
 name = "idna"
-version = "3.14"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/05/b1/efac073e0c297ecf2fb33c346989a529d4e19164f1759102dee5953ee17e/idna-3.14.tar.gz", hash = "sha256:466d810d7a2cc1022bea9b037c39728d51ae7dad40d480fc9b7d7ecf98ba8ee3", size = 198272, upload-time = "2026-05-10T20:32:15.935Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6c/3c/3f62dee257eb3d6b2c1ef2a09d36d9793c7111156a73b5654d2c2305e5ce/idna-3.14-py3-none-any.whl", hash = "sha256:e677eaf072e290f7b725f9acf0b3a2bd55f9fd6f7c70abe5f0e34823d0accf69", size = 72184, upload-time = "2026-05-10T20:32:14.295Z" },
 ]
 [[package]]
@@ -1559,7 +1559,7 @@ wheels = [
 [[package]]
 name = "persona-data"
-version = "0.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "huggingface-hub" },
@@ -1568,9 +1568,9 @@ dependencies = [
     { name = "python-dotenv" },
     { name = "torch" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a4/2f/099a74e54846172a20b697b46b285eb2f0004e1db530308d6b4ff1f19079/persona_data-0.4.2.tar.gz", hash = "sha256:7870292a79b3943a77c31595140de3b2243b783222590248d09891de70e7fe1b", size = 9276, upload-time = "2026-05-08T13:59:27.58Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/57/03/e76a48b41ee00684a4430269007e217e70f59e2597d7c862d93cfc5ac78b/persona_data-0.4.2-py3-none-any.whl", hash = "sha256:c881d6fb71af87a6fa773284076e4cb55794db6dc447a7eb0047eee2b389c855", size = 11914, upload-time = "2026-05-08T13:59:28.198Z" },
 ]
 [[package]]
@@ -1581,7 +1581,6 @@ dependencies = [
     { name = "catppuccin" },
     { name = "datasets" },
     { name = "huggingface-hub" },
-    { name = "persona-data" },
     { name = "persona-vectors" },
     { name = "plotly" },
     { name = "python-dotenv" },
@@ -1593,8 +1592,7 @@ requires-dist = [
     { name = "catppuccin", specifier = ">=2.5.0" },
     { name = "datasets", specifier = ">=4.8.5" },
     { name = "huggingface-hub", specifier = ">=1.14.0" },
-    { name = "persona-data", specifier = ">=0.4.2" },
-    { name = "persona-vectors", specifier = ">=0.7.3" },
     { name = "plotly", specifier = ">=6.6.0" },
     { name = "python-dotenv", specifier = ">=1.2.2" },
     { name = "streamlit", specifier = ">=1.44.0" },
@@ -1602,7 +1600,7 @@ requires-dist = [
 [[package]]
 name = "persona-vectors"
-version = "0.7.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "datasets" },
@@ -1621,9 +1619,9 @@ dependencies = [
     { name = "transformers" },
     { name = "umap-learn" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6d/36/25d766934dc43f60faeba8a51c698da78bdd9af2e5d191b7ce8721612dc4/persona_vectors-0.7.3.tar.gz", hash = "sha256:75a90e68142097419a2f1cf6d21878dc5202234c12ed342d63349796255baad6", size = 28641, upload-time = "2026-05-12T10:04:37.21Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6b/e7/db961133fda6755e215e6cd9d4058a1cb93719d05ab6e24030c5da885d15/persona_vectors-0.7.3-py3-none-any.whl", hash = "sha256:abf07b6715321a16b218aede69f7efac7bf6a309e090db62ee376e3f09240fde", size = 33224, upload-time = "2026-05-12T10:04:36.351Z" },
 ]
 [[package]]
@@ -2838,7 +2836,7 @@ wheels = [
 [[package]]
 name = "transformers"
-version = "5.8.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "huggingface-hub" },
@@ -2851,9 +2849,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f2/36/390075693b76d4fb4a2bea360fb6080347763bd1f1147c49ed0ed938778c/transformers-5.8.0.tar.gz", hash = "sha256:6cc9a1f0291d16b1c1b735bad775e78ebefff7722701d4e28f98aaaa2bd6fb91", size = 8528141, upload-time = "2026-05-05T16:50:04.778Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/97/7b/5621d08b34ac35deb9fa14b58d27d124d21ef125ee1c64bc724ca47dfb63/transformers-5.8.0-py3-none-any.whl", hash = "sha256:e9d2cae6d195a7e1e05164c5ebf26142a7044e4dc4267274f4809204f92827e4", size = 10630279, upload-time = "2026-05-05T16:50:01.026Z" },
 ]
 [[package]]

 [[package]]
 name = "idna"
+version = "3.15"
 source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" },
 ]
 [[package]]
 [[package]]
 name = "persona-data"
+version = "0.5.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "huggingface-hub" },
     { name = "python-dotenv" },
     { name = "torch" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/de/9f/2257b6df8c28f0844b88f64a200a4d27f82ea10a16e657ba9fd02f561135/persona_data-0.5.1.tar.gz", hash = "sha256:5ac4467c449905fecf26a743b7128f76dbd984a076426c3ce854a13394c1fc5c", size = 10336, upload-time = "2026-05-13T11:55:00.356Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/55/ec/328013ee81672ba800777b3a9c24f18dc7cb3a93223391e3476cac55fa1b/persona_data-0.5.1-py3-none-any.whl", hash = "sha256:ccf230b4028d08b9345910b57de6ea4b60e9ec7f65ce12203f69693988314543", size = 13078, upload-time = "2026-05-13T11:55:01.402Z" },
 ]
 [[package]]
     { name = "catppuccin" },
     { name = "datasets" },
     { name = "huggingface-hub" },
     { name = "persona-vectors" },
     { name = "plotly" },
     { name = "python-dotenv" },
     { name = "catppuccin", specifier = ">=2.5.0" },
     { name = "datasets", specifier = ">=4.8.5" },
     { name = "huggingface-hub", specifier = ">=1.14.0" },
+    { name = "persona-vectors", specifier = ">=0.8.0" },
     { name = "plotly", specifier = ">=6.6.0" },
     { name = "python-dotenv", specifier = ">=1.2.2" },
     { name = "streamlit", specifier = ">=1.44.0" },
 [[package]]
 name = "persona-vectors"
+version = "0.8.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "datasets" },
     { name = "transformers" },
     { name = "umap-learn" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/76/22/8a0ca0e6e54ebd8dd07a4064c2890ec33b68ad81a00e4e93c4f9eee2bcf7/persona_vectors-0.8.0.tar.gz", hash = "sha256:3775afc7e04ab1d02582e9c4b3f2d124174ea40d376dd2b91492457a747dd553", size = 31938, upload-time = "2026-05-13T20:00:46.357Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/a6/7f67a7df27d78db706cbc9afd5d5ca4b52970b9005717c3bfcc0ce90ec71/persona_vectors-0.8.0-py3-none-any.whl", hash = "sha256:08b37a749f98b764d22d4c943158922338ab054729f7137eff2c3a167e2b2ae5", size = 36838, upload-time = "2026-05-13T20:00:47.252Z" },
 ]
 [[package]]
 [[package]]
 name = "transformers"
+version = "5.8.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "huggingface-hub" },
     { name = "tqdm" },
     { name = "typer" },
 ]
+sdist = { url = "https://files.pythonhosted.org/packages/e7/e6/4134ea2fbea322cddc7ffc94a0d8ee47fe32ce8e876b320cd37d88edfc4d/transformers-5.8.1.tar.gz", hash = "sha256:4dd5b6de4105725104d84fd6abd74b305f4debfc251b38c648ee5dd087cf543b", size = 8532019, upload-time = "2026-05-13T03:21:57.234Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/fc/b1/8be7e7ef0b5200491312201918b6125ef9c9df9dd0f0240ccef9ac824e6b/transformers-5.8.1-py3-none-any.whl", hash = "sha256:5340fb95962162cdfdae5cc91d7f8fedd92ed75216c1154c5e1f590fcf56dd0e", size = 10632882, upload-time = "2026-05-13T03:21:52.876Z" },
 ]
 [[package]]