Spaces:

implicit-personalization
/

persona-ui

Running

App Files Files Community

Jac-Zac committed on Apr 4

Commit

a89a7f1

0 Parent(s):

First commit

Browse files

Files changed (21) hide show

.env.example +19 -0
.gitignore +64 -0
README.md +79 -0
WARNING.md +3 -0
app.py +111 -0
pyproject.toml +28 -0
state.py +59 -0
tabs/__init__.py +0 -0
tabs/chat.py +636 -0
tabs/compare.py +354 -0
tabs/extract.py +195 -0
utils/__init__.py +1 -0
utils/artifacts.py +249 -0
utils/chat.py +226 -0
utils/chat_export.py +117 -0
utils/datasets.py +59 -0
utils/extraction.py +151 -0
utils/helpers.py +66 -0
utils/local_dataset.py +72 -0
utils/runtime.py +53 -0
uv.lock +0 -0

.env.example ADDED Viewed

	@@ -0,0 +1,19 @@

+# Copy this file to .env and fill in the values.
+# NDIF API key for remote nnsight execution
+# Required only when the "Remote (NDIF)" toggle is enabled in the app sidebar
+# Get yours at https://login.ndif.us
+NDIF_API_KEY=your-ndif-api-key-here
+# HuggingFace model cache directory
+# Defaults to ~/.cache/huggingface if unset
+# Useful when working on a cluster with a shared cache or limited home quota
+HF_HOME=/path/to/your/hf/cache
+# Root directory for all generated artifacts (activations, plots, etc.)
+# Defaults to artifacts if unset
+ARTIFACTS_DIR=artifacts
+# Default model IDs shown in the sidebar (optional — change to override the built-in defaults)
+# DEFAULT_MODEL=google/gemma-2-2b-it
+# REMOTE_DEFAULT_MODEL=google/gemma-2-9b-it

.gitignore ADDED Viewed

	@@ -0,0 +1,64 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+# Environment variables — .env.example is intentionally tracked
+.env
+.env.*
+!.env.example
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+# Jupyter
+.ipynb_checkpoints/
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# OS
+.DS_Store
+Thumbs.db
+# Project specific
+results/
+outputs/
+artifacts/
+*.json.bak
+*.jsonl
+*.jsonl.bak
+# Tmp to avoid pushing things I'm testing
+__marimo__/
+AGENTS.md
+# notebook_marimo.py

README.md ADDED Viewed

	@@ -0,0 +1,79 @@

+# Persona UI
+Streamlit interface for persona vector extraction, analysis, and chat.
+> [!WARNING]
+> This is a proof-of-concept UI, mostly vibe-coded. It will likely be replaced by a proper frontend/backend in the future.
+## Overview
+A web app built on top of [persona-vectors](../persona-vectors) that provides three tabs:
+- **Chat** — interactive conversations with a model using persona-based system prompts (templated or biography)
+- **Compare** — load saved activations and explore layer-wise cosine similarity, PCA, and UMAP projections
+- **Extract** — run activation extraction from HuggingFace or a local JSONL dataset directly from the browser
+## Repository Layout
+```
+persona-ui/
+├── app.py                   # Main entry point (Streamlit)
+├── state.py                 # Session state management (chat history, KV cache)
+├── tabs/
+│   ├── chat.py              # Chat tab
+│   ├── compare.py           # Activation comparison tab
+│   └── extract.py           # Extraction tab
+└── utils/
+    ├── artifacts.py         # Load saved activations metadata
+    ├── chat.py              # Chat generation logic
+    ├── chat_export.py       # Export chat logs to JSON
+    ├── datasets.py          # Dataset loader wrapper
+    ├── extraction.py        # Extraction orchestration
+    ├── helpers.py           # UI labels and slug helpers
+    ├── local_dataset.py     # Local JSONL dataset parsing
+    └── runtime.py           # Model caching and NDIF queries
+```
+Dataset loading and environment helpers are provided by the sibling
+[persona-data](../persona-data) package. Core extraction, analysis, and
+steering logic comes from [persona-vectors](../persona-vectors).
+## Installation
+```bash
+uv sync
+cp .env.example .env
+```
+## Quickstart
+```bash
+streamlit run app.py
+```
+## Configuration
+Copy `.env.example` to `.env` and fill in:
+```bash
+NDIF_API_KEY=...       # Required for remote (NDIF) model execution
+HF_HOME=...            # Optional: HuggingFace cache directory
+ARTIFACTS_DIR=...      # Optional: root directory for saved artifacts (default: ./artifacts)
+```
+The app loads this file automatically on startup via `load_dotenv()` in `app.py`.
+## Saved Artifacts
+The Compare and Extract tabs read from / write to:
+```
+artifacts/
+├── activations/<model_dir>/<prompt_variant>/<persona_id>/
+│   ├── activations.safetensors
+│   └── metadata.json
+└── chats/<model_dir>/<prompt_variant>/
+    └── <export>.json
+```
+`<model_dir>` is the model name with `/` replaced by `__` (e.g. `google__gemma-2-9b-it`).

WARNING.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # WARNING 🚨
2	+
3	+ This part of the project is mostly vibe-coded, mainly because it will probably be replaced in the future by a proper backend/frontend interface that does not depend on Streamlit. For now it is primarily a proof of concept and an easy-to-develop part of the project.

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import os
+from pathlib import Path
+import streamlit as st
+from dotenv import load_dotenv
+# Load .env early so DEFAULT_MODEL / REMOTE_DEFAULT_MODEL can be overridden via env
+load_dotenv(Path(__file__).parent / ".env")
+from utils.helpers import DATASET_SOURCES
+DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "google/gemma-2-2b-it")
+REMOTE_DEFAULT_MODEL = os.environ.get("REMOTE_DEFAULT_MODEL", "google/gemma-2-9b-it")
+def _sidebar_controls() -> tuple[bool, str, str, str]:
+    """Render the sidebar and return the user's current selections.
+    Returns a ``(remote, model_name, dataset_source, active_tab)`` tuple: the
+    state of the "Remote (NDIF)" toggle, the selected model id, the selected
+    dataset source, and the name of the active tab.
+    """
+    from utils.runtime import list_remote_models
+    with st.sidebar:
+        st.markdown("# Persona UI")
+        st.caption("Chat, extract, and compare persona runs.")
+        # _TABS / _TAB_ICONS are module-level constants defined below this
+        # function; they are resolved when the function is called.
+        if "sidebar__active_tab" not in st.session_state:
+            st.session_state["sidebar__active_tab"] = _TABS[0]
+        active_tab = st.session_state["sidebar__active_tab"]
+        # One button per tab; the active tab is drawn as "primary". Clicking a
+        # button stores the new tab and reruns the script.
+        for tab_name, icon in zip(_TABS, _TAB_ICONS, strict=True):
+            is_selected = tab_name == active_tab
+            if st.button(
+                tab_name,
+                key=f"sidebar__tab__{tab_name.lower()}",
+                use_container_width=True,
+                type="primary" if is_selected else "secondary",
+                icon=icon,
+            ):
+                st.session_state["sidebar__active_tab"] = tab_name
+                st.rerun()
+        st.divider()
+        st.caption("Runtime")
+        remote = st.toggle("Remote (NDIF)", value=False, key="sidebar__remote")
+        if remote:
+            remote_models = list_remote_models()
+            if remote_models:
+                # Preselect REMOTE_DEFAULT_MODEL when it is in the returned
+                # list; otherwise fall back to the first entry.
+                default_model = (
+                    REMOTE_DEFAULT_MODEL
+                    if REMOTE_DEFAULT_MODEL in remote_models
+                    else remote_models[0]
+                )
+                model_name = st.selectbox(
+                    "Model",
+                    options=remote_models,
+                    index=remote_models.index(default_model),
+                    key="sidebar__remote_model",
+                    help="Running NDIF model.",
+                )
+            else:
+                # No models were returned: show an error but still return the
+                # configured default model name.
+                st.error("No running NDIF models found.")
+                model_name = REMOTE_DEFAULT_MODEL
+        else:
+            model_name = st.text_input(
+                "Model",
+                value=DEFAULT_MODEL,
+                key="sidebar__local_model",
+                help="Local model id or path.",
+            )
+        st.caption("Data")
+        dataset_source = st.selectbox(
+            "Source",
+            DATASET_SOURCES,
+            key="sidebar__dataset_source",
+            help="Dataset for Chat and Extract.",
+        )
+    return remote, model_name, dataset_source, active_tab
+_TABS = ["Chat", "Compare", "Extract"]
+_TAB_ICONS = [":material/chat:", ":material/search:", ":material/tune:"]
+def main() -> None:
+    """Run the Streamlit app."""
+    # Deferred: importing torch is slow; keep it after dotenv load (done at
+    # module level above) so the Streamlit page config renders immediately.
+    import torch
+    torch.set_grad_enabled(False)
+    st.set_page_config(page_title="Persona UI", layout="wide")
+    remote, model_name, dataset_source, active_tab = _sidebar_controls()
+    if active_tab == "Extract":
+        from tabs.extract import render_extract_tab
+        render_extract_tab(remote, model_name, dataset_source)
+    elif active_tab == "Compare":
+        from tabs.compare import render_compare_tab
+        render_compare_tab(model_name)
+    else:
+        from tabs.chat import render_chat_tab
+        render_chat_tab(remote, model_name, dataset_source)
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,28 @@

+[project]
+name = "persona-ui"
+version = "0.1.0"
+description = "Streamlit UI for persona-vectors"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "persona-vectors",
+    "persona-data",
+    "nnterp>=1.3.0",
+    "streamlit>=1.44.0",
+    "plotly>=6.6.0",
+    "kaleido>=1.0.0",
+    "python-dotenv>=1.2.2",
+    "torch>=2.10.0",
+    "transformers>=5.2.0",
+]
+[tool.uv.sources]
+# NOTE: git sources are active below; for local development, swap in the commented-out editable path sources instead.
+persona-vectors = { git = "ssh://git@github.com/implicit-personalization/persona-vectors.git" }
+persona-data = { git = "ssh://git@github.com/implicit-personalization/persona-data.git" }
+# persona-vectors = { path = "../persona-vectors", editable = true }
+# persona-data = { path = "../persona-data", editable = true }
+# [build-system]
+# requires = ["uv_build>=0.11.3,<0.12"]
+# build-backend = "uv_build"

state.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import streamlit as st
+_CHAT_STATE_PREFIX = "chat_state::"
+def chat_session_key(model_name: str, dataset_source: str) -> str:
+    """Build the session-state key for a chat context.
+    The key has the form ``chat_state::<model_name>::<dataset_source>``.
+    """
+    return f"{_CHAT_STATE_PREFIX}{model_name}::{dataset_source}"
+def _default_chat_state() -> dict[str, object]:
+    """Return a fresh chat-state dict with empty history and no KV cache."""
+    # A new dict (and a new messages list) is built on every call so that
+    # separate chat contexts never share mutable state.
+    return {
+        "messages": [],
+        "persona_id": None,
+        "prompt_mode": "templated",
+        "past_key_values": None,
+    }
+def _evict_inactive_kv_caches(active_key: str) -> None:
+    """Drop past_key_values from every chat context except the active one.
+    Only the ``past_key_values`` entry is cleared; the rest of each inactive
+    state dict (messages, persona, prompt mode) is left untouched.
+    """
+    # Session state also holds unrelated entries (e.g. sidebar widget keys),
+    # hence the prefix check on the key and the type check on the value.
+    for key in st.session_state:
+        if (
+            isinstance(key, str)
+            and key.startswith(_CHAT_STATE_PREFIX)
+            and key != active_key
+        ):
+            state = st.session_state[key]
+            if isinstance(state, dict) and state.get("past_key_values") is not None:
+                state["past_key_values"] = None
+def get_chat_state(
+    model_name: str, remote: bool, dataset_source: str
+) -> dict[str, object]:
+    """Return the mutable chat state for the active context.
+    The state is created on first access and stored in ``st.session_state``
+    under ``chat_session_key(model_name, dataset_source)``. Each call also
+    clears ``past_key_values`` on every other chat context, and on this one
+    when ``remote`` is true.
+    """
+    # Note: ``remote`` is not part of the key, so local and remote runs of the
+    # same model/dataset share one state dict.
+    key = chat_session_key(model_name, dataset_source)
+    state = st.session_state.get(key)
+    if state is None:
+        state = _default_chat_state()
+        st.session_state[key] = state
+    else:
+        # Backfill any default keys missing from an existing state dict.
+        for default_key, default_value in _default_chat_state().items():
+            state.setdefault(default_key, default_value)
+    _evict_inactive_kv_caches(key)
+    # No KV cache is kept for remote runs.
+    if remote and state.get("past_key_values") is not None:
+        state["past_key_values"] = None
+    return state
+def reset_chat_state(model_name: str, remote: bool, dataset_source: str) -> None:
+    """Reset chat history and cache for the active context.
+    ``persona_id`` and ``prompt_mode`` are left as they are; only the message
+    list and ``past_key_values`` are cleared.
+    """
+    state = get_chat_state(model_name, remote, dataset_source)
+    state["messages"] = []
+    state["past_key_values"] = None

tabs/__init__.py ADDED Viewed

File without changes

tabs/chat.py ADDED Viewed

	@@ -0,0 +1,636 @@

+import threading
+from concurrent.futures import ThreadPoolExecutor
+from contextlib import nullcontext
+import streamlit as st
+from state import chat_session_key, get_chat_state, reset_chat_state
+from utils.chat import ChatReply, generate_chat_reply, resolve_system_prompt
+from utils.chat_export import save_chat_export
+from utils.datasets import load_dataset
+from utils.helpers import (
+    MODE_LABEL_TO_KEY,
+    MODE_LABELS,
+    VARIANT_LABELS,
+    persona_label,
+    widget_key,
+)
+from utils.runtime import cached_model
+_VISIBLE_MESSAGE_COUNT = 5
+_model_lock = threading.Lock()
+def _render_chat_message(message: dict[str, str]) -> None:
+    """Render one chat message as markdown inside a chat bubble for its role.
+    Messages whose ``content`` is missing or empty are skipped.
+    """
+    if not message.get("content"):
+        return
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+def _clear_chat_ui_state(*keys: str) -> None:
+    """Remove the given keys from ``st.session_state``, ignoring absent ones."""
+    for key in keys:
+        st.session_state.pop(key, None)
+def _generation_dict(gen_kwargs: dict, advanced_generation: bool) -> dict[str, object]:
+    """Build the generation-settings dict that is passed to ``save_chat_export``.
+    Values are coerced to plain ``int``/``float``/``bool``; ``do_sample`` is
+    stored under the key ``use_sampling``.
+    """
+    return {
+        "max_new_tokens": int(gen_kwargs["max_new_tokens"]),
+        "advanced_generation": bool(advanced_generation),
+        "use_sampling": bool(gen_kwargs["do_sample"]),
+        "temperature": float(gen_kwargs["temperature"]),
+        "top_p": float(gen_kwargs["top_p"]),
+        "top_k": int(gen_kwargs["top_k"]),
+        "repetition_penalty": float(gen_kwargs["repetition_penalty"]),
+        # Passed through without coercion: the seed may be None.
+        "seed": gen_kwargs["seed"],
+    }
+# ── Compare mode helpers ───────────────────────────────────────────────────────
+def _panel_state(panel_key: str) -> dict:
+    """Get or initialise compare-panel chat state stored in session_state.
+    The returned dict is the object held in ``st.session_state``, so mutations
+    by the caller persist across reruns.
+    """
+    if panel_key not in st.session_state:
+        # Same keys and defaults as the single-chat state built in state.py.
+        st.session_state[panel_key] = {
+            "messages": [],
+            "persona_id": None,
+            "prompt_mode": "templated",
+            "past_key_values": None,
+        }
+    return st.session_state[panel_key]
+def _render_compare_panel(
+    side: str,
+    context_key: str,
+    personas: list,
+    remote: bool,
+    model_name: str,
+    dataset_source: str,
+    gen_kwargs: dict,
+    advanced_generation: bool,
+) -> dict:
+    """Render persona/prompt controls + chat log for one compare panel.
+    Returns a dict with keys needed by the generation step:
+      panel_key, state, active_system_prompt, selected_persona, chat_log
+    ``side`` names the panel ("left"/"right" at the call sites); it is used to
+    derive the panel's session-state key and as the export's ``panel_label``.
+    ``model_name``, ``dataset_source``, ``gen_kwargs`` and
+    ``advanced_generation`` are only used when exporting the chat.
+    NOTE(review): ``remote`` is accepted but not used in this function.
+    """
+    panel_key = widget_key(context_key, f"cmp_{side}")
+    state = _panel_state(panel_key)
+    # ── Per-panel selectors ──────────────────────────────────────────────────
+    p_col, m_col = st.columns([3, 2])
+    with p_col:
+        # Preselect the persona stored in state; default to the first one.
+        selected_index = next(
+            (i for i, p in enumerate(personas) if p.id == state["persona_id"]), 0
+        )
+        selected_persona = st.selectbox(
+            "Persona",
+            options=personas,
+            index=selected_index,
+            format_func=persona_label,
+            key=widget_key(panel_key, "persona"),
+        )
+    with m_col:
+        current_label = VARIANT_LABELS.get(state["prompt_mode"], "None")
+        prompt_mode_label = st.selectbox(
+            "Prompt",
+            options=MODE_LABELS,
+            index=MODE_LABELS.index(current_label),
+            key=widget_key(panel_key, "prompt_mode"),
+        )
+    prompt_mode = MODE_LABEL_TO_KEY[prompt_mode_label]
+    # Reset state when persona or mode changes.
+    # (Also true on the first render, because the stored persona_id starts as None.)
+    changed = (
+        state["persona_id"] != selected_persona.id
+        or state["prompt_mode"] != prompt_mode
+    )
+    if changed:
+        state["messages"] = []
+        state["past_key_values"] = None
+        state["persona_id"] = selected_persona.id
+        state["prompt_mode"] = prompt_mode
+        _clear_chat_ui_state(
+            widget_key(panel_key, "custom_prompt"),
+            widget_key(panel_key, "show_all"),
+        )
+    # ── System prompt ────────────────────────────────────────────────────────
+    active_system_prompt = resolve_system_prompt(
+        persona=selected_persona, mode=prompt_mode
+    )
+    custom_prompt_key = widget_key(panel_key, "custom_prompt")
+    if prompt_mode != "empty":
+        # Seed the editable text area with the resolved prompt once; after
+        # that the user's edits in session state take precedence.
+        if custom_prompt_key not in st.session_state:
+            st.session_state[custom_prompt_key] = active_system_prompt
+        with st.expander("Edit prompt", expanded=False):
+            # An emptied text area yields None, i.e. no system prompt.
+            active_system_prompt = (
+                st.text_area(
+                    "prompt",
+                    key=custom_prompt_key,
+                    height=150,
+                    label_visibility="collapsed",
+                )
+                or None
+            )
+    export_success_message: str | None = None
+    action_col1, action_col2 = st.columns(2)
+    with action_col1:
+        if st.button(
+            "Export chat",
+            key=widget_key(panel_key, "export_chat"),
+            use_container_width=True,
+        ):
+            export_path = save_chat_export(
+                model_name=model_name,
+                dataset_source=dataset_source,
+                persona_id=selected_persona.id,
+                persona_name=getattr(selected_persona, "name", None),
+                panel_label=side,
+                prompt_mode=prompt_mode,
+                system_prompt=active_system_prompt,
+                messages=state["messages"],
+                generation=_generation_dict(gen_kwargs, advanced_generation),
+            )
+            export_success_message = f"Saved chat export to {export_path}"
+    with action_col2:
+        if st.button(
+            "Reset chat",
+            key=widget_key(panel_key, "reset"),
+            use_container_width=True,
+            type="secondary",
+        ):
+            state["messages"] = []
+            state["past_key_values"] = None
+            _clear_chat_ui_state(
+                widget_key(panel_key, "custom_prompt"),
+                widget_key(panel_key, "show_all"),
+            )
+            st.rerun()
+    # Shown outside the two-column layout so the message spans the panel width.
+    if export_success_message:
+        st.success(export_success_message)
+    # ── Message history ──────────────────────────────────────────────────────
+    show_all_key = widget_key(panel_key, "show_all")
+    messages = state["messages"]
+    # Show only the last _VISIBLE_MESSAGE_COUNT messages until the user asks
+    # for the earlier ones.
+    if len(messages) > _VISIBLE_MESSAGE_COUNT and not st.session_state.get(
+        show_all_key, False
+    ):
+        hidden_count = len(messages) - _VISIBLE_MESSAGE_COUNT
+        if st.button(
+            f"Show earlier ({hidden_count} hidden)",
+            key=widget_key(panel_key, "show_all_btn"),
+        ):
+            st.session_state[show_all_key] = True
+            st.rerun()
+        visible = messages[-_VISIBLE_MESSAGE_COUNT:]
+    else:
+        visible = messages
+    # The container is returned so the caller can append new messages to it.
+    chat_log = st.container()
+    with chat_log:
+        for msg in visible:
+            _render_chat_message(msg)
+    return {
+        "panel_key": panel_key,
+        "state": state,
+        "active_system_prompt": active_system_prompt,
+        "selected_persona": selected_persona,
+        "chat_log": chat_log,
+    }
+def _generate_for_panel(
+    panel: dict,
+    model,
+    remote: bool,
+    gen_kwargs: dict,
+) -> ChatReply:
+    """Run generate_chat_reply for one compare panel. Thread-safe.
+    ``panel`` is the dict returned by ``_render_compare_panel``. The panel's
+    system prompt (if any) is prepended to its message history. No Streamlit
+    calls are made here.
+    """
+    messages = []
+    if panel["active_system_prompt"]:
+        messages.append({"role": "system", "content": panel["active_system_prompt"]})
+    messages.extend(panel["state"]["messages"])
+    # Local generation is serialised through the module-level lock; remote
+    # generation runs without it.
+    ctx = nullcontext() if remote else _model_lock
+    with ctx:
+        return generate_chat_reply(
+            model=model,
+            messages=messages,
+            remote=remote,
+            past_key_values=panel["state"]["past_key_values"],
+            **gen_kwargs,
+        )
+def _render_compare_mode(
+    remote: bool,
+    model_name: str,
+    context_key: str,
+    dataset_source: str,
+    personas: list,
+    gen_kwargs: dict,
+    advanced_generation: bool,
+) -> None:
+    """Render the full side-by-side comparison UI.
+    Draws the left and right panels, then, when the user submits a prompt,
+    sends it to both panels and appends each reply (or an error) to the
+    corresponding panel's chat log.
+    """
+    left_col, right_col = st.columns(2)
+    with left_col:
+        left = _render_compare_panel(
+            "left",
+            context_key,
+            personas,
+            remote,
+            model_name,
+            dataset_source,
+            gen_kwargs,
+            advanced_generation,
+        )
+    with right_col:
+        right = _render_compare_panel(
+            "right",
+            context_key,
+            personas,
+            remote,
+            model_name,
+            dataset_source,
+            gen_kwargs,
+            advanced_generation,
+        )
+    user_prompt = st.chat_input(
+        "Ask both...",
+        key=widget_key(context_key, "cmp_input"),
+    )
+    # Nothing submitted on this run: the panels are already rendered.
+    if not user_prompt:
+        return
+    model = cached_model(model_name=model_name, remote=remote)
+    panels = [(left, left_col), (right, right_col)]
+    # Record and display the user's message in both panels before generating.
+    for panel, col in panels:
+        panel["state"]["messages"].append({"role": "user", "content": user_prompt})
+        with col:
+            with panel["chat_log"]:
+                _render_chat_message({"role": "user", "content": user_prompt})
+    # Generate both responses in parallel (remote: truly concurrent; local: serialised via lock).
+    with st.spinner("Generating..."):
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            futures = [
+                executor.submit(_generate_for_panel, panel, model, remote, gen_kwargs)
+                for panel, col in panels
+            ]
+            # Collect results in panel order; a failure is stored as the
+            # exception object so the other panel's reply is still shown.
+            results = []
+            for future in futures:
+                try:
+                    results.append(future.result())
+                except Exception as exc:
+                    results.append(exc)
+    for (panel, col), result in zip(panels, results):
+        if isinstance(result, Exception):
+            with col:
+                with panel["chat_log"]:
+                    st.error(f"Generation failed: {result}")
+            # Remove the user message appended above so the history stays
+            # consistent after the failed turn.
+            panel["state"]["messages"].pop()
+            continue
+        panel["state"]["messages"].append({"role": "assistant", "content": result.text})
+        # The KV cache is only kept for local runs.
+        panel["state"]["past_key_values"] = (
+            result.past_key_values if not remote else None
+        )
+        with col:
+            with panel["chat_log"]:
+                _render_chat_message({"role": "assistant", "content": result.text})
+# ── Main tab entry point ───────────────────────────────────────────────────────
+def render_chat_tab(remote: bool, model_name: str, dataset_source: str) -> None:
+    """Render the chat tab.
+    Loads the personas for ``dataset_source``, renders the generation
+    settings, and then either delegates to compare mode or runs the
+    single-chat flow. Single-chat generation takes two script runs: the first
+    stores the user's message and reruns, the second generates the reply.
+    """
+    st.title("Chat")
+    context_key = chat_session_key(model_name, dataset_source)
+    chat_state = get_chat_state(model_name, remote, dataset_source)
+    try:
+        dataset, dataset_status = load_dataset(dataset_source)
+        st.caption(dataset_status)
+    except Exception as exc:
+        st.error(f"Could not load data: {exc}")
+        st.info("Check the selected dataset source or upload both JSONL files.")
+        return
+    personas = list(dataset)
+    if not personas:
+        st.warning("No personas found in the selected dataset.")
+        st.info("Try a different dataset source or upload a non-empty personas file.")
+        return
+    # ── Generation settings ───────────────────────────────────────────────────
+    with st.expander("Advanced", expanded=False):
+        config_col1, config_col2 = st.columns([2, 1])
+        with config_col1:
+            max_new_tokens = st.slider(
+                "Max new tokens",
+                min_value=16,
+                max_value=512,
+                value=256,
+                step=16,
+                key=widget_key(context_key, "max_new_tokens"),
+            )
+        with config_col2:
+            repetition_penalty = st.slider(
+                "Repetition penalty",
+                min_value=0.5,
+                max_value=2.0,
+                value=1.0,
+                step=0.05,
+                key=widget_key(context_key, "repetition_penalty"),
+            )
+        use_sampling = st.checkbox(
+            "Random sampling",
+            value=False,
+            key=widget_key(context_key, "use_sampling"),
+        )
+        # The sampling sliders are shown but disabled while sampling is off.
+        sampling_disabled = not use_sampling
+        sampling_col1, sampling_col2, sampling_col3 = st.columns(3)
+        with sampling_col1:
+            temperature = st.slider(
+                "Temperature",
+                min_value=0.01,
+                max_value=2.0,
+                value=1.0,
+                step=0.01,
+                disabled=sampling_disabled,
+                key=widget_key(context_key, "temperature"),
+            )
+        with sampling_col2:
+            top_p = st.slider(
+                "Top-p",
+                min_value=0.01,
+                max_value=1.0,
+                value=1.0,
+                step=0.01,
+                disabled=sampling_disabled,
+                key=widget_key(context_key, "top_p"),
+            )
+        with sampling_col3:
+            top_k = st.slider(
+                "Top-k (0 = off)",
+                min_value=0,
+                max_value=100,
+                value=50,
+                step=1,
+                disabled=sampling_disabled,
+                key=widget_key(context_key, "top_k"),
+            )
+        seed_disabled = sampling_disabled or remote
+        seed_enabled = st.checkbox(
+            "Fix seed",
+            value=False,
+            disabled=seed_disabled,
+            key=widget_key(context_key, "seed_enabled"),
+        )
+        if seed_enabled:
+            seed = int(
+                st.number_input(
+                    "Seed",
+                    min_value=0,
+                    max_value=2_147_483_647,
+                    value=0,
+                    step=1,
+                    disabled=seed_disabled,
+                    key=widget_key(context_key, "seed"),
+                )
+            )
+        else:
+            seed = None
+        if remote:
+            st.caption("Seed is local-only and disabled for remote runs.")
+    # True when any setting differs from its widget default above.
+    advanced_generation = (
+        max_new_tokens != 256
+        or use_sampling
+        or temperature != 1.0
+        or top_p != 1.0
+        or top_k != 50
+        or repetition_penalty != 1.0
+        or seed is not None
+    )
+    do_sample = bool(use_sampling)
+    # The seed is only forwarded for local runs with sampling enabled.
+    generation_seed = seed if do_sample and seed is not None and not remote else None
+    gen_kwargs = dict(
+        max_new_tokens=int(max_new_tokens),
+        do_sample=do_sample,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        seed=generation_seed,
+    )
+    # ── Mode toggle ───────────────────────────────────────────────────────────
+    compare_mode = st.toggle(
+        "Compare mode",
+        value=False,
+        key=widget_key(context_key, "compare_mode"),
+        help="Side-by-side: send one message to two independent persona/prompt configurations.",
+    )
+    if compare_mode:
+        _render_compare_mode(
+            remote,
+            model_name,
+            context_key,
+            dataset_source,
+            personas,
+            gen_kwargs,
+            advanced_generation,
+        )
+        return
+    # ── Single-chat mode ──────────────────────────────────────────────────────
+    persona_select_key = widget_key(context_key, "persona_select")
+    prompt_mode_select_key = widget_key(context_key, "system_prompt_select")
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        # Preselect the persona stored in chat state; default to the first one.
+        selected_index = next(
+            (i for i, p in enumerate(personas) if p.id == chat_state["persona_id"]),
+            0,
+        )
+        selected_persona = st.selectbox(
+            "Persona",
+            options=personas,
+            index=selected_index,
+            format_func=persona_label,
+            key=persona_select_key,
+        )
+    with col2:
+        current_mode_label = VARIANT_LABELS.get(chat_state["prompt_mode"], "None")
+        prompt_mode_label = st.selectbox(
+            "Prompt",
+            options=MODE_LABELS,
+            index=MODE_LABELS.index(current_mode_label),
+            key=prompt_mode_select_key,
+        )
+        prompt_mode = MODE_LABEL_TO_KEY[prompt_mode_label]
+    active_system_prompt = resolve_system_prompt(
+        persona=selected_persona,
+        mode=prompt_mode,
+    )
+    chat_input_key = widget_key(context_key, "chat_input")
+    show_all_key = widget_key(context_key, "show_all_messages")
+    custom_prompt_key = widget_key(context_key, "custom_system_prompt")
+    pending_key = widget_key(context_key, "pending_prompt")
+    export_success_message: str | None = None
+    action_col1, action_col2 = st.columns(2)
+    with action_col1:
+        if st.button("Reset chat", use_container_width=True, type="secondary"):
+            reset_chat_state(model_name, remote, dataset_source)
+            _clear_chat_ui_state(
+                chat_input_key,
+                show_all_key,
+                custom_prompt_key,
+                pending_key,
+            )
+            st.rerun()
+    with action_col2:
+        # NOTE(review): this export runs before the persona/mode change check
+        # below, so it pairs the newly selected persona with the messages as
+        # they were before any reset, and uses the un-edited resolved prompt.
+        if st.button("Export chat", use_container_width=True):
+            export_path = save_chat_export(
+                model_name=model_name,
+                dataset_source=dataset_source,
+                persona_id=selected_persona.id,
+                persona_name=getattr(selected_persona, "name", None),
+                prompt_mode=prompt_mode,
+                system_prompt=active_system_prompt,
+                messages=chat_state["messages"],
+                generation=_generation_dict(gen_kwargs, advanced_generation),
+            )
+            export_success_message = f"Saved chat export to {export_path}"
+    if export_success_message:
+        st.success(export_success_message)
+    # Also true on the first render, because the stored persona_id starts as
+    # None; in that case there is no history and no notice is shown.
+    changed_context = (
+        chat_state["persona_id"] != selected_persona.id
+        or chat_state["prompt_mode"] != prompt_mode
+    )
+    if changed_context:
+        had_history = bool(chat_state["messages"])
+        chat_state["persona_id"] = selected_persona.id
+        chat_state["prompt_mode"] = prompt_mode
+        reset_chat_state(model_name, remote, dataset_source)
+        _clear_chat_ui_state(
+            chat_input_key,
+            show_all_key,
+            custom_prompt_key,
+            pending_key,
+        )
+        if had_history:
+            st.info("Chat history reset because the persona or system prompt changed.")
+    chat_log = st.container()
+    with chat_log:
+        # System prompt as first item in conversation — collapsed by default, editable.
+        if prompt_mode != "empty":
+            # Seed the text area with the resolved prompt once; afterwards the
+            # user's edits in session state take precedence.
+            if custom_prompt_key not in st.session_state:
+                st.session_state[custom_prompt_key] = active_system_prompt
+            with st.expander("Edit prompt", expanded=False):
+                # An emptied text area yields None, i.e. no system prompt.
+                active_system_prompt = (
+                    st.text_area(
+                        "Prompt",
+                        key=custom_prompt_key,
+                        height=200,
+                        label_visibility="collapsed",
+                    )
+                    or None
+                )
+        # Collapse older messages, show only the most recent ones.
+        messages = chat_state["messages"]
+        if len(messages) > _VISIBLE_MESSAGE_COUNT and not st.session_state.get(
+            show_all_key, False
+        ):
+            hidden_count = len(messages) - _VISIBLE_MESSAGE_COUNT
+            if st.button(
+                f"Show earlier messages ({hidden_count} hidden)",
+                key=widget_key(context_key, "show_all_btn"),
+            ):
+                st.session_state[show_all_key] = True
+                st.rerun()
+            visible_messages = messages[-_VISIBLE_MESSAGE_COUNT:]
+        else:
+            visible_messages = messages
+        for message in visible_messages:
+            _render_chat_message(message)
+    user_prompt = st.chat_input(
+        "Ask something...",
+        key=chat_input_key,
+    )
+    # Pass 1: user submitted — append message and rerun so it renders before generation.
+    if user_prompt:
+        chat_state["messages"].append({"role": "user", "content": user_prompt})
+        st.session_state[pending_key] = True
+        st.rerun()
+    # Pass 2: message is already rendered above; now run generation.
+    # The pending flag is consumed here, so a later rerun does not regenerate.
+    if not st.session_state.pop(pending_key, False):
+        return
+    messages = []
+    if active_system_prompt:
+        messages.append({"role": "system", "content": active_system_prompt})
+    messages.extend(chat_state["messages"])
+    with st.spinner("Generating reply..."):
+        model = cached_model(model_name=model_name, remote=remote)
+        try:
+            reply: ChatReply = generate_chat_reply(
+                model=model,
+                messages=messages,
+                remote=remote,
+                past_key_values=chat_state["past_key_values"],
+                **gen_kwargs,
+            )
+        except Exception as exc:
+            with chat_log:
+                st.error(f"Could not generate a reply: {exc}")
+                st.info("Try a shorter prompt, reset the chat, or switch personas.")
+            # Remove the user message stored in pass 1 so the history stays
+            # consistent after the failed turn.
+            chat_state["messages"].pop()
+            return
+    chat_state["messages"].append({"role": "assistant", "content": reply.text})
+    # The KV cache is only kept for local runs.
+    chat_state["past_key_values"] = reply.past_key_values if not remote else None
+    # Save an export after every successful reply; the returned path is not used.
+    save_chat_export(
+        model_name=model_name,
+        dataset_source=dataset_source,
+        persona_id=selected_persona.id,
+        persona_name=getattr(selected_persona, "name", None),
+        prompt_mode=prompt_mode,
+        system_prompt=active_system_prompt,
+        messages=chat_state["messages"],
+        generation=_generation_dict(gen_kwargs, advanced_generation),
+    )
+    # Rerun so the assistant reply is rendered from chat state.
+    st.rerun()

tabs/compare.py ADDED Viewed

	@@ -0,0 +1,354 @@

+import streamlit as st
+from persona_data.environment import get_artifacts_dir
+from persona_vectors.analysis import build_embedding_figure, project_pca, project_umap
+from persona_vectors.plots import (
+    plot_multiple_layer_similarities,
+    save_plot_html,
+    save_plot_png,
+)
+from utils.artifacts import (
+    artifact_persona_options,
+    list_available_layers,
+    load_cosine_traces,
+    load_embedding_samples,
+)
+from utils.helpers import (
+    ANALYSIS_HELP_TEXT,
+    ANALYSIS_LABELS,
+    ANALYSIS_MODES,
+    PROMPT_VARIANTS,
+    persona_display_label,
+    prompt_variant_label,
+    slugify,
+    widget_key,
+)
+def _filename(*parts: str) -> str:
+    return "__".join(slugify(part) for part in parts if part)
def _select_artifact_personas(
    artifacts_root: str,
    model_name: str,
    variants: list[str],
) -> tuple[list[str], dict[str, str]]:
    """Render the persona multiselect for personas that have saved artifacts.

    Args:
        artifacts_root: Directory that holds the saved activations.
        model_name: Model whose artifacts are inspected.
        variants: Prompt variants every offered persona must be available for.

    Returns:
        ``(selected_persona_ids, persona_names)``. The id list is empty when no
        persona is available (an info box is shown in that case) or when the
        user cleared the selection.
    """
    available_ids, names_by_id = artifact_persona_options(
        artifacts_root,
        model_name,
        variants,
    )

    if not available_ids:
        if len(variants) > 1:
            message = "No personas have saved activations for all selected variants. Run extraction for both variants first."
        else:
            message = "No personas found for this model yet. Run extraction first."
        st.info(message)
        return [], names_by_id

    def _label(persona_id: str) -> str:
        return persona_display_label(persona_id, names_by_id.get(persona_id))

    chosen_ids = st.multiselect(
        "Personas",
        options=available_ids,
        # Pre-select the first available persona only.
        default=available_ids[:1],
        format_func=_label,
        key=widget_key("load", "personas", model_name, *variants),
    )
    return chosen_ids, names_by_id
def _render_cosine_similarity(
    artifacts_root: str,
    model_name: str,
) -> None:
    """Render the cosine-similarity comparison between two prompt variants.

    The user picks two distinct variants and a set of personas. Pressing
    "Compare vectors" loads the traces, builds the figure and caches it in
    ``st.session_state`` so it survives the reruns triggered by the save
    buttons.

    Args:
        artifacts_root: Directory that holds the saved activations.
        model_name: Model whose artifacts are compared.
    """
    col_a, col_b = st.columns(2)
    with col_a:
        variant_a = st.selectbox(
            "Variant A",
            options=PROMPT_VARIANTS,
            index=0,
            format_func=prompt_variant_label,
            key=widget_key("load", "variant_a"),
        )
    with col_b:
        variant_b = st.selectbox(
            "Variant B",
            options=PROMPT_VARIANTS,
            # Default to the second variant when there is one.
            index=min(1, len(PROMPT_VARIANTS) - 1),
            format_func=prompt_variant_label,
            key=widget_key("load", "variant_b"),
        )
    if variant_a == variant_b:
        st.warning("Choose two different variants to compare.")
        return

    persona_ids, _ = _select_artifact_personas(
        artifacts_root,
        model_name,
        [variant_a, variant_b],
    )
    if not persona_ids:
        return

    cosine_fig_key = widget_key("load", "cosine_fig_state", model_name)

    if st.button("Compare vectors", type="primary"):
        traces, loaded_names, errors = load_cosine_traces(
            artifacts_root,
            model_name,
            persona_ids,
            variant_a,
            variant_b,
        )
        for err in errors:
            st.error(f"Failed to load vectors: `{err}`")
        if not traces:
            st.error("No personas loaded successfully.")
            st.info(
                "Check that extraction has been run for both variants and selected personas."
            )
            # Drop any stale figure so nothing outdated is shown below.
            st.session_state.pop(cosine_fig_key, None)
            return
        display_traces = [
            (
                persona_display_label(persona_id, loaded_names.get(persona_id)),
                vector_a,
                vector_b,
            )
            for persona_id, vector_a, vector_b in traces
        ]
        fig = plot_multiple_layer_similarities(
            display_traces,
            title=f"{prompt_variant_label(variant_a)} vs {prompt_variant_label(variant_b)}",
            show=False,
        )
        # Freeze the export name together with the figure: the variant widgets
        # may change on a later rerun while this cached figure is still shown,
        # and the saved file must be named after the data it actually contains.
        st.session_state[cosine_fig_key] = (
            fig,
            len(traces),
            _filename("compare", "cosine", model_name, variant_a, variant_b),
        )

    if cosine_fig_key in st.session_state:
        fig, n_traces, *extra = st.session_state[cosine_fig_key]
        # Tolerate a two-element state written before the filename was stored.
        filename = (
            extra[0]
            if extra
            else _filename("compare", "cosine", model_name, variant_a, variant_b)
        )
        st.plotly_chart(fig, use_container_width=True)
        save_col1, save_col2 = st.columns(2)
        with save_col1:
            if st.button("Save HTML", key=widget_key("load", "save_cosine_html")):
                output_path = save_plot_html(fig, filename)
                st.success(f"Saved HTML to `{output_path}`")
        with save_col2:
            if st.button("Save PNG", key=widget_key("load", "save_cosine_png")):
                try:
                    output_path = save_plot_png(fig, filename)
                    st.success(f"Saved PNG to `{output_path}`")
                except Exception as exc:
                    st.error(f"Could not save PNG: {exc}")
        st.success(f"Loaded {n_traces} personas for cosine comparison.")
+def _render_embedding_analysis(
+    artifacts_root: str,
+    model_name: str,
+    analysis_mode: str,
+) -> None:
+    selected_variant = st.selectbox(
+        "Variant",
+        options=PROMPT_VARIANTS,
+        format_func=prompt_variant_label,
+        key=widget_key("load", "variant"),
+    )
+    persona_ids, persona_names = _select_artifact_personas(
+        artifacts_root,
+        model_name,
+        [selected_variant],
+    )
+    if not persona_ids:
+        return
+    layer_options = list_available_layers(
+        artifacts_root,
+        model_name,
+        [selected_variant],
+        persona_ids,
+    )
+    if not layer_options:
+        st.info(
+            "No shared layers are available for the selected personas. Try fewer personas or a different variant."
+        )
+        return
+    persona_key = "_".join(sorted(persona_ids))
+    layer_key = widget_key("load", "layers", model_name, selected_variant, persona_key)
+    default_layers = [
+        layer
+        for layer in st.session_state.get(layer_key, layer_options[:3])
+        if layer in layer_options
+    ] or layer_options[:3]
+    selected_layers = st.multiselect(
+        "Layers",
+        options=layer_options,
+        default=default_layers,
+        key=layer_key,
+    )
+    if not selected_layers:
+        st.info("Select at least one layer.")
+        return
+    button_label = (
+        "Generate PCA projection"
+        if analysis_mode == "PCA"
+        else "Generate UMAP projection"
+    )
+    embedding_fig_key = widget_key(
+        "load", "embedding_fig_state", model_name, analysis_mode
+    )
+    if st.button(button_label, type="primary"):
+        progress = st.progress(0, text="Preparing projections...")
+        def update_progress(current: int, total: int, loaded: int) -> None:
+            fraction = current / total if total else 1.0
+            progress.progress(
+                fraction,
+                text=f"Processing layer {current}/{total} ({loaded} plot(s) ready)",
+            )
+        project_fn = project_pca if analysis_mode == "PCA" else project_umap
+        try:
+            plots, errors = load_embedding_samples(
+                artifacts_root,
+                model_name,
+                persona_ids,
+                selected_variant,
+                selected_layers,
+                project_fn,
+                persona_names,
+                progress_fn=update_progress,
+            )
+            if errors:
+                for err in errors:
+                    if (
+                        "missing layer" in err
+                        or "no selected personas have this layer" in err
+                    ):
+                        st.warning(f"Skipping unavailable data: `{err}`")
+                    else:
+                        st.error(f"Failed to load vectors: `{err}`")
+            if not plots:
+                st.warning(
+                    "No projections could be built for the current persona/layer selection."
+                )
+                st.info("Try fewer personas, fewer layers, or a different variant.")
+                st.session_state.pop(embedding_fig_key, None)
+            else:
+                title_prefix, x_label, y_label = ANALYSIS_LABELS[analysis_mode]
+                rendered_figures: list[tuple[int, object]] = []
+                for layer_idx, coords, labels, hover_text in plots:
+                    fig = build_embedding_figure(
+                        coords=coords,
+                        labels=labels,
+                        title=f"{title_prefix}, layer {layer_idx}",
+                        x_label=x_label,
+                        y_label=y_label,
+                        hover_text=hover_text,
+                    )
+                    rendered_figures.append((layer_idx, fig))
+                total_samples = sum(coords.shape[0] for _, coords, _, _ in plots)
+                st.session_state[embedding_fig_key] = (
+                    rendered_figures,
+                    persona_key,
+                    selected_variant,
+                    total_samples,
+                )
+        finally:
+            progress.empty()
+    if embedding_fig_key in st.session_state:
+        rendered_figures, saved_persona_key, saved_variant, total_samples = (
+            st.session_state[embedding_fig_key]
+        )
+        cols = st.columns(2)
+        for idx, (layer_idx, fig) in enumerate(rendered_figures):
+            with cols[idx % 2]:
+                st.plotly_chart(fig, use_container_width=True)
+        st.success(
+            f"Loaded {total_samples} samples across {len(rendered_figures)} layers."
+        )
+        filenames = [
+            _filename(
+                "compare",
+                analysis_mode,
+                model_name,
+                saved_variant,
+                saved_persona_key,
+                str(layer_idx),
+            )
+            for layer_idx, _ in rendered_figures
+        ]
+        save_col1, save_col2 = st.columns(2)
+        with save_col1:
+            if st.button(
+                "Save HTML",
+                key=widget_key("load", "save_embedding_html", analysis_mode),
+            ):
+                saved_paths = [
+                    save_plot_html(fig, fn)
+                    for (_, fig), fn in zip(rendered_figures, filenames)
+                ]
+                st.success(
+                    f"Saved {len(saved_paths)} HTML plot(s) to `artifacts/plots`."
+                )
+        with save_col2:
+            if st.button(
+                "Save PNG",
+                key=widget_key("load", "save_embedding_png", analysis_mode),
+            ):
+                try:
+                    saved_paths = [
+                        save_plot_png(fig, fn)
+                        for (_, fig), fn in zip(rendered_figures, filenames)
+                    ]
+                    st.success(
+                        f"Saved {len(saved_paths)} PNG plot(s) to `artifacts/plots`."
+                    )
+                except Exception as exc:
+                    st.error(f"Could not save PNGs: {exc}")
def render_compare_tab(model_name: str) -> None:
    """Render the compare tab.

    Shows the artifacts-root input and the analysis-mode switch, then hands
    over to the cosine-similarity view or the embedding (PCA / UMAP) view.

    Args:
        model_name: Model whose saved activations are compared.
    """
    st.title("Compare")
    st.caption("Compare saved activations by cosine similarity, PCA, or UMAP.")
    st.subheader("Analysis")

    with st.expander("Advanced", expanded=False):
        default_root = str(get_artifacts_dir() / "activations")
        artifacts_root = st.text_input("Artifacts root", value=default_root)

    chosen_mode = st.segmented_control(
        "Analysis mode",
        options=ANALYSIS_MODES,
        default=ANALYSIS_MODES[0],
        key=widget_key("load", "analysis_mode"),
        label_visibility="collapsed",
    )
    # The control can report no selection; fall back to the first mode.
    analysis_mode = ANALYSIS_MODES[0] if chosen_mode is None else chosen_mode
    st.caption(ANALYSIS_HELP_TEXT[analysis_mode])

    if analysis_mode == "Cosine similarity":
        _render_cosine_similarity(artifacts_root, model_name)
    else:
        _render_embedding_analysis(artifacts_root, model_name, analysis_mode)

tabs/extract.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import streamlit as st
+from utils.datasets import load_dataset
+from utils.extraction import run_extraction
+from utils.helpers import (
+    PROMPT_VARIANTS,
+    persona_label,
+    prompt_variant_label,
+    widget_key,
+)
+from utils.runtime import cached_model
def _extract_widget_key(
    model_name: str, remote: bool, dataset_source: str, suffix: str
) -> str:
    """Build the widget key for an extract-tab control.

    The key combines the execution mode, model and dataset source with
    ``suffix``, which names the individual control.
    """
    key_parts = ("extract", str(remote), model_name, dataset_source, suffix)
    return widget_key(*key_parts)
def _render_local_dataset_uploads() -> None:
    """Render file inputs for local dataset uploads."""
    # (label, widget key, help text) for each expected JSONL file.
    upload_specs = (
        (
            "personas.jsonl",
            "extract__personas_file",
            "Expected fields: id, persona, templated_prompt, biography_md",
        ),
        (
            "qa.jsonl",
            "extract__qa_file",
            "Expected fields: id, qid, type, question, answer, difficulty",
        ),
    )
    with st.expander("Local dataset upload", expanded=True):
        for label, state_key, help_text in upload_specs:
            st.file_uploader(
                label,
                type=["jsonl"],
                key=state_key,
                help=help_text,
            )
def render_extract_tab(remote: bool, model_name: str, dataset_source: str) -> None:
    """Render the extraction tab.

    Lets the user choose prompt variants, personas and QA filters, then runs
    ``run_extraction`` once per (persona, variant) pair and lists the saved
    artifact sets.

    Args:
        remote: Passed on to ``cached_model`` and ``run_extraction``.
        model_name: Model to load and extract activations from.
        dataset_source: Dataset source label. ``"Local JSONL upload"``
            additionally shows the upload widgets.
    """
    st.title("Extract")
    if dataset_source == "Local JSONL upload":
        _render_local_dataset_uploads()

    def key_for(suffix: str) -> str:
        return _extract_widget_key(model_name, remote, dataset_source, suffix)

    selected_variants = st.multiselect(
        "Prompt variants",
        options=PROMPT_VARIANTS,
        default=PROMPT_VARIANTS,
        format_func=prompt_variant_label,
        key=key_for("prompt_variants"),
    )
    if not selected_variants:
        st.info("Select at least one prompt variant.")
        return

    try:
        dataset, dataset_status = load_dataset(dataset_source)
        st.caption(dataset_status)
    except Exception as exc:
        st.error(f"Could not load data: {exc}")
        st.info(
            "Upload both JSONL files or switch to the built-in SynthPersona source."
        )
        return

    personas = list(dataset)
    if not personas:
        st.warning("No personas found in the selected dataset.")
        st.info(
            "Try another dataset source or check that the personas file is not empty."
        )
        return

    selected_personas = st.multiselect(
        "Personas",
        options=personas,
        # ``personas`` is known to be non-empty here.
        default=[personas[0]],
        format_func=persona_label,
        key=key_for("persona_select"),
    )
    if not selected_personas:
        st.info("Select at least one persona.")
        return

    qa_filter_type: str | None
    qa_filter_difficulty: list[int] | None
    with st.expander("Advanced", expanded=False):
        st.caption("Filters")
        col1, col2, col3 = st.columns([2, 2, 1])
        with col1:
            qa_type_select = st.selectbox(
                "QA type",
                options=["all", "explicit", "implicit"],
                index=0,
                key=key_for("qa_type_select"),
            )
            # "all" means no type filter.
            qa_filter_type = (
                qa_type_select if qa_type_select in ("explicit", "implicit") else None
            )
        with col2:
            difficulty_values = st.multiselect(
                "Difficulty",
                options=[1, 2, 3],
                default=[1, 2, 3],
                key=key_for("difficulty_select"),
            )
            # An empty selection is passed on as "no difficulty filter".
            qa_filter_difficulty = difficulty_values if difficulty_values else None

        # Pre-load QA pairs for all selected personas to validate filters and
        # set the slider range.
        qa_by_persona = {
            p.id: dataset.get_qa(
                p.id, type=qa_filter_type, difficulty=qa_filter_difficulty
            )
            for p in selected_personas
        }
        personas_without_qa = [p for p in selected_personas if not qa_by_persona[p.id]]
        if personas_without_qa:
            names = ", ".join(p.name for p in personas_without_qa)
            st.warning(f"No QA pairs match filters for: {names}. They will be skipped.")
        personas_to_run = [p for p in selected_personas if qa_by_persona[p.id]]
        if not personas_to_run:
            st.info("No personas have matching QA pairs. Widen the filters.")
            return

        # Cap the slider at the smallest QA set so every persona can supply
        # the requested number of questions.
        min_qa_count = min(len(qa_by_persona[p.id]) for p in personas_to_run)
        with col3:
            max_questions = st.slider(
                "Max questions",
                min_value=1,
                max_value=min_qa_count,
                value=min_qa_count,
                key=key_for("max_questions"),
            )

    run_clicked = st.button("Run extraction", type="primary")
    if not run_clicked:
        return

    status_box = st.empty()
    status_box.info("Extraction in progress...")
    progress = st.progress(0, text="Preparing extraction...")
    with st.spinner("Loading model..."):
        model = cached_model(model_name=model_name, remote=remote)

    try:
        total_steps = len(personas_to_run) * len(selected_variants)
        step = 0
        results = []
        for persona in personas_to_run:
            qa_pairs = qa_by_persona[persona.id][:max_questions]
            for variant in selected_variants:
                progress.progress(
                    step / total_steps if total_steps else 1.0,
                    text=f"{persona.name} · {prompt_variant_label(variant)} ({step + 1}/{total_steps})",
                )
                variant_results = run_extraction(
                    model=model,
                    model_name=model_name,
                    persona=persona,
                    qa_pairs=qa_pairs,
                    variants=[variant],
                    remote=remote,
                )
                results.extend(variant_results)
                step += 1
        progress.progress(1.0, text="Extraction complete")
    except Exception as exc:
        # Overwrite the "in progress" placeholder so the page does not show a
        # running status next to the failure.
        status_box.error(f"Extraction failed: {exc}")
        return
    finally:
        progress.empty()

    status_box.success("Extraction complete")
    st.success(f"Saved {len(results)} artifact set(s)")
    for result in results:
        st.markdown(
            f"- **{result.persona_name}** · {prompt_variant_label(result.variant)}: "
            f"{result.n_questions} questions, {result.n_layers} layers, {result.d_model} hidden size"
        )

utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Utility helpers for the Streamlit UI."""

utils/artifacts.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import logging
+from collections.abc import Callable
+from pathlib import Path
+import streamlit as st
+import torch
+from persona_vectors.activation_io import (
+    load_activation_metadata,
+    load_per_question_vectors,
+)
+logger = logging.getLogger(__name__)
def model_dir_name(model_name: str) -> str:
    """Encode a model name for use in artifact paths.

    Every ``/`` in the name is replaced by a double underscore.
    """
    segments = model_name.split("/")
    return "__".join(segments)
+def list_available_personas(
+    artifacts_root: str | Path,
+    model_name: str,
+    variants: list[str],
+) -> list[str]:
+    """List persona ids available for every requested variant."""
+    shared_personas: set[str] | None = None
+    root = Path(artifacts_root)
+    for variant in variants:
+        model_dir = root / model_dir_name(model_name) / variant
+        if not model_dir.exists():
+            return []
+        variant_personas = {d.name for d in model_dir.iterdir() if d.is_dir()}
+        if shared_personas is None:
+            shared_personas = variant_personas
+        else:
+            shared_personas &= variant_personas
+        if not shared_personas:
+            return []
+    return sorted(shared_personas or set())
+def load_persona_names(
+    artifacts_root: str | Path,
+    model_name: str,
+    variants: list[str],
+    persona_ids: list[str],
+) -> dict[str, str]:
+    """Load display names from saved activation metadata."""
+    names: dict[str, str] = {}
+    for persona_id in persona_ids:
+        for variant in variants:
+            try:
+                metadata = load_activation_metadata(
+                    root_dir=artifacts_root,
+                    model_name=model_name,
+                    prompt_variant=variant,
+                    persona_id=persona_id,
+                )
+            except Exception:
+                logger.debug(
+                    "Failed to load metadata for persona %s variant %s",
+                    persona_id,
+                    variant,
+                    exc_info=True,
+                )
+                continue
+            persona_name = metadata.get("persona_name")
+            if isinstance(persona_name, str) and persona_name:
+                names[persona_id] = persona_name
+                break
+    return names
def artifact_persona_options(
    artifacts_root: str | Path,
    model_name: str,
    variants: list[str],
) -> tuple[list[str], dict[str, str]]:
    """Return persona ids and names for the selected artifacts.

    Returns:
        ``(persona_ids, persona_names)``: the ids available for every variant
        and a mapping of id to display name for those whose metadata holds one.
    """
    available_ids = list_available_personas(artifacts_root, model_name, variants)
    display_names = load_persona_names(
        artifacts_root, model_name, variants, available_ids
    )
    return available_ids, display_names
@st.cache_data(show_spinner=False)
def list_available_layers(
    artifacts_root: str,
    model_name: str,
    variants: list[str],
    persona_ids: list[str],
) -> list[int]:
    """List layer indices shared by all matching saved activation files.

    The layer count of each file is read from dimension 1 of the loaded
    tensor. Files that fail to load are logged at debug level and ignored, so
    they do not narrow the result.

    Returns:
        Sorted layer indices common to every file that loaded. Empty when
        nothing could be loaded.
    """
    common_layers: set[int] | None = None
    for variant in variants:
        for persona_id in persona_ids:
            try:
                vectors, _ = load_per_question_vectors(
                    root_dir=artifacts_root,
                    model_name=model_name,
                    prompt_variant=variant,
                    persona_id=persona_id,
                )
            except Exception:
                logger.debug(
                    "Failed to load vectors for persona %s variant %s",
                    persona_id,
                    variant,
                    exc_info=True,
                )
                continue
            file_layers = set(range(vectors.shape[1]))
            common_layers = (
                file_layers if common_layers is None else common_layers & file_layers
            )
    return sorted(common_layers) if common_layers else []
def load_cosine_traces(
    artifacts_root: str | Path,
    model_name: str,
    persona_ids: list[str],
    variant_a: str,
    variant_b: str,
) -> tuple[list[tuple[str, torch.Tensor, torch.Tensor]], dict[str, str], list[str]]:
    """Load mean activation traces for pairwise cosine-similarity plots.

    For every persona the vectors of both variants are loaded, cast to float
    and averaged over dimension 0.

    Returns:
        ``(traces, persona_names, errors)``. ``traces`` holds
        ``(persona_id, mean_a, mean_b)`` for each persona that loaded for
        both variants. ``errors`` holds one ``"<persona_id>: <exception>"``
        entry per persona that did not.
    """
    names = load_persona_names(
        artifacts_root,
        model_name,
        [variant_a, variant_b],
        persona_ids,
    )

    def _load(variant: str, persona_id: str) -> torch.Tensor:
        vectors, _ = load_per_question_vectors(
            root_dir=artifacts_root,
            model_name=model_name,
            prompt_variant=variant,
            persona_id=persona_id,
        )
        return vectors

    loaded: list[tuple[str, torch.Tensor, torch.Tensor]] = []
    failures: list[str] = []
    for persona_id in persona_ids:
        try:
            raw_a = _load(variant_a, persona_id)
            raw_b = _load(variant_b, persona_id)
        except Exception as exc:
            failures.append(f"{persona_id}: {exc}")
            continue
        mean_a = raw_a.float().mean(dim=0)
        mean_b = raw_b.float().mean(dim=0)
        loaded.append((persona_id, mean_a, mean_b))
    return loaded, names, failures
+def load_embedding_samples(
+    artifacts_root: str | Path,
+    model_name: str,
+    persona_ids: list[str],
+    variant: str,
+    selected_layers: list[int],
+    project_fn: Callable[[torch.Tensor], torch.Tensor],
+    persona_names: dict[str, str],
+    progress_fn: Callable[[int, int, int], None] | None = None,
+) -> tuple[list[tuple[int, torch.Tensor, list[str], list[str]]], list[str]]:
+    """Load samples for 2D projections without re-reading each layer from disk."""
+    plots: list[tuple[int, torch.Tensor, list[str], list[str]]] = []
+    errors: list[str] = []
+    vectors_by_persona: dict[str, torch.Tensor] = {}
+    for persona_id in persona_ids:
+        try:
+            vectors, _ = load_per_question_vectors(
+                root_dir=artifacts_root,
+                model_name=model_name,
+                prompt_variant=variant,
+                persona_id=persona_id,
+            )
+        except Exception as exc:
+            errors.append(f"{persona_id} / {variant}: {exc}")
+            continue
+        vectors_by_persona[persona_id] = vectors
+    total_layers = len(selected_layers)
+    for idx, layer_idx in enumerate(selected_layers, start=1):
+        samples: list[torch.Tensor] = []
+        labels: list[str] = []
+        hover_text: list[str] = []
+        for persona_id, vectors in vectors_by_persona.items():
+            if layer_idx >= vectors.shape[1]:
+                errors.append(f"{persona_id} / {variant}: missing layer {layer_idx}")
+                continue
+            layer_vectors = vectors[:, layer_idx, :]
+            samples.append(layer_vectors)
+            labels.extend([persona_id] * layer_vectors.shape[0])
+            display_name = persona_names.get(persona_id) or persona_id
+            hover_text.extend(
+                [
+                    f"<b>{display_name}</b><br>{variant}",
+                ]
+                * layer_vectors.shape[0]
+            )
+        if not samples:
+            errors.append(f"Layer {layer_idx}: no selected personas have this layer")
+        else:
+            all_samples = torch.cat(samples, dim=0)
+            if all_samples.shape[0] < 2:
+                errors.append(
+                    f"Layer {layer_idx}: need at least 2 samples after filtering selected personas"
+                )
+            else:
+                try:
+                    coords = project_fn(all_samples)
+                    plots.append((layer_idx, coords, labels, hover_text))
+                except Exception as exc:
+                    errors.append(f"Layer {layer_idx}: {exc}")
+        if progress_fn is not None:
+            progress_fn(idx, total_layers, len(plots))
+    return plots, errors

utils/chat.py ADDED Viewed

	@@ -0,0 +1,226 @@

+import logging
+from contextlib import contextmanager, nullcontext
+from dataclasses import dataclass
+from typing import Literal
+import torch
+from nnterp import StandardizedTransformer
+logger = logging.getLogger(__name__)
+from persona_data.synth_persona import PersonaData
+from persona_data.prompts import (
+    format_biography_prompt,
+    format_templated_prompt,
+    normalize_messages,
+)
+SystemPromptMode = Literal["empty", "templated", "biography", "custom"]
+_CUSTOM_PROMPT_DEFAULT = "You are a helpful assistant."
+@dataclass
+class ChatReply:
+    text: str
+    prompt_tokens: int
+    output_tokens: int
+    past_key_values: object | None
def resolve_system_prompt(
    persona: PersonaData | None,
    mode: SystemPromptMode,
) -> str:
    """Resolve the active system prompt for chat.

    Args:
        persona: Selected persona, if any.
        mode: Prompt mode selected in the UI.

    Returns:
        The rendered system prompt string. An empty string is returned for the
        ``"empty"`` mode and, regardless of mode (``"custom"`` included),
        whenever ``persona`` is ``None``.
    """
    prompt = ""
    if persona is not None:
        if mode == "templated":
            prompt = format_templated_prompt(persona.templated_prompt)
        elif mode == "biography":
            prompt = format_biography_prompt(persona.biography_md)
        elif mode == "custom":
            prompt = _CUSTOM_PROMPT_DEFAULT
    return prompt
+def _format_plain_messages(
+    messages: list[dict[str, str]], add_generation_prompt: bool
+) -> str:
+    """Format messages as plain ``Role: content`` text, used as a last-resort fallback."""
+    lines: list[str] = []
+    for message in messages:
+        role = message["role"]
+        content = message["content"]
+        if role == "system":
+            if content:
+                lines.append(f"System: {content}")
+        elif role == "user":
+            lines.append(f"User: {content}")
+        elif role == "assistant":
+            lines.append(f"Assistant: {content}")
+        else:
+            lines.append(f"{role.title()}: {content}")
+    if add_generation_prompt and (not lines or not lines[-1].startswith("Assistant:")):
+        lines.append("Assistant:")
+    return "\n\n".join(lines)
+def _format_generation_prompt(
+    messages: list[dict[str, str]], tokenizer: object
+) -> tuple[str, int]:
+    """Render messages into a single prompt string and count prompt tokens.
+    Tries the tokenizer's chat template first, falls back to normalized messages,
+    then to a plain-text format if both template attempts fail.
+    """
+    normalized_messages = messages
+    try:
+        prompt = tokenizer.apply_chat_template(
+            normalized_messages,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+    except Exception:
+        logger.debug(
+            "Chat template failed on raw messages, trying normalized", exc_info=True
+        )
+        normalized_messages = normalize_messages(messages)
+        try:
+            prompt = tokenizer.apply_chat_template(
+                normalized_messages,
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+        except Exception:
+            logger.debug(
+                "Chat template failed on normalized messages, falling back to plain format",
+                exc_info=True,
+            )
+            prompt = _format_plain_messages(
+                normalized_messages,
+                add_generation_prompt=True,
+            )
+    prompt_token_count = tokenizer(prompt, return_tensors="pt").input_ids.shape[1]
+    return prompt, prompt_token_count
+@contextmanager
+def _seeded_rng(seed: int | None):
+    """Context manager that forks the RNG state and sets a deterministic seed."""
+    if seed is None:
+        yield
+        return
+    cuda_ctx = torch.random.fork_rng(devices=range(torch.cuda.device_count()))
+    mps_ctx = (
+        torch.random.fork_rng(devices=range(1), device_type="mps")
+        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+        else nullcontext()
+    )
+    with cuda_ctx, mps_ctx:
+        torch.manual_seed(seed)
+        yield
+def generate_chat_reply(
+    model: StandardizedTransformer,
+    messages: list[dict[str, str]],
+    remote: bool,
+    past_key_values: object | None = None,
+    max_new_tokens: int = 256,
+    do_sample: bool = False,
+    temperature: float = 1.0,
+    top_p: float = 1.0,
+    top_k: int = 50,
+    repetition_penalty: float = 1.0,
+    seed: int | None = None,
+) -> ChatReply:
+    """Generate one assistant reply from a full chat history.
+    The helper uses ``model.generate`` so it works with both local and NDIF-backed
+    nnsight models. The full conversation is re-rendered each turn and the cache from
+    the previous turn is reused when available.
+    Args:
+        model: Loaded standardized nnterp model.
+        messages: Full chat history, including any system prompt as the first message.
+        remote: Whether to execute the generation on NDIF.
+        past_key_values: Cache returned by the previous generation step.
+        max_new_tokens: Maximum number of assistant tokens to generate.
+        do_sample: Whether to sample from the model distribution.
+        temperature: Sampling temperature, used only when sampling is enabled.
+        top_p: Nucleus sampling threshold, used only when sampling is enabled.
+        top_k: Top-k cutoff, used only when sampling is enabled.
+        repetition_penalty: Repetition penalty applied during decoding.
+        seed: Optional local RNG seed for sampled generation.
+    Returns:
+        ChatReply with generated text and the updated cache.
+    """
+    tokenizer = model.tokenizer
+    prompt, prompt_token_count = _format_generation_prompt(messages, tokenizer)
+    generation_kwargs: dict[str, object] = {
+        "max_new_tokens": max_new_tokens,
+        "return_dict_in_generate": True,
+        "use_cache": True,
+    }
+    if do_sample:
+        generation_kwargs["do_sample"] = True
+        generation_kwargs["temperature"] = temperature
+        generation_kwargs["top_p"] = top_p
+        generation_kwargs["top_k"] = top_k
+    if repetition_penalty != 1.0:
+        generation_kwargs["repetition_penalty"] = repetition_penalty
+    if past_key_values is not None and not remote:
+        generation_kwargs["past_key_values"] = past_key_values
+    if remote:
+        generation_kwargs["remote"] = True
+        # WARNING: NDIF returns caches on CPU, so cross-turn cache reuse is not stable.
+    with _seeded_rng(seed if do_sample and not remote else None):
+        with model.generate(prompt, **generation_kwargs) as tracer:
+            generated = tracer.result.save()
+    if hasattr(generated, "value") and getattr(generated, "value") is not None:
+        generated = generated.value
+    if not hasattr(generated, "sequences"):
+        raise ValueError("Generation did not return token sequences")
+    sequences = generated.sequences
+    if not isinstance(sequences, torch.Tensor):
+        raise TypeError("Generated sequences must be a tensor")
+    generated_ids = sequences[0, prompt_token_count:]
+    text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
+    output_tokens = int(sequences.shape[1] - prompt_token_count)
+    return ChatReply(
+        text=text,
+        prompt_tokens=prompt_token_count,
+        output_tokens=max(0, output_tokens),
+        past_key_values=(
+            getattr(generated, "past_key_values", None) if not remote else None
+        ),
+    )

utils/chat_export.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from persona_data.environment import get_artifacts_dir
+from utils.artifacts import model_dir_name
+from utils.helpers import slugify
+def build_chat_export_payload(
+    *,
+    model_name: str,
+    dataset_source: str,
+    persona_id: str,
+    persona_name: str | None,
+    panel_label: str | None,
+    prompt_mode: str,
+    system_prompt: str | None,
+    messages: list[dict[str, str]],
+    generation: dict[str, object],
+) -> dict[str, object]:
+    """Build a JSON-serializable snapshot of the current chat session.
+    Args:
+        model_name: Model identifier used for the chat.
+        dataset_source: Human-readable dataset source label.
+        persona_id: Selected persona id.
+        persona_name: Selected persona display name, if available.
+        prompt_mode: Active system prompt mode.
+        messages: Conversation messages without the system prompt.
+        generation: Generation settings used for the chat.
+    Returns:
+        A JSON-serializable dictionary.
+    """
+    return {
+        "model_name": model_name,
+        "dataset_source": dataset_source,
+        "persona": {
+            "id": persona_id,
+            "name": persona_name,
+        },
+        "panel_label": panel_label,
+        "prompt_mode": prompt_mode,
+        "generation": generation,
+        "messages": (
+            [{"role": "system", "content": system_prompt}] if system_prompt else []
+        )
+        + messages,
+    }
+def save_chat_export(
+    *,
+    model_name: str,
+    dataset_source: str,
+    persona_id: str,
+    persona_name: str | None,
+    prompt_mode: str,
+    system_prompt: str | None,
+    messages: list[dict[str, str]],
+    generation: dict[str, object],
+    panel_label: str | None = None,
+) -> Path:
+    """Save the current chat session to ``artifacts/chats`` as JSON.
+    Args:
+        model_name: Model identifier used for the chat.
+        dataset_source: Human-readable dataset source label.
+        persona_id: Selected persona id.
+        persona_name: Selected persona display name, if available.
+        prompt_mode: Active system prompt mode.
+        system_prompt: Current system prompt text, if any.
+        messages: Conversation messages without the system prompt.
+        generation: Generation settings used for the chat.
+    Returns:
+        The path the export was written to.
+    """
+    payload = build_chat_export_payload(
+        model_name=model_name,
+        dataset_source=dataset_source,
+        persona_id=persona_id,
+        persona_name=persona_name,
+        panel_label=panel_label,
+        prompt_mode=prompt_mode,
+        system_prompt=system_prompt,
+        messages=messages,
+        generation=generation,
+    )
+    export_dir = (
+        get_artifacts_dir()
+        / "chats"
+        / model_dir_name(model_name)
+        / slugify(dataset_source)
+        / slugify(persona_id)
+    )
+    export_dir.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+    filename_parts = [
+        timestamp,
+        slugify(persona_name or persona_id),
+        slugify(prompt_mode),
+    ]
+    if panel_label:
+        filename_parts.append(slugify(panel_label))
+    export_path = export_dir / f"{'__'.join(filename_parts)}.json"
+    export_path.write_text(
+        f"{json.dumps(payload, indent=2, ensure_ascii=False)}\n",
+        encoding="utf-8",
+    )
+    return export_path

utils/datasets.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import atexit
+import shutil
+from pathlib import Path
+from tempfile import mkdtemp
+from typing import Any
+import streamlit as st
+from persona_data.synth_persona import SynthPersonaDataset
+from .helpers import DATASET_SOURCES
+from .local_dataset import LocalPersonaDataset
+@st.cache_resource(show_spinner=False)
+def cached_hf_dataset() -> SynthPersonaDataset:
+    """Load the default SynthPersona HuggingFace dataset once."""
+    return SynthPersonaDataset()
+def _upload_cache_dir() -> Path:
+    cache_dir = st.session_state.get("_upload_cache_dir")
+    if cache_dir is None:
+        cache_dir = mkdtemp(prefix="persona_vectors_uploads_")
+        st.session_state["_upload_cache_dir"] = cache_dir
+        # Register cleanup so the temp dir is removed when the server process exits.
+        atexit.register(shutil.rmtree, cache_dir, ignore_errors=True)
+    return Path(cache_dir)
+def _uploaded_file_to_temp_path(uploaded_file: Any, stem: str) -> Path:
+    suffix = Path(uploaded_file.name).suffix or ".jsonl"
+    temp_path = _upload_cache_dir() / f"{stem}{suffix}"
+    data = uploaded_file.getvalue()
+    if temp_path.exists() and temp_path.stat().st_size == len(data):
+        return temp_path
+    temp_path.write_bytes(data)
+    return temp_path
+def load_dataset(
+    dataset_source: str,
+) -> tuple[SynthPersonaDataset | LocalPersonaDataset, str]:
+    """Load the selected dataset source for the UI."""
+    if dataset_source == DATASET_SOURCES[0]:
+        return cached_hf_dataset(), "SynthPersona"
+    personas_file = st.session_state.get("extract__personas_file")
+    qa_file = st.session_state.get("extract__qa_file")
+    if personas_file is None or qa_file is None:
+        raise ValueError("Upload both personas.jsonl and qa.jsonl files")
+    personas_path = _uploaded_file_to_temp_path(personas_file, stem="personas")
+    qa_path = _uploaded_file_to_temp_path(qa_file, stem="qa")
+    return (
+        LocalPersonaDataset(personas_path=personas_path, qa_path=qa_path),
+        "Local upload",
+    )

utils/extraction.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import gc
+import logging
+from dataclasses import dataclass
+import torch
+from nnterp import StandardizedTransformer
+logger = logging.getLogger(__name__)
+from persona_data.environment import get_artifacts_dir
+from persona_data.synth_persona import PersonaData, QAPair
+from persona_vectors.activation_io import save_per_question_vectors
+from persona_vectors.activations import extract_activations
+from persona_data.prompts import (
+    format_biography_prompt,
+    format_messages,
+    format_templated_prompt,
+)
+@dataclass
+class VariantExtractionResult:
+    """Summary of one prompt variant's activation extraction run."""
+    # Prompt variant key the extraction was run for.
+    variant: str
+    # String form of the directory returned by ``save_per_question_vectors``.
+    output_dir: str
+    # Sizes of the first, second and third axes of the extracted vector tensor.
+    n_questions: int
+    n_layers: int
+    d_model: int
+    # Name of the persona the vectors belong to; empty when not provided.
+    persona_name: str = ""
+def _prepare_inputs(
+    tokenizer: object,
+    system_prompt: str,
+    qa_pairs: list[QAPair],
+) -> tuple[list[str], list[torch.Tensor], list[str]]:
+    """Format QA pairs into tokenized prompts with answer-token masks.
+    Args:
+        tokenizer: HuggingFace-compatible tokenizer from the model.
+        system_prompt: System prompt to prepend to each conversation.
+        qa_pairs: List of question-answer pairs to format.
+    Returns:
+        A tuple of (full_texts, token_masks, questions) where full_texts are
+        the rendered prompt strings, token_masks are boolean tensors marking
+        answer tokens, and questions are the raw question strings.
+    """
+    full_texts: list[str] = []
+    token_masks: list[torch.Tensor] = []
+    questions: list[str] = []
+    for qa in qa_pairs:
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": qa.question},
+            {"role": "assistant", "content": qa.answer},
+        ]
+        full_prompt, answer_start = format_messages(messages, tokenizer)
+        seq_len = tokenizer(full_prompt, return_tensors="pt").input_ids.shape[1]
+        full_texts.append(full_prompt)
+        token_masks.append(torch.arange(seq_len) >= answer_start)
+        questions.append(qa.question)
+    return full_texts, token_masks, questions
+def run_extraction(
+    model: StandardizedTransformer,
+    model_name: str,
+    persona: PersonaData,
+    qa_pairs: list[QAPair],
+    variants: list[str],
+    remote: bool,
+) -> list[VariantExtractionResult]:
+    """Run activation extraction and save outputs for selected variants.
+    Args:
+        model: Loaded standardized nnterp model.
+        model_name: HuggingFace model identifier used for artifact paths.
+        persona: The persona whose QA pairs are being extracted.
+        qa_pairs: Question-answer pairs to run extraction on.
+        variants: Prompt variants to extract (e.g. ``"templated"``, ``"biography"``).
+        remote: Whether to execute on NDIF.
+    Returns:
+        A list of extraction results, one per variant.
+    Raises:
+        ValueError: If ``qa_pairs`` is empty or an unsupported variant is given.
+    """
+    if not qa_pairs:
+        raise ValueError("No QA pairs selected for extraction")
+    tokenizer = model.tokenizer
+    activations_dir = get_artifacts_dir() / "activations"
+    system_prompt_by_variant = {
+        "templated": format_templated_prompt(persona.templated_prompt),
+        "biography": format_biography_prompt(persona.biography_md),
+    }
+    results: list[VariantExtractionResult] = []
+    for variant in variants:
+        if variant not in system_prompt_by_variant:
+            raise ValueError(f"Unsupported variant: {variant}")
+        full_texts, token_masks, questions = _prepare_inputs(
+            tokenizer=tokenizer,
+            system_prompt=system_prompt_by_variant[variant],
+            qa_pairs=qa_pairs,
+        )
+        per_question_vectors = extract_activations(
+            model=model,
+            full_texts=full_texts,
+            token_masks=token_masks,
+            remote=remote,
+        )
+        artifact_dir = save_per_question_vectors(
+            root_dir=activations_dir,
+            model_name=model_name,
+            prompt_variant=variant,
+            persona_id=persona.id,
+            persona_name=persona.name,
+            per_question_vectors=per_question_vectors,
+            questions=questions,
+        )
+        results.append(
+            VariantExtractionResult(
+                variant=variant,
+                output_dir=str(artifact_dir),
+                n_questions=per_question_vectors.shape[0],
+                n_layers=per_question_vectors.shape[1],
+                d_model=per_question_vectors.shape[2],
+                persona_name=persona.name,
+            )
+        )
+        # Free activation tensors between variants to keep memory bounded.
+        del per_question_vectors, full_texts, token_masks
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        if hasattr(torch, "mps") and hasattr(torch.mps, "empty_cache"):
+            torch.mps.empty_cache()
+    return results

utils/helpers.py ADDED Viewed

	@@ -0,0 +1,66 @@

+from persona_data.synth_persona import PersonaData
+# Variant key -> human-readable label mapping
+VARIANT_LABELS = {
+    "empty": "None",
+    "templated": "Template",
+    "biography": "Biography",
+    "custom": "Custom",
+}
+# Variants that correspond to actual system prompts (excludes "empty")
+PROMPT_VARIANTS = ["templated", "biography"]
+# For selectbox options: list of labels in definition order
+MODE_LABELS = list(VARIANT_LABELS.values())
+# Reverse lookup: label -> key
+MODE_LABEL_TO_KEY = {v: k for k, v in VARIANT_LABELS.items()}
+DATASET_SOURCES = ["HuggingFace: synth-persona", "Local JSONL upload"]
+ANALYSIS_MODES = ["Cosine similarity", "PCA", "UMAP"]
+ANALYSIS_LABELS = {
+    "PCA": ("PCA", "PC1", "PC2"),
+    "UMAP": ("UMAP", "UMAP 1", "UMAP 2"),
+}
+ANALYSIS_HELP_TEXT = {
+    "Cosine similarity": "Compare layer-wise alignment between variants.",
+    "PCA": "Project the selected layers into a global 2D view.",
+    "UMAP": "Project the selected layers into a local-neighborhood 2D view.",
+}
+def slugify(value: str) -> str:
+    """Convert a string to a slug safe for filenames and URLs."""
+    import re
+    return re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_") or "unknown"
+def widget_key(*parts: str) -> str:
+    """Generate a namespaced Streamlit widget key from parts."""
+    return "::".join(parts)
+def prompt_variant_label(variant: str) -> str:
+    """Return a human-friendly prompt-variant label."""
+    return VARIANT_LABELS.get(variant, variant.title())
+def persona_label(persona: PersonaData) -> str:
+    """Format a persona for selection widgets."""
+    return f"{persona.name} ({persona.id})"
+def persona_display_label(persona_id: str, persona_name: str | None) -> str:
+    """Format a persona id with an optional display name."""
+    if persona_name:
+        return f"{persona_name} ({persona_id})"
+    return persona_id

utils/local_dataset.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import json
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterator, Literal
+from persona_data.synth_persona import PersonaData, QAPair
+@dataclass
+class LocalPersonaDataset:
+    """Dataset loaded from local JSONL files."""
+    personas_path: Path
+    qa_path: Path
+    def __post_init__(self) -> None:
+        with self.personas_path.open() as f:
+            self._personas: list[PersonaData] = []
+            for line in f:
+                if not line.strip():
+                    continue
+                data = json.loads(line)
+                self._personas.append(
+                    PersonaData(
+                        id=data["id"],
+                        persona=data["persona"],
+                        templated_prompt=data["templated_prompt"],
+                        biography_md=data["biography_md"],
+                    )
+                )
+        self._qa: dict[str, list[QAPair]] = defaultdict(list)
+        with self.qa_path.open() as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                data = json.loads(line)
+                self._qa[data["id"]].append(
+                    QAPair(
+                        qid=data["qid"],
+                        type=data["type"],
+                        question=data["question"],
+                        answer=data["answer"],
+                        difficulty=data["difficulty"],
+                    )
+                )
+    def __len__(self) -> int:
+        return len(self._personas)
+    def __iter__(self) -> Iterator[PersonaData]:
+        return iter(self._personas)
+    def __getitem__(self, idx: int) -> PersonaData:
+        return self._personas[idx]
+    def get_qa(
+        self,
+        persona_id: str,
+        type: Literal["explicit", "implicit"] | None = None,
+        difficulty: int | list[int] | None = None,
+    ) -> list[QAPair]:
+        pairs = self._qa.get(persona_id, [])
+        if type is not None:
+            pairs = [pair for pair in pairs if pair.type == type]
+        if difficulty is not None:
+            levels = {difficulty} if isinstance(difficulty, int) else set(difficulty)
+            pairs = [pair for pair in pairs if pair.difficulty in levels]
+        return pairs

utils/runtime.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import logging
+import streamlit as st
+logger = logging.getLogger(__name__)
+@st.cache_data(show_spinner=False, ttl=30)
+def list_remote_models() -> list[str]:
+    """Return the NDIF language models that are currently running."""
+    import nnsight
+    try:
+        status = nnsight.ndif_status()
+    except Exception:
+        logger.warning("Failed to fetch NDIF status", exc_info=True)
+        return []
+    model_names: list[str] = []
+    for entry in status.values():
+        if not isinstance(entry, dict):
+            continue
+        if entry.get("model_class") not in {"LanguageModel", "StandardizedTransformer"}:
+            continue
+        state = entry.get("state")
+        state_name = getattr(state, "name", None) or getattr(state, "value", None)
+        if state_name != "RUNNING":
+            continue
+        repo_id = entry.get("repo_id")
+        if isinstance(repo_id, str):
+            model_names.append(repo_id)
+    return sorted(set(model_names))
+@st.cache_resource(show_spinner=False, max_entries=1)
+def cached_model(model_name: str, remote: bool):
+    """Load and cache a standardized nnterp model.
+    Streamlit reruns this app on every interaction, so caching keeps one loaded
+    model instance per ``(model_name, remote)`` instead of reloading weights on
+    every widget change.
+    """
+    from nnterp import StandardizedTransformer
+    # HACK: For now do it like this because of the bug.
+    # model = StandardizedTransformer(model_name, remote=True)
+    return StandardizedTransformer(model_name)

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff